import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from sklearn import metrics
emb_type_list = ["qc_merge", "qid", "qaid", "qcid_merge"]

# Maps "{model_name}-{emb_type}" to the embedding scheme that model actually
# uses; combinations not listed here fall back to the raw emb_type.
emb_type_map = {"iekt-qid": "qc_merge",
                "iekt-qc_merge": "qc_merge",
                "iekt_ce-qid": "qc_merge",
                "dkt_que-qid": "qaid_qc"
                }


class QueEmb(nn.Module):
    def __init__(self, num_q, num_c, emb_size, model_name, device='cpu', emb_type='qid', emb_path="", pretrain_dim=768):
        """Question/concept embedding layer shared by question-level KT models.

        Args:
            num_q (int): num of question
            num_c (int): num of concept
            emb_size (int): embedding dimension
            model_name (str): host model name; "{model_name}-{emb_type}" is
                looked up in ``emb_type_map`` to pick the effective scheme.
            device (str, optional): device. Defaults to 'cpu'.
            emb_type (str, optional): how to encode question id. Defaults to 'qid'.
                qid: question_id one-hot;
                qaid: question_id + r*question_num one-hot;
                qc_merge: question emb + avg(concept emb).
            emb_path (str, optional): path of pretrained embeddings (unused here). Defaults to "".
            pretrain_dim (int, optional): dim of pretrained embeddings. Defaults to 768.
        """
        super().__init__()
        self.device = device
        self.num_q = num_q
        self.num_c = num_c
        self.emb_size = emb_size
        # Resolve the effective emb_type: prefer the model-specific mapping,
        # otherwise strip the "{model_name}-" prefix to recover the raw type.
        # Bug fix: the original stripped f"{model_name}_" (underscore), which
        # can never match the "-" separator used to build tmp_emb_type, so
        # every unmapped model ended up with an invalid type like "dkt-qid"
        # and no embedding table was ever created.
        tmp_emb_type = f"{model_name}-{emb_type}"
        emb_type = emb_type_map.get(tmp_emb_type, tmp_emb_type.replace(f"{model_name}-", ""))
        print(f"emb_type is {emb_type}")
        self.emb_type = emb_type
        self.emb_path = emb_path
        self.pretrain_dim = pretrain_dim
        if emb_type in ["qc_merge", "qaid_qc"]:
            # concept embedding; a zero padding row is prepended lazily in get_avg_skill_emb
            self.concept_emb = nn.Parameter(torch.randn(self.num_c, self.emb_size).to(device), requires_grad=True)
            self.que_emb = nn.Embedding(self.num_q, self.emb_size)  # question embedding
            self.que_c_linear = nn.Linear(2 * self.emb_size, self.emb_size)
        if emb_type == "qaid_c":
            self.que_c_linear = nn.Linear(2 * self.emb_size, self.emb_size)
        if emb_type.startswith("qaid"):
            # (question, response) interaction embedding: index = q + num_q * r
            self.interaction_emb = nn.Embedding(self.num_q * 2, self.emb_size)
        if emb_type.startswith("qid"):
            self.q_emb = nn.Embedding(self.num_q, self.emb_size)
        # NOTE(review): for "qc_merge" the forward output is 2*emb_size wide,
        # so output_emb_dim does not reflect that case — confirm with callers.
        self.output_emb_dim = emb_size

    def get_avg_skill_emb(self, c):
        """Average the concept embeddings of each step; -1 entries are padding.

        Args:
            c: LongTensor [batch, seq_len, max_concepts] concept ids, -1 padded.
        Returns:
            Tensor [batch, seq_len, emb_size].
        """
        # prepend a zero row so that shifted index 0 (= padding -1) maps to zeros
        concept_emb_cat = torch.cat(
            [torch.zeros(1, self.emb_size).to(self.device),
             self.concept_emb], dim=0)
        # shift c by one so padding (-1) becomes index 0
        related_concepts = (c + 1).long()
        # [batch_size, seq_len, emb_dim]
        concept_emb_sum = concept_emb_cat[related_concepts, :].sum(axis=-2)
        # [batch_size, seq_len, 1]: number of real (non-padding) concepts
        concept_num = torch.where(related_concepts != 0, 1, 0).sum(
            axis=-1).unsqueeze(-1)
        # avoid division by zero for all-padding steps
        concept_num = torch.where(concept_num == 0, 1, concept_num)
        concept_avg = concept_emb_sum / concept_num
        return concept_avg

    def forward(self, q, c, r=None):
        """Encode a batch of interactions according to ``self.emb_type``.

        Args:
            q: LongTensor [batch, seq_len] question ids.
            c: LongTensor [batch, seq_len, max_concepts] concept ids, -1 padded.
            r: LongTensor [batch, seq_len] responses; required by "qaid*" types.
        Returns:
            Tensor [batch, seq_len, emb_size] (2*emb_size for "qc_merge").
        """
        emb_type = self.emb_type
        if "qc_merge" in emb_type:
            concept_avg = self.get_avg_skill_emb(c)  # [batch,max_len-1,emb_size]
            que_emb = self.que_emb(q)  # [batch,max_len-1,emb_size]
            que_c_emb = torch.cat([concept_avg, que_emb], dim=-1)  # [batch,max_len-1,2*emb_size]
        if emb_type == "qaid":
            x = q + self.num_q * r
            xemb = self.interaction_emb(x)  # [batch,max_len-1,emb_size]
        elif emb_type == "qid":
            xemb = self.q_emb(q)  # [batch,max_len-1,emb_size]
        elif emb_type == "qaid+qc_merge":
            x = q + self.num_q * r
            xemb = self.interaction_emb(x)  # [batch,max_len-1,emb_size]
            que_c_emb = self.que_c_linear(que_c_emb)  # [batch,max_len-1,emb_size]
            xemb = xemb + que_c_emb
        elif emb_type == "qc_merge":
            xemb = que_c_emb
        elif emb_type == "qaid_qc":
            x = q + self.num_q * r
            xemb = self.interaction_emb(x)
            concept_avg = self.get_avg_skill_emb(c)  # [batch,max_len-1,emb_size]
            xemb = torch.cat([xemb, concept_avg], dim=-1)
            xemb = self.que_c_linear(xemb)
        return xemb
from pykt.utils import set_seed
class QueBaseModel(nn.Module):
    """Base class for question-level knowledge-tracing models.

    Offers a Keras-like workflow (``compile`` -> ``train`` -> ``evaluate`` /
    ``predict``) plus multi-step-ahead evaluation.  Subclasses must implement
    ``train_one_step`` and ``predict_one_step`` and store the wrapped network
    as ``self.model``.
    """

    def __init__(self, model_name, emb_type, emb_path, pretrain_dim, device, seed=0):
        super().__init__()
        self.model_name = model_name
        self.emb_type = emb_type
        self.emb_path = emb_path
        self.pretrain_dim = pretrain_dim
        self.device = device
        # set_seed(seed)

    def compile(self, optimizer, lr=0.001,
                loss='binary_crossentropy',
                metrics=None):
        """Configure the model for training.

        :param optimizer: String (name of optimizer) or optimizer instance. See [optimizers](https://pytorch.org/docs/stable/optim.html).
        :param loss: String (name of objective function) or objective function. See [losses](https://pytorch.org/docs/stable/nn.functional.html#loss-functions).
        :param metrics: List of metrics to be evaluated by the model during training and testing. Typically you will use `metrics=['accuracy']`.
        ref from https://github.com/shenweichen/DeepCTR-Torch/blob/2cd84f305cb50e0fd235c0f0dd5605c8114840a2/deepctr_torch/models/basemodel.py
        """
        self.lr = lr
        self.opt = self._get_optimizer(optimizer)
        self.loss_func = self._get_loss_func(loss)

    def _get_loss_func(self, loss):
        """Resolve a loss name to a callable; callables pass through unchanged."""
        if isinstance(loss, str):
            if loss == "binary_crossentropy":
                loss_func = F.binary_cross_entropy
            elif loss == "mse":
                loss_func = F.mse_loss
            elif loss == "mae":
                loss_func = F.l1_loss
            else:
                raise NotImplementedError
        else:
            loss_func = loss
        return loss_func

    def _get_optimizer(self, optimizer):
        """Resolve an optimizer name to an instance over ``self.model``'s parameters."""
        if isinstance(optimizer, str):
            if optimizer == 'gd':
                optimizer = torch.optim.SGD(self.model.parameters(), lr=self.lr)
            elif optimizer == 'adagrad':
                optimizer = torch.optim.Adagrad(self.model.parameters(), lr=self.lr)
            elif optimizer == 'adadelta':
                optimizer = torch.optim.Adadelta(self.model.parameters(), lr=self.lr)
            elif optimizer == 'adam':
                optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
            else:
                # bug fix: the original referenced the non-existent
                # ``self.optimizer``, turning this ValueError into AttributeError
                raise ValueError("Unknown Optimizer: " + optimizer)
        return optimizer

    def train_one_step(self, data, process=True):
        """Run one training step; must be implemented by subclasses.

        Returns (predictions, loss)."""
        # bug fix: ``NotImplemented`` is a constant (not callable, not an
        # exception); the intended exception type is NotImplementedError
        raise NotImplementedError()

    def predict_one_step(self, data, process=True):
        """Run one prediction step; must be implemented by subclasses."""
        raise NotImplementedError()

    def get_loss(self, ys, rshft, sm):
        """Masked loss between predictions ``ys`` and shifted targets ``rshft``.

        Args:
            ys: predicted scores, same shape as ``rshft``.
            rshft: ground-truth responses shifted by one step.
            sm: BoolTensor sequence mask selecting valid positions.
        """
        y_pred = torch.masked_select(ys, sm)
        y_true = torch.masked_select(rshft, sm)
        loss = self.loss_func(y_pred.double(), y_true.double())
        return loss

    def _save_model(self):
        """Save ``self.model``'s weights into ``self.save_dir`` (set by train)."""
        torch.save(self.model.state_dict(), os.path.join(self.save_dir, self.model.emb_type + "_model.ckpt"))

    def load_model(self, save_dir):
        """Load weights previously written by ``_save_model`` from ``save_dir``."""
        net = torch.load(os.path.join(save_dir, self.emb_type + "_model.ckpt"))
        self.model.load_state_dict(net)

    def batch_to_device(self, data, process=True):
        """Convert a raw dataloader batch into the dict consumed by *_one_step.

        The "c*" keys hold the full-length sequences (first step prepended to
        the shifted sequence); plain keys hold the unshifted inputs, "*shft"
        the one-step-ahead targets, and "m"/"sm" the masks.  With
        ``process=False`` the batch is assumed to be converted already.
        """
        if not process:
            return data
        dcur = data
        data_new = {}
        data_new['cq'] = torch.cat((dcur["qseqs"][:, 0:1], dcur["shft_qseqs"]), dim=1)
        data_new['cc'] = torch.cat((dcur["cseqs"][:, 0:1], dcur["shft_cseqs"]), dim=1)
        data_new['cr'] = torch.cat((dcur["rseqs"][:, 0:1], dcur["shft_rseqs"]), dim=1)
        data_new['ct'] = torch.cat((dcur["tseqs"][:, 0:1], dcur["shft_tseqs"]), dim=1)
        data_new['q'] = dcur["qseqs"]
        data_new['c'] = dcur["cseqs"]
        data_new['r'] = dcur["rseqs"]
        data_new['t'] = dcur["tseqs"]
        data_new['qshft'] = dcur["shft_qseqs"]
        data_new['cshft'] = dcur["shft_cseqs"]
        data_new['rshft'] = dcur["shft_rseqs"]
        data_new['tshft'] = dcur["shft_tseqs"]
        data_new['m'] = dcur["masks"]
        data_new['sm'] = dcur["smasks"]
        return data_new

    def train(self, train_dataset, valid_dataset, batch_size=16, valid_batch_size=None, num_epochs=32, test_loader=None, test_window_loader=None, save_dir="tmp", save_model=False, patient=10, shuffle=True, process=True):
        """Train with early stopping on validation AUC.

        Args:
            train_dataset / valid_dataset: torch Datasets of raw batches.
            batch_size (int): training batch size; ``valid_batch_size``
                defaults to it when None.
            num_epochs (int): maximum number of epochs.
            save_dir (str): directory for checkpoints (created if missing).
            save_model (bool): save a checkpoint whenever valid AUC improves.
            patient (int): early-stopping patience in epochs.
        Returns:
            (testauc, testacc, window_testauc, window_testacc,
             validauc, validacc, best_epoch); test metrics are -1 placeholders.
        """
        self.save_dir = save_dir
        os.makedirs(self.save_dir, exist_ok=True)
        if valid_batch_size is None:
            valid_batch_size = batch_size
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)
        max_auc, best_epoch = 0, -1
        train_step = 0
        for i in range(1, num_epochs + 1):
            loss_mean = []
            for data in train_loader:
                train_step += 1
                self.model.train()  # ensure train mode (evaluate() switches to eval)
                y, loss = self.train_one_step(data, process=process)
                self.opt.zero_grad()
                loss.backward()  # compute gradients
                self.opt.step()  # update model's parameters
                loss_mean.append(loss.detach().cpu().numpy())
            loss_mean = np.mean(loss_mean)
            auc, acc = self.evaluate(valid_dataset, batch_size=valid_batch_size)
            if auc > max_auc:
                if save_model:
                    self._save_model()
                max_auc = auc
                best_epoch = i
            # test metrics are placeholders; test evaluation is done elsewhere
            testauc, testacc = -1, -1
            window_testauc, window_testacc = -1, -1
            validauc, validacc = round(auc, 4), round(acc, 4)
            testauc, testacc, window_testauc, window_testacc = round(testauc, 4), round(testacc, 4), round(window_testauc, 4), round(window_testacc, 4)
            max_auc = round(max_auc, 4)
            print(f"Epoch: {i}, validauc: {validauc}, validacc: {validacc}, best epoch: {best_epoch}, best auc: {max_auc}, loss: {loss_mean}, emb_type: {self.model.emb_type}, model: {self.model.model_name}, save_dir: {self.save_dir}")
            print(f"            testauc: {testauc}, testacc: {testacc}, window_testauc: {window_testauc}, window_testacc: {window_testacc}")
            if i - best_epoch >= patient:
                break
        return testauc, testacc, window_testauc, window_testacc, validauc, validacc, best_epoch

    def evaluate(self, dataset, batch_size, acc_threshold=0.5):
        """Return (auc, acc) over ``dataset``; acc thresholds scores at ``acc_threshold``."""
        ps, ts = self.predict(dataset, batch_size=batch_size)
        auc = metrics.roc_auc_score(y_true=ts, y_score=ps)
        prelabels = [1 if p >= acc_threshold else 0 for p in ps]
        acc = metrics.accuracy_score(ts, prelabels)
        return auc, acc

    def _parser_row(self, row, data_config, ob_portions=0.5):
        """Parse one test.csv row into history and full-sequence tensors.

        Returns (hist_q, hist_c, hist_r, cq_full, cc_full, cr_full, seq_len,
        start_index) where hist_* cover at most ``maxlen`` steps before
        ``start_index`` and carry a leading batch dimension of 1.
        """
        max_concepts = data_config["max_concepts"]
        max_len = data_config["maxlen"]
        start_index, seq_len = self._get_multi_ahead_start_index(row['concepts'], ob_portions)
        questions = [int(x) for x in row["questions"].split(",")]
        responses = [int(x) for x in row["responses"].split(",")]
        concept_list = []
        for concept in row["concepts"].split(","):
            if concept == "-1":
                skills = [-1] * max_concepts
            else:
                # multiple concepts per question are joined with "_"
                skills = [int(_) for _ in concept.split("_")]
                skills = skills + [-1] * (max_concepts - len(skills))
            concept_list.append(skills)
        cq_full = torch.tensor(questions).to(self.device)
        cc_full = torch.tensor(concept_list).to(self.device)
        cr_full = torch.tensor(responses).to(self.device)
        history_start_index = max(start_index - max_len, 0)
        hist_q = cq_full[history_start_index:start_index].unsqueeze(0)
        hist_c = cc_full[history_start_index:start_index].unsqueeze(0)
        hist_r = cr_full[history_start_index:start_index].unsqueeze(0)
        return hist_q, hist_c, hist_r, cq_full, cc_full, cr_full, seq_len, start_index

    def _get_multi_ahead_start_index(self, cc, ob_portions=0.5):
        """Compute where the predicted part of a sequence starts.

        Args:
            cc (str): comma-separated concept sequence ("-1" marks padding).
            ob_portions (float, optional): portion of the sequence treated as
                observed history. Defaults to 0.5.
        Returns:
            (start_index, seq_len): clamped so that at least one step is
            observed and at least one step is predicted.
        """
        filter_cc = [x for x in cc.split(",") if x != "-1"]
        seq_len = len(filter_cc)
        start_index = int(seq_len * ob_portions)
        if start_index == 0:
            start_index = 1
        if start_index == seq_len:
            start_index = seq_len - 1
        return start_index, seq_len

    def _evaluate_multi_ahead_accumulative(self, data_config, batch_size=1, ob_portions=0.5, acc_threshold=0.5):
        """Multi-ahead evaluation feeding the model's own (binarized) past
        predictions as responses for the unobserved part of each sequence."""
        testf = os.path.join(data_config["dpath"], "test.csv")
        df = pd.read_csv(testf)
        print("total sequence length is {}".format(len(df)))
        y_pred_list = []
        y_true_list = []
        for i, row in df.iterrows():
            hist_q, hist_c, hist_r, cq_full, cc_full, cr_full, seq_len, start_index = self._parser_row(row, data_config=data_config, ob_portions=ob_portions)
            if i % 10 == 0:
                print(f"predict step {i}")
            # seed the rollout with the true response at the split point
            seq_y_pred_hist = [cr_full[start_index]]
            # renamed from ``i`` to avoid shadowing the row index above
            for t in range(start_index, seq_len):
                cur_q = cq_full[start_index:t + 1].unsqueeze(0)
                cur_c = cc_full[start_index:t + 1].unsqueeze(0)
                # responses for the predicted span come from earlier predictions
                cur_r = torch.tensor(seq_y_pred_hist).unsqueeze(0).to(self.device)
                cq = torch.cat([hist_q, cur_q], axis=1)
                cc = torch.cat([hist_c, cur_c], axis=1)
                cr = torch.cat([hist_r, cur_r], axis=1)
                data = [cq, cc, cr]
                cq, cc, cr = [x.to(self.device) for x in data]  # full sequence, [1,n]
                q, c, r = [x[:, :-1].to(self.device) for x in data]  # [0,n-1]
                qshft, cshft, rshft = [x[:, 1:].to(self.device) for x in data]  # [1,n]
                data = {"cq": cq, "cc": cc, "cr": cr, "q": q, "c": c, "r": r, "qshft": qshft, "cshft": cshft, "rshft": rshft}
                y_last_pred = self.predict_one_step(data, process=False)[:, -1][0]
                seq_y_pred_hist.append(1 if y_last_pred > acc_threshold else 0)
                y_true_list.append(cr_full[t].item())
                y_pred_list.append(y_last_pred.item())
        print(f"num of y_pred_list is {len(y_pred_list)}")
        print(f"num of y_true_list is {len(y_true_list)}")
        y_pred_list = np.array(y_pred_list)
        y_true_list = np.array(y_true_list)
        auc = metrics.roc_auc_score(y_true_list, y_pred_list)
        acc = metrics.accuracy_score(y_true_list, [1 if p >= acc_threshold else 0 for p in y_pred_list])
        return auc, acc

    def _evaluate_multi_ahead_help(self, data_config, batch_size, ob_portions=0.5, acc_threshold=0.5):
        """Non-accumulative multi-ahead evaluation: each future step is
        predicted from the observed history plus the true responses.

        Args:
            data_config (dict): data_config
            ob_portions (float, optional): portion of observed student
                interactions. Defaults to 0.5.
        Returns:
            (auc, acc) over all predicted steps.
        """
        testf = os.path.join(data_config["dpath"], "test.csv")
        df = pd.read_csv(testf)
        print("total sequence length is {}".format(len(df)))
        y_pred_list = []
        y_true_list = []
        for i, row in df.iterrows():
            hist_q, hist_c, hist_r, cq_full, cc_full, cr_full, seq_len, start_index = self._parser_row(row, data_config=data_config, ob_portions=ob_portions)
            if i % 10 == 0:
                print(f"predict step {i}")
            cq_list = []
            cc_list = []
            cr_list = []
            # renamed from ``i`` to avoid shadowing the row index above
            for t in range(start_index, seq_len):
                cur_q = cq_full[t:t + 1].unsqueeze(0)
                cur_c = cc_full[t:t + 1].unsqueeze(0)
                cur_r = cr_full[t:t + 1].unsqueeze(0)
                cq_list.append(torch.cat([hist_q, cur_q], axis=1))
                cc_list.append(torch.cat([hist_c, cur_c], axis=1))
                cr_list.append(torch.cat([hist_r, cur_r], axis=1))
                y_true_list.append(cr_full[t].item())
            cq_ahead = torch.cat(cq_list, axis=0)
            cc_ahead = torch.cat(cc_list, axis=0)
            cr_ahead = torch.cat(cr_list, axis=0)
            tensor_dataset = TensorDataset(cq_ahead, cc_ahead, cr_ahead)
            dataloader = DataLoader(dataset=tensor_dataset, batch_size=batch_size)
            for data in dataloader:
                cq, cc, cr = [x.to(self.device) for x in data]  # full sequence, [1,n]
                q, c, r = [x[:, :-1].to(self.device) for x in data]  # [0,n-1]
                qshft, cshft, rshft = [x[:, 1:].to(self.device) for x in data]  # [1,n]
                data = {"cq": cq, "cc": cc, "cr": cr, "q": q, "c": c, "r": r, "qshft": qshft, "cshft": cshft, "rshft": rshft}
                y = self.predict_one_step(data, process=False)[:, -1].detach().cpu().numpy().flatten()
                y_pred_list.extend(list(y))
        print(f"num of y_pred_list is {len(y_pred_list)}")
        print(f"num of y_true_list is {len(y_true_list)}")
        y_pred_list = np.array(y_pred_list)
        y_true_list = np.array(y_true_list)
        auc = metrics.roc_auc_score(y_true_list, y_pred_list)
        acc = metrics.accuracy_score(y_true_list, [1 if p >= acc_threshold else 0 for p in y_pred_list])
        return auc, acc

    def evaluate_multi_ahead(self, data_config, batch_size, ob_portions=0.5, acc_threshold=0.5, accumulative=False):
        """Predictions in the multi-step ahead prediction scenario.

        Args:
            data_config (dict): data_config
            batch_size (int): batch_size
            ob_portions (float, optional): portions of observed student interactions. Defaults to 0.5.
            accumulative (bool, optional): `True` for accumulative prediction and `False` for non-accumulative prediction. Defaults to False.
            acc_threshold (float, optional): threshold for accuracy. Defaults to 0.5.
        Returns:
            dict: {"auc": auc, "acc": acc}
        """
        self.model.eval()
        with torch.no_grad():
            if accumulative:
                print("predict use accumulative")
                auc, acc = self._evaluate_multi_ahead_accumulative(data_config, batch_size=batch_size, ob_portions=ob_portions, acc_threshold=acc_threshold)
            else:
                print("predict use no accumulative")
                auc, acc = self._evaluate_multi_ahead_help(data_config, batch_size=batch_size, ob_portions=ob_portions, acc_threshold=acc_threshold)
        return {"auc": auc, "acc": acc}

    def predict(self, dataset, batch_size, return_ts=False, process=True):
        """Run inference over ``dataset`` and return masked scores/targets.

        Args:
            dataset: torch Dataset yielding raw batches.
            batch_size (int): evaluation batch size.
            return_ts (bool, optional): unused; kept for interface compatibility.
            process (bool, optional): whether batches still need conversion by
                ``batch_to_device``. Defaults to True.
        Returns:
            (ps, ts): numpy arrays of predictions and ground-truth labels.
        """
        test_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
        self.model.eval()
        with torch.no_grad():
            y_trues = []
            y_scores = []
            for data in test_loader:
                new_data = self.batch_to_device(data, process=process)
                # bug fix: forward ``process`` so an already-converted batch
                # is not run through batch_to_device a second time inside
                # predict_one_step (the original always used process=True)
                y = self.predict_one_step(data, process=process)
                y = torch.masked_select(y, new_data['sm']).detach().cpu()
                t = torch.masked_select(new_data['rshft'], new_data['sm']).detach().cpu()
                y_trues.append(t.numpy())
                y_scores.append(y.numpy())
        ts = np.concatenate(y_trues, axis=0)
        ps = np.concatenate(y_scores, axis=0)
        print(f"ts.shape: {ts.shape}, ps.shape: {ps.shape}")
        return ps, ts