Source code for pykt.models.atkt

# coding: utf-8
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
from .utils import ut_mask

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class ATKT(nn.Module):
    def __init__(self, num_c, skill_dim, answer_dim, hidden_dim, attention_dim=80, epsilon=10, beta=0.2, dropout=0.2, emb_type="qid", emb_path="", fix=True):
        super(ATKT, self).__init__()
        self.model_name = "atkt"
        self.fix = fix
        print(f"fix: {fix}")
        if self.fix:
            self.model_name = "atktfix"
        self.emb_type = emb_type
        self.skill_dim = skill_dim
        self.answer_dim = answer_dim
        self.hidden_dim = hidden_dim
        self.num_c = num_c
        self.epsilon = epsilon
        self.beta = beta
        self.rnn = nn.LSTM(self.skill_dim + self.answer_dim, self.hidden_dim, batch_first=True)
        self.dropout_layer = nn.Dropout(dropout)
        self.fc = nn.Linear(self.hidden_dim * 2, self.num_c)
        self.sig = nn.Sigmoid()

        # The last row of each embedding table is zero-initialized and
        # reserved as a padding entry.
        self.skill_emb = nn.Embedding(self.num_c + 1, self.skill_dim)
        self.skill_emb.weight.data[-1] = 0

        self.answer_emb = nn.Embedding(2 + 1, self.answer_dim)
        self.answer_emb.weight.data[-1] = 0

        self.attention_dim = attention_dim
        self.mlp = nn.Linear(self.hidden_dim, self.attention_dim)
        self.similarity = nn.Linear(self.attention_dim, 1, bias=False)
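    # Illustration only (not part of the module): the extra embedding row
    # added above acts as zero-initialized padding. Note it is not frozen via
    # padding_idx, so gradients can still update it during training.
    #
    #   >>> emb = nn.Embedding(5 + 1, 4)
    #   >>> emb.weight.data[-1] = 0
    #   >>> emb(torch.tensor([5]))   # the padding id maps to the zero vector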
    def attention_module(self, lstm_output):
        att_w = self.mlp(lstm_output)
        att_w = torch.tanh(att_w)
        att_w = self.similarity(att_w)

        if self.fix:
            # Masked self-attention: each step may only attend to itself and
            # earlier steps, so no future information leaks into the output.
            attn_mask = ut_mask(lstm_output.shape[1])
            att_w = att_w.transpose(1, 2).expand(lstm_output.shape[0], lstm_output.shape[1], lstm_output.shape[1]).clone()
            att_w = att_w.masked_fill_(attn_mask, float("-inf"))
            alphas = torch.nn.functional.softmax(att_w, dim=-1)
            attn_output = torch.bmm(alphas, lstm_output)
        else:
            # The original official implementation: softmax over the whole
            # sequence. The attention weights of the entire sequence sum to 1,
            # so when computing early steps every weight is << 1 -- is that a
            # problem? With few interactions the history contributes little;
            # with many interactions the history contributes more.
            alphas = nn.Softmax(dim=1)(att_w)
            attn_output = alphas * lstm_output

        # Accumulate the attended output and subtract the current step, so
        # each position carries only the weighted sum of strictly earlier steps.
        attn_output_cum = torch.cumsum(attn_output, dim=1)
        attn_output_cum_1 = attn_output_cum - attn_output

        final_output = torch.cat((attn_output_cum_1, lstm_output), 2)
        return final_output
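    # Illustration only: the `fix` branch above assumes ut_mask returns a
    # strictly upper-triangular boolean mask (an assumption about pykt.utils;
    # rebuilt here with torch.triu). Filling masked scores with -inf before
    # the softmax zeroes out all future positions while each row of weights
    # still sums to 1:
    #
    #   >>> seq_len = 4
    #   >>> attn_mask = torch.triu(torch.ones(seq_len, seq_len), diagonal=1).bool()
    #   >>> scores = torch.randn(1, seq_len, seq_len).masked_fill(attn_mask, float("-inf"))
    #   >>> alphas = torch.softmax(scores, dim=-1)
    #   >>> alphas[0]            # row t is zero after column t
    #   >>> alphas[0].sum(-1)    # tensor([1., 1., 1., 1.])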
    def forward(self, skill, answer, perturbation=None):
        skill_embedding = self.skill_emb(skill)
        answer_embedding = self.answer_emb(answer)

        skill_answer = torch.cat((skill_embedding, answer_embedding), 2)
        answer_skill = torch.cat((answer_embedding, skill_embedding), 2)

        # Encode correctness by concatenation order: (skill, answer) for
        # correct responses, (answer, skill) for incorrect ones.
        answer = answer.unsqueeze(2).expand_as(skill_answer)
        skill_answer_embedding = torch.where(answer == 1, skill_answer, answer_skill)

        # Note: this is an alias, so the in-place add below also perturbs
        # the returned embedding.
        skill_answer_embedding1 = skill_answer_embedding
        if perturbation is not None:
            skill_answer_embedding += perturbation

        out, _ = self.rnn(skill_answer_embedding)
        out = self.attention_module(out)
        res = self.sig(self.fc(self.dropout_layer(out)))
        return res, skill_answer_embedding1
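# Illustration only (not part of pykt): a minimal smoke test of the forward
# pass with made-up hyperparameters, showing the expected tensor shapes.
def _demo_forward():
    model = ATKT(num_c=50, skill_dim=64, answer_dim=32, hidden_dim=64).to(device)
    skill = torch.randint(0, 50, (2, 10), device=device)   # (batch, seq_len) concept ids
    answer = torch.randint(0, 2, (2, 10), device=device)   # binary responses
    res, emb = model(skill, answer)
    print(res.shape)  # torch.Size([2, 10, 50]): per-step probability for each concept
    print(emb.shape)  # torch.Size([2, 10, 96]): skill_dim + answer_dim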
def _l2_normalize_adv(d):
    # Rescale the perturbation direction to unit L2 norm per sequence.
    if isinstance(d, Variable):
        d = d.data.cpu().numpy()
    elif isinstance(d, torch.FloatTensor) or isinstance(d, torch.cuda.FloatTensor):
        d = d.cpu().numpy()
    d /= (np.sqrt(np.sum(d ** 2, axis=(1, 2))).reshape((-1, 1, 1)) + 1e-16)
    return torch.from_numpy(d)
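# Illustration only (not part of pykt): a sketch of the adversarial training
# step described in the ATKT paper, showing how epsilon, beta and
# _l2_normalize_adv fit together. The loss construction is simplified (no
# padding mask, no next-step target shift); the real loop lives in pykt's
# training code.
def _demo_adversarial_step():
    from torch.autograd import grad

    model = ATKT(num_c=50, skill_dim=64, answer_dim=32, hidden_dim=64).to(device)
    skills = torch.randint(0, 50, (2, 10), device=device)
    answers = torch.randint(0, 2, (2, 10), device=device)
    targets = answers.float()

    # Clean pass; `features` are the interaction embeddings returned by forward().
    preds, features = model(skills, answers)
    y = torch.gather(preds, 2, skills.unsqueeze(-1)).squeeze(-1)
    loss = nn.functional.binary_cross_entropy(y, targets)

    # The gradient of the loss w.r.t. the embeddings gives the perturbation
    # direction; _l2_normalize_adv rescales it and epsilon sets its magnitude.
    features_grad = grad(loss, features, retain_graph=True)
    p_adv = (model.epsilon * _l2_normalize_adv(features_grad[0].data)).to(device)

    # Adversarial pass: the same batch, with the perturbation added to the
    # interaction embeddings inside forward().
    adv_preds, _ = model(skills, answers, p_adv)
    adv_y = torch.gather(adv_preds, 2, skills.unsqueeze(-1)).squeeze(-1)
    adv_loss = nn.functional.binary_cross_entropy(adv_y, targets)

    # beta weights the adversarial regularizer against the clean loss.
    total_loss = loss + model.beta * adv_loss
    total_loss.backward()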