References

Thanks to the authors of the works above for their help with my learning.

If I have seen further, it is by standing on the shoulders of giants.

Preface

Since this is an informal write-up, I won't give a detailed introduction to the model here. In short, it simply adds an RNN layer on top of NNLM; for background, see the references above. The 深度之眼 video is what I used to learn the theory behind NNLM, and the articles that follow it cover the NNLM implementation and how to connect it to an RNN. I don't recommend reproducing the RNN article line by line, though: it targets a fairly old version of PyTorch, even if its ideas are still worth borrowing.
Also note that this post uses Chinese data, while the references above all use English data, so keep that in mind when comparing.
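Concretely, the model keeps the NNLM output layer and only swaps its input. In my notation (not from the references), writing $x$ for the per-example feature vector, the code below computes

$$y = b + xW + \tanh(d + xH)\,U$$

where in the original NNLM $x$ is the concatenation of the n_step input word embeddings, while here $x$ is the concatenation of the RNN outputs over those n_step positions.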

Data generation

# -*- coding: utf-8 -*-
# pytorch version = 1.8.0
# code by Angel Hair

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
# print(torch.__version__)

import jieba


dtype = torch.FloatTensor


# text data
data = [
    "我喜欢咖啡",
    "我爱咖啡因",
    "我讨厌奶茶",
    "我恨脂肪酸",
    "我经常喝水"
]


sentences = [jieba.lcut(i) for i in data]      # [['我', '喜欢', '咖啡'], ..., ['我', '经常', '喝水']]
word_list = [b for i in sentences for b in i]  # ['我', '喜欢', ..., '喝水']
word_list = list(set(word_list))               # ['脂肪酸', '咖啡因', '喜欢', '讨厌', '我', '经常', '奶茶', '咖啡', '爱', '喝水', '恨']

word_dict = {w: i for i, w in enumerate(word_list)}  # word -> index
num_dict = {i: w for i, w in enumerate(word_list)}   # index -> word
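
As a quick sanity check (my own addition, not from the original references), word_dict and num_dict are inverse mappings, so a segmented sentence can be encoded to indices and decoded back:

# word_dict and num_dict are inverses: encode a sentence, then decode it
ids = [word_dict[w] for w in sentences[0]]  # e.g. [4, 2, 7]; the order depends on set()
print([num_dict[i] for i in ids])           # ['我', '喜欢', '咖啡']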

Parameter settings

# hyperparameters
n_class = len(word_dict)        # vocabulary size (11 for this toy corpus)
n_step = len(sentences[0]) - 1  # context length: the first 2 words predict the 3rd
n_hidden = 2                    # number of hidden units
num_layers = 1                  # number of RNN layers
m = 2                           # word embedding dimension, kept equal to n_hidden here

Building the data iterator

def make_batch(sentences):
    input_batch = []
    target_batch = []

    for word in sentences:
        input = [word_dict[n] for n in word[:-1]]  # indices of the first n_step words
        target = word_dict[word[-1]]               # index of the word to predict

        input_batch.append(input)
        target_batch.append(target)

    return input_batch, target_batch

input_batch, target_batch = make_batch(sentences)
input_batch = torch.LongTensor(input_batch)    # [num_sentences, n_step]
target_batch = torch.LongTensor(target_batch)  # [num_sentences]

data_set = Data.TensorDataset(input_batch, target_batch)
loader = Data.DataLoader(
    dataset=data_set,
    batch_size=16,
    shuffle=True
)
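
With only 5 sentences and batch_size=16, the loader yields a single batch per epoch; a quick shape check (my own addition) confirms it:

# One pass over the loader: inputs are [5, n_step], targets are [5]
for x, y in loader:
    print(x.shape, y.shape)  # torch.Size([5, 2]) torch.Size([5])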

Model

class RNNLM(nn.Module):
    def __init__(self):
        super(RNNLM, self).__init__()
        self.C = nn.Embedding(n_class, m)  # word embedding table
        # batch_first=True because the input is [batch_size, n_step, m]
        self.rnn = nn.RNN(m, n_hidden, num_layers, nonlinearity='relu', batch_first=True)
        # NNLM-style output layer applied to the concatenated RNN outputs
        # (their dimension is n_hidden per step, not m)
        self.H = nn.Parameter(torch.randn(n_step * n_hidden, n_hidden).type(dtype))
        self.W = nn.Parameter(torch.randn(n_step * n_hidden, n_class).type(dtype))
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))
        self.b = nn.Parameter(torch.randn(n_class).type(dtype))

    def forward(self, X, hidden):
        # X: [batch_size, n_step]; hidden: [num_layers, batch_size, n_hidden]
        X = self.C(X)                         # [batch_size, n_step] => [batch_size, n_step, m]

        X, h_n = self.rnn(X, hidden)          # [batch_size, n_step, n_hidden]
        X = X.reshape(-1, n_step * n_hidden)  # [batch_size, n_step * n_hidden]

        hidden_out = torch.tanh(self.d + torch.mm(X, self.H))                 # [batch_size, n_hidden]

        output = self.b + torch.mm(X, self.W) + torch.mm(hidden_out, self.U)  # [batch_size, n_class]

        return output, h_n

model = RNNLM()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
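
Before training it is worth tracing the shapes through one forward pass. This check is my own addition and simply reuses the full input_batch as a dummy batch:

# Shape trace: 5 sentences, n_step = 2, n_hidden = 2, n_class = 11
h0 = torch.zeros(num_layers, input_batch.size(0), n_hidden)  # [1, 5, 2]
out, h_n = model(input_batch, h0)
print(out.shape, h_n.shape)  # torch.Size([5, 11]) torch.Size([1, 5, 2])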

Training and testing

for epoch in range(10000):
    for batch_x, batch_y in loader:
        # a fresh zero hidden state for each batch: [num_layers, batch_size, n_hidden]
        hidden = torch.zeros(num_layers, batch_x.size(0), n_hidden)

        optimizer.zero_grad()
        output, _ = model(batch_x, hidden)
        loss = criterion(output, batch_y)

        if (epoch + 1) % 100 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', f'{loss.item():.6f}')

        loss.backward()
        optimizer.step()

# prediction: feed the training contexts back through the model
with torch.no_grad():
    hidden = torch.zeros(num_layers, input_batch.size(0), n_hidden)
    predict, _ = model(input_batch, hidden)
    predict = predict.max(1, keepdim=True)[1]

# test: print each 2-word context and the predicted next word
print([word[:n_step] for word in sentences], '->', [num_dict[n.item()] for n in predict.squeeze()])
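
To query the trained model with a prefix of your own rather than the whole training batch, something like the following sketch works (the prefix here is just an example; any n_step words from the vocabulary will do):

# Hypothetical query with a hand-built 2-word prefix
prefix = ['我', '喜欢']
x = torch.LongTensor([[word_dict[w] for w in prefix]])  # [1, n_step]
with torch.no_grad():
    logits, _ = model(x, torch.zeros(num_layers, 1, n_hidden))
print(prefix, '->', num_dict[logits.argmax(dim=1).item()])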

Extending the model (GRU)

With the approach above in place, it is easy to extend the model to other RNN variants, such as a GRU: all we need to do is replace nn.RNN in the model with nn.GRU, as follows:

class GRULM(nn.Module):
    def __init__(self):
        super(GRULM, self).__init__()
        self.C = nn.Embedding(n_class, m)  # word embedding table
        self.gru = nn.GRU(m, n_hidden, num_layers, batch_first=True)
        # same NNLM-style output layer as in RNNLM
        self.H = nn.Parameter(torch.randn(n_step * n_hidden, n_hidden).type(dtype))
        self.W = nn.Parameter(torch.randn(n_step * n_hidden, n_class).type(dtype))
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))
        self.b = nn.Parameter(torch.randn(n_class).type(dtype))

    def forward(self, X, hidden):
        X = self.C(X)                         # [batch_size, n_step] => [batch_size, n_step, m]

        X, h_n = self.gru(X, hidden)          # [batch_size, n_step, n_hidden]
        X = X.reshape(-1, n_step * n_hidden)  # [batch_size, n_step * n_hidden]

        hidden_out = torch.tanh(self.d + torch.mm(X, self.H))                 # [batch_size, n_hidden]

        output = self.b + torch.mm(X, self.W) + torch.mm(hidden_out, self.U)  # [batch_size, n_class]

        return output, h_n

model = GRULM()
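
The same swap also carries over to an LSTM. The class below is my own sketch (not from the references); the only wrinkle is that nn.LSTM works with a (hidden state, cell state) tuple rather than a single hidden tensor:

class LSTMLM(nn.Module):
    def __init__(self):
        super(LSTMLM, self).__init__()
        self.C = nn.Embedding(n_class, m)
        self.lstm = nn.LSTM(m, n_hidden, num_layers, batch_first=True)
        self.H = nn.Parameter(torch.randn(n_step * n_hidden, n_hidden).type(dtype))
        self.W = nn.Parameter(torch.randn(n_step * n_hidden, n_class).type(dtype))
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))
        self.b = nn.Parameter(torch.randn(n_class).type(dtype))

    def forward(self, X, hidden):
        # hidden is a tuple (h_0, c_0), each of shape [num_layers, batch_size, n_hidden]
        X = self.C(X)
        X, (h_n, c_n) = self.lstm(X, hidden)
        X = X.reshape(-1, n_step * n_hidden)
        hidden_out = torch.tanh(self.d + torch.mm(X, self.H))
        output = self.b + torch.mm(X, self.W) + torch.mm(hidden_out, self.U)
        return output, (h_n, c_n)

When training this variant, the initial state in the loop also becomes a tuple of two zero tensors, each of shape [num_layers, batch_size, n_hidden].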

For more on the principles and implementation of GRU, see:

Postscript

A few words on recent events: competition for algorithm positions has become so fierce that I had already planned to switch careers... Last year I went off with my roommate to start a company (making games). After it failed, I came back rather sheepishly when my advisor called me and recommended me for an interview. Even though I turned it down in the end, I am very grateful that my advisor never gave up on me QAQ