如何解决如何从 nn.Transformer 生成句子?
我正在尝试使 Transformer 模型生成句子。现在我正在尝试使用光束搜索生成句子。
但我的模型只生成一个句子。
这个符号表示这是句子的开始。
我不知道为什么会生成它?你有什么好的解决办法吗?
这是Transformer模型的代码。
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder,TransformerEncoderLayer,TransformerDecoder,TransformerDecoderLayer
class TransformerModel(nn.Module):
def __init__(self,in_token,out_token,ninp,nhead,nhid,nlayers,dropout=0.5):
super(TransformerModel,self).__init__()
self.model_type = 'Transformer'
self.pos_encoder = PositionalEncoding(ninp,dropout)
encoder_layers = TransformerEncoderLayer(ninp,dropout)
self.transformer_encoder = TransformerEncoder(encoder_layers,nlayers)
self.encoder = nn.Embedding(in_token,ninp)
self.decoder = nn.Embedding(out_token,ninp)
self.ninp = ninp
self.linear = nn.Linear(ninp,out_token)
decoder_layers = TransformerDecoderLayer(ninp,dropout)
self.transformer_decoder = TransformerDecoder(decoder_layers,norm = self.linear)
self.init_weights()
def generate_square_subsequent_mask(self,sz):
mask = (torch.triu(torch.ones(sz,sz)) == 1).transpose(0,1)
mask = mask.float().masked_fill(mask == 0,float('-inf')).masked_fill(mask == 1,float(0.0))
return mask
def init_weights(self):
initrange = 0.1
self.encoder.weight.data.uniform_(-initrange,initrange)
# self.decoder.bias.data.zero_()
self.decoder.weight.data.uniform_(-initrange,initrange)
def forward(self,src,trg):
src_mask = model.generate_square_subsequent_mask(src.size()[0]).to(device)
trg_mask = model.generate_square_subsequent_mask(trg.size()[0]).to(device)
src = self.encoder(src)
trg = self.decoder(trg)
src = self.pos_encoder(src)
trg = self.pos_encoder(trg)
output = self.transformer_encoder(src,mask = src_mask)
output = self.transformer_decoder(trg,output,tgt_mask = trg_mask)
return output
class PositionalEncoding(nn.Module):
def __init__(self,d_model,dropout=0.1,max_len=5000):
super(PositionalEncoding,self).__init__()
self.dropout = nn.Dropout(p=dropout)
pe = torch.zeros(max_len,d_model)
position = torch.arange(0,max_len,dtype=torch.float).unsqueeze(1)
div_term = torch.exp(torch.arange(0,2).float() * (-math.log(10000.0) / d_model))
#print(pe[:,0::2].size())
#print(pe[:,1::2].size())
#print((position*div_term).size())
pe[:,0::2] = torch.sin(position * div_term)
pe[:,1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0).transpose(0,1)
self.register_buffer('pe',pe)
def forward(self,x):
x = x + self.pe[:x.size(0),:]
return self.dropout(x)
# this is the parametor of model
n_ntokens = len(SRC.vocab.stoi) # the size of vocabulary
out_ntokens = len(TRG.vocab.stoi)
emsize = 512 # embedding dimension
nhid = 512 # the dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 1 # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
nhead = 2 # the number of heads in the multiheadattention models
dropout = 0.3 # the dropout value
model = TransformerModel(in_ntokens,out_ntokens,emsize,dropout).to(device)
这是生成句子的代码。
def gen_sentence(sentence,src_field,trg_field,model,max_len = 50):
model.eval()
tokens = [src_field.init_token] + \
tokenizer(sentence) + [src_field.eos_token]
src = [src_field.vocab.stoi[i] for i in tokens]
src = torch.LongTensor([src])
src = torch.t(src)
src = src.to(device)
src_tensor = model.encoder(src)
src_tensor = model.pos_encoder(src_tensor).to(device)
with torch.no_grad():
src_output = model.transformer_encoder(src_tensor)#,mask = src_mask)
trg = trg_field.vocab.stoi[trg_field.init_token]
trg = torch.LongTensor([[trg]]).to(device)
output = []
for i in range(max_len):
trg_tensor = model.decoder(trg)
trg_tensor = model.pos_encoder(trg_tensor).to(device)
trg_mask = model.generate_square_subsequent_mask(trg_tensor.size()[0]).to(device)
with torch.no_grad():
pred = model.transformer_decoder(trg_tensor,src_output,tgt_mask = trg_mask)
pred_word_index = pred.argmax(2)[-1]
output.append(pred_word_index)
if pred_word_index == trg_field.vocab.stoi[trg_field.eos_token]:
break
last_index = torch.LongTensor([[pred_word_index.item()]]).to(device)
trg = torch.cat((trg,last_index))
# predict = "".join(output)
predict = [trg_field.vocab.itos[i] for i in output]
predict = "".join(predict)
# print(predict)
return predict
# this is the code when I use this function
sentence = "あまりにも気温が高いということでそれをやめてですね電車で来ました "
gen_sentence(sentence,SRC,TRG,model)
这是输出。
<sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos><sos>
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。