Tensorflow自定义层在编译模型时不起作用

如何解决Tensorflow自定义层在编译模型时不起作用

我正在尝试更改此page

中的转换器代码

这是我的代码，我添加了“ calculate_class_attention”功能。计算查询词和预定义矩阵之间的注意力权重：

class MultiHeadSelfAttentionWithClass(layers.Layer):
def __init__(self,embed_dim,num_heads=8,**kwargs):
    super(MultiHeadSelfAttentionWithClass,self).__init__(**kwargs)
    self.embed_dim = embed_dim
    self.num_heads = num_heads
    if embed_dim % num_heads != 0:
        raise ValueError(
            f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
        )
    self.projection_dim = embed_dim // num_heads
    self.query_dense = layers.Dense(embed_dim)
    self.key_dense = layers.Dense(embed_dim)
    self.value_dense = layers.Dense(embed_dim)
    self.class_dense=layers.Dense(embed_dim)
    self.combine_heads = layers.Dense(embed_dim)
    self.class_convert = layers.Dense(self.projection_dim)

def calculate_class_attention(self,query,batch_size):

  class_embedding_batch=tf.expand_dims(class_embedding_tensor,0)
  class_embedding_batch=tf.repeat(class_embedding_batch,batch_size,0)
  class_matrix=self.class_dense(class_embedding_batch)
  class_matrix = self.separate_heads(
        class_matrix,batch_size
    )  # (batch_size,num_heads,seq_len,projection_dim)
  score = tf.matmul(query,class_matrix,transpose_b=True)
  dim_key = tf.cast(tf.shape(class_matrix)[-1],tf.float32)
  scaled_score = score / tf.math.sqrt(dim_key)
  weights = tf.nn.softmax(scaled_score,axis=-1)
  output= self.class_convert(weights)      
  return output

def attention(self,key,value,batch_size):

    score = tf.matmul(query,transpose_b=True)
    dim_key = tf.cast(tf.shape(key)[-1],tf.float32)
    scaled_score = score / tf.math.sqrt(dim_key)
    weights = tf.nn.softmax(scaled_score,axis=-1)
    output = tf.matmul(weights,value)
    class_attention=self.calculate_class_attention(query,batch_size)
    return output+class_attention,weights

def separate_heads(self,x,batch_size):
    x = tf.reshape(x,(batch_size,-1,self.num_heads,self.projection_dim))
    return tf.transpose(x,perm=[0,2,1,3])

def call(self,inputs):
    # x.shape = [batch_size,embedding_dim]
    batch_size = tf.shape(inputs)[0]
    query = self.query_dense(inputs)  # (batch_size,embed_dim)
    key = self.key_dense(inputs)  # (batch_size,embed_dim)
    value = self.value_dense(inputs)  # (batch_size,embed_dim)
    query = self.separate_heads(
        query,projection_dim)
    key = self.separate_heads(
        key,projection_dim)
    value = self.separate_heads(
        value,projection_dim)
    attention,weights = self.attention(query,batch_size)
    attention = tf.transpose(
        attention,3]
    )  # (batch_size,projection_dim)
    concat_attention = tf.reshape(
        attention,self.embed_dim)
    )  # (batch_size,embed_dim)
    output = self.combine_heads(
        concat_attention
    )  # (batch_size,embed_dim)
    return output

如果我手动将数据馈送到图层，则该图层可以正常工作，但是当我构建模型时，在“ calculate_class_attention”功能的最后一行的“ output = self.class_convert（weights）”行，错误提示：

<ipython-input-39-14e423d44519> in <module>()
----> 1 model=build_model()
  2 mcp_save = ModelCheckpoint('/content/drive/My Drive/paper 2/mdl_wts.hdf5',save_best_only=True,monitor='val_macro',mode='max')
  3 
  4 model.compile(loss=tf.keras.losses.BinaryCrossentropy(),5           optimizer=tf.keras.optimizers.Adam(0.0001),3 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args,**kwargs)
256       except Exception as e:  # pylint:disable=broad-except
257         if hasattr(e,'ag_error_metadata'):
--> 258           raise e.ag_error_metadata.to_exception(e)
259         else:
260           raise

ValueError: in user code:

<ipython-input-33-14beaa39b909>:132 call  *
    attn_output = self.att(inputs)
<ipython-input-31-22858698552e>:93 call  *
    attention,inputs[1])
<ipython-input-31-22858698552e>:71 attention  *
    class_attention=self.calculate_class_attention(query,class_emb)
<ipython-input-37-4e225d27205c>:59 calculate_class_attention  *
    output= self.class_convert(weights)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:926 __call__  **
    input_list)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:1098 _functional_construction_call
    self._maybe_build(inputs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:2643 _maybe_build
    self.build(input_shapes)  # pylint:disable=not-callable
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/core.py:1168 build
    raise ValueError('The last dimension of the inputs to `Dense` '

ValueError: The last dimension of the inputs to `Dense` should be defined. Found `None`.

如果我尝试构建模型，但是如果我首先使用以下示例数据运行transformer_block，则会发生此错误：

q=embedding_layer(np.zeros((5,512)))
transformer_block(q)

，然后尝试运行模型的其余部分。它构建成功。我有什么想念的吗？

Tensorflow自定义层在编译模型时不起作用

如何解决Tensorflow自定义层在编译模型时不起作用

相关推荐