如何解决In [0]和In [1]必须具有兼容的批处理尺寸:[64,32,32,128]与[128,32,32,64]
我正在使用tensorflow和keras(带有Python3的TensorFlow(+ Keras2)(CUDA 10.0和Intel MKL-DNN)),但遇到批次尺寸不兼容的问题,但是我不知道哪一部分出错了。我将不胜感激。我使用的数据是MNIST和USPS。 错误就像
Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/client/session.py",line 1365,in _do_call
return fn(*args)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/client/session.py",line 1350,in _run_fn
target_list,run_metadata)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/client/session.py",line 1443,in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: In[0] and In[1] must have compatible batch dimensions: [64,32,128] vs. [128,64]
[[{{node MatMul}}]]
[[Mean/_67]]
(1) Invalid argument: In[0] and In[1] must have compatible batch dimensions: [64,64]
[[{{node MatMul}}]]
0 successful operations.
0 derived errors ignored.
During handling of the above exception,another exception occurred:
Traceback (most recent call last):
File "Deep_CORAL_Keras_MNIST.py",line 343,in <module>
model.fit(Xs,ys,Xt,yt)
File "Deep_CORAL_Keras_MNIST.py",line 255,in fit
feed_dict=feed_dict)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/client/session.py",line 956,in run
run_metadata_ptr)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/client/session.py",line 1180,in _run
feed_dict_tensor,options,line 1359,in _do_run
run_metadata)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/client/session.py",line 1384,in _do_call
raise type(e)(node_def,op,message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: In[0] and In[1] must have compatible batch dimensions: [64,64]
[[node MatMul (defined at /home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
[[Mean/_67]]
(1) Invalid argument: In[0] and In[1] must have compatible batch dimensions: [64,64]
[[node MatMul (defined at /home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
0 successful operations.
0 derived errors ignored.
下面是代码
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss
from tensorflow.python.framework import ops
from keras import backend as K
from keras.layers import Dense,Dropout,Activation,GaussianNoise,Flatten
from keras.layers import Conv2D,MaxPooling2D,MaxPool2D
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU,ELU,LeakyReLU
K.set_image_data_format('channels_last')
N_CLASS = 10
def shuffle_aligned_list(data):
"""Shuffle arrays in a list by shuffling each array identically."""
idx = data[0].shape[0]
p = np.random.permutation(idx)
return [d[p] for d in data]
def batch_gen(data,batch_size,shuffle=True):
"""Generate batches of data.
Given a list of array-like objects,generate batches of a given
size by yielding a list of array-like objects corresponding to the
same slice of each input.
"""
if shuffle:
data = shuffle_aligned_list(data)
batch_count = 0
while True:
if batch_count * batch_size + batch_size >= len(data[0]):
batch_count = 0
if shuffle:
data = shuffle_aligned_list(data)
start = batch_count * batch_size
end = start + batch_size
batch_count += 1
yield [d[start:end] for d in data]
def val_batch_gen(data,batch_size):
"""Generate batches of data.
Given a list of array-like objects,generate batches of a given
size by yielding a list of array-like objects corresponding to the
same slice of each input.
"""
batch_count = 0
# while True:
nbatch = len(data[0]) // batch_size
if nbatch * batch_size < len(data[0]): nbatch += 1
for i in range(nbatch):
start = batch_count * batch_size
end = start + batch_size
batch_count += 1
yield [d[start:end] for d in data]
class DeepCoralNet(object):
def __init__(self,nfeatures=50,arch=[8,'act'],coral_layer_idx=[1],batch_size=16,supervised=False,confusion=1e4,confusion_incr=50,confusion_max=1e9,val_data=None,validate_every=1,activations='relu',epochs=1000,optimizer=None,noise=0.0,droprate=0.0,verbose=False):
self.batch_size = batch_size
self.epochs = epochs
self.validate_every = validate_every
self.supervised = supervised
self.verbose = verbose
if val_data is None:
self.validate_every = 0
else:
self.Xval = val_data[0]
self.yval = val_data[1]
self._build_model(nfeatures,arch,supervised,confusion,confusion_incr,confusion_max,activations,noise,droprate,coral_layer_idx,optimizer)
self.sess = tf.Session()
K.set_session(self.sess)
self.sess.run(tf.global_variables_initializer())
def _coral_loss(self,layer_a,layer_b):
d = tf.cast(tf.shape(layer_a)[1],tf.float32)
# Source covariance
xm = layer_a - tf.reduce_mean(layer_a,keep_dims=True)
xc = tf.matmul(tf.transpose(xm),xm) / self.batch_size
# Target covariance
xmt = layer_b - tf.reduce_mean(layer_b,keep_dims=True)
xct = tf.matmul(tf.transpose(xmt),xmt) / self.batch_size
coral_loss = tf.reduce_sum(tf.multiply((xc - xct),(xc - xct)))
coral_loss /= 4 * d * d
return coral_loss
def _build_model(self,nfeatures,architecture,optimizer):
# self.inp_a = tf.placeholder(tf.float32,shape=(None,nfeatures))
# self.inp_b = tf.placeholder(tf.float32,nfeatures))
self.inp_a = tf.placeholder(tf.float32,3))
self.inp_b = tf.placeholder(tf.float32,3))
self.labels_a = tf.placeholder(tf.float32,N_CLASS))
self.lr = tf.placeholder(tf.float32,[],name='lr')
nlayers = len(architecture)
layers_a = [self.inp_a]
layers_b = [self.inp_b]
for i,nunits in enumerate(architecture):
# print
# nunits,# if i in coral_layer_idx: print
# '(CORAL)'
# else: print
if isinstance(nunits,int):
shared_layer = Dense(nunits,activation='linear')
elif nunits == 'noise':
shared_layer = GaussianNoise(noise)
elif nunits == 'bn':
shared_layer = BatchNormalization()
elif nunits == 'drop':
shared_layer = Dropout(droprate)
elif nunits == 'act':
if activations == 'prelu':
shared_layer = PReLU()
elif activations == 'elu':
shared_layer = ELU()
elif activations == 'leakyrelu':
shared_layer = LeakyReLU()
else:
shared_layer = Activation(activations)
elif nunits == 'block1_conv1':
shared_layer =Conv2D(64,(3,3),activation='relu',padding='same',name='block1_conv1')
elif nunits == 'block1_conv2':
shared_layer += Conv2D(64,name='block1_conv2')
elif nunits == 'block1_pool':
shared_layer += MaxPool2D((2,2),strides=(2,name='block1_pool')
layers_a += [shared_layer(layers_a[-1])]
layers_b += [shared_layer(layers_b[-1])]
layers_a += [Flatten()(layers_a[-1])]
layers_b += [Flatten()(layers_b[-1])]
output_layer = Dense(N_CLASS,activation='linear')
y_logits = output_layer(layers_a[-1])
b_logits = output_layer(layers_b[-1])
self.y_clf = Activation('sigmoid')(y_logits)
# Sum the losses from both branches...
self.xe_loss = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels_a,logits=y_logits))
self.coral_losses = []
for idx in coral_layer_idx:
self.coral_losses += [self._coral_loss(layers_a[idx],layers_b[idx])]
self.coral_losses += [self._coral_loss(y_logits,b_logits)]
self.domain_loss = tf.reduce_sum(self.coral_losses)
self.confusion = tf.Variable(float(confusion),trainable=False,dtype=tf.float32)
conf_incr = tf.cond(self.confusion < confusion_max,lambda: float(confusion_incr),lambda: 0.)
self.increment_confusion = tf.assign(self.confusion,self.confusion + conf_incr)
self.domain_loss = self.confusion * self.domain_loss
self.total_loss = tf.add(self.domain_loss,self.xe_loss)
if supervised:
self.labels_b = tf.placeholder(tf.float32,1))
self.bloss = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels_b,logits=b_logits))
self.total_loss = tf.add(self.total_loss,self.bloss)
if optimizer is None:
self.train_step = tf.train.MomentumOptimizer(self.lr,0.9)
else:
self.train_step = optimizer
self.train_step = self.train_step.minimize(self.total_loss)
def predict_proba(self,X,batch_size=None):
if batch_size is None: batch_size = self.batch_size
yprobs = np.zeros((X.shape[0]),N_CLASS,dtype=float)
idx = np.arange(X.shape[0])
vbatch = val_batch_gen([idx,X],batch_size)
for i,(thisidx,thisX) in enumerate(vbatch):
yprobs[thisidx] = self.sess.run(self.y_clf,feed_dict={self.inp_a: thisX,K.learning_phase(): 0}).flatten()
return yprobs
def evaluate(self,y,batch_size=None):
yprobs = self.predict_proba(X,batch_size)
acc = np.mean(np.equal(np.argmax(self.predict_proba(X,batch_size).astype("float32"),axis=1),np.argmax(y,axis=1)))
return log_loss(y,yprobs),acc
def fit(self,Xs,yt=None,Xval=None,yval=None,epochs=None,batch_size=None,verbose=None):
if epochs is None: epochs = self.epochs
if batch_size is None: batch_size = self.batch_size
if Xval is None:
Xval = self.Xval
yval = self.yval
if verbose is None: verbose = self.verbose
S_batches = batch_gen([Xs,ys],batch_size=batch_size)
if yt is None: yt = np.ones(Xt.shape[0])
T_batches = batch_gen([Xt,yt],batch_size=batch_size)
self.history = {'source_loss': [],'target_loss': [],'val_loss': [],'domain_loss': []}
for i in range(epochs):
p = i / float(epochs)
lr = 0.01 / (1 + 10. * p) ** 0.75
Xsource,ysource = next(S_batches)#.next()
Xtarget,ytarget = next(T_batches)#.next()
feed_dict = {self.inp_a: Xsource,self.inp_b: Xtarget,# self.labels_a: ysource.reshape(-1,1),K.learning_phase(): 1,self.labels_a: ysource,self.lr: lr}
if self.supervised:
feed_dict[self.labels_b] = ytarget #.reshape(-1,1)
# train
_,_,xeloss,dloss,tloss = self.sess.run([
self.train_step,self.increment_confusion,self.confusion,self.xe_loss,self.domain_loss,self.total_loss],feed_dict=feed_dict)
if self.validate_every > 0 and i % self.validate_every == 0:
if i == 0:
print ('Epoch confusion dloss tloss sloss tloss vloss')
self.history['source_loss'] += [self.evaluate(Xs,ys)]
self.history['target_loss'] += [self.evaluate(Xt,yt)]
self.history['val_loss'] += [self.evaluate(Xval,yval)]
self.history['domain_loss'] += [dloss]
print ('{:04d} {:.2f} {:.4E} {:.4f} {:.5f} {:.5f} {:.5f} '.format(i,tloss,self.history['source_loss'][-1],self.history['target_loss'][-1],self.history['val_loss'][-1]))
print('{:04d} {:.2f} {:.4E} {:.4f} {:.5f} {:.5f} {:.5f} '.format(i,self.evaluate(Xs,ys)[1],self.evaluate(Xt,yt)[1],self.evaluate(Xval,yval)[1]))
# input data
from tensorflow.examples.tutorials.mnist import input_data
def return_mnist(path_train,path_test):
mnist = input_data.read_data_sets('MNIST_data',one_hot=True)
mnist_train = np.reshape(np.load(path_train),(55000,1))
mnist_train = np.reshape(mnist_train,1))
mnist_train = mnist_train.astype(np.float32)
mnist_test = np.reshape(np.load(path_test),(10000,1)).astype(
np.float32)
mnist_test = np.reshape(mnist_test,1))
mnist_train = np.concatenate([mnist_train,mnist_train,mnist_train],3)
mnist_test = np.concatenate([mnist_test,mnist_test,mnist_test],3)
return mnist_train,mnist.train.labels,mnist.test.labels
path_mnist_train = '/home/miles/atda-master/train_mnist_32x32.npy'
path_mnist_test = '/home/miles/atda-master/test_mnist_32x32.npy'
import cv2
import pickle as pkl
def __resize_array_images(array_images,size):
new_array = []
for i in range(len(array_images)):
img = cv2.resize( array_images[i],(size,size),interpolation = cv2.INTER_CUBIC )
new_array.append( img )
return np.array(new_array)
path = '/home/miles/atda-master/usps.h5'
import h5py
with h5py.File(path,'r') as hf:
train = hf.get('train')
X_tr = train.get('data')[:]
y_tr = train.get('target')[:]
test = hf.get('test')
X_te = test.get('data')[:]
y_te = test.get('target')[:]
X_tr = np.reshape(X_tr,[X_tr.shape[0],16,1])
X_tr = __resize_array_images(X_tr,32)
X_tr = np.expand_dims(X_tr,-1)
X_tr = np.concatenate([X_tr,X_tr,X_tr],3)
X_te = np.reshape(X_te,[X_te.shape[0],1])
X_te = __resize_array_images(X_te,32)
X_te = np.expand_dims(X_te,-1)
X_te = np.concatenate([X_te,X_te,X_te],3)
y_tr = tf.keras.utils.to_categorical(y_tr,10)
y_te = tf.keras.utils.to_categorical(y_te,10)
Xt = X_tr
yt = y_tr
Xv = X_te
yv = y_te
_,mnist_test_label = return_mnist(path_mnist_train,path_mnist_test)
from sklearn.model_selection import train_test_split
Xs,_ = train_test_split(
mnist_test,mnist_test_label,test_size=0.3)
opt = tf.train.MomentumOptimizer(1e-3,0.9)
K.set_image_data_format('channels_last')
model = DeepCoralNet(nfeatures=Xs.shape[1],arch=['block1_conv1'],val_data=(Xv,yv),epochs=10000,batch_size=128,validate_every=100,optimizer=opt,activations='leakyrelu')
model.fit(Xs,yt)
vloss_grl = model.evaluate(Xv,yv)
解决方法
您正在transpose
之前执行matmul
操作,即tf.matmul(tf.transpose(xm),xm)
在您的代码中,这就是您的维度被重新排列的原因。您能否参考这份tf.linalg.matmul张量流文档。
重新创建问题的代码:
import tensorflow as tf
a = tf.constant([1,2,3,4,5,6,7,8,9,10,11,12],shape=[2,2])
print(a)
b = tf.constant([1,2])
print(b)
x = tf.matmul(a,b)
print(x)
输出-
tf.Tensor(
[[[ 1 2]
[ 3 4]
[ 5 6]]
[[ 7 8]
[ 9 10]
[11 12]]],shape=(2,2),dtype=int32)
tf.Tensor(
[[[ 1 2]
[ 3 4]
[ 5 6]]
[[ 7 8]
[ 9 10]
[11 12]]],dtype=int32)
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-18-8f20ea7be877> in <module>()
7 print(b)
8
----> 9 x = tf.matmul(a,b)
10 print(x)
4 frames
/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value,from_value)
InvalidArgumentError: In[0] mismatch In[1] shape: 2 vs. 3: [2,2] [2,2] 0 0 [Op:BatchMatMulV2]
在进行任何换位后,输入必须是等级> = 2的张量,其中内部2维指定有效的矩阵乘法维。
固定代码:
import tensorflow as tf
a = tf.constant([1,3])
print(b)
x = tf.matmul(a,dtype=int32)
tf.Tensor(
[[[ 1 2 3]
[ 4 5 6]]
[[ 7 8 9]
[10 11 12]]],3),dtype=int32)
tf.Tensor(
[[[ 9 12 15]
[ 19 26 33]
[ 29 40 51]]
[[129 144 159]
[163 182 201]
[197 220 243]]],dtype=int32)
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。