How do I fix a ValueError in my Python code?
I run my code, but I get this error:
ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 8 is different from 11)
I am learning to program, so please help me correct this error.
Here is my complete code; maybe you can find the mistake there:
import math
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV, cross_validate, train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import linear_model
data = pd.read_excel(r'training_data.xlsx')
'''
Inspect the distribution of each column with a box plot,
in order to remove outliers.
'''
for name in data.columns:
    bp = sn.boxplot(x=data[name])
    plt.title('boxplot for ' + name)
    plt.show()
'''
Plot the correlation between the variables.
'''
correlation = data.corr()
sn.heatmap(correlation, annot=True)
plt.title('Pearson correlation')
plt.show()
'''
Select the relevant features.
'''
# Correlation with the output variable (DIL)
cor_target = abs(correlation["DIL"][1:])
# Keep only the highly correlated features
relevant_features = cor_target[cor_target > 0.10]
columns = relevant_features.index
x = data[columns]
y = data.iloc[:, 0]
columns_ = ['BUZAMIENTO', 'NIVEL', 'ANCHO', 'RQD', 'LONGITUD DE TAJO', 'DENSIDAD', 'BURDEN', 'ESPACIAMIENTO', 'ALTURA DE TAJO', 'RMR', 'TN/MPERF']
x_ = data[columns_]
y_ = data.iloc[:, 0]
'''
Regression analysis.
'''
modelo = linear_model.LinearRegression()
modelo.fit(x, y)
'''
Normalize the data.
'''
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(x)
x = scaler.transform(x)
scaler.fit(x_)
x_ = scaler.transform(x_)
'''
Split the data 70/30.
'''
seed = 42
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=seed)
x_train_, x_test_, y_train_, y_test_ = train_test_split(x_, y_, test_size=0.3, random_state=seed)
'''
Explore a support vector machine model for a regression problem
with a sigmoid kernel, via a grid search over its hyper-parameters.
We then measure the results with the neg_mean_squared_error and
neg_mean_absolute_error metrics using 3-fold cross validation.
'''
def svr_model(x, y, kernel_name):
    gsc = GridSearchCV(
        estimator=SVR(kernel=kernel_name),
        param_grid={
            'C': [0.01, 0.1, 1, 100, 1000],
            'epsilon': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.5, 5, 10],
            'gamma': [0.0001, 3, 5]
        },
        cv=5, scoring='neg_mean_squared_error', verbose=0, n_jobs=-1)
    grid_result = gsc.fit(x, y)
    best_params = grid_result.best_params_
    best_svr = SVR(kernel=kernel_name, C=best_params["C"], epsilon=best_params["epsilon"],
                   gamma=best_params["gamma"], coef0=0.09, shrinking=True,
                   tol=0.001, verbose=False, max_iter=-1)
    scoring = {'abs_error': 'neg_mean_absolute_error', 'squared_error': 'neg_mean_squared_error'}
    scores = cross_validate(best_svr, x, y, cv=3, scoring=scoring, return_train_score=True)
    print("support vector regression:")
    print("MAE :", abs(scores['test_abs_error'].mean()),
          "| RMSE :", math.sqrt(abs(scores['test_squared_error'].mean())),
          "| Coef. Det. R2", modelo.score(x, y))
    return best_svr
'''
Explore a random forest ensemble model for a regression problem
using Gini impurity, via a grid search over its hyper-parameters.
We then measure the results with the neg_mean_squared_error and
neg_mean_absolute_error metrics using cross validation.
'''
def rfr_model(x, y):
    # Perform grid search
    gsc = GridSearchCV(
        estimator=RandomForestRegressor(),
        param_grid={
            'max_depth': range(2, 25),
            'n_estimators': (10, 20, 50, 200, 1000),
        },
        n_jobs=-1)
    grid_result = gsc.fit(x, y)
    best_params = grid_result.best_params_
    rfr = RandomForestRegressor(max_depth=best_params["max_depth"],
                                n_estimators=best_params["n_estimators"],
                                random_state=False, verbose=False)
    # Perform k-fold CV
    scoring = {'abs_error': 'neg_mean_absolute_error', 'squared_error': 'neg_mean_squared_error'}
    scores = cross_validate(rfr, x, y, cv=3, scoring=scoring, return_train_score=True)
    print("random forest regression")
    print("MAE :", abs(scores['test_abs_error'].mean()),
          "| RMSE :", math.sqrt(abs(scores['test_squared_error'].mean())),
          "| Coef. Det. R2", modelo.score(x, y))
    return rfr
# for name in ['linear', 'rbf', 'sigmoid']:
#     print('kernel: ', name, svr_model(x_train_, y_train_, name))
svr = svr_model(x_train_, y_train_, 'sigmoid')
rfr = rfr_model(x_train, y_train)
svr.fit(x_train_,y_train_)
rfr.fit(x_train,y_train)
'''
Plot the random forest predictions vs. the SVM predictions
vs. the real data.
'''
#fig = plt.figure()
plt.title('predictions')
#ax = fig.add_axes([0,1])
#ax.bar(np.arange(len(y_test)),y_test,width=0.4)
y_predict1 = rfr.predict(x_test) #* 0.4 + svr.predict(X_test) * 0.6
y_predict2 = svr.predict(x_test_)
plt.plot(np.arange(len(y_predict1)), y_predict1, 'y-')
plt.plot(np.arange(len(y_predict2)), y_predict2, 'r-')
plt.show()
#fig.savefig('predicciones.svg',format='svg')
'''
Read the evaluation data and apply the corresponding
transformations.
'''
data = pd.read_csv('evaluation_data.csv', sep=';')
data_copy = data.copy()
data['BUZAMIENTO'] = 1 - np.cos(data['BUZAMIENTO'])
data['NIVEL'] = 1 / data['NIVEL']
data['ANCHO'] = 1 / (data['ANCHO'] ** 2)
data['RQD'] = np.log(data['RQD'])
data2 = pd.read_csv('evaluation_data.csv', sep=';')
X_test = scaler.transform(data[columns])
X_test_ = scaler.transform(data[columns_])
data2['DIL'] = rfr.predict(X_test)
data2['DIL rfr'] = rfr.predict(X_test)
#data2['DIL rfr'] = data2['DIL']
data2['DIL svr'] = svr.predict(X_test_)
print(data2)
scoring = {'abs_error': 'neg_mean_absolute_error','squared_error': 'neg_mean_squared_error'}
scores_rfr = cross_validate(rfr, x_train, y_train, cv=3, scoring=scoring, return_train_score=True)
scores_svr = cross_validate(svr, x_train_, y_train_, cv=3, scoring=scoring, return_train_score=True)
data2['rfr error %'] = 100 * (abs(scores_rfr['test_abs_error'].mean()) / y.mean())
data2['svr error %'] = 100 * (abs(scores_svr['test_abs_error'].mean()) / y_.mean())
data2.to_csv('evaluation_data.csv', sep=';', index=False)
'''
Show the importance of the predictor variables
in the random forest.
'''
importances = rfr.feature_importances_
std = np.std([tree.feature_importances_ for tree in rfr.estimators_], axis=0)
indices = np.argsort(importances)[::-1]
# Plot the impurity-based feature importances of the forest
#fig = plt.figure()
plt.title("Feature importances")
plt.bar(range(x_train.shape[1]), importances[indices], color="r", yerr=std[indices], align="center")
plt.xticks(range(x_train.shape[1]), indices)
plt.xlim([-1, x.shape[1]])
plt.show()
#fig.savefig('feature_importances.svg',format='svg')
Solution

I was able to run your code with a different dataset, and it drew the box plots, so try it with a different dataset. I also found this answer: SO link for matmul error
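For what it's worth, the sizes in the message line up with the two feature sets your code builds: x holds the correlation-selected columns (apparently 8 of them), while columns_ names 11 hand-picked ones. An estimator fitted on one set raises exactly this matmul error when handed the other, and svr_model does just that: it is called with the 11-column x_train_ but internally evaluates modelo.score(x, y), where modelo is the linear regression fitted on the 8-column x. A minimal sketch with made-up data shapes that reproduces the message:

import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
X8 = rng.random((30, 8))    # stand-in for the 8 correlation-selected features
X11 = rng.random((30, 11))  # stand-in for the 11 hand-picked features
y = rng.random(30)

model = LinearRegression().fit(X8, y)  # coef_ now has 8 entries

try:
    # predict() computes X @ coef_ (plus the intercept), and
    # (30, 11) @ (8,) cannot match, so numpy raises the gufunc
    # matmul ValueError from the question.
    model.predict(X11)
except ValueError as e:
    print(e)  # ... core dimension 0 ... (size 8 is different from 11)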
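And, continuing the sketch above, a sketch of the corresponding fix, still under the assumption that mixing the two feature sets is the trigger: give each feature set its own scaler and score each model only on the set it was fitted to.

from sklearn.preprocessing import MinMaxScaler

# One scaler per feature set; refitting a single scaler on the
# 11-column set and then transforming 8-column data fails the same way.
scaler8 = MinMaxScaler(feature_range=(0, 1)).fit(X8)
scaler11 = MinMaxScaler(feature_range=(0, 1)).fit(X11)

model8 = LinearRegression().fit(scaler8.transform(X8), y)
model8.score(scaler8.transform(X8), y)  # scored on the matching 8-column set

The same pairing matters at the end of your script, where the single scaler, last refit on the 11-column x_, is asked to transform the 8-column data[columns].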