如何解决UndefinedMetricWarning:召回率(Recall)和F分数(F-score)定义不明确,在没有真实样本的标签中被设置为0.0
The required data set is here (please click)。当我使用scikit-learn运行DecisionTreeClassifier,并调用sklearn.metrics中的metrics.classification_report时,我得到以下警告:
/srv/conda/envs/notebook/lib/python3.7/site-packages/sklearn/metrics/classification.py:1439:
UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples.
'recall','true',average,warn_for)
在这行代码中
print('Classification report : \n',metrics.classification_report(Y_test,Y_pred))
请提出如何解决此问题的建议
下面是我的工作代码:
import pandas as pd
import numpy as np
import sklearn
import sklearn.preprocessing as pp
import sklearn
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
# --- Load the raw match tables -------------------------------------------
m_test = pd.read_csv('matchestest.csv')
m_train = pd.read_csv('matchestrain.csv', index_col=0)

# Notebook-style inspection; these bare expressions have no effect when the
# file is run as a plain script.
m_train.head()
m_train.team1.unique()

# Mark matches without a recorded winner with the sentinel -1.
# A single .loc assignment is used instead of chained indexing
# (m_train.winner[mask] = -1), which may write to a temporary copy.
m_train.loc[m_train.winner.isna(), 'winner'] = -1
m_train.winner[m_train.winner == -1]

# Team name -> integer code; the string 'nan' shares the -1 sentinel.
g = ['Chennai Super Kings', 'Kolkata Knight Riders', 'Delhi Capitals',
     'Kings XI Punjab', 'Mumbai Indians', 'Sunrisers Hyderabad',
     'Rajasthan Royals', 'Royal Challengers Bangalore', 'Pune Warriors',
     'Gujarat Lions', 'Rising Pune Supergiants', 'Deccan Chargers',
     'Kochi Tuskers Kerala', 'nan']
k = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1]

# 'winner' exists only as the training target; the other team columns are
# encoded identically in both frames.
m_train['winner'] = m_train['winner'].replace(g, k)
for frame in (m_train, m_test):
    for column in ('team1', 'team2', 'toss_winner'):
        frame[column] = frame[column].replace(g, k)

m_train['winner'] = m_train['winner'].astype(int)
m_train.dtypes
m_train.winner[m_train['winner'] == -1]

# Drop columns that are not used as features.
# NOTE(review): the test-frame drop was truncated in the original paste
# ("m_test.drop(['season',axis = 1)"); it is reconstructed here to mirror the
# training-frame drop - confirm that m_test really carries these columns.
unused_columns = ['season', 'umpire1', 'umpire2', 'umpire3', 'date']
m_train.drop(unused_columns, inplace=True, axis=1)
m_test.drop(unused_columns, inplace=True, axis=1)
m_test.head()

# Normalise the city spelling in both frames. The result is assigned back
# rather than using inplace=True on an attribute-accessed Series, which is not
# guaranteed to modify the parent frame.
m_train['city'] = m_train['city'].replace('Bangalore', 'Bengaluru')
m_test['city'] = m_test['city'].replace('Bangalore', 'Bengaluru')

# Replace remaining missing values with the literal string 'NaN'.
m_train.fillna('NaN', inplace=True)
m_test.fillna('NaN', inplace=True)
m_train.city.dtype
# A single LabelEncoder instance is shared and re-fitted for every column, so
# each ``label_*`` name bound below refers to this same object.
labelencoder = pp.LabelEncoder()


def _encode_with_train_classes(column):
    """Fit the shared encoder on the training column, then encode both frames.

    Returns the value of ``labelencoder.fit(...)`` so callers can bind it to a
    name, as the rest of the script expects.
    """
    fitted = labelencoder.fit(m_train[column])
    m_train[column] = fitted.transform(m_train[column])
    m_test[column] = fitted.transform(m_test[column])
    return fitted


label_city = _encode_with_train_classes('city')
label_venue = _encode_with_train_classes('venue')

# player_of_match is fitted on the values seen in *either* frame, so the
# encoder knows every value that appears in the test frame as well.
x_m_train_player = m_train['player_of_match'].unique()
x_m_test_player = m_test['player_of_match'].unique()
x_m = np.concatenate([x_m_train_player, x_m_test_player])
x_p = list(set(x_m))
print(x_p)
label_player_of_match = labelencoder.fit(x_p)
m_train['player_of_match'] = label_player_of_match.transform(
    m_train['player_of_match']
)
m_test['player_of_match'] = label_player_of_match.transform(
    m_test['player_of_match']
)

# Diagnostic counts for each of the player collections built above.
for players in (x_m_train_player, x_m_test_player, x_m, x_p):
    print(np.count_nonzero(players))

# NOTE: despite its name, label_toss_decision is fitted on the 'result' column.
label_toss_decision = _encode_with_train_classes('result')
label_t_d = _encode_with_train_classes('toss_decision')

# Notebook-style inspection; no effect when run as a script.
m_train.head(10)
m_test.head(10)
# Separate the prediction target from the feature columns.
target = m_train.winner
m_train.drop('winner', inplace=True, axis=1)
target.head()

# Notebook-style inspection; no effect when run as a script.
m_train.iloc[0:, 1:].shape
m_test.head()

# Impute using every training column except the first one.
# NOTE(review): preprocessing.Imputer was deprecated in scikit-learn 0.20 and
# removed in 0.22 (successor: sklearn.impute.SimpleImputer, which has no
# ``axis`` argument). It is kept here because the traceback in the question
# points at a pre-0.22 installation - confirm before upgrading.
imputer = pp.Imputer(missing_values='NaN', strategy='most_frequent', axis=1)
imputer = imputer.fit(m_train.iloc[0:, 1:])
m_train_imputed = imputer.transform(m_train.iloc[0:, 1:])

# These shape prints originally ran before m_train_imputed was assigned, which
# raises NameError when the file is executed top to bottom.
print(m_train_imputed.shape)
print(m_test.shape)

# Test-set feature matrix: every column except the first.
m_test_model = m_test.iloc[0:, 1:]
m_test_model.shape
target.shape
# Hold out part of the labelled data for evaluation.
X_train, X_test, Y_train, Y_test = train_test_split(
    m_train_imputed, target, shuffle=True
)

from sklearn.tree import DecisionTreeClassifier

dt_classifier = DecisionTreeClassifier(max_depth=13)
dt_classifier = dt_classifier.fit(X_train, Y_train)
print(dt_classifier.score(X_train, Y_train))
print(dt_classifier.score(X_test, Y_test))
print('------------------')

# Predict the winner for every match in the unlabelled test file and print
# one "id , prediction" line per match.
result = dt_classifier.predict(m_test_model)
for match_id, predicted_winner in zip(m_test['id'], result):
    print(match_id, ',', predicted_winner)

from sklearn import metrics

Y_pred = dt_classifier.predict(X_test)
# NOTE: per the warning text quoted in the question, UndefinedMetricWarning is
# raised for labels that have no true samples in Y_test. If those rows are not
# wanted, passing labels=np.unique(Y_test) to classification_report restricts
# the report to the labels actually present in Y_test.
print('Classification report : \n',
      metrics.classification_report(Y_test, Y_pred))
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。