Python sklearn.base 模块,ClassifierMixin() 实例源码
我们从Python开源项目中,提取了以下15个代码示例,用于说明如何使用sklearn.base.ClassifierMixin()。
def _get_child_predict(self, clf, X, index=None):
    """Return the output of a stage-0 estimator ``clf`` for ``X`` as a 2-D block.

    The branch taken depends on what ``clf`` offers:

    * ``self.stack_by_proba`` is truthy and ``clf`` has ``predict_proba``:
      the probability matrix with its first column dropped.  When
      ``self.save_stage0`` is truthy and ``index`` is given, the matrix is
      obtained through ``util.saving_predict_proba`` instead of a direct call.
    * ``clf`` has ``predict``: for a ``ClassifierMixin`` the predictions are
      passed through a fresh ``LabelBinarizer``; otherwise they are reshaped
      into a single column.
    * otherwise: ``clf.fit_transform(X)``.

    Parameters
    ----------
    clf : estimator
        The stage-0 estimator to query.
    X : array-like
        Samples handed to ``clf``.
    index : optional
        Forwarded to ``util.saving_predict_proba`` when stage-0 saving is on.

    Returns
    -------
    The block produced by the selected branch.
    """
    if self.stack_by_proba and hasattr(clf, 'predict_proba'):
        if self.save_stage0 and index is not None:
            # NOTE(review): X is not forwarded to the saving helper here --
            # confirm against util.saving_predict_proba's signature.
            proba = util.saving_predict_proba(clf, index)
        else:
            proba = clf.predict_proba(X)
        # Keep every probability column except the first one.
        return proba[:, 1:]

    if hasattr(clf, 'predict'):
        predict_result = clf.predict(X)
        if isinstance(clf, ClassifierMixin):
            # fit_transform() already fits the binarizer, so the separate
            # fit() call that used to precede it was redundant work.
            # NOTE(review): the binarizer only sees the predicted labels, so
            # the output presumably depends on which labels were predicted.
            return LabelBinarizer().fit_transform(predict_result)
        # Non-classifier predictions become one column.
        return predict_result.reshape((predict_result.size, 1))

    return clf.fit_transform(X)
def test_tree_identical_labels():
    """Check tree shape on constant targets and on a two-level step target.

    For every entry of ``estimators`` a clone is fitted twice:

    1. on 100 random samples whose targets are all ones, where the tree must
       consist of a single node holding all 100 samples;
    2. on a 1-D ramp whose targets are 0 for the first half and 1 for the
       second half, where at least one leaf must hold more than 2 samples.
    """
    rng = np.random.RandomState(0)
    for est in estimators:
        features = rng.randn(100, 5)
        constant_target = np.ones(100)

        fitted = clone(est)
        fitted.set_params(min_samples_split=2, max_depth=None)
        fitted.fit(features, constant_target)

        assert_equal(fitted.tree_.n_node_samples, [100])
        # The expected node value differs between classifiers and others.
        if isinstance(fitted, ClassifierMixin):
            expected_value = [[[100]]]
        else:
            expected_value = [[[1.0]]]
        assert_equal(fitted.tree_.value, expected_value)

        # NOTE(review): the source lost its indentation; this second phase is
        # assumed to run once per estimator, inside the loop.
        ramp = np.linspace(0.0, 1.0, 100).reshape((-1, 1))
        step_target = np.array([0.0] * 50 + [1.0] * 50)
        fitted.fit(ramp, step_target)

        is_leaf = fitted.tree_.children_left == -1
        leaf_sizes = fitted.tree_.n_node_samples[is_leaf]
        assert_true(np.any(leaf_sizes > 2))
def test_tree_identical_labels():
    """Check every tree of each ensemble on constant and two-level targets.

    Each entry of ``ensembles`` is fitted on 100 random samples whose targets
    are all ones; every member tree must then be a single node holding all
    100 samples.  The ensemble is then refitted with targets that are 0 for
    the first half and 1 for the second half, and every member tree must
    have at least one leaf with more than 2 samples.
    """
    rng = np.random.RandomState(0)
    for ensemble in ensembles:
        X = rng.randn(100, 5)
        y = np.ones(100)
        ensemble.fit(X, y)
        for est in ensemble.estimators_:
            assert_equal(est.tree_.n_node_samples, [100])
            if isinstance(est, ClassifierMixin):
                assert_equal(est.tree_.value, [[[100]]])
            else:
                # The original line was syntactically broken ("1))"); the
                # expected value mirrors the single-estimator variant of
                # this test.
                assert_equal(est.tree_.value, [[[1.0]]])

        # NOTE(review): the single-estimator variant also replaces X with a
        # 1-D ramp before this refit; here X is left unchanged -- confirm
        # against the upstream test.
        y = np.array([0.0] * 50 + [1.0] * 50)
        ensemble.fit(X, y)
        for est in ensemble.estimators_:
            leaf_ids = est.tree_.children_left == -1
            assert_true(np.any(est.tree_.n_node_samples[leaf_ids] > 2))
def test_probabilities(model: ClassifierMixin, X: np.array, y: pd.Series,
                       bins: int = 10, threshold: float = 0.5):
    """Print a per-probability-bin breakdown of labels and accuracy.

    The second column of ``model.predict_proba(X)`` is cut into ``bins``
    equal-width bins spanning [0, 1].  For each bin, from the highest to the
    lowest, one row is printed with the bin's lower bound, the number of
    samples labelled 1, the number labelled 0 and the accuracy.  Accuracy is
    the share of zeros when the bin's lower bound is below ``threshold`` and
    the share of ones otherwise; an empty bin reports 0.0.

    Parameters
    ----------
    model : fitted estimator exposing ``predict_proba``.
    X : samples handed to ``predict_proba``.
    y : labels; the 1/0 counts come from their per-bin sum and count.
    bins : number of equal-width probability bins.
    threshold : probability at or above which a bin counts as predicting 1.
    """
    probs = [p[1] for p in model.predict_proba(X)]
    print('\tProbabilities')
    df = pd.DataFrame({'prob': probs, 'label': y})

    # Explicit [0, 1] edges keep each label equal to its bin's lower bound.
    # An integer bin count would cut between the observed min and max, so the
    # label/threshold comparison below would be made against wrong bounds.
    edges = np.linspace(0.0, 1.0, bins + 1)
    # One label per bin (the old code always built 10 labels, which only
    # worked for bins == 10).
    cut_labels = [round(i / bins, 6) for i in range(bins)]
    binned = pd.cut(df['prob'], edges, labels=cut_labels, include_lowest=True)
    # observed=False keeps empty bins in the table; the loop handles them.
    by_prob = (df.groupby(binned, observed=False)['label']
               .agg(['sum', 'count']))

    print('\t\tprobs\t1\t0\tacc')
    for index, row in by_prob.iloc[::-1].iterrows():
        ones = row['sum']
        # An empty bin may report NaN as its sum.
        ones = 0 if math.isnan(ones) else int(ones)
        count = row['count']
        zeros = int(count) - ones
        if count > 0:
            acc = zeros / count if index < threshold else ones / count
        else:
            acc = 0.0
        print(f'\t\t{index}\t{ones}\t{zeros}\t{acc:.3f}')
def _get_blend_init(self, y_train, clf):
    """Allocate a zero-filled blend block sized for ``clf``'s output.

    The block has ``y_train.size`` rows.  Its column count is:

    * ``self.n_classes_ - 1`` when ``self.stack_by_proba`` is truthy and
      ``clf`` has ``predict_proba``;
    * ``self.n_classes_`` when ``clf`` has ``predict`` and is a
      ``ClassifierMixin``;
    * ``1`` when ``clf`` has ``predict`` but is not a ``ClassifierMixin``;
    * ``clf.n_components`` when ``clf`` has that attribute.

    Raises
    ------
    Exception
        If none of the cases above applies to ``clf``.
    """
    uses_proba = self.stack_by_proba and hasattr(clf, 'predict_proba')
    if uses_proba:
        n_cols = self.n_classes_ - 1
    elif hasattr(clf, 'predict'):
        n_cols = self.n_classes_ if isinstance(clf, ClassifierMixin) else 1
    elif hasattr(clf, 'n_components'):
        n_cols = clf.n_components
    else:
        raise Exception('Unimplemented for {0}'.format(type(clf)))

    shape = (y_train.size, n_cols)
    return np.zeros(shape)
def __init__(self, metric='riemann', tsupdate=False,
             clf=LogisticRegression()):
    """Store the configuration and check that ``clf`` is a classifier.

    Parameters
    ----------
    metric : stored as ``self.metric`` (default ``'riemann'``).
    tsupdate : stored as ``self.tsupdate`` (default ``False``).
    clf : stored as ``self.clf``; must be a ``ClassifierMixin`` instance.

    Raises
    ------
    TypeError
        If ``clf`` is not a ``ClassifierMixin``.
    """
    self.metric, self.tsupdate, self.clf = metric, tsupdate, clf

    if isinstance(clf, ClassifierMixin):
        # NOTE(review): the TangentSpace instance built here is not kept --
        # this looks like a leftover; confirm whether it should be stored.
        TangentSpace(metric=self.metric, tsupdate=self.tsupdate)
        return
    raise TypeError('clf must be a ClassifierMixin')
def predict(self, X, check_input=True, return_std=False):
    """Predict class or regression value for X.

    For a classification model, the predicted class for each sample in X is
    returned. For a regression model, the predicted value based on X is
    returned.

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The input samples. Internally, it will be converted to
        ``dtype=np.float32`` and if a sparse matrix is provided
        to a sparse ``csr_matrix``.
    check_input : boolean, (default=True)
        Allow to bypass several input checking.
        Don't use this parameter unless you know what you do.
    return_std : boolean, (default=False)
        Whether or not to return the standard deviation.
        Only consulted on the regression path.

    Returns
    -------
    y : array of shape = [n_samples] or [n_samples, n_outputs]
        The predicted classes, or the predict values.  For a regression
        model with ``return_std=True`` the full result of
        ``tree_.predict`` is returned instead of only its first element.
    """
    # X used to be missing from the signature although both the docstring
    # and the body rely on it.
    check_is_fitted(self, 'tree_')
    X = self._validate_X_predict(X, check_input)

    # Classification: pick the class with the highest probability.
    if isinstance(self, ClassifierMixin):
        return self.classes_[self.predict_proba(X).argmax(axis=1)]

    # Regression
    mean_and_std = self.tree_.predict(
        X, return_std=return_std, is_regression=True)
    if return_std:
        return mean_and_std
    return mean_and_std[0]
def test_numerical_stability():
    """Fit every estimator on sign-flipped data with float errors raised.

    Inside ``np.errstate(all="raise")`` each entry of ``estimators`` is
    cloned and then fitted on all four sign combinations of ``X`` and the
    targets, followed by two ``partial_fit`` calls.  Classifiers receive the
    targets rounded to the nearest integer.  The test passes when no call
    raises.
    """
    # NOTE(review): rows 3, 6 and 7 were truncated in the source (unbalanced
    # brackets, ragged rows).  They were completed from the upstream
    # scikit-learn test of the same name so that X is 7x4, matching the 7
    # targets below -- verify against upstream.
    X = np.array([
        [152.08097839, 140.40744019, 129.75102234, 159.90493774],
        [142.50700378, 135.81935120, 117.82884979, 162.75781250],
        [127.28772736, 140.40744019, 129.75102234, 159.90493774],
        [132.37025452, 143.71923828, 138.35694885, 157.84558105],
        [103.10237122, 143.71928406, 138.35696411, 157.84559631],
        [127.71276855, 143.71923828, 138.35694885, 157.84558105],
        [120.91514587, 140.40744019, 129.75102234, 159.90493774]])
    y = np.array(
        [1., 0.70209277, 0.53896582, 0., 0.90914464, 0.48026916, 0.49622521])

    with np.errstate(all="raise"):
        for est in estimators:
            new_est = clone(est)
            # Classifiers get the targets rounded to whole numbers.
            if isinstance(est, ClassifierMixin):
                y_curr = np.round(y)
            else:
                y_curr = y
            new_est.fit(X, y_curr)
            new_est.fit(X, -y_curr)
            new_est.fit(-X, y_curr)
            new_est.fit(-X, -y_curr)
            new_est.partial_fit(X, y_curr)
            new_est.partial_fit(-X, y_curr)
def variable_importance(estimator: Type[ClassifierMixin]) -> np.array:
    """Return variable importances for estimator.

    Uses the first row of ``coef_`` when the estimator has that attribute,
    otherwise ``feature_importances_`` when present.  Returns ``None`` when
    the estimator has neither attribute.
    """
    if hasattr(estimator, 'coef_'):
        importances = estimator.coef_[0]
    elif hasattr(estimator, 'feature_importances_'):
        importances = estimator.feature_importances_
    else:
        importances = None
    return importances
def score(self, X, y):
    """Return the accuracy of ``self.predict(X)`` against ``y``.

    Force use of accuracy score since we don't inherit
    from ClassifierMixin.

    Parameters
    ----------
    X : samples handed to ``self.predict``.
    y : true labels to compare the predictions with.
    """
    # X used to be missing from the signature although the body needs it.
    from sklearn.metrics import accuracy_score
    return accuracy_score(y, self.predict(X))
def __init__(self, tsupdate=self.tsupdate)
def _generate_bases_test(est, pd_est):
    """Build a test method comparing the base classes of ``est`` and ``pd_est``.

    The returned function takes the test-case instance as ``self`` and
    asserts that:

    * ``pd_est`` is a ``FrameMixin`` while ``est`` is not;
    * ``pd_est`` is a ``base.BaseEstimator``;
    * for every mixin checked, ``pd_est`` and ``est`` agree on whether they
      are an instance of it.
    """
    def test(self):
        self.assertTrue(isinstance(pd_est, FrameMixin), pd_est)
        self.assertFalse(isinstance(est, FrameMixin))
        self.assertTrue(isinstance(pd_est, base.BaseEstimator))
        try:
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.DensityMixin,
                base.MetaEstimatorMixin,
                base.RegressorMixin]
        except AttributeError:
            # Building the list can only fail on a missing attribute of
            # ``base``; that is tolerated only when _sklearn_ver <= 17.
            if _sklearn_ver > 17:
                raise
            mixins = [
                base.ClassifierMixin,
                base.RegressorMixin]
        for mixin in mixins:
            # The mixin is passed as the failure message so a mismatch names
            # the offending class.
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                mixin)
    return test
def get_params_for_est(estimator, name):
    '''Choose initialization parameters for an estimator for auto-testing.

    ``estimator`` is a class (its ``__mro__`` and ``__init__`` are read) and
    ``name`` is its name.  Any of the init parameters ``estimator``,
    ``base_estimator`` or ``estimators`` is filled with a default model, and
    a ``score_func`` parameter, if present, is set to a scoring function
    matching the estimator kind.

    Returns the tuple ``(X, y, kw)`` where ``X, y`` come from
    ``make_X_y(**kw)`` and ``kw`` holds the flags describing the estimator.
    '''
    bases = estimator.__mro__
    is_classifier = ClassifierMixin in bases
    is_cluster = ClusterMixin in bases
    is_ensemble = BaseEnsemble in bases
    uses_counts = any(token in name for token in USES_COUNTS)
    # NOTE(review): as_1d is computed but never used in this block.
    as_1d = name in REQUIRES_1D

    args, params, _ = get_args_kwargs_defaults(estimator.__init__)
    nested_keys = {'estimator', 'base_estimator', 'estimators'}
    nested_keys &= set(params) | set(args)

    score_func = feat.f_classif if is_classifier else feat.f_regression

    for key in nested_keys:
        if name == 'SelectFromModel':
            params[key] = sklearn.linear_model.LassoCV()
        elif is_classifier:
            params[key] = sklearn.tree.DecisionTreeClassifier()
        else:
            params[key] = sklearn.tree.DecisionTreeRegressor()
        if key == 'estimators':
            # Ten named clones of the model chosen above.
            template = params[key]
            params[key] = [(str(i), clone(template)) for i in range(10)]

    kw = {
        'is_classifier': is_classifier,
        'is_cluster': is_cluster,
        'is_ensemble': is_ensemble,
        'uses_counts': uses_counts,
    }
    if 'score_func' in params:
        params['score_func'] = score_func
    # NOTE(review): params is filled in above but is not part of the return
    # value -- confirm whether callers expect it.
    X, y = make_X_y(**kw)
    return X, y, kw
def train_model(data: ArticleDB,
                learner: Type[ClassifierMixin],
                param_grid: dict, *,
                test_articles: Optional[ArticleDB] = None,
                most_important_features: bool = False,
                examples: bool = False,
                ground_truth_as_test: bool = False,
                probabilities: bool = False) -> ClassifierMixin:
    """Trains classifier learner on data and reports test set accuracy.

    A grid search over ``param_grid`` is fitted on the training part and the
    best estimator is evaluated on the test part.  The test part is:

    * ``data.ground_truth_*`` when ``ground_truth_as_test`` is set;
    * ``test_articles`` when supplied;
    * otherwise a 20% hold-out split of ``data``.

    In the first two cases all of ``data`` is used for training.

    Parameters
    ----------
    data : source of ``X``, ``y``, ``df`` and ``feature_names``.
    learner : estimator, or a callable returning one (it is called once).
    param_grid : grid handed to ``GridSearchCV``.
    test_articles : optional separate test set.
    most_important_features : print the top 50 variables by importance.
    examples : print article examples for the test predictions.
    ground_truth_as_test : evaluate on ``data``'s ground-truth subset.
    probabilities : print the probability table if the best model exposes
        ``predict_proba``.

    Returns
    -------
    The best estimator found by the grid search.

    Raises
    ------
    ValueError
        If both ``ground_truth_as_test`` and ``test_articles`` are given.
    """
    if ground_truth_as_test and test_articles:
        # The two literals used to be joined without a space.
        raise ValueError('ground_truth_as_test must be False if test_articles '
                         'are supplied')
    if callable(learner):
        learner = learner()

    X, y = data.X, data.y
    if ground_truth_as_test:
        X_train, y_train = X, y
        X_test = data.ground_truth_X
        y_test = data.ground_truth_y
        df_test = data.ground_truth
    elif test_articles:
        X_train, y_train = X, y
        X_test = test_articles.X
        y_test = test_articles.y
        df_test = test_articles.df
    else:
        # Split features, labels and the frame together so that they stay
        # aligned.  The old call omitted y and unpacked the wrong number of
        # results, leaving y_train undefined.
        X_train, X_test, y_train, y_test, _, df_test = (
            train_test_split(X, y, data.df, test_size=0.2))

    model = GridSearchCV(learner, param_grid).fit(X_train, y_train)
    best_model = model.best_estimator_
    preds = best_model.predict(X_test)
    conf_mat = confusion_matrix(y_test, preds, labels=[1, 0])
    accuracy = np.mean(y_test == preds)

    # Text before the first "(" of the repr; partition() also copes with a
    # repr that contains no parenthesis at all.
    learner_repr = repr(learner).partition('(')[0]
    print(f'{learner_repr} with parameters {model.best_params_}:')
    print(f'\tval-accuracy: {model.best_score_}')
    print(f'\ttest-accuracy: {accuracy}')
    print(f'\tconfusion matrix: [{conf_mat[0]}')
    print(f'\t {conf_mat[1]}]')

    if most_important_features:
        var_imp = variable_importance(best_model)
        print_top_vars(var_imp, 50, data.feature_names)
    if examples:
        article_examples(df_test, preds)
    if probabilities and hasattr(best_model, 'predict_proba'):
        # test_probabilities takes (model, X, y); X_test used to be missing.
        test_probabilities(best_model, X_test, y_test)
    return best_model
def evaluate(self, point):
    """
    Fits model using the particular setting of hyperparameters and
    evaluates the model validation data.

    Parameters
    ----------
    * `point`: dict
        A mapping of parameter names to the corresponding values

    Returns
    -------
    * `score`: float
        Score (more is better!) for some specific point.  It is the r^2
        score for a ``RegressorMixin`` and the negated log loss for a
        ``ClassifierMixin``, never lower than -5.0.  -5.0 is also returned
        when fitting or scoring raises, or for any other kind of model.
    """
    # All four data sets are needed below; the old unpacking bound only two
    # names to four values and failed before anything was fitted.
    X_train, y_train, X_test, y_test = (
        self.X_train, self.y_train, self.X_test, self.y_test)

    # Apply the per-parameter transformation stored at index 1 of each
    # ``self.space`` entry (for example an exp transformation).
    point_mapped = {
        param: self.space[param][1](val) for param, val in point.items()
    }

    model_instance = self.model(**point_mapped)
    if 'random_state' in model_instance.get_params():
        model_instance.set_params(random_state=self.random_state)

    min_obj_val = -5.0

    # Infeasible parameters are expected to raise an exception, thus the
    # try/except below; they yield the assumed smallest objective.
    try:
        model_instance.fit(X_train, y_train)
        if isinstance(model_instance, RegressorMixin):  # r^2 metric
            y_predicted = model_instance.predict(X_test)
            score = r2_score(y_test, y_predicted)
        elif isinstance(model_instance, ClassifierMixin):  # log loss
            y_predicted = model_instance.predict_proba(X_test)
            # In the context of this function a higher score is better.
            score = -log_loss(y_test, y_predicted)
        else:
            # Previously reached the fallback through an unbound ``score``.
            score = min_obj_val
        # Guard against singularities (e.g. a zero probability breaking the
        # log loss) that show up as NaN or -inf.
        if math.isnan(score):
            score = min_obj_val
        score = max(score, min_obj_val)
    except Exception:
        # Exception rather than BaseException, so that KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        score = min_obj_val
    return score
# this is necessary to generate table for README in the end
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。