When running the MatPipe.fit command with the TPOT backend on my regression dataset, I frequently encounter one of two errors. This post describes the first, which is:
ValueError: Unsupported set of arguments: The combination of penalty='l2' and loss='epsilon_insensitive' are not supported when dual=False, Parameters: penalty='l2', loss='epsilon_insensitive', dual=False
It appears to me that the genetic algorithm is attempting to pass invalid combinations of arguments to the underlying sklearn models. This occurs in both the 'debug' and 'express' presets.
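For anyone who wants to see the failure in isolation: the same ValueError can be triggered directly in scikit-learn, without automatminer or TPOT involved at all. Here is a minimal sketch (synthetic data, not the actual pipeline TPOT generated) that fits LinearSVR with the offending argument combination:

```python
# Minimal sketch reproducing the ValueError directly in scikit-learn.
# The data here is synthetic and purely illustrative.
import numpy as np
from sklearn.svm import LinearSVR

X = np.random.rand(22, 5)
y = np.random.rand(22)

# LinearSVR always passes penalty='l2' to liblinear internally; the
# primal solver (dual=False) does not support that penalty together
# with the (default) epsilon-insensitive loss, so fit() raises:
# ValueError: Unsupported set of arguments: ...
LinearSVR(loss='epsilon_insensitive', dual=False).fit(X, y)
```

So the problem is not the data: any candidate pipeline in which TPOT sets dual=False on LinearSVR while keeping the default loss will fail this way.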
The full stack trace is:
_pre_test decorator: _random_mutation_operator: num_test=0 Found array with 0 feature(s) (shape=(22, 0)) while a minimum of 1 is required by MaxAbsScaler…
_pre_test decorator: _random_mutation_operator: num_test=1 Found array with 0 feature(s) (shape=(22, 0)) while a minimum of 1 is required by MaxAbsScaler…
_pre_test decorator: _random_mutation_operator: num_test=0 Found array with 0 feature(s) (shape=(22, 0)) while a minimum of 1 is required by MaxAbsScaler…
_pre_test decorator: _random_mutation_operator: num_test=0 Found array with 0 feature(s) (shape=(22, 0)) while a minimum of 1 is required by MaxAbsScaler…
_pre_test decorator: _random_mutation_operator: num_test=0 Found array with 0 feature(s) (shape=(22, 0)) while a minimum of 1 is required by MaxAbsScaler…
Pipeline encountered that has previously been evaluated during the optimization process. Using the score from the previous evaluation.
ValueError Traceback (most recent call last)
<ipython-input-...> in <module>
1 # Create MatPipe in 'debug' mode for quick test
2 dummy_pipe = MatPipe.from_preset(preset="debug", n_jobs=20)
----> 3 dummy_pipe.fit(df=train_df, target=target_name)
~/.local/lib/python3.6/site-packages/automatminer/utils/pkg.py in wrapper(*args, **kwargs)
102 def wrapper(*args, **kwargs):
103 args[0].is_fit = False
--> 104 result = func(*args, **kwargs)
105 args[0].is_fit = True
106 return result
~/.local/lib/python3.6/site-packages/automatminer/pipeline.py in fit(self, df, target)
182 df = self.cleaner.fit_transform(df, target)
183 df = self.reducer.fit_transform(df, target)
--> 184 self.learner.fit(df, target)
185 logger.info("MatPipe successfully fit.")
186 self.post_fit_df = df
~/.local/lib/python3.6/site-packages/automatminer/utils/log.py in wrapper(*args, **kwargs)
94 self = args[0]
95 logger.info("{}Starting {}.".format(self._log_prefix, operation))
--> 96 result = meth(*args, **kwargs)
97 logger.info("{}Finished {}.".format(self._log_prefix, operation))
98 return result
~/.local/lib/python3.6/site-packages/automatminer/utils/pkg.py in wrapper(*args, **kwargs)
102 def wrapper(*args, **kwargs):
103 args[0].is_fit = False
--> 104 result = func(*args, **kwargs)
105 args[0].is_fit = True
106 return result
~/.local/lib/python3.6/site-packages/automatminer/automl/adaptors.py in fit(self, df, target, **fit_kwargs)
135 self._features = df.drop(columns=target).columns.tolist()
136 self._fitted_target = target
--> 137 self._backend = self._backend.fit(X, y, **fit_kwargs)
138 return self
139
~/.local/lib/python3.6/site-packages/tpot/base.py in fit(self, features, target, sample_weight, groups)
744 # raise the exception if it's our last attempt
745 if attempt == (attempts - 1):
--> 746 raise e
747 return self
748
~/.local/lib/python3.6/site-packages/tpot/base.py in fit(self, features, target, sample_weight, groups)
736
737 self._update_top_pipeline()
--> 738 self._summary_of_best_pipeline(features, target)
739 # Delete the temporary cache before exiting
740 self._cleanup_memory()
~/.local/lib/python3.6/site-packages/tpot/base.py in _summary_of_best_pipeline(self, features, target)
860 with warnings.catch_warnings():
861 warnings.simplefilter('ignore')
--> 862 self.pareto_front_fitted_pipelines_[str(pipeline)].fit(features, target)
863
864 def predict(self, features):
~/.local/lib/python3.6/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
352 self._log_message(len(self.steps) - 1)):
353 if self._final_estimator != 'passthrough':
--> 354 self._final_estimator.fit(Xt, y, **fit_params)
355 return self
356
~/.local/lib/python3.6/site-packages/sklearn/svm/_classes.py in fit(self, X, y, sample_weight)
430 None, penalty, self.dual, self.verbose,
431 self.max_iter, self.tol, self.random_state, loss=self.loss,
--> 432 epsilon=self.epsilon, sample_weight=sample_weight)
433 self.coef_ = self.coef_.ravel()
434
~/.local/lib/python3.6/site-packages/sklearn/svm/_base.py in _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight, penalty, dual, verbose, max_iter, tol, random_state, multi_class, loss, epsilon, sample_weight)
933 dtype=np.float64)
934
--> 935 solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
936 raw_coef_, n_iter_ = liblinear.train_wrap(
937 X, y_ind, sp.isspmatrix(X), solver_type, tol, bias, C,
~/.local/lib/python3.6/site-packages/sklearn/svm/_base.py in _get_liblinear_solver_type(multi_class, penalty, loss, dual)
791 raise ValueError('Unsupported set of arguments: %s, '
792 'Parameters: penalty=%r, loss=%r, dual=%r'
--> 793 % (error_string, penalty, loss, dual))
794
795

ValueError: Unsupported set of arguments: The combination of penalty='l2' and loss='epsilon_insensitive' are not supported when dual=False, Parameters: penalty='l2', loss='epsilon_insensitive', dual=False
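For reference, the check raising here is scikit-learn's liblinear solver-type lookup: with dual=False, the only loss LinearSVR supports is the squared epsilon-insensitive one. A quick sketch (same synthetic data as above) showing the neighboring combination that does fit cleanly:

```python
# Sketch: the argument combination liblinear's primal solver accepts.
# With dual=False, LinearSVR only supports the squared epsilon-
# insensitive loss, so this fits without raising.
import numpy as np
from sklearn.svm import LinearSVR

X = np.random.rand(22, 5)
y = np.random.rand(22)

LinearSVR(loss='squared_epsilon_insensitive', dual=False).fit(X, y)
```

That this works while the combination in the trace does not is consistent with the invalid arguments coming from TPOT's mutation step rather than anything specific to my dataset.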