I am using automatminer from the express preset.
During the run, I frequently see
_pre_test decorator: _random_mutation_operator: num_test=1 Found array with 0 feature(s) (shape=(22, 0)) while a minimum of 1 is required by RobustScaler…
ultimately leading to
ValueError: Found array with 0 feature(s) (shape=(25, 0)) while a minimum of 1 is required by RobustScaler.
This error does not only occur with RobustScaler, I see it with the other sklearn methods as well.
Here is the log leading up to tpot showing there should be features available:
2020-08-26 10:39:15 INFO DataCleaner: After handling na: 25 samples, 4147 features
2020-08-26 10:39:15 INFO DataCleaner: Finished fitting.
2020-08-26 10:39:15 INFO FeatureReducer: Starting fitting.
2020-08-26 10:39:38 INFO FeatureReducer: 1385 features removed due to cross correlation more than 0.95
2020-08-26 10:39:50 INFO TreeFeatureReducer: Finished tree-based feature reduction of 2761 initial features to 78
2020-08-26 10:39:50 INFO FeatureReducer: Finished fitting.
2020-08-26 10:39:50 INFO FeatureReducer: Starting transforming.
2020-08-26 10:39:50 INFO FeatureReducer: Finished transforming.
2020-08-26 10:39:50 INFO TPOTAdaptor: Starting fitting.
27 operators have been imported by TPOT.
There are also pareto front scores shown during the process:
Generation 1 - Current Pareto front scores:
-3 -64.88276749444293 RidgeCV(ZeroCount(SelectFwe(input_matrix, SelectFwe__alpha=0.03)))
…
Generation 10 - Current Pareto front scores:
-3 -44.85358622288145 RidgeCV(RobustScaler(VarianceThreshold(input_matrix, VarianceThreshold__threshold=0.01)))
…
Full stack trace:
ValueError Traceback (most recent call last)
<ipython-input> in <module>
1 # Create Matpipe in 'express' mode for recommended settings
2 pipe = MatPipe.from_preset(preset="express", n_jobs=22)
----> 3 pipe.fit(df=train_df, target=target_name)
~/.local/lib/python3.6/site-packages/automatminer/utils/pkg.py in wrapper(*args, **kwargs)
102 def wrapper(*args, **kwargs):
103 args[0].is_fit = False
--> 104 result = func(*args, **kwargs)
105 args[0].is_fit = True
106 return result
~/.local/lib/python3.6/site-packages/automatminer/pipeline.py in fit(self, df, target)
182 df = self.cleaner.fit_transform(df, target)
183 df = self.reducer.fit_transform(df, target)
--> 184 self.learner.fit(df, target)
185 logger.info("MatPipe successfully fit.")
186 self.post_fit_df = df
~/.local/lib/python3.6/site-packages/automatminer/utils/log.py in wrapper(*args, **kwargs)
94 self = args[0]
95 logger.info("{}Starting {}.".format(self._log_prefix, operation))
--> 96 result = meth(*args, **kwargs)
97 logger.info("{}Finished {}.".format(self._log_prefix, operation))
98 return result
~/.local/lib/python3.6/site-packages/automatminer/utils/pkg.py in wrapper(*args, **kwargs)
102 def wrapper(*args, **kwargs):
103 args[0].is_fit = False
--> 104 result = func(*args, **kwargs)
105 args[0].is_fit = True
106 return result
~/.local/lib/python3.6/site-packages/automatminer/automl/adaptors.py in fit(self, df, target, **fit_kwargs)
135 self._features = df.drop(columns=target).columns.tolist()
136 self._fitted_target = target
--> 137 self._backend = self._backend.fit(X, y, **fit_kwargs)
138 return self
139
~/.local/lib/python3.6/site-packages/tpot/base.py in fit(self, features, target, sample_weight, groups)
744 # raise the exception if it's our last attempt
745 if attempt == (attempts - 1):
--> 746 raise e
747 return self
748
~/.local/lib/python3.6/site-packages/tpot/base.py in fit(self, features, target, sample_weight, groups)
736
737 self._update_top_pipeline()
--> 738 self._summary_of_best_pipeline(features, target)
739 # Delete the temporary cache before exiting
740 self._cleanup_memory()
~/.local/lib/python3.6/site-packages/tpot/base.py in _summary_of_best_pipeline(self, features, target)
860 with warnings.catch_warnings():
861 warnings.simplefilter('ignore')
--> 862 self.pareto_front_fitted_pipelines[str(pipeline)].fit(features, target)
863
864 def predict(self, features):
~/.local/lib/python3.6/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
348 This estimator
349 """
--> 350 Xt, fit_params = self._fit(X, y, **fit_params)
351 with _print_elapsed_time('Pipeline',
352 self._log_message(len(self.steps) - 1)):
~/.local/lib/python3.6/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params)
313 message_clsname='Pipeline',
314 message=self._log_message(step_idx),
--> 315 **fit_params_steps[name])
316 # Replace the transformer of the step with the fitted
317 # transformer. This is necessary when loading the transformer
~/.local/lib/python3.6/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
563
564 def __call__(self, *args, **kwargs):
--> 565 return self._cached_call(args, kwargs)[0]
566
567 def __getstate__(self):
~/.local/lib/python3.6/site-packages/joblib/memory.py in _cached_call(self, args, kwargs, shelving)
529
530 if must_call:
--> 531 out, metadata = self.call(*args, **kwargs)
532 if self.mmap_mode is not None:
533 # Memmap the output at the first call to be consistent with
~/.local/lib/python3.6/site-packages/joblib/memory.py in call(self, *args, **kwargs)
725 if self._verbose > 0:
726 print(format_call(self.func, args, kwargs))
--> 727 output = self.func(*args, **kwargs)
728 self.store_backend.dump_item(
729 [func_id, args_id], output, verbose=self._verbose)
~/.local/lib/python3.6/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
726 with _print_elapsed_time(message_clsname, message):
727 if hasattr(transformer, 'fit_transform'):
--> 728 res = transformer.fit_transform(X, y, **fit_params)
729 else:
730 res = transformer.fit(X, y, **fit_params).transform(X)
~/.local/lib/python3.6/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
572 else:
573 # fit method of arity 2 (supervised transformation)
--> 574 return self.fit(X, y, **fit_params).transform(X)
575
576
~/.local/lib/python3.6/site-packages/sklearn/preprocessing/_data.py in fit(self, X, y)
1198 # the quantiles
1199 X = check_array(X, accept_sparse='csc', estimator=self,
--> 1200 dtype=FLOAT_DTYPES, force_all_finite='allow-nan')
1201
1202 q_min, q_max = self.quantile_range
~/.local/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
592 " a minimum of %d is required%s."
593 % (n_features, array.shape, ensure_min_features,
--> 594 context))
595
596 if warn_on_dtype and dtype_orig is not None and array.dtype != dtype_orig:
ValueError: Found array with 0 feature(s) (shape=(25, 0)) while a minimum of 1 is required by RobustScaler.