This isn’t really about pymatgen, and I’m not sure whether this question belongs here, since neither matgl nor maml is listed as a possible topic. I’m trying here first because I know the Ong group watches this forum, so perhaps you can direct me to the correct place.
Goal
Use BOWSR + M3GNet (using PyTorch) to quickly optimize structures within their current symmetry. The example in the maml repo uses MEGNet, but that example relies on TensorFlow, which I’d like to avoid.
Code
import time

import matgl
from matgl.ext.ase import Relaxer
from maml.apps.bowsr.optimizer import BayesianOptimizer
from pymatgen.core.structure import Structure, Lattice
# Script: symmetry-constrained Bayesian optimization (BOWSR) of a test
# structure followed by a relaxation with the same surrogate model.
# Requires `time` in addition to matgl / maml / pymatgen.

### Choose surrogate model
# NOTE(review): the raw matgl model is passed straight to BayesianOptimizer
# below. maml's BOWSR presumably expects an energy-model wrapper exposing a
# per-structure energy prediction method -- confirm against
# maml.apps.bowsr.model; if so, this object needs a thin adapter first.
model = matgl.load_model('M3GNet-MP-2021.2.8-PES')

### Create test structure
# Cubic Pm-3m cell (a = 4.5) with Cs at the origin and Cl at the body center.
struc = Structure.from_spacegroup("Pm-3m", Lattice.cubic(4.5), ["Cs", "Cl"], [[0, 0, 0], [0.5, 0.5, 0.5]])

### Create optimizer
compressed_optimizer = BayesianOptimizer(
    model=model,
    structure=struc,
    relax_coords=True,
    relax_lattice=True,
    use_symmetry=True,
    seed=99
)
compressed_optimizer.set_bounds()
# 100 initial samples followed by 100 optimization iterations.
compressed_optimizer.optimize(n_init=100, n_iter=100, alpha=0.026 ** 2)

### Relax Structure
# The timer starts before the Relaxer is built, so the reported duration
# includes its construction as well as the relaxation itself.
start_time = time.time()
relaxer = Relaxer(potential=model)
relax_results = relaxer.relax(struc, fmax=0.01)

# extract results
final_structure = relax_results["final_structure"]
final_sg = final_structure.get_space_group_info()
# Energy of the last step recorded in the relaxation trajectory.
final_energy = relax_results["trajectory"].energies[-1]

# print out the final relaxed structure and energy
print(final_sg)
print(final_structure)
print(f"The final energy is {float(final_energy):.3f} eV.")
print(f"Relaxation took {time.time() - start_time:.3f} seconds.")
Result
The error below looks like it could indicate an incompatibility. Before I dig into the code and try to debug, I figured I would ask whether the matgl surrogate models are compatible with BOWSR.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[15], line 22
13 compressed_optimizer = BayesianOptimizer(
14 model=model,
15 structure=struc,
(...)
19 seed=99
20 )
21 compressed_optimizer.set_bounds()
---> 22 compressed_optimizer.optimize(n_init=100, n_iter=100, alpha=0.026 ** 2)
24 ### Relax Structure
25 start_time = time.time()
File ~/mambaforge/envs/pmg/lib/python3.10/site-packages/maml/apps/bowsr/optimizer.py:329, in BayesianOptimizer.optimize(self, n_init, n_iter, acq_type, kappa, xi, n_warmup, is_continue, sampler, **gpr_params)
327 iteration = 0
328 while iteration < n_iter:
--> 329 x_next = self.propose(acquisition_function=acq, n_warmup=n_warmup, sampler=sampler)
330 self.add_query(x_next)
331 iteration += 1
File ~/mambaforge/envs/pmg/lib/python3.10/site-packages/maml/apps/bowsr/optimizer.py:254, in BayesianOptimizer.propose(self, acquisition_function, n_warmup, sampler)
252 with warnings.catch_warnings():
253 warnings.simplefilter("ignore")
--> 254 self.gpr.fit(self.space.params, self.space.target)
256 if self.noisy:
257 y_max = np.max(self.gpr.predict(self.space.params))
File ~/mambaforge/envs/pmg/lib/python3.10/site-packages/sklearn/gaussian_process/_gpr.py:237, in GaussianProcessRegressor.fit(self, X, y)
235 else:
236 dtype, ensure_2d = None, False
--> 237 X, y = self._validate_data(
238 X,
239 y,
240 multi_output=True,
241 y_numeric=True,
242 ensure_2d=ensure_2d,
243 dtype=dtype,
244 )
246 # Normalize target value
247 if self.normalize_y:
File ~/mambaforge/envs/pmg/lib/python3.10/site-packages/sklearn/base.py:584, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params)
582 y = check_array(y, input_name="y", **check_y_params)
583 else:
--> 584 X, y = check_X_y(X, y, **check_params)
585 out = X, y
587 if not no_val_X and check_params.get("ensure_2d", True):
File ~/mambaforge/envs/pmg/lib/python3.10/site-packages/sklearn/utils/validation.py:1106, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
1101 estimator_name = _check_estimator_name(estimator)
1102 raise ValueError(
1103 f"{estimator_name} requires y to be passed, but the target y is None"
1104 )
-> 1106 X = check_array(
1107 X,
1108 accept_sparse=accept_sparse,
1109 accept_large_sparse=accept_large_sparse,
1110 dtype=dtype,
1111 order=order,
1112 copy=copy,
1113 force_all_finite=force_all_finite,
1114 ensure_2d=ensure_2d,
1115 allow_nd=allow_nd,
1116 ensure_min_samples=ensure_min_samples,
1117 ensure_min_features=ensure_min_features,
1118 estimator=estimator,
1119 input_name="X",
1120 )
1122 y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator)
1124 check_consistent_length(X, y)
File ~/mambaforge/envs/pmg/lib/python3.10/site-packages/sklearn/utils/validation.py:931, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
929 n_samples = _num_samples(array)
930 if n_samples < ensure_min_samples:
--> 931 raise ValueError(
932 "Found array with %d sample(s) (shape=%s) while a"
933 " minimum of %d is required%s."
934 % (n_samples, array.shape, ensure_min_samples, context)
935 )
937 if ensure_min_features > 0 and array.ndim == 2:
938 n_features = array.shape[1]
ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by GaussianProcessRegressor.