Using M3GNet with BOWSR (matgl + maml)

This isn’t really about pymatgen, and I’m not sure this question belongs here, since neither matgl nor maml is listed as a possible topic. I’m trying here first because I know the Ong group watches this forum, so perhaps you can direct me to the correct place.

Goal

Use BOWSR + M3GNet (using PyTorch) to quickly optimize structures within their current symmetry. The example in the maml repo uses MEGNet, but that model relies on TensorFlow, which I’d like to avoid.

Code

import matgl
from matgl.ext.ase import Relaxer
from maml.apps.bowsr.optimizer import BayesianOptimizer
from pymatgen.core.structure import Structure, Lattice

### Choose surrogate model: the M3GNet PES model, loaded by name through matgl
model = matgl.load_model('M3GNet-MP-2021.2.8-PES')

### Create test structure: CsCl in space group Pm-3m on a cubic lattice (a = 4.5)
struc = Structure.from_spacegroup("Pm-3m", Lattice.cubic(4.5), ["Cs", "Cl"], [[0, 0, 0], [0.5, 0.5, 0.5]])

### Create optimizer (BOWSR). NOTE(review): `model` is the object returned by matgl.load_model; whether BayesianOptimizer accepts it is unconfirmed
compressed_optimizer = BayesianOptimizer(
    model=model, 
    structure=struc, 
    relax_coords=True, 
    relax_lattice=True,
    use_symmetry=True, 
    seed=99
)
compressed_optimizer.set_bounds()
compressed_optimizer.optimize(n_init=100, n_iter=100, alpha=0.026 ** 2)

### Relax Structure
start_time = time.time()
# NOTE(review): `time` is not imported in this snippet; presumably it was
# imported in an earlier notebook cell. Add `import time` to run this standalone.
# NOTE(review): the relaxation below starts from the original `struc`; the return
# value of `optimize` above is discarded, so the BOWSR result is not used here
# unless the optimizer modifies `struc` in place (TODO confirm).
relaxer = Relaxer(potential=model)
relax_results = relaxer.relax(struc, fmax=0.01)
# extract the relaxed structure, its space-group info, and the last energy of the trajectory
final_structure = relax_results["final_structure"]
final_sg = final_structure.get_space_group_info()
final_energy = relax_results["trajectory"].energies[-1]
# print the final space group, the relaxed structure, the final energy, and the elapsed wall time
print(final_sg)
print(final_structure)
print(f"The final energy is {float(final_energy):.3f} eV.")
print(f"Relaxation took {time.time() - start_time:.3f} seconds.")

Result

The error below looks like it could indicate an incompatibility between the matgl model and BOWSR. Before I dig into the code and try to debug, I figured I would ask whether the matgl surrogate models are compatible with BOWSR.

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[15], line 22
     13 compressed_optimizer = BayesianOptimizer(
     14     model=model, 
     15     structure=struc, 
   (...)
     19     seed=99
     20 )
     21 compressed_optimizer.set_bounds()
---> 22 compressed_optimizer.optimize(n_init=100, n_iter=100, alpha=0.026 ** 2)
     24 ### Relax Structure
     25 start_time = time.time()

File ~/mambaforge/envs/pmg/lib/python3.10/site-packages/maml/apps/bowsr/optimizer.py:329, in BayesianOptimizer.optimize(self, n_init, n_iter, acq_type, kappa, xi, n_warmup, is_continue, sampler, **gpr_params)
    327 iteration = 0
    328 while iteration < n_iter:
--> 329     x_next = self.propose(acquisition_function=acq, n_warmup=n_warmup, sampler=sampler)
    330     self.add_query(x_next)
    331     iteration += 1

File ~/mambaforge/envs/pmg/lib/python3.10/site-packages/maml/apps/bowsr/optimizer.py:254, in BayesianOptimizer.propose(self, acquisition_function, n_warmup, sampler)
    252 with warnings.catch_warnings():
    253     warnings.simplefilter("ignore")
--> 254     self.gpr.fit(self.space.params, self.space.target)
    256 if self.noisy:
    257     y_max = np.max(self.gpr.predict(self.space.params))

File ~/mambaforge/envs/pmg/lib/python3.10/site-packages/sklearn/gaussian_process/_gpr.py:237, in GaussianProcessRegressor.fit(self, X, y)
    235 else:
    236     dtype, ensure_2d = None, False
--> 237 X, y = self._validate_data(
    238     X,
    239     y,
    240     multi_output=True,
    241     y_numeric=True,
    242     ensure_2d=ensure_2d,
    243     dtype=dtype,
    244 )
    246 # Normalize target value
    247 if self.normalize_y:

File ~/mambaforge/envs/pmg/lib/python3.10/site-packages/sklearn/base.py:584, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params)
    582         y = check_array(y, input_name="y", **check_y_params)
    583     else:
--> 584         X, y = check_X_y(X, y, **check_params)
    585     out = X, y
    587 if not no_val_X and check_params.get("ensure_2d", True):

File ~/mambaforge/envs/pmg/lib/python3.10/site-packages/sklearn/utils/validation.py:1106, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
   1101         estimator_name = _check_estimator_name(estimator)
   1102     raise ValueError(
   1103         f"{estimator_name} requires y to be passed, but the target y is None"
   1104     )
-> 1106 X = check_array(
   1107     X,
   1108     accept_sparse=accept_sparse,
   1109     accept_large_sparse=accept_large_sparse,
   1110     dtype=dtype,
   1111     order=order,
   1112     copy=copy,
   1113     force_all_finite=force_all_finite,
   1114     ensure_2d=ensure_2d,
   1115     allow_nd=allow_nd,
   1116     ensure_min_samples=ensure_min_samples,
   1117     ensure_min_features=ensure_min_features,
   1118     estimator=estimator,
   1119     input_name="X",
   1120 )
   1122 y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator)
   1124 check_consistent_length(X, y)

File ~/mambaforge/envs/pmg/lib/python3.10/site-packages/sklearn/utils/validation.py:931, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
    929     n_samples = _num_samples(array)
    930     if n_samples < ensure_min_samples:
--> 931         raise ValueError(
    932             "Found array with %d sample(s) (shape=%s) while a"
    933             " minimum of %d is required%s."
    934             % (n_samples, array.shape, ensure_min_samples, context)
    935         )
    937 if ensure_min_features > 0 and array.ndim == 2:
    938     n_features = array.shape[1]

ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by GaussianProcessRegressor.