In the past, I’ve often opened multiple connections to the Materials Project API to fetch data for a few hundred compounds quickly, usually just a few times a day. I’m pretty sure this level of usage does not count as “heavy API usage”. My approach looks like this:
I define some worker functions to carry out a single operation requiring MP API data, like making a Pourbaix diagram.
# workers.py
# Function definitions to use in multiprocessing pools
# Pourbaix diagram imports
import pymatgen.analysis.phase_diagram as phase_diagram
import pymatgen.analysis.pourbaix_diagram as pbx
from pymatgen.core import Composition
from pymatgen.entries.computed_entries import ComputedStructureEntry
from pymatgen.ext.matproj import MPRester
# Module-level client used by the worker functions in this file; it is
# constructed once, when this module is imported.
# NOTE(review): if this module is imported before a multiprocessing pool is
# started, each worker presumably gets its own copy of this object - confirm
# that sharing it this way is safe for the underlying HTTP session.
mprst = MPRester()
def make_PON_pourbaix_object(
    perovskite_entry: ComputedStructureEntry,
) -> dict:
    """
    Build a Pourbaix diagram that includes the given perovskite entry.

    Calls ``mprst.get_entries_in_chemsys`` and ``mprst.get_pourbaix_entries``
    on the module-level ``mprst`` client, so every call talks to the
    Materials Project API.

    Parameters
    ----------
    perovskite_entry : ComputedStructureEntry
        Entry for the structure to add to the diagram. Its ``structure``,
        ``composition`` and ``energy`` attributes are read.

    Returns
    -------
    dict
        ``"diagram_data"`` maps to the generated ``pbx.PourbaixDiagram`` and
        ``"perovskite_pourbaix_entry"`` maps to the ``pbx.PourbaixEntry``
        created for ``perovskite_entry``.
    """
    # The element symbols of the structure define the chemical system used for
    # both API queries below.
    chemsys_symbols = perovskite_entry.structure.symbol_set

    # Look up all the chemical system entries that should correspond to this
    # structure and build the reference phase diagram from them.
    ion_reference_entries = mprst.get_entries_in_chemsys(
        elements=chemsys_symbols, compatible_only=True
    )
    ion_reference_phasediagram = phase_diagram.PhaseDiagram(
        entries=ion_reference_entries
    )

    # Make a dummy phase diagram entry for our structure of interest.
    dummy_perovskite_pd_entry = phase_diagram.PDEntry(
        composition=perovskite_entry.composition, energy=perovskite_entry.energy
    )

    # Use the dummy entry to create the real phase diagram entry, whose energy
    # is what the reference phase diagram's ``get_form_energy`` reports for it.
    perovskite_pd_entry = phase_diagram.PDEntry(
        composition=perovskite_entry.composition,
        energy=ion_reference_phasediagram.get_form_energy(dummy_perovskite_pd_entry),
    )

    # Convert this phase diagram entry to a Pourbaix diagram entry.
    perovskite_pbx_entry = pbx.PourbaixEntry(entry=perovskite_pd_entry)

    # Fetch all Pourbaix diagram entries for our system, requesting the
    # "MaterialsProjectCompatibility" scheme for solids.
    pbx_entries = mprst.get_pourbaix_entries(
        chemsys=chemsys_symbols,
        solid_compat="MaterialsProjectCompatibility",
    )

    # Append the entry for our structure.
    pbx_entries.append(perovskite_pbx_entry)

    # Drop O and H (when present) so the diagram composition reflects only the
    # remaining elements of the entry, expressed fractionally below.
    comp_dict = perovskite_pbx_entry.composition.as_dict()
    for k in ("O", "H"):
        comp_dict.pop(k, None)
    cation_comp = Composition.from_dict(comp_dict)

    # Make a Pourbaix diagram object, which we can then query for stable domains.
    pourbaix_diagram = pbx.PourbaixDiagram(
        entries=pbx_entries,
        filter_solids=True,
        comp_dict=cation_comp.fractional_composition.as_dict(),
    )

    # Return the generated Pourbaix entry alongside the diagram so that it can
    # later be used to make shaded stability plots.
    return {
        "diagram_data": pourbaix_diagram,
        "perovskite_pourbaix_entry": perovskite_pbx_entry,
    }
def apply_make_PON_function(dict_item):
    """
    Adapter that lets ``make_PON_pourbaix_object`` be mapped over the items of
    a dictionary, e.g. from a multiprocessing pool.

    Parameters
    ----------
    dict_item : tuple
        A ``(key, value)`` pair as produced by ``dict.items()``. The value is
        passed to ``make_PON_pourbaix_object``.

    Returns
    -------
    tuple
        ``(key, result)``, where ``key`` is the unchanged first element of
        ``dict_item`` and ``result`` is what ``make_PON_pourbaix_object``
        returned for the second element.
    """
    item_key = dict_item[0]
    item_entry = dict_item[1]
    processed = make_PON_pourbaix_object(item_entry)
    return (item_key, processed)
Then I call them in a multiprocessing pool:
# Map to entire list of loosely relaxed entries
# Update 12 May 2022: Unfortunately the MPAPI looks like it
# doesn't accept concurrent connections anymore.
from workers import apply_make_PON_function
def operation():
    """
    Apply ``apply_make_PON_function`` to every item of
    ``loosely_relaxed_mp_compat_entries`` in a pool of 6 processes.

    Returns
    -------
    dict
        Built from the ``(key, result)`` pairs yielded by the pool, in the
        order ``imap`` yields them.
    """
    # record=True collects warnings raised inside the block instead of showing
    # them; the collected list is discarded. This is used to silence the large
    # amount of ion oxidation state warnings.
    with warnings.catch_warnings(record=True), multiprocessing.Pool(processes=6) as pool:
        result_pairs = pool.imap(
            apply_make_PON_function,
            loosely_relaxed_mp_compat_entries.items(),
        )
        # Wrap the lazy result iterator in a progress bar of known length.
        progress = tqdm.tqdm(
            result_pairs,
            total=len(loosely_relaxed_mp_compat_entries),
        )
        # Consuming the iterator here (inside the pool context) drives the work.
        pourbaix_diagram_objects = dict(progress)
    return pourbaix_diagram_objects
# Run the pool over all entries and keep the resulting dictionary.
pourbaix_diagram_objects = operation()
But now I’m getting RemoteDisconnected errors:
RemoteTraceback Traceback (most recent call last)
RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/samueldy/miniconda3/envs/pt-ru-convex-hull-lanl/lib/python3.9/site-packages/urllib3/connectionpool.py", line 699, in urlopen
httplib_response = self._make_request(
File "/home/samueldy/miniconda3/envs/pt-ru-convex-hull-lanl/lib/python3.9/site-packages/urllib3/connectionpool.py", line 445, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "/home/samueldy/miniconda3/envs/pt-ru-convex-hull-lanl/lib/python3.9/site-packages/urllib3/connectionpool.py", line 440, in _make_request
httplib_response = conn.getresponse()
File "/home/samueldy/miniconda3/envs/pt-ru-convex-hull-lanl/lib/python3.9/http/client.py", line 1345, in getresponse
response.begin()
File "/home/samueldy/miniconda3/envs/pt-ru-convex-hull-lanl/lib/python3.9/http/client.py", line 307, in begin
version, status, reason = self._read_status()
File "/home/samueldy/miniconda3/envs/pt-ru-convex-hull-lanl/lib/python3.9/http/client.py", line 276, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response
It appears to work if I use a loop, making just one API call at a time. Did Materials Project change the policy on the legacy and new APIs to prohibit concurrent connections from one client?
Thanks in advance!