@ardunn This is very illuminating indeed! I really appreciate the depth of insight you provided. I took some time to run the batch experiments and did some benchmarking. Surprisingly, a single-evaluation batch takes longer than a batch of, say, 15 parallel evaluations. This is quite bewildering to me, as the difference is consistent across multiple simulations:
Output: `Time for Batch of 1 vs 15 is 3.57 and 2.43` (wall-clock seconds).
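For a rough per-launch comparison, here is a back-of-envelope sketch using the numbers above. The assumption that OptTask fits the predictor only once per completed batch is mine and not something I have verified in the rocketsled source:

```python
# Back-of-envelope normalization of the wall-clock times above.
t_batch_1, t_batch_15 = 3.57, 2.43  # measured totals, both with nlaunches=30
nlaunches = 30
print(t_batch_1 / nlaunches)   # ~0.119 s per launch at batch_size=1
print(t_batch_15 / nlaunches)  # ~0.081 s per launch at batch_size=15
# My assumption: the GaussianProcessRegressor fit in OptTask runs once per
# completed batch, so 30 launches imply ~30 fits at batch_size=1 but only
# ~2 fits at batch_size=15, which could account for the gap.
print(nlaunches // 1, "fits vs", nlaunches // 15, "fits")
```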
Shortened code used for testing:

```python
import datetime
import time

import numpy as np
from fireworks.core.firework import FireTaskBase, Firework, FWAction, Workflow
from fireworks.core.launchpad import LaunchPad
from fireworks.core.rocket_launcher import rapidfire
from fireworks.utilities.fw_utilities import explicit_serialize
from rocketsled.control import MissionControl
from rocketsled.task import OptTask

# Setting up the FireWorks LaunchPad.
launchpad = LaunchPad(name="rsled")
opt_label = "opt_default"
db_info = {"launchpad": launchpad, "opt_label": opt_label}
x_dim = [(-5.0, 5.0), (-5.0, 5.0)]


@explicit_serialize
class RosenbrockTask(FireTaskBase):
    _fw_name = "RosenbrockTask"

    def run_task(self, fw_spec):
        x = fw_spec["_x"]
        y = (1 - x[0]) ** 2 + 100 * (x[1] - x[0] ** 2) ** 2
        return FWAction(update_spec={"_y": y})


def wf_creator_rosenbrock(x):
    spec = {"_x": x}
    # RosenbrockTask writes the _y field to the spec internally.
    firework1 = Firework([RosenbrockTask(), OptTask(**db_info)], spec=spec)
    return Workflow([firework1])


if __name__ == "__main__":
    mc = MissionControl(**db_info)
    launchpad.reset(
        password=datetime.datetime.now().strftime("%Y-%m-%d"),
        require_password=True,
    )

    ######################### BATCH SIZE A ###################
    batch_size_a = 1
    mc.reset(hard=True)
    mc.configure(
        wf_creator=wf_creator_rosenbrock,
        dimensions=x_dim,
        predictor="GaussianProcessRegressor",
        batch_size=batch_size_a,
        acq="ei",
    )
    for bs in range(batch_size_a):
        launchpad.add_wf(
            wf_creator_rosenbrock(
                [np.random.uniform(-5, 5), np.random.uniform(-5, 5)]
            )
        )
    batch_of_a_initial = time.time()
    rapidfire(launchpad, nlaunches=30, sleep_time=0)
    batch_of_a_final = time.time()
    plt = mc.plot()

    ######################### BATCH SIZE B ###################
    batch_size_b = 15
    launchpad.reset(
        password=datetime.datetime.now().strftime("%Y-%m-%d"),
        require_password=True,
    )
    mc.reset(hard=True)
    mc.configure(
        wf_creator=wf_creator_rosenbrock,
        dimensions=x_dim,
        predictor="GaussianProcessRegressor",
        batch_size=batch_size_b,
        acq="ei",
    )
    for bs in range(batch_size_b):
        launchpad.add_wf(
            wf_creator_rosenbrock(
                [np.random.uniform(-5, 5), np.random.uniform(-5, 5)]
            )
        )
    batch_of_b_initial = time.time()
    rapidfire(launchpad, nlaunches=30, sleep_time=0)
    batch_of_b_final = time.time()
    plt = mc.plot()
    #########################################################

    print(
        "Time for Batch of {} vs {} is {} and {}".format(
            batch_size_a,
            batch_size_b,
            batch_of_a_final - batch_of_a_initial,
            batch_of_b_final - batch_of_b_initial,
        )
    )
    # plt.show()
```
Thus, my final question on this is: after workflows are added in bulk, do they run in the 'next' iteration of the optimization, while FWAction here merely updates the unique IDs for the 'current' optimization run?
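To make my mental model concrete, here is a minimal sketch of what I assume happens inside a single Firework. Plain Python dicts stand in for FireWorks internals here; this is illustrative, not rocketsled's actual code:

```python
# Illustrative only: how I picture update_spec flowing into OptTask.
fw_spec = {"_x": [1.2, -0.7]}          # spec the Firework was launched with

# RosenbrockTask.run_task returns FWAction(update_spec={"_y": y}), which
# (as I understand it) merges _y into the spec seen by the next task.
x = fw_spec["_x"]
y = (1 - x[0]) ** 2 + 100 * (x[1] - x[0] ** 2) ** 2
fw_spec.update({"_y": y})

# OptTask then records (_x, _y) for the *current* run; only once a full
# batch of batch_size results has been collected does it (presumably) fit
# the predictor and submit the *next* batch of workflows.
print(fw_spec)
```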