Template with dynamic resources (CPU app and GPU app)¶
Launches either the CPU-only or GPU version of the forces MPI app and auto-assigns ranks and GPU resources as requested by the generator.
This makes efficient use of each node as the expensive GPU simulations will use the GPUs on the node(s), while the rest of the CPU cores are assigned to the simple CPU-only simulations.
See this publication for a real-world demonstration of these capabilities.
- forces_multi_app.forces_simf.run_forces(H, persis_info, sim_specs, libE_info)¶
Launches either the CPU-only or GPU version of the forces MPI app and auto-assigns ranks and GPU resources as requested by the generator.
forces_simf.py
1import numpy as np
2
3# Optional status codes to display in libE_stats.txt for each gen or sim
4from libensemble.message_numbers import TASK_FAILED, WORKER_DONE
5
6# Optional - to print GPU settings
7from libensemble.tools.test_support import check_gpu_setting
8
9
def run_forces(H, persis_info, sim_specs, libE_info):
    """Launch either the CPU-only or GPU version of the forces MPI app.

    Ranks and GPU resources are auto-assigned as requested by the generator
    (via the special "num_procs"/"num_gpus" generator output fields, which
    the worker picks up automatically when using the MPIExecutor).

    Parameters (standard libEnsemble sim_f interface):
        H: History array row(s); uses H["x"] (num particles) and
           H["app_type"] (registered app name, stored as bytes).
        persis_info: Persistent state dict, returned unchanged.
        sim_specs: Simulation specs; sim_specs["out"] gives the output dtype.
        libE_info: Worker info dict; libE_info["executor"] is the MPIExecutor.

    Returns:
        (output, persis_info, calc_status) where output is a 1-element
        structured array holding the final "energy" reading, and calc_status
        is WORKER_DONE on success or TASK_FAILED if the stat file could not
        be read.
    """
    calc_status = 0  # Optional status code shown in libE_stats.txt

    # Parse out num particles, from generator function
    particles = str(int(H["x"][0][0]))

    # App arguments: num particles, timesteps, also using num particles as seed
    args = particles + " " + str(10) + " " + particles

    # Retrieve our MPI Executor
    exctr = libE_info["executor"]

    # App name as registered in the calling script ("cpu_app" or "gpu_app");
    # stored as bytes in the history array, so decode to str.
    app_type = H["app_type"][0].decode()

    # Submit our forces app for execution.
    task = exctr.submit(
        app_name=app_type,
        app_args=args,
    )

    # Block until the task finishes
    task.wait()

    # Optional - prints GPU assignment (method and numbers)
    check_gpu_setting(task, assert_setting=False, print_setting=True, desc=app_type)

    # Stat file to check for bad runs
    statfile = "forces.stat"

    # Try loading final energy reading; a failure to read (e.g., a bad run
    # produced no or corrupt output) marks the simulation as failed.
    try:
        data = np.loadtxt(statfile)
        final_energy = data[-1]
        calc_status = WORKER_DONE
    except Exception:
        final_energy = np.nan
        calc_status = TASK_FAILED

    # Define our output array, populate with energy reading
    output = np.zeros(1, dtype=sim_specs["out"])
    output["energy"] = final_energy

    # Return final information to worker, for reporting to manager
    return output, persis_info, calc_status
The generator in the example below assigns to each simulation either the CPU application or the GPU application and also randomly assigns the number of processors for each simulation. For the GPU application, one GPU is used for each MPI rank. As many nodes as necessary will be used for each application.
The special generator output fields “num_procs” and “num_gpus” are automatically picked up by each worker and these will be used when the simulation is run, unless overridden.
Example usage
1#!/usr/bin/env python
2
3"""
4This example runs different applications, one that uses only CPUs and one
5that uses GPUs. Both use a variable number of processors. The GPU application
6uses one GPU per processor. As the generator creates simulations, it randomly
7assigns between one and max_proc processors to each simulation, and also randomly
8assigns which application is to be run.
9
10The forces.c application should be compiled for the CPU to `forces_cpu.x`, and
11for the GPU (setting the GPU preprocessor condition) to `forces_gpu.x`.
12
13For compile lines, see examples in ../forces_app/build_forces.sh.
14
15It is recommended to run this test such that:
16 ((nworkers - 1) - gpus_on_node) >= gen_specs["user"]["max_procs"]
17
18E.g., if running on one node with four GPUs, then use:
19 python run_libE_forces.py --nworkers 9
20
21E.g., if running on one node with eight GPUs, then use:
22 python run_libE_forces.py --nworkers 17
23"""
24
25import os
26import sys
27from pathlib import Path
28
29import numpy as np
30from forces_simf import run_forces # Sim func from current dir
31
32from libensemble import Ensemble
33from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
34from libensemble.executors import MPIExecutor
35from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_diff_simulations as gen_f
36from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs
37
if __name__ == "__main__":
    # Initialize MPI Executor instance
    exctr = MPIExecutor()

    # Register simulation executables with executor
    cpu_app = Path.cwd() / "../forces_app/forces_cpu.x"
    gpu_app = Path.cwd() / "../forces_app/forces_gpu.x"

    # Fail early with a clear message if either app has not been built.
    # Use pathlib's is_file() (the apps are already Path objects).
    if not cpu_app.is_file():
        sys.exit(f"{cpu_app} not found - please build first in ../forces_app dir")
    if not gpu_app.is_file():
        sys.exit(f"{gpu_app} not found - please build first in ../forces_app dir")

    exctr.register_app(full_path=cpu_app, app_name="cpu_app")
    exctr.register_app(full_path=gpu_app, app_name="gpu_app")

    # Parse number of workers, comms type, etc. from arguments
    ensemble = Ensemble(parse_args=True, executor=exctr)
    nsim_workers = ensemble.nworkers - 1  # One worker is for persistent generator

    # Persistent gen does not need resources
    ensemble.libE_specs = LibeSpecs(
        num_resource_sets=nsim_workers,
        sim_dirs_make=True,
        stats_fmt={"show_resource_sets": True},  # see resource sets in libE_stats.txt
        # resource_info = {"gpus_on_node": 4},  # for mocking GPUs
    )

    ensemble.sim_specs = SimSpecs(
        sim_f=run_forces,
        inputs=["x", "app_type"],
        outputs=[("energy", float)],
    )

    ensemble.gen_specs = GenSpecs(
        gen_f=gen_f,
        inputs=[],  # No input when starting persistent generator
        persis_in=["sim_id"],  # Return sim_ids of evaluated points to generator
        outputs=[
            ("x", float, (1,)),
            ("num_procs", int),  # num_procs auto given to sim when using MPIExecutor
            ("num_gpus", int),  # num_gpus auto given to sim when using MPIExecutor
            ("app_type", "S10"),  # select app type (cpu_app or gpu_app)
        ],
        user={
            "initial_batch_size": nsim_workers,
            "lb": np.array([5000]),  # min particles
            "ub": np.array([10000]),  # max particles
            "max_procs": nsim_workers // 2,  # Any sim created can req. 1 worker up to max
        },
    )

    # Starts one persistent generator. Simulated values are returned in batch.
    ensemble.alloc_specs = AllocSpecs(
        alloc_f=alloc_f,
        user={
            "async_return": False,  # False causes batch returns
        },
    )

    # Instruct libEnsemble to exit after this many simulations.
    ensemble.exit_criteria = ExitCriteria(sim_max=nsim_workers * 2)

    # Run ensemble
    ensemble.run()

    if ensemble.is_manager:
        # Note, this will change if changing sim_max, nworkers, lb, ub, etc.
        chksum = np.sum(ensemble.H["energy"])
        print(f"Final energy checksum: {chksum}")

        # Known-good checksums for the two documented worker counts
        # (sim_max 16 <-> 9 workers, sim_max 32 <-> 17 workers).
        exp_chksums = {16: -21935405.696289998, 32: -26563930.6356}
        exp_chksum = exp_chksums.get(ensemble.exit_criteria.sim_max)

        if exp_chksum is not None:
            assert np.isclose(chksum, exp_chksum), f"energy check sum is {chksum}"
            print("Checksum passed")
        else:
            print("Run complete. An expected checksum has not been provided for the given sim_max")
More information is available in the Forces GPU tutorial and the video demonstration on Frontier.