Template for GPU executables with dynamic resources¶
- forces_gpu_var_resources.forces_simf.run_forces(H, persis_info, sim_specs, libE_info)¶
Launches the forces MPI app and auto-assigns ranks and GPU resources (based on generator output).
forces_simf.py
1import numpy as np
2
3# Optional status codes to display in libE_stats.txt for each gen or sim
4from libensemble.message_numbers import TASK_FAILED, WORKER_DONE
5
6# Optional - to print GPU settings
7from libensemble.tools.test_support import check_gpu_setting
8
9
def run_forces(H, persis_info, sim_specs, libE_info):
    """Launch the forces MPI app and auto-assign ranks and GPU resources
    (based on generator output).
    """
    # The particle count comes from the generator; it doubles as the RNG seed.
    num_particles = str(int(H["x"][0][0]))

    # App arguments: num particles, timesteps (10), seed.
    app_args = f"{num_particles} 10 {num_particles}"

    # Submit the forces app via this worker's MPI Executor and block until done.
    task = libE_info["executor"].submit(app_name="forces", app_args=app_args)
    task.wait()

    # Optional: report how GPUs were assigned (method and numbers).
    check_gpu_setting(task, assert_setting=False, print_setting=True)

    # The app writes energy readings to forces.stat; the last entry is the
    # final energy. Any failure to read it marks the sim as failed (best effort).
    try:
        energy = np.loadtxt("forces.stat")[-1]
        status = WORKER_DONE
    except Exception:
        energy = np.nan
        status = TASK_FAILED

    # Populate the output array expected by the manager.
    out = np.zeros(1, dtype=sim_specs["out"])
    out["energy"] = energy

    return out, persis_info, status
Example usage
Note the use of the generator function uniform_sample_with_var_gpus, which sets num_gpus as a gen_specs output field for each generated simulation input.
The special generator output field “num_gpus” is automatically picked up by each worker and will be used when the simulation is run, unless overridden.
1#!/usr/bin/env python
2
3"""
4This example is similar to the forces_gpu test.
5
6The forces.c application should be built by setting the GPU preprocessor condition
7(usually -DGPU) in addition to openMP GPU flags for the given system. See examples
8in ../forces_app/build_forces.sh. We recommend running forces.x standalone first
9and confirming it is running on the GPU (this is given clearly in the output).
10
11A number of GPUs is requested based on the number of particles (randomly chosen
12from the range for each simulation). For simplicity, the number of GPUs requested
13is based on a linear split of the range (lb to ub), rather than absolute particle
14count.
15
16To mock on a non-GPU system, uncomment the resource_info line in libE_specs. You
17will compile forces without the -DGPU option. It is recommended that the ub and/or lb for
18particle counts are reduced for CPU performance.
19"""
20
21import os
22import sys
23from pathlib import Path
24
25import numpy as np
26from forces_simf import run_forces # Sim func from current dir
27
28from libensemble import Ensemble
29from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
30from libensemble.executors import MPIExecutor
31from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_with_var_gpus as gen_f
32from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs
33
if __name__ == "__main__":
    # Initialize MPI Executor and register the forces app.
    exctr = MPIExecutor()
    sim_app = Path.cwd() / "../forces_app/forces.x"

    # Use pathlib's own check rather than os.path.isfile on a Path object.
    if not sim_app.is_file():
        sys.exit("forces.x not found - please build first in ../forces_app dir")

    exctr.register_app(full_path=sim_app, app_name="forces")

    # Parse number of workers, comms type, etc. from arguments
    ensemble = Ensemble(parse_args=True, executor=exctr)
    nsim_workers = ensemble.nworkers - 1  # One worker is for persistent generator

    # Persistent gen does not need resources
    ensemble.libE_specs = LibeSpecs(
        num_resource_sets=nsim_workers,
        sim_dirs_make=True,
        stats_fmt={"show_resource_sets": True},  # see resource sets in libE_stats.txt
        # resource_info = {"gpus_on_node": 4},  # for mocking GPUs
    )

    ensemble.sim_specs = SimSpecs(
        sim_f=run_forces,
        inputs=["x"],
        outputs=[("energy", float)],
    )

    ensemble.gen_specs = GenSpecs(
        gen_f=gen_f,
        inputs=[],  # No input needed when starting the persistent generator
        persis_in=["sim_id"],  # Return sim_ids of evaluated points to generator
        outputs=[
            ("x", float, (1,)),
            ("num_gpus", int),  # num_gpus is passed to the sim automatically when using the MPIExecutor
        ],
        user={
            "initial_batch_size": nsim_workers,
            "lb": np.array([50000]),  # min particles
            "ub": np.array([100000]),  # max particles
            "max_gpus": nsim_workers,
        },
    )

    # Starts one persistent generator. Simulated values are returned in batch.
    ensemble.alloc_specs = AllocSpecs(
        alloc_f=alloc_f,
        user={
            "async_return": False,  # False causes batch returns
        },
    )

    # Instruct libEnsemble to exit after this many simulations.
    ensemble.exit_criteria = ExitCriteria(sim_max=8)

    # Run ensemble
    ensemble.run()

    if ensemble.is_manager:
        # Note, this will change if changing sim_max, nworkers, lb, ub, etc.
        if ensemble.exit_criteria.sim_max == 8:
            chksum = np.sum(ensemble.H["energy"])
            assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}"
            print("Checksum passed")
        else:
            print("Run complete; a checksum has not been provided for the given sim_max")
Also see the Forces GPU tutorial and the video demonstration on Frontier.