Template for GPU executables
- forces_gpu.forces_simf.run_forces(H, persis_info, sim_specs, libE_info)
Launches the forces MPI app and auto-assigns ranks and GPU resources.
Assigns one MPI rank to each GPU assigned to the worker.
forces_simf.py
1import numpy as np
2
3# Optional status codes to display in libE_stats.txt for each gen or sim
4from libensemble.message_numbers import TASK_FAILED, WORKER_DONE
5
6# Optional - to print GPU settings
7from libensemble.tools.test_support import check_gpu_setting
8
9
def run_forces(H, persis_info, sim_specs, libE_info):
    """Launch the forces MPI app, auto-assigning ranks and GPU resources.

    Assigns one MPI rank to each GPU assigned to the worker.

    Parameters
    ----------
    H : numpy structured array
        Input point(s); ``H["x"][0][0]`` supplies the particle count.
    persis_info : dict
        Worker-persistent state, passed through unchanged.
    sim_specs : dict
        Simulation specs; ``sim_specs["out"]`` gives the output dtype.
    libE_info : dict
        Worker info; ``libE_info["executor"]`` is the registered MPI executor.

    Returns
    -------
    tuple
        ``(output, persis_info, calc_status)`` where ``output`` is a
        one-row array with field ``"energy"`` and ``calc_status`` is an
        optional status code reported in libE_stats.txt.
    """
    calc_status = 0

    # Parse out num particles, from generator function
    particles = str(int(H["x"][0][0]))

    # App arguments: num particles, timesteps, also using num particles as seed
    args = f"{particles} 10 {particles}"

    # Retrieve our MPI Executor
    exctr = libE_info["executor"]

    # Submit our forces app for execution; procs and GPUs are auto-assigned
    # from the worker's resource allocation, one rank per GPU.
    task = exctr.submit(
        app_name="forces",
        app_args=args,
        auto_assign_gpus=True,
        match_procs_to_gpus=True,
    )

    # Block until the task finishes
    task.wait()

    # Optional - prints GPU assignment (method and numbers)
    check_gpu_setting(task, assert_setting=False, print_setting=True)

    # Try loading final energy reading, set the sim's status.
    # Catch only the failures loadtxt/indexing can raise: missing file
    # (OSError), unparsable contents (ValueError), or an empty file
    # (IndexError). Anything else is a real bug and should propagate.
    statfile = "forces.stat"
    try:
        data = np.loadtxt(statfile)
        final_energy = data[-1]
        calc_status = WORKER_DONE
    except (OSError, ValueError, IndexError):
        final_energy = np.nan
        calc_status = TASK_FAILED

    # Define our output array, populate with energy reading
    output = np.zeros(1, dtype=sim_specs["out"])
    output["energy"] = final_energy

    # Return final information to worker, for reporting to manager
    return output, persis_info, calc_status
Example usage
1#!/usr/bin/env python
2
3"""
4This example is based on the simple forces test. The default number of
5particles is increased considerably to give perceptible time on the GPUs when
6live-checking GPU usage.
7
8The forces.c application should be built by setting the GPU preprocessor condition
9(usually -DGPU) in addition to openMP GPU flags for the given system. See examples
10in ../forces_app/build_forces.sh. We recommend running forces.x standalone first
11and confirm it is running on the GPU (this is given clearly in the output).
12
13To mock on a non-GPU system, uncomment the resource_info line in libE_specs. You
14will compile forces without -DGPU option. It is recommended that the ub and/or lb for
15particle counts are reduced for CPU performance.
16"""
17
18import os
19import sys
20from pathlib import Path
21
22import numpy as np
23from forces_simf import run_forces # Sim func from current dir
24
25from libensemble import Ensemble
26from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
27from libensemble.executors import MPIExecutor
28from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f
29from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs
30
if __name__ == "__main__":
    # Initialize MPI Executor
    exctr = MPIExecutor()
    sim_app = Path.cwd() / "../forces_app/forces.x"

    # sim_app is a pathlib.Path - query it directly instead of os.path.isfile
    if not sim_app.is_file():
        sys.exit("forces.x not found - please build first in ../forces_app dir")

    exctr.register_app(full_path=sim_app, app_name="forces")

    # Parse number of workers, comms type, etc. from arguments
    ensemble = Ensemble(parse_args=True, executor=exctr)
    nsim_workers = ensemble.nworkers - 1  # One worker is for persistent generator

    # Persistent gen does not need resources
    ensemble.libE_specs = LibeSpecs(
        num_resource_sets=nsim_workers,
        sim_dirs_make=True,
        # resource_info = {"gpus_on_node": 4} # for mocking GPUs
    )

    ensemble.sim_specs = SimSpecs(
        sim_f=run_forces,
        inputs=["x"],
        outputs=[("energy", float)],
    )

    ensemble.gen_specs = GenSpecs(
        gen_f=gen_f,
        inputs=[],  # No input when start persistent generator
        persis_in=["sim_id"],  # Return sim_ids of evaluated points to generator
        outputs=[("x", float, (1,))],
        user={
            "initial_batch_size": nsim_workers,
            "lb": np.array([50000]),  # min particles
            "ub": np.array([100000]),  # max particles
        },
    )

    # Starts one persistent generator. Simulated values are returned in batch.
    ensemble.alloc_specs = AllocSpecs(
        alloc_f=alloc_f,
        user={
            "async_return": False,  # False causes batch returns
        },
    )

    # Instruct libEnsemble to exit after this many simulations
    ensemble.exit_criteria = ExitCriteria(sim_max=8)

    # Run ensemble
    ensemble.run()

    if ensemble.is_manager:
        # Note, this will change if changing sim_max, nworkers, lb, ub, etc.
        if ensemble.exit_criteria.sim_max == 8:
            chksum = np.sum(ensemble.H["energy"])
            assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}"
            print("Checksum passed")
        else:
            print("Run complete. A checksum has not been provided for the given sim_max")
Also see the Forces GPU tutorial and the video demonstration on Frontier.