Template for GPU executables with dynamic resources

forces_gpu_var_resources.forces_simf.run_forces(H, persis_info, sim_specs, libE_info)

Launches the forces MPI app and auto-assigns ranks and GPU resources (based on generator output).

forces_simf.py
 1import numpy as np
 2
 3# Optional status codes to display in libE_stats.txt for each gen or sim
 4from libensemble.message_numbers import TASK_FAILED, WORKER_DONE
 5
 6# Optional - to print GPU settings
 7from libensemble.tools.test_support import check_gpu_setting
 8
 9
def run_forces(H, persis_info, sim_specs, libE_info):
    """Launches the forces MPI app and auto-assigns ranks and GPU resources
    (based on generator output).

    Args:
        H: Input array for this simulation; ``H["x"][0][0]`` is read as the
            number of particles.
        persis_info: Passed back to the caller unchanged.
        sim_specs: Simulation specs; ``sim_specs["out"]`` is used as the dtype
            of the returned output array.
        libE_info: Worker information; ``libE_info["executor"]`` is used to
            submit the registered "forces" app.

    Returns:
        A tuple ``(output, persis_info, calc_status)`` where ``output`` is a
        one-element array whose "energy" field holds the last value read from
        ``forces.stat`` (NaN if it could not be read), and ``calc_status`` is
        ``WORKER_DONE`` on success or ``TASK_FAILED`` otherwise.
    """

    # Parse out num particles, from generator function
    particles = str(int(H["x"][0][0]))

    # app arguments: num particles, timesteps, also using num particles as seed
    args = f"{particles} 10 {particles}"

    # Retrieve our MPI Executor
    exctr = libE_info["executor"]

    # Submit our forces app for execution.
    task = exctr.submit(app_name="forces", app_args=args)

    # Block until the task finishes
    task.wait()

    # Optional - prints GPU assignment (method and numbers)
    check_gpu_setting(task, assert_setting=False, print_setting=True)

    # Try loading final energy reading, set the sim's status
    statfile = "forces.stat"
    try:
        # np.loadtxt returns a 0-d array when the file holds a single value;
        # atleast_1d keeps the [-1] lookup valid in that case.
        data = np.atleast_1d(np.loadtxt(statfile))
        final_energy = data[-1]
    except Exception:
        # Deliberately broad: any failure to obtain a reading (missing,
        # empty, or unparsable stat file) marks the task as failed.
        final_energy = np.nan
        calc_status = TASK_FAILED
    else:
        calc_status = WORKER_DONE

    # Define our output array, populate with energy reading
    output = np.zeros(1, dtype=sim_specs["out"])
    output["energy"] = final_energy

    # Return final information to worker, for reporting to manager
    return output, persis_info, calc_status
Example usage

Note the use of the generator function uniform_sample_with_var_gpus, which sets num_gpus as a gen_specs output field for each generated simulation input.

The special generator output field “num_gpus” is automatically picked up by each worker and will be used when the simulation is run, unless overridden.

 1#!/usr/bin/env python
 2
 3"""
 4This example is similar to the forces_gpu test.
 5
 6The forces.c application should be built by setting the GPU preprocessor condition
 7(usually -DGPU) in addition to openMP GPU flags for the given system. See examples
 8in ../forces_app/build_forces.sh. We recommend running forces.x standalone first
 9and confirming it is running on the GPU (this is given clearly in the output).
10
11A number of GPUs is requested based on the number of particles (randomly chosen
12from the range for each simulation). For simplicity, the number of GPUs requested
13is based on a linear split of the range (lb to ub), rather than absolute particle
14count.
15
16To mock on a non-GPU system, uncomment the resource_info line in libE_specs and
17compile forces without the -DGPU option. It is recommended that the ub and/or lb for
18particle counts be reduced for CPU performance.
19"""
20
21import os
22import sys
23from pathlib import Path
24
25import numpy as np
26from forces_simf import run_forces  # Sim func from current dir
27
28from libensemble import Ensemble
29from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
30from libensemble.executors import MPIExecutor
31from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_with_var_gpus as gen_f
32from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs
33
if __name__ == "__main__":
    # Create the MPI Executor and locate the forces binary relative to the
    # current working directory.
    mpi_executor = MPIExecutor()
    forces_exe = Path.cwd() / "../forces_app/forces.x"

    # Bail out early if the application has not been built yet.
    if not os.path.isfile(forces_exe):
        sys.exit("forces.x not found - please build first in ../forces_app dir")

    mpi_executor.register_app(full_path=forces_exe, app_name="forces")

    # Number of workers, comms type, etc. are taken from the command line
    ensemble = Ensemble(parse_args=True, executor=mpi_executor)

    # One worker is reserved for the persistent generator
    n_sim_workers = ensemble.nworkers - 1

    # The persistent gen does not need resources, so only the simulation
    # workers are counted as resource sets.
    ensemble.libE_specs = LibeSpecs(
        num_resource_sets=n_sim_workers,
        sim_dirs_make=True,
        stats_fmt={"show_resource_sets": True},  # see resource sets in libE_stats.txt
        # resource_info = {"gpus_on_node": 4},  # for mocking GPUs
    )

    ensemble.sim_specs = SimSpecs(
        sim_f=run_forces,
        inputs=["x"],
        outputs=[("energy", float)],
    )

    # Generator options: batch size, particle-count bounds, and GPU cap
    gen_user_options = {
        "initial_batch_size": n_sim_workers,
        "lb": np.array([50000]),  # min particles
        "ub": np.array([100000]),  # max particles
        "max_gpus": n_sim_workers,
    }
    gen_output_fields = [
        ("x", float, (1,)),
        ("num_gpus", int),  # num_gpus is passed to the sim automatically when using MPIExecutor.
    ]

    ensemble.gen_specs = GenSpecs(
        gen_f=gen_f,
        inputs=[],  # No input when starting the persistent generator
        persis_in=["sim_id"],  # Return sim_ids of evaluated points to generator
        outputs=gen_output_fields,
        user=gen_user_options,
    )

    # Starts one persistent generator. Simulated values are returned in batch.
    alloc_user_options = {"async_return": False}  # False causes batch returns
    ensemble.alloc_specs = AllocSpecs(alloc_f=alloc_f, user=alloc_user_options)

    # Instruct libEnsemble to exit after this many simulations.
    ensemble.exit_criteria = ExitCriteria(sim_max=8)

    # Run ensemble
    ensemble.run()

    if ensemble.is_manager:
        # Note, the reference checksum only applies to sim_max == 8 and will
        # change if changing sim_max, nworkers, lb, ub, etc.
        if ensemble.exit_criteria.sim_max != 8:
            print("Run complete; a checksum has not been provided for the given sim_max")
        else:
            energy_sum = np.sum(ensemble.H["energy"])
            assert np.isclose(energy_sum, 96288744.35136001), f"energy check sum is {energy_sum}"
            print("Checksum passed")

Also see the Forces GPU tutorial and the video demonstration on Frontier.