Template for GPU executables

forces_gpu.forces_simf.run_forces(H, persis_info, sim_specs, libE_info)

Launches the forces MPI app and auto-assigns ranks and GPU resources.

Assigns one MPI rank to each GPU assigned to the worker.

forces_simf.py
 1import numpy as np
 2
 3# Optional status codes to display in libE_stats.txt for each gen or sim
 4from libensemble.message_numbers import TASK_FAILED, WORKER_DONE
 5
 6# Optional - to print GPU settings
 7from libensemble.tools.test_support import check_gpu_setting
 8
 9
def run_forces(H, persis_info, sim_specs, libE_info):
    """Launch the forces MPI app with auto-assigned ranks and GPU resources.

    One MPI rank is assigned to each GPU allocated to this worker.
    """
    calc_status = 0

    # Particle count comes from the generator; it is reused as the RNG seed.
    num_particles = str(int(H["x"][0][0]))
    timesteps = "10"

    # App arguments: num particles, timesteps, num particles again as seed.
    app_args = " ".join([num_particles, timesteps, num_particles])

    # MPI Executor supplied by libEnsemble.
    executor = libE_info["executor"]

    # Launch forces; ranks and GPUs are matched automatically to this
    # worker's resources.
    task = executor.submit(
        app_name="forces",
        app_args=app_args,
        auto_assign_gpus=True,
        match_procs_to_gpus=True,
    )

    task.wait()  # Block until the run completes.

    # Optional - report how GPUs were assigned (method and numbers).
    check_gpu_setting(task, assert_setting=False, print_setting=True)

    # Read the final energy from the stat file; any failure marks the
    # simulation as failed rather than raising.
    try:
        final_energy = np.loadtxt("forces.stat")[-1]
        calc_status = WORKER_DONE
    except Exception:
        final_energy = np.nan
        calc_status = TASK_FAILED

    # Output array for the worker to report back to the manager.
    out = np.zeros(1, dtype=sim_specs["out"])
    out["energy"] = final_energy

    return out, persis_info, calc_status
Example usage
 1#!/usr/bin/env python
 2
 3"""
 4This example is based on the simple forces test. The default number of
 5particles is increased considerably to give perceptible time on the GPUs when
 6live-checking GPU usage.
 7
 8The forces.c application should be built by setting the GPU preprocessor condition
 9(usually -DGPU) in addition to OpenMP GPU flags for the given system. See examples
10in ../forces_app/build_forces.sh. We recommend running forces.x standalone first
11and confirming that it runs on the GPU (this is shown clearly in the output).
12
13To mock on a non-GPU system, uncomment the resource_info line in libE_specs. You
14will compile forces without the -DGPU option. It is recommended that the ub and/or lb for
15particle counts be reduced for CPU performance.
16"""
17
18import os
19import sys
20from pathlib import Path
21
22import numpy as np
23from forces_simf import run_forces  # Sim func from current dir
24
25from libensemble import Ensemble
26from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
27from libensemble.executors import MPIExecutor
28from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f
29from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs
30
if __name__ == "__main__":
    # Initialize MPI Executor and locate the pre-built forces app.
    exctr = MPIExecutor()
    sim_app = Path.cwd() / "../forces_app/forces.x"

    # Idiom fix: sim_app is a pathlib.Path, so use Path.is_file()
    # rather than os.path.isfile().
    if not sim_app.is_file():
        sys.exit("forces.x not found - please build first in ../forces_app dir")

    exctr.register_app(full_path=sim_app, app_name="forces")

    # Parse number of workers, comms type, etc. from arguments
    ensemble = Ensemble(parse_args=True, executor=exctr)
    nsim_workers = ensemble.nworkers - 1  # One worker is for persistent generator

    # Persistent gen does not need resources
    ensemble.libE_specs = LibeSpecs(
        num_resource_sets=nsim_workers,
        sim_dirs_make=True,
        # resource_info = {"gpus_on_node": 4}  # for mocking GPUs
    )

    ensemble.sim_specs = SimSpecs(
        sim_f=run_forces,
        inputs=["x"],
        outputs=[("energy", float)],
    )

    ensemble.gen_specs = GenSpecs(
        gen_f=gen_f,
        inputs=[],  # No input when start persistent generator
        persis_in=["sim_id"],  # Return sim_ids of evaluated points to generator
        outputs=[("x", float, (1,))],
        user={
            "initial_batch_size": nsim_workers,
            "lb": np.array([50000]),  # min particles
            "ub": np.array([100000]),  # max particles
        },
    )

    # Starts one persistent generator. Simulated values are returned in batch.
    ensemble.alloc_specs = AllocSpecs(
        alloc_f=alloc_f,
        user={
            "async_return": False,  # False causes batch returns
        },
    )

    # Instruct libEnsemble to exit after this many simulations
    ensemble.exit_criteria = ExitCriteria(sim_max=8)

    # Run ensemble
    ensemble.run()

    if ensemble.is_manager:
        # Note, this will change if changing sim_max, nworkers, lb, ub, etc.
        if ensemble.exit_criteria.sim_max == 8:
            chksum = np.sum(ensemble.H["energy"])
            assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}"
            print("Checksum passed")
        else:
            print("Run complete. A checksum has not been provided for the given sim_max")

Also see the Forces GPU tutorial and the video demonstration on Frontier.