Adding an Experiment

This guide is intended for those wanting to define a new set of experiment parameters for a given benchmark.

Similar to systems, Benchpark also provides an API where you can represent experiments as objects and customize their description with command line arguments.

Experiment specifications are created with experiment.py files each located in the experiment repo: benchpark/experiments/${Benchmark1}.

  • If you are adding experiments to an existing benchmark, it is best to extend the current experiment.py for that benchmark in the experiment repo.

  • If you are adding experiments to a benchmark you created, create a new folder for your benchmark in the experiment repo, and put your new experiment.py inside of it.

These experiment.py files inherit from the Experiment base class in /lib/benchpark/experiment.py shown below, and when used in conjunction with the system configuration files and package/application repositories, are used to generate a set of concrete Ramble experiments for the target system and programming model.

# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

from typing import Dict
import yaml  # TODO: some way to ensure yaml available
import sys
from enum import Enum

from benchpark.error import BenchparkError
from benchpark.directives import ExperimentSystemBase
from benchpark.directives import variant
from benchpark.variables import VariableDict
import benchpark.spec
import benchpark.variant

import ramble.language.language_base  # noqa
import ramble.language.language_helpers  # noqa


class ExperimentHelper:
    """Base class for the ``Helper`` objects that mixins attach to an experiment.

    Every ``compute_*`` / ``get_*`` hook returns an empty default, so a
    subclass only overrides the sections it actually contributes to.
    """

    def __init__(self, exp):
        """Capture the experiment's spec and create empty variable stores.

        Args:
            exp: The owning experiment; only its ``spec`` attribute is read.
        """
        self.spec = exp.spec
        self._expr_vars = VariableDict()
        # "set" maps a variable name to its value; "append" is a one-element
        # list holding the "paths" and "vars" mappings.
        self.env_vars = {
            "set": {},
            "append": [{"paths": {}, "vars": {}}],
        }

    def compute_include_section(self):
        """Return this helper's include entries (none by default)."""
        return []

    def compute_config_section(self):
        """Return this helper's config entries (none by default)."""
        return {}

    def compute_modifiers_section(self):
        """Return this helper's modifier entries (none by default)."""
        return []

    def compute_applications_section(self):
        """Return this helper's applications entries (none by default)."""
        return {}

    def compute_package_section(self):
        """Return this helper's package entries (none by default)."""
        return {}

    def get_helper_name_prefix(self):
        """Return the helper's experiment-name component, or None for no component."""
        return None

    def get_spack_variants(self):
        """Return extra spack variants as a string, or None when there are none."""
        return None

    def compute_variables_section(self):
        """Return this helper's additional variables (none by default)."""
        return {}

    def set_environment_variable(self, name, value):
        """Set value of environment variable"""
        self.env_vars["set"][name] = value

    def append_environment_variable(self, name, value, target="paths"):
        """Append to existing environment variable PATH ('paths') or other variable ('vars')
        Matches expected ramble format. Example:
        https://ramble.readthedocs.io/en/latest/workspace_config.html#environment-variable-control

        Raises:
            ValueError: If ``target`` is neither ``"paths"`` nor ``"vars"``.
        """
        # Reject unknown targets explicitly instead of failing with a bare
        # KeyError on the dictionary lookup below.
        if target not in ["paths", "vars"]:
            raise ValueError("Invalid target specified. Must be 'paths' or 'vars'.")

        self.env_vars["append"][0][target][name] = value

    def compute_config_variables(self):
        """Hook for subclasses to populate ``_expr_vars`` and ``env_vars``."""
        pass

    def compute_config_variables_wrapper(self):
        """Run ``compute_config_variables`` and return ``(_expr_vars, env_vars)``."""
        self.compute_config_variables()
        return self._expr_vars, self.env_vars


class SingleNode:
    """Mixin that declares the boolean ``single_node`` variant."""

    variant(
        "single_node",
        default=True,
        description="Single node execution mode",
    )

    class Helper(ExperimentHelper):
        """Supplies the experiment-name component for single-node runs."""

        def get_helper_name_prefix(self):
            """Return ``"single_node"`` when the variant is enabled, else ``""``."""
            if self.spec.satisfies("+single_node"):
                return "single_node"
            return ""


class Affinity:
    """Mixin that declares the ``affinity`` variant (``none`` or ``on``)."""

    variant(
        "affinity",
        default="none",
        values=(
            "none",
            "on",
        ),
        multi=False,
        description="Build and run the affinity package",
    )

    class Helper(ExperimentHelper):
        """Adds the affinity modifier and package when the variant is enabled."""

        def compute_modifiers_section(self):
            """Return the affinity modifier entry, or ``[]`` for ``affinity=none``.

            The modifier mode is ``cuda`` or ``rocm`` when the spec enables
            that programming model, and ``mpi`` otherwise.
            """
            if self.spec.satisfies("affinity=none"):
                return []

            if self.spec.satisfies("+cuda"):
                mode = "cuda"
            elif self.spec.satisfies("+rocm"):
                mode = "rocm"
            else:
                mode = "mpi"
            return [{"name": "affinity", "mode": mode}]

        def compute_package_section(self):
            """Return the ``packages``/``environments`` entries for affinity.

            With ``affinity=none`` no package is registered; the result then
            holds an empty ``packages`` mapping and an ``affinity``
            environment with an empty package list.
            """
            # set package versions
            affinity_version = "master"

            # TODO: Get compiler/mpi/package handles directly from system.py
            compiler = "default-compiler"

            # set package spack specs
            package_specs = {}

            if not self.spec.satisfies("affinity=none"):
                pkg_spec = f"affinity@{affinity_version}+mpi"
                if self.spec.satisfies("+cuda"):
                    pkg_spec += "+cuda"
                elif self.spec.satisfies("+rocm"):
                    pkg_spec += "+rocm"
                package_specs["affinity"] = {
                    "pkg_spec": pkg_spec,
                    "compiler": compiler,
                }

            return {
                "packages": {k: v for k, v in package_specs.items() if v},
                "environments": {"affinity": {"packages": list(package_specs)}},
            }


class HwlocVariantValues(str, Enum):
    """Allowed values for the ``hwloc`` variant."""

    # Default value; no hwloc modifier is added for it.
    NONE = "none"
    # Enables the hwloc modifier.
    ON = "on"


class Hwloc:
    """Mixin that declares the ``hwloc`` variant."""

    variant(
        "hwloc",
        default=HwlocVariantValues.NONE.value,
        values=tuple(v.value for v in HwlocVariantValues),
        multi=False,
        description="Get underlying infrastructure topology",
    )

    class Helper(ExperimentHelper):
        """Adds the hwloc modifier when the variant is not ``none``."""

        def compute_modifiers_section(self):
            """Return the hwloc modifier entry, or ``[]`` when hwloc is disabled.

            The modifier's mode is the value selected for the ``hwloc`` variant.
            """
            disabled_spec = f"hwloc={HwlocVariantValues.NONE.value}"
            if self.spec.satisfies(disabled_spec):
                return []

            return [{"name": "hwloc", "mode": self.spec.variants["hwloc"][0]}]


class Experiment(ExperimentSystemBase, SingleNode, Affinity, Hwloc):
    """This is the superclass for all benchpark experiments.

    ***The Experiment class***

    Experiments are written in pure Python.

    There are two main parts of a Benchpark experiment:

      1. **The experiment class**.  Classes contain ``directives``, which are
         special functions, that add metadata (variants) to packages (see
         ``directives.py``).

      2. **Experiment instances**. Once instantiated, an experiment is
         essentially a collection of files defining an experiment in a
         Ramble workspace.
    """

    #
    # These are default values for instance variables.
    #

    # This allows analysis tools to correctly interpret the class attributes.
    variants: Dict[
        "benchpark.spec.Spec",
        Dict[str, benchpark.variant.Variant],
    ]

    variant(
        "package_manager",
        default="spack",
        values=("spack", "environment-modules", "None"),
        description="package manager to use",
    )

    variant(
        "append_path",
        default=" ",
        description="Append to environment PATH during experiment execution",
    )

    def __init__(self, spec):
        """Create the experiment and instantiate the helpers of its mixins.

        Args:
            spec: Concrete experiment spec; it must define a ``workload``
                variant.

        Raises:
            BenchparkError: If the spec has no ``workload`` variant.
        """
        self.spec: "benchpark.spec.ConcreteExperimentSpec" = spec
        # Device type must be set before super with absence of mpionly experiment type
        self.device_type = "cpu"
        super().__init__()
        self.helpers = []
        self._spack_name = None
        self._ramble_name = None
        self._expr_vars = VariableDict()
        self.req_vars = [
            "n_resources",
            "process_problem_size",
            "total_problem_size",
            "device_type",
        ]

        # Walk the MRO after the concrete class itself and instantiate the
        # Helper of every class that exposes one, skipping Experiment and
        # object.
        for cls in self.__class__.mro()[1:]:
            if cls is Experiment or cls is object:
                continue
            if hasattr(cls, "Helper"):
                self.helpers.append(cls.Helper(self))

        self.name = self.spec.name

        if "workload" in self.spec.variants:
            self.workload = self.spec.variants["workload"]
        else:
            raise BenchparkError(f"No workload variant defined for package {self.name}")

        self.package_specs = {}

    @property
    def spack_name(self):
        """The name of the spack package that is used to build this benchmark"""
        return self._spack_name

    @spack_name.setter
    def spack_name(self, value: str):
        self._spack_name = value

    @property
    def ramble_name(self):
        """The name of the ramble application associated with this benchmark"""
        return self._ramble_name

    @ramble_name.setter
    def ramble_name(self, value: str):
        self._ramble_name = value

    @property
    def expr_vars(self):
        """Dictionary of experiment variables"""
        return self._expr_vars

    def set_required_variables(self, **kwargs):
        """Helper function to set required variables.

        ``device_type`` is always added from ``self.device_type``; every
        keyword argument must be one of the names in ``self.req_vars``.

        Raises:
            ValueError: If a keyword is not listed in ``self.req_vars``.
        """
        self.add_experiment_variable("device_type", self.device_type, False)
        for var, value in kwargs.items():
            if var not in self.req_vars:
                raise ValueError(f"Unexpected experiment variable provided '{var}'")
            self.add_experiment_variable(var, value, False)

    def check_required_variables(self):
        """Raises error if any of the self.req_vars variables are not set in derived classes."""
        unset_vars = [v for v in self.req_vars if v not in self.variables]
        if unset_vars:
            raise NotImplementedError(
                f"The following experiment variables must be set with 'self.add_experiment_variable': {', '.join(unset_vars)}."
            )

    def compute_include_section(self):
        """Return the list of include paths for the ramble configuration."""
        # include the config directory
        return ["./configs"]

    def compute_config_section(self):
        """Return the config section shared by all experiments.

        Spack flags are only added when the ``package_manager`` variant is
        ``spack``.
        """
        # default configs for all experiments
        default_config = {
            "deprecated": True,
            "benchpark_experiment_command": "benchpark " + " ".join(sys.argv[1:]),
        }
        if self.spec.variants["package_manager"][0] == "spack":
            default_config["spack_flags"] = {
                "install": "--add --keep-stage",
                "concretize": "-U -f",
            }
        return default_config

    def compute_modifiers_section(self):
        """Return experiment-specific modifiers; subclasses may override."""
        return []

    def compute_modifiers_section_wrapper(self):
        """Return the default modifiers plus those of the experiment and helpers."""
        # by default we use the allocation and exit-code modifiers
        modifier_list = [{"name": "allocation"}, {"name": "exit-code"}]
        modifier_list += self.compute_modifiers_section()
        for helper in self.helpers:
            modifier_list += helper.compute_modifiers_section()
        return modifier_list

    def add_experiment_variable(self, name, values, named=False, matrixed=False):
        """Register an experiment variable.

        Args:
            name: Variable name.
            values: A dict registers a dimensional variable (one dimension
                per key) and records a zip named ``name`` over those keys;
                any other value registers a scalar variable.
            named: Forwarded to the variable store; named variables
                contribute their dimension names to the experiment name.
            matrixed: When true, ``name`` is also appended to the matrix.
        """
        if isinstance(values, dict):
            self.expr_vars.add_dimensional_variable(name, values, named, True, matrixed)
            self.zips[name] = list(values.keys())
        else:
            self.expr_vars.add_scalar_variable(name, values, named, False, matrixed)
        if matrixed:
            self.matrix.append(name)

    def set_environment_variable(self, name, values):
        """Set value of environment variable"""
        self.env_vars["set"][name] = values

    def append_environment_variable(self, name, values, target="paths"):
        """Append to existing environment variable PATH ('paths') or other variable ('vars')
        Matches expected ramble format. Example:
        https://ramble.readthedocs.io/en/latest/workspace_config.html#environment-variable-control

        Raises:
            ValueError: If ``target`` is neither ``"paths"`` nor ``"vars"``.
        """
        if target not in ["paths", "vars"]:
            raise ValueError("Invalid target specified. Must be 'paths' or 'vars'.")

        self.env_vars["append"][0][target][name] = values

    def add_experiment_exclude(self, exclude_clause):
        """Add a clause to the experiment's ``exclude``/``where`` list."""
        self.excludes.append(exclude_clause)

    def compute_applications_section(self):
        """Define the experiment variables; must be implemented by subclasses."""
        raise NotImplementedError(
            "Each experiment must implement compute_applications_section"
        )

    def compute_applications_section_wrapper(self):
        """Build the ``applications`` section of the ramble configuration.

        Resets the per-run state, merges the variables and environment
        variables contributed by the helpers, runs the subclass hook and
        the optional scaling step, then emits one experiment per workload.

        Raises:
            NotImplementedError: If a required variable was not set.
        """
        self.expr_var_names = []
        self.env_vars = {
            "set": {},
            "append": [{"paths": {}, "vars": {}}],
        }
        self.variables = {}
        self.zips = {}
        self.matrix = []
        self.excludes = []

        appended = self.env_vars["append"][0]
        for helper in self.helpers:
            variables, env_vars = helper.compute_config_variables_wrapper()
            self.expr_vars.extend(variables)
            self.env_vars["set"] |= env_vars["set"]
            # Merge each target ("paths"/"vars") entry by entry. A shallow
            # merge of the outer dict would replace the whole inner mapping,
            # so only the last helper's appended variables would survive.
            for target, additions in env_vars["append"][0].items():
                appended.setdefault(target, {}).update(additions)

        self.compute_applications_section()

        if "scaling" in self.spec.variants and not self.spec.satisfies("scaling=off"):
            self.expr_vars.extend(self.scale())

        for var in self.expr_vars.values():
            for dim in var.dims():
                if var.is_named:
                    # Doubled braces emit a literal "{dim}" placeholder.
                    self.expr_var_names.append(f"{{{dim}}}")
                # A single value that is neither zipped nor matrixed is
                # written as a scalar instead of a one-element list.
                if len(var[dim]) == 1 and not var.is_zipped and not var.is_matrixed:
                    self.variables[dim] = var[dim][0]
                else:
                    self.variables[dim] = var[dim]

        # Only non-empty helper prefixes take part in the experiment name.
        expr_helper_list = []
        for helper in self.helpers:
            helper_prefix = helper.get_helper_name_prefix()
            if helper_prefix:
                expr_helper_list.append(helper_prefix)
        expr_name_suffix = "_".join(expr_helper_list + self.expr_var_names)

        self.check_required_variables()

        expr_setup = {
            "variants": {"package_manager": self.spec.variants["package_manager"][0]},
            "env_vars": self.env_vars,
            "variables": self.variables,
            "zips": self.zips,
            "matrix": self.matrix,
            "exclude": ({"where": self.excludes} if self.excludes else {}),
        }

        # Every workload gets one experiment; all of them share expr_setup.
        workloads = {}
        for workload in self.workload:
            expr_name = f"{self.name}_{workload}_{expr_name_suffix}"
            workloads[workload] = {
                "experiments": {
                    expr_name: expr_setup,
                }
            }

        return {
            self.name: {
                "workloads": workloads,
            }
        }

    def add_package_spec(self, package_name, spec=None):
        """Register a package for the software section.

        Args:
            package_name: Key under which the package is recorded.
            spec: Optional sequence whose first element is used as the
                package spec; when omitted or empty, an empty entry is
                recorded.
        """
        if spec:
            self.package_specs[package_name] = {
                "pkg_spec": spec[0],
            }
        else:
            self.package_specs[package_name] = {}

    def compute_package_section(self):
        """Register the package specs; must be implemented by subclasses."""
        raise NotImplementedError(
            "Each experiment must implement compute_package_section"
        )

    def compute_package_section_wrapper(self):
        """Build the ``software`` section of the ramble configuration.

        Raises:
            BenchparkError: If no package entry named ``self.name`` was
                registered by ``compute_package_section`` or a helper.
        """
        pkg_manager = self.spec.variants["package_manager"][0]

        for helper in self.helpers:
            helper_package_specs = helper.compute_package_section()
            if helper_package_specs and "packages" in helper_package_specs:
                self.package_specs |= helper_package_specs["packages"]

        self.compute_package_section()

        if self.name not in self.package_specs:
            raise BenchparkError(
                f"Package section must be defined for application package {self.name}"
            )

        if pkg_manager == "spack":
            helper_variants = (helper.get_spack_variants() for helper in self.helpers)
            spack_variants = [v for v in helper_variants if v is not None]
            # NOTE(review): the joined variants are concatenated to pkg_spec
            # without a separating space, and an entry registered without a
            # spec has no "pkg_spec" key — confirm both are intended.
            self.package_specs[self.name]["pkg_spec"] += " ".join(
                spack_variants
            ).strip()

        # NOTE(review): the append_path variant defaults to " ", so PATH
        # receives that value whenever the variant is left unset — confirm.
        if "append_path" in self.spec.variants:
            self.append_environment_variable(
                "PATH", self.spec.variants["append_path"][0]
            )

        return {
            "packages": {k: v for k, v in self.package_specs.items() if v},
            "environments": {self.name: {"packages": list(self.package_specs.keys())}},
        }

    def compute_variables_section(self):
        """Return experiment-level variables; subclasses may override."""
        return {}

    def compute_variables_section_wrapper(self):
        """Return the variables contributed by all helpers, merged in order."""
        # For each helper class compute any additional variables
        additional_vars = {}
        for helper in self.helpers:
            additional_vars.update(helper.compute_variables_section())
        return additional_vars

    def compute_ramble_dict(self):
        """Assemble the complete ramble configuration dictionary.

        A ``variables`` section is only added when a helper contributes
        additional variables.
        """
        # This can be overridden by any subclass that needs more flexibility
        ramble_dict = {
            "ramble": {
                "include": self.compute_include_section(),
                "config": self.compute_config_section(),
                "modifiers": self.compute_modifiers_section_wrapper(),
                "applications": self.compute_applications_section_wrapper(),
                "software": self.compute_package_section_wrapper(),
            }
        }
        # Add any variables from helper classes if necessary
        additional_vars = self.compute_variables_section_wrapper()
        if additional_vars:
            ramble_dict["ramble"].update({"variables": additional_vars})

        return ramble_dict

    def write_ramble_dict(self, filepath):
        """Compute the ramble configuration and write it to ``filepath`` as YAML."""
        ramble_dict = self.compute_ramble_dict()
        with open(filepath, "w") as f:
            yaml.dump(ramble_dict, f)

Some or all of the functions in the Experiment base class can be overridden to define custom behavior, such as adding experiment variants.

compute_applications_section

In compute_applications_section, we define the experiment variables necessary to perform scaling runs (single_node, strong, weak, or throughput) using ramble. We also define programming model (CUDA, ROCm, or OpenMP) specific variables, such as arch, which may be used by the benchmark.

We can specify experiment variables to benchpark using the Experiment.add_experiment_variable() member function. One of n_ranks, n_nodes, or n_gpus must be set using add_experiment_variable so that benchpark can allocate the correct amount of resources for the experiment. Additionally, all of n_resources, process_problem_size, and total_problem_size must be set, which can be accomplished using Experiment.set_required_variables().

add_experiment_variable

The method add_experiment_variable is used to add a variable to the experiment’s ramble.yaml. It has the following signature:

def add_experiment_variable(self, name, values, named=False, matrixed=False)

where,

  • name is the name of the variable

  • values is the value (or values) of the variable

  • named indicates if the variable’s name should appear in the experiment name (default False)

  • matrixed indicates if the variable must be matrixed in ramble.yaml (default False)

add_experiment_variable can be used to define multi-dimensional and scalar variables. e.g.:

self.add_experiment_variable("n_resources_dict", {"px": 2, "py": 2, "pz": 1}, named=True, matrixed=True)
self.add_experiment_variable("groups", 16, named=True, matrixed=True)
self.add_experiment_variable("n_gpus", 8, named=False, matrixed=False)

In the above example, n_resources_dict is added as a 3D variable with dimensions px, py and pz, which are assigned the values 2, 2, and 1 respectively. groups and n_gpus are scalar variables with values 16 and 8 respectively. If named is set to True, the unexpanded variable name (the individual dimension names for multi-dimensional variables) is appended to the experiment name in ramble.yaml.

Every multi-dimensional experiment variable is defined as a zip in the ramble.yaml. If matrixed is set to True, the variable (or the zip in case of a multi-dimensional variable) is declared as a matrix in ramble.yaml. The generated ramble.yaml for the above example would look like:

experiments:
  amg2023_{px}_{py}_{pz}_{groups}:
    ...
    variables:
        px: 2
        py: 2
        pz: 1
        groups: 16
        n_gpus: 8
    zips:
      n_resources_dict:
      - px
      - py
      - pz
    matrix:
      - n_resources_dict
      - groups

A variable also can be assigned a list of values, each individual value corresponding to a single experiment. Refer to the Ramble documentation for a detailed explanation of zip and matrix.

compute_package_section

In compute_package_section, add the benchmark’s package spec. Required packages for the benchmark should be defined in the package.py. ADDITIONAL_SPECS should be specifications that the experiment always uses, such as +mpi, e.g. amg2023@{app_version} +mpi.

def compute_package_section(self):
    # Read the requested application version from the spec's "version" variant.
    app_version = self.spec.variants["version"][0]
    # BENCHMARK and [ADDITIONAL_SPECS] are placeholders to replace with the
    # real package name and specs. The spec is wrapped in a list because
    # add_package_spec uses its first element as the package spec.
    self.add_package_spec(self.name, [f"BENCHMARK@{app_version} [ADDITIONAL_SPECS]"])

Variants

Variants of the experiment can be added to utilize different ProgrammingModels used for on-node parallelization. For example, benchpark/experiments/amg2023/experiment.py can be updated to inherit from different experiment classes, each of which adds a variant that can be enabled when the experiment is initialized, e.g., +cuda for an experiment using CUDA (on an NVIDIA GPU), or +openmp for an experiment using OpenMP (on a CPU):

class Amg2023(
  Experiment,
  OpenMPExperiment,
  CudaExperiment,
  ROCmExperiment,
  Scaling(ScalingMode.Strong, ScalingMode.Weak, ScalingMode.Throughput),
  Caliper,
):

Multiple types of experiments can be created using variants as well (e.g., strong scaling, weak scaling). See AMG2023 or Kripke for examples. When implementing scaling, the following variants are available to the experiment:

  • scaling defines the scaling mode e.g. strong, weak and throughput

  • scaling-factor defines the factor by which a variable should be scaled

  • scaling-iterations defines the number of scaling experiments to be generated

Once an experiment class has been written, an experiment is initialized with the following command, passing any boolean variants defined in your experiment.py with +/~ and any string variants as key-value pairs: benchpark experiment init --dest {path/to/dest} {benchmark_name} +/~{boolean variant} {string variant}={value}

For example, to run the AMG2023 strong scaling experiment for problem 1, using CUDA the command would be: benchpark experiment init --dest amg2023_experiment amg2023 +cuda workload=problem1 scaling=strong scaling-factor=2 scaling-iterations=4

Initializing an experiment generates the following files:

  • ramble.yaml defines the Ramble specs for building, running, analyzing and archiving experiments.

  • execution_template.tpl serves as a template for the final experiment script that will be concretized and executed.

A detailed description of Ramble configuration files is available at Ramble workspace_config.

For more advanced usage, such as customizing hardware allocation or performance profiling see Benchpark Modifiers.

register_scaling_config

For each scaling mode supported by an application, the register_scaling_config method must define the scaled variables and their corresponding scaling function. The input to register_scaling_config is a dictionary of the form:

{
    ScalingMode.Strong: {
        "v1": strong_scaling_function1,
        "v2": strong_scaling_function2,
        ...
    },
    ScalingMode.Weak: {
        "v1": weak_scaling_function1,
        "v2": weak_scaling_function2,
        ...
    },
    ...
}

Scaled variables can be multi-dimensional or one-dimensional. All multi-dimensional variables in a scaling mode must have the same dimensionality. The scaling function for each variable takes the form:

def scaling_function(var, i, dim, sf):
   """Return the scaled value of ``var`` along ``dim`` for the ``i``-th experiment.

   Template only: replace the ellipsis below with the actual computation,
   which will typically use the scaling factor ``sf``.
   """
   # scale var[dim] for the i-th experiment
   scaled_val = ...
   return scaled_val

where,

  • var is the benchpark.Variable instance corresponding to the scaled variable

  • i is the i-th experiment in the specified number of scaling-iterations

  • dim is the current dimension that is being scaled (in any given experiment iteration the same dimension of each variable is scaled)

  • sf is the value by which the variable must be scaled, as specified by scaling-factor

In the list of variables defined for each scaling mode, scaling starts from the dimension that has the minimum value for the first variable and proceeds through the dimensions in a round-robin manner till the specified number of experiments are generated e.g. if the scaling config is defined as:

register_scaling_config ({
    ScalingMode.Strong: {
        "n_resources_dict": lambda var, i, dim, sf: var.val(dim) * sf,
        "process_problem_size_dict": lambda var, i, dim, sf: var.val(dim) * sf,
    }
})

and the initial values of the variables are:

"n_resources_dict" : {
    "px": 2, # dim 0
    "py": 2, # dim 1
    "pz": 1, # dim 2
},
"process_problem_size_dict" : {
    "nx": 16, # dim 0
    "ny": 32, # dim 1
    "nz": 32, # dim 2
},

then after 4 scaling iterations (i.e. 3 scalings), the final values of the scaled variables will be:

"n_resources_dict" : {
    "px": [2, 2, 4, 4],
    "py": [2, 2, 2, 4],
    "pz": [1, 2, 2, 2],
},
"process_problem_size_dict" : {
    "nx": [16, 16, 32, 32],
    "ny": [32, 32, 32, 64],
    "nz": [32, 64, 64, 64],
},

Note that scaling starts from the minimum value dimension (pz) of the first variable (n_resources_dict) and proceeds in a round-robin manner through the other dimensions. See AMG2023 or Kripke for examples of different scaling configurations.

Validating the Benchmark/Experiment

To manually validate that your new experiments work, you should initialize an existing system and run your experiments. For example, if you just created a benchmark baz with OpenMP and strong scaling variants, it may look like this:

benchpark system init --dest=genericx86-system genericx86
benchpark experiment init --dest=baz-benchmark baz +openmp scaling=strong ~single_node
benchpark setup ./baz-benchmark ./genericx86-system workspace/

When this is complete, you have successfully completed the Setting Up a Benchpark Workspace step and can run and analyze the experiments by following the Benchpark output or the steps in Building an Experiment in Benchpark.