Source code for skim.utils.misc

#!/usr/bin/env python3

##########################################################################
# basf2 (Belle II Analysis Software Framework)                           #
# Author: The Belle II Collaboration                                     #
#                                                                        #
# See git log for contributors and copyright holders.                    #
# This file is licensed under LGPL-3.0, see LICENSE.md.                  #
##########################################################################

"""
Miscellaneous utility functions for skim experts.
"""

import subprocess
import json
import re
from pathlib import Path

from skim.registry import Registry



[docs]
def get_file_metadata(filename):
    """
    Read the metadata of a file by running ``b2file-metadata-show --json`` on it.

    Parameters:
        filename (str): Path of the file whose metadata should be read.

    Returns:
        dict: The JSON output of ``b2file-metadata-show``, parsed by `json.loads`.

    Raises:
        FileNotFoundError: Raised if ``filename`` does not exist.
        subprocess.CalledProcessError: Raised if ``b2file-metadata-show`` exits
            with a non-zero return code.
    """
    if Path(filename).exists():
        command = ["b2file-metadata-show", "--json", str(filename)]
        # check=True turns a failing tool invocation into an exception instead of
        # letting us try to parse incomplete output.
        completed = subprocess.run(command, stdout=subprocess.PIPE, check=True)
        return json.loads(completed.stdout.decode("utf-8"))

    raise FileNotFoundError(f"Could not find file {filename}")




[docs]
def get_eventN(filename):
    """
    Look up how many events a file contains, via its ``b2file-metadata-show`` metadata.

    Parameters:
        filename (str): Path of the file to inspect.

    Returns:
        int: Value of the ``nEvents`` metadata field, converted to an integer.
    """
    metadata = get_file_metadata(filename)
    event_count = metadata["nEvents"]
    return int(event_count)




[docs]
def resolve_skim_modules(SkimsOrModules, *, LocalModule=None):
    """
    Produce an ordered list of skims, by expanding any Python skim module names into a
    list of skims in that module. Also produce a dict of skims grouped by Python module.

    Parameters:
        SkimsOrModules (list(str)): Names of skims and/or Python skim modules. Names
            that the registry knows neither as a skim nor as a module are skipped.
        LocalModule (str): Optional. If given, all resolved skims are grouped under
            this single module name (with a trailing ``.py`` removed) instead of the
            modules listed in the registry.

    Returns:
        tuple(list(str), dict(str, list(str))): The skims in the order requested, and
        a dict mapping each module name to the sorted list of its skims.

    Raises:
        RuntimeError: Raised if a skim is listed twice.
        ValueError: Raised if ``LocalModule`` is passed and skims are normally expected
            from more than one module.
    """
    skims = []
    for name in SkimsOrModules:
        if name in Registry.names:
            skims.append(name)
        elif name in Registry.modules:
            skims.extend(Registry.get_skims_in_module(name))
        # Anything else is silently ignored, as callers may rely on that.

    # Single pass over the list rather than calling list.count for every element.
    seen = set()
    duplicates = set()
    for skim in skims:
        if skim in seen:
            duplicates.add(skim)
        seen.add(skim)
    if duplicates:
        # Sorted so that the error message does not depend on set iteration order.
        raise RuntimeError(
            f"Skim{'s'*(len(duplicates)>1)} requested more than once: {', '.join(sorted(duplicates))}"
        )

    # Ask the registry once per skim, then reuse the answer for the grouping below.
    module_of = {skim: Registry.get_skim_module(skim) for skim in skims}
    module_names = sorted(set(module_of.values()))

    if LocalModule:
        if len(module_names) > 1:
            raise ValueError(
                f"Local module {LocalModule} specified, but the combined skim expects "
                "skims from more than one module. No steering file written."
            )
        # Remove the ".py" suffix only. str.rstrip(".py") would strip any trailing
        # run of the characters '.', 'p' and 'y' (e.g. "mypy.py" -> "m").
        if LocalModule.endswith(".py"):
            local_name = LocalModule[: -len(".py")]
        else:
            local_name = LocalModule
        return skims, {local_name: sorted(skims)}

    modules = {
        module: sorted(skim for skim in skims if module_of[skim] == module)
        for module in module_names
    }
    return skims, modules



class _hashable_list(list):
    """A list that can be hashed; its hash is that of a tuple with the same items."""

    def __hash__(self):
        # Hash a tuple snapshot of the current contents.
        snapshot = tuple(self)
        return hash(snapshot)


# Plain-text -> LaTeX replacements, applied strictly from top to bottom. Order matters:
# if you have both B0 and anti-B0, anti-B0 has to come first.
_DECAY_SUBSTITUTIONS = (
    ("==>", "\\to"),
    ("->", "\\to"),
    ("gamma", "\\gamma"),
    ("p+", "p"),
    ("anti-p-", "\\bar{p}"),
    ("pi+", "\\pi^+"),
    ("pi-", "\\pi^-"),
    ("pi0", "\\pi^0"),
    ("K_S0", "K^0_S"),
    ("K_L0", "K^0_L"),
    ("mu+", "\\mu^+"),
    ("mu-", "\\mu^-"),
    ("tau+", "\\tau^+"),
    ("tau-", "\\tau^-"),
    ("nu", "\\nu"),
    ("K+", "K^+"),
    ("K-", "K^-"),
    ("e+", "e^+"),
    ("e-", "e^-"),
    ("J/psi", "J/\\psi"),
    ("anti-Lambda_c-", "\\Lambda^{-}_{c}"),
    ("anti-Sigma+", "\\overline{\\Sigma}^{+}"),
    ("anti-Lambda0", "\\overline{\\Lambda}^{0}"),
    ("anti-D0*", "\\overline{D}^{0*}"),
    ("anti-D*0", "\\overline{D}^{0*}"),
    ("anti-D0", "\\overline{D}^0"),
    ("anti-B0", "\\overline{B}^0"),
    ("Sigma+", "\\Sigma^{+}"),
    ("Lambda_c+", "\\Lambda^{+}_{c}"),
    ("Lambda0", "\\Lambda^{0}"),
    ("D+", "D^+"),
    ("D-", "D^-"),
    ("D0", "D^0"),
    ("D*+", "D^{+*}"),
    ("D*-", "D^{-*}"),
    ("D*0", "D^{0*}"),
    ("D_s+", "D^+_s"),
    ("D_s-", "D^-_s"),
    ("D_s*+", "D^{+*}_s"),
    ("D_s*-", "D^{-*}_s"),
    ("B+", "B^+"),
    ("B-", "B^-"),
    ("B0", "B^0"),
    ("B_s0", "B^0_s"),
    ("K*0", "K^{0*}"),
)


def _sphinxify_decay(decay_string):
    """Turn a plain-text decay descriptor into LaTeX wrapped in a Sphinx ``:math:`` role.

    This is a utility function for autogenerating skim documentation.

    Parameters:
        decay_string (str): A decay descriptor.

    Returns:
        sphinxed_string (str): LaTeX version of the decay descriptor, formatted for
        use in ReStructured Text.
    """
    # A ":generic" label on a leading B meson becomes a "had" subscript; the other
    # labels below are either dropped or turned into subscripts.
    cleaned = re.sub("^(B.):generic", "\\1_{\\\\text{had}}", decay_string)
    for label, subscript in (
        (":generic", ""),
        (":semileptonic", "_{\\text{SL}}"),
        (":FSP", "_{FSP}"),
        (":V0", "_{V0}"),
    ):
        cleaned = cleaned.replace(label, subscript)
    # Remove numeric suffixes such as "_1".
    cleaned = re.sub("_[0-9]+", "", cleaned)

    latex = cleaned
    for plain, tex in _DECAY_SUBSTITUTIONS:
        latex = latex.replace(plain, tex)
    return f":math:`{latex}`"



[docs]
def fancy_skim_header(SkimClass):
    """Class decorator which builds a summary header for a skim and puts it at the
    start of the skim's docstring. Place it directly above the skim definition.

    It also keeps the documentation of template functions such as
    `BaseSkim.build_lists` from being repeated in the documentation of every skim.

    .. code-block:: python

        @fancy_skim_header
        class MySkimName(BaseSkim):
            # docstring here describing your skim, and explaining cuts.
    """
    # Separators recognised between names: commas, "and", "&", and newlines.
    separator = r",\s+and\s+|\s+and\s+|,\s+&\s+|\s+&\s+|,\s+|\s*\n\s*"

    SkimName = SkimClass.__name__
    SkimCode = Registry.encode_skim_name(SkimName)
    authors = SkimClass.__authors__ or ["(no authors listed)"]
    description = SkimClass.__description__ or "(no description)"
    contact = SkimClass.__contact__ or "(no contact listed)"
    category = SkimClass.__category__ or "(no category listed)"

    if isinstance(authors, str):
        # A single string is split into individual names, each trimmed of any
        # whitespace left over on either side.
        authors = [
            re.sub(r"^\s+|\s+$", "", author) for author in re.split(separator, authors)
        ]

    if isinstance(category, list):
        category = ", ".join(category)

    def as_link(entry):
        # "NAME <EMAIL>" or "NAME (EMAIL)" becomes a mailto link; anything else is
        # passed through untouched.
        found = re.match("([^<>()`]+) [<(]([^<>()`]+@[^<>()`]+)[>)]", entry)
        if found is None:
            return entry
        return f"`{found[1]} <mailto:{found[2]}>`_"

    # Several contacts may have been given in one string.
    contacts = [as_link(entry) for entry in re.split(separator, contact)]

    header = f"""
    Note:
        * **Skim description**: {description}
        * **Skim name**: {SkimName}
        * **Skim LFN code**: {SkimCode}
        * **Category**: {category}
        * **Author{"s"*(len(authors) > 1)}**: {", ".join(authors)}
        * **Contact{"s"*(len(contacts) > 1)}**: {", ".join(contacts)}
    """

    if SkimClass.ApplyHLTHadronCut:
        HLTLine = "*This skim includes a selection on the HLT flag* ``hlt_hadron``."
        header = f"{header.rstrip()}\n\n        {HLTLine}\n"

    # An empty or missing class docstring leaves just the header.
    existing_doc = SkimClass.__doc__
    SkimClass.__doc__ = (
        (header + "\n\n" + existing_doc.lstrip("\n")) if existing_doc else header
    )

    # Template functions without a docstring of their own get an empty one, so that
    # the BaseSkim docstring is not repeated for them.
    for method_name in (
        "load_standard_lists",
        "build_lists",
        "validation_histograms",
        "additional_setup",
    ):
        method = getattr(SkimClass, method_name)
        method.__doc__ = method.__doc__ or ""

    return SkimClass




[docs]
def dry_run_steering_file(SteeringFile):
    """
    Run ``basf2 --dry-run`` on the steering file at the given path, to check that it
    can be processed.

    Parameters:
        SteeringFile (str): Path of the steering file to check.

    Raises:
        RuntimeError: Raised if ``basf2`` exits with a non-zero return code. The
            message contains the captured stdout and stderr of the process.
    """
    command = ["basf2", "--dry-run", "-i", "i.root", "-o", "o.root", str(SteeringFile)]
    # Both streams are captured so they can be included in the error message.
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if result.returncode == 0:
        return

    stdout = result.stdout.decode("utf-8")
    stderr = result.stderr.decode("utf-8")
    raise RuntimeError(
        f"An error occurred while dry-running steering file {SteeringFile}\n"
        f"Script output:\n{stdout}\n{stderr}"
    )