`nhra_gt.domain.ihacpa_api`¶

IHACPA API Client and Data Ingestion.

Handles fetching and parsing National Efficient Price (NEP) series data.

Classes¶

`IHACPAClient` ¶

Simulated client for IHACPA data: instead of calling a remote API, it parses local NWAU calculator workbooks (Excel .xlsb) to extract NEP values.

Source code in src/nhra_gt/domain/ihacpa_api.py

class IHACPAClient:
    """
    Simulated client for IHACPA data.
    Actually parses local NWAU calculators (Excel .xlsb) to extract NEP values.
    """

    def __init__(self, raw_dir: Path | str = "data/raw"):
        self.raw_dir = Path(raw_dir)

    def extract_nep_from_file(self, file_path: Path) -> float | None:
        """Parses a single .xlsb file to find the NEP value."""
        if open_workbook is None:
            logger.warning("pyxlsb is not installed; cannot parse %s", file_path)
            return None
        try:
            with open_workbook(str(file_path)) as wb:
                if "Formula breakdown" not in wb.sheets:
                    return None

                with wb.get_sheet("Formula breakdown") as sheet:
                    for row in sheet.rows():
                        # Based on inspection, NEP label is followed by value
                        # Usually row 14, col 9 is 'NEP', col 10 is value
                        # We search for 'NEP' string then take next cell
                        row_vals = [cell.v for cell in row]
                        for i, val in enumerate(row_vals):
                            if val == "NEP" and i + 1 < len(row_vals):
                                nep_val = row_vals[i + 1]
                                if isinstance(nep_val, int | float):
                                    return float(nep_val)
        except Exception as e:
            logger.error(f"Error parsing {file_path}: {e}")
        return None

    def fetch_nep_series(self) -> pd.DataFrame:
        """
        Scans raw_dir for calculators and builds the NEP series.

        Returns:
            DataFrame with columns ['year', 'nep_per_nwau'].
        """
        # Historical hardcoded values as baseline (pre-2025)
        # Sourced from Determination documents
        data = {
            2011: 4808.0,
            2012: 4808.0,
            2013: 4993.0,
            2014: 5007.0,
            2015: 4971.0,
            2016: 4883.0,
            2017: 4933.07,
            2018: 5012.0,
            2019: 5134.0,
            2020: 5320.0,
            2021: 5597.0,
            2022: 5797.0,
            2023: 6032.0,
            2024: 6465.0,
            2025: 7258.0,
        }

        # Scan for newer files (e.g. nwau26...)
        pattern = re.compile(r"nwau(\d{2})_calculator")
        for file in self.raw_dir.glob("*.xlsb"):
            match = pattern.search(file.name)
            if match:
                yy = int(match.group(1))
                year = 2000 + yy
                # If we don't have it or want to verify
                nep = self.extract_nep_from_file(file)
                if nep:
                    logger.info(f"Extracted NEP {nep} for year {year} from {file.name}")
                    data[year] = nep

        df = pd.DataFrame(list(data.items()), columns=["year", "nep_per_nwau"])
        return df.sort_values("year")

Functions¶

`extract_nep_from_file(file_path)` ¶

Parses a single .xlsb file to find the NEP value.

Source code in src/nhra_gt/domain/ihacpa_api.py

def extract_nep_from_file(self, file_path: Path) -> float | None:
    """
    Parses a single .xlsb file to find the NEP value.

    The "Formula breakdown" sheet is scanned row by row for a cell whose
    value is exactly ``"NEP"``; the first such label that is immediately
    followed by a numeric cell determines the result.

    Args:
        file_path: Path of the workbook to read.

    Returns:
        The NEP as a float, or None when pyxlsb is unavailable, the sheet is
        missing, no numeric value follows an "NEP" label, or parsing fails
        (the failure is logged, not raised).
    """
    # NOTE(review): open_workbook is presumably set to None by an optional
    # import of pyxlsb elsewhere in this module -- confirm.
    if open_workbook is None:
        logger.warning("pyxlsb is not installed; cannot parse %s", file_path)
        return None
    try:
        with open_workbook(str(file_path)) as wb:
            if "Formula breakdown" not in wb.sheets:
                return None

            with wb.get_sheet("Formula breakdown") as sheet:
                for row in sheet.rows():
                    # Based on inspection, the NEP label is usually at
                    # row 14, col 9 with its value in col 10; search for the
                    # label instead of relying on fixed coordinates.
                    row_vals = [cell.v for cell in row]
                    for label, candidate in zip(row_vals, row_vals[1:]):
                        # bool is a subclass of int, so a boolean cell must
                        # be excluded explicitly or True would be reported
                        # as an NEP of 1.0.
                        if (
                            label == "NEP"
                            and isinstance(candidate, int | float)
                            and not isinstance(candidate, bool)
                        ):
                            return float(candidate)
    except Exception as e:
        # Best-effort parsing: log with traceback and fall through to None.
        logger.exception("Error parsing %s: %s", file_path, e)
    return None

`fetch_nep_series()` ¶

Scans raw_dir for calculators and builds the NEP series.

Returns:

Type	Description
`DataFrame`	DataFrame with columns ['year', 'nep_per_nwau'], sorted by year.

Source code in src/nhra_gt/domain/ihacpa_api.py

def fetch_nep_series(self) -> pd.DataFrame:
    """
    Scans raw_dir for calculators and builds the NEP series.

    Files named like ``nwauYY_calculator*.xlsb`` are mapped to year
    ``2000 + YY``; a positive NEP extracted from such a file overrides (or
    extends) the hardcoded baseline for that year. Files are visited in
    sorted path order, so when several files map to the same year the last
    one in that order wins.

    Returns:
        DataFrame with columns ['year', 'nep_per_nwau'], sorted by year.
    """
    # Historical hardcoded values as baseline (2011-2025).
    # Sourced from Determination documents.
    data: dict[int, float] = {
        2011: 4808.0,
        2012: 4808.0,
        2013: 4993.0,
        2014: 5007.0,
        2015: 4971.0,
        2016: 4883.0,
        2017: 4933.07,
        2018: 5012.0,
        2019: 5134.0,
        2020: 5320.0,
        2021: 5597.0,
        2022: 5797.0,
        2023: 6032.0,
        2024: 6465.0,
        2025: 7258.0,
    }

    # Scan for calculator files (e.g. nwau26...).
    pattern = re.compile(r"nwau(\d{2})_calculator")
    # glob() yields entries in arbitrary order; sorting keeps the result
    # reproducible when two files refer to the same year.
    for file in sorted(self.raw_dir.glob("*.xlsb")):
        match = pattern.search(file.name)
        if match is None:
            continue
        year = 2000 + int(match.group(1))
        nep = self.extract_nep_from_file(file)
        if nep is None:
            continue
        if nep <= 0:
            # A non-positive price is never valid; keep the existing value.
            logger.warning("Ignoring non-positive NEP %s from %s", nep, file.name)
            continue
        logger.info("Extracted NEP %s for year %s from %s", nep, year, file.name)
        data[year] = nep

    df = pd.DataFrame(list(data.items()), columns=["year", "nep_per_nwau"])
    return df.sort_values("year")

nhra_gt.domain.ihacpa_api¶

Classes¶

IHACPAClient ¶

Functions¶

extract_nep_from_file(file_path) ¶

fetch_nep_series() ¶

`nhra_gt.domain.ihacpa_api`¶

`IHACPAClient` ¶

`extract_nep_from_file(file_path)` ¶

`fetch_nep_series()` ¶