Skip to content

nhra_gt.audit.fingerprint

Codebase Fingerprinting for Auditability.

Extracts structural signatures from Python files to detect drift or lost features.

Classes

ClassInfo

Bases: TypedDict

Signature of a Python class, including its methods and bases.

Source code in src/nhra_gt/audit/fingerprint.py
class ClassInfo(TypedDict):
    """Signature of a Python class, including its methods and bases.

    One ``ClassInfo`` is produced per top-level class by
    ``extract_fingerprint`` and stored in the ``classes`` mapping of a
    ``Fingerprint``.
    """

    # Names of the functions defined directly in the class body, in source order.
    methods: list[str]
    # Names of the base classes that are written as bare identifiers; base
    # expressions of any other form (e.g. dotted names) are not recorded.
    bases: list[str]

Fingerprint

Bases: TypedDict

Complete structural signature of a Python file.

Source code in src/nhra_gt/audit/fingerprint.py
class Fingerprint(TypedDict):
    """Complete structural signature of a Python file.

    Built by ``extract_fingerprint`` from the top-level statements of a
    module; nested definitions are not described here.
    """

    # Top-level assigned name -> value extracted from the assignment's right-hand side.
    constants: dict[str, Any]
    # Top-level function name -> list of its parameter names.
    functions: dict[str, list[str]]
    # Top-level class name -> its method names and base-class names.
    classes: dict[str, ClassInfo]

Functions

extract_fingerprint(code)

Extract logic fingerprint from Python code using AST.

Source code in src/nhra_gt/audit/fingerprint.py
def extract_fingerprint(code: str) -> Fingerprint:
    """Extract a structural fingerprint from Python source code using AST.

    Only the top-level statements of the module are inspected:

    * ``NAME = <expr>`` assignments are recorded under ``"constants"`` when
      ``_get_literal_value`` yields a non-``None`` value for the right-hand
      side. No upper-case naming filter is applied.
    * ``def`` and ``async def`` functions are recorded under ``"functions"``
      as ``name -> [parameter names]``. Only regular positional-or-keyword
      parameters are listed; positional-only, keyword-only, ``*args`` and
      ``**kwargs`` parameters are not.
    * Classes are recorded under ``"classes"`` with the names of the
      functions defined directly in their body and the names of the bases
      that are written as bare identifiers (dotted bases are skipped).

    Args:
        code: Python source text to analyse.

    Returns:
        The fingerprint of ``code``. If the source cannot be parsed, a
        fingerprint with three empty sections is returned instead of raising.
    """
    fingerprint: Fingerprint = {
        "constants": {},
        "functions": {},
        "classes": {},
    }

    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        # Legacy syntax is reported as SyntaxError; source containing NUL
        # bytes is reported as ValueError on Python < 3.12. Either way the
        # file is unparseable, so report an empty fingerprint.
        return fingerprint

    # ``async def`` is a distinct node type; treat it like a plain ``def`` so
    # that coroutine functions and methods are not missing from the signature.
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)

    for node in tree.body:
        if isinstance(node, ast.Assign):
            for target in node.targets:
                # Tuple, attribute and subscript targets are ignored.
                if isinstance(target, ast.Name):
                    # A None result means "no literal value available", so the
                    # name is not recorded (presumably this also excludes
                    # ``X = None``; verify against _get_literal_value).
                    value = _get_literal_value(node.value)
                    if value is not None:
                        fingerprint["constants"][target.id] = value

        elif isinstance(node, function_nodes):
            fingerprint["functions"][node.name] = [arg.arg for arg in node.args.args]

        elif isinstance(node, ast.ClassDef):
            methods = [item.name for item in node.body if isinstance(item, function_nodes)]
            bases = [base.id for base in node.bases if isinstance(base, ast.Name)]
            fingerprint["classes"][node.name] = {"methods": methods, "bases": bases}

    return fingerprint

fingerprint_zip(zip_path)

Fingerprint all Python files in a zip archive.

Source code in src/nhra_gt/audit/fingerprint.py
def fingerprint_zip(zip_path: Path) -> dict[str, Fingerprint]:
    """Fingerprint all Python files in a zip archive.

    The scan is best-effort: a member that cannot be read, decoded or
    fingerprinted is skipped, and an archive that cannot be opened yields an
    empty result. Failures are never raised to the caller, but each one is
    logged as a warning so that a skipped file can be told apart from a file
    that simply was not in the archive.

    Args:
        zip_path: Location of the zip archive to scan.

    Returns:
        Mapping of archive member name (every member whose name ends in
        ``.py``) to the fingerprint of its contents.
    """
    import logging  # local import so this function does not rely on a module-level one

    logger = logging.getLogger(__name__)
    results: dict[str, Fingerprint] = {}
    try:
        with zipfile.ZipFile(zip_path, "r") as zf:
            for name in zf.namelist():
                if not name.endswith(".py"):
                    continue
                try:
                    # "utf-8-sig" decodes plain UTF-8 unchanged but strips a
                    # leading byte-order mark, which would otherwise reach the
                    # parser as an invalid character.
                    content = zf.read(name).decode("utf-8-sig")
                    results[name] = extract_fingerprint(content)
                except Exception:
                    # Deliberately broad: one bad member must not abort the scan.
                    logger.warning(
                        "Skipping %s in %s: could not read or fingerprint it",
                        name,
                        zip_path,
                        exc_info=True,
                    )
    except Exception:
        # Deliberately broad: a missing or corrupt archive returns whatever
        # was collected so far instead of raising.
        logger.warning("Could not scan archive %s", zip_path, exc_info=True)
    return results