Codebase Fingerprinting for Auditability.
Extracts structural signatures from Python files to detect drift or lost features.
Classes
ClassInfo
Bases: TypedDict
Signature of a Python class, including its methods and bases.
Source code in src/nhra_gt/audit/fingerprint.py
class ClassInfo(TypedDict):
    """Structural summary of one Python class: its method names and base names."""

    # Names of the methods found in the class body.
    methods: list[str]
    # Names of the class's bases (extract_fingerprint records plain-name bases only).
    bases: list[str]
|
Fingerprint
Bases: TypedDict
Complete structural signature of a Python file.
Source code in src/nhra_gt/audit/fingerprint.py
class Fingerprint(TypedDict):
    """Structural summary of a single Python source file."""

    # Top-level name -> literal value assigned to it.
    constants: dict[str, Any]
    # Top-level function name -> list of its parameter names.
    functions: dict[str, list[str]]
    # Top-level class name -> ClassInfo summary of that class.
    classes: dict[str, ClassInfo]
|
Functions
extract_fingerprint(code)
Extract logic fingerprint from Python code using AST.
Source code in src/nhra_gt/audit/fingerprint.py
def extract_fingerprint(code: str) -> Fingerprint:
    """Extract a structural fingerprint from Python source code using the AST.

    Only the top-level statements of the module are inspected:

    * plain-name assignments are recorded under ``"constants"`` when
      ``_get_literal_value`` returns something other than ``None`` for the
      assigned expression;
    * ``def`` and ``async def`` functions are recorded under ``"functions"``
      as the list of their positional-or-keyword parameter names;
    * classes are recorded under ``"classes"`` with the names of the methods
      (``def`` and ``async def``) defined directly in the class body and the
      names of the bases that are written as plain identifiers.

    Args:
        code: Python source text to analyse.

    Returns:
        The fingerprint of ``code``. All three sections are empty when the
        source cannot be parsed.
    """
    fingerprint: Fingerprint = {
        "constants": {},
        "functions": {},
        "classes": {},
    }

    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        # SyntaxError covers legacy or invalid syntax. ValueError is raised by
        # some interpreter versions for source containing null bytes; treat it
        # the same way so unparseable input always yields an empty fingerprint.
        return fingerprint

    # Async definitions are part of a module's structure too; ignoring them
    # would make them invisible to any drift comparison.
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)

    for node in tree.body:
        if isinstance(node, ast.Assign):
            # No naming convention is enforced here: every plain-name target is
            # a candidate, and the literal check alone decides what is kept.
            for target in node.targets:
                if isinstance(target, ast.Name):
                    # A None result is treated as "no literal value" and skipped.
                    value = _get_literal_value(node.value)
                    if value is not None:
                        fingerprint["constants"][target.id] = value
        elif isinstance(node, function_nodes):
            fingerprint["functions"][node.name] = [arg.arg for arg in node.args.args]
        elif isinstance(node, ast.ClassDef):
            # Bases that are not plain names (e.g. ``module.Base``) are omitted.
            bases = [base.id for base in node.bases if isinstance(base, ast.Name)]
            methods = [item.name for item in node.body if isinstance(item, function_nodes)]
            fingerprint["classes"][node.name] = {"methods": methods, "bases": bases}

    return fingerprint
|
fingerprint_zip(zip_path)
Fingerprint all Python files in a zip archive.
Source code in src/nhra_gt/audit/fingerprint.py
def fingerprint_zip(zip_path: Path) -> dict[str, Fingerprint]:
    """Fingerprint every ``.py`` member of a zip archive.

    The scan is best-effort and never raises for ordinary failures: a member
    that cannot be read, decoded as UTF-8, or fingerprinted is skipped, and an
    archive that cannot be opened yields whatever was collected so far
    (possibly nothing). Each skip is logged so that a missing entry can be told
    apart from a file that was never in the archive.

    Args:
        zip_path: Location of the zip archive to scan.

    Returns:
        Mapping of archive member name to its fingerprint, for the members
        whose name ends in ``.py`` and that were processed successfully.
    """
    import logging  # local import keeps this function self-contained

    logger = logging.getLogger(__name__)
    results: dict[str, Fingerprint] = {}

    try:
        with zipfile.ZipFile(zip_path, "r") as zf:
            for name in zf.namelist():
                if not name.endswith(".py"):
                    continue
                try:
                    content = zf.read(name).decode("utf-8")
                    results[name] = extract_fingerprint(content)
                except Exception as exc:
                    # Deliberately broad: one bad member must not abort the scan.
                    logger.warning("Skipping %s in %s: %s", name, zip_path, exc)
    except Exception as exc:
        # Deliberately broad: a missing or corrupt archive gives partial/empty results.
        logger.warning("Could not scan archive %s: %s", zip_path, exc)

    return results
|