Skip to content

API Reference: Utils

Device detection

auditml.utils.device

Device detection and management for AuditML.

get_device(preference: str = 'auto') -> torch.device

Return the best available torch.device.

Parameters:

Name Type Description Default
preference str

"auto" (default) picks CUDA > MPS > CPU. "cuda", "mps", or "cpu" force a specific backend.

'auto'

Returns:

Type Description
device
Source code in src/auditml/utils/device.py
def get_device(preference: str = "auto") -> torch.device:
    """Return the best available ``torch.device``.

    Parameters
    ----------
    preference:
        ``"auto"`` (default) picks CUDA > MPS > CPU.
        ``"cuda"``, ``"mps"``, or ``"cpu"`` force a specific backend.
        Indexed forms such as ``"cuda:0"`` are validated by their backend
        type, exactly like the bare backend name.

    Returns
    -------
    torch.device
        The requested device, or the CPU device (after printing a warning)
        when a CUDA or MPS device was requested but that backend is not
        available.
    """
    cuda_ok = torch.cuda.is_available()
    # ``torch.backends.mps`` is not guaranteed to exist on every PyTorch
    # build, so probe for the attribute before querying it. Doing this once
    # keeps the "auto" and the explicit "mps" paths consistent.
    mps_ok = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()

    if preference == "auto":
        if cuda_ok:
            return torch.device("cuda")
        if mps_ok:
            return torch.device("mps")
        return torch.device("cpu")

    device = torch.device(preference)
    # Validate on the parsed device type rather than the raw string so that
    # indexed requests ("cuda:0") get the same fallback as "cuda".
    if device.type == "cuda" and not cuda_ok:
        print("Warning: CUDA requested but not available, falling back to CPU.")
        return torch.device("cpu")
    if device.type == "mps" and not mps_ok:
        print("Warning: MPS requested but not available, falling back to CPU.")
        return torch.device("cpu")
    return device

device_info() -> dict[str, str | bool]

Return a dict of device/hardware information.

Source code in src/auditml/utils/device.py
def device_info() -> dict[str, str | bool]:
    """Return a dict of device/hardware information."""
    info: dict[str, str | bool] = {
        "pytorch_version": torch.__version__,
        "cuda_available": torch.cuda.is_available(),
    }
    if torch.cuda.is_available():
        info["cuda_version"] = torch.version.cuda or "unknown"
        info["gpu_name"] = torch.cuda.get_device_name(0)
        mem = torch.cuda.get_device_properties(0).total_memory
        info["gpu_memory_gb"] = f"{mem / (1024 ** 3):.1f}"
    info["mps_available"] = (
        hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
    )
    return info

Rust acceleration

auditml.utils.rust_accel

Rust acceleration helpers with transparent NumPy fallback.

AuditML ships an optional Rust extension (auditml_rust) that provides 10-15x speedups for two compute-intensive operations:

  • find_best_threshold — scans all possible thresholds to find the one that maximises binary classification accuracy (used by ThresholdMIA).
  • compute_ssim / batch_ssim — Structural Similarity Index between pixel arrays (used by ModelInversion to measure reconstruction quality).

If the Rust extension is not available (e.g. fresh clone without building), this module falls back to equivalent NumPy implementations automatically. No code outside this file needs to change.

Build the Rust extension
cd rust/
maturin build --release --out ../dist
pip install ../dist/auditml_rust-*.whl

Or, for development, build and install into the active environment (re-run after every change): maturin develop --release # must be run from the rust/ directory

rust_status() -> str

Return a human-readable string indicating whether Rust acceleration is active.

Source code in src/auditml/utils/rust_accel.py
def rust_status() -> str:
    """Return a one-line, human-readable report of the Rust acceleration state."""
    return (
        "Rust acceleration: ENABLED (auditml_rust)"
        if RUST_AVAILABLE
        else "Rust acceleration: DISABLED (using NumPy fallback)"
    )

find_best_threshold(scores: np.ndarray, labels: np.ndarray) -> tuple[float, float]

Find the threshold on scores that maximises binary accuracy.

Parameters:

Name Type Description Default
scores ndarray

1-D float array of per-sample confidence scores.

required
labels ndarray

1-D int array of ground-truth binary labels (1=member, 0=non-member).

required

Returns:

Type Description
(best_threshold, best_accuracy)
Source code in src/auditml/utils/rust_accel.py
def find_best_threshold(
    scores: np.ndarray,
    labels: np.ndarray,
) -> tuple[float, float]:
    """Locate the cut-off on *scores* that gives the highest binary accuracy.

    Dispatches to the Rust extension when it is available and to the NumPy
    implementation otherwise.

    Parameters
    ----------
    scores:
        1-D float array of per-sample confidence scores.
    labels:
        1-D int array of ground-truth binary labels (1=member, 0=non-member).

    Returns
    -------
    (best_threshold, best_accuracy)
    """
    if not RUST_AVAILABLE:
        return _find_best_threshold_numpy(scores, labels)

    # The Rust entry point is handed plain Python lists of float / int.
    score_list = scores.astype(float).tolist()
    label_list = labels.astype(int).tolist()
    return _rust.find_best_threshold(score_list, label_list)

compute_ssim(img_a: np.ndarray, img_b: np.ndarray) -> float

Compute SSIM between two images (flat float arrays, values in [0,1]).

Parameters:

Name Type Description Default
img_a ndarray

Reference image, flattened to 1-D, pixel values in [0, 1].

required
img_b ndarray

Reconstructed image, same shape as img_a.

required

Returns:

Type Description
float

SSIM in [-1, 1]. 1.0 = identical.

Source code in src/auditml/utils/rust_accel.py
def compute_ssim(img_a: np.ndarray, img_b: np.ndarray) -> float:
    """Return the SSIM between two images (flat float arrays, values in [0,1]).

    Both inputs are flattened before comparison. The Rust extension is used
    when available, the NumPy implementation otherwise.

    Parameters
    ----------
    img_a:
        Reference image, flattened to 1-D, pixel values in [0, 1].
    img_b:
        Reconstructed image, same shape as img_a.

    Returns
    -------
    float
        SSIM in [-1, 1]. 1.0 = identical.
    """
    if not RUST_AVAILABLE:
        return _compute_ssim_numpy(img_a.flatten(), img_b.flatten())

    # The Rust entry point is handed flat Python lists of floats.
    reference = img_a.astype(float).flatten().tolist()
    candidate = img_b.astype(float).flatten().tolist()
    return float(_rust.compute_ssim(reference, candidate))

batch_ssim(originals: np.ndarray | list[np.ndarray], reconstructed: np.ndarray | list[np.ndarray]) -> list[float]

Compute SSIM for a batch of (reference, reconstructed) pairs.

Parameters:

Name Type Description Default
originals ndarray | list[ndarray]

List of reference images (each a flat float array).

required
reconstructed ndarray | list[ndarray]

List of reconstructed images (same length as originals).

required

Returns:

Type Description
list[float]

SSIM score per pair.

Source code in src/auditml/utils/rust_accel.py
def batch_ssim(
    originals: np.ndarray | list[np.ndarray],
    reconstructed: np.ndarray | list[np.ndarray],
) -> list[float]:
    """Score every (reference, reconstructed) image pair with SSIM.

    Parameters
    ----------
    originals:
        List of reference images (each a flat float array).
    reconstructed:
        List of reconstructed images (same length as originals).

    Returns
    -------
    list[float]
        SSIM score per pair.
    """

    def _as_float_list(image):
        # Shape expected by the Rust entry point: a flat list of floats.
        return np.asarray(image).astype(float).flatten().tolist()

    def _as_flat_array(image):
        # Shape expected by the NumPy fallback: a flat array, dtype untouched.
        return np.asarray(image).flatten()

    if not RUST_AVAILABLE:
        # Pairs are formed with ``zip``, so iteration stops at the shorter input.
        return [
            _compute_ssim_numpy(_as_flat_array(ref), _as_flat_array(rec))
            for ref, rec in zip(originals, reconstructed)
        ]

    reference_lists = [_as_float_list(ref) for ref in originals]
    candidate_lists = [_as_float_list(rec) for rec in reconstructed]
    return _rust.batch_ssim(reference_lists, candidate_lists)

Reproducibility

auditml.utils.reproducibility

Reproducibility utilities for AuditML.

Call set_seed before any training or data splitting to ensure deterministic, reproducible results across runs.

set_seed(seed: int = 42) -> None

Set random seeds for full reproducibility.

Configures Python, NumPy, and PyTorch (CPU + CUDA) random number generators and enables deterministic cuDNN behaviour.

Parameters:

Name Type Description Default
seed int

Integer seed value.

42
Source code in src/auditml/utils/reproducibility.py
def set_seed(seed: int = 42) -> None:
    """Seed every random number generator used by the project.

    Seeds Python's ``random`` module, NumPy, and PyTorch (CPU, plus all CUDA
    devices when CUDA is available), switches cuDNN to deterministic mode,
    and exports ``PYTHONHASHSEED``.

    Parameters
    ----------
    seed:
        Integer seed value.
    """
    # Same seed for each generator family: stdlib, NumPy, then PyTorch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so
    # this presumably only affects processes spawned afterwards — confirm.
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Favour repeatable cuDNN kernels over auto-tuned ones (may cost speed).
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

Experiment utilities

auditml.utils.experiment

Experiment tracking for AuditML.

ExperimentLogger manages the output directory for a single experiment run, saving configs, metrics, and system information in a structured layout:

results/<experiment_name>/
├── config.yaml
├── system_info.json
├── metrics.csv
└── logs/
    └── <experiment_name>.log

ExperimentLogger

Manages artefacts for a single experiment run.

Parameters:

Name Type Description Default
experiment_name str

Human-readable experiment identifier.

'audit'
output_dir str | Path

Root directory under which the experiment folder is created.

'./results'
Source code in src/auditml/utils/experiment.py
class ExperimentLogger:
    """Manages artefacts for a single experiment run.

    Parameters
    ----------
    experiment_name:
        Human-readable experiment identifier.
    output_dir:
        Root directory under which the experiment folder is created.
    """

    def __init__(
        self,
        experiment_name: str = "audit",
        output_dir: str | Path = "./results",
    ) -> None:
        self.experiment_name = experiment_name
        self.run_dir = Path(output_dir) / experiment_name
        self.run_dir.mkdir(parents=True, exist_ok=True)

        self.log_dir = self.run_dir / "logs"
        self.logger = setup_logging(
            log_dir=self.log_dir,
            experiment_name=experiment_name,
        )

        self._metrics_rows: list[dict[str, Any]] = []

    # ── config ───────────────────────────────────────────────────────────

    def log_config(self, config: dict[str, Any]) -> None:
        """Write the experiment config to ``config.yaml``."""
        path = self.run_dir / "config.yaml"
        path.write_text(yaml.dump(config, default_flow_style=False, sort_keys=False))
        self.logger.info("Config saved to %s", path)

    # ── system info ──────────────────────────────────────────────────────

    def log_system_info(self) -> dict[str, Any]:
        """Capture and save system/hardware information."""
        info: dict[str, Any] = {
            "python_version": sys.version,
            "platform": platform.platform(),
            "hostname": platform.node(),
        }
        if _TORCH_AVAILABLE:
            info["pytorch_version"] = torch.__version__
            info["cuda_available"] = torch.cuda.is_available()
            if torch.cuda.is_available():
                info["cuda_version"] = torch.version.cuda
                info["gpu_name"] = torch.cuda.get_device_name(0)

        path = self.run_dir / "system_info.json"
        path.write_text(json.dumps(info, indent=2))
        self.logger.info("System info: %s", json.dumps(info))
        return info

    # ── metrics ──────────────────────────────────────────────────────────

    def log_metrics(self, metrics: dict[str, Any], step: int | None = None) -> None:
        """Record a metrics snapshot.

        Logged to both the Python logger and an in-memory list that can
        be flushed to CSV with ``save_metrics``.
        """
        row = dict(metrics)
        if step is not None:
            row["step"] = step
        self._metrics_rows.append(row)
        self.logger.info("Metrics (step=%s): %s", step, metrics)

    def save_metrics(self) -> Path:
        """Flush accumulated metrics to ``metrics.csv``."""
        import pandas as pd

        path = self.run_dir / "metrics.csv"
        df = pd.DataFrame(self._metrics_rows)
        df.to_csv(path, index=False)
        self.logger.info("Metrics saved to %s (%d rows)", path, len(df))
        return path

    # ── convenience ──────────────────────────────────────────────────────

    def log_model_summary(self, model: Any) -> None:
        """Log architecture and parameter counts."""
        from auditml.models.base import count_parameters

        counts = count_parameters(model)
        self.logger.info(
            "Model: %s | Params: %s total, %s trainable",
            model.__class__.__name__,
            f"{counts['total']:,}",
            f"{counts['trainable']:,}",
        )

    def info(self, msg: str, *args: Any) -> None:
        """Shortcut to ``self.logger.info``."""
        self.logger.info(msg, *args)

log_config(config: dict[str, Any]) -> None

Write the experiment config to config.yaml.

Source code in src/auditml/utils/experiment.py
def log_config(self, config: dict[str, Any]) -> None:
    """Dump *config* as YAML into ``config.yaml`` inside the run folder."""
    target = self.run_dir / "config.yaml"
    rendered = yaml.dump(config, default_flow_style=False, sort_keys=False)
    target.write_text(rendered)
    self.logger.info("Config saved to %s", target)

log_system_info() -> dict[str, Any]

Capture and save system/hardware information.

Source code in src/auditml/utils/experiment.py
def log_system_info(self) -> dict[str, Any]:
    """Collect interpreter/host (and, if present, PyTorch) details.

    The collected mapping is written to ``system_info.json``, logged,
    and returned.
    """
    details: dict[str, Any] = {}
    details["python_version"] = sys.version
    details["platform"] = platform.platform()
    details["hostname"] = platform.node()

    if _TORCH_AVAILABLE:
        details["pytorch_version"] = torch.__version__
        cuda_ok = torch.cuda.is_available()
        details["cuda_available"] = cuda_ok
        if cuda_ok:
            details["cuda_version"] = torch.version.cuda
            details["gpu_name"] = torch.cuda.get_device_name(0)

    target = self.run_dir / "system_info.json"
    target.write_text(json.dumps(details, indent=2))
    self.logger.info("System info: %s", json.dumps(details))
    return details

log_metrics(metrics: dict[str, Any], step: int | None = None) -> None

Record a metrics snapshot.

Logged to both the Python logger and an in-memory list that can be flushed to CSV with save_metrics.

Source code in src/auditml/utils/experiment.py
def log_metrics(self, metrics: dict[str, Any], step: int | None = None) -> None:
    """Record a metrics snapshot.

    Logged to both the Python logger and an in-memory list that can
    be flushed to CSV with ``save_metrics``.
    """
    row = dict(metrics)
    if step is not None:
        row["step"] = step
    self._metrics_rows.append(row)
    self.logger.info("Metrics (step=%s): %s", step, metrics)

save_metrics() -> Path

Flush accumulated metrics to metrics.csv.

Source code in src/auditml/utils/experiment.py
def save_metrics(self) -> Path:
    """Write all recorded metric rows to ``metrics.csv`` and return its path."""
    import pandas as pd

    frame = pd.DataFrame(self._metrics_rows)
    target = self.run_dir / "metrics.csv"
    frame.to_csv(target, index=False)
    self.logger.info("Metrics saved to %s (%d rows)", target, len(frame))
    return target

log_model_summary(model: Any) -> None

Log architecture and parameter counts.

Source code in src/auditml/utils/experiment.py
def log_model_summary(self, model: Any) -> None:
    """Log the model's class name with its total and trainable parameter counts."""
    from auditml.models.base import count_parameters

    counts = count_parameters(model)
    total_text = f"{counts['total']:,}"
    trainable_text = f"{counts['trainable']:,}"
    self.logger.info(
        "Model: %s | Params: %s total, %s trainable",
        model.__class__.__name__,
        total_text,
        trainable_text,
    )

info(msg: str, *args: Any) -> None

Shortcut to self.logger.info.

Source code in src/auditml/utils/experiment.py
def info(self, msg: str, *args: Any) -> None:
    """Forward *msg* and its ``%``-style arguments to ``self.logger.info``."""
    target_logger = self.logger
    target_logger.info(msg, *args)