# Source code for geolatent.api.decision

"""High-level API: ``visualize_decision_geometry``.

This module exposes the primary entry point for decision-boundary analysis.
The function orchestrates the full pipeline:

1. **Validation** — coerce and validate inputs; surface informative errors early.
2. **Projection** — fit a :class:`~geolatent.core.projector.DimensionalityProjector`
   (PCA by default) on the training data ``X``, reduce to 3 principal components.
3. **Mesh construction** — when an invertible projection (``"pca"`` or
   ``"sensitivity"``) is used, build a regular 3-D prediction mesh by querying
   the model on a grid of inverse-transformed points.
4. **Scene assembly** — instantiate :class:`~geolatent.rendering.scene.Scene3D`,
   layer decision surfaces, scatter clouds, centroids, and optional structural
   overlays according to the caller-supplied flags.
5. **Rendering** — apply the dark-scientific theme and return the completed
   ``go.Figure``.

Usage
-----
::

    from geolatent import visualize_decision_geometry
    from sklearn.svm import SVC

    model = SVC(kernel="rbf", probability=True).fit(X_train, y_train)
    fig = visualize_decision_geometry(model, X_train, y_train)
    fig.show()
"""

from __future__ import annotations

import warnings
from typing import Dict, List, Optional

import numpy as np
import plotly.graph_objects as go

from ..config.themes import DARK_SCIENTIFIC, VisualizationConfig
from ..core.geometry import GeometryUtils
from ..core.mesh_builder import MeshBuilder
from ..core.projector import DimensionalityProjector
from ..rendering.overlays import DataOverlay
from ..rendering.scene import Scene3D
from ..rendering.surfaces import DecisionSurfaceRenderer
from ..utils.validators import (
    validate_class_names,
    validate_classification_labels,
    validate_feature_matrix,
    validate_label_vector,
    validate_sklearn_model,
)



# [docs]
def visualize_decision_geometry(
    model: object,
    X: np.ndarray,
    y: np.ndarray,
    *,
    config: Optional[VisualizationConfig] = None,
    projection_method: str = "pca",
    predict_fn=None,
    feature_names: Optional[List[str]] = None,
    mesh_resolution: int = 30,
    show_surface: bool = True,
    show_confidence: bool = True,
    show_scatter: bool = True,
    show_centroids: bool = True,
    show_ellipsoids: bool = False,
    show_convex_hulls: bool = False,
    ellipsoid_confidence: float = 0.90,
    class_names: Optional[Dict] = None,
    title: Optional[str] = None,
    batch_size: Optional[int] = None,
) -> go.Figure:
    """Render the decision geometry of a scikit-learn-compatible classifier in 3-D.

    The input feature matrix ``X`` is projected to three dimensions (PCA by
    default; t-SNE / UMAP for pure scatter visualisation).  When the projection
    is invertible (``"pca"`` or ``"sensitivity"``), the model is evaluated on a
    regular 3-D grid that is inverse-transformed back into the original feature
    space, producing decision-boundary isosurfaces anchored to the actual model
    geometry — not an approximation in an arbitrary slice.

    Parameters
    ----------
    model : sklearn-compatible estimator
        Must implement ``predict(X)``.  May additionally implement
        ``predict_proba(X)`` for richer confidence-surface rendering
        (recommended).
    X : array-like of shape (n_samples, n_features)
        Training feature matrix.  Will be standardised and projected internally.
    y : array-like of shape (n_samples,)
        Class label vector.  Integer or string labels are both supported.
    config : VisualizationConfig, optional
        Custom theme and rendering configuration.  Defaults to
        :data:`~geolatent.config.themes.DARK_SCIENTIFIC`.  The configuration is
        copied before ``projection_method`` and ``title`` are applied to it, so
        the caller's object is not modified by this function.
    projection_method : {"pca", "tsne", "umap", "sensitivity"}
        Dimensionality-reduction algorithm.  ``"pca"`` and ``"sensitivity"``
        both support decision-surface rendering.  ``"sensitivity"`` uses
        finite-difference Jacobians to find axes the model actually cares about
        and works with any callable (sklearn, PyTorch, XGBoost, etc.).
    predict_fn : callable, optional
        Required when ``projection_method="sensitivity"`` with a non-sklearn
        model.  When omitted for ``"sensitivity"``, it is derived from
        ``model.predict_proba`` if present, otherwise from ``model.predict``
        with the predicted labels converted to ``float64`` (non-numeric labels
        are replaced by their index among the sorted class labels).
    feature_names : list of str, optional
        Names of the input features.  Shown on axes and sensitivity labels.
    mesh_resolution : int
        Grid resolution per dimension for the prediction mesh.  Total inference
        calls equal ``mesh_resolution³``.  Default 30.
    show_surface : bool
        Whether to render the decision boundary / probability surfaces.
    show_confidence : bool
        When ``True`` and the model exposes ``predict_proba``, render nested
        confidence isosurfaces in addition to the primary boundary shell.
    show_scatter : bool
        Whether to render the data-point scatter cloud.
    show_centroids : bool
        Whether to render class-centroid diamond markers.
    show_ellipsoids : bool
        Whether to overlay Mahalanobis-distance confidence ellipsoids.
    show_convex_hulls : bool
        Whether to overlay transparent convex-hull surfaces per class.
    ellipsoid_confidence : float
        Confidence level for ellipsoid construction (default 0.90 → 90 % region).
    class_names : dict, optional
        Mapping from class label to human-readable display string.
    title : str, optional
        Figure title.  Overrides ``config.title`` when supplied (an empty
        string is treated as "not supplied").
    batch_size : int, optional
        Batch size for model inference on the prediction mesh.

    Returns
    -------
    fig : plotly.graph_objects.Figure
        Interactive 3-D Plotly figure.

    Raises
    ------
    TypeError
        If ``model`` does not expose a ``predict`` method.
    ValueError
        If ``X`` or ``y`` fail validation (shape, NaN, insufficient classes).

    Warns
    -----
    UserWarning
        If ``show_surface`` is requested with a projection other than
        ``"pca"`` / ``"sensitivity"``, or if mesh construction raises; in both
        cases the figure is still produced, without decision surfaces.

    Examples
    --------
    >>> from sklearn.ensemble import GradientBoostingClassifier
    >>> from sklearn.datasets import make_classification
    >>> from geolatent import visualize_decision_geometry
    >>>
    >>> X, y = make_classification(n_samples=400, n_features=20, n_classes=3,
    ...                            n_informative=10, random_state=0)
    >>> clf = GradientBoostingClassifier(n_estimators=50, random_state=0).fit(X, y)
    >>> fig = visualize_decision_geometry(clf, X, y,
    ...                                   title="GBM — 3-class Decision Geometry")
    >>> fig.show()
    """
    # 1. Validate inputs
    validate_sklearn_model(model)
    X = validate_feature_matrix(X, min_samples=4, min_features=2)
    y = validate_label_vector(y, n_samples=len(X))
    y = validate_classification_labels(y)
    class_names = validate_class_names(class_names, np.unique(y))

    # 2. Resolve configuration
    # Test for None explicitly so a caller-supplied config is never replaced
    # by the default merely because it evaluates as falsy.
    base_cfg = DARK_SCIENTIFIC if config is None else config
    cfg = base_cfg.copy()
    cfg.projection.method = projection_method
    if title:
        cfg.title = title

    invertible_methods = ("pca", "sensitivity")
    if projection_method not in invertible_methods and show_surface:
        warnings.warn(
            f"projection_method='{projection_method}' does not support "
            "inverse_transform; decision surfaces will not be rendered.  "
            "Use projection_method='pca' or 'sensitivity'.",
            UserWarning,
            stacklevel=2,
        )
        show_surface = False

    # 3. Auto-derive predict_fn for sensitivity when not supplied
    if projection_method == "sensitivity" and predict_fn is None:
        if hasattr(model, "predict_proba"):
            predict_fn = model.predict_proba
        else:
            predict_fn = _float_label_predictor(model, np.unique(y))

    # 4. Project data to 3-D
    projector = DimensionalityProjector(cfg.projection)
    proj_result = projector.fit_transform(X, predict_fn=predict_fn, feature_names=feature_names)
    X_3d = proj_result.coordinates

    # 5. Build prediction mesh
    mesh = None
    if show_surface and projector.supports_inverse_transform:
        # Best-effort: a failing mesh must not prevent the scatter/overlay
        # layers from being rendered, so any error is downgraded to a warning.
        try:
            builder = MeshBuilder(
                resolution=mesh_resolution,
                batch_size=batch_size,
            )
            mesh = builder.build_prediction_mesh(model, projector, X_3d)
        except Exception as exc:  # noqa: BLE001
            warnings.warn(
                f"Mesh construction failed: {exc!r}.  "
                "Proceeding without decision surfaces.",
                UserWarning,
                stacklevel=2,
            )

    # 6. Assemble scene
    scene = Scene3D(cfg)
    scene.set_axis_labels(proj_result.axis_labels)

    # 7. Decision surfaces (added first, before the data overlays)
    if mesh is not None:
        renderer = DecisionSurfaceRenderer(cfg)
        surface_traces = renderer.render(
            mesh,
            class_names=class_names,
            show_confidence=show_confidence,
        )
        scene.add_traces(surface_traces)

    # 8. Data-point scatter
    overlay = DataOverlay(cfg)

    if show_scatter:
        scatter_traces = overlay.render_scatter(X_3d, y, class_names=class_names)
        scene.add_traces(scatter_traces)

    # 9. Class centroids
    if show_centroids:
        centroid_trace = overlay.render_centroids(X_3d, y, class_names=class_names)
        scene.add_trace(centroid_trace)

    # 10. Confidence ellipsoids
    if show_ellipsoids:
        ellipsoid_traces = overlay.render_ellipsoids(
            X_3d,
            y,
            confidence=ellipsoid_confidence,
            class_names=class_names,
        )
        scene.add_traces(ellipsoid_traces)

    # 11. Convex hulls
    if show_convex_hulls:
        hull_traces = overlay.render_convex_hulls(X_3d, y, class_names=class_names)
        scene.add_traces(hull_traces)

    # 12. Annotations
    final_title = cfg.title or _auto_title(model, projection_method)
    scene.set_title(final_title)

    if cfg.show_variance_annotation and proj_result.explained_variance_ratio is not None:
        scene.add_variance_annotation(proj_result.explained_variance_ratio)

    # Summary line: sample count, original dimensionality, and model class.
    n_samples, n_features = X.shape
    scene.add_text_annotation(
        f"n={n_samples:,}  d={n_features}-&gt;3  "
        f"{type(model).__name__}",
        x=0.99,
        y=0.99,
    )

    return scene.render()


def _float_label_predictor(model: object, fallback_classes: np.ndarray):
    """Wrap ``model.predict`` in a callable that always returns ``float64``.

    Numeric (or numeric-looking) predictions are cast directly, exactly as a
    plain ``.astype(np.float64)`` would.  Predictions that cannot be cast
    (e.g. string class labels such as ``"cat"``) are replaced by their
    position in the sorted class table instead of raising ``ValueError``.

    Parameters
    ----------
    model : object
        Estimator exposing ``predict(X)``.
    fallback_classes : np.ndarray
        Class labels to encode against when ``model`` has no ``classes_``
        attribute.

    Returns
    -------
    callable
        ``f(X_) -> np.ndarray`` of dtype ``float64``.
    """

    def predict_as_float(X_):
        labels = np.asarray(model.predict(X_))
        try:
            return labels.astype(np.float64)
        except (TypeError, ValueError):
            # Non-numeric labels.  Prefer the estimator's own ``classes_``
            # since it is expressed in the label space ``predict`` returns;
            # NOTE(review): assumes every predicted label occurs in the table.
            class_table = np.unique(
                np.asarray(getattr(model, "classes_", fallback_classes))
            )
            return np.searchsorted(class_table, labels).astype(np.float64)

    return predict_as_float



# Private helpers


def _auto_title(model: object, projection_method: str) -> str:
    """Build the default figure title from the model class and projection.

    Parameters
    ----------
    model : object
        The estimator being visualised; only its class name is used.
    projection_method : str
        Projection identifier.  Known identifiers get a display spelling;
        any other value is shown upper-cased.

    Returns
    -------
    str
        Title naming the model class and the projection.
    """
    display_tags = (
        ("pca", "PCA"),
        ("tsne", "t-SNE"),
        ("umap", "UMAP"),
        ("sensitivity", "Sensitivity"),
    )
    # Start from the generic upper-cased spelling, then override for known keys.
    tag = projection_method.upper()
    for key, pretty in display_tags:
        if key == projection_method:
            tag = pretty
            break
    estimator_name = type(model).__name__
    return f"Decision Geometry — {estimator_name} ({tag} projection)"