Source code for detoxai.cavs.cav

"""
Credit: https://github.com/frederikpahde/rrclarc
"""

import logging

import numpy as np
import torch
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC

logger = logging.getLogger(__name__)



[docs]
def compute_cav(vecs: np.ndarray, targets: np.ndarray, cav_type: str = "svm") -> tuple:
    """Compute a concept activation vector (CAV) for a set of vectors and targets.

    Args:
      vecs: torch.Tensor of shape (n_samples, n_features)
      targets: torch.Tensor of shape (n_samples,)
      cav_type: str, type of CAV to compute. One of ["svm", "ridge", "signal", "mean"]
      vecs: np.ndarray:
      targets: np.ndarray:
      cav_type: str:  (Default value = "svm")

    Returns:
      torch.Tensor of shape (1, n_features)

    """

    num_targets = (targets == 1).sum()
    num_notargets = (targets == 0).sum()
    weights = (targets == 1) * 1 / num_targets + (targets == 0) * 1 / num_notargets
    weights = weights / weights.max()

    X = vecs

    if "svm" in cav_type:
        linear = LinearSVC(random_state=0, fit_intercept=True, max_iter=200)
        grid_search = GridSearchCV(
            linear, param_grid={"C": [10**i for i in range(-5, 5)]}
        )
        grid_search.fit(X, targets, sample_weight=weights)
        linear = grid_search.best_estimator_
        logger.debug("Best C:", linear.C)
        # linear.fit(X, targets, sample_weight=weights)
        w = torch.Tensor(linear.coef_)
    elif "ridge" in cav_type:
        clf = Ridge(alpha=100, fit_intercept=True, random_state=0)
        grid_search = GridSearchCV(
            clf, param_grid={"alpha": [10**i for i in range(-5, 5)]}
        )
        grid_search.fit(X, targets * 2 - 1, sample_weight=weights)
        clf = grid_search.best_estimator_
        logger.debug("Best alpha:", clf.alpha)
        # clf.fit(X, targets * 2 - 1, sample_weight=weights)
        w = torch.tensor(clf.coef_)[None]

    elif "lasso" in cav_type:
        from sklearn.linear_model import Lasso

        clf = Lasso(alpha=0.1, fit_intercept=True, max_iter=200, random_state=0)

        grid_search = GridSearchCV(
            clf, param_grid={"alpha": [10**i for i in range(-5, 5)]}
        )
        grid_search.fit(X, targets * 2 - 1, sample_weight=weights)
        clf = grid_search.best_estimator_
        logger.debug("Best alpha:", clf.alpha)
        # clf.fit(X, targets * 2 - 1, sample_weight=weights)
        w = torch.tensor(clf.coef_)[None]

    elif "logistic" in cav_type:
        from sklearn.linear_model import LogisticRegression

        clf = LogisticRegression(fit_intercept=True, random_state=0)

        grid_search = GridSearchCV(clf, param_grid={"C": [10**i for i in range(-5, 5)]})
        grid_search.fit(X, targets * 2 - 1, sample_weight=weights)
        clf = grid_search.best_estimator_
        logger.debug("Best C:", clf.C)

        # clf.fit(X, targets, sample_weight=weights)
        w = torch.tensor(clf.coef_)

    elif "signal" in cav_type:
        logger.debug("Calculating signal CAV")
        y = targets
        mean_y = y.mean()
        X_residuals = X - X.mean(axis=0)[None]
        covar = (X_residuals * (y - mean_y)[:, np.newaxis]).sum(axis=0) / (
            y.shape[0] - 1
        )
        vary = np.sum((y - mean_y) ** 2, axis=0) / (y.shape[0] - 1)
        w = covar / vary
        w = torch.tensor(w)[None]
    elif "mean-mass" in cav_type:
        mean_act_nonartif = torch.tensor(X[targets == 0].mean(0), dtype=torch.float32)
        mean_act_artif = torch.tensor(X[targets == 1].mean(0), dtype=torch.float32)

        cav = mean_act_nonartif - mean_act_artif

    else:
        raise NotImplementedError()

    cav = w / torch.sqrt((w**2).sum())
    cav = cav.detach().cpu()

    mean_act_nonartif = torch.tensor(X[targets == 0].mean(0), dtype=torch.float32)
    mean_act_artif = torch.tensor(X[targets == 1].mean(0), dtype=torch.float32)

    # print("largest CAV values:", torch.topk(cav.flatten(), 10))

    return cav, mean_act_nonartif, mean_act_artif