Interpreting ALR: Automatic Feature Selection via Weights#

This example demonstrates the “Embedded Feature Selection” capability of Hybrid AnDE models (ALR).

The Result: By ignoring noise, ALR achieves significantly lower Log Loss and higher Accuracy.

Impact of Noise on Model Weights and Performance, AnDE (Generative) Strategy: Uniform Attention, ALR (Hybrid) Strategy: Learned Attention

Training models...

# Author: The scikit-bayes Developers
# SPDX-License-Identifier: BSD-3-Clause

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, log_loss
from sklearn.model_selection import train_test_split

from skbn.ande import ALR, AnDE

# --- 1. Generate Dataset with Heavy Noise ---
np.random.seed(42)
n_samples = 20000  # Large sample size to allow convergence of weights
n_features = 10

# A. Informative Features (Mixed Type)
X_cat = np.random.randint(0, 3, size=(n_samples, 1))
X_cont = np.random.randn(n_samples, 1)

# Logic: Cat=0 & X>0.5 OR Cat=1 & X<-0.5 -> Class 1
y = np.zeros(n_samples, dtype=int)
mask_0 = (X_cat.flatten() == 0) & (X_cont.flatten() > 0.5)
mask_1 = (X_cat.flatten() == 1) & (X_cont.flatten() < -0.5)
y[mask_0 | mask_1] = 1

# B. Noise Features
X_noise = np.random.randn(n_samples, 8)
X = np.hstack([X_cat, X_cont, X_noise])

# Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# --- 2. Fit and Evaluate ---
print("Training models...")

# Model 1: AnDE
ande = AnDE(n_dependence=1)
ande.fit(X_train, y_train)
acc_ande = accuracy_score(y_test, ande.predict(X_test))
ll_ande = log_loss(y_test, ande.predict_proba(X_test))

# Model 2: ALR (Higher regularization for visualization sparsity)
alr = ALR(n_dependence=1, l2_reg=0.05, max_iter=200)
alr.fit(X_train, y_train)
acc_alr = accuracy_score(y_test, alr.predict(X_test))
ll_alr = log_loss(y_test, alr.predict_proba(X_test))

# --- 3. Visualization ---
features = [f"F{i}\n(Signal)" if i < 2 else f"F{i}\n(Noise)" for i in range(n_features)]
indices = np.arange(n_features)

ande_weights = np.ones(n_features)

# Detect if we have weights per class (Level 3) or simple weights (Level 1)
n_models = len(alr.ensemble_)

if alr.learned_weights_.size == n_models:
    # Level 1: One weight per feature. Use directly.
    alr_weights = alr.learned_weights_
else:
    # Level 3: Weights per class. Reshape and average.
    n_classes = len(alr.classes_)
    weights_matrix = alr.learned_weights_.reshape(n_models, n_classes)
    alr_weights = np.mean(weights_matrix, axis=1)

fig, axes = plt.subplots(1, 2, figsize=(16, 7), sharey=True)


# Helper for info box (MOVED TO RIGHT)
def add_score_box(ax, acc, ll):
    textstr = "\n".join(
        (r"$\bf{Performance (Test):}$", f"Accuracy: {acc:.3f}", f"Log Loss: {ll:.3f}")
    )
    props = dict(boxstyle="round", facecolor="white", alpha=0.9, edgecolor="gray")
    ax.text(
        0.95,
        0.95,
        textstr,
        transform=ax.transAxes,
        fontsize=12,
        verticalalignment="top",
        horizontalalignment="right",
        bbox=props,
    )


# Plot AnDE (Generative)
axes[0].bar(indices, ande_weights, color="lightgray", edgecolor="gray")
axes[0].set_title("AnDE (Generative)\nStrategy: Uniform Attention", fontsize=14)
axes[0].set_xlabel("Feature (as Super-Parent)", fontsize=12)
axes[0].set_ylabel("Weight Magnitude", fontsize=12)
axes[0].set_xticks(indices)
axes[0].set_xticklabels(features, rotation=45, ha="right")

# Highlight Signal Features with GOLD (to show ground truth importance)
axes[0].patches[0].set_facecolor("gold")
axes[0].patches[0].set_edgecolor("orange")
axes[0].patches[1].set_facecolor("gold")
axes[0].patches[1].set_edgecolor("orange")

add_score_box(axes[0], acc_ande, ll_ande)

# Plot ALR (Hybrid)
# Use INDIGO for learned weights
bars = axes[1].bar(indices, alr_weights, color="indigo", edgecolor="black", alpha=0.8)
axes[1].set_title("ALR (Hybrid)\nStrategy: Learned Attention", fontsize=14)
axes[1].set_xlabel("Feature (as Super-Parent)", fontsize=12)
axes[1].set_xticks(indices)
axes[1].set_xticklabels(features, rotation=45, ha="right")

add_score_box(axes[1], acc_alr, ll_alr)

# Annotate values on ALR
for bar in bars:
    height = bar.get_height()
    if height > 0.1:
        axes[1].annotate(
            f"{height:.2f}",
            xy=(bar.get_x() + bar.get_width() / 2, height),
            xytext=(0, 3),
            textcoords="offset points",
            ha="center",
            va="bottom",
            fontweight="bold",
            color="indigo",
        )

fig.suptitle("Impact of Noise on Model Weights and Performance", fontsize=18)
plt.tight_layout()
plt.subplots_adjust(top=0.85)
plt.show()

Total running time of the script: (0 minutes 1.323 seconds)

Gallery generated by Sphinx-Gallery

Interpreting ALR: Automatic Feature Selection via Weights#

This Page