ROC Curves and AUC Explained


ROC (Receiver Operating Characteristic) curves visualize classifier performance across all decision thresholds. AUC (Area Under the Curve) condenses that curve into a single number for comparing models.

Understanding ROC Curves

# ROC plots:
# X-axis: False Positive Rate (FPR) = FP / (FP + TN)
# Y-axis: True Positive Rate (TPR) = TP / (TP + FN) = Recall

# Shows tradeoff between catching positives and avoiding false alarms
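
A minimal, self-contained sketch of where one ROC point comes from (the labels and scores below are made up purely for illustration):

import numpy as np
from sklearn.metrics import confusion_matrix

# Toy labels and scores (hypothetical values)
y_true = np.array([0, 0, 1, 1, 1, 0, 1, 0])
scores = np.array([0.1, 0.4, 0.35, 0.8, 0.7, 0.2, 0.9, 0.6])

# One ROC point = (FPR, TPR) at one threshold
threshold = 0.5
y_pred = (scores >= threshold).astype(int)
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()

print(f"FPR = {fp / (fp + tn):.2f}")  # false alarms among actual negatives
print(f"TPR = {tp / (tp + fn):.2f}")  # catches among actual positives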

Creating an ROC Curve

from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

# Create dataset
X, y = make_classification(n_samples=1000, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train model
model = LogisticRegression()
model.fit(X_train, y_train)

# Get prediction probabilities (not just 0/1)
y_scores = model.predict_proba(X_test)[:, 1]

# Calculate ROC curve points
fpr, tpr, thresholds = roc_curve(y_test, y_scores)

# Calculate AUC
auc = roc_auc_score(y_test, y_scores)

# Plot
plt.plot(fpr, tpr, label=f'Model (AUC = {auc:.2f})')
plt.plot([0, 1], [0, 1], 'k--', label='Random (AUC = 0.50)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()

Interpreting AUC Values

# AUC = 1.0: Perfect classifier
# AUC = 0.9-1.0: Excellent
# AUC = 0.8-0.9: Good
# AUC = 0.7-0.8: Fair
# AUC = 0.6-0.7: Poor
# AUC = 0.5: Random guessing (no better than coin flip)
# AUC < 0.5: Worse than random (predictions inverted!)
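
AUC also has a useful probabilistic reading: it is the probability that a randomly chosen positive receives a higher score than a randomly chosen negative. A quick sketch verifying this, assuming y_test and y_scores from the example above:

import numpy as np
from sklearn.metrics import roc_auc_score

pos_scores = y_scores[y_test == 1]
neg_scores = y_scores[y_test == 0]

# Fraction of positive/negative pairs ranked correctly (ties count as half)
pairwise = (pos_scores[:, None] > neg_scores[None, :]).mean() \
         + 0.5 * (pos_scores[:, None] == neg_scores[None, :]).mean()

print(f"Pairwise ranking probability: {pairwise:.3f}")
print(f"roc_auc_score:                {roc_auc_score(y_test, y_scores):.3f}")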

Comparing Multiple Models

from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

# Train multiple models
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(),
    'Naive Bayes': GaussianNB()
}

plt.figure(figsize=(10, 6))

for name, model in models.items():
    model.fit(X_train, y_train)
    y_scores = model.predict_proba(X_test)[:, 1]

    fpr, tpr, _ = roc_curve(y_test, y_scores)
    auc = roc_auc_score(y_test, y_scores)

    plt.plot(fpr, tpr, label=f'{name} (AUC = {auc:.2f})')

plt.plot([0, 1], [0, 1], 'k--', label='Random')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves - Model Comparison')
plt.legend()
plt.show()

# Higher AUC = Better model

Finding Optimal Threshold

from sklearn.metrics import roc_curve
import numpy as np

# Get ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_scores)

# Find the threshold that maximizes TPR - FPR (Youden's J statistic)
optimal_idx = np.argmax(tpr - fpr)
optimal_threshold = thresholds[optimal_idx]

print(f"Optimal threshold: {optimal_threshold:.3f}")
print(f"TPR at optimal: {tpr[optimal_idx]:.3f}")
print(f"FPR at optimal: {fpr[optimal_idx]:.3f}")

# Use this threshold for predictions
y_pred_optimal = (y_scores >= optimal_threshold).astype(int)
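
As a quick check (assuming y_test, y_scores, and y_pred_optimal from above), compare the tuned threshold against the default 0.5 cutoff:

from sklearn.metrics import precision_score, recall_score

# Default 0.5 cutoff vs the threshold chosen above
y_pred_default = (y_scores >= 0.5).astype(int)

for label, preds in [('Default 0.5', y_pred_default), ('Optimal', y_pred_optimal)]:
    print(f"{label}: precision = {precision_score(y_test, preds):.2f}, "
          f"recall = {recall_score(y_test, preds):.2f}")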

ROC vs Precision-Recall Curves

# ROC curve: works well when classes are roughly balanced
# Precision-Recall curve: more informative when the positive class is rare

from sklearn.metrics import precision_recall_curve, average_precision_score

# For imbalanced data
precision, recall, _ = precision_recall_curve(y_test, y_scores)
ap = average_precision_score(y_test, y_scores)

plt.plot(recall, precision, label=f'AP = {ap:.2f}')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend()
plt.show()
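
To see why this matters, a small sketch on a deliberately imbalanced dataset (the 95/5 class split and variable names below are assumptions for illustration):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, average_precision_score

# ~5% positives: ROC AUC often looks flattering here, average precision is stricter
X_imb, y_imb = make_classification(n_samples=5000, weights=[0.95, 0.05], random_state=42)
Xtr, Xte, ytr, yte = train_test_split(X_imb, y_imb, test_size=0.3,
                                      stratify=y_imb, random_state=42)

scores_imb = LogisticRegression().fit(Xtr, ytr).predict_proba(Xte)[:, 1]

print(f"ROC AUC:           {roc_auc_score(yte, scores_imb):.3f}")
print(f"Average precision: {average_precision_score(yte, scores_imb):.3f}")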

Multi-Class ROC

from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier

# Multi-class example (3 classes) - needs its own dataset and split
X_mc, y_mc = make_classification(n_samples=1000, n_classes=3,
                                 n_informative=5, random_state=42)
X_train_mc, X_test_mc, y_train_mc, y_test_mc = train_test_split(
    X_mc, y_mc, test_size=0.3, random_state=42)

# Binarize the test labels for per-class (one-vs-rest) ROC curves
y_test_bin = label_binarize(y_test_mc, classes=[0, 1, 2])

# Train a one-vs-rest classifier on the integer labels
classifier = OneVsRestClassifier(LogisticRegression())
y_scores_mc = classifier.fit(X_train_mc, y_train_mc).predict_proba(X_test_mc)

# Plot ROC for each class
for i in range(3):
    fpr, tpr, _ = roc_curve(y_test_bin[:, i], y_scores_mc[:, i])
    auc = roc_auc_score(y_test_bin[:, i], y_scores_mc[:, i])
    plt.plot(fpr, tpr, label=f'Class {i} (AUC = {auc:.2f})')

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Multi-Class ROC Curves')
plt.legend()
plt.show()

Cross-Validated ROC

from sklearn.model_selection import cross_val_predict

# Get cross-validated predictions
y_scores_cv = cross_val_predict(
    model, X, y,
    cv=5,
    method='predict_proba'
)[:, 1]

# Calculate ROC from CV predictions
fpr, tpr, _ = roc_curve(y, y_scores_cv)
auc_cv = roc_auc_score(y, y_scores_cv)

print(f"Cross-validated AUC: {auc_cv:.3f}")

# More reliable estimate of true performance
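
To see fold-to-fold variability rather than a single pooled number, a sketch assuming the same model, X, and y as above:

from sklearn.model_selection import cross_val_score

# One AUC per fold shows how stable the estimate is
fold_aucs = cross_val_score(model, X, y, cv=5, scoring='roc_auc')
print(f"Per-fold AUC: {np.round(fold_aucs, 3)}")
print(f"Mean +/- std: {fold_aucs.mean():.3f} +/- {fold_aucs.std():.3f}")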

Practical Example: Credit Scoring

# Scenario: predict loan default (positive class = default)
# High FPR = flag good borrowers as defaulters (reject good loans, lose business)
# Low TPR = miss actual defaulters (approve bad loans, lose money)

# Business requirements often cap the false positive rate
# Use the ROC curve to pick a threshold that matches those goals

fpr, tpr, thresholds = roc_curve(y_test, y_scores)

# Find the best operating point with FPR at or below 10%
target_fpr = 0.10
idx = np.where(fpr <= target_fpr)[0][-1]  # last (highest-TPR) point satisfying the cap

selected_threshold = thresholds[idx]
selected_tpr = tpr[idx]

print(f"At FPR <= {target_fpr:.0%}:")
print(f"Threshold: {selected_threshold:.3f}")
print(f"TPR (Recall): {selected_tpr:.1%}")
print(f"We catch {selected_tpr:.1%} of defaults while keeping the false positive rate at or below {target_fpr:.0%}")

Pro Tip: Always use prediction probabilities (not binary predictions) for ROC curves. For imbalanced datasets, Precision-Recall curves are more informative than ROC. Report both AUC and the confusion matrix at your chosen threshold!
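
For example, a short sketch reporting both, assuming y_test, y_scores, and selected_threshold from the credit-scoring example:

from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score

# AUC plus the confusion matrix at the business-chosen threshold
y_pred_at_threshold = (y_scores >= selected_threshold).astype(int)

print(f"AUC: {roc_auc_score(y_test, y_scores):.3f}")
print(confusion_matrix(y_test, y_pred_at_threshold))
print(classification_report(y_test, y_pred_at_threshold))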
