ROC Curves and AUC Explained


ROC (Receiver Operating Characteristic) curves visualize classifier performance across all decision thresholds. AUC (Area Under the Curve) condenses that curve into a single number for comparing models.

Understanding ROC Curves

# ROC plots:
# X-axis: False Positive Rate (FPR) = FP / (FP + TN)
# Y-axis: True Positive Rate (TPR) = TP / (TP + FN) = Recall

# Shows tradeoff between catching positives and avoiding false alarms
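
A minimal, self-contained sketch of where one ROC point comes from (the labels and scores below are made up purely for illustration):

import numpy as np
from sklearn.metrics import confusion_matrix

# Toy labels and scores (hypothetical values)
y_true = np.array([0, 0, 1, 1, 1, 0, 1, 0])
scores = np.array([0.1, 0.4, 0.35, 0.8, 0.7, 0.2, 0.9, 0.6])

# One ROC point = (FPR, TPR) at one threshold
threshold = 0.5
y_pred = (scores >= threshold).astype(int)
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()

print(f"FPR = {fp / (fp + tn):.2f}")  # false alarms among actual negatives
print(f"TPR = {tp / (tp + fn):.2f}")  # catches among actual positives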

Creating an ROC Curve

from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

# Create dataset
X, y = make_classification(n_samples=1000, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train model
model = LogisticRegression()
model.fit(X_train, y_train)

# Get prediction probabilities (not just 0/1)
y_scores = model.predict_proba(X_test)[:, 1]

# Calculate ROC curve points
fpr, tpr, thresholds = roc_curve(y_test, y_scores)

# Calculate AUC
auc = roc_auc_score(y_test, y_scores)

# Plot
plt.plot(fpr, tpr, label=f'Model (AUC = {auc:.2f})')
plt.plot([0, 1], [0, 1], 'k--', label='Random (AUC = 0.50)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()

Interpreting AUC Values

# AUC = 1.0: Perfect classifier
# AUC = 0.9-1.0: Excellent
# AUC = 0.8-0.9: Good
# AUC = 0.7-0.8: Fair
# AUC = 0.6-0.7: Poor
# AUC = 0.5: Random guessing (no better than coin flip)
# AUC < 0.5: Worse than random (predictions inverted!)
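
AUC also has a useful probabilistic reading: it is the probability that a randomly chosen positive receives a higher score than a randomly chosen negative. A quick sketch verifying this, assuming y_test and y_scores from the example above:

import numpy as np
from sklearn.metrics import roc_auc_score

pos_scores = y_scores[y_test == 1]
neg_scores = y_scores[y_test == 0]

# Fraction of positive/negative pairs ranked correctly (ties count as half)
pairwise = (pos_scores[:, None] > neg_scores[None, :]).mean() \
         + 0.5 * (pos_scores[:, None] == neg_scores[None, :]).mean()

print(f"Pairwise ranking probability: {pairwise:.3f}")
print(f"roc_auc_score:                {roc_auc_score(y_test, y_scores):.3f}")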

Comparing Multiple Models

from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

# Train multiple models
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(),
    'Naive Bayes': GaussianNB()
}

plt.figure(figsize=(10, 6))

for name, model in models.items():
    model.fit(X_train, y_train)
    y_scores = model.predict_proba(X_test)[:, 1]

    fpr, tpr, _ = roc_curve(y_test, y_scores)
    auc = roc_auc_score(y_test, y_scores)

    plt.plot(fpr, tpr, label=f'{name} (AUC = {auc:.2f})')

plt.plot([0, 1], [0, 1], 'k--', label='Random')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves - Model Comparison')
plt.legend()
plt.show()

# Higher AUC = Better model

Finding Optimal Threshold

from sklearn.metrics import roc_curve
import numpy as np

# Get ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_scores)

# Find the threshold that maximizes TPR - FPR (Youden's J statistic)
optimal_idx = np.argmax(tpr - fpr)
optimal_threshold = thresholds[optimal_idx]

print(f"Optimal threshold: {optimal_threshold:.3f}")
print(f"TPR at optimal: {tpr[optimal_idx]:.3f}")
print(f"FPR at optimal: {fpr[optimal_idx]:.3f}")

# Use this threshold for predictions
y_pred_optimal = (y_scores >= optimal_threshold).astype(int)
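
As a quick check (assuming y_test, y_scores, and y_pred_optimal from above), compare the tuned threshold against the default 0.5 cutoff:

from sklearn.metrics import precision_score, recall_score

# Default 0.5 cutoff vs the threshold chosen above
y_pred_default = (y_scores >= 0.5).astype(int)

for label, preds in [('Default 0.5', y_pred_default), ('Optimal', y_pred_optimal)]:
    print(f"{label}: precision = {precision_score(y_test, preds):.2f}, "
          f"recall = {recall_score(y_test, preds):.2f}")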

ROC vs Precision-Recall Curves

# ROC curve: works well when classes are roughly balanced
# Precision-Recall curve: more informative when the positive class is rare

from sklearn.metrics import precision_recall_curve, average_precision_score

# For imbalanced data
precision, recall, _ = precision_recall_curve(y_test, y_scores)
ap = average_precision_score(y_test, y_scores)

plt.plot(recall, precision, label=f'AP = {ap:.2f}')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend()
plt.show()
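
To see why this matters, a small sketch on a deliberately imbalanced dataset (the 95/5 class split and variable names below are assumptions for illustration):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, average_precision_score

# ~5% positives: ROC AUC often looks flattering here, average precision is stricter
X_imb, y_imb = make_classification(n_samples=5000, weights=[0.95, 0.05], random_state=42)
Xtr, Xte, ytr, yte = train_test_split(X_imb, y_imb, test_size=0.3,
                                      stratify=y_imb, random_state=42)

scores_imb = LogisticRegression().fit(Xtr, ytr).predict_proba(Xte)[:, 1]

print(f"ROC AUC:           {roc_auc_score(yte, scores_imb):.3f}")
print(f"Average precision: {average_precision_score(yte, scores_imb):.3f}")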

Multi-Class ROC

from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier

# Multi-class example (3 classes) - needs its own dataset and split
X_mc, y_mc = make_classification(n_samples=1000, n_classes=3,
                                 n_informative=5, random_state=42)
X_train_mc, X_test_mc, y_train_mc, y_test_mc = train_test_split(
    X_mc, y_mc, test_size=0.3, random_state=42)

# Binarize the test labels for per-class (one-vs-rest) ROC curves
y_test_bin = label_binarize(y_test_mc, classes=[0, 1, 2])

# Train a one-vs-rest classifier on the integer labels
classifier = OneVsRestClassifier(LogisticRegression())
y_scores_mc = classifier.fit(X_train_mc, y_train_mc).predict_proba(X_test_mc)

# Plot ROC for each class
for i in range(3):
    fpr, tpr, _ = roc_curve(y_test_bin[:, i], y_scores_mc[:, i])
    auc = roc_auc_score(y_test_bin[:, i], y_scores_mc[:, i])
    plt.plot(fpr, tpr, label=f'Class {i} (AUC = {auc:.2f})')

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Multi-Class ROC Curves')
plt.legend()
plt.show()

Cross-Validated ROC

from sklearn.model_selection import cross_val_predict

# Get cross-validated predictions
y_scores_cv = cross_val_predict(
    model, X, y,
    cv=5,
    method='predict_proba'
)[:, 1]

# Calculate ROC from CV predictions
fpr, tpr, _ = roc_curve(y, y_scores_cv)
auc_cv = roc_auc_score(y, y_scores_cv)

print(f"Cross-validated AUC: {auc_cv:.3f}")

# More reliable estimate of true performance
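
To see fold-to-fold variability rather than a single pooled number, a sketch assuming the same model, X, and y as above:

from sklearn.model_selection import cross_val_score

# One AUC per fold shows how stable the estimate is
fold_aucs = cross_val_score(model, X, y, cv=5, scoring='roc_auc')
print(f"Per-fold AUC: {np.round(fold_aucs, 3)}")
print(f"Mean +/- std: {fold_aucs.mean():.3f} +/- {fold_aucs.std():.3f}")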

Practical Example: Credit Scoring

# Scenario: predict loan default (positive class = default)
# High FPR = flag good borrowers as defaulters (reject good loans, lose business)
# Low TPR = miss actual defaulters (approve bad loans, lose money)

# Business requirements often cap the false positive rate
# Use the ROC curve to pick a threshold that matches those goals

fpr, tpr, thresholds = roc_curve(y_test, y_scores)

# Find the best operating point with FPR at or below 10%
target_fpr = 0.10
idx = np.where(fpr <= target_fpr)[0][-1]  # last (highest-TPR) point satisfying the cap

selected_threshold = thresholds[idx]
selected_tpr = tpr[idx]

print(f"At FPR <= {target_fpr:.0%}:")
print(f"Threshold: {selected_threshold:.3f}")
print(f"TPR (Recall): {selected_tpr:.1%}")
print(f"We catch {selected_tpr:.1%} of defaults while keeping the false positive rate at or below {target_fpr:.0%}")

Pro Tip: Always use prediction probabilities (not binary predictions) for ROC curves. For imbalanced datasets, Precision-Recall curves are more informative than ROC. Report both AUC and the confusion matrix at your chosen threshold!
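
For example, a short sketch reporting both, assuming y_test, y_scores, and selected_threshold from the credit-scoring example:

from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score

# AUC plus the confusion matrix at the business-chosen threshold
y_pred_at_threshold = (y_scores >= selected_threshold).astype(int)

print(f"AUC: {roc_auc_score(y_test, y_scores):.3f}")
print(confusion_matrix(y_test, y_pred_at_threshold))
print(classification_report(y_test, y_pred_at_threshold))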
