What is Supervised Learning?
Supervised learning is the most common type of machine learning where the model learns from labeled data. The algorithm learns a mapping function from input features (X) to output labels (y), enabling predictions on new, unseen data.
Think of it as learning with a teacher - you have both questions and answers during training, and the model learns to predict answers for new questions.
Two Main Types
- Regression: Predicts continuous values (prices, temperatures, sales)
- Classification: Predicts categorical labels (spam/not spam, disease/healthy)
Linear Regression
The foundation of regression - fits a linear relationship between features and target:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Pick the predictor columns (X) and the target column (y) from the DataFrame
X = df[['sqft', 'bedrooms', 'bathrooms']]
y = df['price']

# Hold out 20% of the rows for evaluation; fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an ordinary least-squares model on the training split only
model = LinearRegression()
model.fit(X_train, y_train)

# Score on the held-out rows the model has never seen
y_pred = model.predict(X_test)
print(f"R² Score: {r2_score(y_test, y_pred):.4f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.2f}")

# Inspect the learned linear equation: one weight per feature plus a bias term
print("Coefficients:", {name: coef for name, coef in zip(X.columns, model.coef_)})
print("Intercept:", model.intercept_)
Polynomial Regression
from sklearn.preprocessing import PolynomialFeatures

# Expand the raw features with every degree-2 term (squares and pairwise
# products); include_bias=False leaves the intercept to LinearRegression.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

# A plain linear model fitted on the expanded features captures curvature
model = LinearRegression()
model.fit(X_poly, y)

# New data must pass through the SAME fitted transformer before predicting
X_new_poly = poly.transform(X_new)
predictions = model.predict(X_new_poly)
Regularized Regression
from sklearn.linear_model import Ridge, Lasso, ElasticNet

# Ridge: L2 penalty shrinks all coefficients toward zero but keeps every feature
ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)

# Lasso: L1 penalty can drive coefficients exactly to zero — implicit feature selection
lasso = Lasso(alpha=0.1)
lasso.fit(X_train, y_train)

# ElasticNet blends both penalties; l1_ratio=0.5 weights L1 and L2 equally
elastic = ElasticNet(alpha=0.1, l1_ratio=0.5)
elastic.fit(X_train, y_train)

# Let cross-validation choose the regularization strength from a candidate grid
from sklearn.linear_model import RidgeCV, LassoCV
ridge_cv = RidgeCV(alphas=[0.1, 1.0, 10.0, 100.0])
ridge_cv.fit(X_train, y_train)
print(f"Best alpha: {ridge_cv.alpha_}")
Logistic Regression (Classification)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Binary classification: predict loan approval from applicant features
X = df[['age', 'income', 'credit_score']]
y = df['approved'] # 0 or 1

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train model. max_iter is raised above the default (100) because the
# lbfgs solver often fails to converge on unscaled features with very
# different ranges (e.g. income vs. age); standardizing X is the other fix.
model = LogisticRegression(random_state=42, max_iter=1000)
model.fit(X_train, y_train)

# Hard class labels plus the model's probability for the positive class
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1] # Probability of class 1

# Overall accuracy, per-class precision/recall/F1, and the confusion matrix
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
Decision Trees
import pandas as pd  # required for the feature-importance table below (was missing)
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Classification tree; the depth/leaf constraints limit overfitting
clf = DecisionTreeClassifier(
    max_depth=5,           # cap how deep the tree may grow
    min_samples_split=10,  # a node needs at least 10 samples to split
    min_samples_leaf=5,    # every leaf keeps at least 5 samples
    random_state=42
)
clf.fit(X_train, y_train)

# Regression tree uses the same API with a continuous target
reg = DecisionTreeRegressor(max_depth=5, random_state=42)
reg.fit(X_train, y_train)

# Visualize the fitted classifier and save it to disk
plt.figure(figsize=(20, 10))
plot_tree(clf, feature_names=X.columns, class_names=['No', 'Yes'],
          filled=True, rounded=True)
plt.savefig('decision_tree.png', dpi=150, bbox_inches='tight')
plt.close()  # release the large figure so later plots start from a clean state

# Feature importance, sorted so the most informative feature comes first
importance = pd.DataFrame({
    'feature': X.columns,
    'importance': clf.feature_importances_
}).sort_values('importance', ascending=False)
print(importance)
Support Vector Machines (SVM)
from sklearn.svm import SVC, SVR
from sklearn.preprocessing import StandardScaler

# SVMs are distance-based, so features must be standardized first.
# Fit the scaler on the training split only, then reuse it on the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Three common kernel choices; only the RBF variant is actually fitted below
svm_linear = SVC(kernel='linear', C=1.0)
svm_rbf = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_poly = SVC(kernel='poly', degree=3, C=1.0)

svm_rbf.fit(X_train_scaled, y_train)
y_pred = svm_rbf.predict(X_test_scaled)

# probability=True enables predict_proba (slower fit: it runs internal CV)
svm_prob = SVC(kernel='rbf', probability=True)
svm_prob.fit(X_train_scaled, y_train)
probabilities = svm_prob.predict_proba(X_test_scaled)

# Support-vector regression: errors within ±epsilon of the target are not penalized
svr = SVR(kernel='rbf', C=100, epsilon=0.1)
svr.fit(X_train_scaled, y_train)
y_pred = svr.predict(X_test_scaled)
K-Nearest Neighbors (KNN)
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

# Classification: weights='distance' lets closer neighbors vote more heavily
knn = KNeighborsClassifier(n_neighbors=5, weights='distance')
knn.fit(X_train_scaled, y_train)
y_pred = knn.predict(X_test_scaled)

# Finding optimal K: record the mean 5-fold CV accuracy for K = 1..30.
# NOTE: the sweep uses its own variable (knn_k) — the original reused the
# name `knn`, clobbering the fitted classifier above with an unfitted
# n_neighbors=30 model once the loop finished.
from sklearn.model_selection import cross_val_score
k_range = range(1, 31)
k_scores = []
for k in k_range:
    knn_k = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn_k, X_train_scaled, y_train, cv=5)
    k_scores.append(scores.mean())

# Plot K against cross-validated accuracy to see the bias/variance trade-off
plt.plot(k_range, k_scores)
plt.xlabel('K')
plt.ylabel('Cross-Validation Accuracy')
plt.title('KNN: Choosing K')
plt.show()

optimal_k = k_range[np.argmax(k_scores)]
print(f"Optimal K: {optimal_k}")
Naive Bayes
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB

# Gaussian NB: assumes each continuous feature is normally distributed per class
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Multinomial NB: suited to count data such as bag-of-words text features
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer()
X_text = vectorizer.fit_transform(documents)
mnb = MultinomialNB()
mnb.fit(X_text, labels)

# Bernoulli NB: models binary (present/absent) features
bnb = BernoulliNB()
bnb.fit(X_binary, y)
Model Comparison Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Candidate classifiers, all evaluated under identical conditions
models = {
    'Logistic Regression': LogisticRegression(random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'SVM': SVC(random_state=42),
    'KNN': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB()
}

# 5-fold cross-validation. Scaling happens INSIDE the pipeline, so each
# fold's scaler is fitted on that fold's training portion only — no leakage.
results = {}
for name, model in models.items():
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('model', model)
    ])
    scores = cross_val_score(pipeline, X, y, cv=5, scoring='accuracy')
    results[name] = {'mean': scores.mean(), 'std': scores.std()}
    print(f"{name}: {scores.mean():.4f} (+/- {scores.std():.4f})")

# Horizontal bar chart of mean accuracy per model
names = list(results.keys())
means = [r['mean'] for r in results.values()]
plt.barh(names, means)
plt.xlabel('Accuracy')
plt.title('Model Comparison')
plt.show()
Choosing the Right Algorithm
- Linear/Logistic Regression: Simple, interpretable, good baseline
- Decision Trees: Handles non-linear relationships, interpretable
- SVM: Effective in high dimensions, works with clear margins
- KNN: Simple, lazy learner with no real training phase, but predictions can be slow on large datasets
- Naive Bayes: Fast to train and predict, works well with text, and performs reasonably even with relatively little training data
Master Machine Learning
Our Data Science program covers supervised learning in depth with hands-on projects. Learn to build and deploy production-ready ML models.
Explore Data Science Program