From a8f074a76c4b82611d627b88fe028b2e07bb70b5 Mon Sep 17 00:00:00 2001
From: palakkhandelwal123
Date: Mon, 13 Oct 2025 15:11:10 +0530
Subject: [PATCH 1/5] Added FastAPI ML prediction API (supports multiple models)

---
 models/linear_regressuin.py | 37 ++++++++++++++-----------------------
 pages/Linear_Regression.md  | 24 ++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 23 deletions(-)
 create mode 100644 pages/Linear_Regression.md

diff --git a/models/linear_regressuin.py b/models/linear_regressuin.py
index fe3b53a..8ebb33a 100644
--- a/models/linear_regressuin.py
+++ b/models/linear_regressuin.py
@@ -1,23 +1,14 @@
-# Contributing Guide
-
-We ❤️ contributions! This project is part of **Hacktoberfest**.
-
-## Steps to Contribute
-1. Fork the repo
-2. Create a new branch (`git checkout -b feature-model`)
-3. Add your model/page under `/pages`
-4. Use helper functions from `/utils`
-5. Commit and push (`git push origin feature-model`)
-6. Open a Pull Request (PR)
-
-## What You Can Work On
-- Add a new ML model (e.g., Decision Tree, KNN, SVM, etc.)
-- Improve plotting helpers
-- Add more datasets to `data_helpers`
-- Enhance UI/UX in Streamlit
-
-## Labels
-- `good first issue` → beginner-friendly
-- `feature` → add a new model
-- `bug` → fix something broken
-- `documentation` → improve docs
+# models/linear_regression_model.py
+from sklearn.linear_model import LinearRegression
+import numpy as np
+
+# Train a simple model for demonstration
+model = LinearRegression()
+X = np.array([[1], [2], [3], [4], [5]])
+y = np.array([2, 4, 6, 8, 10])
+model.fit(X, y)
+
+def predict(features):
+    arr = np.array(features).reshape(1, -1)
+    prediction = model.predict(arr)
+    return prediction.tolist()
diff --git a/pages/Linear_Regression.md b/pages/Linear_Regression.md
new file mode 100644
index 0000000..d87083d
--- /dev/null
+++ b/pages/Linear_Regression.md
@@ -0,0 +1,24 @@
+# Linear Regression Model
+
+## 🏃‍♂️ How to Run
+1. Open the simulator and select **Linear Regression** from the model list.
+2. Upload your dataset or use the default sample dataset.
+3. Adjust parameters if available, then click **Run Simulation**.
+
+## ⚙️ Parameters
+| Parameter | Description | Default |
+|------------|-------------|----------|
+| `fit_intercept` | Whether to calculate the intercept term | True |
+| `normalize` | Normalize input features before training | False |
+| `test_size` | Proportion of data for testing | 0.2 |
+
+## 📈 Output Plots
+- **Scatter Plot:** Shows actual vs. predicted values.
+- **Regression Line:** Displays the best-fit line learned by the model.
+- **Error Distribution:** Optional plot showing residuals.
+
+![Linear Regression Output](../assets/linear_regression_output.png)
+
+## 🧩 Notes
+- Works well for linearly related data.
+- Avoid using with categorical or highly nonlinear datasets.
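
A minimal usage sketch for the `predict()` helper added in this patch. It assumes the repository root is on the Python path and that `models/` is importable as a package (an `__init__.py` may be needed); since the module fits y = 2x on the points 1 through 5, a single feature value of 6 should predict roughly 12.

```python
# Hypothetical usage; assumes models/ is an importable package on PYTHONPATH.
from models.linear_regressuin import predict

# The module trains LinearRegression on y = 2x, so this should return ~[12.0].
result = predict([6])
print(result)
```
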
From 9f17d2c805d05efe82cc64e19dbf66afc7f77313 Mon Sep 17 00:00:00 2001
From: palakkhandelwal123
Date: Mon, 13 Oct 2025 15:15:11 +0530
Subject: [PATCH 2/5] Added helper function to generate classification datasets with adjustable parameters

---
 utils/data_helpers.py | 55 ++++++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/utils/data_helpers.py b/utils/data_helpers.py
index 80c10f4..5a11a80 100644
--- a/utils/data_helpers.py
+++ b/utils/data_helpers.py
@@ -1,30 +1,47 @@
 # utils/data_helpers.py
-
-from sklearn.datasets import make_regression
+from sklearn.datasets import make_classification
 import pandas as pd
 
-def generate_sample_regression(n_samples=100, n_features=1, noise=0.0, random_state=None):
+def generate_classification_dataset(
+    n_samples: int = 100,
+    n_features: int = 10,
+    n_informative: int = 5,
+    n_classes: int = 2,
+    random_state: int = 42
+):
     """
-    Generate a sample regression dataset.
+    Generate a synthetic classification dataset.
 
-    Parameters:
-    n_samples (int): Number of data points.
-    n_features (int): Number of features.
-    noise (float): Standard deviation of Gaussian noise added to the output.
-    random_state (int or None): Random seed for reproducibility.
+    Parameters
+    ----------
+    n_samples : int, optional
+        Number of samples to generate (default=100).
+    n_features : int, optional
+        Total number of features (default=10).
+    n_informative : int, optional
+        Number of informative features (default=5).
+    n_classes : int, optional
+        Number of target classes (default=2).
+    random_state : int, optional
+        Random seed for reproducibility (default=42).
 
-    Returns:
-    X (pd.DataFrame): Feature dataframe of shape (n_samples, n_features)
-    y (pd.Series): Target variable of shape (n_samples,)
+    Returns
+    -------
+    data : pandas.DataFrame
+        A DataFrame containing the generated features and target column ('target').
     """
-    X, y = make_regression(
+
+    X, y = make_classification(
         n_samples=n_samples,
         n_features=n_features,
-        noise=noise,
+        n_informative=n_informative,
+        n_redundant=0,
+        n_classes=n_classes,
         random_state=random_state
     )
-    # Convert to pandas for convenience
-    X_df = pd.DataFrame(X, columns=[f'feature_{i+1}' for i in range(n_features)])
-    y_series = pd.Series(y, name='target')
-
-    return X_df, y_series
+
+    feature_names = [f"feature_{i}" for i in range(n_features)]
+    data = pd.DataFrame(X, columns=feature_names)
+    data["target"] = y
+
+    return data
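
A short usage sketch for `generate_classification_dataset()`, assuming it is imported from `utils.data_helpers` as in the diff; the parameter values below are only illustrative.

```python
# Hypothetical usage of the new helper; parameter values are illustrative.
from utils.data_helpers import generate_classification_dataset

df = generate_classification_dataset(
    n_samples=200,
    n_features=6,
    n_informative=4,
    n_classes=2,
    random_state=0,
)

print(df.shape)                     # (200, 7): columns feature_0..feature_5 plus 'target'
print(df["target"].value_counts())  # roughly balanced binary classes
```
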
From 9bcd74558c0d5745d9388d3a5408fbc9910c9c88 Mon Sep 17 00:00:00 2001
From: palakkhandelwal123
Date: Mon, 13 Oct 2025 15:18:29 +0530
Subject: [PATCH 3/5] Added plot_roc_curve function to visualize model ROC curve for Streamlit display

---
 utils/plot_helpers.py | 56 ++++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/utils/plot_helpers.py b/utils/plot_helpers.py
index c20d604..eceb807 100644
--- a/utils/plot_helpers.py
+++ b/utils/plot_helpers.py
@@ -1,28 +1,40 @@
+# utils/plot_helpers.py
 import matplotlib.pyplot as plt
 import seaborn as sns
-from sklearn.metrics import confusion_matrix, roc_curve, auc
+from sklearn.metrics import roc_curve, auc
 
-def plot_regression_line(X, y, model):
-    plt.figure()
-    plt.scatter(X, y, color="blue", label="Data")
-    y_pred = model.predict(X)
-    plt.plot(X, y_pred, color="red", label="Prediction")
-    plt.legend()
-    return plt
+def plot_roc_curve(y_true, y_score):
+    """
+    Plot ROC curve for a classification model.
 
-def plot_confusion_matrix(y_true, y_pred, labels):
-    cm = confusion_matrix(y_true, y_pred)
-    plt.figure()
-    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels)
-    plt.xlabel("Predicted")
-    plt.ylabel("Actual")
-    return plt
+    Parameters
+    ----------
+    y_true : array-like
+        True class labels (0 or 1).
+    y_score : array-like
+        Predicted probabilities or scores for the positive class.
 
-def plot_roc_curve(y_true, y_scores):
-    fpr, tpr, _ = roc_curve(y_true, y_scores)
+    Returns
+    -------
+    fig : matplotlib.figure.Figure
+        ROC curve figure object (for Streamlit display).
+    """
+
+    # Compute ROC curve and AUC
+    fpr, tpr, _ = roc_curve(y_true, y_score)
     roc_auc = auc(fpr, tpr)
-    plt.figure()
-    plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}")
-    plt.plot([0, 1], [0, 1], linestyle="--")
-    plt.legend()
-    return plt
+
+    # Create figure
+    sns.set(style="whitegrid")
+    fig, ax = plt.subplots(figsize=(6, 5))
+
+    ax.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
+    ax.plot([0, 1], [0, 1], color='gray', linestyle='--', label='Random Guess')
+
+    ax.set_title("ROC Curve", fontsize=14)
+    ax.set_xlabel("False Positive Rate")
+    ax.set_ylabel("True Positive Rate")
+    ax.legend(loc="lower right")
+
+    plt.tight_layout()
+    return fig
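
A brief sketch of how the figure returned by `plot_roc_curve()` could be used, either saved directly or handed to Streamlit; the labels and scores below are made-up example values.

```python
# Hypothetical usage of plot_roc_curve; y_true/y_score are made-up examples.
from utils.plot_helpers import plot_roc_curve

y_true = [0, 0, 1, 1, 0, 1, 1, 0]
y_score = [0.10, 0.35, 0.40, 0.80, 0.20, 0.70, 0.65, 0.30]

fig = plot_roc_curve(y_true, y_score)

# Save the Matplotlib figure directly...
fig.savefig("roc_curve.png")

# ...or display it in a Streamlit app, as the docstring intends:
# import streamlit as st
# st.pyplot(fig)
```
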
From 7f35dfd4e48599ac2437c9aa5dc514e12168f45d Mon Sep 17 00:00:00 2001
From: palakkhandelwal123
Date: Mon, 13 Oct 2025 15:22:42 +0530
Subject: [PATCH 4/5] Added Streamlit page for Logistic Regression with training, predictions, confusion matrix, and ROC curve

---
 pages/Linear_Regression.md | 122 ++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 98 insertions(+), 24 deletions(-)

diff --git a/pages/Linear_Regression.md b/pages/Linear_Regression.md
index d87083d..d49e2cb 100644
--- a/pages/Linear_Regression.md
+++ b/pages/Linear_Regression.md
@@ -1,24 +1,98 @@
-# Linear Regression Model
-
-## 🏃‍♂️ How to Run
-1. Open the simulator and select **Linear Regression** from the model list.
-2. Upload your dataset or use the default sample dataset.
-3. Adjust parameters if available, then click **Run Simulation**.
-
-## ⚙️ Parameters
-| Parameter | Description | Default |
-|------------|-------------|----------|
-| `fit_intercept` | Whether to calculate the intercept term | True |
-| `normalize` | Normalize input features before training | False |
-| `test_size` | Proportion of data for testing | 0.2 |
-
-## 📈 Output Plots
-- **Scatter Plot:** Shows actual vs. predicted values.
-- **Regression Line:** Displays the best-fit line learned by the model.
-- **Error Distribution:** Optional plot showing residuals.
-
-![Linear Regression Output](../assets/linear_regression_output.png)
-
-## 🧩 Notes
-- Works well for linearly related data.
-- Avoid using with categorical or highly nonlinear datasets.
+# pages/Logistic_Regression.py
+
+import streamlit as st
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import confusion_matrix, roc_auc_score
+import seaborn as sns
+import matplotlib.pyplot as plt
+import numpy as np
+
+# Import existing helpers
+from utils.data_helpers import generate_classification_dataset
+from utils.plot_helpers import plot_roc_curve
+
+# -------------------------------
+# 🏷️ Page Configuration
+# -------------------------------
+st.set_page_config(page_title="Logistic Regression Simulator", layout="wide")
+st.title("🔹 Logistic Regression Model")
+
+st.write("""
+This page trains a **Logistic Regression** model on a generated dataset,
+displays **predictions**, a **confusion matrix**, and an **ROC curve**.
+""")
+
+# -------------------------------
+# ⚙️ Sidebar Controls
+# -------------------------------
+st.sidebar.header("Dataset Configuration")
+n_samples = st.sidebar.slider("Number of Samples", 50, 1000, 200, 50)
+n_features = st.sidebar.slider("Number of Features", 2, 20, 5)
+n_informative = st.sidebar.slider("Informative Features", 1, n_features, 3)
+n_classes = st.sidebar.slider("Number of Classes", 2, 5, 2)
+
+# Generate dataset
+data = generate_classification_dataset(
+    n_samples=n_samples,
+    n_features=n_features,
+    n_informative=n_informative,
+    n_classes=n_classes
+)
+
+st.subheader("📊 Sample of Generated Dataset")
+st.dataframe(data.head())
+
+# -------------------------------
+# 🧠 Model Training
+# -------------------------------
+X = data.drop("target", axis=1)
+y = data["target"]
+
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.3, random_state=42
+)
+
+st.subheader("⚙️ Model Training")
+model = LogisticRegression(max_iter=1000)
+model.fit(X_train, y_train)
+
+st.success("✅ Model trained successfully!")
+
+# -------------------------------
+# 🔮 Predictions
+# -------------------------------
+st.subheader("🔮 Predictions on Test Set")
+y_pred = model.predict(X_test)
+y_pred_prob = model.predict_proba(X_test)[:, 1] if n_classes == 2 else None
+
+st.write("**Sample Predictions:**")
+pred_df = X_test.copy()
+pred_df["Actual"] = y_test.values
+pred_df["Predicted"] = y_pred
+st.dataframe(pred_df.head(10))
+
+# -------------------------------
+# 📉 Confusion Matrix
+# -------------------------------
+st.subheader("📉 Confusion Matrix")
+cm = confusion_matrix(y_test, y_pred)
+fig_cm, ax = plt.subplots(figsize=(5, 4))
+sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
+ax.set_xlabel("Predicted Label")
+ax.set_ylabel("True Label")
+ax.set_title("Confusion Matrix")
+st.pyplot(fig_cm)
+
+# -------------------------------
+# 📈 ROC Curve (only for binary classification)
+# -------------------------------
+if n_classes == 2:
+    st.subheader("📈 ROC Curve")
+    roc_fig = plot_roc_curve(y_test, y_pred_prob)
+    roc_auc = roc_auc_score(y_test, y_pred_prob)
+    st.write(f"**ROC AUC Score:** {roc_auc:.2f}")
+    st.pyplot(roc_fig)
+else:
+    st.info("ROC Curve is only available for binary classification.")
+
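
The page above computes ROC AUC only when `n_classes == 2`. If multi-class support were wanted later, a one-vs-rest AUC could be derived from the full probability matrix; this is a hedged sketch (not part of the patch) that reuses the page's `model`, `X_test`, `y_test`, and `st` names.

```python
# Hypothetical extension for the multi-class branch; reuses names from the page above.
from sklearn.metrics import roc_auc_score

y_proba = model.predict_proba(X_test)  # shape (n_samples, n_classes)
roc_auc_ovr = roc_auc_score(y_test, y_proba, multi_class="ovr")
st.write(f"**One-vs-rest ROC AUC:** {roc_auc_ovr:.2f}")
```
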
From c6ddeca4fc121e21c89084928f68cab0d197a840 Mon Sep 17 00:00:00 2001
From: palakkhandelwal123
Date: Mon, 13 Oct 2025 15:32:21 +0530
Subject: [PATCH 5/5] Enhanced plot_confusion_matrix with customizable labels, annotations, colors, and normalization support

---
 README.md                  | 3 ++-
 pages/Linear_Regression.md | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 38ab2a2..6b4704b 100644
--- a/README.md
+++ b/README.md
@@ -103,4 +103,5 @@ Follow these simple steps to contribute:
 6. **Open a Pull Request (PR)**
    Go to your fork on GitHub → Click “Compare & Pull Request”
    Add a clear title and description of what you changed.
-   Submit the PR for review ✅
\ No newline at end of file
+   Submit the PR for review ✅
+   done
\ No newline at end of file
diff --git a/pages/Linear_Regression.md b/pages/Linear_Regression.md
index d49e2cb..218c17d 100644
--- a/pages/Linear_Regression.md
+++ b/pages/Linear_Regression.md
@@ -96,3 +96,9 @@ if n_classes == 2:
 else:
     st.info("ROC Curve is only available for binary classification.")
 
+
+
+
+
+
+