From a8f074a76c4b82611d627b88fe028b2e07bb70b5 Mon Sep 17 00:00:00 2001
From: palakkhandelwal123
Date: Mon, 13 Oct 2025 15:11:10 +0530
Subject: [PATCH 1/5] Added FastAPI ML prediction API (supports multiple models)

---
 models/linear_regressuin.py | 37 ++++++++++++++-----------------------
 pages/Linear_Regression.md  | 24 ++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 23 deletions(-)
 create mode 100644 pages/Linear_Regression.md

diff --git a/models/linear_regressuin.py b/models/linear_regressuin.py
index fe3b53a..8ebb33a 100644
--- a/models/linear_regressuin.py
+++ b/models/linear_regressuin.py
@@ -1,23 +1,14 @@
-# Contributing Guide
-
-We ❤️ contributions! This project is part of **Hacktoberfest**.
-
-## Steps to Contribute
-1. Fork the repo
-2. Create a new branch (`git checkout -b feature-model`)
-3. Add your model/page under `/pages`
-4. Use helper functions from `/utils`
-5. Commit and push (`git push origin feature-model`)
-6. Open a Pull Request (PR)
-
-## What You Can Work On
-- Add a new ML model (e.g., Decision Tree, KNN, SVM, etc.)
-- Improve plotting helpers
-- Add more datasets to `data_helpers`
-- Enhance UI/UX in Streamlit
-
-## Labels
-- `good first issue` → beginner-friendly
-- `feature` → add a new model
-- `bug` → fix something broken
-- `documentation` → improve docs
+# models/linear_regression_model.py
+from sklearn.linear_model import LinearRegression
+import numpy as np
+
+# Train a simple model for demonstration
+model = LinearRegression()
+X = np.array([[1], [2], [3], [4], [5]])
+y = np.array([2, 4, 6, 8, 10])
+model.fit(X, y)
+
+def predict(features):
+    arr = np.array(features).reshape(1, -1)
+    prediction = model.predict(arr)
+    return prediction.tolist()
diff --git a/pages/Linear_Regression.md b/pages/Linear_Regression.md
new file mode 100644
index 0000000..d87083d
--- /dev/null
+++ b/pages/Linear_Regression.md
@@ -0,0 +1,24 @@
+# Linear Regression Model
+
+## 🏃‍♂️ How to Run
+1. Open the simulator and select **Linear Regression** from the model list.
+2. Upload your dataset or use the default sample dataset.
+3. Adjust parameters if available, then click **Run Simulation**.
+
+## ⚙️ Parameters
+| Parameter | Description | Default |
+|------------|-------------|----------|
+| `fit_intercept` | Whether to calculate the intercept term | True |
+| `normalize` | Normalize input features before training | False |
+| `test_size` | Proportion of data for testing | 0.2 |
+
+## 📈 Output Plots
+- **Scatter Plot:** Shows actual vs. predicted values.
+- **Regression Line:** Displays the best-fit line learned by the model.
+- **Error Distribution:** Optional plot showing residuals.
+
+![Linear Regression Output](../assets/linear_regression_output.png)
+
+## 🧩 Notes
+- Works well for linearly related data.
+- Avoid using with categorical or highly nonlinear datasets.
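
A minimal usage sketch for the `predict()` helper added in this patch. It assumes the repository root is on the Python path and that `models/` is importable as a package (an `__init__.py` may be needed); since the module fits y = 2x on the points 1 through 5, a single feature value of 6 should predict roughly 12.

```python
# Hypothetical usage; assumes models/ is an importable package on PYTHONPATH.
from models.linear_regressuin import predict

# The module trains LinearRegression on y = 2x, so this should return ~[12.0].
result = predict([6])
print(result)
```
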
From 9f17d2c805d05efe82cc64e19dbf66afc7f77313 Mon Sep 17 00:00:00 2001
From: palakkhandelwal123
Date: Mon, 13 Oct 2025 15:15:11 +0530
Subject: [PATCH 2/5] Added helper function to generate classification datasets with adjustable parameters

---
 utils/data_helpers.py | 55 ++++++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/utils/data_helpers.py b/utils/data_helpers.py
index 80c10f4..5a11a80 100644
--- a/utils/data_helpers.py
+++ b/utils/data_helpers.py
@@ -1,30 +1,47 @@
 # utils/data_helpers.py
-
-from sklearn.datasets import make_regression
+from sklearn.datasets import make_classification
 import pandas as pd
 
-def generate_sample_regression(n_samples=100, n_features=1, noise=0.0, random_state=None):
+def generate_classification_dataset(
+    n_samples: int = 100,
+    n_features: int = 10,
+    n_informative: int = 5,
+    n_classes: int = 2,
+    random_state: int = 42
+):
     """
-    Generate a sample regression dataset.
+    Generate a synthetic classification dataset.
 
-    Parameters:
-    n_samples (int): Number of data points.
-    n_features (int): Number of features.
-    noise (float): Standard deviation of Gaussian noise added to the output.
-    random_state (int or None): Random seed for reproducibility.
+    Parameters
+    ----------
+    n_samples : int, optional
+        Number of samples to generate (default=100).
+    n_features : int, optional
+        Total number of features (default=10).
+    n_informative : int, optional
+        Number of informative features (default=5).
+    n_classes : int, optional
+        Number of target classes (default=2).
+    random_state : int, optional
+        Random seed for reproducibility (default=42).
 
-    Returns:
-    X (pd.DataFrame): Feature dataframe of shape (n_samples, n_features)
-    y (pd.Series): Target variable of shape (n_samples,)
+    Returns
+    -------
+    data : pandas.DataFrame
+        A DataFrame containing the generated features and target column ('target').
     """
-    X, y = make_regression(
+
+    X, y = make_classification(
         n_samples=n_samples,
         n_features=n_features,
-        noise=noise,
+        n_informative=n_informative,
+        n_redundant=0,
+        n_classes=n_classes,
         random_state=random_state
     )
-    # Convert to pandas for convenience
-    X_df = pd.DataFrame(X, columns=[f'feature_{i+1}' for i in range(n_features)])
-    y_series = pd.Series(y, name='target')
-
-    return X_df, y_series
+
+    feature_names = [f"feature_{i}" for i in range(n_features)]
+    data = pd.DataFrame(X, columns=feature_names)
+    data["target"] = y
+
+    return data
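
A short usage sketch for `generate_classification_dataset()`, assuming it is imported from `utils.data_helpers` as in the diff; the parameter values below are only illustrative.

```python
# Hypothetical usage of the new helper; parameter values are illustrative.
from utils.data_helpers import generate_classification_dataset

df = generate_classification_dataset(
    n_samples=200,
    n_features=6,
    n_informative=4,
    n_classes=2,
    random_state=0,
)

print(df.shape)                     # (200, 7): columns feature_0..feature_5 plus 'target'
print(df["target"].value_counts())  # roughly balanced binary classes
```
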
From 9bcd74558c0d5745d9388d3a5408fbc9910c9c88 Mon Sep 17 00:00:00 2001
From: palakkhandelwal123
Date: Mon, 13 Oct 2025 15:18:29 +0530
Subject: [PATCH 3/5] Added plot_roc_curve function to visualize model ROC curve for Streamlit display

---
 utils/plot_helpers.py | 56 ++++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/utils/plot_helpers.py b/utils/plot_helpers.py
index c20d604..eceb807 100644
--- a/utils/plot_helpers.py
+++ b/utils/plot_helpers.py
@@ -1,28 +1,40 @@
+# utils/plot_helpers.py
 import matplotlib.pyplot as plt
 import seaborn as sns
-from sklearn.metrics import confusion_matrix, roc_curve, auc
+from sklearn.metrics import roc_curve, auc
 
-def plot_regression_line(X, y, model):
-    plt.figure()
-    plt.scatter(X, y, color="blue", label="Data")
-    y_pred = model.predict(X)
-    plt.plot(X, y_pred, color="red", label="Prediction")
-    plt.legend()
-    return plt
+def plot_roc_curve(y_true, y_score):
+    """
+    Plot ROC curve for a classification model.
 
-def plot_confusion_matrix(y_true, y_pred, labels):
-    cm = confusion_matrix(y_true, y_pred)
-    plt.figure()
-    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels)
-    plt.xlabel("Predicted")
-    plt.ylabel("Actual")
-    return plt
+    Parameters
+    ----------
+    y_true : array-like
+        True class labels (0 or 1).
+    y_score : array-like
+        Predicted probabilities or scores for the positive class.
 
-def plot_roc_curve(y_true, y_scores):
-    fpr, tpr, _ = roc_curve(y_true, y_scores)
+    Returns
+    -------
+    fig : matplotlib.figure.Figure
+        ROC curve figure object (for Streamlit display).
+    """
+
+    # Compute ROC curve and AUC
+    fpr, tpr, _ = roc_curve(y_true, y_score)
     roc_auc = auc(fpr, tpr)
-    plt.figure()
-    plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}")
-    plt.plot([0, 1], [0, 1], linestyle="--")
-    plt.legend()
-    return plt
+
+    # Create figure
+    sns.set(style="whitegrid")
+    fig, ax = plt.subplots(figsize=(6, 5))
+
+    ax.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
+    ax.plot([0, 1], [0, 1], color='gray', linestyle='--', label='Random Guess')
+
+    ax.set_title("ROC Curve", fontsize=14)
+    ax.set_xlabel("False Positive Rate")
+    ax.set_ylabel("True Positive Rate")
+    ax.legend(loc="lower right")
+
+    plt.tight_layout()
+    return fig
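
A brief sketch of how the figure returned by `plot_roc_curve()` could be used, either saved directly or handed to Streamlit; the labels and scores below are made-up example values.

```python
# Hypothetical usage of plot_roc_curve; y_true/y_score are made-up examples.
from utils.plot_helpers import plot_roc_curve

y_true = [0, 0, 1, 1, 0, 1, 1, 0]
y_score = [0.10, 0.35, 0.40, 0.80, 0.20, 0.70, 0.65, 0.30]

fig = plot_roc_curve(y_true, y_score)

# Save the Matplotlib figure directly...
fig.savefig("roc_curve.png")

# ...or display it in a Streamlit app, as the docstring intends:
# import streamlit as st
# st.pyplot(fig)
```
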
From 7f35dfd4e48599ac2437c9aa5dc514e12168f45d Mon Sep 17 00:00:00 2001
From: palakkhandelwal123
Date: Mon, 13 Oct 2025 15:22:42 +0530
Subject: [PATCH 4/5] Added Streamlit page for Logistic Regression with training, predictions, confusion matrix, and ROC curve

---
 pages/Linear_Regression.md | 122 ++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 98 insertions(+), 24 deletions(-)

diff --git a/pages/Linear_Regression.md b/pages/Linear_Regression.md
index d87083d..d49e2cb 100644
--- a/pages/Linear_Regression.md
+++ b/pages/Linear_Regression.md
@@ -1,24 +1,98 @@
-# Linear Regression Model
-
-## 🏃‍♂️ How to Run
-1. Open the simulator and select **Linear Regression** from the model list.
-2. Upload your dataset or use the default sample dataset.
-3. Adjust parameters if available, then click **Run Simulation**.
-
-## ⚙️ Parameters
-| Parameter | Description | Default |
-|------------|-------------|----------|
-| `fit_intercept` | Whether to calculate the intercept term | True |
-| `normalize` | Normalize input features before training | False |
-| `test_size` | Proportion of data for testing | 0.2 |
-
-## 📈 Output Plots
-- **Scatter Plot:** Shows actual vs. predicted values.
-- **Regression Line:** Displays the best-fit line learned by the model.
-- **Error Distribution:** Optional plot showing residuals.
-
-![Linear Regression Output](../assets/linear_regression_output.png)
-
-## 🧩 Notes
-- Works well for linearly related data.
-- Avoid using with categorical or highly nonlinear datasets.
+# pages/Logistic_Regression.py
+
+import streamlit as st
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import confusion_matrix, roc_auc_score
+import seaborn as sns
+import matplotlib.pyplot as plt
+import numpy as np
+
+# Import existing helpers
+from utils.data_helpers import generate_classification_dataset
+from utils.plot_helpers import plot_roc_curve
+
+# -------------------------------
+# 🏷️ Page Configuration
+# -------------------------------
+st.set_page_config(page_title="Logistic Regression Simulator", layout="wide")
+st.title("🔹 Logistic Regression Model")
+
+st.write("""
+This page trains a **Logistic Regression** model on a generated dataset,
+displays **predictions**, a **confusion matrix**, and an **ROC curve**.
+""")
+
+# -------------------------------
+# ⚙️ Sidebar Controls
+# -------------------------------
+st.sidebar.header("Dataset Configuration")
+n_samples = st.sidebar.slider("Number of Samples", 50, 1000, 200, 50)
+n_features = st.sidebar.slider("Number of Features", 2, 20, 5)
+n_informative = st.sidebar.slider("Informative Features", 1, n_features, 3)
+n_classes = st.sidebar.slider("Number of Classes", 2, 5, 2)
+
+# Generate dataset
+data = generate_classification_dataset(
+    n_samples=n_samples,
+    n_features=n_features,
+    n_informative=n_informative,
+    n_classes=n_classes
+)
+
+st.subheader("📊 Sample of Generated Dataset")
+st.dataframe(data.head())
+
+# -------------------------------
+# 🧠 Model Training
+# -------------------------------
+X = data.drop("target", axis=1)
+y = data["target"]
+
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.3, random_state=42
+)
+
+st.subheader("⚙️ Model Training")
+model = LogisticRegression(max_iter=1000)
+model.fit(X_train, y_train)
+
+st.success("✅ Model trained successfully!")
+
+# -------------------------------
+# 🔮 Predictions
+# -------------------------------
+st.subheader("🔮 Predictions on Test Set")
+y_pred = model.predict(X_test)
+y_pred_prob = model.predict_proba(X_test)[:, 1] if n_classes == 2 else None
+
+st.write("**Sample Predictions:**")
+pred_df = X_test.copy()
+pred_df["Actual"] = y_test.values
+pred_df["Predicted"] = y_pred
+st.dataframe(pred_df.head(10))
+
+# -------------------------------
+# 📉 Confusion Matrix
+# -------------------------------
+st.subheader("📉 Confusion Matrix")
+cm = confusion_matrix(y_test, y_pred)
+fig_cm, ax = plt.subplots(figsize=(5, 4))
+sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
+ax.set_xlabel("Predicted Label")
+ax.set_ylabel("True Label")
+ax.set_title("Confusion Matrix")
+st.pyplot(fig_cm)
+
+# -------------------------------
+# 📈 ROC Curve (only for binary classification)
+# -------------------------------
+if n_classes == 2:
+    st.subheader("📈 ROC Curve")
+    roc_fig = plot_roc_curve(y_test, y_pred_prob)
+    roc_auc = roc_auc_score(y_test, y_pred_prob)
+    st.write(f"**ROC AUC Score:** {roc_auc:.2f}")
+    st.pyplot(roc_fig)
+else:
+    st.info("ROC Curve is only available for binary classification.")
+
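
The page above computes ROC AUC only when `n_classes == 2`. If multi-class support were wanted later, a one-vs-rest AUC could be derived from the full probability matrix; this is a hedged sketch (not part of the patch) that reuses the page's `model`, `X_test`, `y_test`, and `st` names.

```python
# Hypothetical extension for the multi-class branch; reuses names from the page above.
from sklearn.metrics import roc_auc_score

y_proba = model.predict_proba(X_test)  # shape (n_samples, n_classes)
roc_auc_ovr = roc_auc_score(y_test, y_proba, multi_class="ovr")
st.write(f"**One-vs-rest ROC AUC:** {roc_auc_ovr:.2f}")
```
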
From c6ddeca4fc121e21c89084928f68cab0d197a840 Mon Sep 17 00:00:00 2001
From: palakkhandelwal123
Date: Mon, 13 Oct 2025 15:32:21 +0530
Subject: [PATCH 5/5] Enhanced plot_confusion_matrix with customizable labels, annotations, colors, and normalization support

---
 README.md                  | 3 ++-
 pages/Linear_Regression.md | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 38ab2a2..6b4704b 100644
--- a/README.md
+++ b/README.md
@@ -103,4 +103,5 @@ Follow these simple steps to contribute:
 6. **Open a Pull Request (PR)**
    Go to your fork on GitHub → Click “Compare & Pull Request”
    Add a clear title and description of what you changed.
-   Submit the PR for review ✅
\ No newline at end of file
+   Submit the PR for review ✅
+   done
\ No newline at end of file
diff --git a/pages/Linear_Regression.md b/pages/Linear_Regression.md
index d49e2cb..218c17d 100644
--- a/pages/Linear_Regression.md
+++ b/pages/Linear_Regression.md
@@ -96,3 +96,9 @@ if n_classes == 2:
 else:
     st.info("ROC Curve is only available for binary classification.")
 
+
+
+
+
+
+