|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "metadata": {}, |
| 6 | + "source": [ |
| 7 | + "# 5G Network Operations Insights with Fine Tuning of GPT2 (This is the smallest version of GPT-2, with 124M parameters.)\n", |
| 8 | + "## Project Overview\n", |
| 9 | + "Author: Fatih E. NAR\n", |
| 10 | + "This project aims to deliver a 5g network insight with fine tuning a network performant LLM\n", |
| 11 | + "Model card: https://huggingface.co/openai-community/gpt2" |
| 12 | + ] |
| 13 | + }, |
| 14 | + { |
| 15 | + "cell_type": "code", |
| 16 | + "execution_count": null, |
| 17 | + "metadata": {}, |
| 18 | + "outputs": [], |
| 19 | + "source": [ |
| 20 | + "%pip install -r requirements.txt\n", |
| 21 | + "%pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu" |
| 22 | + ] |
| 23 | + }, |
| 24 | + { |
| 25 | + "cell_type": "code", |
| 26 | + "execution_count": null, |
| 27 | + "metadata": {}, |
| 28 | + "outputs": [], |
| 29 | + "source": [ |
| 30 | + "import lzma\n", |
| 31 | + "import shutil\n", |
| 32 | + "import pandas as pd\n", |
| 33 | + "import os\n", |
| 34 | + "import torch\n", |
| 35 | + "import psutil\n", |
| 36 | + "import threading\n", |
| 37 | + "import sys\n", |
| 38 | + "import time\n", |
| 39 | + "import gc\n", |
| 40 | + "from datasets import Dataset\n", |
| 41 | + "from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments, get_linear_schedule_with_warmup\n", |
| 42 | + "\n", |
| 43 | + "from peft import get_peft_model, LoraConfig, TaskType\n", |
| 44 | + "\n", |
| 45 | + "# Save the model and tokenizer\n", |
| 46 | + "model_save_path = \"models/5g_oss_model\"\n", |
| 47 | + "#model_name = \"distilgpt2\"\n", |
| 48 | + "model_name = \"gpt2\"\n", |
| 49 | + "\n", |
| 50 | + "# Set TOKENIZERS_PARALLELISM to false to avoid warnings\n", |
| 51 | + "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", |
| 52 | + "\n", |
| 53 | + "# Clear GPU cache before starting\n", |
| 54 | + "torch.cuda.empty_cache()\n", |
| 55 | + "gc.collect()\n", |
| 56 | + "\n", |
| 57 | + "# Cap memory usage to a specific size (e.g., 8 GB) for cuda\n", |
| 58 | + "max_memory_gb = 8\n", |
| 59 | + "max_memory_mb = max_memory_gb * 1024\n", |
| 60 | + "os.environ['PYTORCH_CUDA_ALLOC_CONF'] = f'max_split_size_mb:{max_memory_mb}'\n", |
| 61 | + "\n", |
| 62 | + "# Check if any accelerator is available \n", |
| 63 | + "if torch.cuda.is_available():\n", |
| 64 | + " print(\"Using CUDA (NVIDIA GPU)\")\n", |
| 65 | + " os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\"\n", |
| 66 | + " # Leverage multi-gpu if available\n", |
| 67 | + " device1 = torch.device(\"cuda:0\")\n", |
| 68 | + " device2 = torch.device(\"cuda:1\") if torch.cuda.device_count() > 1 else torch.device(\"cuda:0\")\n", |
| 69 | + " print(\"Using CUDA\")\n", |
| 70 | + " # Clear GPU cache before starting\n", |
| 71 | + " torch.cuda.empty_cache()\n", |
| 72 | + "# Check if MPS (Apple Silicon GPU) is available\n", |
| 73 | + "elif torch.backends.mps.is_available():\n", |
| 74 | + " os.environ[\"PYTORCH_MPS_HIGH_WATERMARK_RATIO\"] = \"0.0\"\n", |
| 75 | + " os.environ[\"PYTORCH_ENABLE_MPS_FALLBACK\"] = \"1\"\n", |
| 76 | + " # Leverage multi-gpu if available\n", |
| 77 | + " device1 = torch.device(\"mps:0\")\n", |
| 78 | + " device2 = torch.device(\"mps:1\") \n", |
| 79 | + " print(\"Using MPS\")\n", |
| 80 | + "else:\n", |
| 81 | + " device1 = torch.device(\"cpu\")\n", |
| 82 | + " device2 = torch.device(\"cpu\")\n", |
| 83 | + " print(\"Using CPU\")\n", |
| 84 | + "\n", |
| 85 | + "# Extract the .xz file\n", |
| 86 | + "with lzma.open('data/5G_netops_data.csv.xz', 'rb') as f_in:\n", |
| 87 | + " with open('data/5G_netops_data.csv', 'wb') as f_out:\n", |
| 88 | + " shutil.copyfileobj(f_in, f_out)\n", |
| 89 | + "\n", |
| 90 | + "# Load the synthetic telecom data\n", |
| 91 | + "data_path = \"data/5G_netops_data.csv\"\n", |
| 92 | + "data = pd.read_csv(data_path)\n", |
| 93 | + "\n", |
| 94 | + "# Display basic information about the full dataset\n", |
| 95 | + "data.info()\n", |
| 96 | + "data.head()" |
| 97 | + ] |
| 98 | + }, |
| 99 | + { |
| 100 | + "cell_type": "code", |
| 101 | + "execution_count": null, |
| 102 | + "metadata": {}, |
| 103 | + "outputs": [], |
| 104 | + "source": [ |
| 105 | + "# Fill NaN values and prepare input and target texts\n", |
| 106 | + "# Ensure all NaN values are filled with empty strings\n", |
| 107 | + "data = data.fillna('')\n", |
| 108 | + "\n", |
| 109 | + "# Ensure 'Zip' column is treated as a string\n", |
| 110 | + "data['Zip'] = data['Zip'].astype(str)\n", |
| 111 | + "\n", |
| 112 | + "# Create the input_text column\n", |
| 113 | + "data['input_text'] = data.apply(lambda row: f\"Date: {row['Date']} Cell Availability: {row['Cell Availability (%)']} MTTR: {row['MTTR (hours)']} Throughput: {row['Throughput (Mbps)']} Latency: {row['Latency (ms)']} Packet Loss Rate: {row['Packet Loss Rate (%)']} Call Drop Rate: {row['Call Drop Rate (%)']} Handover Success Rate: {row['Handover Success Rate (%)']} Alarm Count: {row['Alarm Count']} Critical Alarm Count: {row['Critical Alarm Count']} Parameter Changes: {row['Parameter Changes']} Successful Configuration Changes: {row['Successful Configuration Changes (%)']} Data Usage: {row['Data Usage (GB)']} User Count: {row['User Count']} Signal Strength: {row['Signal Strength (dBm)']} Jitter: {row['Jitter (ms)']} Connection Setup Success Rate: {row['Connection Setup Success Rate (%)']} Security Incidents: {row['Security Incidents']} Authentication Failures: {row['Authentication Failures']} Temperature: {row['Temperature (°C)']} Humidity: {row['Humidity (%)']} Weather: {row['Weather']} Issue Reported: {row['Issue Reported']} City: {row['City']} State: {row['State']} Zip: {row['Zip']}\", axis=1)\n", |
| 114 | + "\n", |
| 115 | + "# Create the target_text column\n", |
| 116 | + "data['target_text'] = data['Fault Occurrence Rate'].astype(str)\n", |
| 117 | + "\n", |
| 118 | + "# Convert to HuggingFace Dataset\n", |
| 119 | + "dataset = Dataset.from_pandas(data)\n", |
| 120 | + "\n", |
| 121 | + "# Split the dataset into training and evaluation\n", |
| 122 | + "train_test_split = dataset.train_test_split(test_size=0.1)\n", |
| 123 | + "train_dataset = train_test_split['train']\n", |
| 124 | + "eval_dataset = train_test_split['test']\n", |
| 125 | + "\n", |
| 126 | + "# Check the loaded dataset\n", |
| 127 | + "print(f\"Training Dataset size: {len(train_dataset)}\")\n", |
| 128 | + "print(f\"Evaluation Dataset size: {len(eval_dataset)}\")\n", |
| 129 | + "print(train_dataset[0])" |
| 130 | + ] |
| 131 | + }, |
| 132 | + { |
| 133 | + "cell_type": "code", |
| 134 | + "execution_count": null, |
| 135 | + "metadata": {}, |
| 136 | + "outputs": [], |
| 137 | + "source": [ |
| 138 | + "# Load the tokenizer from the pretrained model\n", |
| 139 | + "tokenizer = GPT2Tokenizer.from_pretrained(model_name)\n", |
| 140 | + "\n", |
| 141 | + "# Add the pad token if it doesn't exist\n", |
| 142 | + "if tokenizer.pad_token is None:\n", |
| 143 | + " tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})\n", |
| 144 | + "else:\n", |
| 145 | + " tokenizer.pad_token = tokenizer.eos_token\n", |
| 146 | + "\n", |
| 147 | + "# Save the tokenizer\n", |
| 148 | + "tokenizer.save_pretrained(model_save_path)\n", |
| 149 | + "\n", |
| 150 | + "model = GPT2LMHeadModel.from_pretrained(model_name)\n", |
| 151 | + "model.resize_token_embeddings(len(tokenizer))\n", |
| 152 | + "# Save the new model\n", |
| 153 | + "model.save_pretrained(model_save_path)\n", |
| 154 | + "\n", |
| 155 | + "# Define preprocessing function\n", |
| 156 | + "def preprocess_function(examples):\n", |
| 157 | + " inputs = examples['input_text']\n", |
| 158 | + " targets = examples['target_text']\n", |
| 159 | + " model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding='max_length')\n", |
| 160 | + " with tokenizer.as_target_tokenizer():\n", |
| 161 | + " labels = tokenizer(targets, max_length=512, truncation=True, padding='max_length')\n", |
| 162 | + " model_inputs['labels'] = labels['input_ids']\n", |
| 163 | + " return model_inputs\n", |
| 164 | + "\n", |
| 165 | + "# Apply preprocessing\n", |
| 166 | + "train_dataset = train_dataset.map(preprocess_function, batched=True)\n", |
| 167 | + "eval_dataset = eval_dataset.map(preprocess_function, batched=True)\n", |
| 168 | + "\n", |
| 169 | + "columns = ['input_ids', 'attention_mask', 'labels']\n", |
| 170 | + "train_dataset.set_format(type='torch', columns=columns)\n", |
| 171 | + "eval_dataset.set_format(type='torch', columns=columns)\n", |
| 172 | + "\n", |
| 173 | + "# Check the tokenized dataset\n", |
| 174 | + "print(f\"Tokenized Training Dataset size: {len(train_dataset)}\")\n", |
| 175 | + "print(f\"Tokenized Evaluation Dataset size: {len(eval_dataset)}\")\n", |
| 176 | + "print(train_dataset[0])" |
| 177 | + ] |
| 178 | + }, |
| 179 | + { |
| 180 | + "cell_type": "code", |
| 181 | + "execution_count": null, |
| 182 | + "metadata": {}, |
| 183 | + "outputs": [], |
| 184 | + "source": [ |
| 185 | + "# PEFT Part\n", |
| 186 | + "lora_config = LoraConfig(\n", |
| 187 | + " task_type=TaskType.CAUSAL_LM,\n", |
| 188 | + " inference_mode=False,\n", |
| 189 | + " r=2,\n", |
| 190 | + " lora_alpha=16,\n", |
| 191 | + " lora_dropout=0.05\n", |
| 192 | + ")\n", |
| 193 | + "\n", |
| 194 | + "model = get_peft_model(model, lora_config)\n", |
| 195 | + "\n", |
| 196 | + "# Manually split the model across the two GPUs\n", |
| 197 | + "model.transformer.h[:6].to(device1) # First half of layers to GPU 1\n", |
| 198 | + "model.transformer.h[6:].to(device2) # Second half of layers to GPU 2\n", |
| 199 | + "model.transformer.ln_f.to(device2) # Final normalization layer to GPU 2\n", |
| 200 | + "model.lm_head.to(device2) # Language modeling head to GPU 2" |
| 201 | + ] |
| 202 | + }, |
| 203 | + { |
| 204 | + "cell_type": "code", |
| 205 | + "execution_count": null, |
| 206 | + "metadata": {}, |
| 207 | + "outputs": [], |
| 208 | + "source": [ |
| 209 | + "# Set training arguments\n", |
| 210 | + "training_args = TrainingArguments(\n", |
| 211 | + " output_dir=\"./results\", # Output directory\n", |
| 212 | + " overwrite_output_dir=True, # Overwrite the content of the output directory\n", |
| 213 | + " num_train_epochs=10, # Number of training epochs\n", |
| 214 | + " per_device_train_batch_size=36, # Batch size per device during training\n", |
| 215 | + " gradient_accumulation_steps=12, # Accumulate gradients over multiple steps\n", |
| 216 | + " learning_rate=5e-5, # Learning rate\n", |
| 217 | + " save_steps=2000, # Save checkpoint every 2000 steps\n", |
| 218 | + " save_total_limit=2, # Limit the total amount of checkpoints\n", |
| 219 | + " evaluation_strategy=\"steps\", # Evaluate during training at each `logging_steps`\n", |
| 220 | + " logging_steps=500, # Log every 500 steps\n", |
| 221 | + " eval_steps=2000, # Evaluate every 2000 steps\n", |
| 222 | + " load_best_model_at_end=True, # Load the best model at the end of training\n", |
| 223 | + " metric_for_best_model=\"loss\", # Use loss to evaluate the best model\n", |
| 224 | + " fp16=False, # Disable mixed precision training for MPS\n", |
| 225 | + ")\n", |
| 226 | + "\n", |
| 227 | + "# Create the learning rate scheduler\n", |
| 228 | + "total_steps = len(train_dataset) // training_args.per_device_train_batch_size * training_args.num_train_epochs\n", |
| 229 | + "optimizer = torch.optim.AdamW(model.parameters(), lr=training_args.learning_rate)\n", |
| 230 | + "lr_scheduler = get_linear_schedule_with_warmup(\n", |
| 231 | + " optimizer,\n", |
| 232 | + " num_warmup_steps=total_steps // 10, # Warm-up for 10% of the total steps\n", |
| 233 | + " num_training_steps=total_steps\n", |
| 234 | + ")\n", |
| 235 | + "\n", |
| 236 | + "# Create Trainer instance\n", |
| 237 | + "trainer = Trainer(\n", |
| 238 | + " model=model,\n", |
| 239 | + " args=training_args,\n", |
| 240 | + " train_dataset=train_dataset,\n", |
| 241 | + " eval_dataset=eval_dataset,\n", |
| 242 | + " tokenizer=tokenizer,\n", |
| 243 | + " optimizers=(optimizer, lr_scheduler)\n", |
| 244 | + ")\n", |
| 245 | + "\n", |
| 246 | + "# Function to monitor system usage including GPU metrics\n", |
| 247 | + "def print_system_usage(stop_event):\n", |
| 248 | + " while not stop_event.is_set():\n", |
| 249 | + " cpu_usage = psutil.cpu_percent()\n", |
| 250 | + " memory_usage = psutil.virtual_memory().percent\n", |
| 251 | + " if torch.cuda.is_available():\n", |
| 252 | + " gpu_alloc_mem = torch.cuda.memory_allocated() / (1024 ** 2)\n", |
| 253 | + " gpu_cached = torch.cuda.memory_reserved() / (1024 ** 2)\n", |
| 254 | + " sys.stdout.write(f\"\\rCPU Usage: {cpu_usage}% | Memory Usage: {memory_usage}% | GPU-Allocated-Memory Usage: {gpu_alloc_mem:.2f}MB | GPU-Cached-Memory Usage: {gpu_cached:.2f}MB\")\n", |
| 255 | + " elif torch.backends.mps.is_available():\n", |
| 256 | + " gpu_alloc_mem = torch.mps.current_allocated_memory() / (1024 ** 2)\n", |
| 257 | + " gpu_driver_mem = torch.mps.driver_allocated_memory() / (1024 ** 2)\n", |
| 258 | + " sys.stdout.write(f\"\\rCPU Usage: {cpu_usage}% | Memory Usage: {memory_usage}% | GPU-Allocated-Memory Usage: {gpu_alloc_mem:.2f}MB | GPU-Driver-Memory Usage: {gpu_driver_mem:.2f}MB\")\n", |
| 259 | + " sys.stdout.flush()\n", |
| 260 | + " time.sleep(300) # Wait to remeasure system usage\n", |
| 261 | + "\n", |
| 262 | + "# Create an event to stop the thread\n", |
| 263 | + "stop_event = threading.Event()\n", |
| 264 | + "\n", |
| 265 | + "# Start the system usage monitoring thread\n", |
| 266 | + "monitoring_thread = threading.Thread(target=print_system_usage, args=(stop_event,))\n", |
| 267 | + "monitoring_thread.start()\n", |
| 268 | + "\n", |
| 269 | + "# Train the model\n", |
| 270 | + "try:\n", |
| 271 | + " trainer.train()\n", |
| 272 | + "except RuntimeError as e:\n", |
| 273 | + " if 'out of memory' in str(e):\n", |
| 274 | + " print(\"CUDA OutOfMemoryError: Out of memory during training. Try reducing the batch size or model size.\")\n", |
| 275 | + " else:\n", |
| 276 | + " raise\n", |
| 277 | + "finally:\n", |
| 278 | + " # Stop the monitoring thread\n", |
| 279 | + " stop_event.set()\n", |
| 280 | + " monitoring_thread.join()\n", |
| 281 | + " if torch.cuda.device_count() > 1:\n", |
| 282 | + " model.module.save_pretrained(model_save_path)\n", |
| 283 | + " else:\n", |
| 284 | + " model.save_pretrained(model_save_path)\n", |
| 285 | + " tokenizer.save_pretrained(model_save_path)\n", |
| 286 | + "\n", |
| 287 | + "print(\"Training complete and model saved.\")" |
| 288 | + ] |
| 289 | + }, |
| 290 | + { |
| 291 | + "cell_type": "code", |
| 292 | + "execution_count": null, |
| 293 | + "metadata": {}, |
| 294 | + "outputs": [], |
| 295 | + "source": [ |
| 296 | + "# Results\n", |
| 297 | + "results = trainer.evaluate(eval_dataset)\n", |
| 298 | + "print(\"Evaluation Results:\", results)" |
| 299 | + ] |
| 300 | + } |
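| | + , |
| | + { |
| | + "cell_type": "markdown", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "### Sanity-Check Inference\n", |
| | + "A minimal sketch of reloading the saved LoRA adapter and generating a fault-occurrence prediction for one row. The `Fault Occurrence Rate:` cue mirrors the training format above; the generation settings are illustrative, not tuned." |
| | + ] |
| | + }, |
| | + { |
| | + "cell_type": "code", |
| | + "execution_count": null, |
| | + "metadata": {}, |
| | + "outputs": [], |
| | + "source": [ |
| | + "from peft import PeftModel\n", |
| | + "\n", |
| | + "# Reload the base model and attach the saved LoRA adapter\n", |
| | + "base = GPT2LMHeadModel.from_pretrained(model_name)\n", |
| | + "base.resize_token_embeddings(len(tokenizer))\n", |
| | + "ft_model = PeftModel.from_pretrained(base, model_save_path).to(device1)\n", |
| | + "ft_model.eval()\n", |
| | + "\n", |
| | + "# Prompt with one row's KPIs plus the cue used during training\n", |
| | + "prompt = data['input_text'].iloc[0] + \" Fault Occurrence Rate:\"\n", |
| | + "inputs = tokenizer(prompt, return_tensors=\"pt\", truncation=True, max_length=512).to(device1)\n", |
| | + "with torch.no_grad():\n", |
| | + "    out = ft_model.generate(**inputs, max_new_tokens=8, pad_token_id=tokenizer.eos_token_id)\n", |
| | + "# Decode only the newly generated tokens\n", |
| | + "print(tokenizer.decode(out[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True))" |
| | + ] |
| | + } |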
| 301 | + ], |
| 302 | + "metadata": { |
| 303 | + "kernelspec": { |
| 304 | + "display_name": "Python 3", |
| 305 | + "language": "python", |
| 306 | + "name": "python3" |
| 307 | + }, |
| 308 | + "language_info": { |
| 309 | + "codemirror_mode": { |
| 310 | + "name": "ipython", |
| 311 | + "version": 3 |
| 312 | + }, |
| 313 | + "file_extension": ".py", |
| 314 | + "mimetype": "text/x-python", |
| 315 | + "name": "python", |
| 316 | + "nbconvert_exporter": "python", |
| 317 | + "pygments_lexer": "ipython3", |
| 318 | + "version": "3.9.6" |
| 319 | + } |
| 320 | + }, |
| 321 | + "nbformat": 4, |
| 322 | + "nbformat_minor": 2 |
| 323 | +} |