
Commit 2be8986: Add files via upload
1 parent 7605a1e commit 2be8986

File tree: 2 files changed (+389, -0 lines)


5gnetops/5gran-modelserver.py

Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
import os
import torch
from flask import Flask, request, jsonify
from transformers import GPT2LMHeadModel, AutoTokenizer
import logging

app = Flask(__name__)

# Setup logging
logging.basicConfig(level=logging.DEBUG)

# Load the trained model and tokenizer
model_path = "models/5g_oss_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path)

# Ensure the model is on the correct device
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model.to(device)

@app.route('/')
def home():
    return "GPT-2 5G Model Serving"

@app.route('/predict', methods=['POST'])
def predict():
    try:
        data = request.json
        logging.debug(f"Received data: {data}")

        input_data = data.get('input_data', '')
        question = data.get('question', '')

        if not input_data or not question:
            return jsonify({'error': 'Invalid input data or question'}), 400

        # Prepare the input text for the model
        input_text = f"Data: {input_data}\nQuestion: {question}\nAnswer:"

        # Tokenize the input text
        inputs = tokenizer(input_text, return_tensors='pt').to(device)
        logging.debug(f"Tokenized inputs: {inputs}")

        # Generate predictions
        try:
            with torch.no_grad():
                outputs = model.generate(**inputs, max_length=512, num_return_sequences=1)
            logging.debug(f"Model outputs: {outputs}")
        except Exception as e:
            logging.error(f"Error during model generation: {e}")
            return jsonify({'error': 'Error during model generation'}), 500

        # Decode the generated text
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        logging.debug(f"Generated text: {generated_text}")

        # Extract the answer part
        answer = generated_text.split("Answer:")[-1].strip()

        return jsonify({'answer': answer})
    except Exception as e:
        app.logger.error(f"Error during prediction: {e}", exc_info=True)
        return jsonify({'error': 'Internal Server Error'}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
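Once the server is running, the /predict endpoint can be exercised with a small client. A minimal sketch, assuming the server is reachable on localhost:5000; the KPI string and question in the payload are illustrative values only:

import requests

# Hypothetical example payload; keys match the handler above
payload = {
    "input_data": "Cell Availability: 99.2 Throughput: 410 Latency: 18 Packet Loss Rate: 0.4",
    "question": "What is the expected fault occurrence rate?"
}

resp = requests.post("http://localhost:5000/predict", json=payload, timeout=60)
print(resp.status_code, resp.json())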

5gnetops/5gran-predictions.ipynb

Lines changed: 323 additions & 0 deletions
@@ -0,0 +1,323 @@
Cell 1 (markdown)

# 5G Network Operations Insights with Fine-Tuning of GPT-2 (the smallest GPT-2 variant, with 124M parameters)
## Project Overview
Author: Fatih E. NAR
This project aims to deliver 5G network operations insights by fine-tuning a compact LLM on network performance data.
Model card: https://huggingface.co/openai-community/gpt2
Cell 2 (code)

%pip install -r requirements.txt
%pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
Cell 3 (code)

import lzma
import shutil
import pandas as pd
import os
import torch
import psutil
import threading
import sys
import time
import gc
from datasets import Dataset
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments, get_linear_schedule_with_warmup

from peft import get_peft_model, LoraConfig, TaskType

# Model save path and base model selection
model_save_path = "models/5g_oss_model"
#model_name = "distilgpt2"
model_name = "gpt2"

# Set TOKENIZERS_PARALLELISM to false to avoid warnings
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Clear GPU cache before starting
torch.cuda.empty_cache()
gc.collect()

# Cap memory usage to a specific size (e.g., 8 GB) for CUDA
max_memory_gb = 8
max_memory_mb = max_memory_gb * 1024
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = f'max_split_size_mb:{max_memory_mb}'

# Check if any accelerator is available
if torch.cuda.is_available():
    print("Using CUDA (NVIDIA GPU)")
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
    # Leverage multiple GPUs if available
    device1 = torch.device("cuda:0")
    device2 = torch.device("cuda:1") if torch.cuda.device_count() > 1 else torch.device("cuda:0")
    # Clear GPU cache before starting
    torch.cuda.empty_cache()
# Check if MPS (Apple Silicon GPU) is available
elif torch.backends.mps.is_available():
    os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
    os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
    # MPS exposes a single device, so both model halves share it
    device1 = torch.device("mps:0")
    device2 = torch.device("mps:0")
    print("Using MPS")
else:
    device1 = torch.device("cpu")
    device2 = torch.device("cpu")
    print("Using CPU")

# Extract the .xz file
with lzma.open('data/5G_netops_data.csv.xz', 'rb') as f_in:
    with open('data/5G_netops_data.csv', 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

# Load the synthetic telecom data
data_path = "data/5G_netops_data.csv"
data = pd.read_csv(data_path)

# Display basic information about the full dataset
data.info()
data.head()
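As a side note, the manual lzma extraction above can optionally be skipped, since pandas can read the xz-compressed CSV directly. A minimal sketch, assuming the same file path:

import pandas as pd

# pandas infers compression from the .xz suffix; compression='xz' makes it explicit
data = pd.read_csv("data/5G_netops_data.csv.xz", compression="xz")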
Cell 4 (code)

# Fill NaN values and prepare input and target texts
# Ensure all NaN values are filled with empty strings
data = data.fillna('')

# Ensure 'Zip' column is treated as a string
data['Zip'] = data['Zip'].astype(str)

# Create the input_text column
data['input_text'] = data.apply(lambda row: f"Date: {row['Date']} Cell Availability: {row['Cell Availability (%)']} MTTR: {row['MTTR (hours)']} Throughput: {row['Throughput (Mbps)']} Latency: {row['Latency (ms)']} Packet Loss Rate: {row['Packet Loss Rate (%)']} Call Drop Rate: {row['Call Drop Rate (%)']} Handover Success Rate: {row['Handover Success Rate (%)']} Alarm Count: {row['Alarm Count']} Critical Alarm Count: {row['Critical Alarm Count']} Parameter Changes: {row['Parameter Changes']} Successful Configuration Changes: {row['Successful Configuration Changes (%)']} Data Usage: {row['Data Usage (GB)']} User Count: {row['User Count']} Signal Strength: {row['Signal Strength (dBm)']} Jitter: {row['Jitter (ms)']} Connection Setup Success Rate: {row['Connection Setup Success Rate (%)']} Security Incidents: {row['Security Incidents']} Authentication Failures: {row['Authentication Failures']} Temperature: {row['Temperature (°C)']} Humidity: {row['Humidity (%)']} Weather: {row['Weather']} Issue Reported: {row['Issue Reported']} City: {row['City']} State: {row['State']} Zip: {row['Zip']}", axis=1)

# Create the target_text column
data['target_text'] = data['Fault Occurrence Rate'].astype(str)

# Convert to a HuggingFace Dataset
dataset = Dataset.from_pandas(data)

# Split the dataset into training and evaluation sets
train_test_split = dataset.train_test_split(test_size=0.1)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

# Check the loaded dataset
print(f"Training Dataset size: {len(train_dataset)}")
print(f"Evaluation Dataset size: {len(eval_dataset)}")
print(train_dataset[0])
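If reproducible train/eval splits are desired across runs, datasets.Dataset.train_test_split accepts a seed. A small sketch of that variant (the seed value 42 is arbitrary):

# Same 90/10 split, but deterministic across runs
train_test_split = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']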
Cell 5 (code)

# Load the tokenizer from the pretrained model
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Add the pad token if it doesn't exist
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})
else:
    tokenizer.pad_token = tokenizer.eos_token

# Save the tokenizer
tokenizer.save_pretrained(model_save_path)

model = GPT2LMHeadModel.from_pretrained(model_name)
model.resize_token_embeddings(len(tokenizer))
# Save the new model
model.save_pretrained(model_save_path)

# Define preprocessing function
def preprocess_function(examples):
    inputs = examples['input_text']
    targets = examples['target_text']
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding='max_length')
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(targets, max_length=512, truncation=True, padding='max_length')
    model_inputs['labels'] = labels['input_ids']
    return model_inputs

# Apply preprocessing
train_dataset = train_dataset.map(preprocess_function, batched=True)
eval_dataset = eval_dataset.map(preprocess_function, batched=True)

columns = ['input_ids', 'attention_mask', 'labels']
train_dataset.set_format(type='torch', columns=columns)
eval_dataset.set_format(type='torch', columns=columns)

# Check the tokenized dataset
print(f"Tokenized Training Dataset size: {len(train_dataset)}")
print(f"Tokenized Evaluation Dataset size: {len(eval_dataset)}")
print(train_dataset[0])
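On recent transformers releases, tokenizer.as_target_tokenizer() is deprecated in favor of the text_target argument; for GPT-2 the two should produce the same token ids here, since there is no separate target tokenizer. A hedged equivalent of the preprocessing step, assuming transformers >= 4.21:

def preprocess_function(examples):
    # Tokenize inputs and targets in one pass; text_target replaces as_target_tokenizer()
    model_inputs = tokenizer(examples['input_text'], max_length=512, truncation=True, padding='max_length')
    labels = tokenizer(text_target=examples['target_text'], max_length=512, truncation=True, padding='max_length')
    model_inputs['labels'] = labels['input_ids']
    return model_inputs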
Cell 6 (code)

# PEFT Part
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=2,
    lora_alpha=16,
    lora_dropout=0.05
)

model = get_peft_model(model, lora_config)

# Manually split the model across the two GPUs
model.transformer.h[:6].to(device1)  # First half of layers to GPU 1
model.transformer.h[6:].to(device2)  # Second half of layers to GPU 2
model.transformer.ln_f.to(device2)   # Final normalization layer to GPU 2
model.lm_head.to(device2)            # Language modeling head to GPU 2
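It can be useful to confirm how many parameters LoRA actually leaves trainable; PEFT provides a helper for this. A short check, plus an assumption-labeled variant that targets GPT-2's fused attention projection explicitly instead of relying on PEFT's defaults:

# Report trainable vs. total parameters after wrapping with LoRA
model.print_trainable_parameters()

# Variant (assumption, not used above): name the GPT-2 attention projection explicitly
# lora_config = LoraConfig(task_type=TaskType.CAUSAL_LM, r=2, lora_alpha=16,
#                          lora_dropout=0.05, target_modules=["c_attn"])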
Cell 7 (code)

# Set training arguments
training_args = TrainingArguments(
    output_dir="./results",            # Output directory
    overwrite_output_dir=True,         # Overwrite the content of the output directory
    num_train_epochs=10,               # Number of training epochs
    per_device_train_batch_size=36,    # Batch size per device during training
    gradient_accumulation_steps=12,    # Accumulate gradients over multiple steps
    learning_rate=5e-5,                # Learning rate
    save_steps=2000,                   # Save checkpoint every 2000 steps
    save_total_limit=2,                # Limit the total amount of checkpoints
    evaluation_strategy="steps",       # Evaluate during training every `eval_steps`
    logging_steps=500,                 # Log every 500 steps
    eval_steps=2000,                   # Evaluate every 2000 steps
    load_best_model_at_end=True,       # Load the best model at the end of training
    metric_for_best_model="loss",      # Use loss to evaluate the best model
    fp16=False,                        # Disable mixed precision training for MPS
)

# Create the learning rate scheduler
total_steps = len(train_dataset) // training_args.per_device_train_batch_size * training_args.num_train_epochs
optimizer = torch.optim.AdamW(model.parameters(), lr=training_args.learning_rate)
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=total_steps // 10,  # Warm up for 10% of the total steps
    num_training_steps=total_steps
)

# Create Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    optimizers=(optimizer, lr_scheduler)
)

# Function to monitor system usage including GPU metrics
def print_system_usage(stop_event):
    while not stop_event.is_set():
        cpu_usage = psutil.cpu_percent()
        memory_usage = psutil.virtual_memory().percent
        if torch.cuda.is_available():
            gpu_alloc_mem = torch.cuda.memory_allocated() / (1024 ** 2)
            gpu_cached = torch.cuda.memory_reserved() / (1024 ** 2)
            sys.stdout.write(f"\rCPU Usage: {cpu_usage}% | Memory Usage: {memory_usage}% | GPU-Allocated-Memory Usage: {gpu_alloc_mem:.2f}MB | GPU-Cached-Memory Usage: {gpu_cached:.2f}MB")
        elif torch.backends.mps.is_available():
            gpu_alloc_mem = torch.mps.current_allocated_memory() / (1024 ** 2)
            gpu_driver_mem = torch.mps.driver_allocated_memory() / (1024 ** 2)
            sys.stdout.write(f"\rCPU Usage: {cpu_usage}% | Memory Usage: {memory_usage}% | GPU-Allocated-Memory Usage: {gpu_alloc_mem:.2f}MB | GPU-Driver-Memory Usage: {gpu_driver_mem:.2f}MB")
        sys.stdout.flush()
        time.sleep(300)  # Wait before re-measuring system usage

# Create an event to stop the thread
stop_event = threading.Event()

# Start the system usage monitoring thread
monitoring_thread = threading.Thread(target=print_system_usage, args=(stop_event,))
monitoring_thread.start()

# Train the model
try:
    trainer.train()
except RuntimeError as e:
    if 'out of memory' in str(e):
        print("CUDA OutOfMemoryError: Out of memory during training. Try reducing the batch size or model size.")
    else:
        raise
finally:
    # Stop the monitoring thread
    stop_event.set()
    monitoring_thread.join()
    if torch.cuda.device_count() > 1:
        model.module.save_pretrained(model_save_path)
    else:
        model.save_pretrained(model_save_path)
    tokenizer.save_pretrained(model_save_path)

print("Training complete and model saved.")
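Note that the Flask server above loads the checkpoint with a plain GPT2LMHeadModel.from_pretrained, which does not apply a LoRA adapter by itself. One option, sketched here under the assumption that the adapter weights were saved to model_save_path by the cell above, is to merge the adapter into the base weights so that a plain from_pretrained() picks up the fine-tuned model for serving:

from peft import PeftModel
from transformers import GPT2LMHeadModel

# Reload the base model, apply the saved LoRA adapter, then fold the adapter
# weights into the base weights and overwrite the serving checkpoint
base = GPT2LMHeadModel.from_pretrained(model_name)
base.resize_token_embeddings(len(tokenizer))
merged = PeftModel.from_pretrained(base, model_save_path).merge_and_unload()
merged.save_pretrained(model_save_path)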
Cell 8 (code)

# Results
results = trainer.evaluate(eval_dataset)
print("Evaluation Results:", results)
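The dictionary returned by Trainer.evaluate includes an eval_loss entry (mean cross-entropy over the evaluation set); exponentiating it gives a rough perplexity figure. A small follow-up sketch:

import math

# Perplexity is exp(mean cross-entropy loss) on the evaluation set
print("Eval perplexity:", math.exp(results["eval_loss"]))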
Notebook metadata: kernel "Python 3" (python3), language Python 3.9.6, nbformat 4.2.
