From 905d587186be4dbea432d6d1abcc460369a29970 Mon Sep 17 00:00:00 2001 From: Afnan <72243953+Afnanksalal@users.noreply.github.com> Date: Wed, 5 Jun 2024 21:45:53 +0530 Subject: [PATCH] Add CPU support to se_extractor.py Implemented an if statement to check whether the device is CUDA or CPU: if a GPU device is available, use the GPU for extraction; otherwise, fall back to the CPU. Previously there was no CPU extraction support by default. This way we can utilize CPU power for small-scale extraction without wasting GPU resources. --- openvoice/se_extractor.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/openvoice/se_extractor.py b/openvoice/se_extractor.py index a41c2566..16bcda21 100644 --- a/openvoice/se_extractor.py +++ b/openvoice/se_extractor.py @@ -13,13 +13,18 @@ import librosa from whisper_timestamped.transcribe import get_audio_tensor, get_vad_segments +# Check if CUDA is available and use it if so, otherwise use CPU +device = "cuda" if torch.cuda.is_available() else "cpu" +print(f"Using device: {device}") + model_size = "medium" -# Run on GPU with FP16 +# Run on GPU with FP16 if CUDA is available, otherwise use CPU with FP32 model = None + def split_audio_whisper(audio_path, audio_name, target_dir='processed'): global model if model is None: - model = WhisperModel(model_size, device="cuda", compute_type="float16") + model = WhisperModel(model_size, device=device, compute_type="float16" if device == "cuda" else "float32") audio = AudioSegment.from_file(audio_path) max_len = len(audio) @@ -150,4 +155,3 @@ def get_se(audio_path, vc_model, target_dir='processed', vad=True): raise NotImplementedError('No audio segments found!') return vc_model.extract_se(audio_segs, se_save_path=se_path), audio_name -