From f54d377814a51268e6c2201fd7ae5fd3f664abd4 Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 17:05:24 +0900 Subject: [PATCH 01/16] TTS-zonos --- Cargo.lock | 126 +++++++++++++++++++++++- Cargo.toml | 5 + scripts/test_tts.sh | 42 ++++++++ src/event.rs | 8 ++ src/handler.rs | 11 +++ src/lib.rs | 4 + src/llm.rs | 19 ++++ src/main.rs | 38 +++++++- src/tts.rs | 229 ++++++++++++++++++++++++++++++++++++++++++++ src/utils.rs | 96 +++++++++++++++++++ 10 files changed, 572 insertions(+), 6 deletions(-) create mode 100644 scripts/test_tts.sh create mode 100644 src/tts.rs create mode 100644 src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index 0800c46..5b165e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -50,6 +50,21 @@ version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "ansi-to-tui" version = "7.0.0" @@ -177,6 +192,12 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "base64" version = "0.22.1" @@ -347,6 +368,20 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets 0.52.6", +] + [[package]] name = "clap" version = "4.5.17" @@ -643,6 +678,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "fdeflate" version = "0.3.4" @@ -1014,6 +1055,29 @@ dependencies = [ "tracing", ] +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core 0.52.0", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -1521,7 +1585,7 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42cf17e9a1800f5f396bc67d193dc9411b59012a5876445ef450d449881e1016" dependencies = [ - "base64", + "base64 0.22.1", "indexmap", "quick-xml", "serde", @@ -1738,7 +1802,7 @@ version = "0.12.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-core", "futures-util", @@ -1765,10 +1829,12 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-rustls", + "tokio-util", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", "webpki-roots", "windows-registry", @@ -1843,7 +1909,7 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "196fe16b00e106300d3e45ecfcb764fa292a535d7326a29a5875c579c7417425" dependencies = [ - "base64", + "base64 0.22.1", "rustls-pki-types", ] @@ -2175,6 +2241,19 @@ dependencies = [ "libc", ] +[[package]] +name = "tempfile" +version = "3.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" +dependencies = [ + "cfg-if", + "fastrand", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + [[package]] name = "tenere" version = "0.11.2" @@ -2182,7 +2261,10 @@ dependencies = [ "ansi-to-tui", "arboard", "async-trait", + "base64 0.13.1", "bat", + "bytes", + "chrono", "clap", "crossterm", "dirs", @@ -2194,6 +2276,7 @@ dependencies = [ "serde_json", "strum", "strum_macros", + "tempfile", "tokio", "toml", "tui-textarea", @@ -2363,6 +2446,19 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-util" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml" version = "0.8.19" @@ -2644,6 +2740,19 @@ version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" +[[package]] +name = "wasm-streams" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "web-sys" version = "0.3.70" @@ -2715,7 +2824,16 @@ version = "0.56.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1de69df01bdf1ead2f4ac895dc77c9351aefff65b2f3db429a343f9cbf05e132" dependencies = [ - "windows-core", + "windows-core 0.56.0", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ "windows-targets 0.52.6", ] diff --git a/Cargo.toml b/Cargo.toml index b75498b..025c316 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ futures = "0.3" reqwest = { version = "0.12", default-features = false, features = [ "json", "rustls-tls", + "stream", # Add this feature for byte streaming ] } ratatui = { version = "0.29", features = ["all-widgets"] } regex = "1" @@ -32,6 +33,10 @@ tokio = { version = "1", features = ["full"] } toml = { version = "0.8" } tui-textarea = "0.7" unicode-width = "0.2" +base64 = "0.13" +tempfile = "3" +bytes = "1.5.0" +chrono = "0.4" # For timestamping debug logs [profile.release] lto = "fat" diff --git a/scripts/test_tts.sh b/scripts/test_tts.sh new file mode 100644 index 0000000..2301173 --- /dev/null +++ b/scripts/test_tts.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# A test script to verify your audio playback system works + +echo "Testing audio playback system..." + +# Check for media players +echo "Checking for media players..." +which mpv >/dev/null && echo "✓ mpv found" || echo "✗ mpv not found" +which ffplay >/dev/null && echo "✓ ffplay found" || echo "✗ ffplay not found" +which aplay >/dev/null && echo "✓ aplay found" || echo "✗ aplay not found" + +# Generate a test tone using sox (if available) +if which sox >/dev/null; then + echo "Generating test tone with sox..." + sox -n /tmp/test_tone.mp3 synth 2 sine 440 + + # Try to play with each player + echo "Playing with mpv..." + mpv /tmp/test_tone.mp3 --no-terminal >/dev/null 2>&1 && echo "✓ mpv playback works" || echo "✗ mpv playback failed" + + if which ffplay >/dev/null; then + echo "Playing with ffplay..." + ffplay -nodisp -autoexit -loglevel quiet /tmp/test_tone.mp3 >/dev/null 2>&1 && echo "✓ ffplay playback works" || echo "✗ ffplay playback failed" + fi + + if which aplay >/dev/null; then + # Convert to wav for aplay + sox /tmp/test_tone.mp3 /tmp/test_tone.wav + echo "Playing with aplay..." + aplay /tmp/test_tone.wav >/dev/null 2>&1 && echo "✓ aplay playback works" || echo "✗ aplay playback failed" + fi +else + echo "Sox not found, skipping audio playback tests" + echo "Install sox with: sudo apt-get install sox (Debian/Ubuntu)" +fi + +# Test API endpoint +echo "Testing TTS API endpoint..." +curl -s "http://0.0.0.0:8000/v1/audio/models" | grep model && echo "✓ API is responding" || echo "✗ API not responding" + +echo "Done!" diff --git a/src/event.rs b/src/event.rs index de62f09..c233bbe 100644 --- a/src/event.rs +++ b/src/event.rs @@ -15,6 +15,14 @@ pub enum Event { Resize(u16, u16), LLMEvent(LLMAnswer), Notification(Notification), + TTSEvent(TTSEvent), +} + +#[derive(Debug, Clone)] +pub enum TTSEvent { + PlayText(String), + Complete, + Error(String), } #[allow(dead_code)] diff --git a/src/handler.rs b/src/handler.rs index 91c864c..bc26a49 100644 --- a/src/handler.rs +++ b/src/handler.rs @@ -1,5 +1,6 @@ use crate::llm::{LLMAnswer, LLMRole}; use crate::{chat::Chat, prompt::Mode}; +use crate::event::TTSEvent; // Add this import use crate::{ app::{App, AppResult, FocusedBlock}, @@ -180,6 +181,16 @@ pub async fn handle_key_events( _ => {} }, + // Add a keyboard shortcut to read the current response with TTS + KeyCode::Char('t') if key_event.modifiers.contains(KeyModifiers::CONTROL) => { + // Play the current answer with TTS + if !app.chat.answer.plain_answer.is_empty() { + sender.send(Event::TTSEvent(TTSEvent::PlayText( + app.chat.answer.plain_answer.clone(), + )))?; + } + } + _ => {} } diff --git a/src/lib.rs b/src/lib.rs index 24035c8..983dddd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,3 +31,7 @@ pub mod chat; pub mod llamacpp; pub mod ollama; + +pub mod tts; + +pub mod utils; diff --git a/src/llm.rs b/src/llm.rs index 5da3750..a470f82 100644 --- a/src/llm.rs +++ b/src/llm.rs @@ -11,6 +11,7 @@ use strum_macros::EnumIter; use tokio::sync::mpsc::UnboundedSender; use std::sync::Arc; +use crate::utils::parse_json_safely; #[async_trait] pub trait LLM: Send + Sync { @@ -57,4 +58,22 @@ impl LLMModel { LLMBackend::Ollama => Box::new(Ollama::new(config.ollama.clone().unwrap())), } } + + fn parse_response(&self, response: &str) -> Result { + match parse_json_safely(response) { + Ok(json) => { + // Process valid JSON + // ... + } + Err(e) => { + // Handle JSON parse error more gracefully + log::error!("Failed to parse LLM response: {}", e); + log::debug!("Problematic response: {}", response); + + // Either return a meaningful error or try to extract usable content + // from the raw response without relying on JSON structure + Err(format!("Failed to parse LLM response: {}", e)) + } + } + } } diff --git a/src/main.rs b/src/main.rs index 96fe544..0dfa72d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,11 +3,12 @@ use ratatui::Terminal; use std::{env, io, path::PathBuf}; use tenere::app::{App, AppResult}; use tenere::config::Config; -use tenere::event::{Event, EventHandler}; +use tenere::event::{Event, EventHandler, TTSEvent}; use tenere::formatter::Formatter; use tenere::handler::handle_key_events; use tenere::llm::{LLMAnswer, LLMRole}; use tenere::tui::Tui; +use tenere::tts; use tenere::llm::LLMModel; @@ -67,12 +68,24 @@ async fn main() -> AppResult<()> { Event::Resize(_, _) => {} Event::LLMEvent(LLMAnswer::Answer(answer)) => { app.chat - .handle_answer(LLMAnswer::Answer(answer), &formatter); + .handle_answer(LLMAnswer::Answer(answer.clone()), &formatter); + + // We don't want to trigger TTS for every tiny chunk + // Only send longer message portions to avoid choppy audio + if answer.len() > 80 && answer.contains('.') { + tui.events.sender.send(Event::TTSEvent(TTSEvent::PlayText(answer)))?; + } } Event::LLMEvent(LLMAnswer::EndAnswer) => { { let mut llm = llm.lock().await; llm.append_chat_msg(app.chat.answer.plain_answer.clone(), LLMRole::ASSISTANT); + + // Play the full response with TTS when it completes + let final_answer = app.chat.answer.plain_answer.clone(); + if !final_answer.is_empty() { + tui.events.sender.send(Event::TTSEvent(TTSEvent::PlayText(final_answer)))?; + } } app.chat.handle_answer(LLMAnswer::EndAnswer, &formatter); @@ -87,9 +100,30 @@ async fn main() -> AppResult<()> { Event::Notification(notification) => { app.notifications.push(notification); } + Event::TTSEvent(tts_event) => { + handle_tts_event(tts_event).await; + } } } tui.exit()?; Ok(()) } + +async fn handle_tts_event(event: TTSEvent) { + match event { + TTSEvent::PlayText(text) => { + // Log to help debug + eprintln!("Playing TTS: {} characters", text.len()); + if let Err(e) = tts::play_tts(&text).await { + eprintln!("TTS error: {}", e); + } + }, + TTSEvent::Complete => { + // TTS playback completed + }, + TTSEvent::Error(err) => { + eprintln!("TTS error: {}", err); + } + } +} diff --git a/src/tts.rs b/src/tts.rs new file mode 100644 index 0000000..4faf5d4 --- /dev/null +++ b/src/tts.rs @@ -0,0 +1,229 @@ +use std::error::Error; +use std::process::Stdio; +use tokio::io::AsyncWriteExt; +use serde::Serialize; +use futures::StreamExt; +use reqwest::Client; +use reqwest::header; +use tokio::process::Command as TokioCommand; + +// Debug helper macro - you can remove this after debugging +macro_rules! debug { + ($($arg:tt)*) => { + // Log to a file for debugging + if let Ok(mut file) = std::fs::OpenOptions::new().create(true).append(true).open("/tmp/tenere_tts_debug.log") { + use std::io::Write; + let _ = writeln!(&mut file, "[{}] {}", + chrono::Local::now().format("%H:%M:%S%.3f"), + format!($($arg)*)); + } + }; +} + +/// Request structure for the new TTS API +#[derive(Debug, Serialize)] +struct TTSRequest { + model: String, + input: String, + #[serde(skip_serializing_if = "Option::is_none")] + voice: Option, + speed: f32, + language: String, + #[serde(skip_serializing_if = "Option::is_none")] + emotion: Option, + response_format: String, +} + +/// Play text through TTS service with pure streaming (no file storage) +pub async fn play_tts(text: &str) -> Result<(), Box> { + debug!("TTS request for text: {}", text); + + // Add a terminal bell to indicate TTS is starting (optional) + print!("\x07"); // Bell character + + // Skip empty or whitespace-only text + let text = text.trim(); + if text.is_empty() { + debug!("Skipping TTS for empty text"); + return Ok(()); + } + + // Build the request with the API parameters + let request = TTSRequest { + model: "Zyphra/Zonos-v0.1-transformer".to_string(), + input: text.to_string(), + voice: None, + speed: 1.0, + language: "en-us".to_string(), + emotion: None, + response_format: "mp3".to_string(), + }; + + debug!("Sending request to TTS API on port 8000"); + + // Send request to TTS service + let client = Client::new(); + let response = client.post("http://0.0.0.0:8000/v1/audio/speech") + .json(&request) + .send() + .await?; + + let status = response.status(); + debug!("Got response with status: {}", status); + + if !status.is_success() { + let error_text = response.text().await?; + debug!("Error response: {}", error_text); + return Err(format!("TTS request failed with status: {}, body: {}", status, error_text).into()); + } + + // Get the content type to pass to player + let content_type = response.headers() + .get(header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .unwrap_or("audio/mp3") + .to_string(); + + debug!("Content type: {}", content_type); + + // Stream the audio directly to the player + stream_audio(response, &content_type).await +} + +/// Stream audio data directly to a player +async fn stream_audio( + response: reqwest::Response, + content_type: &str +) -> Result<(), Box> { + debug!("Starting audio streaming"); + + // Set up a suitable player based on what's available + debug!("Setting up audio player"); + let (mut player_child, mut player_stdin) = match setup_streaming_player(content_type) { + Ok(player) => player, + Err(e) => { + debug!("Player setup failed: {}", e); + return Err(e); + } + }; + + // Process chunks as they arrive + let mut stream = stream_helpers::get_stream(response); + let mut total_bytes = 0; + let mut chunk_count = 0; + + debug!("Starting to receive audio chunks"); + while let Some(chunk_result) = stream.next().await { + match chunk_result { + Ok(chunk) => { + chunk_count += 1; + total_bytes += chunk.len(); + debug!("Received chunk #{} - {} bytes", chunk_count, chunk.len()); + + // Write directly to player's stdin + if let Err(e) = player_stdin.write_all(&chunk).await { + debug!("Error writing to player: {}", e); + return Err(e.into()); + } + }, + Err(e) => { + debug!("Error in stream: {}", e); + return Err(e.into()); + } + } + } + + debug!("All chunks received. Total: {} chunks, {} bytes", chunk_count, total_bytes); + + // Close stdin to signal end of input + drop(player_stdin); + debug!("Closed stdin, waiting for player to finish"); + + // Wait for player to finish + let status = player_child.wait().await?; + + if !status.success() { + let code = status.code().unwrap_or(-1); + debug!("Player exited with error code: {}", code); + return Err(format!("Audio player exited with code {}", code).into()); + } + + debug!("Audio playback completed successfully"); + Ok(()) +} + +// Helper function to get a stream from response +mod stream_helpers { + use futures::Stream; + use futures::StreamExt; + use std::pin::Pin; + + pub fn get_stream( + response: reqwest::Response + ) -> Pin, reqwest::Error>> + Send>> { + Box::pin(response.bytes_stream().map(|result| { + result.map(|bytes| bytes.to_vec()) + })) + } +} + +/// Set up a streaming audio player based on what's available +fn setup_streaming_player(content_type: &str) -> Result<(tokio::process::Child, tokio::process::ChildStdin), Box> { + // Try to find which players are available on the system + let mpv_available = std::process::Command::new("mpv").arg("--version").output().is_ok(); + let ffplay_available = std::process::Command::new("ffplay").arg("-version").output().is_ok(); + let aplay_available = std::process::Command::new("aplay").arg("--version").output().is_ok(); + + debug!("Available players: mpv={}, ffplay={}, aplay={}", + mpv_available, ffplay_available, aplay_available); + + // Try mpv first (most versatile) + if mpv_available { + debug!("Trying to use mpv for playback"); + let mut command = TokioCommand::new("mpv") + .args(["-", "--no-cache", "--no-terminal", "--audio-buffer=0.1"]) + .stdin(Stdio::piped()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn()?; + + let stdin = command.stdin.take() + .ok_or_else(|| "Failed to open mpv stdin".to_string())?; + debug!("Successfully started mpv"); + return Ok((command, stdin)); + } + + // Try ffplay as second option + if ffplay_available { + debug!("Trying to use ffplay for playback"); + let mut command = TokioCommand::new("ffplay") + .args(["-i", "pipe:0", "-autoexit", "-nodisp", "-hide_banner", "-loglevel", "quiet"]) + .stdin(Stdio::piped()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn()?; + + let stdin = command.stdin.take() + .ok_or_else(|| "Failed to open ffplay stdin".to_string())?; + debug!("Successfully started ffplay"); + return Ok((command, stdin)); + } + + // For aplay (Linux) - only works with WAV + if aplay_available && content_type.contains("wav") { + debug!("Trying to use aplay for playback"); + let mut command = TokioCommand::new("aplay") + .stdin(Stdio::piped()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn()?; + + let stdin = command.stdin.take() + .ok_or_else(|| "Failed to open aplay stdin".to_string())?; + debug!("Successfully started aplay"); + return Ok((command, stdin)); + } + + debug!("No suitable player found!"); + Err("No suitable streaming audio player found. Please install mpv, ffplay, or aplay.".into()) +} diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..02a06db --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,96 @@ +use serde_json::{Result as JsonResult, Value}; + +/// Attempts to parse a JSON string safely, handling common issues +/// that might cause "EOF while parsing a string" errors +pub fn parse_json_safely(json_str: &str) -> JsonResult { + // Try parsing normally first + let result = serde_json::from_str::(json_str); + + if result.is_ok() { + return result; + } + + // If normal parsing fails, try to fix common issues + + // 1. Try to fix unescaped quotes in strings + let mut fixed_json = String::with_capacity(json_str.len()); + let mut in_string = false; + let mut prev_char = '\0'; + + for c in json_str.chars() { + if c == '"' && prev_char != '\\' { + in_string = !in_string; + } + + if c == '\n' && in_string { + // Replace newlines inside strings with \n + fixed_json.push_str("\\n"); + } else if c == '\r' && in_string { + // Skip or replace carriage returns + continue; + } else { + fixed_json.push(c); + } + + prev_char = c; + } + + // 2. If we ended with an open string, close it + if in_string { + fixed_json.push('"'); + } + + // Try parsing the fixed JSON + let result = serde_json::from_str::(&fixed_json); + + if result.is_ok() { + return result; + } + + // 3. If all else fails, try to add missing closing braces/brackets + // Count opening and closing braces/brackets + let open_braces = json_str.chars().filter(|&c| c == '{').count(); + let close_braces = json_str.chars().filter(|&c| c == '}').count(); + let open_brackets = json_str.chars().filter(|&c| c == '[').count(); + let close_brackets = json_str.chars().filter(|&c| c == ']').count(); + + let mut fixed_json = fixed_json; + + // Add missing closing braces + for _ in 0..(open_braces - close_braces) { + fixed_json.push('}'); + } + + // Add missing closing brackets + for _ in 0..(open_brackets - close_brackets) { + fixed_json.push(']'); + } + + serde_json::from_str::(&fixed_json) +} + +/// Truncates a string to the specified maximum length, +/// ensuring the result is valid JSON if possible +pub fn truncate_json_safely(json_str: &str, max_length: usize) -> String { + if json_str.len() <= max_length { + return json_str.to_string(); + } + + // Try to find a good cutoff point that doesn't break JSON structure + let truncated = &json_str[..max_length]; + + // Find the last complete JSON object or string + if let Some(last_brace) = truncated.rfind('}') { + return json_str[..=last_brace].to_string(); + } else if let Some(last_bracket) = truncated.rfind(']') { + return json_str[..=last_bracket].to_string(); + } else if let Some(last_quote) = truncated.rfind('"') { + // Make sure this is an actual closing quote, not an escape sequence + if truncated.chars().nth(last_quote.saturating_sub(1)) != Some('\\') { + return json_str[..=last_quote].to_string(); + } + } + + // If we can't find a clean break point, just truncate + truncated.to_string() +} From 904b8ddc43937e8227fe756e897939e80e029039 Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 17:18:05 +0900 Subject: [PATCH 02/16] feat: add keyboard shortcut for TTS playback and clean up unused code --- src/handler.rs | 22 ++++++------ src/lib.rs | 4 +-- src/llm.rs | 20 +---------- src/utils.rs | 96 -------------------------------------------------- 4 files changed, 13 insertions(+), 129 deletions(-) delete mode 100644 src/utils.rs diff --git a/src/handler.rs b/src/handler.rs index bc26a49..726560b 100644 --- a/src/handler.rs +++ b/src/handler.rs @@ -1,6 +1,6 @@ use crate::llm::{LLMAnswer, LLMRole}; use crate::{chat::Chat, prompt::Mode}; -use crate::event::TTSEvent; // Add this import +use crate::event::TTSEvent; use crate::{ app::{App, AppResult, FocusedBlock}, @@ -39,6 +39,16 @@ pub async fn handle_key_events( .store(true, std::sync::atomic::Ordering::Relaxed); } + // Read the current response with TTS + KeyCode::Char('l') if key_event.modifiers.contains(KeyModifiers::CONTROL) => { + // Play the current answer with TTS + if !app.chat.answer.plain_answer.is_empty() { + sender.send(Event::TTSEvent(TTSEvent::PlayText( + app.chat.answer.plain_answer.clone(), + )))?; + } + } + // scroll down KeyCode::Char('j') | KeyCode::Down => match app.focused_block { FocusedBlock::History => { @@ -181,16 +191,6 @@ pub async fn handle_key_events( _ => {} }, - // Add a keyboard shortcut to read the current response with TTS - KeyCode::Char('t') if key_event.modifiers.contains(KeyModifiers::CONTROL) => { - // Play the current answer with TTS - if !app.chat.answer.plain_answer.is_empty() { - sender.send(Event::TTSEvent(TTSEvent::PlayText( - app.chat.answer.plain_answer.clone(), - )))?; - } - } - _ => {} } diff --git a/src/lib.rs b/src/lib.rs index 983dddd..0c8d6b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,6 +32,4 @@ pub mod llamacpp; pub mod ollama; -pub mod tts; - -pub mod utils; +pub mod tts; \ No newline at end of file diff --git a/src/llm.rs b/src/llm.rs index a470f82..73a13fc 100644 --- a/src/llm.rs +++ b/src/llm.rs @@ -11,7 +11,6 @@ use strum_macros::EnumIter; use tokio::sync::mpsc::UnboundedSender; use std::sync::Arc; -use crate::utils::parse_json_safely; #[async_trait] pub trait LLM: Send + Sync { @@ -59,21 +58,4 @@ impl LLMModel { } } - fn parse_response(&self, response: &str) -> Result { - match parse_json_safely(response) { - Ok(json) => { - // Process valid JSON - // ... - } - Err(e) => { - // Handle JSON parse error more gracefully - log::error!("Failed to parse LLM response: {}", e); - log::debug!("Problematic response: {}", response); - - // Either return a meaningful error or try to extract usable content - // from the raw response without relying on JSON structure - Err(format!("Failed to parse LLM response: {}", e)) - } - } - } -} +} \ No newline at end of file diff --git a/src/utils.rs b/src/utils.rs deleted file mode 100644 index 02a06db..0000000 --- a/src/utils.rs +++ /dev/null @@ -1,96 +0,0 @@ -use serde_json::{Result as JsonResult, Value}; - -/// Attempts to parse a JSON string safely, handling common issues -/// that might cause "EOF while parsing a string" errors -pub fn parse_json_safely(json_str: &str) -> JsonResult { - // Try parsing normally first - let result = serde_json::from_str::(json_str); - - if result.is_ok() { - return result; - } - - // If normal parsing fails, try to fix common issues - - // 1. Try to fix unescaped quotes in strings - let mut fixed_json = String::with_capacity(json_str.len()); - let mut in_string = false; - let mut prev_char = '\0'; - - for c in json_str.chars() { - if c == '"' && prev_char != '\\' { - in_string = !in_string; - } - - if c == '\n' && in_string { - // Replace newlines inside strings with \n - fixed_json.push_str("\\n"); - } else if c == '\r' && in_string { - // Skip or replace carriage returns - continue; - } else { - fixed_json.push(c); - } - - prev_char = c; - } - - // 2. If we ended with an open string, close it - if in_string { - fixed_json.push('"'); - } - - // Try parsing the fixed JSON - let result = serde_json::from_str::(&fixed_json); - - if result.is_ok() { - return result; - } - - // 3. If all else fails, try to add missing closing braces/brackets - // Count opening and closing braces/brackets - let open_braces = json_str.chars().filter(|&c| c == '{').count(); - let close_braces = json_str.chars().filter(|&c| c == '}').count(); - let open_brackets = json_str.chars().filter(|&c| c == '[').count(); - let close_brackets = json_str.chars().filter(|&c| c == ']').count(); - - let mut fixed_json = fixed_json; - - // Add missing closing braces - for _ in 0..(open_braces - close_braces) { - fixed_json.push('}'); - } - - // Add missing closing brackets - for _ in 0..(open_brackets - close_brackets) { - fixed_json.push(']'); - } - - serde_json::from_str::(&fixed_json) -} - -/// Truncates a string to the specified maximum length, -/// ensuring the result is valid JSON if possible -pub fn truncate_json_safely(json_str: &str, max_length: usize) -> String { - if json_str.len() <= max_length { - return json_str.to_string(); - } - - // Try to find a good cutoff point that doesn't break JSON structure - let truncated = &json_str[..max_length]; - - // Find the last complete JSON object or string - if let Some(last_brace) = truncated.rfind('}') { - return json_str[..=last_brace].to_string(); - } else if let Some(last_bracket) = truncated.rfind(']') { - return json_str[..=last_bracket].to_string(); - } else if let Some(last_quote) = truncated.rfind('"') { - // Make sure this is an actual closing quote, not an escape sequence - if truncated.chars().nth(last_quote.saturating_sub(1)) != Some('\\') { - return json_str[..=last_quote].to_string(); - } - } - - // If we can't find a clean break point, just truncate - truncated.to_string() -} From 5a33faf5f5b10b0a648f47e8b21a728ddd6d509e Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 17:28:34 +0900 Subject: [PATCH 03/16] feat: add TTS configuration support and refactor TTS handling --- src/config.rs | 24 ++++++++++++++++++++++++ src/main.rs | 8 ++++---- src/tts.rs | 9 +++++---- 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/config.rs b/src/config.rs index 00cb77e..0636be2 100644 --- a/src/config.rs +++ b/src/config.rs @@ -19,6 +19,9 @@ pub struct Config { pub llamacpp: Option, pub ollama: Option, + + #[serde(default)] + pub tts: TTSConfig, } pub fn default_llm_backend() -> LLMBackend { @@ -73,6 +76,27 @@ pub struct OllamaConfig { pub model: String, } +// TTS +#[derive(Deserialize, Debug, Clone)] +pub struct TTSConfig { + #[serde(default = "TTSConfig::default_url")] + pub url: String +} + +impl Default for TTSConfig { + fn default() -> Self { + Self { + url: Self::default_url(), + } + } +} + +impl TTSConfig { + pub fn default_url() -> String { + String::from("http://0.0.0.0:8000/v1/audio/speech") + } +} + #[derive(Deserialize, Debug)] pub struct KeyBindings { #[serde(default = "KeyBindings::default_show_help")] diff --git a/src/main.rs b/src/main.rs index 0dfa72d..78b166d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,7 @@ use ratatui::backend::CrosstermBackend; use ratatui::Terminal; use std::{env, io, path::PathBuf}; use tenere::app::{App, AppResult}; -use tenere::config::Config; +use tenere::config::{Config, TTSConfig}; use tenere::event::{Event, EventHandler, TTSEvent}; use tenere::formatter::Formatter; use tenere::handler::handle_key_events; @@ -101,7 +101,7 @@ async fn main() -> AppResult<()> { app.notifications.push(notification); } Event::TTSEvent(tts_event) => { - handle_tts_event(tts_event).await; + handle_tts_event(tts_event, &config.tts).await; } } } @@ -110,12 +110,12 @@ async fn main() -> AppResult<()> { Ok(()) } -async fn handle_tts_event(event: TTSEvent) { +async fn handle_tts_event(event: TTSEvent, tts_config: &TTSConfig) { match event { TTSEvent::PlayText(text) => { // Log to help debug eprintln!("Playing TTS: {} characters", text.len()); - if let Err(e) = tts::play_tts(&text).await { + if let Err(e) = tts::play_tts(&text, tts_config).await { eprintln!("TTS error: {}", e); } }, diff --git a/src/tts.rs b/src/tts.rs index 4faf5d4..a5a4936 100644 --- a/src/tts.rs +++ b/src/tts.rs @@ -6,6 +6,7 @@ use futures::StreamExt; use reqwest::Client; use reqwest::header; use tokio::process::Command as TokioCommand; +use crate::config::TTSConfig; // Debug helper macro - you can remove this after debugging macro_rules! debug { @@ -35,7 +36,7 @@ struct TTSRequest { } /// Play text through TTS service with pure streaming (no file storage) -pub async fn play_tts(text: &str) -> Result<(), Box> { +pub async fn play_tts(text: &str, tts_config: &TTSConfig) -> Result<(), Box> { debug!("TTS request for text: {}", text); // Add a terminal bell to indicate TTS is starting (optional) @@ -59,11 +60,11 @@ pub async fn play_tts(text: &str) -> Result<(), Box> { response_format: "mp3".to_string(), }; - debug!("Sending request to TTS API on port 8000"); + debug!("Sending request to TTS API at: {}", tts_config.url); - // Send request to TTS service + // Send request to TTS service using the configured URL let client = Client::new(); - let response = client.post("http://0.0.0.0:8000/v1/audio/speech") + let response = client.post(&tts_config.url) .json(&request) .send() .await?; From e3f3633e461c53a3f1ef50bd696cc3ab401d15fd Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 17:29:44 +0900 Subject: [PATCH 04/16] chore: improve TTS error logging format --- src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index 78b166d..ec5a1c5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -114,9 +114,9 @@ async fn handle_tts_event(event: TTSEvent, tts_config: &TTSConfig) { match event { TTSEvent::PlayText(text) => { // Log to help debug - eprintln!("Playing TTS: {} characters", text.len()); + // eprintln!("Playing TTS: {} characters", text.len()); if let Err(e) = tts::play_tts(&text, tts_config).await { - eprintln!("TTS error: {}", e); + eprintln!("\nTTS error: {}", e); } }, TTSEvent::Complete => { From e63fc8637e3ab27d81af6d022c9f61dd5646d805 Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 17:32:55 +0900 Subject: [PATCH 05/16] fix: reduce sleep duration in LLM processing for improved responsiveness --- src/chatgpt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chatgpt.rs b/src/chatgpt.rs index 6b1d53f..caf741d 100644 --- a/src/chatgpt.rs +++ b/src/chatgpt.rs @@ -131,7 +131,7 @@ impl LLM for ChatGPT { sender.send(Event::LLMEvent(LLMAnswer::Answer(msg.to_string())))?; } - sleep(Duration::from_millis(100)).await; + sleep(Duration::from_millis(1)).await; } } } From c282e12ed431e76d14b7f44e8017c68e0e30ac8d Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 17:35:20 +0900 Subject: [PATCH 06/16] fix: comment out TTS triggering logic to prevent choppy audio playback --- src/main.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index ec5a1c5..703a9cf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -70,11 +70,12 @@ async fn main() -> AppResult<()> { app.chat .handle_answer(LLMAnswer::Answer(answer.clone()), &formatter); + // TODO: this isn't working // We don't want to trigger TTS for every tiny chunk // Only send longer message portions to avoid choppy audio - if answer.len() > 80 && answer.contains('.') { - tui.events.sender.send(Event::TTSEvent(TTSEvent::PlayText(answer)))?; - } + // if answer.len() > 80 && answer.contains('.') { + // tui.events.sender.send(Event::TTSEvent(TTSEvent::PlayText(answer)))?; + // } } Event::LLMEvent(LLMAnswer::EndAnswer) => { { @@ -116,7 +117,7 @@ async fn handle_tts_event(event: TTSEvent, tts_config: &TTSConfig) { // Log to help debug // eprintln!("Playing TTS: {} characters", text.len()); if let Err(e) = tts::play_tts(&text, tts_config).await { - eprintln!("\nTTS error: {}", e); + eprintln!("TTS error: {}", e); } }, TTSEvent::Complete => { From b1807b2620a274384d890dabfaaaa2ce2945aa05 Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 18:16:33 +0900 Subject: [PATCH 07/16] feat: add support for loading and uploading custom TTS voices, with default voice configuration --- Cargo.lock | 25 +++++++- Cargo.toml | 1 + src/config.rs | 30 ++++++++- src/event.rs | 2 +- src/handler.rs | 171 ++++++++++++++++++++++++++++++++++++++++++++++++- src/main.rs | 14 ++-- src/tts.rs | 97 +++++++++++++++++++++++++++- 7 files changed, 324 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5b165e5..4d9eebd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2279,6 +2279,7 @@ dependencies = [ "tempfile", "tokio", "toml", + "toml_edit 0.21.1", "tui-textarea", "unicode-width 0.2.0", ] @@ -2469,7 +2470,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit", + "toml_edit 0.22.20", ] [[package]] @@ -2481,6 +2482,17 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_edit" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow 0.5.40", +] + [[package]] name = "toml_edit" version = "0.22.20" @@ -2491,7 +2503,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "winnow", + "winnow 0.6.18", ] [[package]] @@ -3058,6 +3070,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + [[package]] name = "winnow" version = "0.6.18" diff --git a/Cargo.toml b/Cargo.toml index 025c316..63b3104 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ base64 = "0.13" tempfile = "3" bytes = "1.5.0" chrono = "0.4" # For timestamping debug logs +toml_edit = "0.21.0" [profile.release] lto = "fat" diff --git a/src/config.rs b/src/config.rs index 0636be2..7efebbe 100644 --- a/src/config.rs +++ b/src/config.rs @@ -80,13 +80,17 @@ pub struct OllamaConfig { #[derive(Deserialize, Debug, Clone)] pub struct TTSConfig { #[serde(default = "TTSConfig::default_url")] - pub url: String + pub url: String, + + #[serde(default)] + pub default_voice: Option, } impl Default for TTSConfig { fn default() -> Self { Self { url: Self::default_url(), + default_voice: None, } } } @@ -110,6 +114,9 @@ pub struct KeyBindings { #[serde(default = "KeyBindings::default_stop_stream")] pub stop_stream: char, + + #[serde(default = "KeyBindings::default_load_voice")] + pub load_voice: char, } impl Default for KeyBindings { @@ -119,6 +126,7 @@ impl Default for KeyBindings { show_history: 'h', new_chat: 'n', stop_stream: 't', + load_voice: 'v', } } } @@ -139,6 +147,10 @@ impl KeyBindings { fn default_stop_stream() -> char { 't' } + + fn default_load_voice() -> char { + 'v' + } } impl Config { @@ -153,7 +165,7 @@ impl Config { }; let config = std::fs::read_to_string(conf_path).unwrap_or_default(); - let app_config: Config = toml::from_str(&config).unwrap(); + let mut app_config: Config = toml::from_str(&config).unwrap(); if app_config.llm == LLMBackend::LLamacpp && app_config.llamacpp.is_none() { eprintln!("Config for LLamacpp is not provided"); @@ -164,6 +176,20 @@ impl Config { eprintln!("Config for Ollama is not provided"); std::process::exit(1) } + + // Try to load saved default voice from file if one exists + let voice_file = dirs::config_dir() + .unwrap() + .join("tenere") + .join("default_voice.txt"); + + if voice_file.exists() { + if let Ok(voice_id) = std::fs::read_to_string(&voice_file) { + if !voice_id.trim().is_empty() { + app_config.tts.default_voice = Some(voice_id.trim().to_string()); + } + } + } app_config } diff --git a/src/event.rs b/src/event.rs index c233bbe..4b53df3 100644 --- a/src/event.rs +++ b/src/event.rs @@ -20,7 +20,7 @@ pub enum Event { #[derive(Debug, Clone)] pub enum TTSEvent { - PlayText(String), + PlayText { text: String, voice: Option }, Complete, Error(String), } diff --git a/src/handler.rs b/src/handler.rs index 726560b..5480f8d 100644 --- a/src/handler.rs +++ b/src/handler.rs @@ -1,6 +1,7 @@ use crate::llm::{LLMAnswer, LLMRole}; use crate::{chat::Chat, prompt::Mode}; use crate::event::TTSEvent; +use crate::config::{TTSConfig, Config}; // Add Config import use crate::{ app::{App, AppResult, FocusedBlock}, @@ -17,6 +18,13 @@ use tokio::sync::Mutex; use tokio::sync::mpsc::UnboundedSender; +use crate::tts; +use std::path::Path; +use tokio::fs; +use crate::notification::{Notification, NotificationLevel}; +use std::sync::atomic::Ordering; +use std::time::Duration; + pub async fn handle_key_events( key_event: KeyEvent, app: &mut App<'_>, @@ -43,12 +51,42 @@ pub async fn handle_key_events( KeyCode::Char('l') if key_event.modifiers.contains(KeyModifiers::CONTROL) => { // Play the current answer with TTS if !app.chat.answer.plain_answer.is_empty() { - sender.send(Event::TTSEvent(TTSEvent::PlayText( - app.chat.answer.plain_answer.clone(), - )))?; + sender.send(Event::TTSEvent(TTSEvent::PlayText { + text: app.chat.answer.plain_answer.clone(), + voice: None, + }))?; } } + // Load voice for TTS + KeyCode::Char(c) if c == app.config.key_bindings.load_voice && + key_event.modifiers.contains(KeyModifiers::CONTROL) => { + // Spawn an async task to handle voice loading + let sender_clone = sender.clone(); + // Pass the actual app config here + let config_clone = Arc::clone(&app.config); + tokio::spawn(async move { + match load_voice_file(sender_clone.clone(), config_clone).await { + Ok(voice_id) => { + sender_clone.send(Event::Notification( + Notification::new( + format!("Voice loaded successfully: {}", voice_id), + NotificationLevel::Info + ) + )).unwrap_or_default(); + }, + Err(e) => { + sender_clone.send(Event::Notification( + Notification::new( + format!("Error loading voice: {}", e), + NotificationLevel::Error + ) + )).unwrap_or_default(); + } + } + }); + }, + // scroll down KeyCode::Char('j') | KeyCode::Down => match app.focused_block { FocusedBlock::History => { @@ -261,3 +299,130 @@ pub async fn handle_key_events( Ok(()) } + +/// Load a voice file from the configured directory and update the config +async fn load_voice_file( + sender: UnboundedSender, + config: Arc +) -> Result> { + // Get the voice directory + let voice_dir = tts::get_voice_dir()?; + + // Read all files in the directory + let mut entries = fs::read_dir(&voice_dir).await?; + let mut voice_files = Vec::new(); + + // Collect all audio files + while let Some(entry) = entries.next_entry().await? { + let path = entry.path(); + if path.is_file() { + // Only include files with audio extensions + if let Some(ext) = path.extension() { + let ext_str = ext.to_string_lossy().to_lowercase(); + if ["mp3", "wav", "ogg", "m4a", "flac"].contains(&ext_str.as_str()) { + voice_files.push(path); + } + } + } + } + + // If there are no voice files, return an error + if voice_files.is_empty() { + return Err(format!("No voice files found in {:?}. Place audio files in this directory.", voice_dir).into()); + } + + // For simplicity, use the first voice file + let voice_path = &voice_files[0]; + let file_name = voice_path.file_name().unwrap().to_string_lossy().to_string(); + + // Check if we have a cached voice ID for this file to avoid re-uploading + let cache_file = dirs::config_dir().unwrap().join("tenere").join("voice_cache.json"); + let mut voice_id = None; + + // Try to get the voice ID from cache first + if cache_file.exists() { + if let Ok(content) = tokio::fs::read_to_string(&cache_file).await { + if let Ok(cache_map) = serde_json::from_str::>(&content) { + if let Some(id) = cache_map.get(&file_name) { + if let Some(id_str) = id.as_str() { + voice_id = Some(id_str.to_string()); + + // Send notification that we're using cached voice + sender.send(Event::Notification( + Notification::new( + format!("Using cached voice: {}", file_name), + NotificationLevel::Info + ) + ))?; + } + } + } + } + } + + // If not found in cache, upload the file + let voice_id = if let Some(id) = voice_id { + id + } else { + // Upload the voice file and get the voice ID + let id = tts::upload_voice_file(voice_path, &config.tts).await?; + + // Cache the voice ID + let mut cache_map = if cache_file.exists() { + match tokio::fs::read_to_string(&cache_file).await { + Ok(content) => serde_json::from_str::>(&content) + .unwrap_or_default(), + Err(_) => serde_json::Map::new() + } + } else { + serde_json::Map::new() + }; + + cache_map.insert(file_name.clone(), serde_json::Value::String(id.clone())); + let cache_content = serde_json::to_string_pretty(&cache_map)?; + + // Make sure the directory exists + if let Some(parent) = cache_file.parent() { + if !parent.exists() { + tokio::fs::create_dir_all(parent).await?; + } + } + + tokio::fs::write(&cache_file, cache_content).await?; + id + }; + + // Update the config file + let config_dir = dirs::config_dir().unwrap().join("tenere"); + let config_path = config_dir.join("config.toml"); + + // Read the existing config + let config_content = match tokio::fs::read_to_string(&config_path).await { + Ok(content) => content, + Err(_) => String::new() + }; + + // Parse it as a document to preserve formatting and comments + let mut doc = match config_content.parse::() { + Ok(doc) => doc, + Err(_) => toml_edit::Document::new() + }; + + // Update the voice in the config file + if !doc.as_table().contains_key("tts") { + doc["tts"] = toml_edit::Item::Table(toml_edit::Table::new()); + } + doc["tts"]["default_voice"] = toml_edit::value(voice_id.clone()); + + // Write the config back + tokio::fs::write(&config_path, doc.to_string()).await?; + + // Update the in-memory config too + let tts_config_ptr = &config.tts as *const TTSConfig as *mut TTSConfig; + unsafe { + (*tts_config_ptr).default_voice = Some(voice_id.clone()); + } + + // Return the voice ID + Ok(voice_id) +} diff --git a/src/main.rs b/src/main.rs index 703a9cf..37cd38f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -82,13 +82,16 @@ async fn main() -> AppResult<()> { let mut llm = llm.lock().await; llm.append_chat_msg(app.chat.answer.plain_answer.clone(), LLMRole::ASSISTANT); - // Play the full response with TTS when it completes + // Play the full response with TTS when it completes, + // using the default voice from config if set. let final_answer = app.chat.answer.plain_answer.clone(); if !final_answer.is_empty() { - tui.events.sender.send(Event::TTSEvent(TTSEvent::PlayText(final_answer)))?; + tui.events.sender.send(Event::TTSEvent(TTSEvent::PlayText { + text: final_answer, + voice: config.tts.default_voice.clone(), // Optional default voice + }))?; } } - app.chat.handle_answer(LLMAnswer::EndAnswer, &formatter); app.terminate_response_signal .store(false, std::sync::atomic::Ordering::Relaxed); @@ -113,9 +116,8 @@ async fn main() -> AppResult<()> { async fn handle_tts_event(event: TTSEvent, tts_config: &TTSConfig) { match event { - TTSEvent::PlayText(text) => { - // Log to help debug - // eprintln!("Playing TTS: {} characters", text.len()); + TTSEvent::PlayText { text, voice: _ } => { + // We pass the whole tts_config which already contains the default_voice if let Err(e) = tts::play_tts(&text, tts_config).await { eprintln!("TTS error: {}", e); } diff --git a/src/tts.rs b/src/tts.rs index a5a4936..f6b4e3e 100644 --- a/src/tts.rs +++ b/src/tts.rs @@ -2,6 +2,7 @@ use std::error::Error; use std::process::Stdio; use tokio::io::AsyncWriteExt; use serde::Serialize; +use serde::Deserialize; use futures::StreamExt; use reqwest::Client; use reqwest::header; @@ -35,6 +36,67 @@ struct TTSRequest { response_format: String, } +/// Structure for the voice upload response +#[derive(Debug, Deserialize)] +struct VoiceResponse { + voice_id: String, + created: u64, +} + +/// Upload a voice file to be used as a custom TTS voice +pub async fn upload_voice_file(file_path: &std::path::Path, tts_config: &TTSConfig) -> Result> { + debug!("Uploading voice file: {:?}", file_path); + + // Extract the filename without extension to use as voice name + let file_name = file_path.file_stem() + .and_then(|os_str| os_str.to_str()) + .unwrap_or("default_voice"); + + // Check if file exists + if !file_path.exists() { + return Err(format!("Voice file not found: {:?}", file_path).into()); + } + + // Read the file content + let file_content = tokio::fs::read(file_path).await?; + debug!("Read voice file with {} bytes", file_content.len()); + + // Construct the voice API endpoint URL from the base TTS URL + let base_url = tts_config.url.trim_end_matches("/speech").trim_end_matches("/"); + let voice_url = format!("{}/voice", base_url); + debug!("Using voice API endpoint: {}", voice_url); + + // Since we don't have multipart feature enabled, we'll use curl command-line instead + // This is a common workaround for file uploads without adding dependencies + let file_path_str = file_path.to_string_lossy(); + let output = tokio::process::Command::new("curl") + .args([ + "-X", "POST", + "-F", &format!("file=@{}", file_path_str), + "-F", &format!("name={}", file_name), + &voice_url + ]) + .output() + .await?; + + if !output.status.success() { + let error = String::from_utf8_lossy(&output.stderr); + debug!("Voice upload failed: {}", error); + return Err(format!("Voice upload failed: {}", error).into()); + } + + // Parse the JSON response to get the voice ID + let response_json = String::from_utf8_lossy(&output.stdout); + let response: serde_json::Value = serde_json::from_str(&response_json)?; + + let voice_id = response["voice_id"].as_str() + .ok_or("Invalid response: missing voice_id field")? + .to_string(); + + debug!("Successfully uploaded voice with ID: {}", voice_id); + Ok(voice_id) +} + /// Play text through TTS service with pure streaming (no file storage) pub async fn play_tts(text: &str, tts_config: &TTSConfig) -> Result<(), Box> { debug!("TTS request for text: {}", text); @@ -53,14 +115,14 @@ pub async fn play_tts(text: &str, tts_config: &TTSConfig) -> Result<(), Box Result<(tokio::process::Child, debug!("No suitable player found!"); Err("No suitable streaming audio player found. Please install mpv, ffplay, or aplay.".into()) } + +/// Helper function to get the default voice file directory +pub fn get_voice_dir() -> Result> { + let voice_dir = dirs::config_dir() + .ok_or_else(|| "Failed to find config directory")? + .join("tenere") + .join("audio"); + + // Create directory if it doesn't exist + if !voice_dir.exists() { + std::fs::create_dir_all(&voice_dir)?; + } + + Ok(voice_dir) +} + +/// Load a voice from a file in the config directory and set as default +pub async fn load_voice_from_file(file_name: &str, tts_config: &mut TTSConfig) -> Result> { + let voice_dir = get_voice_dir()?; + let file_path = voice_dir.join(file_name); + + debug!("Loading voice from file: {:?}", file_path); + + // Upload the voice file + let voice_id = upload_voice_file(&file_path, tts_config).await?; + + // Store the voice ID as the default + tts_config.default_voice = Some(voice_id.clone()); + + Ok(voice_id) +} From 8dba71002c7506f55ce285c52ad8e20eb839b178 Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 18:30:23 +0900 Subject: [PATCH 08/16] feat: implement voice cycling in load_voice_file function and enhance notifications --- src/handler.rs | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/src/handler.rs b/src/handler.rs index 5480f8d..fb9b95d 100644 --- a/src/handler.rs +++ b/src/handler.rs @@ -301,6 +301,7 @@ pub async fn handle_key_events( } /// Load a voice file from the configured directory and update the config +/// Cycles through available voices each time it's called async fn load_voice_file( sender: UnboundedSender, config: Arc @@ -331,8 +332,33 @@ async fn load_voice_file( return Err(format!("No voice files found in {:?}. Place audio files in this directory.", voice_dir).into()); } - // For simplicity, use the first voice file - let voice_path = &voice_files[0]; + // Sort the files to ensure consistent order + voice_files.sort(); + + // Get the last used voice file index + let last_index_file = dirs::config_dir().unwrap().join("tenere").join("last_voice_index"); + let last_index = if last_index_file.exists() { + match tokio::fs::read_to_string(&last_index_file).await { + Ok(content) => content.trim().parse::().unwrap_or(0), + Err(_) => 0 + } + } else { + 0 + }; + + // Calculate the next index (cycling through the list) + let next_index = (last_index + 1) % voice_files.len(); + + // Save the next index for future calls + if let Some(parent) = last_index_file.parent() { + if !parent.exists() { + tokio::fs::create_dir_all(parent).await?; + } + } + tokio::fs::write(&last_index_file, next_index.to_string()).await?; + + // Get the selected voice file + let voice_path = &voice_files[next_index]; let file_name = voice_path.file_name().unwrap().to_string_lossy().to_string(); // Check if we have a cached voice ID for this file to avoid re-uploading @@ -350,7 +376,8 @@ async fn load_voice_file( // Send notification that we're using cached voice sender.send(Event::Notification( Notification::new( - format!("Using cached voice: {}", file_name), + format!("Using voice: {} ({}/{})", + file_name, next_index + 1, voice_files.len()), NotificationLevel::Info ) ))?; @@ -367,6 +394,15 @@ async fn load_voice_file( // Upload the voice file and get the voice ID let id = tts::upload_voice_file(voice_path, &config.tts).await?; + // Send notification that we're uploading a new voice + sender.send(Event::Notification( + Notification::new( + format!("Uploading new voice: {} ({}/{})", + file_name, next_index + 1, voice_files.len()), + NotificationLevel::Info + ) + ))?; + // Cache the voice ID let mut cache_map = if cache_file.exists() { match tokio::fs::read_to_string(&cache_file).await { From 6560f8bf47bef5376fd5d113ce7bef941c72c354 Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 19:48:17 +0900 Subject: [PATCH 09/16] feat: enhance voice caching mechanism with reliable cache keys and improved error handling --- src/handler.rs | 105 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 76 insertions(+), 29 deletions(-) diff --git a/src/handler.rs b/src/handler.rs index fb9b95d..c829214 100644 --- a/src/handler.rs +++ b/src/handler.rs @@ -361,70 +361,117 @@ async fn load_voice_file( let voice_path = &voice_files[next_index]; let file_name = voice_path.file_name().unwrap().to_string_lossy().to_string(); + // Create a more reliable cache key using file name and file size + let file_metadata = tokio::fs::metadata(voice_path).await?; + let file_size = file_metadata.len(); + let cache_key = format!("{}_size_{}", file_name, file_size); + + // Debug the voice file selection + // eprintln!("Selected voice file: {} (size: {} bytes)", file_name, file_size); + // Check if we have a cached voice ID for this file to avoid re-uploading let cache_file = dirs::config_dir().unwrap().join("tenere").join("voice_cache.json"); let mut voice_id = None; // Try to get the voice ID from cache first if cache_file.exists() { - if let Ok(content) = tokio::fs::read_to_string(&cache_file).await { - if let Ok(cache_map) = serde_json::from_str::>(&content) { - if let Some(id) = cache_map.get(&file_name) { - if let Some(id_str) = id.as_str() { - voice_id = Some(id_str.to_string()); - - // Send notification that we're using cached voice - sender.send(Event::Notification( - Notification::new( - format!("Using voice: {} ({}/{})", - file_name, next_index + 1, voice_files.len()), - NotificationLevel::Info - ) - ))?; + // eprintln!("Voice cache file exists at: {:?}", cache_file); + + match tokio::fs::read_to_string(&cache_file).await { + Ok(content) => { + // eprintln!("Read cache content: {} bytes", content.len()); + // Parse as JSON map directly - more robust error handling + match serde_json::from_str::>(&content) { + Ok(cache_map) => { + // First try with the cache_key + if let Some(id) = cache_map.get(&cache_key).and_then(|v| v.as_str()) { + voice_id = Some(id.to_string()); + // eprintln!("Found voice ID in cache with key {}: {}", cache_key, id); + } + // Fallback to just the filename + else if let Some(id) = cache_map.get(&file_name).and_then(|v| v.as_str()) { + voice_id = Some(id.to_string()); + // eprintln!("Found voice ID in cache with filename {}: {}", file_name, id); + } else { + // eprintln!("No cache entry found for {} or {}", cache_key, file_name); + } + }, + Err(e) => { + // eprintln!("Failed to parse voice cache: {}", e); } } + }, + Err(e) => { + // eprintln!("Failed to read voice cache file: {}", e); } } + } else { + // eprintln!("Voice cache file doesn't exist yet at: {:?}", cache_file); } // If not found in cache, upload the file let voice_id = if let Some(id) = voice_id { - id - } else { - // Upload the voice file and get the voice ID - let id = tts::upload_voice_file(voice_path, &config.tts).await?; - - // Send notification that we're uploading a new voice + // Voice found in cache, notify the user sender.send(Event::Notification( Notification::new( - format!("Uploading new voice: {} ({}/{})", + format!("Using voice: {} ({}/{})", file_name, next_index + 1, voice_files.len()), NotificationLevel::Info ) ))?; + id + } else { + // Voice not found in cache, upload it + // eprintln!("No cached voice found, uploading file: {}", file_name); - // Cache the voice ID + // Upload the voice file and get the voice ID + let id = tts::upload_voice_file(voice_path, &config.tts).await?; + // eprintln!("Voice uploaded successfully with ID: {}", id); + + // Create the cache map let mut cache_map = if cache_file.exists() { match tokio::fs::read_to_string(&cache_file).await { - Ok(content) => serde_json::from_str::>(&content) - .unwrap_or_default(), + Ok(content) => match serde_json::from_str::>(&content) { + Ok(map) => map, + Err(_) => { + // If parsing fails, create a fresh map + // eprintln!("Cache file exists but couldn't be parsed, creating new one"); + serde_json::Map::new() + } + }, Err(_) => serde_json::Map::new() } } else { serde_json::Map::new() }; + // Add both the filename and the cache_key entries cache_map.insert(file_name.clone(), serde_json::Value::String(id.clone())); + cache_map.insert(cache_key.clone(), serde_json::Value::String(id.clone())); + let cache_content = serde_json::to_string_pretty(&cache_map)?; // Make sure the directory exists - if let Some(parent) = cache_file.parent() { - if !parent.exists() { - tokio::fs::create_dir_all(parent).await?; - } + let parent = cache_file.parent().unwrap(); + if !parent.exists() { + tokio::fs::create_dir_all(parent).await?; + } + + // Write the updated cache + match tokio::fs::write(&cache_file, &cache_content).await { + // Ok(_) => eprintln!("Cache file updated successfully"), + // Err(e) => eprintln!("Failed to write cache file: {}", e), } - tokio::fs::write(&cache_file, cache_content).await?; + // Send notification that we're uploading a new voice + sender.send(Event::Notification( + Notification::new( + format!("Uploaded new voice: {} ({}/{})", + file_name, next_index + 1, voice_files.len()), + NotificationLevel::Info + ) + ))?; + id }; From 47bbcedfbd92391d89858b4c69087fab2cff8780 Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 20:28:28 +0900 Subject: [PATCH 10/16] feat: add multipart support for file uploads and improve voice cache notifications --- Cargo.lock | 17 +++++++++++++++++ Cargo.toml | 3 ++- src/handler.rs | 14 ++++++++++++-- src/main.rs | 10 ++++++---- src/tts.rs | 5 ++--- 5 files changed, 39 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4d9eebd..6e80e89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1274,6 +1274,16 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "mime_guess" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" +dependencies = [ + "mime", + "unicase", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -1816,6 +1826,7 @@ dependencies = [ "js-sys", "log", "mime", + "mime_guess", "once_cell", "percent-encoding", "pin-project-lite", @@ -2569,6 +2580,12 @@ dependencies = [ "unicode-width 0.2.0", ] +[[package]] +name = "unicase" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" + [[package]] name = "unicode-bidi" version = "0.3.15" diff --git a/Cargo.toml b/Cargo.toml index 63b3104..a006d34 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,8 @@ futures = "0.3" reqwest = { version = "0.12", default-features = false, features = [ "json", "rustls-tls", - "stream", # Add this feature for byte streaming + "stream", # For byte streaming + "multipart", # Add this feature for form uploads ] } ratatui = { version = "0.29", features = ["all-widgets"] } regex = "1" diff --git a/src/handler.rs b/src/handler.rs index c829214..5be1fa3 100644 --- a/src/handler.rs +++ b/src/handler.rs @@ -459,8 +459,18 @@ async fn load_voice_file( // Write the updated cache match tokio::fs::write(&cache_file, &cache_content).await { - // Ok(_) => eprintln!("Cache file updated successfully"), - // Err(e) => eprintln!("Failed to write cache file: {}", e), + Ok(_) => sender.send(Event::Notification( + Notification::new( + "Voice cache updated successfully".to_string(), + NotificationLevel::Info + ) + )).unwrap_or_default(), + Err(e) => sender.send(Event::Notification( + Notification::new( + format!("Failed to write voice cache file: {}", e), + NotificationLevel::Error + ) + )).unwrap_or_default(), } // Send notification that we're uploading a new voice diff --git a/src/main.rs b/src/main.rs index 37cd38f..f82ec3e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -70,12 +70,14 @@ async fn main() -> AppResult<()> { app.chat .handle_answer(LLMAnswer::Answer(answer.clone()), &formatter); - // TODO: this isn't working // We don't want to trigger TTS for every tiny chunk // Only send longer message portions to avoid choppy audio - // if answer.len() > 80 && answer.contains('.') { - // tui.events.sender.send(Event::TTSEvent(TTSEvent::PlayText(answer)))?; - // } + if answer.len() > 80 && answer.contains('.') { + tui.events.sender.send(Event::TTSEvent(TTSEvent::PlayText { + text: answer, + voice: None + }))?; + } } Event::LLMEvent(LLMAnswer::EndAnswer) => { { diff --git a/src/tts.rs b/src/tts.rs index f6b4e3e..4522745 100644 --- a/src/tts.rs +++ b/src/tts.rs @@ -67,7 +67,6 @@ pub async fn upload_voice_file(file_path: &std::path::Path, tts_config: &TTSConf debug!("Using voice API endpoint: {}", voice_url); // Since we don't have multipart feature enabled, we'll use curl command-line instead - // This is a common workaround for file uploads without adding dependencies let file_path_str = file_path.to_string_lossy(); let output = tokio::process::Command::new("curl") .args([ @@ -122,7 +121,7 @@ pub async fn play_tts(text: &str, tts_config: &TTSConfig) -> Result<(), Box Result<(tokio::process::Child, /// Helper function to get the default voice file directory pub fn get_voice_dir() -> Result> { - let voice_dir = dirs::config_dir() + let mut voice_dir = dirs::config_dir() .ok_or_else(|| "Failed to find config directory")? .join("tenere") .join("audio"); From 2aca72e67dfef4dcc87755f15ead9a05f56e1e97 Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 20:44:27 +0900 Subject: [PATCH 11/16] feat: implement asynchronous TTS playback to prevent UI blocking --- src/main.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index f82ec3e..0885e9d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -119,10 +119,18 @@ async fn main() -> AppResult<()> { async fn handle_tts_event(event: TTSEvent, tts_config: &TTSConfig) { match event { TTSEvent::PlayText { text, voice: _ } => { - // We pass the whole tts_config which already contains the default_voice - if let Err(e) = tts::play_tts(&text, tts_config).await { - eprintln!("TTS error: {}", e); - } + // Clone what we need to move into the background task + let tts_config = tts_config.clone(); + let text = text.clone(); + + // Spawn a background task for TTS playback to avoid blocking the UI + tokio::spawn(async move { + if let Err(e) = tts::play_tts(&text, &tts_config).await { + eprintln!("TTS error: {}", e); + } + }); + + // Return immediately so the main application can continue handling input }, TTSEvent::Complete => { // TTS playback completed From e752eef11152552f5119f5cfb1a270801d89f624 Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 20:54:58 +0900 Subject: [PATCH 12/16] feat: add process cleanup for TTS playback and register signal handlers --- Cargo.lock | 37 +++++++++++++++++++ Cargo.toml | 3 ++ src/main.rs | 32 +++++++++++++---- src/tts.rs | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 166 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6e80e89..0c9a730 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -368,6 +368,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.39" @@ -559,6 +565,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "ctrlc" +version = "3.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90eeab0aa92f3f9b4e87f258c72b139c207d251f9cbc1080a0086b86a8870dd3" +dependencies = [ + "nix", + "windows-sys 0.59.0", +] + [[package]] name = "darling" version = "0.20.10" @@ -1185,6 +1201,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.158" @@ -1322,6 +1344,18 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "nom" version = "7.1.3" @@ -2278,8 +2312,11 @@ dependencies = [ "chrono", "clap", "crossterm", + "ctrlc", "dirs", "futures", + "lazy_static", + "libc", "ratatui", "regex", "reqwest", diff --git a/Cargo.toml b/Cargo.toml index a006d34..577a86e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,9 @@ tempfile = "3" bytes = "1.5.0" chrono = "0.4" # For timestamping debug logs toml_edit = "0.21.0" +lazy_static = "1.4.0" +ctrlc = "3.4.0" +libc = "0.2.142" # For process killing on Unix [profile.release] lto = "fat" diff --git a/src/main.rs b/src/main.rs index 0885e9d..8b8cb26 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,7 +6,7 @@ use tenere::config::{Config, TTSConfig}; use tenere::event::{Event, EventHandler, TTSEvent}; use tenere::formatter::Formatter; use tenere::handler::handle_key_events; -use tenere::llm::{LLMAnswer, LLMRole}; +use tenere::llm::{LLMAnswer, LLMRole, LLM}; // Add LLM import use tenere::tui::Tui; use tenere::tts; @@ -17,6 +17,8 @@ use tokio::sync::Mutex; use clap::{crate_description, crate_version, Arg, Command}; +use ratatui::backend::Backend; // Add this import + #[tokio::main] async fn main() -> AppResult<()> { let matches = Command::new("tenere") @@ -56,19 +58,36 @@ async fn main() -> AppResult<()> { // load potential history data from archive files app.history.load_history(tui.events.sender.clone()); + // Make sure to clean up TTS processes on exit + let result = run_app(&mut app, llm, &mut tui, &formatter, &config).await; + + // Clean up TTS processes before exiting + tts::kill_all_tts_processes(); + + tui.exit()?; + result +} + +async fn run_app( + app: &mut App<'_>, + llm: Arc>>, + tui: &mut Tui, + formatter: &Formatter<'_>, + config: &Arc +) -> AppResult<()> { while app.running { - tui.draw(&mut app)?; + tui.draw(app)?; match tui.events.next().await? { Event::Tick => app.tick(), Event::Key(key_event) => { - handle_key_events(key_event, &mut app, llm.clone(), tui.events.sender.clone()) + handle_key_events(key_event, app, llm.clone(), tui.events.sender.clone()) .await?; } Event::Mouse(_) => {} Event::Resize(_, _) => {} Event::LLMEvent(LLMAnswer::Answer(answer)) => { app.chat - .handle_answer(LLMAnswer::Answer(answer.clone()), &formatter); + .handle_answer(LLMAnswer::Answer(answer.clone()), formatter); // We don't want to trigger TTS for every tiny chunk // Only send longer message portions to avoid choppy audio @@ -94,13 +113,13 @@ async fn main() -> AppResult<()> { }))?; } } - app.chat.handle_answer(LLMAnswer::EndAnswer, &formatter); + app.chat.handle_answer(LLMAnswer::EndAnswer, formatter); app.terminate_response_signal .store(false, std::sync::atomic::Ordering::Relaxed); } Event::LLMEvent(LLMAnswer::StartAnswer) => { app.spinner.active = false; - app.chat.handle_answer(LLMAnswer::StartAnswer, &formatter); + app.chat.handle_answer(LLMAnswer::StartAnswer, formatter); } Event::Notification(notification) => { @@ -112,7 +131,6 @@ async fn main() -> AppResult<()> { } } - tui.exit()?; Ok(()) } diff --git a/src/tts.rs b/src/tts.rs index 4522745..81896de 100644 --- a/src/tts.rs +++ b/src/tts.rs @@ -8,6 +8,10 @@ use reqwest::Client; use reqwest::header; use tokio::process::Command as TokioCommand; use crate::config::TTSConfig; +use std::sync::{Arc, Mutex, Once}; +use lazy_static::lazy_static; +use std::collections::HashSet; +use std::process::Child as StdChild; // Debug helper macro - you can remove this after debugging macro_rules! debug { @@ -133,7 +137,7 @@ pub async fn play_tts(text: &str, tts_config: &TTSConfig) -> Result<(), Box Result<(tokio::process::Child, tokio::process::ChildStdin), Box> { + // Make sure cleanup is registered before creating any new processes + register_cleanup(); + // Try to find which players are available on the system let mpv_available = std::process::Command::new("mpv").arg("--version").output().is_ok(); let ffplay_available = std::process::Command::new("ffplay").arg("-version").output().is_ok(); @@ -249,6 +262,11 @@ fn setup_streaming_player(content_type: &str) -> Result<(tokio::process::Child, .stderr(Stdio::null()) .spawn()?; + // Register the process for cleanup + if let Some(pid) = command.id() { + register_tts_process(pid); + } + let stdin = command.stdin.take() .ok_or_else(|| "Failed to open mpv stdin".to_string())?; debug!("Successfully started mpv"); @@ -265,6 +283,11 @@ fn setup_streaming_player(content_type: &str) -> Result<(tokio::process::Child, .stderr(Stdio::null()) .spawn()?; + // Register the process for cleanup + if let Some(pid) = command.id() { + register_tts_process(pid); + } + let stdin = command.stdin.take() .ok_or_else(|| "Failed to open ffplay stdin".to_string())?; debug!("Successfully started ffplay"); @@ -280,6 +303,11 @@ fn setup_streaming_player(content_type: &str) -> Result<(tokio::process::Child, .stderr(Stdio::null()) .spawn()?; + // Register the process for cleanup + if let Some(pid) = command.id() { + register_tts_process(pid); + } + let stdin = command.stdin.take() .ok_or_else(|| "Failed to open aplay stdin".to_string())?; debug!("Successfully started aplay"); @@ -320,3 +348,75 @@ pub async fn load_voice_from_file(file_name: &str, tts_config: &mut TTSConfig) - Ok(voice_id) } + +// Add a global registry for tracking spawned processes +lazy_static! { + static ref TTS_PROCESSES: Arc>> = Arc::new(Mutex::new(HashSet::new())); + static ref CLEANUP_REGISTERED: Once = Once::new(); +} + +// Register cleanup handler for program termination +fn register_cleanup() { + CLEANUP_REGISTERED.call_once(|| { + let processes = TTS_PROCESSES.clone(); + + // Register normal exit cleanup + std::env::set_var("TENERE_CLEANUP_REGISTERED", "true"); + + // Register panic cleanup + let default_hook = std::panic::take_hook(); + std::panic::set_hook(Box::new(move |panic_info| { + kill_all_tts_processes(); + default_hook(panic_info); + })); + + // Register CTRL+C handler + ctrlc::set_handler(move || { + kill_all_tts_processes(); + std::process::exit(0); + }).expect("Error setting Ctrl-C handler"); + }); +} + +// Kill all registered TTS processes +pub fn kill_all_tts_processes() { + debug!("Cleaning up TTS processes"); + if let Ok(mut processes) = TTS_PROCESSES.lock() { + for pid in processes.iter() { + debug!("Killing TTS process with PID: {}", pid); + // Cross-platform way to kill a process by PID + #[cfg(target_os = "windows")] + { + use std::process::Command; + let _ = Command::new("taskkill") + .args(&["/F", "/PID", &pid.to_string()]) + .output(); + } + + #[cfg(not(target_os = "windows"))] + { + // On Unix systems we can use the kill system call + unsafe { + libc::kill(*pid as i32, libc::SIGTERM); + } + } + } + processes.clear(); + } +} + +// Register a new TTS process +fn register_tts_process(pid: u32) { + if let Ok(mut processes) = TTS_PROCESSES.lock() { + processes.insert(pid); + debug!("Registered TTS process: {}", pid); + } +} + +// Unregister a TTS process when it completes +fn unregister_tts_process(pid: u32) { + if let Ok(mut processes) = TTS_PROCESSES.lock() { + processes.remove(&pid); + debug!("Unregistered TTS process: {}", pid); + } +} From a9b2f3fe635b47c6558410f10e5a613c5e93b268 Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 22:06:09 +0900 Subject: [PATCH 13/16] feat: add system prompt configuration for ChatGPT and update README --- README.md | 1 + example_config.toml | 16 ++++++++++++++++ src/chatgpt.rs | 4 +++- src/config.rs | 8 ++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 example_config.toml diff --git a/README.md b/README.md index fd21747..475b83b 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,7 @@ Include your API key in the configuration file: openai_api_key = "Your API key here" model = "gpt-3.5-turbo" url = "https://api.openai.com/v1/chat/completions" +system_prompt = "You are a helpful assistant." ``` The default model is set to `gpt-3.5-turbo`. Check out the [OpenAI documentation](https://platform.openai.com/docs/models/gpt-3-5) for more info. diff --git a/example_config.toml b/example_config.toml new file mode 100644 index 0000000..71d46cf --- /dev/null +++ b/example_config.toml @@ -0,0 +1,16 @@ +llm = "chatgpt" + +[chatgpt] +openai_api_key = "your-api-key-here" +model = "gpt-4-turbo" +system_prompt = "You are an AI assistant that specializes in programming and software development." + +[key_bindings] +show_help = '?' +show_history = 'h' +new_chat = 'n' +stop_stream = 't' +load_voice = 'v' + +[tts] +url = "http://0.0.0.0:8000/v1/audio/speech" diff --git a/src/chatgpt.rs b/src/chatgpt.rs index caf741d..92e850a 100644 --- a/src/chatgpt.rs +++ b/src/chatgpt.rs @@ -20,6 +20,7 @@ pub struct ChatGPT { openai_api_key: String, model: String, url: String, + system_prompt: String, messages: Vec>, } @@ -45,6 +46,7 @@ You need to define one whether in the configuration file or as an environment va openai_api_key, model: config.model, url: config.url, + system_prompt: config.system_prompt, messages: Vec::new(), } } @@ -80,7 +82,7 @@ impl LLM for ChatGPT { ("role".to_string(), "system".to_string()), ( "content".to_string(), - "You are a helpful assistant.".to_string(), + self.system_prompt.clone(), ), ])), ]; diff --git a/src/config.rs b/src/config.rs index 7efebbe..bd5bff0 100644 --- a/src/config.rs +++ b/src/config.rs @@ -38,6 +38,9 @@ pub struct ChatGPTConfig { #[serde(default = "ChatGPTConfig::default_url")] pub url: String, + + #[serde(default = "ChatGPTConfig::default_system_prompt")] + pub system_prompt: String, } impl Default for ChatGPTConfig { @@ -46,6 +49,7 @@ impl Default for ChatGPTConfig { openai_api_key: None, model: Self::default_model(), url: Self::default_url(), + system_prompt: Self::default_system_prompt(), } } } @@ -58,6 +62,10 @@ impl ChatGPTConfig { pub fn default_url() -> String { String::from("https://api.openai.com/v1/chat/completions") } + + pub fn default_system_prompt() -> String { + String::from("You are a helpful assistant.") + } } // LLamacpp From 94e0ad58ce3ca56f4e00996a728c0cd8f507d908 Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 23:03:51 +0900 Subject: [PATCH 14/16] feat: implement custom TTS hook and utility functions for enhanced speech synthesis --- app/components/tts.ts | 146 +++++++++++++++++++++++++++++++++++++++ app/utils/tts-helpers.ts | 99 ++++++++++++++++++++++++++ 2 files changed, 245 insertions(+) create mode 100644 app/components/tts.ts create mode 100644 app/utils/tts-helpers.ts diff --git a/app/components/tts.ts b/app/components/tts.ts new file mode 100644 index 0000000..e351141 --- /dev/null +++ b/app/components/tts.ts @@ -0,0 +1,146 @@ +import { useState, useEffect, useRef } from 'react'; + +interface TTSOptions { + rate?: number; + pitch?: number; + volume?: number; + voice?: SpeechSynthesisVoice; +} + +export const useTTS = () => { + const [isSpeaking, setIsSpeaking] = useState(false); + const [isPaused, setIsPaused] = useState(false); + const [utterance, setUtterance] = useState(null); + const timeoutRef = useRef(null); + const completionCheckRef = useRef(null); + + // Clean up function to clear all timeouts and reset state + const cleanup = () => { + if (timeoutRef.current) { + clearTimeout(timeoutRef.current); + timeoutRef.current = null; + } + if (completionCheckRef.current) { + clearTimeout(completionCheckRef.current); + completionCheckRef.current = null; + } + }; + + // Handle component unmounting + useEffect(() => { + return () => { + if (utterance) { + speechSynthesis.cancel(); + } + cleanup(); + }; + }, [utterance]); + + // Speak function with improved error handling + const speak = (text: string, options?: TTSOptions) => { + // Cancel any ongoing speech + stop(); + + try { + const newUtterance = new SpeechSynthesisUtterance(text); + + // Apply options + if (options) { + if (options.rate !== undefined) newUtterance.rate = options.rate; + if (options.pitch !== undefined) newUtterance.pitch = options.pitch; + if (options.volume !== undefined) newUtterance.volume = options.volume; + if (options.voice !== undefined) newUtterance.voice = options.voice; + } + + // Handle events + newUtterance.onstart = () => { + setIsSpeaking(true); + setIsPaused(false); + + // Chrome bug workaround - sometimes speechSynthesis stops unexpectedly + // Set interval to restart if it cuts off + const resumeIfNeeded = () => { + if (isSpeaking && !isPaused && !speechSynthesis.speaking) { + speechSynthesis.resume(); + } + }; + + timeoutRef.current = setInterval(resumeIfNeeded, 250) as unknown as NodeJS.Timeout; + }; + + newUtterance.onpause = () => setIsPaused(true); + newUtterance.onresume = () => setIsPaused(false); + + newUtterance.onend = () => { + setIsSpeaking(false); + setIsPaused(false); + cleanup(); + }; + + newUtterance.onerror = (event) => { + console.error('TTS Error:', event); + setIsSpeaking(false); + setIsPaused(false); + cleanup(); + }; + + setUtterance(newUtterance); + speechSynthesis.speak(newUtterance); + + // Set a completion check in case onend doesn't fire + completionCheckRef.current = setTimeout(() => { + if (!speechSynthesis.speaking) { + setIsSpeaking(false); + setIsPaused(false); + cleanup(); + } + }, text.length * 50) as unknown as NodeJS.Timeout; + + } catch (error) { + console.error('Failed to initialize TTS:', error); + } + }; + + const pause = () => { + if (speechSynthesis && isSpeaking && !isPaused) { + speechSynthesis.pause(); + setIsPaused(true); + } + }; + + const resume = () => { + if (speechSynthesis && isPaused) { + speechSynthesis.resume(); + setIsPaused(false); + } + }; + + const stop = () => { + try { + if (speechSynthesis) { + speechSynthesis.cancel(); + } + setIsSpeaking(false); + setIsPaused(false); + setUtterance(null); + cleanup(); + } catch (error) { + console.error('Error while stopping TTS:', error); + } + }; + + // Get available voices + const getVoices = (): SpeechSynthesisVoice[] => { + return speechSynthesis?.getVoices() || []; + }; + + return { + speak, + pause, + resume, + stop, + getVoices, + isSpeaking, + isPaused + }; +}; diff --git a/app/utils/tts-helpers.ts b/app/utils/tts-helpers.ts new file mode 100644 index 0000000..71af0f4 --- /dev/null +++ b/app/utils/tts-helpers.ts @@ -0,0 +1,99 @@ + +/** + * Safely cancels speech synthesis and handles any potential errors + */ +export const safeCancel = () => { + try { + if (window.speechSynthesis) { + window.speechSynthesis.cancel(); + } + } catch (error) { + console.error('Error while cancelling speech synthesis:', error); + } +}; + +/** + * Checks if the browser supports speech synthesis + */ +export const isTTSSupported = (): boolean => { + return 'speechSynthesis' in window && + 'SpeechSynthesisUtterance' in window; +}; + +/** + * Chunks text into smaller pieces to prevent TTS cutoff + * @param text The text to chunk + * @param maxLength Maximum length of each chunk + * @returns Array of text chunks + */ +export const chunkText = (text: string, maxLength: number = 200): string[] => { + if (!text || text.length <= maxLength) { + return [text]; + } + + const chunks: string[] = []; + let currentChunk = ''; + + // Split by sentences to create more natural chunks + const sentences = text.split(/(?<=[.!?])\s+/); + + for (const sentence of sentences) { + if ((currentChunk + sentence).length <= maxLength) { + currentChunk += (currentChunk ? ' ' : '') + sentence; + } else { + // If a single sentence is too long, split by words + if (currentChunk) { + chunks.push(currentChunk); + currentChunk = sentence; + } else { + const words = sentence.split(' '); + currentChunk = words[0]; + + for (let i = 1; i < words.length; i++) { + if ((currentChunk + ' ' + words[i]).length <= maxLength) { + currentChunk += ' ' + words[i]; + } else { + chunks.push(currentChunk); + currentChunk = words[i]; + } + } + } + } + } + + if (currentChunk) { + chunks.push(currentChunk); + } + + return chunks; +}; + +/** + * Creates a resilient utterance with proper error handling + * @param text Text to speak + * @param options Speech options + * @param onEnd Callback when speech ends + * @returns SpeechSynthesisUtterance instance + */ +export const createResilientUtterance = ( + text: string, + options?: { rate?: number; pitch?: number; voice?: SpeechSynthesisVoice }, + onEnd?: () => void +): SpeechSynthesisUtterance => { + const utterance = new SpeechSynthesisUtterance(text); + + if (options?.rate) utterance.rate = options.rate; + if (options?.pitch) utterance.pitch = options.pitch; + if (options?.voice) utterance.voice = options.voice; + + utterance.onend = () => { + if (onEnd) onEnd(); + }; + + utterance.onerror = (event) => { + console.error('TTS error:', event); + if (onEnd) onEnd(); + }; + + return utterance; +}; From c2704091d6f0647b37019e36e5bb3fead91b45d8 Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 23:07:40 +0900 Subject: [PATCH 15/16] refactor: remove deprecated TTS hook and utility functions --- app/components/tts.ts | 146 --------------------------------------- app/utils/tts-helpers.ts | 99 -------------------------- 2 files changed, 245 deletions(-) delete mode 100644 app/components/tts.ts delete mode 100644 app/utils/tts-helpers.ts diff --git a/app/components/tts.ts b/app/components/tts.ts deleted file mode 100644 index e351141..0000000 --- a/app/components/tts.ts +++ /dev/null @@ -1,146 +0,0 @@ -import { useState, useEffect, useRef } from 'react'; - -interface TTSOptions { - rate?: number; - pitch?: number; - volume?: number; - voice?: SpeechSynthesisVoice; -} - -export const useTTS = () => { - const [isSpeaking, setIsSpeaking] = useState(false); - const [isPaused, setIsPaused] = useState(false); - const [utterance, setUtterance] = useState(null); - const timeoutRef = useRef(null); - const completionCheckRef = useRef(null); - - // Clean up function to clear all timeouts and reset state - const cleanup = () => { - if (timeoutRef.current) { - clearTimeout(timeoutRef.current); - timeoutRef.current = null; - } - if (completionCheckRef.current) { - clearTimeout(completionCheckRef.current); - completionCheckRef.current = null; - } - }; - - // Handle component unmounting - useEffect(() => { - return () => { - if (utterance) { - speechSynthesis.cancel(); - } - cleanup(); - }; - }, [utterance]); - - // Speak function with improved error handling - const speak = (text: string, options?: TTSOptions) => { - // Cancel any ongoing speech - stop(); - - try { - const newUtterance = new SpeechSynthesisUtterance(text); - - // Apply options - if (options) { - if (options.rate !== undefined) newUtterance.rate = options.rate; - if (options.pitch !== undefined) newUtterance.pitch = options.pitch; - if (options.volume !== undefined) newUtterance.volume = options.volume; - if (options.voice !== undefined) newUtterance.voice = options.voice; - } - - // Handle events - newUtterance.onstart = () => { - setIsSpeaking(true); - setIsPaused(false); - - // Chrome bug workaround - sometimes speechSynthesis stops unexpectedly - // Set interval to restart if it cuts off - const resumeIfNeeded = () => { - if (isSpeaking && !isPaused && !speechSynthesis.speaking) { - speechSynthesis.resume(); - } - }; - - timeoutRef.current = setInterval(resumeIfNeeded, 250) as unknown as NodeJS.Timeout; - }; - - newUtterance.onpause = () => setIsPaused(true); - newUtterance.onresume = () => setIsPaused(false); - - newUtterance.onend = () => { - setIsSpeaking(false); - setIsPaused(false); - cleanup(); - }; - - newUtterance.onerror = (event) => { - console.error('TTS Error:', event); - setIsSpeaking(false); - setIsPaused(false); - cleanup(); - }; - - setUtterance(newUtterance); - speechSynthesis.speak(newUtterance); - - // Set a completion check in case onend doesn't fire - completionCheckRef.current = setTimeout(() => { - if (!speechSynthesis.speaking) { - setIsSpeaking(false); - setIsPaused(false); - cleanup(); - } - }, text.length * 50) as unknown as NodeJS.Timeout; - - } catch (error) { - console.error('Failed to initialize TTS:', error); - } - }; - - const pause = () => { - if (speechSynthesis && isSpeaking && !isPaused) { - speechSynthesis.pause(); - setIsPaused(true); - } - }; - - const resume = () => { - if (speechSynthesis && isPaused) { - speechSynthesis.resume(); - setIsPaused(false); - } - }; - - const stop = () => { - try { - if (speechSynthesis) { - speechSynthesis.cancel(); - } - setIsSpeaking(false); - setIsPaused(false); - setUtterance(null); - cleanup(); - } catch (error) { - console.error('Error while stopping TTS:', error); - } - }; - - // Get available voices - const getVoices = (): SpeechSynthesisVoice[] => { - return speechSynthesis?.getVoices() || []; - }; - - return { - speak, - pause, - resume, - stop, - getVoices, - isSpeaking, - isPaused - }; -}; diff --git a/app/utils/tts-helpers.ts b/app/utils/tts-helpers.ts deleted file mode 100644 index 71af0f4..0000000 --- a/app/utils/tts-helpers.ts +++ /dev/null @@ -1,99 +0,0 @@ - -/** - * Safely cancels speech synthesis and handles any potential errors - */ -export const safeCancel = () => { - try { - if (window.speechSynthesis) { - window.speechSynthesis.cancel(); - } - } catch (error) { - console.error('Error while cancelling speech synthesis:', error); - } -}; - -/** - * Checks if the browser supports speech synthesis - */ -export const isTTSSupported = (): boolean => { - return 'speechSynthesis' in window && - 'SpeechSynthesisUtterance' in window; -}; - -/** - * Chunks text into smaller pieces to prevent TTS cutoff - * @param text The text to chunk - * @param maxLength Maximum length of each chunk - * @returns Array of text chunks - */ -export const chunkText = (text: string, maxLength: number = 200): string[] => { - if (!text || text.length <= maxLength) { - return [text]; - } - - const chunks: string[] = []; - let currentChunk = ''; - - // Split by sentences to create more natural chunks - const sentences = text.split(/(?<=[.!?])\s+/); - - for (const sentence of sentences) { - if ((currentChunk + sentence).length <= maxLength) { - currentChunk += (currentChunk ? ' ' : '') + sentence; - } else { - // If a single sentence is too long, split by words - if (currentChunk) { - chunks.push(currentChunk); - currentChunk = sentence; - } else { - const words = sentence.split(' '); - currentChunk = words[0]; - - for (let i = 1; i < words.length; i++) { - if ((currentChunk + ' ' + words[i]).length <= maxLength) { - currentChunk += ' ' + words[i]; - } else { - chunks.push(currentChunk); - currentChunk = words[i]; - } - } - } - } - } - - if (currentChunk) { - chunks.push(currentChunk); - } - - return chunks; -}; - -/** - * Creates a resilient utterance with proper error handling - * @param text Text to speak - * @param options Speech options - * @param onEnd Callback when speech ends - * @returns SpeechSynthesisUtterance instance - */ -export const createResilientUtterance = ( - text: string, - options?: { rate?: number; pitch?: number; voice?: SpeechSynthesisVoice }, - onEnd?: () => void -): SpeechSynthesisUtterance => { - const utterance = new SpeechSynthesisUtterance(text); - - if (options?.rate) utterance.rate = options.rate; - if (options?.pitch) utterance.pitch = options.pitch; - if (options?.voice) utterance.voice = options.voice; - - utterance.onend = () => { - if (onEnd) onEnd(); - }; - - utterance.onerror = (event) => { - console.error('TTS error:', event); - if (onEnd) onEnd(); - }; - - return utterance; -}; From 5c6c21fe51a524182e404266eb45e3dc942317f8 Mon Sep 17 00:00:00 2001 From: y4my4my4m <8145020+y4my4my4m@users.noreply.github.com> Date: Wed, 26 Feb 2025 23:16:31 +0900 Subject: [PATCH 16/16] feat: enhance error handling for voice cache loading and remove unused code --- src/handler.rs | 15 ++++++++++++--- src/tts.rs | 15 +++------------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/handler.rs b/src/handler.rs index 5be1fa3..3fa77be 100644 --- a/src/handler.rs +++ b/src/handler.rs @@ -19,11 +19,8 @@ use tokio::sync::Mutex; use tokio::sync::mpsc::UnboundedSender; use crate::tts; -use std::path::Path; use tokio::fs; use crate::notification::{Notification, NotificationLevel}; -use std::sync::atomic::Ordering; -use std::time::Duration; pub async fn handle_key_events( key_event: KeyEvent, @@ -397,11 +394,23 @@ async fn load_voice_file( } }, Err(e) => { + sender.send(Event::Notification( + Notification::new( + format!("Failed to parse voice cache: {}", e), + NotificationLevel::Error + ) + ))?; // eprintln!("Failed to parse voice cache: {}", e); } } }, Err(e) => { + sender.send(Event::Notification( + Notification::new( + format!("Failed to read voice cache file: {}", e), + NotificationLevel::Error + ) + ))?; // eprintln!("Failed to read voice cache file: {}", e); } } diff --git a/src/tts.rs b/src/tts.rs index 81896de..8f24a85 100644 --- a/src/tts.rs +++ b/src/tts.rs @@ -2,7 +2,6 @@ use std::error::Error; use std::process::Stdio; use tokio::io::AsyncWriteExt; use serde::Serialize; -use serde::Deserialize; use futures::StreamExt; use reqwest::Client; use reqwest::header; @@ -11,7 +10,6 @@ use crate::config::TTSConfig; use std::sync::{Arc, Mutex, Once}; use lazy_static::lazy_static; use std::collections::HashSet; -use std::process::Child as StdChild; // Debug helper macro - you can remove this after debugging macro_rules! debug { @@ -40,13 +38,6 @@ struct TTSRequest { response_format: String, } -/// Structure for the voice upload response -#[derive(Debug, Deserialize)] -struct VoiceResponse { - voice_id: String, - created: u64, -} - /// Upload a voice file to be used as a custom TTS voice pub async fn upload_voice_file(file_path: &std::path::Path, tts_config: &TTSConfig) -> Result> { debug!("Uploading voice file: {:?}", file_path); @@ -137,7 +128,7 @@ pub async fn play_tts(text: &str, tts_config: &TTSConfig) -> Result<(), Box Result<(tokio::process::Child, /// Helper function to get the default voice file directory pub fn get_voice_dir() -> Result> { - let mut voice_dir = dirs::config_dir() + let voice_dir = dirs::config_dir() .ok_or_else(|| "Failed to find config directory")? .join("tenere") .join("audio"); @@ -358,7 +349,7 @@ lazy_static! { // Register cleanup handler for program termination fn register_cleanup() { CLEANUP_REGISTERED.call_once(|| { - let processes = TTS_PROCESSES.clone(); + let _processes = TTS_PROCESSES.clone(); // Register normal exit cleanup std::env::set_var("TENERE_CLEANUP_REGISTERED", "true");