From 90e0fcb054be1f15d2119caefb3e75a562ac6759 Mon Sep 17 00:00:00 2001 From: husk007 <72927717+husk007@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:39:02 +0200 Subject: [PATCH] Update pdf-to-mp3.py --- pdf-to-mp3.py | 104 +++++++++++++++++++++++++++----------------------- 1 file changed, 57 insertions(+), 47 deletions(-) diff --git a/pdf-to-mp3.py b/pdf-to-mp3.py index 907e78e..11cf256 100644 --- a/pdf-to-mp3.py +++ b/pdf-to-mp3.py @@ -1,19 +1,16 @@ -# this script converts pdf files to mp3 in the language of your choice - +# this script converts pdf files to mp3 in the language of your choice import os import glob import PyPDF2 -from googletrans import Translator, LANGUAGES +from googletrans import Translator from gtts import gTTS - def PdfConverter(): while True: # Create an mp3 folder if it doesn't exist def create_mp3_folder(): current_dir = os.getcwd() - mp3_folder_path = os.path.join(current_dir, 'mp3') if not os.path.exists(mp3_folder_path): os.makedirs(mp3_folder_path) @@ -21,7 +18,6 @@ def create_mp3_folder(): # Create a pdf folder if it doesn't exist def create_pdf_folder(): current_dir = os.getcwd() - pdf_folder_path = os.path.join(current_dir, 'pdf') if not os.path.exists(pdf_folder_path): os.makedirs(pdf_folder_path) @@ -32,6 +28,7 @@ def get_pdf_files(): if len(pdf_files) == 0: print('No pdf files to convert') + input("Press any key to continue...") return None # List files @@ -42,7 +39,6 @@ def get_pdf_files(): return pdf_files - # We get the file that needs to be converted and convert it according to the selected language def get_pdf_file_and_convert(pdf_files): # Get the number of the file to be converted @@ -68,13 +64,15 @@ def get_pdf_file_and_convert(pdf_files): # Get a list of PDFs pdf_files = get_pdf_files() if pdf_files is None: - print("End of the program.") + print("No PDFs found. End of the program.") + input("Press any key to exit...") exit() # Get the number of the file to be converted pdf_file_to_convert = get_pdf_file_and_convert(pdf_files) if pdf_file_to_convert is None: - print("End of the program.") + print("No file selected. End of the program.") + input("Press any key to exit...") exit() # Getting the playback language @@ -85,49 +83,60 @@ def get_pdf_file_and_convert(pdf_files): if language_choice in ['1', '2', '3', '4']: break print('Incorrect choice. Try again.') + language_map = {'1': 'en', '2': 'pl', '3': 'uk'} language = language_map[language_choice] if language_choice != '4' else None + try: + # Open PDF file + pdf_file = open(pdf_file_to_convert, 'rb') + pdf_reader = PyPDF2.PdfReader(pdf_file) - # Open PDF file - pdf_file = open(pdf_file_to_convert, 'rb') - pdf_reader = PyPDF2.PdfReader(pdf_file) - - # Initialize the text object - text = '' - - # Iterate through all the pages of the PDF file and get the text - for page_num in range(len(pdf_reader.pages)): - page = pdf_reader.pages[page_num].extract_text() - text += page.strip().replace("\n", " ").replace(" ' ", "'") - - # Translate text if needed - if language: - translator = Translator() - translation_text = '' - for i in range(0, len(text), 5000): - text_chunk = text[i:i+5000] - translation = translator.translate(text_chunk, dest=language) - translation_text += translation.text - text = translation_text - # Recognize text and get language code - elif language == None: - translator = Translator() - text_chunk = text[:5000] - detected = translator.detect(text_chunk) - lang_code = detected.lang - language = lang_code + # Initialize the text object + text = '' - # Synthesizing speech and saving the audio file - print("The file is being written. Waitin...") - tts = gTTS(text=text, lang=language, slow=False) - - mp3_file_name = os.path.splitext(os.path.basename(pdf_file_to_convert))[0] + '.mp3' - mp3_file_path = os.path.join('mp3', mp3_file_name) + # Iterate through all the pages of the PDF file and get the text + for page_num in range(len(pdf_reader.pages)): + page = pdf_reader.pages[page_num].extract_text() + if page: + text += page.strip().replace("\n", " ").replace(" ' ", "'") + + # Translate text if needed + if language: + translator = Translator() + translation_text = '' + for i in range(0, len(text), 5000): + text_chunk = text[i:i+5000] + translation = translator.translate(text_chunk, dest=language) + translation_text += translation.text + text = translation_text + elif language == None: + translator = Translator() + text_chunk = text[:5000] + detected = translator.detect(text_chunk) + if detected: + language = detected.lang + else: + # Obsłuż sytuację, gdy język nie został wykryty + print("Language could not be detected. I use the default English language.") + language = 'en' + + # Synthesizing speech and saving the audio file + print("The file is being written. Wait...") + tts = gTTS(text=text, lang=language, slow=False) + + mp3_file_name = os.path.splitext(os.path.basename(pdf_file_to_convert))[0] + '.mp3' + mp3_file_path = os.path.join('mp3', mp3_file_name) + + tts.save(mp3_file_path) + print(f'{mp3_file_name} file successfully created in mp3 folder.\n') + input("Press any key to continue...") # Pause for the user to review information + + except Exception as e: + print(f"An error occurred: {e}") + input("Press any key to exit...") # Wait before closing in case of error + exit() - tts.save(mp3_file_path) - print(f'{mp3_file_name} file successfully created in mp3 folder.\n') - # Selection result while True: try: @@ -136,6 +145,7 @@ def get_pdf_file_and_convert(pdf_files): break elif choice_result == 2: print("End of the program.") + input("Press any key to exit...") exit() except ValueError: print("Invalid number entered. Try again.") @@ -144,4 +154,4 @@ def get_pdf_file_and_convert(pdf_files): if __name__ == '__main__': - PdfConverter() \ No newline at end of file + PdfConverter()