"""System tray voice-to-text application.

Records audio with ffmpeg, transcribes it with Whisper, optionally
post-processes the transcript with Ollama, and delivers the result either to
the clipboard (``wl-copy``) or to a dated journal file. Recording can be
controlled through a UNIX domain socket.
"""

import datetime
import json
import os
import socket
import subprocess
import sys
import threading

import requests
from PyQt5.QtCore import QThread, pyqtSignal
from PyQt5.QtGui import QIcon
from PyQt5.QtWidgets import QAction, QApplication, QMenu, QSystemTrayIcon

from pyvtt.configuration import read_configurations
from pyvtt.notify import notify, play_sound

CONFIGURATION = read_configurations()
CURRENT_PRESET = CONFIGURATION["presets"][0]  # Default to first preset


class WhisperWorker(QThread):
    """Transcribe the recorded audio file and deliver the result.

    The worker runs Whisper on ``CONFIGURATION["audio_file"]``, optionally
    sends the transcript to Ollama (unless the preset's ``"ollama"`` value is
    ``"disable"``), and then either appends the text to a journal file (preset
    mode ``"journal"``) or copies it to the clipboard via ``wl-copy``.

    Signals:
        finished (pyqtSignal(str)): Emitted with the final text after a
            successful run. It is not emitted when any stage fails.
    """

    finished = pyqtSignal(str)

    def run(self):
        """Execute the transcription pipeline; failures are reported via notify()."""
        # Snapshot the module globals once so that a preset switch or a
        # configuration reload during processing cannot mix settings.
        config = CONFIGURATION
        preset = CURRENT_PRESET
        try:
            raw_result = self._transcribe(config, preset)
            if raw_result is None:
                return

            formatted_result = self._postprocess(config, preset, raw_result)
            if formatted_result is None:
                return

            if not self._deliver(config, preset, formatted_result):
                return

            notify("Spracherkennung", "Transkription abgeschlossen!")
            play_sound()
            self.finished.emit(formatted_result)
        except Exception as e:
            # Last-resort boundary: an exception must not escape a QThread.
            print(f"Fehler: {e}")
            notify("Fehler", "Ein Fehler ist aufgetreten!")

    def _transcribe(self, config, preset):
        """Run Whisper and return the transcript, or None after a reported error."""
        output_file = config["output_file"]
        # Only the trailing ".txt" is stripped for the "-of" argument; a
        # ".txt" elsewhere in the path must stay intact.
        if output_file.endswith(".txt"):
            output_base = output_file[: -len(".txt")]
        else:
            output_base = output_file

        whisper_cmd = [
            config["whisper_path"],
            "-m", preset["whisper_model"],
            "-f", config["audio_file"],
            "-l", preset["language"],
            "-otxt",
            "-of", output_base,
        ]
        try:
            subprocess.run(whisper_cmd, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing or non-executable Whisper binary.
            print(f"Whisper Fehler: {e}")
            notify("Fehler", "Ein Fehler mit 'Whisper' ist aufgetreten!")
            return None

        try:
            with open(output_file, "r", encoding="utf-8") as f:
                raw_result = "\n".join(line.strip() for line in f)
        except Exception as e:
            print(f"Datei Fehler: {e}")
            notify("Fehler", "Ein Fehler beim Lesen der Whisper-Ausgabe ist aufgetreten!")
            return None

        print("Whisper Transkript erhalten.")
        return raw_result

    def _postprocess(self, config, preset, raw_result):
        """Return the Ollama-processed text, the raw text if Ollama is disabled,
        or None after a reported communication error."""
        if preset["ollama"] == "disable":
            print("Kein Ollama Prompt angegeben, nur Whisper Ergebnis verwendet.")
            return raw_result

        prompt = preset["ollama_prompt"]
        if isinstance(prompt, list):
            prompt = "\n".join(prompt)

        payload = {
            "model": preset["ollama_model"],
            "messages": [
                {"role": "system", "content": prompt},
                {"role": "user", "content": raw_result},
            ],
            "options": {
                "num_ctx": preset["ollama_context"],
            },
            "stream": False,
        }
        ollama_endpoint = f"{config['ollama_url']}:{config['ollama_port']}/api/chat"

        try:
            response = requests.post(ollama_endpoint, json=payload)
            response.raise_for_status()
            json_response = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # RequestException also covers connection failures, not only HTTP
            # status errors; ValueError covers a non-JSON response body.
            print(f"HTTP Fehler: {e}")
            notify("Fehler", "Ein Fehler bei der Kommunikation mit 'Ollama' ist aufgetreten!")
            return None

        content = json_response.get("message", {}).get("content", "").strip()
        formatted_result = "\n".join(line.strip() for line in content.splitlines())
        print("Ollama Antwort erhalten.")
        return formatted_result

    def _deliver(self, config, preset, text):
        """Write text to the journal or the clipboard; return True on success."""
        if preset["mode"] == "journal":
            return self._append_to_journal(config, preset, text)

        try:
            subprocess.run(["wl-copy"], input=text.encode(), check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers wl-copy not being installed.
            print(f"Clipboard Fehler: {e}")
            notify("Fehler", "Ein Fehler beim Kopieren des Ergebnisses ist aufgetreten!")
            return False
        return True

    def _append_to_journal(self, config, preset, text):
        """Append a timestamped entry to today's journal file; return True on success."""
        today = datetime.date.today().strftime("%Y.%m.%d")
        journal_path = os.path.join(
            config["journal_path"],
            f"{today} - {preset['journal_name']}.md",
        )
        now = datetime.datetime.now().strftime("%H:%M:%S")

        # A new journal file starts with a heading line.
        if not os.path.exists(journal_path):
            try:
                with open(journal_path, "w", encoding="utf-8") as f:
                    f.write(f"# {preset['journal_name']} - {today}\n\n")
            except Exception as e:
                print(f"Journal Erstellungsfehler: {e}")
                notify("Fehler", "Ein Fehler beim Erstellen des Journals ist aufgetreten!")
                return False

        try:
            with open(journal_path, "a", encoding="utf-8") as f:
                f.write(f"## {now} Uhr\n")
                f.write(f"{text}\n\n")
        except Exception as e:
            print(f"Journal Fehler: {e}")
            notify("Fehler", "Ein Fehler beim Schreiben ins Journal ist aufgetreten!")
            return False
        return True


class SocketListener(threading.Thread):
    """Daemon thread that accepts commands on a UNIX domain socket.

    The commands "toggle", "start" and "stop" invoke ``toggle_recording``,
    ``start_recording`` and ``stop_recording_if_possible`` on the given tray
    application. Any other input is ignored.

    Attributes:
        tray_app (object): Object providing the three recording methods.
        socket_path (str): Path the socket was bound to at construction time.
        sock (socket.socket): The listening UNIX domain socket.
    """

    def __init__(self, tray_app):
        super().__init__(daemon=True)
        self.tray_app = tray_app
        # Remember the bound path: the global configuration may be reloaded
        # later, but the socket file stays where it was created.
        self.socket_path = CONFIGURATION["socket_path"]

        # Remove a stale socket file, otherwise bind() would fail.
        try:
            os.remove(self.socket_path)
        except FileNotFoundError:
            pass

        self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        self.sock.bind(self.socket_path)
        os.chmod(self.socket_path, 0o666)
        self.sock.listen(1)

    def run(self):
        """Accept connections forever and dispatch one command per connection."""
        handlers = {
            "toggle": self.tray_app.toggle_recording,
            "start": self.tray_app.start_recording,
            "stop": self.tray_app.stop_recording_if_possible,
        }
        while True:
            try:
                conn, _ = self.sock.accept()
            except OSError as e:
                # The listening socket itself is unusable; nothing left to serve.
                print(f"Socket Fehler: {e}")
                return

            # A failure while handling one connection (undecodable bytes, a
            # handler raising) must not terminate the listener thread.
            try:
                with conn:
                    data = conn.recv(1024).decode().strip()
                    handler = handlers.get(data)
                    if handler is not None:
                        handler()
            except Exception as e:
                print(f"Socket Befehl Fehler: {e}")


class TrayApp:
    """System tray application providing voice-to-text functionality.

    Attributes:
        app (QApplication): The main application instance.
        tray (QSystemTrayIcon): The system tray icon.
        menu (QMenu): The context menu of the tray icon.
        preset_actions (list): Checkable QAction objects, one per preset.
        preset_group (QMenu): Submenu holding the preset actions.
        reload_action (QAction): Action that reloads the configuration.
        quit_action (QAction): Action that quits the application.
        recording_process (subprocess.Popen or None): Running ffmpeg process.
        socket_listener (SocketListener): Listener for socket commands.
        worker (WhisperWorker or None): Most recently started worker thread.
    """

    def __init__(self):
        self.app = QApplication(sys.argv)
        self.tray = QSystemTrayIcon(QIcon.fromTheme("audio-input-microphone"))
        self.menu = QMenu()
        self.app.aboutToQuit.connect(self.cleanup)

        # Preset menu
        self.preset_actions = []
        self.preset_group = QMenu("Presets")
        self._populate_presets()
        self.menu.addMenu(self.preset_group)

        # Reload configurations
        self.reload_action = QAction("Einstellungen neu laden")
        self.reload_action.triggered.connect(self.reload_configurations)
        self.menu.addAction(self.reload_action)

        # Quit
        self.quit_action = QAction("Beenden")
        self.quit_action.triggered.connect(self.app.quit)
        self.menu.addAction(self.quit_action)

        self.tray.setContextMenu(self.menu)
        self.tray.setToolTip("Voice to Text")
        self.tray.show()

        self.recording_process = None
        self.worker = None
        # Earlier workers that may still be running; holding a reference keeps
        # a running QThread from being destroyed.
        self._running_workers = []
        self.socket_listener = SocketListener(self)
        self.socket_listener.start()

    def _populate_presets(self):
        """Rebuild the preset submenu from CONFIGURATION, checking the first entry."""
        self.preset_group.clear()
        self.preset_actions = []
        for i, preset in enumerate(CONFIGURATION["presets"]):
            action = QAction(preset["name"], self.menu)
            action.setCheckable(True)
            action.setChecked(i == 0)
            # Bind the index as a default argument to avoid late binding.
            action.triggered.connect(lambda checked, index=i: self.set_preset(index))
            self.preset_group.addAction(action)
            self.preset_actions.append(action)

    def set_preset(self, index):
        """Make the preset at the given index the active one and update the menu."""
        global CURRENT_PRESET
        print(f"Preset gewechselt: {CONFIGURATION['presets'][index]['name']}")
        CURRENT_PRESET = CONFIGURATION["presets"][index]
        # Only one action may be checked at a time.
        for i, action in enumerate(self.preset_actions):
            action.setChecked(i == index)

    def start_recording(self):
        """Start an ffmpeg recording unless one is already running."""
        if self.recording_process is None:
            print("Starte Aufnahme...")
            self.recording_process = subprocess.Popen([
                "ffmpeg",
                "-f", "pulse",
                "-i", "default",
                "-ar", "16000",
                "-ac", "1",
                CONFIGURATION["audio_file"],
                "-y",
                "-loglevel", "quiet",
            ])
            notify("Aufnahme", "Aufnahme gestartet!")

    def stop_recording_if_possible(self):
        """Stop a running recording and start processing the audio."""
        if self.recording_process:
            print("Stoppe Aufnahme...")
            self.recording_process.terminate()
            self.recording_process.wait()
            self.recording_process = None
            notify("Aufnahme", "Aufnahme beendet, verarbeite...")
            self.start_whisper_worker()

    def toggle_recording(self):
        """Stop the recording if one is running, otherwise start one."""
        if self.recording_process:
            self.stop_recording_if_possible()
        else:
            self.start_recording()

    def start_whisper_worker(self):
        """Start a WhisperWorker thread for the recorded audio."""
        # Forget workers that are done, but keep those still running alive:
        # rebinding self.worker alone could drop the last reference to a
        # QThread that has not finished yet.
        self._running_workers = [w for w in self._running_workers if w.isRunning()]

        worker = WhisperWorker()
        worker.finished.connect(self.show_result)
        self._running_workers.append(worker)
        self.worker = worker
        worker.start()

    def show_result(self, text):
        """Print the final text produced by a worker."""
        print(f"Fertig:\n{text}")

    def reload_configurations(self):
        """Re-read the configuration, reset to the first preset, rebuild the menu."""
        global CONFIGURATION, CURRENT_PRESET
        print("Lade Einstellungen neu...")
        CONFIGURATION = read_configurations()
        CURRENT_PRESET = CONFIGURATION["presets"][0]  # Default to first preset
        self._populate_presets()
        print("Einstellungen erfolgreich neu geladen.")

    def cleanup(self):
        """Release external resources before the application exits."""
        # Do not leave an ffmpeg child process recording after quit.
        process = self.recording_process
        if process is not None:
            process.terminate()
            process.wait()
            self.recording_process = None

        # Remove the socket file at the path that was actually bound, which
        # may differ from the current configuration after a reload.
        socket_path = self.socket_listener.socket_path
        if os.path.exists(socket_path):
            os.remove(socket_path)
            print("Socket sauber entfernt.")

    def run(self):
        """Enter the Qt event loop and exit the process with its return code."""
        sys.exit(self.app.exec_())


def main():
    """Create the tray application and run it."""
    TrayApp().run()