# PyVtT/src/pyvtt/libs/whisper.py

import subprocess
from typing import Optional
from pathlib import Path

from pyvtt.libs.notify import notify
from pyvtt.models.config import AppConfig, PresetConfig


class WhisperClient:
    """Thin wrapper that runs the Whisper command-line binary on one audio file.

    The binary path, the audio input path and the text output path are taken
    from the application configuration once; each :meth:`transcribe` call then
    runs Whisper with the model and language of the given preset.
    """

    def __init__(self, config: AppConfig):
        """
        Initialize the Whisper client from the global application configuration.

        :param config: AppConfig instance providing ``whisper_path``,
            ``audio_file`` and ``output_file``.
        """
        self.whisper_path = config.whisper_path
        self.audio_file = config.audio_file
        self.output_file = config.output_file

    def transcribe(self, config: PresetConfig) -> str:
        """
        Run the Whisper CLI on the audio file and return the transcript.

        Failures are reported via ``print`` and a desktop notification instead
        of being raised, so callers only need to check for an empty result.

        :param config: PresetConfig instance providing ``whisper_model`` and
            ``language``.
        :return: The transcript, with every line stripped of surrounding
            whitespace and joined by newlines – or an empty string if Whisper
            could not be run, exited with an error, or its output file could
            not be read.
        """
        # Whisper receives the output path *without* extension via "-of"
        # (with "-otxt" it is expected to append ".txt" itself). Strip only a
        # trailing ".txt": a plain str.replace() would also mangle ".txt"
        # occurring in a directory name or in the middle of the file name.
        txt_suffix = ".txt"
        output_base = str(self.output_file)
        if output_base.endswith(txt_suffix):
            output_base = output_base[: -len(txt_suffix)]

        whisper_cmd = [
            self.whisper_path,
            "-m", config.whisper_model,
            "-f", self.audio_file,
            "-l", config.language,
            "-otxt",
            "-of", output_base,
        ]

        try:
            subprocess.run(whisper_cmd, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # CalledProcessError: Whisper ran but exited non-zero.
            # OSError: the binary could not be started at all (missing file,
            # no execute permission, ...), which subprocess.run raises directly.
            print(f"[WhisperClient] Whisper-Ausführungsfehler: {e}")
            notify("Fehler", "Ein Fehler mit 'Whisper' ist aufgetreten!")
            return ""

        try:
            with open(self.output_file, "r", encoding="utf-8") as f:
                # Iterate the file lazily; no need to materialize all lines.
                return "\n".join(line.strip() for line in f)
        except (OSError, ValueError) as e:
            # OSError covers missing/unreadable files; ValueError covers
            # decoding problems (UnicodeDecodeError) and invalid paths.
            print(f"[WhisperClient] Fehler beim Einlesen der Ausgabedatei: {e}")
            notify("Fehler", "Ein Fehler beim Lesen der Whisper-Ausgabe ist aufgetreten!")
            return ""