feat(libs): integrate Ollama and Whisper clients with config models

- Add `AppConfig` and `PresetConfig` models using Pydantic for config validation
- Refactor `read_configurations` to return an `AppConfig` instance
- Implement `OllamaClient` for chat-based server interaction
- Implement `WhisperClient` for transcription via Whisper CLI
- Migrate notification utilities to `libs` directory
- Update tray application to use new clients and config structure
- Simplify Whisper and Ollama integration logic in `WhisperWorker`

Signed-off-by: Max P. <Mail@MPassarello.de>
commit 58b9cb586c (parent 5688769437)
Date: 2025-05-05 12:00:33 +02:00
6 changed files with 182 additions and 88 deletions

src/pyvtt/configuration.py

@@ -2,9 +2,11 @@ import json
import os
from pathlib import Path
from pyvtt.models.config import AppConfig
DEFAULT_CONFIG_PATH = Path.home() / ".pyvtt.json"
def read_configurations():
def read_configurations() -> AppConfig:
"""
Reads the configuration settings from the JSON file '.pyvtt.json'
located in the user's home directory.
@@ -18,7 +20,8 @@ def read_configurations():
"""
try:
with open(DEFAULT_CONFIG_PATH) as f:
return json.load(f)
raw_config = json.load(f)
return AppConfig(**raw_config)
except Exception as e:
print(f"Error reading configurations: {e}")
raise Exception(f"Error reading configurations: {e}")

src/pyvtt/libs/ollama.py (new file)

@@ -0,0 +1,67 @@
import requests
from typing import Union, List, Optional
from pyvtt.libs.notify import notify
from pyvtt.models.config import AppConfig, PresetConfig
class OllamaClient:
def __init__(self, config: AppConfig):
"""
Initializes the Ollama client with the base settings from the global app configuration.
:param config: AppConfig instance with host and port for the Ollama server.
"""
self.base_url = config.ollama_url
self.port = config.ollama_port
def send_chat(
self,
user_message: str,
config: PresetConfig,
) -> str:
"""
Sends a chat request to the Ollama server based on the given preset configuration.
:param user_message: The user-provided input text (e.g. a transcript).
:param config: PresetConfig instance with model, prompt, and context parameters.
:return: The formatted response text returned by Ollama, the unchanged user_message
if Ollama is disabled in the preset, or an empty string on error.
"""
if config.ollama and config.ollama.lower() == "disable":
print("[OllamaClient] Ollama ist im Preset deaktiviert.")
print("[OllamaClient] Gebe die Eingabe unverändert zurück.")
return user_message
# Normalize the prompt to a string – a list is joined into lines
if isinstance(config.ollama_prompt, list):
prompt_str = "\n".join(config.ollama_prompt)
else:
prompt_str = config.ollama_prompt
# Prepare the payload for the API request
payload = {
"model": config.ollama_model,
"messages": [
{"role": "system", "content": prompt_str},
{"role": "user", "content": user_message}
],
"options": {
"num_ctx": config.ollama_context,
} if config.ollama_context else {},
"stream": False
}
endpoint = f"{self.base_url}:{self.port}/api/chat"
# Send the request to Ollama and extract the response
try:
response = requests.post(endpoint, json=payload)
response.raise_for_status()
json_response = response.json()
content = json_response.get("message", {}).get("content", "").strip()
return "\n".join(line.strip() for line in content.splitlines())
except requests.exceptions.RequestException as e:
print(f"[OllamaClient] HTTP-Fehler: {e}")
notify("Fehler", "Ein Fehler bei der Kommunikation mit 'Ollama' ist aufgetreten!")
return ""

src/pyvtt/libs/whisper.py (new file)

@@ -0,0 +1,50 @@
import subprocess
from typing import Optional
from pathlib import Path
from pyvtt.libs.notify import notify
from pyvtt.models.config import AppConfig, PresetConfig
class WhisperClient:
def __init__(self, config: AppConfig):
"""
Initializes the Whisper client with the global application configuration.
:param config: AppConfig instance with paths to the Whisper binary, the audio file, and the output file.
"""
self.whisper_path = config.whisper_path
self.audio_file = config.audio_file
self.output_file = config.output_file
def transcribe(self, config: PresetConfig) -> str:
"""
Runs the Whisper CLI to transcribe the audio file and returns the transcript.
:param config: PresetConfig instance with the Whisper model and language settings.
:return: The raw transcript as a string – or an empty string on errors.
"""
output_base = self.output_file.replace(".txt", "")
whisper_cmd = [
self.whisper_path,
"-m", config.whisper_model,
"-f", self.audio_file,
"-l", config.language,
"-otxt",
"-of", output_base
]
try:
subprocess.run(whisper_cmd, check=True)
except subprocess.CalledProcessError as e:
print(f"[WhisperClient] Whisper-Ausführungsfehler: {e}")
notify("Fehler", "Ein Fehler mit 'Whisper' ist aufgetreten!")
return ""
try:
with open(self.output_file, "r", encoding="utf-8") as f:
return "\n".join(line.strip() for line in f.readlines())
except Exception as e:
print(f"[WhisperClient] Fehler beim Einlesen der Ausgabedatei: {e}")
notify("Fehler", "Ein Fehler beim Lesen der Whisper-Ausgabe ist aufgetreten!")
return ""

src/pyvtt/models/config.py (new file)

@@ -0,0 +1,25 @@
from typing import List, Optional, Union
from pydantic import BaseModel, HttpUrl, Field
class PresetConfig(BaseModel):
name: str
language: str
whisper_model: str
ollama: Optional[str] = None
ollama_model: Optional[str] = None
ollama_context: Optional[int] = None
ollama_prompt: Optional[Union[str, List[str]]] = None
mode: Optional[str] = None
journal_name: Optional[str] = None
class AppConfig(BaseModel):
audio_file: str
output_file: str
whisper_path: str
socket_path: str
ollama_url: str
ollama_port: int
journal_path: str
presets: List[PresetConfig]
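
For illustration, a hypothetical configuration that would satisfy these models (all paths, ports, and preset values are invented, not taken from the repository):

```python
from pyvtt.models.config import AppConfig, PresetConfig

# Hypothetical values that only illustrate the expected shape of the config.
config = AppConfig(
    audio_file="/tmp/pyvtt.wav",
    output_file="/tmp/pyvtt.txt",
    whisper_path="/usr/local/bin/whisper-cli",
    socket_path="/tmp/pyvtt.sock",
    ollama_url="http://localhost",
    ollama_port=11434,
    journal_path="/home/user/Journal",
    presets=[
        PresetConfig(
            name="default",
            language="de",
            whisper_model="/models/ggml-base.bin",
            ollama_model="llama3",
            ollama_prompt=["You clean up transcripts.", "Fix punctuation only."],
            ollama_context=4096,
        )
    ],
)
print(config.presets[0].name)
```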


@@ -10,10 +10,12 @@ from PyQt5.QtWidgets import QApplication, QSystemTrayIcon, QMenu, QAction
from PyQt5.QtGui import QIcon
from PyQt5.QtCore import QThread, pyqtSignal
from pyvtt.configuration import read_configurations
from pyvtt.notify import notify, play_sound
from pyvtt.libs.notify import notify, play_sound
from pyvtt.libs.ollama import OllamaClient
from pyvtt.libs.whisper import WhisperClient
CONFIGURATION = read_configurations()
CURRENT_PRESET = CONFIGURATION["presets"][0] # Default to first preset
CONFIG = read_configurations()
CURRENT_PRESET = CONFIG.presets[0] # Default to first preset
class WhisperWorker(QThread):
"""
@@ -31,82 +33,28 @@ class WhisperWorker(QThread):
"""
finished = pyqtSignal(str)
def __init__(self):
super().__init__()
self.whisper = WhisperClient(CONFIG)
self.ollama = OllamaClient(CONFIG)
def run(self):
CURENT_CONFIGURATION_LOCALE = CONFIGURATION
CURENT_CONFIG_LOCALE = CONFIG
CURRENT_PRESET_LOCALE = CURRENT_PRESET
try:
# Run Whisper
whisper_cmd = [
CURENT_CONFIGURATION_LOCALE["whisper_path"],
"-m", CURRENT_PRESET_LOCALE["whisper_model"],
"-f", CURENT_CONFIGURATION_LOCALE["audio_file"],
"-l", CURRENT_PRESET_LOCALE["language"],
"-otxt",
"-of", CURENT_CONFIGURATION_LOCALE["output_file"].replace(".txt", "")
]
try:
subprocess.run(whisper_cmd, check=True)
except subprocess.CalledProcessError as e:
print(f"Whisper Fehler: {e}")
notify("Fehler", "Ein Fehler mit 'Whisper' ist aufgetreten!")
return
try:
with open(CURENT_CONFIGURATION_LOCALE["output_file"], "r") as f:
raw_result = "\n".join(line.strip() for line in f.readlines())
except Exception as e:
print(f"Datei Fehler: {e}")
notify("Fehler", "Ein Fehler beim Lesen der Whisper-Ausgabe ist aufgetreten!")
return
print("Whisper Transkript erhalten.")
# --- Send to Ollama ---
if CURRENT_PRESET_LOCALE["ollama"] != "disable":
if isinstance(CURRENT_PRESET_LOCALE["ollama_prompt"], list):
prompt = "\n".join(CURRENT_PRESET_LOCALE["ollama_prompt"])
else:
prompt = CURRENT_PRESET_LOCALE["ollama_prompt"]
payload = {
"model": CURRENT_PRESET_LOCALE["ollama_model"],
"messages": [
{"role": "system", "content": prompt},
{"role": "user", "content": raw_result}
],
"options": {
"num_ctx": CURRENT_PRESET_LOCALE["ollama_context"]
},
"stream": False
}
ollama_endpoint = f"{CURENT_CONFIGURATION_LOCALE['ollama_url']}:{CURENT_CONFIGURATION_LOCALE['ollama_port']}/api/chat"
response = requests.post(ollama_endpoint, json=payload)
try:
response.raise_for_status()
except requests.exceptions.HTTPError as e:
print(f"HTTP Fehler: {e}")
notify("Fehler", "Ein Fehler bei der Kommunikation mit 'Ollama' ist aufgetreten!")
return
json_response = response.json()
formatted_result = json_response.get("message", {}).get("content", "").strip()
formatted_result = "\n".join(line.strip() for line in formatted_result.splitlines())
print("Ollama Antwort erhalten.")
else:
formatted_result = raw_result
print("Kein Ollama Prompt angegeben, nur Whisper Ergebnis verwendet.")
raw_result = self.whisper.transcribe(CURRENT_PRESET_LOCALE)
formatted_result = self.ollama.send_chat(raw_result, CURRENT_PRESET_LOCALE)
# Copy the result to the clipboard
if CURRENT_PRESET_LOCALE.get("mode") == "journal":
if CURRENT_PRESET_LOCALE.mode == "journal":
today = datetime.date.today().strftime("%Y.%m.%d")
journal_path = os.path.join(CURENT_CONFIGURATION_LOCALE["journal_path"], f"{today} - {CURRENT_PRESET_LOCALE['journal_name']}.md")
journal_path = os.path.join(CURENT_CONFIG_LOCALE.journal_path, f"{today} - {CURRENT_PRESET_LOCALE.journal_name}.md")
now = datetime.datetime.now().strftime("%H:%M:%S")
if not os.path.exists(journal_path):
try:
with open(journal_path, "w") as f:
f.write(f"# {CURRENT_PRESET_LOCALE['journal_name']} - {today}\n\n")
f.write(f"# {CURRENT_PRESET_LOCALE.journal_name} - {today}\n\n")
except Exception as e:
print(f"Journal Erstellungsfehler: {e}")
notify("Fehler", "Ein Fehler beim Erstellen des Journals ist aufgetreten!")
@@ -157,11 +105,11 @@ class SocketListener(threading.Thread):
def __init__(self, tray_app):
super().__init__(daemon=True)
self.tray_app = tray_app
if os.path.exists(CONFIGURATION["socket_path"]):
os.remove(CONFIGURATION["socket_path"])
if os.path.exists(CONFIG.socket_path):
os.remove(CONFIG.socket_path)
self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
self.sock.bind(CONFIGURATION["socket_path"])
os.chmod(CONFIGURATION["socket_path"], 0o666)
self.sock.bind(CONFIG.socket_path)
os.chmod(CONFIG.socket_path, 0o666)
self.sock.listen(1)
def run(self):
@@ -174,8 +122,8 @@ class SocketListener(threading.Thread):
if len(cmd) > 1:
data = cmd[0]
preset = cmd[1]
if preset in [p["name"] for p in CONFIGURATION["presets"]]:
self.tray_app.set_preset([p["name"] for p in CONFIGURATION["presets"]].index(preset))
if preset in [p.name for p in CONFIG.presets]:
self.tray_app.set_preset([p.name for p in CONFIG.presets].index(preset))
else:
data = cmd[0]
if data == "toggle":
@@ -224,8 +172,8 @@ class TrayApp:
# Preset menu
self.preset_actions = []
self.preset_group = QMenu("Presets")
for i, preset in enumerate(CONFIGURATION["presets"]):
action = QAction(preset["name"], self.menu)
for i, preset in enumerate(CONFIG.presets):
action = QAction(preset.name, self.menu)
action.setCheckable(True)
if i == 0:
action.setChecked(True)
@@ -255,8 +203,9 @@ class TrayApp:
def set_preset(self, index):
global CURRENT_PRESET
print(f"Preset gewechselt: {CONFIGURATION['presets'][index]['name']}")
CURRENT_PRESET = CONFIGURATION["presets"][index]
selected_preset = CONFIG.presets[index]
print(f"Preset gewechselt: {selected_preset.name}")
CURRENT_PRESET = selected_preset
# Only one preset may be checked at a time
for i, action in enumerate(self.preset_actions):
action.setChecked(i == index)
@@ -266,7 +215,7 @@ class TrayApp:
print("Starte Aufnahme...")
self.recording_process = subprocess.Popen([
"ffmpeg", "-f", "pulse", "-i", "default", "-ar", "16000",
"-ac", "1", CONFIGURATION["audio_file"], "-y", "-loglevel", "quiet"
"-ac", "1", CONFIG.audio_file, "-y", "-loglevel", "quiet"
])
notify("Aufnahme", "Aufnahme gestartet!")
@@ -294,15 +243,15 @@ class TrayApp:
print(f"Fertig:\n{text}")
def reload_configurations(self):
global CONFIGURATION, CURRENT_PRESET
global CONFIG, CURRENT_PRESET
print("Lade Einstellungen neu...")
CONFIGURATION = read_configurations()
CURRENT_PRESET = CONFIGURATION["presets"][0] # Default to first preset
CONFIG = read_configurations()
CURRENT_PRESET = CONFIG.presets[0] # Default to first preset
# Update preset menu
self.preset_group.clear()
self.preset_actions = []
for i, preset in enumerate(CONFIGURATION["presets"]):
action = QAction(preset["name"], self.menu)
for i, preset in enumerate(CONFIG.presets):
action = QAction(preset.name, self.menu)
action.setCheckable(True)
if i == 0:
action.setChecked(True)
@@ -312,8 +261,8 @@ class TrayApp:
print("Einstellungen erfolgreich neu geladen.")
def cleanup(self):
if os.path.exists(CONFIGURATION["socket_path"]):
os.remove(CONFIGURATION["socket_path"])
if os.path.exists(CONFIG.socket_path):
os.remove(CONFIG.socket_path)
print("Socket sauber entfernt.")
def run(self):
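
Although not shown in this diff, the listener can be driven from outside the tray app; a minimal sketch, assuming the listener accepts a bare command string such as "toggle" per connection:

```python
import socket
from pyvtt.configuration import read_configurations

# Connect to the tray application's Unix domain socket and send "toggle",
# which the SocketListener above maps to starting or stopping a recording.
config = read_configurations()
with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as client:
    client.connect(config.socket_path)
    client.sendall(b"toggle")
```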