feat(libs): integrate Ollama and Whisper clients with config models
- Add `AppConfig` and `PresetConfig` models using Pydantic for config validation
- Refactor `read_configurations` to return an `AppConfig` instance
- Implement `OllamaClient` for chat-based server interaction
- Implement `WhisperClient` for transcription via Whisper CLI
- Migrate notification utilities to `libs` directory
- Update tray application to use new clients and config structure
- Simplify Whisper and Ollama integration logic in `WhisperWorker`

Signed-off-by: Max P. <Mail@MPassarello.de>
This commit is contained in:
@@ -2,9 +2,11 @@ import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from pyvtt.models.config import AppConfig
|
||||
|
||||
DEFAULT_CONFIG_PATH = Path.home() / ".pyvtt.json"
|
||||
|
||||
def read_configurations():
|
||||
def read_configurations() -> AppConfig:
|
||||
"""
|
||||
Reads the configuration settings from a JSON file named 'pyvtt.settings.json'
|
||||
located in the same directory as the script.
|
||||
@@ -18,7 +20,8 @@ def read_configurations():
|
||||
"""
|
||||
try:
|
||||
with open(DEFAULT_CONFIG_PATH) as f:
|
||||
return json.load(f)
|
||||
raw_config = json.load(f)
|
||||
return AppConfig(**raw_config)
|
||||
except Exception as e:
|
||||
print(f"Error reading configurations: {e}")
|
||||
raise Exception(f"Error reading configurations: {e}")
|
67
src/pyvtt/libs/ollama.py
Normal file
67
src/pyvtt/libs/ollama.py
Normal file
@@ -0,0 +1,67 @@
|
||||
import requests
|
||||
from typing import Union, List, Optional
|
||||
|
||||
from pyvtt.libs.notify import notify
|
||||
from pyvtt.models.config import AppConfig, PresetConfig
|
||||
|
||||
|
||||
class OllamaClient:
    """Small client for the ``/api/chat`` endpoint of an Ollama server."""

    def __init__(self, config: "AppConfig", timeout: Optional[float] = None):
        """
        Store the server address taken from the global application configuration.

        :param config: AppConfig instance providing ``ollama_url`` and ``ollama_port``.
        :param timeout: Optional timeout in seconds handed to ``requests.post``.
            ``None`` (the default) keeps the previous behaviour of waiting
            indefinitely for the server.
        """
        # A trailing slash would otherwise end up in front of the ":<port>" part
        # of the endpoint and produce an invalid URL.
        self.base_url = config.ollama_url.rstrip("/")
        self.port = config.ollama_port
        self.timeout = timeout

    def send_chat(
        self,
        user_message: str,
        config: "PresetConfig",
    ) -> str:
        """
        Send a chat request to the Ollama server using the given preset.

        :param user_message: Text produced by the user (e.g. a transcript).
        :param config: PresetConfig instance with the model, prompt and context
            settings for this request.
        :return: The answer text with every line stripped of surrounding
            whitespace; ``user_message`` unchanged when Ollama is disabled in
            the preset; an empty string when the request or the decoding of
            the answer fails.
        """
        if config.ollama and config.ollama.lower() == "disable":
            print("[OllamaClient] Ollama ist im Preset deaktiviert.")
            print("[OllamaClient] Gebe die Eingabe unverändert zurück.")
            return user_message

        # The prompt may be configured as a list of lines; join it into one string.
        prompt = config.ollama_prompt
        if isinstance(prompt, list):
            prompt = "\n".join(prompt)

        # Only send a system message when a prompt is configured, instead of
        # sending a message whose content is null.
        messages = []
        if prompt is not None:
            messages.append({"role": "system", "content": prompt})
        messages.append({"role": "user", "content": user_message})

        payload = {
            "model": config.ollama_model,
            "messages": messages,
            # Leave the options empty when no context size is configured.
            "options": {"num_ctx": config.ollama_context} if config.ollama_context else {},
            "stream": False,
        }

        endpoint = f"{self.base_url}:{self.port}/api/chat"

        try:
            response = requests.post(endpoint, json=payload, timeout=self.timeout)
            response.raise_for_status()
            json_response = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON on requests
            # versions whose decode error is not a RequestException.
            print(f"[OllamaClient] HTTP-Fehler: {e}")
            notify("Fehler", "Ein Fehler bei der Kommunikation mit 'Ollama' ist aufgetreten!")
            return ""

        # Be tolerant of unexpected answer shapes: anything that is not
        # {"message": {"content": "<text>"}} yields an empty result.
        message = json_response.get("message") if isinstance(json_response, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        if not isinstance(content, str):
            content = ""

        return "\n".join(line.strip() for line in content.strip().splitlines())
|
50
src/pyvtt/libs/whisper.py
Normal file
50
src/pyvtt/libs/whisper.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import subprocess
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
|
||||
from pyvtt.libs.notify import notify
|
||||
from pyvtt.models.config import AppConfig, PresetConfig
|
||||
|
||||
|
||||
class WhisperClient:
    """Thin wrapper that runs the Whisper command-line binary and reads its output."""

    def __init__(self, config: "AppConfig"):
        """
        Remember the paths needed for a Whisper run from the global configuration.

        :param config: AppConfig instance providing ``whisper_path``,
            ``audio_file`` and ``output_file``.
        """
        self.whisper_path = config.whisper_path
        self.audio_file = config.audio_file
        self.output_file = config.output_file

    def transcribe(self, config: "PresetConfig") -> str:
        """
        Run the Whisper CLI on the configured audio file and return the transcript.

        :param config: PresetConfig instance providing ``whisper_model`` and
            ``language``.
        :return: The transcript with every line stripped of surrounding
            whitespace, or an empty string when Whisper cannot be run, exits
            with an error, or its output file cannot be read.
        """
        # "-of" receives the output path without its extension. Only a trailing
        # ".txt" is removed, so a ".txt" elsewhere in the path (for example in a
        # directory name) is left alone.
        # NOTE(review): presumably the CLI appends ".txt" itself because of
        # "-otxt" - confirm against the Whisper binary in use.
        output_base = self.output_file
        if output_base.endswith(".txt"):
            output_base = output_base[: -len(".txt")]

        whisper_cmd = [
            self.whisper_path,
            "-m", config.whisper_model,
            "-f", self.audio_file,
            "-l", config.language,
            "-otxt",
            "-of", output_base,
        ]

        try:
            subprocess.run(whisper_cmd, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing or non-executable binary, which is
            # reported the same way as a failing Whisper run.
            print(f"[WhisperClient] Whisper-Ausführungsfehler: {e}")
            notify("Fehler", "Ein Fehler mit 'Whisper' ist aufgetreten!")
            return ""

        try:
            with open(self.output_file, "r", encoding="utf-8") as f:
                return "\n".join(line.strip() for line in f)
        except (OSError, UnicodeError) as e:
            print(f"[WhisperClient] Fehler beim Einlesen der Ausgabedatei: {e}")
            notify("Fehler", "Ein Fehler beim Lesen der Whisper-Ausgabe ist aufgetreten!")
            return ""
|
25
src/pyvtt/models/config.py
Normal file
25
src/pyvtt/models/config.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from typing import List, Optional, Union
|
||||
from pydantic import BaseModel, HttpUrl, Field
|
||||
|
||||
|
||||
class PresetConfig(BaseModel):
    """Settings of a single preset, validated by Pydantic."""

    # Preset name; shown as the tray menu entry and matched against the preset
    # name received over the control socket.
    name: str
    # Value passed to the Whisper CLI via "-l".
    language: str
    # Value passed to the Whisper CLI via "-m".
    whisper_model: str
    # Set to "disable" (case-insensitive) to skip Ollama and use the raw transcript.
    ollama: Optional[str] = None
    # Sent as "model" in the Ollama chat request.
    ollama_model: Optional[str] = None
    # Sent as "num_ctx" in the request options when set to a truthy value.
    ollama_context: Optional[int] = None
    # System prompt for the chat request; a list is joined with newlines.
    ollama_prompt: Optional[Union[str, List[str]]] = None
    # "journal" makes the tray application write the result to a journal file.
    mode: Optional[str] = None
    # Used in the journal file name and as its heading.
    journal_name: Optional[str] = None
|
||||
|
||||
|
||||
class AppConfig(BaseModel):
    """Global application settings, validated by Pydantic."""

    # Recording target written by ffmpeg and passed to the Whisper CLI via "-f".
    audio_file: str
    # File the Whisper transcript is read from.
    output_file: str
    # Path of the Whisper binary that gets executed.
    whisper_path: str
    # Path of the Unix domain socket the tray application listens on.
    socket_path: str
    # Base URL and port of the Ollama server; combined as "<url>:<port>/api/chat".
    ollama_url: str
    ollama_port: int
    # Directory in which journal files are created.
    journal_path: str
    # Available presets; the first entry is used as the default preset.
    presets: List[PresetConfig]
|
@@ -10,10 +10,12 @@ from PyQt5.QtWidgets import QApplication, QSystemTrayIcon, QMenu, QAction
|
||||
from PyQt5.QtGui import QIcon
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
from pyvtt.configuration import read_configurations
|
||||
from pyvtt.notify import notify, play_sound
|
||||
from pyvtt.libs.notify import notify, play_sound
|
||||
from pyvtt.libs.ollama import OllamaClient
|
||||
from pyvtt.libs.whisper import WhisperClient
|
||||
|
||||
CONFIGURATION = read_configurations()
|
||||
CURRENT_PRESET = CONFIGURATION["presets"][0] # Default to first preset
|
||||
CONFIG = read_configurations()
|
||||
CURRENT_PRESET = CONFIG.presets[0] # Default to first preset
|
||||
|
||||
class WhisperWorker(QThread):
|
||||
"""
|
||||
@@ -31,82 +33,28 @@ class WhisperWorker(QThread):
|
||||
"""
|
||||
finished = pyqtSignal(str)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.whisper = WhisperClient(CONFIG)
|
||||
self.ollama = OllamaClient(CONFIG)
|
||||
|
||||
def run(self):
|
||||
CURENT_CONFIGURATION_LOCALE = CONFIGURATION
|
||||
CURENT_CONFIG_LOCALE = CONFIG
|
||||
CURRENT_PRESET_LOCALE = CURRENT_PRESET
|
||||
|
||||
try:
|
||||
# Whisper ausführen
|
||||
whisper_cmd = [
|
||||
CURENT_CONFIGURATION_LOCALE["whisper_path"],
|
||||
"-m", CURRENT_PRESET_LOCALE["whisper_model"],
|
||||
"-f", CURENT_CONFIGURATION_LOCALE["audio_file"],
|
||||
"-l", CURRENT_PRESET_LOCALE["language"],
|
||||
"-otxt",
|
||||
"-of", CURENT_CONFIGURATION_LOCALE["output_file"].replace(".txt", "")
|
||||
]
|
||||
try:
|
||||
subprocess.run(whisper_cmd, check=True)
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"Whisper Fehler: {e}")
|
||||
notify("Fehler", "Ein Fehler mit 'Whisper' ist aufgetreten!")
|
||||
return
|
||||
|
||||
try:
|
||||
with open(CURENT_CONFIGURATION_LOCALE["output_file"], "r") as f:
|
||||
raw_result = "\n".join(line.strip() for line in f.readlines())
|
||||
except Exception as e:
|
||||
print(f"Datei Fehler: {e}")
|
||||
notify("Fehler", "Ein Fehler beim Lesen der Whisper-Ausgabe ist aufgetreten!")
|
||||
return
|
||||
|
||||
print("Whisper Transkript erhalten.")
|
||||
|
||||
# --- An Ollama schicken ---
|
||||
if CURRENT_PRESET_LOCALE["ollama"] != "disable":
|
||||
if isinstance(CURRENT_PRESET_LOCALE["ollama_prompt"], list):
|
||||
prompt = "\n".join(CURRENT_PRESET_LOCALE["ollama_prompt"])
|
||||
else:
|
||||
prompt = CURRENT_PRESET_LOCALE["ollama_prompt"]
|
||||
|
||||
payload = {
|
||||
"model": CURRENT_PRESET_LOCALE["ollama_model"],
|
||||
"messages": [
|
||||
{"role": "system", "content": prompt},
|
||||
{"role": "user", "content": raw_result}
|
||||
],
|
||||
"options": {
|
||||
"num_ctx": CURRENT_PRESET_LOCALE["ollama_context"]
|
||||
},
|
||||
"stream": False
|
||||
}
|
||||
ollama_endpoint = f"{CURENT_CONFIGURATION_LOCALE['ollama_url']}:{CURENT_CONFIGURATION_LOCALE['ollama_port']}/api/chat"
|
||||
response = requests.post(ollama_endpoint, json=payload)
|
||||
|
||||
try:
|
||||
response.raise_for_status()
|
||||
except requests.exceptions.HTTPError as e:
|
||||
print(f"HTTP Fehler: {e}")
|
||||
notify("Fehler", "Ein Fehler bei der Kommunikation mit 'Ollama' ist aufgetreten!")
|
||||
return
|
||||
|
||||
json_response = response.json()
|
||||
formatted_result = json_response.get("message", {}).get("content", "").strip()
|
||||
formatted_result = "\n".join(line.strip() for line in formatted_result.splitlines())
|
||||
print("Ollama Antwort erhalten.")
|
||||
else:
|
||||
formatted_result = raw_result
|
||||
print("Kein Ollama Prompt angegeben, nur Whisper Ergebnis verwendet.")
|
||||
raw_result = self.whisper.transcribe(CURRENT_PRESET_LOCALE)
|
||||
formatted_result = self.ollama.send_chat(raw_result, CURRENT_PRESET_LOCALE)
|
||||
|
||||
# Ergebnis ins Clipboard kopieren
|
||||
if CURRENT_PRESET_LOCALE.get("mode") == "journal":
|
||||
if CURRENT_PRESET_LOCALE.mode == "journal":
|
||||
today = datetime.date.today().strftime("%Y.%m.%d")
|
||||
journal_path = os.path.join(CURENT_CONFIGURATION_LOCALE["journal_path"], f"{today} - {CURRENT_PRESET_LOCALE['journal_name']}.md")
|
||||
journal_path = os.path.join(CURENT_CONFIG_LOCALE.journal_path, f"{today} - {CURRENT_PRESET_LOCALE.journal_name}.md")
|
||||
now = datetime.datetime.now().strftime("%H:%M:%S")
|
||||
if not os.path.exists(journal_path):
|
||||
try:
|
||||
with open(journal_path, "w") as f:
|
||||
f.write(f"# {CURRENT_PRESET_LOCALE['journal_name']} - {today}\n\n")
|
||||
f.write(f"# {CURRENT_PRESET_LOCALE.journal_name} - {today}\n\n")
|
||||
except Exception as e:
|
||||
print(f"Journal Erstellungsfehler: {e}")
|
||||
notify("Fehler", "Ein Fehler beim Erstellen des Journals ist aufgetreten!")
|
||||
@@ -157,11 +105,11 @@ class SocketListener(threading.Thread):
|
||||
def __init__(self, tray_app):
|
||||
super().__init__(daemon=True)
|
||||
self.tray_app = tray_app
|
||||
if os.path.exists(CONFIGURATION["socket_path"]):
|
||||
os.remove(CONFIGURATION["socket_path"])
|
||||
if os.path.exists(CONFIG.socket_path):
|
||||
os.remove(CONFIG.socket_path)
|
||||
self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
||||
self.sock.bind(CONFIGURATION["socket_path"])
|
||||
os.chmod(CONFIGURATION["socket_path"], 0o666)
|
||||
self.sock.bind(CONFIG.socket_path)
|
||||
os.chmod(CONFIG.socket_path, 0o666)
|
||||
self.sock.listen(1)
|
||||
|
||||
def run(self):
|
||||
@@ -174,8 +122,8 @@ class SocketListener(threading.Thread):
|
||||
if len(cmd) > 1:
|
||||
data = cmd[0]
|
||||
preset = cmd[1]
|
||||
if preset in [p["name"] for p in CONFIGURATION["presets"]]:
|
||||
self.tray_app.set_preset([p["name"] for p in CONFIGURATION["presets"]].index(preset))
|
||||
if preset in [p.name for p in CONFIG.presets]:
|
||||
self.tray_app.set_preset([p.name for p in CONFIG.presets].index(preset))
|
||||
else:
|
||||
data = cmd[0]
|
||||
if data == "toggle":
|
||||
@@ -224,8 +172,8 @@ class TrayApp:
|
||||
# Preset Menü
|
||||
self.preset_actions = []
|
||||
self.preset_group = QMenu("Presets")
|
||||
for i, preset in enumerate(CONFIGURATION["presets"]):
|
||||
action = QAction(preset["name"], self.menu)
|
||||
for i, preset in enumerate(CONFIG.presets):
|
||||
action = QAction(preset.name, self.menu)
|
||||
action.setCheckable(True)
|
||||
if i == 0:
|
||||
action.setChecked(True)
|
||||
@@ -255,8 +203,9 @@ class TrayApp:
|
||||
|
||||
def set_preset(self, index):
|
||||
global CURRENT_PRESET
|
||||
print(f"Preset gewechselt: {CONFIGURATION['presets'][index]['name']}")
|
||||
CURRENT_PRESET = CONFIGURATION["presets"][index]
|
||||
selected_preset = CONFIG.presets[index]
|
||||
print(f"Preset gewechselt: {selected_preset.name}")
|
||||
CURRENT_PRESET = selected_preset
|
||||
# Nur einer darf gecheckt sein
|
||||
for i, action in enumerate(self.preset_actions):
|
||||
action.setChecked(i == index)
|
||||
@@ -266,7 +215,7 @@ class TrayApp:
|
||||
print("Starte Aufnahme...")
|
||||
self.recording_process = subprocess.Popen([
|
||||
"ffmpeg", "-f", "pulse", "-i", "default", "-ar", "16000",
|
||||
"-ac", "1", CONFIGURATION["audio_file"], "-y", "-loglevel", "quiet"
|
||||
"-ac", "1", CONFIG.audio_file, "-y", "-loglevel", "quiet"
|
||||
])
|
||||
notify("Aufnahme", "Aufnahme gestartet!")
|
||||
|
||||
@@ -294,15 +243,15 @@ class TrayApp:
|
||||
print(f"Fertig:\n{text}")
|
||||
|
||||
def reload_configurations(self):
|
||||
global CONFIGURATION, CURRENT_PRESET
|
||||
global CONFIG, CURRENT_PRESET
|
||||
print("Lade Einstellungen neu...")
|
||||
CONFIGURATION = read_configurations()
|
||||
CURRENT_PRESET = CONFIGURATION["presets"][0] # Default to first preset
|
||||
CONFIG = read_configurations()
|
||||
CURRENT_PRESET = CONFIG.presets[0] # Default to first preset
|
||||
# Update preset menu
|
||||
self.preset_group.clear()
|
||||
self.preset_actions = []
|
||||
for i, preset in enumerate(CONFIGURATION["presets"]):
|
||||
action = QAction(preset["name"], self.menu)
|
||||
for i, preset in enumerate(CONFIG.presets):
|
||||
action = QAction(preset.name, self.menu)
|
||||
action.setCheckable(True)
|
||||
if i == 0:
|
||||
action.setChecked(True)
|
||||
@@ -312,8 +261,8 @@ class TrayApp:
|
||||
print("Einstellungen erfolgreich neu geladen.")
|
||||
|
||||
def cleanup(self):
|
||||
if os.path.exists(CONFIGURATION["socket_path"]):
|
||||
os.remove(CONFIGURATION["socket_path"])
|
||||
if os.path.exists(CONFIG.socket_path):
|
||||
os.remove(CONFIG.socket_path)
|
||||
print("Socket sauber entfernt.")
|
||||
|
||||
def run(self):
|
||||
|
Reference in New Issue
Block a user