feat(libs): integrate Ollama and Whisper clients with config models
- Add `AppConfig` and `PresetConfig` models using Pydantic for config validation
- Refactor `read_configurations` to return an `AppConfig` instance
- Implement `OllamaClient` for chat-based server interaction
- Implement `WhisperClient` for transcription via Whisper CLI
- Migrate notification utilities to `libs` directory
- Update tray application to use new clients and config structure
- Simplify Whisper and Ollama integration logic in `WhisperWorker`

Signed-off-by: Max P. <Mail@MPassarello.de>
This commit is contained in:
@@ -2,9 +2,11 @@ import json
|
|||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from pyvtt.models.config import AppConfig
|
||||||
|
|
||||||
DEFAULT_CONFIG_PATH = Path.home() / ".pyvtt.json"
|
DEFAULT_CONFIG_PATH = Path.home() / ".pyvtt.json"
|
||||||
|
|
||||||
def read_configurations():
|
def read_configurations() -> AppConfig:
|
||||||
"""
|
"""
|
||||||
Reads the configuration settings from a JSON file named 'pyvtt.settings.json'
|
Reads the configuration settings from a JSON file named 'pyvtt.settings.json'
|
||||||
located in the same directory as the script.
|
located in the same directory as the script.
|
||||||
@@ -18,7 +20,8 @@ def read_configurations():
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
with open(DEFAULT_CONFIG_PATH) as f:
|
with open(DEFAULT_CONFIG_PATH) as f:
|
||||||
return json.load(f)
|
raw_config = json.load(f)
|
||||||
|
return AppConfig(**raw_config)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error reading configurations: {e}")
|
print(f"Error reading configurations: {e}")
|
||||||
raise Exception(f"Error reading configurations: {e}")
|
raise Exception(f"Error reading configurations: {e}")
|
67
src/pyvtt/libs/ollama.py
Normal file
67
src/pyvtt/libs/ollama.py
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
import requests
|
||||||
|
from typing import Union, List, Optional
|
||||||
|
|
||||||
|
from pyvtt.libs.notify import notify
|
||||||
|
from pyvtt.models.config import AppConfig, PresetConfig
|
||||||
|
|
||||||
|
|
||||||
|
class OllamaClient:
    """Small client for the ``/api/chat`` endpoint of an Ollama server."""

    # (connect, read) timeout in seconds handed to ``requests``. Only the
    # connection attempt is bounded so an unreachable server cannot block the
    # calling thread forever; the read side stays unbounded because a
    # non-streamed generation may legitimately take a long time.
    _TIMEOUT = (10, None)

    def __init__(self, config: AppConfig):
        """
        Store the server location taken from the global application config.

        :param config: AppConfig instance providing ``ollama_url`` and
            ``ollama_port`` of the Ollama server.
        """
        self.base_url = config.ollama_url
        self.port = config.ollama_port

    def send_chat(
        self,
        user_message: str,
        config: PresetConfig,
    ) -> str:
        """
        Send a chat request to the Ollama server using the given preset.

        :param user_message: Text produced by the user (e.g. a transcript).
        :param config: PresetConfig instance with the model, prompt and
            context-size parameters for this request.
        :return: The answer text with every line stripped. ``user_message`` is
            returned unchanged when Ollama is disabled in the preset or when
            the message is blank. An empty string is returned when the request
            or the decoding of its response fails.
        """
        if config.ollama and config.ollama.lower() == "disable":
            print("[OllamaClient] Ollama ist im Preset deaktiviert.")
            print("[OllamaClient] Gebe die Eingabe unverändert zurück.")
            return user_message

        # Nothing to reformat: a blank message (e.g. after a failed
        # transcription) would only make the model invent content.
        if not user_message or not user_message.strip():
            return user_message

        # A prompt given as a list of lines is joined into one string.
        prompt = config.ollama_prompt
        if isinstance(prompt, list):
            prompt = "\n".join(prompt)

        # Only add a system message when a prompt is actually configured;
        # otherwise ``None`` would be serialized as JSON ``null``.
        messages = []
        if prompt:
            messages.append({"role": "system", "content": prompt})
        messages.append({"role": "user", "content": user_message})

        payload = {
            "model": config.ollama_model,
            "messages": messages,
            "options": {
                "num_ctx": config.ollama_context,
            } if config.ollama_context else {},
            "stream": False,
        }

        # Tolerate a trailing slash in the configured base URL.
        endpoint = f"{str(self.base_url).rstrip('/')}:{self.port}/api/chat"

        try:
            response = requests.post(endpoint, json=payload, timeout=self._TIMEOUT)
            response.raise_for_status()
            json_response = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on ``requests`` versions whose
            # JSON decode error is not a RequestException subclass.
            print(f"[OllamaClient] HTTP-Fehler: {e}")
            notify("Fehler", "Ein Fehler bei der Kommunikation mit 'Ollama' ist aufgetreten!")
            return ""

        # Be defensive about the response shape: "message" may be missing or null.
        message = json_response.get("message") if isinstance(json_response, dict) else None
        content = message.get("content") if isinstance(message, dict) else ""
        content = (content or "").strip()
        return "\n".join(line.strip() for line in content.splitlines())
|
50
src/pyvtt/libs/whisper.py
Normal file
50
src/pyvtt/libs/whisper.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
import subprocess
|
||||||
|
from typing import Optional
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from pyvtt.libs.notify import notify
|
||||||
|
from pyvtt.models.config import AppConfig, PresetConfig
|
||||||
|
|
||||||
|
|
||||||
|
class WhisperClient:
    """Wrapper around the Whisper command-line tool for transcribing one audio file."""

    def __init__(self, config: AppConfig):
        """
        Store the paths taken from the global application config.

        :param config: AppConfig instance providing the path of the Whisper
            binary, the audio file to transcribe and the text output file.
        """
        self.whisper_path = config.whisper_path
        self.audio_file = config.audio_file
        self.output_file = config.output_file

    def transcribe(self, config: PresetConfig) -> str:
        """
        Run the Whisper CLI on the audio file and return the transcript.

        :param config: PresetConfig instance providing the Whisper model and
            the language.
        :return: The transcript with every line stripped, or an empty string
            if Whisper could not be run or its output could not be read.
        """
        # The CLI receives the output path without extension ("-of") and is
        # asked for text output ("-otxt"). Only a trailing ".txt" is removed,
        # so a ".txt" elsewhere in the path (e.g. a directory name) is kept.
        output_file = str(self.output_file)
        if output_file.endswith(".txt"):
            output_base = output_file[: -len(".txt")]
        else:
            output_base = output_file
        # Read back the file derived from the base passed to the CLI, so a
        # configured name without ".txt" still resolves to the same file.
        transcript_path = output_base + ".txt"

        whisper_cmd = [
            self.whisper_path,
            "-m", config.whisper_model,
            "-f", self.audio_file,
            "-l", config.language,
            "-otxt",
            "-of", output_base,
        ]

        try:
            subprocess.run(whisper_cmd, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing or non-executable binary, which would
            # otherwise escape as an unhandled exception.
            print(f"[WhisperClient] Whisper-Ausführungsfehler: {e}")
            notify("Fehler", "Ein Fehler mit 'Whisper' ist aufgetreten!")
            return ""

        try:
            with open(transcript_path, "r", encoding="utf-8") as f:
                return "\n".join(line.strip() for line in f)
        except Exception as e:
            print(f"[WhisperClient] Fehler beim Einlesen der Ausgabedatei: {e}")
            notify("Fehler", "Ein Fehler beim Lesen der Whisper-Ausgabe ist aufgetreten!")
            return ""
|
25
src/pyvtt/models/config.py
Normal file
25
src/pyvtt/models/config.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
from typing import List, Optional, Union
|
||||||
|
from pydantic import BaseModel, HttpUrl, Field
|
||||||
|
|
||||||
|
|
||||||
|
class PresetConfig(BaseModel):
    """Settings of one selectable preset (Whisper, Ollama and output mode)."""

    # Display name; also used to select the preset by name over the socket.
    name: str
    # Language passed to the Whisper CLI ("-l").
    language: str
    # Model passed to the Whisper CLI ("-m").
    whisper_model: str
    # "disable" (case-insensitive) skips the Ollama step for this preset.
    ollama: Optional[str] = None
    # Model name sent in the Ollama chat request.
    ollama_model: Optional[str] = None
    # Context size sent as "num_ctx"; omitted from the request when unset.
    ollama_context: Optional[int] = None
    # System prompt; a list of strings is joined with newlines before sending.
    ollama_prompt: Union[str, List[str], None] = None
    # "journal" makes the tray application write the result to a journal file.
    mode: Optional[str] = None
    # Used in the journal file name and heading when mode is "journal".
    journal_name: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class AppConfig(BaseModel):
    """Global application settings loaded from the JSON configuration file."""

    # Audio file that the recorder writes and Whisper reads ("-f").
    audio_file: str
    # Text file from which the Whisper transcript is read back.
    output_file: str
    # Path of the Whisper CLI executable.
    whisper_path: str
    # Path of the Unix socket the tray application listens on.
    socket_path: str
    # Base URL and port of the Ollama server; combined as "<url>:<port>/api/chat".
    ollama_url: str
    ollama_port: int
    # Directory in which journal files are created.
    journal_path: str
    # Available presets; the tray application uses the first one as default.
    presets: List[PresetConfig]
|
@@ -10,10 +10,12 @@ from PyQt5.QtWidgets import QApplication, QSystemTrayIcon, QMenu, QAction
|
|||||||
from PyQt5.QtGui import QIcon
|
from PyQt5.QtGui import QIcon
|
||||||
from PyQt5.QtCore import QThread, pyqtSignal
|
from PyQt5.QtCore import QThread, pyqtSignal
|
||||||
from pyvtt.configuration import read_configurations
|
from pyvtt.configuration import read_configurations
|
||||||
from pyvtt.notify import notify, play_sound
|
from pyvtt.libs.notify import notify, play_sound
|
||||||
|
from pyvtt.libs.ollama import OllamaClient
|
||||||
|
from pyvtt.libs.whisper import WhisperClient
|
||||||
|
|
||||||
CONFIGURATION = read_configurations()
|
CONFIG = read_configurations()
|
||||||
CURRENT_PRESET = CONFIGURATION["presets"][0] # Default to first preset
|
CURRENT_PRESET = CONFIG.presets[0] # Default to first preset
|
||||||
|
|
||||||
class WhisperWorker(QThread):
|
class WhisperWorker(QThread):
|
||||||
"""
|
"""
|
||||||
@@ -31,82 +33,28 @@ class WhisperWorker(QThread):
|
|||||||
"""
|
"""
|
||||||
finished = pyqtSignal(str)
|
finished = pyqtSignal(str)
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__()
|
||||||
|
self.whisper = WhisperClient(CONFIG)
|
||||||
|
self.ollama = OllamaClient(CONFIG)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
CURENT_CONFIGURATION_LOCALE = CONFIGURATION
|
CURENT_CONFIG_LOCALE = CONFIG
|
||||||
CURRENT_PRESET_LOCALE = CURRENT_PRESET
|
CURRENT_PRESET_LOCALE = CURRENT_PRESET
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Whisper ausführen
|
raw_result = self.whisper.transcribe(CURRENT_PRESET_LOCALE)
|
||||||
whisper_cmd = [
|
formatted_result = self.ollama.send_chat(raw_result, CURRENT_PRESET_LOCALE)
|
||||||
CURENT_CONFIGURATION_LOCALE["whisper_path"],
|
|
||||||
"-m", CURRENT_PRESET_LOCALE["whisper_model"],
|
|
||||||
"-f", CURENT_CONFIGURATION_LOCALE["audio_file"],
|
|
||||||
"-l", CURRENT_PRESET_LOCALE["language"],
|
|
||||||
"-otxt",
|
|
||||||
"-of", CURENT_CONFIGURATION_LOCALE["output_file"].replace(".txt", "")
|
|
||||||
]
|
|
||||||
try:
|
|
||||||
subprocess.run(whisper_cmd, check=True)
|
|
||||||
except subprocess.CalledProcessError as e:
|
|
||||||
print(f"Whisper Fehler: {e}")
|
|
||||||
notify("Fehler", "Ein Fehler mit 'Whisper' ist aufgetreten!")
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(CURENT_CONFIGURATION_LOCALE["output_file"], "r") as f:
|
|
||||||
raw_result = "\n".join(line.strip() for line in f.readlines())
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Datei Fehler: {e}")
|
|
||||||
notify("Fehler", "Ein Fehler beim Lesen der Whisper-Ausgabe ist aufgetreten!")
|
|
||||||
return
|
|
||||||
|
|
||||||
print("Whisper Transkript erhalten.")
|
|
||||||
|
|
||||||
# --- An Ollama schicken ---
|
|
||||||
if CURRENT_PRESET_LOCALE["ollama"] != "disable":
|
|
||||||
if isinstance(CURRENT_PRESET_LOCALE["ollama_prompt"], list):
|
|
||||||
prompt = "\n".join(CURRENT_PRESET_LOCALE["ollama_prompt"])
|
|
||||||
else:
|
|
||||||
prompt = CURRENT_PRESET_LOCALE["ollama_prompt"]
|
|
||||||
|
|
||||||
payload = {
|
|
||||||
"model": CURRENT_PRESET_LOCALE["ollama_model"],
|
|
||||||
"messages": [
|
|
||||||
{"role": "system", "content": prompt},
|
|
||||||
{"role": "user", "content": raw_result}
|
|
||||||
],
|
|
||||||
"options": {
|
|
||||||
"num_ctx": CURRENT_PRESET_LOCALE["ollama_context"]
|
|
||||||
},
|
|
||||||
"stream": False
|
|
||||||
}
|
|
||||||
ollama_endpoint = f"{CURENT_CONFIGURATION_LOCALE['ollama_url']}:{CURENT_CONFIGURATION_LOCALE['ollama_port']}/api/chat"
|
|
||||||
response = requests.post(ollama_endpoint, json=payload)
|
|
||||||
|
|
||||||
try:
|
|
||||||
response.raise_for_status()
|
|
||||||
except requests.exceptions.HTTPError as e:
|
|
||||||
print(f"HTTP Fehler: {e}")
|
|
||||||
notify("Fehler", "Ein Fehler bei der Kommunikation mit 'Ollama' ist aufgetreten!")
|
|
||||||
return
|
|
||||||
|
|
||||||
json_response = response.json()
|
|
||||||
formatted_result = json_response.get("message", {}).get("content", "").strip()
|
|
||||||
formatted_result = "\n".join(line.strip() for line in formatted_result.splitlines())
|
|
||||||
print("Ollama Antwort erhalten.")
|
|
||||||
else:
|
|
||||||
formatted_result = raw_result
|
|
||||||
print("Kein Ollama Prompt angegeben, nur Whisper Ergebnis verwendet.")
|
|
||||||
|
|
||||||
# Ergebnis ins Clipboard kopieren
|
# Ergebnis ins Clipboard kopieren
|
||||||
if CURRENT_PRESET_LOCALE.get("mode") == "journal":
|
if CURRENT_PRESET_LOCALE.mode == "journal":
|
||||||
today = datetime.date.today().strftime("%Y.%m.%d")
|
today = datetime.date.today().strftime("%Y.%m.%d")
|
||||||
journal_path = os.path.join(CURENT_CONFIGURATION_LOCALE["journal_path"], f"{today} - {CURRENT_PRESET_LOCALE['journal_name']}.md")
|
journal_path = os.path.join(CURENT_CONFIG_LOCALE.journal_path, f"{today} - {CURRENT_PRESET_LOCALE.journal_name}.md")
|
||||||
now = datetime.datetime.now().strftime("%H:%M:%S")
|
now = datetime.datetime.now().strftime("%H:%M:%S")
|
||||||
if not os.path.exists(journal_path):
|
if not os.path.exists(journal_path):
|
||||||
try:
|
try:
|
||||||
with open(journal_path, "w") as f:
|
with open(journal_path, "w") as f:
|
||||||
f.write(f"# {CURRENT_PRESET_LOCALE['journal_name']} - {today}\n\n")
|
f.write(f"# {CURRENT_PRESET_LOCALE.journal_name} - {today}\n\n")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Journal Erstellungsfehler: {e}")
|
print(f"Journal Erstellungsfehler: {e}")
|
||||||
notify("Fehler", "Ein Fehler beim Erstellen des Journals ist aufgetreten!")
|
notify("Fehler", "Ein Fehler beim Erstellen des Journals ist aufgetreten!")
|
||||||
@@ -157,11 +105,11 @@ class SocketListener(threading.Thread):
|
|||||||
def __init__(self, tray_app):
|
def __init__(self, tray_app):
|
||||||
super().__init__(daemon=True)
|
super().__init__(daemon=True)
|
||||||
self.tray_app = tray_app
|
self.tray_app = tray_app
|
||||||
if os.path.exists(CONFIGURATION["socket_path"]):
|
if os.path.exists(CONFIG.socket_path):
|
||||||
os.remove(CONFIGURATION["socket_path"])
|
os.remove(CONFIG.socket_path)
|
||||||
self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
||||||
self.sock.bind(CONFIGURATION["socket_path"])
|
self.sock.bind(CONFIG.socket_path)
|
||||||
os.chmod(CONFIGURATION["socket_path"], 0o666)
|
os.chmod(CONFIG.socket_path, 0o666)
|
||||||
self.sock.listen(1)
|
self.sock.listen(1)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
@@ -174,8 +122,8 @@ class SocketListener(threading.Thread):
|
|||||||
if len(cmd) > 1:
|
if len(cmd) > 1:
|
||||||
data = cmd[0]
|
data = cmd[0]
|
||||||
preset = cmd[1]
|
preset = cmd[1]
|
||||||
if preset in [p["name"] for p in CONFIGURATION["presets"]]:
|
if preset in [p.name for p in CONFIG.presets]:
|
||||||
self.tray_app.set_preset([p["name"] for p in CONFIGURATION["presets"]].index(preset))
|
self.tray_app.set_preset([p.name for p in CONFIG.presets].index(preset))
|
||||||
else:
|
else:
|
||||||
data = cmd[0]
|
data = cmd[0]
|
||||||
if data == "toggle":
|
if data == "toggle":
|
||||||
@@ -224,8 +172,8 @@ class TrayApp:
|
|||||||
# Preset Menü
|
# Preset Menü
|
||||||
self.preset_actions = []
|
self.preset_actions = []
|
||||||
self.preset_group = QMenu("Presets")
|
self.preset_group = QMenu("Presets")
|
||||||
for i, preset in enumerate(CONFIGURATION["presets"]):
|
for i, preset in enumerate(CONFIG.presets):
|
||||||
action = QAction(preset["name"], self.menu)
|
action = QAction(preset.name, self.menu)
|
||||||
action.setCheckable(True)
|
action.setCheckable(True)
|
||||||
if i == 0:
|
if i == 0:
|
||||||
action.setChecked(True)
|
action.setChecked(True)
|
||||||
@@ -255,8 +203,9 @@ class TrayApp:
|
|||||||
|
|
||||||
def set_preset(self, index):
|
def set_preset(self, index):
|
||||||
global CURRENT_PRESET
|
global CURRENT_PRESET
|
||||||
print(f"Preset gewechselt: {CONFIGURATION['presets'][index]['name']}")
|
selected_preset = CONFIG.presets[index]
|
||||||
CURRENT_PRESET = CONFIGURATION["presets"][index]
|
print(f"Preset gewechselt: {selected_preset.name}")
|
||||||
|
CURRENT_PRESET = selected_preset
|
||||||
# Nur einer darf gecheckt sein
|
# Nur einer darf gecheckt sein
|
||||||
for i, action in enumerate(self.preset_actions):
|
for i, action in enumerate(self.preset_actions):
|
||||||
action.setChecked(i == index)
|
action.setChecked(i == index)
|
||||||
@@ -266,7 +215,7 @@ class TrayApp:
|
|||||||
print("Starte Aufnahme...")
|
print("Starte Aufnahme...")
|
||||||
self.recording_process = subprocess.Popen([
|
self.recording_process = subprocess.Popen([
|
||||||
"ffmpeg", "-f", "pulse", "-i", "default", "-ar", "16000",
|
"ffmpeg", "-f", "pulse", "-i", "default", "-ar", "16000",
|
||||||
"-ac", "1", CONFIGURATION["audio_file"], "-y", "-loglevel", "quiet"
|
"-ac", "1", CONFIG.audio_file, "-y", "-loglevel", "quiet"
|
||||||
])
|
])
|
||||||
notify("Aufnahme", "Aufnahme gestartet!")
|
notify("Aufnahme", "Aufnahme gestartet!")
|
||||||
|
|
||||||
@@ -294,15 +243,15 @@ class TrayApp:
|
|||||||
print(f"Fertig:\n{text}")
|
print(f"Fertig:\n{text}")
|
||||||
|
|
||||||
def reload_configurations(self):
|
def reload_configurations(self):
|
||||||
global CONFIGURATION, CURRENT_PRESET
|
global CONFIG, CURRENT_PRESET
|
||||||
print("Lade Einstellungen neu...")
|
print("Lade Einstellungen neu...")
|
||||||
CONFIGURATION = read_configurations()
|
CONFIG = read_configurations()
|
||||||
CURRENT_PRESET = CONFIGURATION["presets"][0] # Default to first preset
|
CURRENT_PRESET = CONFIG.presets[0] # Default to first preset
|
||||||
# Update preset menu
|
# Update preset menu
|
||||||
self.preset_group.clear()
|
self.preset_group.clear()
|
||||||
self.preset_actions = []
|
self.preset_actions = []
|
||||||
for i, preset in enumerate(CONFIGURATION["presets"]):
|
for i, preset in enumerate(CONFIG.presets):
|
||||||
action = QAction(preset["name"], self.menu)
|
action = QAction(preset.name, self.menu)
|
||||||
action.setCheckable(True)
|
action.setCheckable(True)
|
||||||
if i == 0:
|
if i == 0:
|
||||||
action.setChecked(True)
|
action.setChecked(True)
|
||||||
@@ -312,8 +261,8 @@ class TrayApp:
|
|||||||
print("Einstellungen erfolgreich neu geladen.")
|
print("Einstellungen erfolgreich neu geladen.")
|
||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
if os.path.exists(CONFIGURATION["socket_path"]):
|
if os.path.exists(CONFIG.socket_path):
|
||||||
os.remove(CONFIGURATION["socket_path"])
|
os.remove(CONFIG.socket_path)
|
||||||
print("Socket sauber entfernt.")
|
print("Socket sauber entfernt.")
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
Reference in New Issue
Block a user