Compare commits

..

8 Commits

Author SHA1 Message Date
79f4156ffa chore(version): bump to 0.6.0
2025-10-05 20:33:46 +02:00
7f4b559644 feat(module): add support for llama-swap API integration 2025-10-05 20:33:27 +02:00
98b5ab1f1c chore(version): bump to 0.5.0
2025-10-05 20:14:16 +02:00
e628816ea8 feat(config): enhance Ollama configuration with dynamic path support 2025-10-05 20:13:42 +02:00
4e4389a03f chore(version): bump to 0.4.5
- Update project version from 0.4.4 to 0.4.5
- Prepares for the next release with minor updates
2025-05-18 20:41:38 +02:00
2c0cffe16a fix(worker): adjust timestamp format in journal entries
- Updates the timestamp format from HH:MM:SS to HH:MM for journal logs
- Simplifies the time representation in generated journal paths
2025-05-18 20:41:14 +02:00
98fc0596d4 chore: bump version to 0.4.4
Signed-off-by: Max P. <Mail@MPassarello.de>
2025-05-05 12:02:55 +02:00
7059ebda4c refactor(send_cmd): rename config variable for clarity
- Rename `CONFIGURATION` to `CONFIG` for consistency and brevity.
- Update method call to use dot notation for accessing `socket_path`.

Signed-off-by: Max P. <Mail@MPassarello.de>
2025-05-05 12:02:51 +02:00
6 changed files with 88 additions and 48 deletions

View File

@@ -1,6 +1,6 @@
 [project]
 name = "pyvtt"
-version = "0.4.3"
+version = "0.6.0"
 description = "Python Voice to Text + LLMA"
 authors = [{ name = "Max P.", email = "Mail@MPassarello.de" }]
 license = { text = "MIT" }

View File

@@ -2,23 +2,26 @@
   "audio_file": "/tmp/pyvtt_recording.wav",
   "output_file": "/tmp/pyvtt_transcript.txt",
   "whisper_path": "/path/to/whisper-cli",
-  "language": "en",
   "socket_path": "/tmp/pyvtt.sock",
   "ollama_url": "http://localhost",
+  "ollama_path": "/api/chat",
   "ollama_port": 12345,
   "presets": [
     {
       "name": "Default",
       "language": "en",
       "whisper_model": "/path/to/default-whisper-model.bin",
-      "ollama_model": "default-model",
-      "ollama_prompt": "Provide a detailed response to the following text:\n\n"
+      "ollama": "disable"
     },
     {
       "name": "Quick English",
       "whisper_model": "/path/to/quick-whisper-model.bin",
-      "ollama_model": "quick-model",
-      "ollama_prompt": "Quickly correct the following English text for grammar and punctuation:\n\n"
+      "ollama_model": "gemma3:4b",
+      "ollama_context": 131072,
+      "ollama_prompt": [
+        "Quickly correct the following English text for grammar and punctuation:\n",
+        "\n"
+      ]
     },
     {
       "name": "German Correction",

View File

@@ -1,3 +1,4 @@
+import json
 import requests
 
 from typing import Union, List, Optional
@@ -8,60 +9,95 @@ from pyvtt.models.config import AppConfig, PresetConfig
 class OllamaClient:
     def __init__(self, config: AppConfig):
         """
-        Initializes the Ollama client with the base configuration from the global app configuration.
-        :param config: AppConfig instance with the host and port of the Ollama server.
+        Initializes the API client (Ollama or llama-swap) with the base configuration.
         """
-        self.base_url = config.ollama_url
+        self.base_url = config.ollama_url.rstrip("/")
         self.port = config.ollama_port
+        self.path = config.ollama_path or "/api/chat"
+
+        # If llama-swap (OpenAI-API-compatible), use the OpenAI path
+        if "v1" in self.path or "completions" in self.path:
+            self.is_llama_swap = True
+        else:
+            self.is_llama_swap = False
 
-    def send_chat(
-        self,
-        user_message: str,
-        config: PresetConfig,
-    ) -> str:
+    def send_chat(self, user_message: str, config: PresetConfig) -> str:
         """
-        Sends a chat request to the Ollama server based on the specific preset configuration.
-        :param user_message: The input text produced by the user (e.g. a transcript).
-        :param config: PresetConfig instance with model-, prompt-, and context-related parameters.
-        :return: The formatted response text returned by Ollama; returns the user_message
-                 unchanged if Ollama is disabled, or none on error.
+        Sends a chat request to Ollama or llama-swap.
         """
         if config.ollama and config.ollama.lower() == "disable":
             print("[OllamaClient] Ollama is disabled in the preset.")
+            print("[OllamaClient] Returning the input unchanged.")
             return user_message
 
-        # Build the prompt as a string – a list is joined into lines
-        if isinstance(config.ollama_prompt, list):
-            prompt_str = "\n".join(config.ollama_prompt)
-        else:
-            prompt_str = config.ollama_prompt
+        # Build the prompt
+        prompt_str = (
+            "\n".join(config.ollama_prompt)
+            if isinstance(config.ollama_prompt, list)
+            else str(config.ollama_prompt)
+        )
 
-        # Prepare the payload for the API request
-        payload = {
-            "model": config.ollama_model,
-            "messages": [
-                {"role": "system", "content": prompt_str},
-                {"role": "user", "content": user_message}
-            ],
-            "options": {
-                "num_ctx": config.ollama_context,
-            } if config.ollama_context else {},
-            "stream": False
-        }
+        # === Prepare the payload ===
+        if self.is_llama_swap:
+            # OpenAI-/llama-swap-compatible format
+            payload = {
+                "model": config.ollama_model,
+                "messages": [
+                    {"role": "system", "content": prompt_str},
+                    {"role": "user", "content": user_message},
+                ],
+                "stream": False,
+            }
+            # Optionally add the context size
+            if config.ollama_context:
+                payload["num_ctx"] = config.ollama_context
+        else:
+            # Classic Ollama format
+            payload = {
+                "model": config.ollama_model,
+                "messages": [
+                    {"role": "system", "content": prompt_str},
+                    {"role": "user", "content": user_message},
+                ],
+                "options": (
+                    {"num_ctx": config.ollama_context}
+                    if config.ollama_context
+                    else {}
+                ),
+                "stream": False,
+            }
 
-        endpoint = f"{self.base_url}:{self.port}/api/chat"
+        endpoint = f"{self.base_url}:{self.port}{self.path}"
 
-        # Send the request to Ollama and extract the response
+        # === Send the request ===
         try:
-            response = requests.post(endpoint, json=payload)
+            headers = {"Content-Type": "application/json"}
+            if self.is_llama_swap:
+                headers["Authorization"] = "Bearer no-key"
+            response = requests.post(endpoint, headers=headers, data=json.dumps(payload))
             response.raise_for_status()
             json_response = response.json()
-            content = json_response.get("message", {}).get("content", "").strip()
+
+            # === Extract the response ===
+            if self.is_llama_swap:
+                # OpenAI-compatible structure
+                content = (
+                    json_response.get("choices", [{}])[0]
+                    .get("message", {})
+                    .get("content", "")
+                    .strip()
+                )
+            else:
+                # Ollama's own structure
+                content = (
+                    json_response.get("message", {})
+                    .get("content", "")
+                    .strip()
+                )
+
             return "\n".join(line.strip() for line in content.splitlines())
         except requests.exceptions.RequestException as e:
             print(f"[OllamaClient] HTTP error: {e}")
-            notify("Error", "An error occurred while communicating with 'Ollama'!")
+            notify("Error", "Communication error with Ollama / llama-swap!")
            return ""

View File

@@ -20,6 +20,7 @@ class AppConfig(BaseModel):
     whisper_path: str
     socket_path: str
     ollama_url: str
+    ollama_path: str
     ollama_port: int
     journal_path: str
     presets: List[PresetConfig]
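
Only `AppConfig` changes here, but `send_chat` above also relies on several preset-level fields. A hedged sketch of the `PresetConfig` shape implied by that usage (field optionality and defaults are assumptions; this diff does not show the model):

from typing import List, Optional, Union
from pydantic import BaseModel

class PresetConfig(BaseModel):
    name: str
    whisper_model: str
    language: Optional[str] = None
    ollama: Optional[str] = None                       # "disable" skips the LLM pass
    ollama_model: Optional[str] = None
    ollama_context: Optional[int] = None               # forwarded as num_ctx
    ollama_prompt: Union[str, List[str], None] = None  # lists are joined with "\n"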

View File

@@ -4,7 +4,7 @@ import argparse
 
 from pyvtt.configuration import read_configurations
 from typing import Optional
 
-CONFIGURATION = read_configurations()
+CONFIG = read_configurations()
 
 def send_cmd(cmd: str, socket_path: str, preset: Optional[str] = None) -> None:
@@ -62,4 +62,4 @@ def main():
     args = parser.parse_args()
 
-    send_cmd(args.command, CONFIGURATION["socket_path"], args.preset)
+    send_cmd(args.command, CONFIG.socket_path, args.preset)
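
The switch from `CONFIGURATION["socket_path"]` to `CONFIG.socket_path` suggests that `read_configurations()` now returns the parsed `AppConfig` model rather than a plain dict. A minimal illustration of the access change (an assumption drawn from this diff, not verified against the function body):

from pyvtt.configuration import read_configurations

CONFIG = read_configurations()
# dict-style access (old): CONFIG["socket_path"]
# attribute access (new):
print(CONFIG.socket_path)  # e.g. "/tmp/pyvtt.sock"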

View File

@@ -50,7 +50,7 @@ class WhisperWorker(QThread):
         if CURRENT_PRESET_LOCALE.mode == "journal":
             today = datetime.date.today().strftime("%Y.%m.%d")
             journal_path = os.path.join(CURENT_CONFIG_LOCALE.journal_path, f"{today} - {CURRENT_PRESET_LOCALE.journal_name}.md")
-            now = datetime.datetime.now().strftime("%H:%M:%S")
+            now = datetime.datetime.now().strftime("%H:%M")
             if not os.path.exists(journal_path):
                 try:
                     with open(journal_path, "w") as f:
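
For reference, the effect of the timestamp format change on a journal entry:

import datetime

now = datetime.datetime(2025, 5, 18, 20, 41, 14)  # example moment
print(now.strftime("%H:%M:%S"))  # "20:41:14" (old journal format)
print(now.strftime("%H:%M"))     # "20:41"    (new journal format)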