MemeDb/scanners/SpeechScanner.py
Djalim Simaila 03a41b6996 feat: add initial implementation of the shitpost scanning application with database integration and various scanners for music, text, speech, and tags extraction
- Introduce `app.py` as the main application file to handle shitpost scanning.
- Create `config.py` for configuration settings including scan paths and file types.
- Implement database models in `Models.py` for shitposts, songs, speech outputs, and tags.
- Add database creation logic in `db.py`.
- Develop various scanners (`OcrScanner.py`, `SongScanner.py`, `SpeechScanner.py`, `TagScanner.py`) for extracting information from shitposts.
- Implement utility functions in `dateExtractor.py` and `shitpostFactory.py` for handling file metadata and creating shitpost objects.
- Include a `pyproject.toml` for project dependencies and configuration.
2025-06-23 23:28:04 +02:00

52 lines
1.9 KiB
Python

import json
import subprocess
import tempfile
import threading
from pathlib import Path
from time import time

from faster_whisper import WhisperModel

from config import VIDEO_FILETYPES
from db.Models import Shitpost, SpeechOutput
def extractSpeech(shitpost: Shitpost, lock: threading.Lock):
    """Run speech extraction for one shitpost and print how long it took.

    ``fastWhisper`` is called only when the shitpost has no
    ``speech_output`` yet and its ``file_type`` is not listed in
    ``VIDEO_FILETYPES``. The start and end messages are printed either way.

    Args:
        shitpost: The shitpost to process; its ``hash``, ``path``,
            ``file_type`` and ``speech_output`` attributes are read.
        lock: Forwarded unchanged to ``fastWhisper``.
    """
    started_at = time()
    print(f"\tstarting to extract speech for {shitpost.hash[:4]} aka {shitpost.path}")

    # NOTE(review): files whose type IS in VIDEO_FILETYPES are skipped here;
    # confirm against config.py that this is the intended selection.
    transcript_missing = shitpost.speech_output is None
    if transcript_missing and shitpost.file_type not in VIDEO_FILETYPES:
        # The CLI-based `whisper` function is the alternative backend.
        fastWhisper(shitpost, lock)

    elapsed = time() - started_at
    print(f"\tspeech extracted for {shitpost.hash[:4]} in :{elapsed} ")
def whisper(shitpost: Shitpost):
    """Transcribe a shitpost with the ``whisper`` command-line tool.

    Runs ``whisper`` on ``shitpost.path`` with JSON output written to a
    temporary directory, loads that JSON file, prints its ``"text"`` field
    and stores the whole document (re-serialised as a JSON string) in a new
    ``SpeechOutput`` assigned to ``shitpost.speech_output``.

    Args:
        shitpost: The shitpost whose file at ``shitpost.path`` is transcribed.

    Raises:
        subprocess.CalledProcessError: If the ``whisper`` command exits with
            a non-zero status.
        FileNotFoundError: If the ``whisper`` executable or the expected JSON
            output file cannot be found.
    """
    # Only the last extension is removed, so "a.b.mp4" gives "a.b"; this is
    # expected to match how the whisper CLI names its output file.
    stem = Path(shitpost.path).stem

    with tempfile.TemporaryDirectory() as tmpdir:
        # Argument list without a shell: a path containing quotes, spaces or
        # shell metacharacters is passed to whisper verbatim.
        cmd = [
            "whisper",
            "--verbose", "False",
            "-f", "json",
            "-o", tmpdir,
            shitpost.path,
        ]
        # check=True surfaces a failed run here instead of as a confusing
        # missing-file error below.
        subprocess.run(cmd, check=True)

        output_file = Path(tmpdir) / f"{stem}.json"
        with open(output_file, "r", encoding="utf-8") as file:
            data = json.load(file)

    print("extracted speech :", data["text"] )
    shitpost.speech_output = SpeechOutput(
        text=json.dumps(data),
    )
def fastWhisper(shitpost: Shitpost, lock: threading.Lock):
    """Transcribe a shitpost with faster-whisper and attach the result.

    Every transcribed segment's text is stored under its start time rounded
    to two decimals. The mapping is JSON-encoded into a new ``SpeechOutput``
    which is assigned to ``shitpost.speech_output`` while ``lock`` is held.

    Args:
        shitpost: The shitpost whose file at ``shitpost.path`` is transcribed.
        lock: Held only for the assignment to ``shitpost.speech_output``.
    """
    # NOTE(review): the model is constructed on every call; if that proves
    # costly, consider reusing one instance — confirm it is safe to share
    # between the calling threads first.
    model = WhisperModel("turbo", device="cpu", compute_type="int8")
    segments, _info = model.transcribe(
        shitpost.path,
        beam_size=5,
        language_detection_segments=2,
    )

    # Built before taking the lock so the lock only covers the assignment.
    # Segments whose rounded start times collide keep the later text.
    text_by_start = {
        round(segment.start, 2): segment.text
        for segment in segments
    }

    with lock:
        shitpost.speech_output = SpeechOutput(
            text=json.dumps(text_by_start),
        )