"""Speech-to-text extraction for ``Shitpost`` records via Whisper backends."""

import json
import os
import subprocess
import tempfile
import threading
from time import time

from faster_whisper import WhisperModel

from config import VIDEO_FILETYPES
from db.Models import Shitpost, SpeechOutput


def extractSpeech(shitpost: Shitpost, lock: threading.Lock):
    """Transcribe ``shitpost`` if it has no speech output yet, and log timing.

    Args:
        shitpost: Record to transcribe; its ``speech_output`` attribute is
            set in place when a transcription is performed.
        lock: Lock held while ``shitpost.speech_output`` is assigned.
    """
    started_at = time()
    print(f"\tstarting to extract speech for {shitpost.hash[:4]} aka {shitpost.path}")
    # NOTE(review): transcription is skipped when file_type IS in
    # VIDEO_FILETYPES -- confirm this is intended and not an inverted check.
    if shitpost.speech_output is None and shitpost.file_type not in VIDEO_FILETYPES:
        # fastWhisper() is the active backend; whisper() is the CLI-based
        # alternative kept in this module.
        fastWhisper(shitpost, lock)
    # Printed whether or not a transcription actually ran.
    print(f"\tspeech extracted for {shitpost.hash[:4]} in :{time() - started_at} ")


def whisper(shitpost: Shitpost):
    """Transcribe ``shitpost.path`` with the ``whisper`` command-line tool.

    The CLI writes ``<stem>.json`` into a temporary directory; the parsed
    document is re-serialised and stored as ``shitpost.speech_output``.

    Args:
        shitpost: Record whose ``path`` is transcribed and whose
            ``speech_output`` is set in place.

    Raises:
        subprocess.CalledProcessError: If the ``whisper`` command exits with
            a non-zero status.
        FileNotFoundError: If the expected JSON output file is missing.
    """
    # Strip only the final extension so names such as "clip.v2.mp3" resolve
    # to "clip.v2" (assumed to match the CLI's output naming -- see the
    # Whisper ResultWriter reference).
    stem = os.path.splitext(os.path.basename(shitpost.path))[0]

    with tempfile.TemporaryDirectory() as tmpdir:
        # An argv list (no shell) keeps spaces, quotes and shell
        # metacharacters in the path from being interpreted.
        cmd = [
            "whisper",
            "--verbose", "False",
            "-f", "json",
            "-o", tmpdir,
            shitpost.path,
        ]
        subprocess.run(cmd, check=True)

        output_path = os.path.join(tmpdir, f"{stem}.json")
        with open(output_path, "r", encoding="utf-8") as file:
            data = json.load(file)

    print("extracted speech :", data["text"])
    shitpost.speech_output = SpeechOutput(
        text=json.dumps(data),
    )


def fastWhisper(shitpost: Shitpost, lock: threading.Lock):
    """Transcribe ``shitpost.path`` with faster-whisper and store the result.

    The stored text is a JSON object mapping each segment's start time
    (rounded to two decimals) to that segment's text.

    Args:
        shitpost: Record whose ``path`` is transcribed and whose
            ``speech_output`` is set in place.
        lock: Lock held only while ``shitpost.speech_output`` is assigned.
    """
    # A new model instance is created on every call.
    model = WhisperModel("turbo", device="cpu", compute_type="int8")
    segments, _info = model.transcribe(
        shitpost.path,
        beam_size=5,
        language_detection_segments=2,
    )

    # The segments are consumed here, before the lock is taken. If two
    # segments round to the same start time, the later one wins.
    transcript = {round(segment.start, 2): segment.text for segment in segments}

    with lock:
        shitpost.speech_output = SpeechOutput(
            text=json.dumps(transcript),
        )