- Introduce `app.py` as the main application file to handle shitpost scanning. - Create `config.py` for configuration settings including scan paths and file types. - Implement database models in `Models.py` for shitposts, songs, speech outputs, and tags. - Add database creation logic in `db.py`. - Develop various scanners (`OcrScanner.py`, `SongScanner.py`, `SpeechScanner.py`, `TagScanner.py`) for extracting information from shitposts. - Implement utility functions in `dateExtractor.py` and `shitpostFactory.py` for handling file metadata and creating shitpost objects. - Include a `pyproject.toml` for project dependencies and configuration.
52 lines
1.9 KiB
Python
52 lines
1.9 KiB
Python
import tempfile
|
|
from time import time
|
|
|
|
from config import VIDEO_FILETYPES
|
|
from db.Models import Shitpost, SpeechOutput
|
|
from faster_whisper import WhisperModel
|
|
|
|
|
|
import threading
|
|
import subprocess
|
|
import json
|
|
|
|
def extractSpeech(shitpost: Shitpost, lock: threading.Lock):
    """Run speech extraction for one shitpost and log how long it took.

    Transcription is delegated to ``fastWhisper`` and only happens when the
    shitpost has no ``speech_output`` yet and its ``file_type`` is not listed
    in ``VIDEO_FILETYPES``. Start and end messages are printed either way.

    Args:
        shitpost: The shitpost to process; ``hash``, ``path``,
            ``speech_output`` and ``file_type`` are read here.
        lock: Passed through unchanged to ``fastWhisper``.
    """
    started_at = time()
    print(f"\tstarting to extract speech for {shitpost.hash[:4]} aka {shitpost.path}")

    # NOTE(review): transcription runs only for file types that are NOT in
    # VIDEO_FILETYPES -- confirm this is intended and not an inverted check.
    needs_transcription = (
        shitpost.speech_output is None
        and shitpost.file_type not in VIDEO_FILETYPES
    )
    if needs_transcription:
        # The CLI-based `whisper(shitpost)` backend is the disabled alternative.
        fastWhisper(shitpost, lock)

    print(f"\tspeech extracted for {shitpost.hash[:4]} in :{time()-started_at} ")
|
|
|
|
|
|
|
|
def whisper(shitpost: Shitpost):
    """Transcribe a shitpost with the ``whisper`` command-line tool.

    Runs ``whisper`` on ``shitpost.path`` with JSON output written into a
    temporary directory, loads the produced JSON file, prints its ``"text"``
    entry and stores the whole document (re-serialised as a JSON string) in a
    new ``SpeechOutput`` assigned to ``shitpost.speech_output``.

    Args:
        shitpost: Object whose ``path`` names the media file to transcribe;
            its ``speech_output`` attribute is overwritten.

    Raises:
        FileNotFoundError: If the expected JSON output file does not exist
            after the command has run (or the ``whisper`` executable itself
            cannot be found).
    """
    import os  # local import: this block needs os.path for the output name

    # Strip only the LAST extension so that "a.b.mp4" maps to "a.b.json".
    # NOTE(review): this assumes the whisper CLI names its output after the
    # input's basename minus its final extension -- confirm against the
    # installed whisper version.
    stem = os.path.splitext(os.path.basename(shitpost.path))[0]

    with tempfile.TemporaryDirectory() as tmpdir:
        # Argument list + no shell: the path is passed verbatim, so quotes,
        # spaces or shell metacharacters in a filename cannot break or
        # inject into the command.
        cmd = [
            "whisper",
            "--verbose", "False",
            "-f", "json",
            "-o", tmpdir,
            shitpost.path,
        ]
        subprocess.run(cmd)

        # Read the result before the temporary directory is removed.
        with open(os.path.join(tmpdir, f"{stem}.json"), "r", encoding="utf-8") as file:
            data = json.load(file)

    print("extracted speech :", data["text"] )
    shitpost.speech_output = SpeechOutput(
        text=json.dumps(data),
    )
|
|
|
|
def fastWhisper(shitpost: Shitpost, lock: threading.Lock):
    """Transcribe a shitpost with faster-whisper and attach the result.

    Builds a mapping from each segment's start time (rounded to two
    decimals) to the segment text, serialises it with ``json.dumps`` and
    stores it in a new ``SpeechOutput`` assigned to
    ``shitpost.speech_output`` while holding ``lock``.

    Args:
        shitpost: Object whose ``path`` names the media file to transcribe;
            its ``speech_output`` attribute is overwritten.
        lock: Held only while ``shitpost.speech_output`` is assigned; the
            transcription itself runs outside the lock.
    """
    # NOTE(review): the model is constructed on every call; if this is called
    # once per file, consider sharing one instance -- confirm memory and
    # threading constraints first.
    model = WhisperModel("turbo", device="cpu", compute_type="int8")
    segments, _info = model.transcribe(
        shitpost.path,
        beam_size=5,
        language_detection_segments=2,
    )

    # Keyed by rounded start time; if two segments round to the same start,
    # the later one wins (same as plain dict assignment in a loop).
    text_by_start = {
        round(segment.start, 2): segment.text for segment in segments
    }

    with lock:
        shitpost.speech_output = SpeechOutput(
            text=json.dumps(text_by_start),
        )
|