"""Scan the configured paths for shitposts and persist them to the database."""

import asyncio
import os
import threading
import time
import warnings
from typing import Callable, List, Tuple

from sqlalchemy import create_engine
from sqlalchemy.orm import Session

from config import SCAN_PATHS
from db.Models import Shitpost
from db.db import create_db
from scanners.OcrScanner import extractText
from scanners.SongScanner import extractSong
from scanners.SpeechScanner import extractSpeech
from utils.shitpostFactory import ShitpostFactory

warnings.filterwarnings("ignore")


def _runScanner(
    shitposts: List[Tuple[Shitpost, threading.Lock]],
    extractor: Callable[[Shitpost, threading.Lock], object],
) -> None:
    """Call ``extractor(shitpost, lock)`` for every pair, in list order."""
    for shitpost, lock in shitposts:
        extractor(shitpost, lock)


def scanMusics(shitposts: List[Tuple[Shitpost, threading.Lock]]):
    """Run ``extractSong`` on every ``(shitpost, lock)`` pair."""
    _runScanner(shitposts, extractSong)


def scanText(shitposts: List[Tuple[Shitpost, threading.Lock]]):
    """Run ``extractText`` on every ``(shitpost, lock)`` pair."""
    _runScanner(shitposts, extractText)


def scanSpeech(shitposts: List[Tuple[Shitpost, threading.Lock]]):
    """Run ``extractSpeech`` on every ``(shitpost, lock)`` pair."""
    _runScanner(shitposts, extractSpeech)


async def scanShitposts():
    """Collect known and newly found shitposts, scan them, and commit them.

    Steps:
      1. Load every ``Shitpost`` already stored in ``Shitpost.db``.
      2. Walk each directory in ``SCAN_PATHS`` and build a new shitpost via
         ``ShitpostFactory`` for every file whose path is not stored yet.
         Files for which the factory raises are appended to ``failed.txt``.
      3. Run the speech, text and song scanners over the full list, each in
         its own worker thread.
      4. Add every shitpost to the session and commit.

    Each shitpost is paired with its own ``threading.Lock`` which is handed
    to all three extractors.
    NOTE(review): presumably the extractors use that lock to serialize
    writes to the shared object - confirm in the scanner modules.
    """
    engine = create_engine("sqlite:///Shitpost.db", future=True)
    session = Session(engine)
    try:
        shitposts = []
        knownPaths = set()
        for shitpost in session.query(Shitpost).all():
            shitposts.append((shitpost, threading.Lock()))
            knownPaths.add(shitpost.path)

        for scanPath in SCAN_PATHS:
            for root, _dirs, fileNames in os.walk(scanPath):
                for fileName in fileNames:
                    filePath = os.path.join(root, fileName)
                    if filePath in knownPaths:
                        continue
                    print(fileName)
                    try:
                        newShitpost = ShitpostFactory(filePath)
                    except Exception:
                        # Best effort: record the file name and keep scanning.
                        # KeyboardInterrupt/SystemExit are deliberately not
                        # caught so the scan can still be aborted.
                        with open("failed.txt", "a") as failedLog:
                            failedLog.write(fileName + "\n")
                    else:
                        shitposts.append((newShitpost, threading.Lock()))

        # The three scanners run concurrently, one worker thread each; calling
        # scanMusics/scanText/scanSpeech directly would run them sequentially.
        await asyncio.gather(
            asyncio.to_thread(scanSpeech, shitposts),
            asyncio.to_thread(scanText, shitposts),
            asyncio.to_thread(scanMusics, shitposts),
        )

        for shitpost, _lock in shitposts:
            session.add(shitpost)
        session.commit()
    finally:
        # Release the session even when scanning or the commit fails.
        session.close()


if __name__ == "__main__":
    create_db()
    asyncio.run(scanShitposts())