- Introduce `app.py` as the main application file to handle shitpost scanning. - Create `config.py` for configuration settings including scan paths and file types. - Implement database models in `Models.py` for shitposts, songs, speech outputs, and tags. - Add database creation logic in `db.py`. - Develop various scanners (`OcrScanner.py`, `SongScanner.py`, `SpeechScanner.py`, `TagScanner.py`) for extracting information from shitposts. - Implement utility functions in `dateExtractor.py` and `shitpostFactory.py` for handling file metadata and creating shitpost objects. - Include a `pyproject.toml` for project dependencies and configuration.
80 lines
2.3 KiB
Python
80 lines
2.3 KiB
Python
import os
|
|
import time
|
|
from typing import List, Tuple
|
|
import warnings
|
|
import asyncio
|
|
import threading
|
|
|
|
from sqlalchemy import create_engine
|
|
from sqlalchemy.orm import Session
|
|
|
|
from config import SCAN_PATHS
|
|
from db.Models import Shitpost
|
|
from db.db import create_db
|
|
from scanners.OcrScanner import extractText
|
|
from scanners.SongScanner import extractSong
|
|
from scanners.SpeechScanner import extractSpeech
|
|
from utils.shitpostFactory import ShitpostFactory
|
|
|
|
# Silence every warning for the whole process: no category, message or module
# filter is given, so the "ignore" action applies to all of them.
# NOTE(review): presumably meant to quiet noisy scanner dependencies — confirm
# that hiding deprecation/runtime warnings globally is really intended.
warnings.filterwarnings("ignore")
|
|
|
|
|
|
def scanMusics(shitposts: List[Tuple[Shitpost, threading.Lock]]):
    """Run the song scanner over every queued shitpost, in list order.

    Args:
        shitposts: ``(shitpost, lock)`` pairs. Each shitpost is handed to
            ``extractSong`` together with the lock stored next to it.
            How the lock is used is up to ``extractSong``.
    """
    for post, guard in shitposts:
        extractSong(post, guard)
|
|
|
|
def scanText(shitposts: List[Tuple[Shitpost, threading.Lock]]):
    """Run the OCR text scanner over every queued shitpost, in list order.

    Args:
        shitposts: ``(shitpost, lock)`` pairs. Each shitpost is handed to
            ``extractText`` together with the lock stored next to it.
            How the lock is used is up to ``extractText``.
    """
    for post, guard in shitposts:
        extractText(post, guard)
|
|
|
|
def scanSpeech(shitposts: List[Tuple[Shitpost, threading.Lock]]):
    """Run the speech scanner over every queued shitpost, in list order.

    Args:
        shitposts: ``(shitpost, lock)`` pairs. Each shitpost is handed to
            ``extractSpeech`` together with the lock stored next to it.
            How the lock is used is up to ``extractSpeech``.
    """
    for post, guard in shitposts:
        extractSpeech(post, guard)
|
|
|
|
|
|
def _discoverNewShitposts(known_paths):
    """Build a ``(Shitpost, Lock)`` pair for every file not yet in *known_paths*.

    Walks every root in ``SCAN_PATHS`` recursively. Each unseen file name is
    printed and passed to ``ShitpostFactory``; if the factory raises, the file
    name is appended to ``failed.txt`` and the walk continues.

    Args:
        known_paths: Set of file paths that are already tracked. It is updated
            in place with every path visited here, so a file reachable through
            two overlapping scan roots is only queued once.

    Returns:
        List of ``(shitpost, threading.Lock())`` tuples for the new files.
    """
    discovered = []
    for root in SCAN_PATHS:
        for dir_path, _dir_names, file_names in os.walk(root):
            for file_name in file_names:
                file_path = os.path.join(dir_path, file_name)
                if file_path in known_paths:
                    continue
                # Record the path right away (even if the factory fails below)
                # so overlapping scan roots cannot process the same file twice.
                known_paths.add(file_path)
                print(file_name)
                try:
                    discovered.append((ShitpostFactory(file_path), threading.Lock()))
                except Exception:
                    # Best effort: note the failure and keep scanning. Only
                    # ``Exception`` is caught so Ctrl-C / SystemExit still
                    # abort the run instead of being logged as a bad file.
                    with open("failed.txt", "a") as failed_log:
                        failed_log.write(file_name + "\n")
    return discovered


async def scanShitposts():
    """Scan all known and newly found shitposts and persist the results.

    Steps:
      1. Load every ``Shitpost`` row from ``Shitpost.db``.
      2. Walk ``SCAN_PATHS`` and create objects for files not in the database.
      3. Run the speech, text and song scanners concurrently, each in its own
         worker thread, over the same list of ``(shitpost, lock)`` pairs.
      4. Add every shitpost to the session and commit once.

    Each shitpost is paired with its own ``threading.Lock`` that is handed to
    the scanners (presumably so the three threads can serialise updates to the
    same object — the locking itself happens inside the scanner functions).

    Raises:
        Whatever a scanner or the database layer raises; the session is closed
        in that case as well.
    """
    engine = create_engine("sqlite:///Shitpost.db", future=True)
    # The context manager guarantees the session is closed even when a scanner
    # thread or the commit fails.
    with Session(engine) as session:
        shitposts = [
            (stored, threading.Lock()) for stored in session.query(Shitpost).all()
        ]
        known_paths = {stored.path for stored, _lock in shitposts}
        shitposts.extend(_discoverNewShitposts(known_paths))

        # The three scanners are blocking functions, so each one gets its own
        # thread and they all work through the shared list in parallel.
        await asyncio.gather(
            asyncio.to_thread(scanSpeech, shitposts),
            asyncio.to_thread(scanText, shitposts),
            asyncio.to_thread(scanMusics, shitposts),
        )

        session.add_all([post for post, _lock in shitposts])
        session.commit()
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the database setup first, then drive the
    # asynchronous scan to completion on a fresh event loop.
    create_db()
    scan = scanShitposts()
    asyncio.run(scan)
|