MemeDb/app.py
Djalim Simaila 03a41b6996 feat: add initial implementation of the shitpost scanning application with database integration and various scanners for music, text, speech, and tags extraction
- Introduce `app.py` as the main application file to handle shitpost scanning.
- Create `config.py` for configuration settings including scan paths and file types.
- Implement database models in `Models.py` for shitposts, songs, speech outputs, and tags.
- Add database creation logic in `db.py`.
- Develop various scanners (`OcrScanner.py`, `SongScanner.py`, `SpeechScanner.py`, `TagScanner.py`) for extracting information from shitposts.
- Implement utility functions in `dateExtractor.py` and `shitpostFactory.py` for handling file metadata and creating shitpost objects.
- Include a `pyproject.toml` for project dependencies and configuration.
2025-06-23 23:28:04 +02:00

80 lines
2.3 KiB
Python

import os
import time
from typing import List, Tuple
import warnings
import asyncio
import threading
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from config import SCAN_PATHS
from db.Models import Shitpost
from db.db import create_db
from scanners.OcrScanner import extractText
from scanners.SongScanner import extractSong
from scanners.SpeechScanner import extractSpeech
from utils.shitpostFactory import ShitpostFactory
# Silence every Python warning for the whole process: no category or module
# filter is given, so this "ignore" rule matches all warnings.
warnings.filterwarnings("ignore")
def scanMusics(shitposts:List[Tuple[Shitpost,threading.Lock]]):
    """Run song extraction over every entry of *shitposts*, in list order.

    Each entry is a ``(shitpost, lock)`` pair; both elements are handed
    unchanged to ``extractSong``. How the lock is used is decided by the
    extractor, not here.
    """
    for entry in shitposts:
        guard = entry[1]
        post = entry[0]
        extractSong(post, guard)
def scanText(shitposts:List[Tuple[Shitpost,threading.Lock]]):
    """Run text extraction over every entry of *shitposts*, in list order.

    Each entry is a ``(shitpost, lock)`` pair; both elements are handed
    unchanged to ``extractText``. How the lock is used is decided by the
    extractor, not here.
    """
    for entry in shitposts:
        guard = entry[1]
        post = entry[0]
        extractText(post, guard)
def scanSpeech(shitposts:List[Tuple[Shitpost,threading.Lock]]):
    """Run speech extraction over every entry of *shitposts*, in list order.

    Each entry is a ``(shitpost, lock)`` pair; both elements are handed
    unchanged to ``extractSpeech``. How the lock is used is decided by the
    extractor, not here.
    """
    for entry in shitposts:
        guard = entry[1]
        post = entry[0]
        extractSpeech(post, guard)
def _collectNewShitposts(knownPaths):
    """Build a ``(Shitpost, Lock)`` pair for every file not yet in *knownPaths*.

    Walks each directory listed in ``SCAN_PATHS`` recursively. Every file path
    that is not already in *knownPaths* is added to that set (so a file
    reachable through two overlapping scan roots is only processed once) and
    passed to ``ShitpostFactory``. When the factory raises, the file name is
    appended to ``failed.txt`` and the walk continues.

    Args:
        knownPaths: set of file paths to skip; mutated in place with every
            newly visited path.

    Returns:
        A list of ``(shitpost, threading.Lock())`` tuples for the files the
        factory accepted.
    """
    discovered = []
    for scanRoot in SCAN_PATHS:
        for dirPath, _dirNames, fileNames in os.walk(scanRoot):
            for file in fileNames:
                path = os.path.join(dirPath, file)
                if path in knownPaths:
                    continue
                knownPaths.add(path)
                print(file)
                try:
                    discovered.append((ShitpostFactory(path), threading.Lock()))
                except Exception:
                    # Best effort: record the file and keep scanning.
                    # ``Exception`` (not a bare except) lets Ctrl-C and
                    # SystemExit still stop the run. UTF-8 keeps a non-ASCII
                    # file name from raising inside this handler.
                    with open("failed.txt", "a", encoding="utf-8") as failedLog:
                        failedLog.write(file + "\n")
    return discovered


async def scanShitposts():
    """Scan known and newly discovered shitposts, then persist them.

    Steps:
      1. Load every ``Shitpost`` row from ``sqlite:///Shitpost.db``.
      2. Add a new ``Shitpost`` for each file under ``SCAN_PATHS`` whose path
         is not already stored.
      3. Run ``scanSpeech``, ``scanText`` and ``scanMusics`` over the full
         list, each in its own worker thread, and wait for all three.
      4. Add every shitpost to the session and commit.

    Each shitpost is paired with its own ``threading.Lock`` which is handed to
    all three scanners (presumably so they can serialise writes to the same
    object; the locking itself happens inside the extractors).

    The session is closed even if scanning or the commit raises.
    """
    engine = create_engine("sqlite:///Shitpost.db", future=True)
    with Session(engine) as session:
        shitposts = []
        paths = set()
        for shitpost in session.query(Shitpost).all():
            shitposts.append((shitpost, threading.Lock()))
            paths.add(shitpost.path)

        shitposts.extend(_collectNewShitposts(paths))

        # The three scanners are blocking functions; run them side by side in
        # worker threads and resume once all of them have finished.
        await asyncio.gather(
            asyncio.to_thread(scanSpeech, shitposts),
            asyncio.to_thread(scanText, shitposts),
            asyncio.to_thread(scanMusics, shitposts),
        )

        session.add_all([shitpost for shitpost, _lock in shitposts])
        session.commit()
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    # create_db() is called before the scan, which opens Shitpost.db itself.
    create_db()
    # Drive the async scan to completion on a fresh event loop.
    asyncio.run(scanShitposts())