- Introduce `app.py` as the main application file to handle shitpost scanning. - Create `config.py` for configuration settings including scan paths and file types. - Implement database models in `Models.py` for shitposts, songs, speech outputs, and tags. - Add database creation logic in `db.py`. - Develop various scanners (`OcrScanner.py`, `SongScanner.py`, `SpeechScanner.py`, `TagScanner.py`) for extracting information from shitposts. - Implement utility functions in `dateExtractor.py` and `shitpostFactory.py` for handling file metadata and creating shitpost objects. - Include a `pyproject.toml` for project dependencies and configuration.
56 lines
1.4 KiB
Python
56 lines
1.4 KiB
Python
import hashlib
|
|
import os
|
|
import shutil
|
|
import tempfile
|
|
|
|
from thumbnail import generate_thumbnail
|
|
|
|
from db.Models import Shitpost
|
|
from utils.dateExtractor import extract_date_from_path
|
|
|
|
# Settings handed to ``generate_thumbnail`` as its third argument when a
# thumbnail is rendered. Key meanings are defined by the ``thumbnail``
# package -- presumably a 300x300 bounding box at quality 85; confirm against
# that library's documentation.
options = {
    "trim": False,
    "height": 300,
    "width": 300,
    "quality": 85,
    "type": "thumbnail",
}
|
|
|
|
|
|
def hashfile(file_path: str) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *file_path*.

    Args:
        file_path: Path of the file to hash; it is opened in unbuffered
            binary mode.

    Returns:
        The digest as a lowercase hex string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, 'rb', buffering=0) as f:
        # hashlib.file_digest only exists on Python 3.11+; fall back to a
        # manual chunked read on older interpreters so the result is the same.
        file_digest = getattr(hashlib, 'file_digest', None)
        if file_digest is not None:
            return file_digest(f, 'sha256').hexdigest()

        digest = hashlib.sha256()
        view = memoryview(bytearray(1 << 18))
        while True:
            size = f.readinto(view)
            if not size:
                break
            digest.update(view[:size])
        return digest.hexdigest()
|
|
|
|
def ShitpostFactory(file_path: str) -> "Shitpost":
    """Build a ``Shitpost`` object describing the file at *file_path*.

    The object is populated with the file's SHA-256 hash, its path, the
    timestamp of the date returned by ``extract_date_from_path``, the
    lower-cased file extension (without the leading dot) and the bytes of a
    thumbnail rendered by ``generate_thumbnail`` using the module-level
    ``options``. ``correct_song_match`` is initialised to ``False``.

    Args:
        file_path: Path of the file to wrap.

    Returns:
        The populated ``Shitpost`` instance.
    """
    filename = os.path.basename(file_path)
    filetype = os.path.splitext(filename)[1].lower()[1:]
    shitpost_hash = hashfile(file_path)

    # Get the file's date as a timestamp.
    shitpost_date = extract_date_from_path(file_path).timestamp()

    shitpost = Shitpost(
        hash=shitpost_hash,
        path=file_path,
        date=shitpost_date,
        file_type=filetype,
    )

    # Create the thumbnail from a scratch copy so the original file is never
    # touched by the thumbnailer; the directory is removed on exit.
    with tempfile.TemporaryDirectory() as tmpdir:
        source_copy = os.path.join(tmpdir, f"{shitpost_hash}.{filetype}")
        shutil.copyfile(file_path, source_copy)

        # The thumbnail gets its own name: with "<hash>.png" a PNG input
        # would make the source copy and the output the very same path.
        thumb_path = os.path.join(tmpdir, f"{shitpost_hash}_thumb.png")
        generate_thumbnail(source_copy, thumb_path, options)

        with open(thumb_path, "rb") as thumb:
            shitpost.thumbnail = thumb.read()

    # Song match default value.
    shitpost.correct_song_match = False

    return shitpost
|