- Introduce `app.py` as the main application file to handle shitpost scanning. - Create `config.py` for configuration settings including scan paths and file types. - Implement database models in `Models.py` for shitposts, songs, speech outputs, and tags. - Add database creation logic in `db.py`. - Develop various scanners (`OcrScanner.py`, `SongScanner.py`, `SpeechScanner.py`, `TagScanner.py`) for extracting information from shitposts. - Implement utility functions in `dateExtractor.py` and `shitpostFactory.py` for handling file metadata and creating shitpost objects. - Include a `pyproject.toml` for project dependencies and configuration.
93 lines
4.1 KiB
Python
93 lines
4.1 KiB
Python
from typing import List, Optional

from sqlalchemy import (
    Boolean,
    Column,
    ForeignKey,
    Integer,
    LargeBinary,
    String,
    Table,
)
from sqlalchemy.orm import declarative_base, Mapped, mapped_column, relationship
|
|
|
|
# Declarative base shared by every model in this module; its ``metadata`` is
# also what the ``shitposts_tags`` association table registers itself on.
Base = declarative_base()
|
|
|
|
# Association table backing the many-to-many link between shitposts and tags.
# Both foreign-key columns are flagged primary_key=True, so together they form
# a composite primary key; the column types are spelled out explicitly rather
# than left for SQLAlchemy to infer from the referenced columns.
shitposts_tags = Table(
    "shitposts_tags",
    Base.metadata,
    Column("left_id", String, ForeignKey("shitposts.hash"), primary_key=True),
    Column("right_id", Integer, ForeignKey("tags.id"), primary_key=True),
)
|
|
|
|
class Shitpost(Base):
    """ORM model for one row of the ``shitposts`` table, keyed by ``hash``.

    Relationships:
        ocr_output: scalar link to ``OcrOutput`` (paired with
            ``OcrOutput.shitpost``).
        speech_output: scalar link to ``SpeechOutput`` (paired with
            ``SpeechOutput.shitpost``).
        tags: many-to-many link to ``Tags`` through ``shitposts_tags``.
        song: many-to-one link to ``SongMatch``; ``None`` when ``song_id``
            is NULL.
    """

    __tablename__ = 'shitposts'

    hash: Mapped[str] = mapped_column(primary_key=True)
    path: Mapped[str] = mapped_column()
    # Stored as a string; consider sqlalchemy.types.DateTime for date fields.
    date: Mapped[str] = mapped_column()
    file_type: Mapped[str] = mapped_column()
    # LargeBinary holds raw bytes, so the annotation is ``bytes`` (it was
    # ``str``, which contradicted the column type). ``deferred=True`` keeps
    # the blob out of the default SELECT until the attribute is accessed.
    thumbnail: Mapped[bytes] = mapped_column(LargeBinary, deferred=True)
    correct_song_match: Mapped[bool] = mapped_column()

    # Scalar relationships: a non-collection Mapped[...] annotation makes
    # SQLAlchemy treat the attribute as a single object (uselist=False).
    # For these to be truly one-to-one at the database level, the
    # ``shitpost_id`` column on the other side should be unique.
    ocr_output: Mapped["OcrOutput"] = relationship(back_populates="shitpost")
    speech_output: Mapped["SpeechOutput"] = relationship(
        back_populates="shitpost"
    )

    # Many-to-many relationship with Tags; mirrors Tags.shitposts.
    tags: Mapped[List["Tags"]] = relationship(
        secondary=shitposts_tags,
        back_populates="shitposts",
    )

    # Nullable foreign key to SongMatch: a shitpost might not have a song.
    # The annotation is Optional so it agrees with the explicit nullable=True.
    song_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey('song_match.id'),
        nullable=True,
    )
    # Many-to-one relationship with SongMatch; None whenever song_id is NULL.
    song: Mapped[Optional["SongMatch"]] = relationship(
        back_populates="shitposts"
    )
|
|
|
|
class SongMatch(Base):
    """ORM model for the ``song_match`` table: a song name plus its artist."""

    __tablename__ = "song_match"

    id: Mapped[int] = mapped_column(primary_key=True)
    song_name: Mapped[str] = mapped_column()
    artist_name: Mapped[str] = mapped_column()

    # "One" side of the one-to-many link; paired with Shitpost.song.
    shitposts: Mapped[List["Shitpost"]] = relationship(back_populates="song")
|
|
|
|
class SpeechOutput(Base):
    """ORM model for the ``speech_output`` table: text tied to one shitpost."""

    __tablename__ = "speech_output"

    id: Mapped[int] = mapped_column(primary_key=True)
    text: Mapped[str] = mapped_column()

    # References Shitpost.hash, which is a string column, hence Mapped[str].
    shitpost_id: Mapped[str] = mapped_column(ForeignKey("shitposts.hash"))

    # Scalar (single-object) side of the link; paired with
    # Shitpost.speech_output.
    shitpost: Mapped["Shitpost"] = relationship(back_populates="speech_output")
|
|
|
|
class Tags(Base):
    """ORM model for the ``tags`` table: a named tag."""

    __tablename__ = "tags"

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column()

    # Many-to-many link routed through the shitposts_tags association table;
    # paired with Shitpost.tags.
    shitposts: Mapped[List["Shitpost"]] = relationship(
        back_populates="tags",
        secondary=shitposts_tags,
    )
|
|
|
|
class OcrOutput(Base):
    """ORM model for the ``ocr_output`` table: text tied to one shitpost."""

    __tablename__ = "ocr_output"

    id: Mapped[int] = mapped_column(primary_key=True)
    text: Mapped[str] = mapped_column()

    # Required, indexed reference to Shitpost.hash; that column is a string,
    # hence Mapped[str].
    shitpost_id: Mapped[str] = mapped_column(
        ForeignKey("shitposts.hash"), nullable=False, index=True
    )

    # Scalar (single-object) side of the link; paired with Shitpost.ocr_output.
    shitpost: Mapped["Shitpost"] = relationship(back_populates="ocr_output")
|