MemeDb/db/Models.py
Djalim Simaila 03a41b6996 feat: add initial implementation of the shitpost scanning application with database integration and various scanners for music, text, speech, and tags extraction
- Introduce `app.py` as the main application file to handle shitpost scanning.
- Create `config.py` for configuration settings including scan paths and file types.
- Implement database models in `Models.py` for shitposts, songs, speech outputs, and tags.
- Add database creation logic in `db.py`.
- Develop various scanners (`OcrScanner.py`, `SongScanner.py`, `SpeechScanner.py`, `TagScanner.py`) for extracting information from shitposts.
- Implement utility functions in `dateExtractor.py` and `shitpostFactory.py` for handling file metadata and creating shitpost objects.
- Include a `pyproject.toml` for project dependencies and configuration.
2025-06-23 23:28:04 +02:00

93 lines
4.1 KiB
Python

from typing import List, Optional

from sqlalchemy import Column, ForeignKey, LargeBinary, Table, String, Integer, Boolean # Added String, Integer, Boolean for Table definition
from sqlalchemy.orm import declarative_base, Mapped, mapped_column, relationship
# Shared declarative base: every model in this module registers its table
# on Base.metadata.
Base = declarative_base()

# Association table backing the many-to-many link between the "shitposts"
# and "tags" tables. Both columns are part of the primary key, so the same
# (shitpost, tag) pair cannot be stored twice.
shitposts_tags = Table(
    "shitposts_tags",
    Base.metadata,
    # References shitposts.hash (a string key).
    Column(
        "left_id",
        String,
        ForeignKey("shitposts.hash"),
        primary_key=True,
    ),
    # References tags.id (an integer key).
    Column(
        "right_id",
        Integer,
        ForeignKey("tags.id"),
        primary_key=True,
    ),
)
class Shitpost(Base):
    """ORM model for the ``shitposts`` table.

    Rows are keyed by ``hash`` and record the file's path, date, type and a
    thumbnail. A row links to at most one ``OcrOutput`` and one
    ``SpeechOutput`` at the ORM level, to any number of ``Tags`` through the
    ``shitposts_tags`` association table, and optionally to one ``SongMatch``.
    """

    __tablename__ = "shitposts"

    # --- Columns -----------------------------------------------------------
    # String primary key (presumably a content hash of the file; confirm
    # against the code that creates Shitpost objects).
    hash: Mapped[str] = mapped_column(primary_key=True)
    path: Mapped[str] = mapped_column()
    # NOTE(review): stored as a plain string; consider
    # sqlalchemy.types.DateTime if real date semantics are needed.
    date: Mapped[str] = mapped_column()
    file_type: Mapped[str] = mapped_column()
    # LargeBinary columns hold bytes, so the annotation is ``bytes``.
    # deferred=True keeps the blob out of the default SELECT; it is loaded
    # on first attribute access.
    thumbnail: Mapped[bytes] = mapped_column(LargeBinary, deferred=True)
    correct_song_match: Mapped[bool] = mapped_column()
    # Nullable foreign key to song_match.id; the annotation is Optional to
    # agree with nullable=True.
    song_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey("song_match.id"),
        nullable=True,
    )

    # --- Relationships -----------------------------------------------------
    # Scalar (uselist=False) link to OcrOutput; evaluates to None when no
    # related row exists. OcrOutput.shitpost_id is not declared unique, so
    # the one-to-one shape is not enforced by the database.
    ocr_output: Mapped[Optional["OcrOutput"]] = relationship(
        back_populates="shitpost",
    )
    # Scalar link to SpeechOutput, with the same caveats as ocr_output.
    speech_output: Mapped[Optional["SpeechOutput"]] = relationship(
        back_populates="shitpost",
    )
    # Many-to-many with Tags through the shitposts_tags association table;
    # mirrors Tags.shitposts.
    tags: Mapped[List["Tags"]] = relationship(
        secondary=shitposts_tags,
        back_populates="shitposts",
    )
    # Many-to-one with SongMatch; None when song_id is NULL. Mirrors
    # SongMatch.shitposts.
    song: Mapped[Optional["SongMatch"]] = relationship(
        back_populates="shitposts",
    )
class SongMatch(Base):
    """ORM model for the ``song_match`` table: a song name and artist name.

    Any number of ``Shitpost`` rows may reference one ``SongMatch`` through
    ``Shitpost.song_id``.
    """

    __tablename__ = "song_match"

    # Integer primary key.
    id: Mapped[int] = mapped_column(primary_key=True)
    song_name: Mapped[str] = mapped_column()
    artist_name: Mapped[str] = mapped_column()

    # One-to-many: every Shitpost whose ``song`` is this row. Mirrors
    # Shitpost.song.
    shitposts: Mapped[List["Shitpost"]] = relationship(
        back_populates="song",
    )
class SpeechOutput(Base):
    """ORM model for the ``speech_output`` table.

    Stores a piece of text tied to a single ``Shitpost`` (presumably the
    result of the speech scanner; confirm against the scanner code).
    """

    __tablename__ = "speech_output"

    # Integer primary key.
    id: Mapped[int] = mapped_column(primary_key=True)
    text: Mapped[str] = mapped_column()
    # Foreign key to shitposts.hash, which is a string key, hence Mapped[str].
    shitpost_id: Mapped[str] = mapped_column(
        ForeignKey("shitposts.hash"),
    )

    # Owning Shitpost (scalar). Mirrors Shitpost.speech_output.
    shitpost: Mapped["Shitpost"] = relationship(
        back_populates="speech_output",
    )
class Tags(Base):
    """ORM model for the ``tags`` table: a named tag.

    Tags and shitposts are linked many-to-many through the
    ``shitposts_tags`` association table.
    """

    __tablename__ = "tags"

    # Integer primary key.
    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column()

    # Every Shitpost carrying this tag. Mirrors Shitpost.tags.
    shitposts: Mapped[List["Shitpost"]] = relationship(
        secondary=shitposts_tags,
        back_populates="tags",
    )
class OcrOutput(Base):
    """ORM model for the ``ocr_output`` table.

    Stores a piece of text tied to a single ``Shitpost`` (presumably the
    result of the OCR scanner; confirm against the scanner code).
    """

    __tablename__ = "ocr_output"

    # Integer primary key.
    id: Mapped[int] = mapped_column(primary_key=True)
    text: Mapped[str] = mapped_column()
    # Required, indexed foreign key to shitposts.hash, which is a string
    # key, hence Mapped[str].
    shitpost_id: Mapped[str] = mapped_column(
        ForeignKey("shitposts.hash"),
        nullable=False,
        index=True,
    )

    # Owning Shitpost (scalar). Mirrors Shitpost.ocr_output.
    shitpost: Mapped["Shitpost"] = relationship(
        back_populates="ocr_output",
    )