- Introduce `app.py` as the main application file to handle shitpost scanning. - Create `config.py` for configuration settings including scan paths and file types. - Implement database models in `Models.py` for shitposts, songs, speech outputs, and tags. - Add database creation logic in `db.py`. - Develop various scanners (`OcrScanner.py`, `SongScanner.py`, `SpeechScanner.py`, `TagScanner.py`) for extracting information from shitposts. - Implement utility functions in `dateExtractor.py` and `shitpostFactory.py` for handling file metadata and creating shitpost objects. - Include a `pyproject.toml` for project dependencies and configuration.
62 lines
2.7 KiB
Python
62 lines
2.7 KiB
Python
import re
|
|
import os
|
|
from datetime import datetime
|
|
from typing import Optional, Union # For type hinting
|
|
|
|
# Filename timestamp patterns, tried in order with ``re.search`` (unanchored).
# Both capture six groups: year, month, day, hour, minute, second.
_FILENAME_DATE_PATTERNS = (
    # (Optional_Prefix_)YYYYMMDD_HHMMSS(_Optional_Suffix).ext
    # e.g. IMG_20210811_141036.jpg, 20210509_005303.jpg,
    #      IMG_20190723_211320_065.jpg
    # The prefix is greedy, so when a name holds several timestamps a later
    # "_"-preceded one wins over an earlier one.
    re.compile(r"(?:.*_)?(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})(?:_.*)?\..+"),
    # Same shape without the prefix: selects the first timestamp in the name.
    # Only useful when the digits picked by the pattern above were not a
    # valid calendar date.
    re.compile(r"(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})(?:_.*)?\..+"),
)


def extract_date_from_path(file_path: str) -> Optional[datetime]:
    """Return a timestamp for a file, taken from its name or its mtime.

    The base name of ``file_path`` is searched for a ``YYYYMMDD_HHMMSS``
    stamp followed by an extension. If no valid date can be built from the
    name, the file's last modification time is used instead.

    Args:
        file_path (str): The absolute or relative path to the file. The file
            only needs to exist when the name carries no usable timestamp.

    Returns:
        Optional[datetime]: A naive datetime parsed from the filename, or the
        modification time converted with ``datetime.fromtimestamp`` (local
        time). ``None`` if the name has no valid timestamp and the file is
        missing or its metadata cannot be read.

    Problems are reported with ``print`` rather than raised; the function is
    best-effort by design.
    """
    filename = os.path.basename(file_path)

    # Both patterns frequently capture the very same digits; remember what
    # was already tried so an invalid date is reported once, not per pattern.
    attempted = set()
    for pattern in _FILENAME_DATE_PATTERNS:
        match = pattern.search(filename)
        if match is None:
            continue
        groups = match.groups()
        if groups in attempted:
            continue
        attempted.add(groups)
        try:
            # Groups are year, month, day, hour, minute, second.
            return datetime(*map(int, groups))
        except ValueError as e:
            # Digits had the right shape but are not a real date/time
            # (e.g. month 13): try the next pattern, then the mtime fallback.
            print(
                f"Warning: Could not parse date from filename groups {groups} "
                f"for {filename}: {e}"
            )

    if not os.path.exists(file_path):
        print(f"Warning: File not found at '{file_path}'. Cannot get metadata.")
        return None

    try:
        # Fall back to the last modification time.
        return datetime.fromtimestamp(os.path.getmtime(file_path))
    except OSError as e:
        print(f"Error: Could not get metadata for {file_path}: {e}")
    except Exception as e:
        # Deliberately broad: this lookup is best-effort and must not crash
        # the caller (e.g. a timestamp outside the platform's supported range).
        print(f"Unexpected error getting metadata for {file_path}: {e}")
    return None
|