feat: Add video title and enhanced metadata to transcripts and summaries

This commit is contained in:
2025-01-03 14:54:47 +01:00
parent d3451c19e4
commit 4761a590ad

View File

@@ -74,8 +74,8 @@ def ensure_yts_dirs() -> tuple[Path, Path]:
return transcript_dir, summary_dir return transcript_dir, summary_dir
def download_subtitles(url: str, quiet: bool = False) -> tuple[bool, Path | None, int]: def download_subtitles(url: str, quiet: bool = False) -> tuple[bool, Path | None, int, str]:
"""Download subtitles from YouTube using yt-dlp and return success, path, and duration in minutes.""" """Download subtitles from YouTube using yt-dlp and return success, path, duration in minutes, and title."""
try: try:
ydl_opts = { ydl_opts = {
"skip_download": True, "skip_download": True,
@@ -85,12 +85,13 @@ def download_subtitles(url: str, quiet: bool = False) -> tuple[bool, Path | None
"no_warnings": quiet, "no_warnings": quiet,
} }
# First get video duration # First get video info
info_opts = dict(ydl_opts) info_opts = dict(ydl_opts)
info_opts["extract_flat"] = True info_opts["extract_flat"] = True
with YoutubeDL(info_opts) as ydl: with YoutubeDL(info_opts) as ydl:
info = ydl.extract_info(url, download=False) info = ydl.extract_info(url, download=False)
duration_mins = int(info.get("duration", 0) / 60) # type: ignore duration_mins = int(info.get("duration", 0) / 60) # type: ignore
title = info.get("title", "Unknown Title") # type: ignore
if not quiet: if not quiet:
print(f"Debug: Downloading subtitles for {url}") print(f"Debug: Downloading subtitles for {url}")
@@ -109,7 +110,7 @@ def download_subtitles(url: str, quiet: bool = False) -> tuple[bool, Path | None
print("No VTT file found after download", file=sys.stderr) print("No VTT file found after download", file=sys.stderr)
return False, None, 0 return False, None, 0
return True, vtt_files[0], duration_mins return True, vtt_files[0], duration_mins, title
except Exception as e: except Exception as e:
print(f"Error during subtitle download: {str(e)}", file=sys.stderr) print(f"Error during subtitle download: {str(e)}", file=sys.stderr)
@@ -169,15 +170,17 @@ def process_file(input_path: Path, output_path: Path | None = None, quiet: bool
return "" return ""
def save_transcript(text: str, url: str) -> Path: def save_transcript(text: str, url: str, title: str) -> Path:
"""Save transcript with metadata to ~/.yts/transcripts/.""" """Save transcript with metadata to ~/.yts/transcripts/."""
transcript_dir, _ = ensure_yts_dirs() transcript_dir, _ = ensure_yts_dirs()
filename = sanitize_filename(url) + ".txt" filename = sanitize_filename(url) + ".txt"
filepath = transcript_dir / filename filepath = transcript_dir / filename
metadata = f"""URL: {url} metadata = f"""Title: {title}
URL: {url}
Script Version: {__version__} Script Version: {__version__}
Timestamp: {datetime.now().isoformat()} Timestamp: {datetime.now().isoformat()}
Type: Transcript
--- ---
""" """
@@ -240,7 +243,7 @@ def main():
# Download subtitles # Download subtitles
if not args.quiet: if not args.quiet:
print("Downloading subtitles...") print("Downloading subtitles...")
success, vtt_path, duration_mins = download_subtitles(args.url, args.quiet) success, vtt_path, duration_mins, video_title = download_subtitles(args.url, args.quiet)
if not success: if not success:
cleanup_files(None) cleanup_files(None)
sys.exit(1) sys.exit(1)
@@ -250,7 +253,7 @@ def main():
cleaned_text = process_file(vtt_path, None, args.quiet) # type: ignore cleaned_text = process_file(vtt_path, None, args.quiet) # type: ignore
# Save transcript # Save transcript
transcript_path = save_transcript(cleaned_text, args.url) transcript_path = save_transcript(cleaned_text, args.url, video_title)
print(f"\nTranscript saved to: {transcript_path}") print(f"\nTranscript saved to: {transcript_path}")
# Estimate and display cost # Estimate and display cost
@@ -274,8 +277,16 @@ def main():
summary_filename = sanitize_filename(args.url) + "_summary.txt" summary_filename = sanitize_filename(args.url) + "_summary.txt"
summary_path = summary_dir / summary_filename summary_path = summary_dir / summary_filename
summary_metadata = f"""Title: {video_title}
URL: {args.url}
Script Version: {__version__}
Timestamp: {datetime.now().isoformat()}
Type: Summary
---
"""
with open(summary_path, "w", encoding="utf-8") as f: with open(summary_path, "w", encoding="utf-8") as f:
f.write(output) f.write(summary_metadata + output)
print("\n=== Summary ===\n") print("\n=== Summary ===\n")
print(summary) print(summary)
print() print()