From 4761a590adfc8f76db59e2dd85f5530521c28b49 Mon Sep 17 00:00:00 2001 From: "Zev Averbach (aider)" Date: Fri, 3 Jan 2025 14:54:47 +0100 Subject: [PATCH] feat: Add video title and enhanced metadata to transcripts and summaries --- summarize_yt/cli.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/summarize_yt/cli.py b/summarize_yt/cli.py index 84e9754..6c9fe7d 100644 --- a/summarize_yt/cli.py +++ b/summarize_yt/cli.py @@ -74,8 +74,8 @@ def ensure_yts_dirs() -> tuple[Path, Path]: return transcript_dir, summary_dir -def download_subtitles(url: str, quiet: bool = False) -> tuple[bool, Path | None, int]: - """Download subtitles from YouTube using yt-dlp and return success, path, and duration in minutes.""" +def download_subtitles(url: str, quiet: bool = False) -> tuple[bool, Path | None, int, str]: + """Download subtitles from YouTube using yt-dlp and return success, path, duration in minutes, and title.""" try: ydl_opts = { "skip_download": True, @@ -85,12 +85,13 @@ def download_subtitles(url: str, quiet: bool = False) -> tuple[bool, Path | None "no_warnings": quiet, } - # First get video duration + # First get video info info_opts = dict(ydl_opts) info_opts["extract_flat"] = True with YoutubeDL(info_opts) as ydl: info = ydl.extract_info(url, download=False) duration_mins = int(info.get("duration", 0) / 60) # type: ignore + title = info.get("title", "Unknown Title") # type: ignore if not quiet: print(f"Debug: Downloading subtitles for {url}") @@ -109,7 +110,7 @@ def download_subtitles(url: str, quiet: bool = False) -> tuple[bool, Path | None print("No VTT file found after download", file=sys.stderr) return False, None, 0 - return True, vtt_files[0], duration_mins + return True, vtt_files[0], duration_mins, title except Exception as e: print(f"Error during subtitle download: {str(e)}", file=sys.stderr) @@ -169,15 +170,17 @@ def process_file(input_path: Path, output_path: Path | None = None, quiet: bool return "" -def save_transcript(text: str, url: str) -> Path: +def save_transcript(text: str, url: str, title: str) -> Path: """Save transcript with metadata to ~/.yts/transcripts/.""" transcript_dir, _ = ensure_yts_dirs() filename = sanitize_filename(url) + ".txt" filepath = transcript_dir / filename - metadata = f"""URL: {url} + metadata = f"""Title: {title} +URL: {url} Script Version: {__version__} Timestamp: {datetime.now().isoformat()} +Type: Transcript --- """ @@ -240,7 +243,7 @@ def main(): # Download subtitles if not args.quiet: print("Downloading subtitles...") - success, vtt_path, duration_mins = download_subtitles(args.url, args.quiet) + success, vtt_path, duration_mins, video_title = download_subtitles(args.url, args.quiet) if not success: cleanup_files(None) sys.exit(1) @@ -250,7 +253,7 @@ def main(): cleaned_text = process_file(vtt_path, None, args.quiet) # type: ignore # Save transcript - transcript_path = save_transcript(cleaned_text, args.url) + transcript_path = save_transcript(cleaned_text, args.url, video_title) print(f"\nTranscript saved to: {transcript_path}") # Estimate and display cost @@ -274,8 +277,16 @@ def main(): summary_filename = sanitize_filename(args.url) + "_summary.txt" summary_path = summary_dir / summary_filename + summary_metadata = f"""Title: {video_title} +URL: {args.url} +Script Version: {__version__} +Timestamp: {datetime.now().isoformat()} +Type: Summary +--- + +""" with open(summary_path, "w", encoding="utf-8") as f: - f.write(output) + f.write(summary_metadata + output) print("\n=== Summary ===\n") print(summary) print()