diff --git a/summarize_yt/cli.py b/summarize_yt/cli.py index b9382ec..84e9754 100644 --- a/summarize_yt/cli.py +++ b/summarize_yt/cli.py @@ -1,30 +1,47 @@ -# Standard library imports remain the same import argparse import os +from pathlib import Path import re +import signal import sys from datetime import datetime -from yt_dlp import YoutubeDL -from pathlib import Path -from typing import Optional, Tuple -# Third-party packages import anthropic +from yt_dlp import YoutubeDL -# Local modules from . import __version__ # Add cost estimation constants -CLAUDE_COST_PER_1K_INPUT = 0.015 # Cost per 1K tokens for input -CLAUDE_COST_PER_1K_OUTPUT = 0.075 # Cost per 1K tokens for output -ESTIMATED_TOKENS_PER_CHAR = 0.25 # Rough estimate of tokens per character -HAIKU_OUTPUT_TOKENS = 50 # Haiku summaries are very short +CLAUDE_COST_PER_1K_INPUT = 0.80 / 1000 +CLAUDE_COST_PER_1K_OUTPUT = 4 / 1000 +ESTIMATED_TOKENS_PER_CHAR = 0.25 -def estimate_api_cost(text: str) -> float: +def setup_terminal_control(): + """Set up terminal control at program start.""" + try: + # Put process in its own process group and take control of terminal + os.setpgrp() + + # Ignore terminal control signals + signal.signal(signal.SIGTTOU, signal.SIG_IGN) + signal.signal(signal.SIGTTIN, signal.SIG_IGN) + signal.signal(signal.SIGTSTP, signal.SIG_IGN) + + # Take control of terminal if we're running in one + if sys.stdin.isatty(): + import termios + + termios.tcsetpgrp(sys.stdin.fileno(), os.getpgrp()) # type: ignore + except Exception: + # If we can't get terminal control, just continue + pass + + +def estimate_api_cost(text: str, target_words: float) -> float: """Estimate the cost of sending text to Claude API.""" estimated_input_tokens = len(text) * ESTIMATED_TOKENS_PER_CHAR - estimated_output_tokens = HAIKU_OUTPUT_TOKENS + estimated_output_tokens = target_words input_cost = (estimated_input_tokens / 1000) * CLAUDE_COST_PER_1K_INPUT output_cost = (estimated_output_tokens / 1000) * CLAUDE_COST_PER_1K_OUTPUT @@ -47,7 +64,7 @@ def sanitize_filename(url: str) -> str: return f"{timestamp}_{video_id if video_id else 'video'}" -def ensure_yts_dirs() -> Tuple[Path, Path]: +def ensure_yts_dirs() -> tuple[Path, Path]: """Create and return paths to transcript and summary directories.""" base_dir = Path.home() / ".yts" transcript_dir = base_dir / "transcripts" @@ -57,23 +74,23 @@ def ensure_yts_dirs() -> Tuple[Path, Path]: return transcript_dir, summary_dir -def download_subtitles(url: str, quiet: bool = False) -> Tuple[bool, Optional[Path], int]: +def download_subtitles(url: str, quiet: bool = False) -> tuple[bool, Path | None, int]: """Download subtitles from YouTube using yt-dlp and return success, path, and duration in minutes.""" try: ydl_opts = { - 'skip_download': True, - 'writeautomaticsub': True, - 'subtitleslangs': ['en'], - 'quiet': quiet, - 'no_warnings': quiet, + "skip_download": True, + "writeautomaticsub": True, + "subtitleslangs": ["en"], + "quiet": quiet, + "no_warnings": quiet, } - + # First get video duration info_opts = dict(ydl_opts) - info_opts['extract_flat'] = True + info_opts["extract_flat"] = True with YoutubeDL(info_opts) as ydl: info = ydl.extract_info(url, download=False) - duration_mins = int(info.get('duration', 0) / 60) + duration_mins = int(info.get("duration", 0) / 60) # type: ignore if not quiet: print(f"Debug: Downloading subtitles for {url}") @@ -84,10 +101,10 @@ def download_subtitles(url: str, quiet: bool = False) -> Tuple[bool, Optional[Pa # Find the downloaded VTT file current_dir = Path(".") vtt_files = list(current_dir.glob("*.en.vtt")) - + if not quiet: print(f"Debug: Found VTT files: {vtt_files}") - + if not vtt_files: print("No VTT file found after download", file=sys.stderr) return False, None, 0 @@ -99,6 +116,7 @@ def download_subtitles(url: str, quiet: bool = False) -> Tuple[bool, Optional[Pa if not quiet: print(f"Debug: Full exception info: {type(e).__name__}: {str(e)}") import traceback + traceback.print_exc() return False, None, 0 @@ -151,7 +169,7 @@ def process_file(input_path: Path, output_path: Path | None = None, quiet: bool return "" -def save_transcript(text: str, url: str, prompt: str) -> Path: +def save_transcript(text: str, url: str) -> Path: """Save transcript with metadata to ~/.yts/transcripts/.""" transcript_dir, _ = ensure_yts_dirs() filename = sanitize_filename(url) + ".txt" @@ -160,7 +178,6 @@ def save_transcript(text: str, url: str, prompt: str) -> Path: metadata = f"""URL: {url} Script Version: {__version__} Timestamp: {datetime.now().isoformat()} -Claude Prompt: {prompt} --- """ @@ -170,7 +187,7 @@ Claude Prompt: {prompt} return filepath -def cleanup_files(vtt_path: Optional[Path]): +def cleanup_files(vtt_path: Path | None): """Remove downloaded files after processing.""" try: if vtt_path and vtt_path.exists(): @@ -185,7 +202,7 @@ def cleanup_files(vtt_path: Optional[Path]): print(f"Warning: Could not clean up temporary files: {str(e)}", file=sys.stderr) -def get_summary_from_claude(text: str, duration_mins: int, prompt: str = None) -> str: +def get_summary_from_claude(text: str, target_words: float) -> str: """Send text to Claude API for summarization.""" try: api_key = os.environ.get("ANTHROPIC_API_KEY") @@ -193,20 +210,17 @@ def get_summary_from_claude(text: str, duration_mins: int, prompt: str = None) - raise ValueError("ANTHROPIC_API_KEY environment variable not set") client = anthropic.Anthropic() - # Calculate target word count based on duration - target_words = max(500, (duration_mins // 10) * 500) - - if prompt is None: - prompt = "Please summarize this transcript as a haiku. A haiku is a three-line poem with 5 syllables in the first line, 7 syllables in the second line, and 5 syllables in the third line." - + + prompt = f"Please summarize this transcript in {target_words} or less." + message = client.messages.create( - model="claude-3-sonnet-20240229", + model="claude-3-5-haiku-latest", max_tokens=2048, # Increased for longer summaries temperature=0, - system="You are a helpful assistant that summarizes transcripts accurately and concisely.", + system="You are a helpful assistant that summarizes transcripts accurately.", messages=[{"role": "user", "content": f"{prompt}:\n\n{text}"}], ) - return message.content[0].text + return message.content[0].text # type: ignore except Exception as e: print(f"Error getting summary from Claude: {str(e)}", file=sys.stderr) @@ -214,16 +228,12 @@ def get_summary_from_claude(text: str, duration_mins: int, prompt: str = None) - def main(): + setup_terminal_control() parser = argparse.ArgumentParser(description="Download YouTube subtitles and get Claude summary") parser.add_argument("url", help="YouTube video URL", type=str) - parser.add_argument("-o", "--output", help="Output file for summary") parser.add_argument("-q", "--quiet", action="store_true", help="Suppress status messages") parser.add_argument("--keep-files", action="store_true", help="Don't delete downloaded VTT files") parser.add_argument("--transcript", action="store_true", help="Include full transcript in output") - parser.add_argument( - "--prompt", help="Custom prompt for Claude (default: auto-calculated based on video length)" - ) - parser.add_argument("-y", "--yes", action="store_true", help="Skip cost confirmation") args = parser.parse_args() @@ -234,36 +244,24 @@ def main(): if not success: cleanup_files(None) sys.exit(1) + target_words = max(500, (duration_mins // 10) * 500) # Process the VTT file - cleaned_text = process_file(vtt_path, None, args.quiet) + cleaned_text = process_file(vtt_path, None, args.quiet) # type: ignore # Save transcript - transcript_path = save_transcript(cleaned_text, args.url, args.prompt) + transcript_path = save_transcript(cleaned_text, args.url) print(f"\nTranscript saved to: {transcript_path}") # Estimate and display cost - estimated_cost = estimate_api_cost(cleaned_text) + estimated_cost = estimate_api_cost(cleaned_text, target_words) print(f"\nEstimated API cost: ${estimated_cost:.4f}") - if not args.yes: - try: - response = input("\nDo you want to proceed with getting the summary? (Y/n): ").strip().lower() - if response == 'n': - print("Operation cancelled by user.") - cleanup_files(vtt_path) - sys.exit(0) - except (EOFError, KeyboardInterrupt): - print("\nOperation cancelled by user.") - cleanup_files(vtt_path) - sys.exit(0) - # Get summary from Claude if not args.quiet: print("\nGetting summary from Claude...") - summary = get_summary_from_claude(cleaned_text, duration_mins, args.prompt) + summary = get_summary_from_claude(cleaned_text, target_words) - # Prepare output output = "" if args.transcript: output += "=== Full Transcript ===\n\n" @@ -275,23 +273,14 @@ def main(): _, summary_dir = ensure_yts_dirs() summary_filename = sanitize_filename(args.url) + "_summary.txt" summary_path = summary_dir / summary_filename - + with open(summary_path, "w", encoding="utf-8") as f: f.write(output) - print(f"\nSummary saved to: {summary_path}") - - # Print the summary to console print("\n=== Summary ===\n") print(summary) - print() # Extra newline for readability + print() + print(f"\nSummary saved to: {summary_path}") - # If output path specified, also save there - if args.output: - with open(args.output, "w", encoding="utf-8") as f: - f.write(output) - print(f"Summary also saved to: {args.output}") - - # Cleanup downloaded files unless --keep-files is specified if not args.keep_files: cleanup_files(vtt_path)