refactor: Simplify CLI, remove optional features, and streamline summary generation
This commit is contained in:
@@ -1,30 +1,47 @@
|
|||||||
# Standard library imports remain the same
|
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
|
from pathlib import Path
|
||||||
import re
|
import re
|
||||||
|
import signal
|
||||||
import sys
|
import sys
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from yt_dlp import YoutubeDL
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional, Tuple
|
|
||||||
|
|
||||||
# Third-party packages
|
|
||||||
import anthropic
|
import anthropic
|
||||||
|
from yt_dlp import YoutubeDL
|
||||||
|
|
||||||
# Local modules
|
|
||||||
from . import __version__
|
from . import __version__
|
||||||
|
|
||||||
# Add cost estimation constants
|
# Cost estimation constants, in dollars per 1K tokens (0.80 / 1000 per 1K is 0.80 per million tokens)
|
||||||
CLAUDE_COST_PER_1K_INPUT = 0.015 # Cost per 1K tokens for input
|
CLAUDE_COST_PER_1K_INPUT = 0.80 / 1000
|
||||||
CLAUDE_COST_PER_1K_OUTPUT = 0.075 # Cost per 1K tokens for output
|
CLAUDE_COST_PER_1K_OUTPUT = 4 / 1000
|
||||||
ESTIMATED_TOKENS_PER_CHAR = 0.25 # Rough estimate of tokens per character
|
ESTIMATED_TOKENS_PER_CHAR = 0.25
|
||||||
HAIKU_OUTPUT_TOKENS = 50 # Haiku summaries are very short
|
|
||||||
|
|
||||||
|
|
||||||
def estimate_api_cost(text: str) -> float:
|
def setup_terminal_control():
    """Best-effort attempt to claim the controlling terminal at startup.

    Steps, in order:
      1. Move this process into its own process group (``os.setpgrp``).
      2. Ignore the job-control signals SIGTTOU, SIGTTIN and SIGTSTP.
      3. If stdin is a TTY, make this process group the terminal's
         foreground process group (``termios.tcsetpgrp``).

    Any exception raised along the way is swallowed on purpose: failing to
    obtain terminal control must not prevent the program from running.
    """
    try:
        # Become the leader of a fresh process group.
        os.setpgrp()

        # Job-control signals are ignored before touching the terminal --
        # presumably so the tcsetpgrp call below cannot stop the process.
        for job_signal in (signal.SIGTTOU, signal.SIGTTIN, signal.SIGTSTP):
            signal.signal(job_signal, signal.SIG_IGN)

        # Nothing more to do when stdin is not attached to a terminal.
        if not sys.stdin.isatty():
            return

        # Imported lazily: only needed on the TTY path.
        import termios

        stdin_fd = sys.stdin.fileno()
        termios.tcsetpgrp(stdin_fd, os.getpgrp())  # type: ignore
    except Exception:
        # Terminal control is optional; carry on without it.
        pass
|
||||||
|
|
||||||
|
|
||||||
|
def estimate_api_cost(text: str, target_words: float) -> float:
|
||||||
"""Estimate the cost of sending text to Claude API."""
|
"""Estimate the cost of sending text to Claude API."""
|
||||||
estimated_input_tokens = len(text) * ESTIMATED_TOKENS_PER_CHAR
|
estimated_input_tokens = len(text) * ESTIMATED_TOKENS_PER_CHAR
|
||||||
estimated_output_tokens = HAIKU_OUTPUT_TOKENS
|
estimated_output_tokens = target_words
|
||||||
|
|
||||||
input_cost = (estimated_input_tokens / 1000) * CLAUDE_COST_PER_1K_INPUT
|
input_cost = (estimated_input_tokens / 1000) * CLAUDE_COST_PER_1K_INPUT
|
||||||
output_cost = (estimated_output_tokens / 1000) * CLAUDE_COST_PER_1K_OUTPUT
|
output_cost = (estimated_output_tokens / 1000) * CLAUDE_COST_PER_1K_OUTPUT
|
||||||
@@ -47,7 +64,7 @@ def sanitize_filename(url: str) -> str:
|
|||||||
return f"{timestamp}_{video_id if video_id else 'video'}"
|
return f"{timestamp}_{video_id if video_id else 'video'}"
|
||||||
|
|
||||||
|
|
||||||
def ensure_yts_dirs() -> Tuple[Path, Path]:
|
def ensure_yts_dirs() -> tuple[Path, Path]:
|
||||||
"""Create and return paths to transcript and summary directories."""
|
"""Create and return paths to transcript and summary directories."""
|
||||||
base_dir = Path.home() / ".yts"
|
base_dir = Path.home() / ".yts"
|
||||||
transcript_dir = base_dir / "transcripts"
|
transcript_dir = base_dir / "transcripts"
|
||||||
@@ -57,23 +74,23 @@ def ensure_yts_dirs() -> Tuple[Path, Path]:
|
|||||||
return transcript_dir, summary_dir
|
return transcript_dir, summary_dir
|
||||||
|
|
||||||
|
|
||||||
def download_subtitles(url: str, quiet: bool = False) -> Tuple[bool, Optional[Path], int]:
|
def download_subtitles(url: str, quiet: bool = False) -> tuple[bool, Path | None, int]:
|
||||||
"""Download subtitles from YouTube using yt-dlp and return success, path, and duration in minutes."""
|
"""Download subtitles from YouTube using yt-dlp and return success, path, and duration in minutes."""
|
||||||
try:
|
try:
|
||||||
ydl_opts = {
|
ydl_opts = {
|
||||||
'skip_download': True,
|
"skip_download": True,
|
||||||
'writeautomaticsub': True,
|
"writeautomaticsub": True,
|
||||||
'subtitleslangs': ['en'],
|
"subtitleslangs": ["en"],
|
||||||
'quiet': quiet,
|
"quiet": quiet,
|
||||||
'no_warnings': quiet,
|
"no_warnings": quiet,
|
||||||
}
|
}
|
||||||
|
|
||||||
# First get video duration
|
# First get video duration
|
||||||
info_opts = dict(ydl_opts)
|
info_opts = dict(ydl_opts)
|
||||||
info_opts['extract_flat'] = True
|
info_opts["extract_flat"] = True
|
||||||
with YoutubeDL(info_opts) as ydl:
|
with YoutubeDL(info_opts) as ydl:
|
||||||
info = ydl.extract_info(url, download=False)
|
info = ydl.extract_info(url, download=False)
|
||||||
duration_mins = int(info.get('duration', 0) / 60)
|
duration_mins = int(info.get("duration", 0) / 60) # type: ignore
|
||||||
|
|
||||||
if not quiet:
|
if not quiet:
|
||||||
print(f"Debug: Downloading subtitles for {url}")
|
print(f"Debug: Downloading subtitles for {url}")
|
||||||
@@ -99,6 +116,7 @@ def download_subtitles(url: str, quiet: bool = False) -> Tuple[bool, Optional[Pa
|
|||||||
if not quiet:
|
if not quiet:
|
||||||
print(f"Debug: Full exception info: {type(e).__name__}: {str(e)}")
|
print(f"Debug: Full exception info: {type(e).__name__}: {str(e)}")
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
return False, None, 0
|
return False, None, 0
|
||||||
|
|
||||||
@@ -151,7 +169,7 @@ def process_file(input_path: Path, output_path: Path | None = None, quiet: bool
|
|||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def save_transcript(text: str, url: str, prompt: str) -> Path:
|
def save_transcript(text: str, url: str) -> Path:
|
||||||
"""Save transcript with metadata to ~/.yts/transcripts/."""
|
"""Save transcript with metadata to ~/.yts/transcripts/."""
|
||||||
transcript_dir, _ = ensure_yts_dirs()
|
transcript_dir, _ = ensure_yts_dirs()
|
||||||
filename = sanitize_filename(url) + ".txt"
|
filename = sanitize_filename(url) + ".txt"
|
||||||
@@ -160,7 +178,6 @@ def save_transcript(text: str, url: str, prompt: str) -> Path:
|
|||||||
metadata = f"""URL: {url}
|
metadata = f"""URL: {url}
|
||||||
Script Version: {__version__}
|
Script Version: {__version__}
|
||||||
Timestamp: {datetime.now().isoformat()}
|
Timestamp: {datetime.now().isoformat()}
|
||||||
Claude Prompt: {prompt}
|
|
||||||
---
|
---
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -170,7 +187,7 @@ Claude Prompt: {prompt}
|
|||||||
return filepath
|
return filepath
|
||||||
|
|
||||||
|
|
||||||
def cleanup_files(vtt_path: Optional[Path]):
|
def cleanup_files(vtt_path: Path | None):
|
||||||
"""Remove downloaded files after processing."""
|
"""Remove downloaded files after processing."""
|
||||||
try:
|
try:
|
||||||
if vtt_path and vtt_path.exists():
|
if vtt_path and vtt_path.exists():
|
||||||
@@ -185,7 +202,7 @@ def cleanup_files(vtt_path: Optional[Path]):
|
|||||||
print(f"Warning: Could not clean up temporary files: {str(e)}", file=sys.stderr)
|
print(f"Warning: Could not clean up temporary files: {str(e)}", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
def get_summary_from_claude(text: str, duration_mins: int, prompt: str = None) -> str:
|
def get_summary_from_claude(text: str, target_words: float) -> str:
|
||||||
"""Send text to Claude API for summarization."""
|
"""Send text to Claude API for summarization."""
|
||||||
try:
|
try:
|
||||||
api_key = os.environ.get("ANTHROPIC_API_KEY")
|
api_key = os.environ.get("ANTHROPIC_API_KEY")
|
||||||
@@ -193,20 +210,17 @@ def get_summary_from_claude(text: str, duration_mins: int, prompt: str = None) -
|
|||||||
raise ValueError("ANTHROPIC_API_KEY environment variable not set")
|
raise ValueError("ANTHROPIC_API_KEY environment variable not set")
|
||||||
|
|
||||||
client = anthropic.Anthropic()
|
client = anthropic.Anthropic()
|
||||||
# Calculate target word count based on duration
|
|
||||||
target_words = max(500, (duration_mins // 10) * 500)
|
|
||||||
|
|
||||||
if prompt is None:
|
prompt = f"Please summarize this transcript in {target_words} or less."
|
||||||
prompt = "Please summarize this transcript as a haiku. A haiku is a three-line poem with 5 syllables in the first line, 7 syllables in the second line, and 5 syllables in the third line."
|
|
||||||
|
|
||||||
message = client.messages.create(
|
message = client.messages.create(
|
||||||
model="claude-3-sonnet-20240229",
|
model="claude-3-5-haiku-latest",
|
||||||
max_tokens=2048, # Increased for longer summaries
|
max_tokens=2048, # Increased for longer summaries
|
||||||
temperature=0,
|
temperature=0,
|
||||||
system="You are a helpful assistant that summarizes transcripts accurately and concisely.",
|
system="You are a helpful assistant that summarizes transcripts accurately.",
|
||||||
messages=[{"role": "user", "content": f"{prompt}:\n\n{text}"}],
|
messages=[{"role": "user", "content": f"{prompt}:\n\n{text}"}],
|
||||||
)
|
)
|
||||||
return message.content[0].text
|
return message.content[0].text # type: ignore
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error getting summary from Claude: {str(e)}", file=sys.stderr)
|
print(f"Error getting summary from Claude: {str(e)}", file=sys.stderr)
|
||||||
@@ -214,16 +228,12 @@ def get_summary_from_claude(text: str, duration_mins: int, prompt: str = None) -
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
setup_terminal_control()
|
||||||
parser = argparse.ArgumentParser(description="Download YouTube subtitles and get Claude summary")
|
parser = argparse.ArgumentParser(description="Download YouTube subtitles and get Claude summary")
|
||||||
parser.add_argument("url", help="YouTube video URL", type=str)
|
parser.add_argument("url", help="YouTube video URL", type=str)
|
||||||
parser.add_argument("-o", "--output", help="Output file for summary")
|
|
||||||
parser.add_argument("-q", "--quiet", action="store_true", help="Suppress status messages")
|
parser.add_argument("-q", "--quiet", action="store_true", help="Suppress status messages")
|
||||||
parser.add_argument("--keep-files", action="store_true", help="Don't delete downloaded VTT files")
|
parser.add_argument("--keep-files", action="store_true", help="Don't delete downloaded VTT files")
|
||||||
parser.add_argument("--transcript", action="store_true", help="Include full transcript in output")
|
parser.add_argument("--transcript", action="store_true", help="Include full transcript in output")
|
||||||
parser.add_argument(
|
|
||||||
"--prompt", help="Custom prompt for Claude (default: auto-calculated based on video length)"
|
|
||||||
)
|
|
||||||
parser.add_argument("-y", "--yes", action="store_true", help="Skip cost confirmation")
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
@@ -234,36 +244,24 @@ def main():
|
|||||||
if not success:
|
if not success:
|
||||||
cleanup_files(None)
|
cleanup_files(None)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
target_words = max(500, (duration_mins // 10) * 500)
|
||||||
|
|
||||||
# Process the VTT file
|
# Process the VTT file
|
||||||
cleaned_text = process_file(vtt_path, None, args.quiet)
|
cleaned_text = process_file(vtt_path, None, args.quiet) # type: ignore
|
||||||
|
|
||||||
# Save transcript
|
# Save transcript
|
||||||
transcript_path = save_transcript(cleaned_text, args.url, args.prompt)
|
transcript_path = save_transcript(cleaned_text, args.url)
|
||||||
print(f"\nTranscript saved to: {transcript_path}")
|
print(f"\nTranscript saved to: {transcript_path}")
|
||||||
|
|
||||||
# Estimate and display cost
|
# Estimate and display cost
|
||||||
estimated_cost = estimate_api_cost(cleaned_text)
|
estimated_cost = estimate_api_cost(cleaned_text, target_words)
|
||||||
print(f"\nEstimated API cost: ${estimated_cost:.4f}")
|
print(f"\nEstimated API cost: ${estimated_cost:.4f}")
|
||||||
|
|
||||||
if not args.yes:
|
|
||||||
try:
|
|
||||||
response = input("\nDo you want to proceed with getting the summary? (Y/n): ").strip().lower()
|
|
||||||
if response == 'n':
|
|
||||||
print("Operation cancelled by user.")
|
|
||||||
cleanup_files(vtt_path)
|
|
||||||
sys.exit(0)
|
|
||||||
except (EOFError, KeyboardInterrupt):
|
|
||||||
print("\nOperation cancelled by user.")
|
|
||||||
cleanup_files(vtt_path)
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
# Get summary from Claude
|
# Get summary from Claude
|
||||||
if not args.quiet:
|
if not args.quiet:
|
||||||
print("\nGetting summary from Claude...")
|
print("\nGetting summary from Claude...")
|
||||||
summary = get_summary_from_claude(cleaned_text, duration_mins, args.prompt)
|
summary = get_summary_from_claude(cleaned_text, target_words)
|
||||||
|
|
||||||
# Prepare output
|
|
||||||
output = ""
|
output = ""
|
||||||
if args.transcript:
|
if args.transcript:
|
||||||
output += "=== Full Transcript ===\n\n"
|
output += "=== Full Transcript ===\n\n"
|
||||||
@@ -278,20 +276,11 @@ def main():
|
|||||||
|
|
||||||
with open(summary_path, "w", encoding="utf-8") as f:
|
with open(summary_path, "w", encoding="utf-8") as f:
|
||||||
f.write(output)
|
f.write(output)
|
||||||
print(f"\nSummary saved to: {summary_path}")
|
|
||||||
|
|
||||||
# Print the summary to console
|
|
||||||
print("\n=== Summary ===\n")
|
print("\n=== Summary ===\n")
|
||||||
print(summary)
|
print(summary)
|
||||||
print() # Extra newline for readability
|
print()
|
||||||
|
print(f"\nSummary saved to: {summary_path}")
|
||||||
|
|
||||||
# If output path specified, also save there
|
|
||||||
if args.output:
|
|
||||||
with open(args.output, "w", encoding="utf-8") as f:
|
|
||||||
f.write(output)
|
|
||||||
print(f"Summary also saved to: {args.output}")
|
|
||||||
|
|
||||||
# Cleanup downloaded files unless --keep-files is specified
|
|
||||||
if not args.keep_files:
|
if not args.keep_files:
|
||||||
cleanup_files(vtt_path)
|
cleanup_files(vtt_path)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user