refactor: Replace subprocess with yt-dlp for subtitle download
This commit is contained in:
@@ -3,9 +3,9 @@ import argparse
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from yt_dlp import YoutubeDL
|
||||
from pathlib import Path
|
||||
from typing import Optional, Tuple
|
||||
|
||||
@@ -56,33 +56,27 @@ def ensure_transcript_dir() -> Path:
|
||||
def download_subtitles(url: str, quiet: bool = False) -> Tuple[bool, Optional[Path]]:
|
||||
"""Download subtitles from YouTube using yt-dlp."""
|
||||
try:
|
||||
cmd = ["yt-dlp", "--skip-download", "--write-auto-sub", "--sub-lang", "en"]
|
||||
if quiet:
|
||||
cmd.append("--quiet")
|
||||
cmd.append(url)
|
||||
ydl_opts = {
|
||||
'skip_download': True,
|
||||
'writeautomaticsub': True,
|
||||
'subtitleslangs': ['en'],
|
||||
'quiet': quiet,
|
||||
'no_warnings': quiet,
|
||||
}
|
||||
|
||||
print(f"Debug: Running command: {cmd}") # Debug line
|
||||
if not quiet:
|
||||
print(f"Debug: Downloading subtitles for {url}")
|
||||
|
||||
process = subprocess.Popen(
|
||||
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True # This might help with encoding issues
|
||||
)
|
||||
|
||||
try:
|
||||
stdout, stderr = process.communicate(timeout=30)
|
||||
print(f"Debug: stdout: {stdout}") # Debug line
|
||||
print(f"Debug: stderr: {stderr}") # Debug line
|
||||
if process.returncode != 0:
|
||||
print(f"Error downloading subtitles: {stderr}", file=sys.stderr)
|
||||
return False, None
|
||||
except subprocess.TimeoutExpired:
|
||||
process.kill()
|
||||
print("Download timed out after 30 seconds", file=sys.stderr)
|
||||
return False, None
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.download([url])
|
||||
|
||||
# Find the downloaded VTT file
|
||||
current_dir = Path(".")
|
||||
vtt_files = list(current_dir.glob("*.en.vtt"))
|
||||
print(f"Debug: Found VTT files: {vtt_files}") # Debug line
|
||||
|
||||
if not quiet:
|
||||
print(f"Debug: Found VTT files: {vtt_files}")
|
||||
|
||||
if not vtt_files:
|
||||
print("No VTT file found after download", file=sys.stderr)
|
||||
return False, None
|
||||
@@ -91,10 +85,10 @@ def download_subtitles(url: str, quiet: bool = False) -> Tuple[bool, Optional[Pa
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error during subtitle download: {str(e)}", file=sys.stderr)
|
||||
print(f"Debug: Full exception info: {type(e).__name__}: {str(e)}") # Debug line
|
||||
import traceback
|
||||
|
||||
traceback.print_exc() # This will print the full traceback
|
||||
if not quiet:
|
||||
print(f"Debug: Full exception info: {type(e).__name__}: {str(e)}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return False, None
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user