refactor: Replace subprocess with yt-dlp for subtitle download
This commit is contained in:
@@ -3,9 +3,9 @@ import argparse
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import signal
|
import signal
|
||||||
import subprocess
|
|
||||||
import sys
|
import sys
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from yt_dlp import YoutubeDL
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
@@ -56,33 +56,27 @@ def ensure_transcript_dir() -> Path:
|
|||||||
def download_subtitles(url: str, quiet: bool = False) -> Tuple[bool, Optional[Path]]:
|
def download_subtitles(url: str, quiet: bool = False) -> Tuple[bool, Optional[Path]]:
|
||||||
"""Download subtitles from YouTube using yt-dlp."""
|
"""Download subtitles from YouTube using yt-dlp."""
|
||||||
try:
|
try:
|
||||||
cmd = ["yt-dlp", "--skip-download", "--write-auto-sub", "--sub-lang", "en"]
|
ydl_opts = {
|
||||||
if quiet:
|
'skip_download': True,
|
||||||
cmd.append("--quiet")
|
'writeautomaticsub': True,
|
||||||
cmd.append(url)
|
'subtitleslangs': ['en'],
|
||||||
|
'quiet': quiet,
|
||||||
|
'no_warnings': quiet,
|
||||||
|
}
|
||||||
|
|
||||||
print(f"Debug: Running command: {cmd}") # Debug line
|
if not quiet:
|
||||||
|
print(f"Debug: Downloading subtitles for {url}")
|
||||||
|
|
||||||
process = subprocess.Popen(
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True # This might help with encoding issues
|
ydl.download([url])
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
stdout, stderr = process.communicate(timeout=30)
|
|
||||||
print(f"Debug: stdout: {stdout}") # Debug line
|
|
||||||
print(f"Debug: stderr: {stderr}") # Debug line
|
|
||||||
if process.returncode != 0:
|
|
||||||
print(f"Error downloading subtitles: {stderr}", file=sys.stderr)
|
|
||||||
return False, None
|
|
||||||
except subprocess.TimeoutExpired:
|
|
||||||
process.kill()
|
|
||||||
print("Download timed out after 30 seconds", file=sys.stderr)
|
|
||||||
return False, None
|
|
||||||
|
|
||||||
# Find the downloaded VTT file
|
# Find the downloaded VTT file
|
||||||
current_dir = Path(".")
|
current_dir = Path(".")
|
||||||
vtt_files = list(current_dir.glob("*.en.vtt"))
|
vtt_files = list(current_dir.glob("*.en.vtt"))
|
||||||
print(f"Debug: Found VTT files: {vtt_files}") # Debug line
|
|
||||||
|
if not quiet:
|
||||||
|
print(f"Debug: Found VTT files: {vtt_files}")
|
||||||
|
|
||||||
if not vtt_files:
|
if not vtt_files:
|
||||||
print("No VTT file found after download", file=sys.stderr)
|
print("No VTT file found after download", file=sys.stderr)
|
||||||
return False, None
|
return False, None
|
||||||
@@ -91,10 +85,10 @@ def download_subtitles(url: str, quiet: bool = False) -> Tuple[bool, Optional[Pa
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error during subtitle download: {str(e)}", file=sys.stderr)
|
print(f"Error during subtitle download: {str(e)}", file=sys.stderr)
|
||||||
print(f"Debug: Full exception info: {type(e).__name__}: {str(e)}") # Debug line
|
if not quiet:
|
||||||
|
print(f"Debug: Full exception info: {type(e).__name__}: {str(e)}")
|
||||||
import traceback
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
traceback.print_exc() # This will print the full traceback
|
|
||||||
return False, None
|
return False, None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user