Skip to content

Commit 07076bc

Browse files
feat(youtube): Implement batch source validation method and refactor transcript handling
Signed-off-by: David Anyatonwu <davidanyatonwu@gmail.com>
1 parent c7e687e commit 07076bc

1 file changed

Lines changed: 63 additions & 149 deletions

File tree

supadata/youtube.py

Lines changed: 63 additions & 149 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,55 @@ def __init__(self, request_handler: Callable[[str, str, Any], Dict[str, Any]]):
3333
self._transcript_instance = None
3434
self._batch_instance = None
3535

36+
def _validate_batch_sources(
37+
self,
38+
video_ids: Optional[List[str]] = None,
39+
playlist_id: Optional[str] = None,
40+
channel_id: Optional[str] = None,
41+
limit: Optional[int] = None,
42+
) -> Dict[str, Any]:
43+
"""Validate batch source parameters and construct the payload.
44+
45+
Args:
46+
video_ids: Array of YouTube video IDs or URLs.
47+
playlist_id: YouTube playlist URL or ID.
48+
channel_id: YouTube channel URL, handle or ID.
49+
limit: Maximum number of videos to process.
50+
51+
Returns:
52+
Dict containing the validated payload.
53+
54+
Raises:
55+
SupadataError: If validation fails.
56+
"""
57+
payload = {}
58+
if video_ids:
59+
payload["videoIds"] = video_ids
60+
if playlist_id:
61+
payload["playlistId"] = playlist_id
62+
if channel_id:
63+
payload["channelId"] = channel_id
64+
65+
if not payload:
66+
raise SupadataError(
67+
error="invalid-request",
68+
message="Missing source",
69+
details="One of video_ids, playlist_id, or channel_id must be provided.",
70+
)
71+
72+
if len(payload) > 1:
73+
raise SupadataError(
74+
error="invalid-request",
75+
message="Multiple sources",
76+
details="Only one of video_ids, playlist_id, or channel_id can be provided.",
77+
)
78+
79+
if limit is not None:
80+
self._validate_limit(limit)
81+
payload["limit"] = limit
82+
83+
return payload
84+
3685
def transcript(
3786
self, video_id: str, lang: str = None, text: bool = False
3887
) -> Transcript:
@@ -49,39 +98,10 @@ def transcript(
4998
Raises:
5099
SupadataError: If the API request fails
51100
"""
52-
params = {"videoId": video_id, "text": str(text).lower()}
53-
54-
if lang:
55-
params["lang"] = lang
56-
57-
response = self._request("GET", "/youtube/transcript", params=params)
58-
59-
# Convert chunks if present
60-
content = response.get("content")
61-
if not text:
62-
if isinstance(content, list):
63-
processed_content = []
64-
for chunk in content:
65-
chunk_obj = TranscriptChunk(
66-
text=chunk.get("text", ""),
67-
offset=chunk.get("offset", 0),
68-
duration=chunk.get("duration", 0),
69-
lang=chunk.get("lang", ""),
70-
)
71-
processed_content.append(chunk_obj)
72-
else:
73-
processed_content = []
74-
else:
75-
processed_content = content if isinstance(content, str) else ""
76-
77-
response["content"] = processed_content
78-
79-
if "lang" not in response:
80-
response["lang"] = ""
81-
if "available_langs" not in response:
82-
response["available_langs"] = []
83-
84-
return Transcript(**response)
101+
# Delegate to _Transcript.__call__
102+
if self._transcript_instance is None:
103+
self._transcript_instance = self._Transcript(self)
104+
return self._transcript_instance(video_id, lang, text)
85105

86106
def translate(
87107
self, video_id: str, lang: str, text: bool = False
@@ -99,37 +119,10 @@ def translate(
99119
Raises:
100120
SupadataError: If the API request fails
101121
"""
102-
response = self._request(
103-
"GET",
104-
"/youtube/transcript/translate",
105-
params={"videoId": video_id, "lang": lang, "text": str(text).lower()},
106-
)
107-
108-
# Convert chunks if present
109-
content = response.get("content")
110-
if not text:
111-
if isinstance(content, list):
112-
processed_content = []
113-
for chunk in content:
114-
chunk_obj = TranscriptChunk(
115-
text=chunk.get("text", ""),
116-
offset=chunk.get("offset", 0),
117-
duration=chunk.get("duration", 0),
118-
lang=chunk.get("lang", ""),
119-
)
120-
processed_content.append(chunk_obj)
121-
else:
122-
processed_content = []
123-
else:
124-
processed_content = content if isinstance(content, str) else ""
125-
126-
response["content"] = processed_content
127-
128-
# Add default value for missing lang field
129-
if "lang" not in response:
130-
response["lang"] = lang
131-
132-
return TranslatedTranscript(**response)
122+
# Delegate to _Transcript.translate
123+
if self._transcript_instance is None:
124+
self._transcript_instance = self._Transcript(self)
125+
return self._transcript_instance.translate(video_id, lang, text)
133126

134127
def video(self, id: str) -> YoutubeVideo:
135128
"""Get the video metadata for a YouTube video.
@@ -320,37 +313,11 @@ def batch(
320313
Raises:
321314
SupadataError: If the API request fails or input validation fails.
322315
"""
323-
payload = {}
324-
if video_ids:
325-
payload["videoIds"] = video_ids
326-
if playlist_id:
327-
payload["playlistId"] = playlist_id
328-
if channel_id:
329-
payload["channelId"] = channel_id
330-
331-
if not payload:
332-
raise SupadataError(
333-
error="invalid-request",
334-
message="Missing source",
335-
details="One of video_ids, playlist_id, or channel_id must be provided.",
336-
)
337-
338-
if len(payload) > 1:
339-
raise SupadataError(
340-
error="invalid-request",
341-
message="Multiple sources",
342-
details="Only one of video_ids, playlist_id, or channel_id can be provided.",
343-
)
344-
345-
if limit is not None:
346-
self._youtube._validate_limit(limit)
347-
payload["limit"] = limit
316+
payload = self._youtube._validate_batch_sources(video_ids, playlist_id, channel_id, limit)
348317
if lang:
349318
payload["lang"] = lang
350319

351-
response = self._youtube._request(
352-
"POST", "/youtube/transcript/batch", json=payload
353-
)
320+
response = self._youtube._request("POST", "/youtube/transcript/batch", json=payload)
354321
return BatchJob(**response)
355322

356323
class _Playlist:
@@ -500,35 +467,8 @@ def batch(
500467
Raises:
501468
SupadataError: If the API request fails or input validation fails.
502469
"""
503-
payload = {}
504-
if video_ids:
505-
payload["videoIds"] = video_ids
506-
if playlist_id:
507-
payload["playlistId"] = playlist_id
508-
if channel_id:
509-
payload["channelId"] = channel_id
510-
511-
if not payload:
512-
raise SupadataError(
513-
error="invalid-request",
514-
message="Missing source",
515-
details="One of video_ids, playlist_id, or channel_id must be provided.",
516-
)
517-
518-
if len(payload) > 1:
519-
raise SupadataError(
520-
error="invalid-request",
521-
message="Multiple sources",
522-
details="Only one of video_ids, playlist_id, or channel_id can be provided.",
523-
)
524-
525-
if limit is not None:
526-
self._youtube._validate_limit(limit)
527-
payload["limit"] = limit
528-
529-
response = self._youtube._request(
530-
"POST", "/youtube/video/batch", json=payload
531-
)
470+
payload = self._youtube._validate_batch_sources(video_ids, playlist_id, channel_id, limit)
471+
response = self._youtube._request("POST", "/youtube/video/batch", json=payload)
532472
return BatchJob(**response)
533473

534474
class _Transcript:
@@ -608,37 +548,11 @@ def batch(
608548
Raises:
609549
SupadataError: If the API request fails or input validation fails.
610550
"""
611-
payload = {}
612-
if video_ids:
613-
payload["videoIds"] = video_ids
614-
if playlist_id:
615-
payload["playlistId"] = playlist_id
616-
if channel_id:
617-
payload["channelId"] = channel_id
618-
619-
if not payload:
620-
raise SupadataError(
621-
error="invalid-request",
622-
message="Missing source",
623-
details="One of video_ids, playlist_id, or channel_id must be provided.",
624-
)
625-
626-
if len(payload) > 1:
627-
raise SupadataError(
628-
error="invalid-request",
629-
message="Multiple sources",
630-
details="Only one of video_ids, playlist_id, or channel_id can be provided.",
631-
)
632-
633-
if limit is not None:
634-
self._youtube._validate_limit(limit)
635-
payload["limit"] = limit
551+
payload = self._youtube._validate_batch_sources(video_ids, playlist_id, channel_id, limit)
636552
if lang:
637553
payload["lang"] = lang
638554

639-
response = self._youtube._request(
640-
"POST", "/youtube/transcript/batch", json=payload
641-
)
555+
response = self._youtube._request("POST", "/youtube/transcript/batch", json=payload)
642556
return BatchJob(**response)
643557

644558
def translate(self, video_id: str, lang: str, text: bool = False) -> TranslatedTranscript:

0 commit comments

Comments
 (0)