Skip to content

Commit aae525e

Browse files
committed
feat(youtube): add support for new endpoints
Fixes #9. fix(project): fix the tests and fix imports on __init__.py. fix(supadata): fix small typos in docstrings. fix(supadata): fix small typo in docstring. fix(tests): fix function called in test.
1 parent 8d94a68 commit aae525e

5 files changed

Lines changed: 581 additions & 145 deletions

File tree

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,26 @@ try:
6565
print(f"Content: {page.content}")
6666
except SupadataError as e:
6767
print(f"Crawl job failed: {e}")
68+
69+
# Get Video Metadata
70+
video = supadata.youtube.video(id="VIDEO_ID")
71+
print(f"Video: {video}")
72+
73+
# Get Channel Metadata
74+
channel = supadata.youtube.channel(id="CHANNEL_ID")
75+
print(f"Channel: {channel}")
76+
77+
# Get a list of the channel video IDs
78+
channel_videos = supadata.youtube.channel_videos(id="CHANNEL_ID")
79+
print(f"Channel Video IDs: {channel_videos}")
80+
81+
# Get Playlist metadata
82+
playlist = supadata.youtube.playlist(id="PLAYLIST_ID")
83+
print(f"Playlist: {playlist}")
84+
85+
# Get a list of the playlist video IDs
86+
playlist_videos = supadata.youtube.playlist_videos(id="PLAYLIST_ID")
87+
print(f"Playlist Video IDs: {playlist_videos}")
6888
```
6989

7090
## Error Handling

supadata/__init__.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,23 @@
33
44
The official Python SDK for Supadata - scrape web and YouTube content with ease.
55
"""
6+
67
from importlib.metadata import version
8+
79
from supadata.client import Supadata
810
from supadata.errors import SupadataError
911
from supadata.types import (
10-
Transcript,
11-
TranslatedTranscript,
12-
TranscriptChunk,
13-
Scrape,
14-
Map,
1512
CrawlJob,
1613
CrawlPage,
17-
CrawlResponse
14+
CrawlResponse,
15+
Map,
16+
Scrape,
17+
Transcript,
18+
TranscriptChunk,
19+
TranslatedTranscript,
20+
YoutubeChannel,
21+
YoutubePlaylist,
22+
YoutubeVideo,
1823
)
1924

2025
__version__ = version("supadata")
@@ -28,5 +33,8 @@
2833
"SupadataError",
2934
"CrawlJob",
3035
"CrawlPage",
31-
"CrawlResponse"
32-
]
36+
"CrawlResponse",
37+
"YoutubeChannel",
38+
"YoutubePlaylist",
39+
"YoutubeVideo",
40+
]

supadata/types.py

Lines changed: 109 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
11
"""Type definitions for Supadata API responses."""
22

3-
from typing import List, Optional, Union
43
from dataclasses import dataclass
4+
from datetime import datetime
5+
from typing import List, Optional, TypedDict, Union
56

67

78
@dataclass
89
class TranscriptChunk:
910
"""A chunk of a video transcript.
10-
11+
1112
Attributes:
1213
text: Transcript segment text
1314
offset: Start time in milliseconds
1415
duration: Duration in milliseconds
1516
lang: ISO 639-1 language code of chunk
1617
"""
18+
1719
text: str
1820
offset: int
1921
duration: int
@@ -23,12 +25,13 @@ class TranscriptChunk:
2325
@dataclass
class Transcript:
    """A complete video transcript.

    Attributes:
        content: List of transcript chunks, or plain text when text=true
        lang: ISO 639-1 language code of the transcript
        available_langs: List of available language codes
    """

    content: Union[List[TranscriptChunk], str]
    lang: str
    available_langs: List[str]
@@ -37,19 +40,20 @@ class Transcript:
3740
@dataclass
class TranslatedTranscript:
    """A translated video transcript.

    Attributes:
        content: List of transcript chunks, or plain text when text=true
        lang: ISO 639-1 language code of the translation
    """

    content: Union[List[TranscriptChunk], str]
    lang: str
4751

4852

4953
@dataclass
5054
class Scrape:
5155
"""Scraped web content.
52-
56+
5357
Attributes:
5458
url: The URL that was scraped
5559
content: The Markdown content extracted from the URL
@@ -59,6 +63,7 @@ class Scrape:
5963
count_characters: The number of characters in the content
6064
urls: List of URLs found on the webpage
6165
"""
66+
6267
url: str
6368
content: str
6469
name: str
@@ -71,22 +76,25 @@ class Scrape:
7176
@dataclass
class Map:
    """A site map containing URLs.

    Attributes:
        urls: List of URLs found on the webpage
    """

    urls: List[str]
7985

86+
8087
@dataclass
8188
class CrawlPage:
8289
"""A page from a crawl job.
83-
90+
8491
Attributes:
8592
url: The URL that was scraped
8693
content: The markdown content extracted from the URL
8794
name: The title of the webpage
8895
description: A description of the webpage
8996
"""
97+
9098
url: str
9199
content: str
92100
name: str
@@ -96,12 +104,13 @@ class CrawlPage:
96104
@dataclass
class CrawlResponse:
    """Response from a crawl job.

    Attributes:
        status: The status of the crawl job
        pages: List of crawled pages (only when completed); defaults to None
        next: URL for the next page of results; defaults to None
    """

    status: str  # 'scraping', 'completed', 'failed' or 'cancelled'
    pages: Optional[List[CrawlPage]] = None
    next: Optional[str] = None
@@ -110,9 +119,100 @@ class CrawlResponse:
110119
@dataclass
class CrawlJob:
    """A new crawl job.

    Attributes:
        job_id: The ID of the crawl job
    """

    job_id: str
118128

129+
130+
class YoutubeChannelBaseDict(TypedDict):
    """Minimal YouTube channel reference, typed as a plain dict.

    This is a ``TypedDict``: instances are ordinary ``dict`` objects at
    runtime, so it must not be decorated with ``@dataclass``.

    Keys:
        id: The channel id
        name: The channel name
    """

    id: str
    name: str
142+
143+
144+
@dataclass
class YoutubeVideo:
    """YouTube video details.

    Attributes:
        id: YouTube video ID
        title: Video title
        description: Video description
        duration: Duration of video in seconds
        channel: A dict containing the channel ID and channel name
        tags: List of video tags
        thumbnail: The URL of the video thumbnail
        uploaded_date: Date the video was uploaded
        view_count: Number of views
        like_count: Number of likes
        transcript_languages: List of transcript languages for the video
            (presumably language codes; confirm against the API)
    """

    id: str
    title: str
    description: str
    duration: int
    channel: YoutubeChannelBaseDict
    tags: List[str]
    thumbnail: str
    uploaded_date: datetime
    view_count: int
    like_count: int
    transcript_languages: List[str]
171+
172+
173+
@dataclass
class YoutubeChannel:
    """YouTube channel details.

    Attributes:
        id: Channel ID
        name: Channel name
        handle: The YouTube channel handle
        description: Channel description
        subscriber_count: Number of subscribers
        video_count: Number of videos
        thumbnail: The URL of the channel thumbnail
        banner: The URL of the channel banner
    """

    id: str
    name: str
    handle: str
    description: str
    subscriber_count: int
    video_count: int
    thumbnail: str
    banner: str
196+
197+
198+
@dataclass
class YoutubePlaylist:
    """YouTube playlist details.

    Attributes:
        id: Playlist ID
        title: Playlist title
        video_count: Number of videos in the playlist
        view_count: Number of views in the playlist
        last_updated: Playlist last update date
        channel: A dict containing the channel ID and channel name
        description: Playlist description; defaults to None
    """

    id: str
    title: str
    video_count: int
    view_count: int
    last_updated: datetime
    channel: YoutubeChannelBaseDict
    # Declared last because it is the only field with a default value.
    description: Optional[str] = None

0 commit comments

Comments
 (0)