mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-04-04 14:10:17 -05:00
parent
05c8023a27
commit
4432a9390c
50
yt_dlp/extractor/youtube/__init__.py
Normal file
50
yt_dlp/extractor/youtube/__init__.py
Normal file
@ -0,0 +1,50 @@
|
||||
# flake8: noqa: F401
|
||||
from ._base import YoutubeBaseInfoExtractor
|
||||
from ._clip import YoutubeClipIE
|
||||
from ._mistakes import YoutubeTruncatedIDIE, YoutubeTruncatedURLIE
|
||||
from ._notifications import YoutubeNotificationsIE
|
||||
from ._redirect import (
|
||||
YoutubeConsentRedirectIE,
|
||||
YoutubeFavouritesIE,
|
||||
YoutubeFeedsInfoExtractor,
|
||||
YoutubeHistoryIE,
|
||||
YoutubeLivestreamEmbedIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeShortsAudioPivotIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeWatchLaterIE,
|
||||
YoutubeYtBeIE,
|
||||
YoutubeYtUserIE,
|
||||
)
|
||||
from ._search import YoutubeMusicSearchURLIE, YoutubeSearchDateIE, YoutubeSearchIE, YoutubeSearchURLIE
|
||||
from ._tab import YoutubePlaylistIE, YoutubeTabBaseInfoExtractor, YoutubeTabIE
|
||||
from ._video import YoutubeIE
|
||||
|
||||
# Hack to allow plugin overrides to work: every extractor class re-exported
# above is relabelled so that its ``__module__`` names this package instead
# of the private submodule it is actually defined in.
for _cls in (
    YoutubeBaseInfoExtractor,
    YoutubeClipIE,
    YoutubeTruncatedIDIE,
    YoutubeTruncatedURLIE,
    YoutubeNotificationsIE,
    YoutubeConsentRedirectIE,
    YoutubeFavouritesIE,
    YoutubeFeedsInfoExtractor,
    YoutubeHistoryIE,
    YoutubeLivestreamEmbedIE,
    YoutubeRecommendedIE,
    YoutubeShortsAudioPivotIE,
    YoutubeSubscriptionsIE,
    YoutubeWatchLaterIE,
    YoutubeYtBeIE,
    YoutubeYtUserIE,
    YoutubeMusicSearchURLIE,
    YoutubeSearchDateIE,
    YoutubeSearchIE,
    YoutubeSearchURLIE,
    YoutubePlaylistIE,
    YoutubeTabBaseInfoExtractor,
    YoutubeTabIE,
    YoutubeIE,
):
    _cls.__module__ = 'yt_dlp.extractor.youtube'
|
1145
yt_dlp/extractor/youtube/_base.py
Normal file
1145
yt_dlp/extractor/youtube/_base.py
Normal file
File diff suppressed because it is too large
Load Diff
66
yt_dlp/extractor/youtube/_clip.py
Normal file
66
yt_dlp/extractor/youtube/_clip.py
Normal file
@ -0,0 +1,66 @@
|
||||
from ._tab import YoutubeTabBaseInfoExtractor
|
||||
from ._video import YoutubeIE
|
||||
from ...utils import ExtractorError, traverse_obj
|
||||
|
||||
|
||||
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extractor for ``youtube.com/clip/<id>`` URLs.

    The clip page is used to find the ID of the underlying video and the
    clip's start/end offsets; the actual extraction is delegated to
    ``YoutubeIE`` through a ``url_transparent`` result.
    """

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        },
    }]

    def _real_extract(self, url):
        """Resolve a clip URL to its base video plus section boundaries.

        Returns a ``url_transparent`` result pointing at the watch page of
        the base video, with ``section_start``/``section_end`` in seconds.

        Raises:
            ExtractorError: if the base video ID or the clip's start/end
                times cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First ``loopCommand`` found under the clip attribution panel; it is
        # expected to carry the ``startTimeMs``/``endTimeMs`` of the clip.
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # ``clip_data`` is None when the path above does not match, and the
        # time keys may be absent or malformed. Report that as an extraction
        # failure instead of leaking a bare TypeError/KeyError/ValueError.
        try:
            section_start = int(clip_data['startTimeMs']) / 1000
            section_end = int(clip_data['endTimeMs']) / 1000
        except (KeyError, TypeError, ValueError):
            raise ExtractorError('Unable to find clip start/end times')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': section_start,
            'section_end': section_end,
            '_format_sort_fields': (  # https protocol is prioritized for ffmpeg compatibility
                'proto:https', 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang'),
        }
|
69
yt_dlp/extractor/youtube/_mistakes.py
Normal file
69
yt_dlp/extractor/youtube/_mistakes.py
Normal file
@ -0,0 +1,69 @@
|
||||
|
||||
from ._base import YoutubeBaseInfoExtractor
|
||||
from ...utils import ExtractorError
|
||||
|
||||
|
||||
class YoutubeTruncatedURLIE(YoutubeBaseInfoExtractor):
    """Matches YouTube watch/attribution URLs that end without a video ID.

    Extraction always fails with a hint that the URL was probably cut off at
    an unquoted ``&`` by the shell.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ``ExtractorError`` with a quoting hint."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .'
        )
        raise ExtractorError(hint, expected=True)
|
||||
|
||||
|
||||
class YoutubeTruncatedIDIE(YoutubeBaseInfoExtractor):
    """Matches watch URLs whose ``v=`` value is only 1-10 ID characters long.

    Extraction always fails, reporting the ID as incomplete.
    """

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ``ExtractorError`` naming the short ID."""
        short_id = self._match_id(url)
        message = f'Incomplete YouTube ID {short_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)
|
98
yt_dlp/extractor/youtube/_notifications.py
Normal file
98
yt_dlp/extractor/youtube/_notifications.py
Normal file
@ -0,0 +1,98 @@
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from ._tab import YoutubeTabBaseInfoExtractor, YoutubeTabIE
|
||||
from ._video import YoutubeIE
|
||||
from ...utils import traverse_obj
|
||||
|
||||
|
||||
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Playlist of the entries in the notification menu (``:ytnotif``)."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one result per notification item found in *response*.

        ``continuation_list[0]`` is reset to ``None`` and then set to the
        last ``continuationItemRenderer`` seen among the items, if any, so
        the caller can tell whether another page should be requested.
        """
        # Items of the initial popup, or of a continuation response
        initial_items_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
            'multiPageMenuNotificationSectionRenderer', 'items')
        continuation_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')

        items = traverse_obj(
            response, initial_items_path, continuation_items_path, expected_type=list) or []
        continuation_list[0] = None
        for menu_item in items:
            result = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if result:
                yield result
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Turn a ``notificationRenderer`` into a ``url`` result.

        Notifications with a watch endpoint point at the video; otherwise a
        channel ID and a post ID are taken from the browse endpoint to build
        a community post URL. Returns ``None`` when neither can be built.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens

        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The sent-time text is only parsed when approximate dates were requested
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page until no continuation item is left."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is read from the previous response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notification menu as a lazily evaluated playlist."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
|
247
yt_dlp/extractor/youtube/_redirect.py
Normal file
247
yt_dlp/extractor/youtube/_redirect.py
Normal file
@ -0,0 +1,247 @@
|
||||
import base64
|
||||
import urllib.parse
|
||||
|
||||
from ._base import YoutubeBaseInfoExtractor
|
||||
from ._tab import YoutubeTabIE
|
||||
from ...utils import ExtractorError, classproperty, parse_qs, update_url_query, url_or_none
|
||||
|
||||
|
||||
class YoutubeYtBeIE(YoutubeBaseInfoExtractor):
    """Handles ``youtu.be/<video id>`` short links that carry a ``list=`` parameter.

    The link is rewritten to the equivalent ``youtube.com/watch`` URL and
    handed to ``YoutubeTabIE``.
    """

    IE_DESC = 'youtu.be'
    _VALID_URL = rf'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})'
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the watch URL built from the video and playlist IDs."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
|
||||
|
||||
|
||||
class YoutubeLivestreamEmbedIE(YoutubeBaseInfoExtractor):
    """Maps ``/embed/live_stream?channel=<id>`` to the channel's ``/live`` URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` with the channel's live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
|
||||
|
||||
|
||||
class YoutubeYtUserIE(YoutubeBaseInfoExtractor):
    """Expands the ``ytuser:<name>`` shorthand to a ``/user/<name>`` URL."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` with the user page URL."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
|
||||
|
||||
|
||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ``:ytfav`` keyword family to the ``LL`` playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` with the fixed playlist URL."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
|
||||
|
||||
|
||||
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Common base for the feed extractors.

    Each subclass is expected to override ``_FEED_NAME``; it determines both
    the extractor name (``youtube:<feed>``) and the ``/feed/<feed>`` URL that
    extraction is delegated to.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    @classproperty
    def IE_NAME(cls):
        """Extractor name derived from the class's feed name."""
        return 'youtube:{}'.format(cls._FEED_NAME)

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` with this feed's URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
|
||||
|
||||
|
||||
class YoutubeWatchLaterIE(YoutubeBaseInfoExtractor):
    """Maps the ``:ytwatchlater`` keyword to the ``WL`` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` with the fixed playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
|
||||
|
||||
|
||||
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``.

    Matches the ``:ytrec`` keyword family as well as the bare YouTube
    homepage URL. Unlike its base class it does not require login.
    """

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
|
||||
|
||||
|
||||
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions`` (``:ytsubs`` keyword family)."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
|
||||
|
||||
|
||||
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history`` (``:ythis`` / ``:ythistory`` keywords)."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
|
||||
|
||||
|
||||
class YoutubeShortsAudioPivotIE(YoutubeBaseInfoExtractor):
    """Maps ``/source/<video id>/shorts`` to the ``sfv_audio_pivot`` feed URL."""

    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    IE_NAME = 'youtube:shorts:pivot:audio'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for this video id

        The encoded video id is substituted three times into a fixed byte
        template, which is then base64-encoded and URL-quoted.
        """
        raw_id = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (raw_id, raw_id, raw_id)
        encoded = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` with the audio pivot feed URL."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
|
||||
|
||||
|
||||
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    """Follows ``consent.youtube.com/m?`` pages to the URL in their ``continue`` parameter."""

    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Return a URL result for the last ``continue`` query value.

        Raises:
            ExtractorError: (expected) if there is no ``continue`` value or
                ``url_or_none`` rejects it.
        """
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
|
167
yt_dlp/extractor/youtube/_search.py
Normal file
167
yt_dlp/extractor/youtube/_search.py
Normal file
@ -0,0 +1,167 @@
|
||||
import urllib.parse
|
||||
|
||||
from ._tab import YoutubeTabBaseInfoExtractor
|
||||
from ..common import SearchInfoExtractor
|
||||
from ...utils import join_nonempty, parse_qs
|
||||
|
||||
|
||||
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAfABAQ=='  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'note': 'Suicide/self-harm search warning',
        'url': 'ytsearch1:i hate myself and i wanna die',
        'playlist_count': 1,
        'info_dict': {
            'id': 'i hate myself and i wanna die',
            'title': 'i hate myself and i wanna die',
        },
    }]
|
||||
|
||||
|
||||
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB8AEB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
|
||||
|
||||
|
||||
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles ``youtube.com/results`` and ``/search`` URLs.

    The query is read from ``search_query`` (or ``q``) and the optional
    ``sp`` parameter is forwarded to the search as its params.
    """

    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            },
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist titled after the query."""
        qs = parse_qs(url)
        # ``search_query`` takes precedence over ``q`` when both are present
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
|
||||
|
||||
|
||||
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles ``music.youtube.com/search`` URLs.

    A result section can be selected either with an ``sp`` query parameter
    or with a URL fragment naming one of the keys of ``_SECTIONS``.
    """

    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as accepted in the URL fragment) -> search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the search results as a playlist.

        The playlist id/title is the query, suffixed with `` - <section>``
        when a section was resolved (or with the raw ``sp`` value when it
        does not correspond to a known section).
        """
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Reverse lookup of the section name; fall back to the raw value
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
            else:
                section = params
        else:
            # Only the text between the first and second '#' is considered
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)
|
2348
yt_dlp/extractor/youtube/_tab.py
Normal file
2348
yt_dlp/extractor/youtube/_tab.py
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user