mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-08-01 18:14:09 -05:00
Compare commits
37 Commits
2013.12.04
...
2013.12.08
Author | SHA1 | Date | |
---|---|---|---|
![]() |
d4df5ed14c | ||
![]() |
303b479e0a | ||
![]() |
4c52160646 | ||
![]() |
a213880aaf | ||
![]() |
42d3bf844a | ||
![]() |
b860967ce4 | ||
![]() |
8ca6b8fba1 | ||
![]() |
c4d9e6731a | ||
![]() |
0d9ec5d963 | ||
![]() |
870fc4e578 | ||
![]() |
f623530d6e | ||
![]() |
ca9e02dc00 | ||
![]() |
fb30ec22fd | ||
![]() |
5cc14c2fd7 | ||
![]() |
d349cd2240 | ||
![]() |
0b6a9f639f | ||
![]() |
715c8e7bdb | ||
![]() |
7d4afc557f | ||
![]() |
563e405411 | ||
![]() |
f53c966a73 | ||
![]() |
336c3a69bd | ||
![]() |
4e76179476 | ||
![]() |
ef4fd84857 | ||
![]() |
72135030d1 | ||
![]() |
3514813d5b | ||
![]() |
9e60602084 | ||
![]() |
19e3dfc9f8 | ||
![]() |
a1ef7e85d6 | ||
![]() |
ef2fac6f4a | ||
![]() |
7fc3fa0545 | ||
![]() |
673d1273ff | ||
![]() |
b9a2c53833 | ||
![]() |
e9bf7479d2 | ||
![]() |
bfb9f7bc4c | ||
![]() |
6a656a843a | ||
![]() |
29030c0a4c | ||
![]() |
c0ade33e16 |
@@ -16,6 +16,8 @@ from youtube_dl.extractor import (
|
||||
DailymotionUserIE,
|
||||
VimeoChannelIE,
|
||||
VimeoUserIE,
|
||||
VimeoAlbumIE,
|
||||
VimeoGroupsIE,
|
||||
UstreamChannelIE,
|
||||
SoundcloudSetIE,
|
||||
SoundcloudUserIE,
|
||||
@@ -65,6 +67,22 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['title'], u'Nki')
|
||||
self.assertTrue(len(result['entries']) > 65)
|
||||
|
||||
def test_vimeo_album(self):
|
||||
dl = FakeYDL()
|
||||
ie = VimeoAlbumIE(dl)
|
||||
result = ie.extract('http://vimeo.com/album/2632481')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], u'Staff Favorites: November 2013')
|
||||
self.assertTrue(len(result['entries']) > 12)
|
||||
|
||||
def test_vimeo_groups(self):
|
||||
dl = FakeYDL()
|
||||
ie = VimeoGroupsIE(dl)
|
||||
result = ie.extract('http://vimeo.com/groups/rolexawards')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], u'Rolex Awards for Enterprise')
|
||||
self.assertTrue(len(result['entries']) > 72)
|
||||
|
||||
def test_ustream_channel(self):
|
||||
dl = FakeYDL()
|
||||
ie = UstreamChannelIE(dl)
|
||||
|
@@ -26,6 +26,7 @@ from youtube_dl.utils import (
|
||||
unsmuggle_url,
|
||||
shell_quote,
|
||||
encodeFilename,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
@@ -176,6 +177,10 @@ class TestUtil(unittest.TestCase):
|
||||
args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
|
||||
self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
|
||||
|
||||
def test_str_to_int(self):
|
||||
self.assertEqual(str_to_int('123,456'), 123456)
|
||||
self.assertEqual(str_to_int('123.456'), 123456)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -33,6 +33,7 @@ TEST_ID = 'BaW_jenozKc'
|
||||
INFO_JSON_FILE = TEST_ID + '.info.json'
|
||||
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
|
||||
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
|
||||
test URL: https://github.com/rg3/youtube-dl/issues/1892
|
||||
|
||||
This is a test video for youtube-dl.
|
||||
|
||||
|
@@ -204,11 +204,27 @@ class FileDownloader(object):
|
||||
"""Report destination filename."""
|
||||
self.to_screen(u'[download] Destination: ' + filename)
|
||||
|
||||
def _report_progress_status(self, msg, is_last_line=False):
|
||||
fullmsg = u'[download] ' + msg
|
||||
if self.params.get('progress_with_newline', False):
|
||||
self.to_screen(fullmsg)
|
||||
else:
|
||||
if os.name == 'nt':
|
||||
prev_len = getattr(self, '_report_progress_prev_line_length',
|
||||
0)
|
||||
if prev_len > len(fullmsg):
|
||||
fullmsg += u' ' * (prev_len - len(fullmsg))
|
||||
self._report_progress_prev_line_length = len(fullmsg)
|
||||
clear_line = u'\r'
|
||||
else:
|
||||
clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
|
||||
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
||||
self.to_console_title(u'youtube-dl ' + msg)
|
||||
|
||||
def report_progress(self, percent, data_len_str, speed, eta):
|
||||
"""Report download progress."""
|
||||
if self.params.get('noprogress', False):
|
||||
return
|
||||
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
|
||||
if eta is not None:
|
||||
eta_str = self.format_eta(eta)
|
||||
else:
|
||||
@@ -218,14 +234,20 @@ class FileDownloader(object):
|
||||
else:
|
||||
percent_str = 'Unknown %'
|
||||
speed_str = self.format_speed(speed)
|
||||
if self.params.get('progress_with_newline', False):
|
||||
self.to_screen(u'[download] %s of %s at %s ETA %s' %
|
||||
(percent_str, data_len_str, speed_str, eta_str))
|
||||
|
||||
msg = (u'%s of %s at %s ETA %s' %
|
||||
(percent_str, data_len_str, speed_str, eta_str))
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_finish(self, data_len_str, tot_time):
|
||||
"""Report download finished."""
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen(u'[download] Download completed')
|
||||
else:
|
||||
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
|
||||
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
|
||||
self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' %
|
||||
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
|
||||
self._report_progress_status(
|
||||
(u'100%% of %s in %s' %
|
||||
(data_len_str, self.format_seconds(tot_time))),
|
||||
is_last_line=True)
|
||||
|
||||
def report_resuming_byte(self, resume_len):
|
||||
"""Report attempt to resume at given byte."""
|
||||
@@ -246,15 +268,6 @@ class FileDownloader(object):
|
||||
"""Report it was impossible to resume download."""
|
||||
self.to_screen(u'[download] Unable to resume')
|
||||
|
||||
def report_finish(self, data_len_str, tot_time):
|
||||
"""Report download finished."""
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen(u'[download] Download completed')
|
||||
else:
|
||||
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
|
||||
self.to_screen(u'\r%s[download] 100%% of %s in %s' %
|
||||
(clear_line, data_len_str, self.format_seconds(tot_time)))
|
||||
|
||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
|
||||
def run_rtmpdump(args):
|
||||
start = time.time()
|
||||
|
@@ -405,7 +405,8 @@ class YoutubeDL(object):
|
||||
for key, value in extra_info.items():
|
||||
info_dict.setdefault(key, value)
|
||||
|
||||
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
|
||||
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
||||
process=True):
|
||||
'''
|
||||
Returns a list with a dictionary for each video we find.
|
||||
If 'download', also downloads the videos.
|
||||
@@ -441,7 +442,10 @@ class YoutubeDL(object):
|
||||
'webpage_url': url,
|
||||
'extractor_key': ie.ie_key(),
|
||||
})
|
||||
return self.process_ie_result(ie_result, download, extra_info)
|
||||
if process:
|
||||
return self.process_ie_result(ie_result, download, extra_info)
|
||||
else:
|
||||
return ie_result
|
||||
except ExtractorError as de: # An error we somewhat expected
|
||||
self.report_error(compat_str(de), de.format_traceback())
|
||||
break
|
||||
@@ -474,8 +478,33 @@ class YoutubeDL(object):
|
||||
download,
|
||||
ie_key=ie_result.get('ie_key'),
|
||||
extra_info=extra_info)
|
||||
elif result_type == 'playlist':
|
||||
elif result_type == 'url_transparent':
|
||||
# Use the information from the embedding page
|
||||
info = self.extract_info(
|
||||
ie_result['url'], ie_key=ie_result.get('ie_key'),
|
||||
extra_info=extra_info, download=False, process=False)
|
||||
|
||||
def make_result(embedded_info):
|
||||
new_result = ie_result.copy()
|
||||
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
|
||||
'entries', 'urlhandle', 'ie_key', 'duration',
|
||||
'subtitles', 'annotations', 'format',
|
||||
'thumbnail', 'thumbnails'):
|
||||
if f in new_result:
|
||||
del new_result[f]
|
||||
if f in embedded_info:
|
||||
new_result[f] = embedded_info[f]
|
||||
return new_result
|
||||
new_result = make_result(info)
|
||||
|
||||
assert new_result.get('_type') != 'url_transparent'
|
||||
if new_result.get('_type') == 'compat_list':
|
||||
new_result['entries'] = [
|
||||
make_result(e) for e in new_result['entries']]
|
||||
|
||||
return self.process_ie_result(
|
||||
new_result, download=download, extra_info=extra_info)
|
||||
elif result_type == 'playlist':
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
||||
|
@@ -56,7 +56,7 @@ from .flickr import FlickrIE
|
||||
from .francetv import (
|
||||
PluzzIE,
|
||||
FranceTvInfoIE,
|
||||
France2IE,
|
||||
FranceTVIE,
|
||||
GenerationQuoiIE
|
||||
)
|
||||
from .freesound import FreesoundIE
|
||||
@@ -102,6 +102,7 @@ from .nbc import NBCNewsIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .nhl import NHLIE, NHLVideocenterIE
|
||||
from .niconico import NiconicoIE
|
||||
from .ninegag import NineGagIE
|
||||
from .nowvideo import NowVideoIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .orf import ORFIE
|
||||
@@ -110,6 +111,7 @@ from .photobucket import PhotobucketIE
|
||||
from .podomatic import PodomaticIE
|
||||
from .pornhub import PornHubIE
|
||||
from .pornotube import PornotubeIE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .redtube import RedTubeIE
|
||||
from .ringtv import RingTVIE
|
||||
@@ -144,6 +146,7 @@ from .teamcoco import TeamcocoIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .tf1 import TF1IE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .thisav import ThisAVIE
|
||||
from .toutv import TouTvIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
@@ -168,6 +171,8 @@ from .vimeo import (
|
||||
VimeoIE,
|
||||
VimeoChannelIE,
|
||||
VimeoUserIE,
|
||||
VimeoAlbumIE,
|
||||
VimeoGroupsIE,
|
||||
)
|
||||
from .vine import VineIE
|
||||
from .viki import VikiIE
|
||||
@@ -176,6 +181,7 @@ from .wat import WatIE
|
||||
from .websurg import WeBSurgIE
|
||||
from .weibo import WeiboIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .xhamster import XHamsterIE
|
||||
from .xnxx import XNXXIE
|
||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
class AddAnimeIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
|
||||
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
|
||||
IE_NAME = u'AddAnime'
|
||||
_TEST = {
|
||||
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AppleTrailersIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||
_TEST = {
|
||||
u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
|
||||
u"playlist": [
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
class ArchiveOrgIE(InfoExtractor):
|
||||
IE_NAME = 'archive.org'
|
||||
IE_DESC = 'archive.org videos'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?archive.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
||||
_TEST = {
|
||||
u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
|
||||
u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
|
||||
|
@@ -17,8 +17,8 @@ from ..utils import (
|
||||
# add tests.
|
||||
|
||||
class ArteTvIE(InfoExtractor):
|
||||
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
|
||||
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
|
||||
_VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
|
||||
_LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
|
||||
_LIVE_URL = r'index-[0-9]+\.html$'
|
||||
|
||||
IE_NAME = u'arte.tv'
|
||||
|
@@ -16,7 +16,7 @@ class AUEngineIE(InfoExtractor):
|
||||
u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -54,7 +54,7 @@ class BambuserIE(InfoExtractor):
|
||||
|
||||
class BambuserChannelIE(InfoExtractor):
|
||||
IE_NAME = u'bambuser:channel'
|
||||
_VALID_URL = r'http://bambuser.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
|
||||
_VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
|
||||
# The maximum number we can get with each request
|
||||
_STEP = 50
|
||||
|
||||
|
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class BloombergIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?).html'
|
||||
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
|
||||
_VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
|
||||
_FEED_URL = u'http://comedycentral.com/feeds/mrss/'
|
||||
|
||||
_TEST = {
|
||||
|
@@ -55,6 +55,9 @@ class InfoExtractor(object):
|
||||
subtitles: The subtitle file contents as a dictionary in the format
|
||||
{language: subtitles}.
|
||||
view_count: How many users have watched the video on the platform.
|
||||
like_count: Number of positive ratings of the video
|
||||
dislike_count: Number of negative ratings of the video
|
||||
comment_count: Number of comments on the video
|
||||
urlhandle: [internal] The urlHandle to be used to download the file,
|
||||
like returned by urllib.request.urlopen
|
||||
age_limit: Age restriction for the video, as an integer (years)
|
||||
|
@@ -6,7 +6,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
class CSpanIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.c-spanvideo.org/program/(.*)'
|
||||
_VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.c-spanvideo.org/program/HolderonV',
|
||||
u'file': u'315139.flv',
|
||||
|
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
get_element_by_attribute,
|
||||
get_element_by_id,
|
||||
orderedSet,
|
||||
str_to_int,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -146,6 +147,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
self._list_available_subtitles(video_id, webpage)
|
||||
return
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'video_views_value[^>]+>([\d\.,]+)<', webpage, u'view count'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
@@ -155,6 +159,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'subtitles': video_subtitles,
|
||||
'thumbnail': info['thumbnail_url'],
|
||||
'age_limit': age_limit,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class DreiSatIE(InfoExtractor):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat.de/mediathek/index.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_TEST = {
|
||||
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
|
||||
u'file': u'36983.webm',
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
class EightTracksIE(InfoExtractor):
|
||||
IE_NAME = '8tracks'
|
||||
_VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
|
||||
_VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
|
||||
_TEST = {
|
||||
u"name": u"EightTracks",
|
||||
u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
|
||||
|
@@ -8,7 +8,7 @@ class ExfmIE(InfoExtractor):
|
||||
IE_NAME = u'exfm'
|
||||
IE_DESC = u'ex.fm'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
|
||||
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
|
||||
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://ex.fm/song/eh359',
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
class FazIE(InfoExtractor):
|
||||
IE_NAME = u'faz.net'
|
||||
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html'
|
||||
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class FKTVIE(InfoExtractor):
|
||||
IE_NAME = u'fernsehkritik.tv'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://fernsehkritik.tv/folge-1',
|
||||
@@ -52,7 +52,7 @@ class FKTVIE(InfoExtractor):
|
||||
|
||||
class FKTVPosteckeIE(InfoExtractor):
|
||||
IE_NAME = u'fernsehkritik.tv:postecke'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
|
||||
_TEST = {
|
||||
u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
|
||||
u'file': u'0120.flv',
|
||||
|
@@ -21,7 +21,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
thumbnail_path = info.find('image').text
|
||||
|
||||
return {'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
|
||||
'url': video_url,
|
||||
'title': info.find('titre').text,
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
||||
@@ -45,7 +45,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = u'francetvinfo.fr'
|
||||
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html'
|
||||
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||
@@ -66,35 +66,101 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
return self._extract_video(video_id)
|
||||
|
||||
|
||||
class France2IE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = u'france2.fr'
|
||||
_VALID_URL = r'''(?x)https?://www\.france2\.fr/
|
||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = u'francetv'
|
||||
IE_DESC = u'France 2, 3, 4, 5 and Ô'
|
||||
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
|
||||
(?:
|
||||
emissions/.*?/videos/(?P<id>\d+)
|
||||
| emission/(?P<key>[^/?]+)
|
||||
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
|
||||
| (emissions?|jt)/(?P<key>[^/?]+)
|
||||
)'''
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
||||
u'file': u'75540104.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'13h15, le samedi...',
|
||||
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
||||
_TESTS = [
|
||||
# france2
|
||||
{
|
||||
u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
||||
u'file': u'75540104.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'13h15, le samedi...',
|
||||
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
||||
},
|
||||
u'params': {
|
||||
# m3u8 download
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
# france3
|
||||
{
|
||||
u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
|
||||
u'info_dict': {
|
||||
u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
|
||||
u'ext': u'flv',
|
||||
u'title': u'Le scandale du prix des médicaments',
|
||||
u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
|
||||
},
|
||||
u'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
}
|
||||
# france4
|
||||
{
|
||||
u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||
u'info_dict': {
|
||||
u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||
u'ext': u'flv',
|
||||
u'title': u'Hero Corp Making of - Extrait 1',
|
||||
u'description': u'md5:c87d54871b1790679aec1197e73d650a',
|
||||
},
|
||||
u'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
# france5
|
||||
{
|
||||
u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
||||
u'info_dict': {
|
||||
u'id': u'92837968',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'C à dire ?!',
|
||||
u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
|
||||
},
|
||||
u'params': {
|
||||
# m3u8 download
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
# franceo
|
||||
{
|
||||
u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
||||
u'info_dict': {
|
||||
u'id': u'92327925',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Infô-Afrique',
|
||||
u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
|
||||
},
|
||||
u'params': {
|
||||
# m3u8 download
|
||||
u'skip_download': True,
|
||||
},
|
||||
u'skip': u'The id changes frequently',
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj.group('key'):
|
||||
webpage = self._download_webpage(url, mobj.group('key'))
|
||||
video_id = self._html_search_regex(
|
||||
r'''(?x)<div\s+class="video-player">\s*
|
||||
id_res = [
|
||||
(r'''(?x)<div\s+class="video-player">\s*
|
||||
<a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+
|
||||
class="francetv-video-player">''',
|
||||
webpage, u'video ID')
|
||||
class="francetv-video-player">'''),
|
||||
(r'<a id="player_direct" href="http://info\.francetelevisions'
|
||||
'\.fr/\?id-video=([^"/&]+)'),
|
||||
(r'<a class="video" id="ftv_player_(.+?)"'),
|
||||
]
|
||||
video_id = self._html_search_regex(id_res, webpage, u'video ID')
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
return self._extract_video(video_id)
|
||||
|
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class GamekingsIE(InfoExtractor):
|
||||
_VALID_URL = r'http?://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
|
||||
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
|
||||
_TEST = {
|
||||
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
|
||||
u'file': u'20130811.mp4',
|
||||
|
@@ -4,8 +4,7 @@ from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
|
||||
class GametrailersIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
|
||||
|
||||
_VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
|
||||
u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
|
||||
|
@@ -169,8 +169,13 @@ class GenericIE(InfoExtractor):
|
||||
# Site Name | Video Title
|
||||
# Video Title - Tagline | Site Name
|
||||
# and so on and so forth; it's just not practical
|
||||
video_title = self._html_search_regex(r'<title>(.*)</title>',
|
||||
webpage, u'video title', default=u'video', flags=re.DOTALL)
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<title>(.*?)</title>', webpage, u'video title',
|
||||
default=u'video')
|
||||
|
||||
# video uploader is domain name
|
||||
video_uploader = self._search_regex(
|
||||
r'^(?:https?://)?([^/]*)/.*', url, u'video uploader')
|
||||
|
||||
# Look for BrightCove:
|
||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||
@@ -188,7 +193,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
|
||||
if matches:
|
||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
|
||||
for tuppl in matches]
|
||||
@@ -197,13 +202,26 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion.com/embed/video/.+?)\1', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
||||
if matches:
|
||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
|
||||
for tuppl in matches]
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
# Look for embedded Wistia player
|
||||
match = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
|
||||
if match:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': unescapeHTML(match.group('url')),
|
||||
'ie_key': 'Wistia',
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
# Look for Bandcamp pages with custom domain
|
||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||
if mobj is not None:
|
||||
@@ -247,14 +265,9 @@ class GenericIE(InfoExtractor):
|
||||
# here's a fun little line of code for you:
|
||||
video_id = os.path.splitext(video_id)[0]
|
||||
|
||||
# video uploader is domain name
|
||||
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
|
||||
url, u'video uploader')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
}
|
||||
|
@@ -11,7 +11,7 @@ class HotNewHipHopIE(InfoExtractor):
|
||||
u'file': u'1435540.mp3',
|
||||
u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
|
||||
u'info_dict': {
|
||||
u"title": u"Freddie Gibbs - Lay It Down"
|
||||
u"title": u'Freddie Gibbs "Lay It Down"'
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -103,7 +103,7 @@ class IGNIE(InfoExtractor):
|
||||
class OneUPIE(IGNIE):
|
||||
"""Extractor for 1up.com, it uses the ign videos system."""
|
||||
|
||||
_VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
||||
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
||||
IE_NAME = '1up.com'
|
||||
|
||||
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
||||
|
@@ -21,7 +21,6 @@ class ImdbIE(InfoExtractor):
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
|
||||
u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
|
||||
u'duration': 151,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,6 +34,7 @@ class ImdbIE(InfoExtractor):
|
||||
flags=re.MULTILINE)
|
||||
formats = []
|
||||
for f_id, f_path in available_formats:
|
||||
f_path = f_path.strip()
|
||||
format_page = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, f_path),
|
||||
u'Downloading info for %s format' % f_id)
|
||||
@@ -46,7 +46,6 @@ class ImdbIE(InfoExtractor):
|
||||
formats.append({
|
||||
'format_id': f_id,
|
||||
'url': format_info['url'],
|
||||
'height': int(info['titleObject']['encoding']['selected'][:-1]),
|
||||
})
|
||||
|
||||
return {
|
||||
@@ -55,5 +54,4 @@ class ImdbIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'description': descr,
|
||||
'thumbnail': format_info['slate'],
|
||||
'duration': int(info['titleObject']['title']['duration_seconds']),
|
||||
}
|
||||
|
@@ -3,7 +3,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
|
||||
_VALID_URL = r'(?:http://)?instagram\.com/p/(.*?)/'
|
||||
_TEST = {
|
||||
u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
u'file': u'aye83DjauH.mp4',
|
||||
|
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
class JukeboxIE(InfoExtractor):
|
||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+).html'
|
||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
|
||||
_IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
|
||||
_VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
|
||||
_TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
|
||||
|
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
|
||||
class LiveLeakIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
||||
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
||||
IE_NAME = u'liveleak'
|
||||
_TEST = {
|
||||
u'url': u'http://www.liveleak.com/view?i=757_1364311680',
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class LivestreamIE(InfoExtractor):
|
||||
IE_NAME = u'livestream'
|
||||
_VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
|
||||
_VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
|
||||
_TEST = {
|
||||
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
|
||||
u'file': u'4719370.mp4',
|
||||
|
@@ -69,6 +69,21 @@ class MetacafeIE(InfoExtractor):
|
||||
u'age_limit': 18,
|
||||
},
|
||||
},
|
||||
# cbs video
|
||||
{
|
||||
u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
|
||||
u'info_dict': {
|
||||
u'id': u'0rOxMBabDXN6',
|
||||
u'ext': u'flv',
|
||||
u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
|
||||
u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
|
||||
u'duration': 129,
|
||||
},
|
||||
u'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -106,10 +121,16 @@ class MetacafeIE(InfoExtractor):
|
||||
|
||||
video_id = mobj.group(1)
|
||||
|
||||
# Check if video comes from YouTube
|
||||
mobj2 = re.match(r'^yt-(.*)$', video_id)
|
||||
if mobj2 is not None:
|
||||
return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')]
|
||||
# the video may come from an external site
|
||||
m_external = re.match('^(\w{2})-(.*)$', video_id)
|
||||
if m_external is not None:
|
||||
prefix, ext_id = m_external.groups()
|
||||
# Check if video comes from YouTube
|
||||
if prefix == 'yt':
|
||||
return self.url_result('http://www.youtube.com/watch?v=%s' % ext_id, 'Youtube')
|
||||
# CBS videos use theplatform.com
|
||||
if prefix == 'cb':
|
||||
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MuzuTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
|
||||
IE_NAME = u'muzu.tv'
|
||||
|
||||
_TEST = {
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MySpassIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.myspass.de/.*'
|
||||
_VALID_URL = r'http://www\.myspass\.de/.*'
|
||||
_TEST = {
|
||||
u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
|
||||
u'file': u'11741.mp4',
|
||||
|
43
youtube_dl/extractor/ninegag.py
Normal file
43
youtube_dl/extractor/ninegag.py
Normal file
@@ -0,0 +1,43 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class NineGagIE(InfoExtractor):
    """Extractor for 9gag.tv video pages, whose videos are hosted on YouTube."""

    IE_NAME = '9gag'
    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'

    _TEST = {
        u"url": u"http://9gag.tv/v/1912",
        u"file": u"1912.mp4",
        u"info_dict": {
            u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
            u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
        },
        u'add_ie': [u'Youtube']
    }

    def _real_extract(self, url):
        """Build a ``url_transparent`` result for the video at *url*.

        The page embeds a JSON blob in the ``data-video-meta`` attribute of
        the ``tv-video`` div; it is parsed and its fields are returned next
        to the YouTube video id, which is handed to the 'Youtube' extractor.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # Verbose-mode pattern: the line breaks and indentation inside the
        # literal are ignored by the regex engine.
        meta_json = self._html_search_regex(
            r'''(?x)
                <div\s*id="tv-video"\s*data-video-source="youtube"\s*
                data-video-meta="([^"]+)"''',
            page, u'video metadata')
        meta = json.loads(meta_json)

        info = {
            '_type': 'url_transparent',
            'url': meta['youtubeVideoId'],
            'ie_key': 'Youtube',
            'id': video_id,
            'title': meta['title'],
            'description': meta['description'],
            'view_count': int(meta['view_count']),
        }
        votes = meta['statistic']
        info['like_count'] = int(votes['like'])
        info['dislike_count'] = int(votes['dislike'])
        info['thumbnail'] = meta['thumbnail_url']
        return info
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
class ORFIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class PBSIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?'
|
||||
_VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://video.pbs.org/video/2365006249/',
|
||||
|
51
youtube_dl/extractor/pyvideo.py
Normal file
51
youtube_dl/extractor/pyvideo.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import re
|
||||
import os
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class PyvideoIE(InfoExtractor):
    """Extractor for pyvideo.org video pages.

    Pages that link to a YouTube watch URL are delegated to the 'Youtube'
    extractor; otherwise the media URL is read from the page itself.
    """

    _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
    _TESTS = [
        {
            u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
            u'file': u'24_4WWkSmNo.mp4',
            u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
            u'info_dict': {
                u"title": u"Become a logging expert in 30 minutes",
                u"description": u"md5:9665350d466c67fb5b1598de379021f7",
                u"upload_date": u"20130320",
                u"uploader": u"NextDayVideo",
                u"uploader_id": u"NextDayVideo",
            },
            u'add_ie': ['Youtube'],
        },
        {
            u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
            u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
            u'info_dict': {
                u'id': u'2542',
                u'ext': u'm4v',
                u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
            },
        },
    ]

    def _real_extract(self, url):
        """Return a YouTube url result or a direct-download info dict for *url*."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # A YouTube watch link anywhere in the page takes precedence over
        # the self-hosted download.
        youtube_link = re.search(
            r'(https?://www\.youtube\.com/watch\?v=.*)', page)
        if youtube_link:
            return self.url_result(youtube_link.group(1), 'Youtube')

        raw_title = self._html_search_regex(
            r'<div class="section">.*?<h3>([^>]+?)</h3>',
            page, u'title', flags=re.DOTALL)
        media_patterns = [
            r'<source src="(.*?)"',
            r'<dt>Download</dt>.*?<a href="(.+?)"',
        ]
        media_url = self._search_regex(
            media_patterns, page, u'video url', flags=re.DOTALL)

        # The heading may carry a file extension (the second test expects the
        # title without one), so drop the last suffix.
        clean_title, _ = os.path.splitext(raw_title)
        return {
            'id': video_id,
            'title': clean_title,
            'url': media_url,
        }
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class RutubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://rutube.ru/video/(?P<long_id>\w+)'
|
||||
_VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||
|
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class SlashdotIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
|
||||
_VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'
|
||||
|
||||
_TEST = {
|
||||
u'add_ie': ['Ooyala'],
|
||||
|
@@ -6,7 +6,6 @@ import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
@@ -25,7 +25,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
_VALID_URL = r'''^(?:https?://)?
|
||||
(?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|
||||
|(?P<widget>w.soundcloud.com/player/?.*?url=.*)
|
||||
|(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
|
||||
)
|
||||
'''
|
||||
IE_NAME = u'soundcloud'
|
||||
@@ -217,7 +217,7 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
|
||||
|
||||
class SoundcloudUserIE(SoundcloudIE):
|
||||
_VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
|
||||
_VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
|
||||
IE_NAME = u'soundcloud:user'
|
||||
|
||||
# it's in tests/test_playlists.py
|
||||
|
@@ -6,7 +6,7 @@ from ..utils import RegexNotFoundError, ExtractorError
|
||||
|
||||
|
||||
class SpaceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video.html'
|
||||
_VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
|
||||
_TEST = {
|
||||
u'add_ie': ['Brightcove'],
|
||||
u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
|
||||
|
@@ -18,7 +18,7 @@ from ..utils import (
|
||||
class StanfordOpenClassroomIE(InfoExtractor):
|
||||
IE_NAME = u'stanfordoc'
|
||||
IE_DESC = u'Stanford Open ClassRoom'
|
||||
_VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
|
||||
_VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
|
||||
_TEST = {
|
||||
u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
|
||||
u'file': u'PracticalUnix_intro-environment.mp4',
|
||||
|
@@ -7,7 +7,7 @@ from .common import InfoExtractor
|
||||
|
||||
class TF1IE(InfoExtractor):
|
||||
"""TF1 uses the wat.tv player."""
|
||||
_VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
|
||||
_VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
|
||||
_TEST = {
|
||||
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
||||
u'file': u'10635995.mp4',
|
||||
|
68
youtube_dl/extractor/theplatform.py
Normal file
68
youtube_dl/extractor/theplatform.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
def _x(p):
    """Pass XPath *p* to ``xpath_with_ns`` with the ``smil`` prefix mapped to the SMIL 2.1 Language namespace."""
    return xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
|
||||
|
||||
|
||||
class ThePlatformIE(InfoExtractor):
    """Extractor for link.theplatform.com URLs and ``theplatform:<id>`` pseudo-URLs."""

    _VALID_URL = r'(?:https?://link\.theplatform\.com/s/[^/]+/|theplatform:)(?P<id>[^/\?]+)'

    _TEST = {
        # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
        u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
        u'info_dict': {
            u'id': u'e9I_cZgTgIPd',
            u'ext': u'flv',
            u'title': u'Blackberry\'s big, bold Z30',
            u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
            u'duration': 247,
        },
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    }

    def _get_info(self, video_id):
        """Fetch the SMIL manifest and the preview JSON for *video_id*.

        Returns an info dict whose ``formats`` list is sorted ascending by
        (height, width, vbr). Every format shares the base URL taken from the
        SMIL head and differs in its ``play_path``.
        """
        smil_template = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
                         'format=smil&mbr=true')
        smil_doc = self._download_xml(smil_template.format(video_id), video_id)

        preview_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
        preview = json.loads(self._download_webpage(preview_url, video_id))

        smil_head = smil_doc.find(_x('smil:head'))
        smil_body = smil_doc.find(_x('smil:body'))
        rtmp_base = smil_head.find(_x('smil:meta')).attrib['base']
        variants = smil_body.find(_x('smil:switch'))

        formats = [
            {
                'url': rtmp_base,
                'play_path': 'mp4:' + video.attrib['src'],
                'ext': 'flv',
                'width': int(video.attrib['width']),
                'height': int(video.attrib['height']),
                'vbr': int(video.attrib['system-bitrate']),
            }
            for video in variants.findall(_x('smil:video'))
        ]
        formats.sort(key=lambda fmt: (fmt['height'], fmt['width'], fmt['vbr']))

        return {
            'id': video_id,
            'title': preview['title'],
            'formats': formats,
            'description': preview['description'],
            'thumbnail': preview['defaultThumbnailUrl'],
            # The _TEST above expects whole seconds (247), so the raw value
            # is floor-divided by 1000 (presumably milliseconds).
            'duration': preview['duration']//1000,
        }

    def _real_extract(self, url):
        """Pull the video id out of *url* and delegate to ``_get_info``."""
        video_id = re.match(self._VALID_URL, url).group('id')
        return self._get_info(video_id)
|
@@ -3,7 +3,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
class UnistraIE(InfoExtractor):
|
||||
_VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)'
|
||||
_VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://utv.unistra.fr/video.php?id_video=154',
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
class VeeHDIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://veehd.com/video/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://veehd.com/video/4686958',
|
||||
|
@@ -15,7 +15,7 @@ class VevoIE(InfoExtractor):
|
||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||
(currently used by MTVIE)
|
||||
"""
|
||||
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
|
||||
_VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||
u'file': u'GB1101300280.mp4',
|
||||
@@ -24,7 +24,7 @@ class VevoIE(InfoExtractor):
|
||||
u"upload_date": u"20130624",
|
||||
u"uploader": u"Hurts",
|
||||
u"title": u"Somebody to Die For",
|
||||
u"duration": 230,
|
||||
u"duration": 230.12,
|
||||
u"width": 1920,
|
||||
u"height": 1080,
|
||||
}
|
||||
|
@@ -6,7 +6,7 @@ from ..utils import ExtractorError
|
||||
|
||||
|
||||
class ViceIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.vice.com/.*?/(?P<name>.+)'
|
||||
_VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
|
||||
|
@@ -2,13 +2,10 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class ViddlerIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||
_VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||
_TEST = {
|
||||
u"url": u"http://www.viddler.com/v/43903784",
|
||||
u'file': u'43903784.mp4',
|
||||
|
@@ -7,7 +7,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
class VideofyMeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www.videofy.me/.+?|p.videofy.me/v)/(?P<id>\d+)(&|#|$)'
|
||||
_VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
|
||||
IE_NAME = u'videofy.me'
|
||||
|
||||
_TEST = {
|
||||
|
@@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):
|
||||
"""Information extractor for vimeo.com."""
|
||||
|
||||
# _VALID_URL matches Vimeo URLs
|
||||
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
|
||||
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:.*?/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
IE_NAME = u'vimeo'
|
||||
_TESTS = [
|
||||
@@ -196,6 +196,16 @@ class VimeoIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
|
||||
|
||||
try:
|
||||
view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, u'view count'))
|
||||
like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, u'like count'))
|
||||
comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, u'comment count'))
|
||||
except RegexNotFoundError:
|
||||
# This info is only available in vimeo.com/{id} urls
|
||||
view_count = None
|
||||
like_count = None
|
||||
comment_count = None
|
||||
|
||||
# Vimeo specific: extract request signature and timestamp
|
||||
sig = config['request']['signature']
|
||||
timestamp = config['request']['timestamp']
|
||||
@@ -242,6 +252,9 @@ class VimeoIE(InfoExtractor):
|
||||
'description': video_description,
|
||||
'formats': formats,
|
||||
'webpage_url': url,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
}
|
||||
|
||||
|
||||
@@ -251,11 +264,17 @@ class VimeoChannelIE(InfoExtractor):
|
||||
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
|
||||
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
|
||||
|
||||
def _page_url(self, base_url, pagenum):
|
||||
return '%s/videos/page:%d/' % (base_url, pagenum)
|
||||
|
||||
def _extract_list_title(self, webpage):
|
||||
return self._html_search_regex(self._TITLE_RE, webpage, u'list title')
|
||||
|
||||
def _extract_videos(self, list_id, base_url):
|
||||
video_ids = []
|
||||
for pagenum in itertools.count(1):
|
||||
webpage = self._download_webpage(
|
||||
'%s/videos/page:%d/' % (base_url, pagenum),list_id,
|
||||
self._page_url(base_url, pagenum) ,list_id,
|
||||
u'Downloading page %s' % pagenum)
|
||||
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||
@@ -263,11 +282,9 @@ class VimeoChannelIE(InfoExtractor):
|
||||
|
||||
entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
|
||||
for video_id in video_ids]
|
||||
list_title = self._html_search_regex(self._TITLE_RE, webpage,
|
||||
u'list title')
|
||||
return {'_type': 'playlist',
|
||||
'id': list_id,
|
||||
'title': list_title,
|
||||
'title': self._extract_list_title(webpage),
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
@@ -284,7 +301,7 @@ class VimeoUserIE(VimeoChannelIE):
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url):
|
||||
if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url) or VimeoAlbumIE.suitable(url) or VimeoGroupsIE.suitable(url):
|
||||
return False
|
||||
return super(VimeoUserIE, cls).suitable(url)
|
||||
|
||||
@@ -292,3 +309,30 @@ class VimeoUserIE(VimeoChannelIE):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
return self._extract_videos(name, 'http://vimeo.com/%s' % name)
|
||||
|
||||
|
||||
class VimeoAlbumIE(VimeoChannelIE):
    """Playlist extractor for vimeo.com/album/<id> pages."""

    IE_NAME = u'vimeo:album'
    # The backslash must precede the dot ('vimeo\.com'). The previous
    # 'vimeo.\com' left the dot as a wildcard and used the unknown escape
    # '\c', which the re module rejects on Python 3.7+.
    _VALID_URL = r'(?:https?://)?vimeo\.com/album/(?P<id>\d+)'
    _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'

    def _page_url(self, base_url, pagenum):
        """Return the URL of page *pagenum* of the album at *base_url*.

        Album pages are paginated directly under the album URL, without a
        '/videos' path segment.
        """
        return '%s/page:%d/' % (base_url, pagenum)

    def _real_extract(self, url):
        """Return the playlist result for the album addressed by *url*."""
        mobj = re.match(self._VALID_URL, url)
        album_id = mobj.group('id')
        return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
|
||||
|
||||
|
||||
class VimeoGroupsIE(VimeoAlbumIE):
    """Playlist extractor for vimeo.com/groups/<name> pages."""

    IE_NAME = u'vimeo:group'
    # The backslash must precede the dot ('vimeo\.com'). The previous
    # 'vimeo.\com' left the dot as a wildcard and used the unknown escape
    # '\c', which the re module rejects on Python 3.7+.
    _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)'

    def _extract_list_title(self, webpage):
        """Return the group title via ``_og_search_title`` instead of ``_TITLE_RE``."""
        return self._og_search_title(webpage)

    def _real_extract(self, url):
        """Return the playlist result for the group addressed by *url*."""
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name)
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class WatIE(InfoExtractor):
|
||||
_VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
|
||||
_VALID_URL=r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
|
||||
IE_NAME = 'wat.tv'
|
||||
_TEST = {
|
||||
u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
|
||||
|
55
youtube_dl/extractor/wistia.py
Normal file
55
youtube_dl/extractor/wistia.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class WistiaIE(InfoExtractor):
    """Extractor for Wistia iframe embeds (wistia.net/embed/iframe/<id>)."""

    _VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'

    _TEST = {
        u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt",
        u"file": u"sh7fpupwlt.mov",
        u"md5": u"cafeb56ec0c53c18c97405eecb3133df",
        u"info_dict": {
            u"title": u"cfh_resourceful_zdkh_final_1"
        },
    }

    def _real_extract(self, url):
        """Return the info dict (formats and thumbnails) for the embed at *url*.

        The embed page passes a JSON object to ``Wistia.iframeInit(...)``; its
        ``assets`` mapping describes every available file. 'still' assets
        become thumbnails, 'preview' assets are ignored, and everything else
        is treated as a downloadable format.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        # The dot and the braces are escaped so they match only the literal
        # JavaScript call, rather than any character after 'Wistia'.
        data_json = self._html_search_regex(
            r'Wistia\.iframeInit\((.*?), \{\}\);', webpage, u'video data')

        data = json.loads(data_json)

        formats = []
        thumbnails = []
        for atype, a in data['assets'].items():
            if atype == 'still':
                thumbnails.append({
                    'url': a['url'],
                    'resolution': '%dx%d' % (a['width'], a['height']),
                })
                continue
            if atype == 'preview':
                continue
            formats.append({
                'format_id': atype,
                'url': a['url'],
                'width': a['width'],
                'height': a['height'],
                'filesize': a['size'],
                'ext': a['ext'],
            })
        # Ascending by file size, so the largest file comes last.
        formats.sort(key=lambda a: a['filesize'])

        return {
            'id': video_id,
            'title': data['name'],
            'formats': formats,
            'thumbnails': thumbnails,
        }
|
@@ -26,7 +26,7 @@ class XHamsterIE(InfoExtractor):
|
||||
{
|
||||
u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
|
||||
u'file': u'2221348.flv',
|
||||
u'md5': u'970a94178ca4118c5aa3aaea21211b81',
|
||||
u'md5': u'e767b9475de189320f691f49c679c4c7',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20130914",
|
||||
u"uploader_id": u"jojo747400",
|
||||
@@ -46,7 +46,7 @@ class XHamsterIE(InfoExtractor):
|
||||
return mobj.group('server')+'/key='+mobj.group('file')
|
||||
|
||||
def is_hd(webpage):
|
||||
return webpage.find('<div class=\'icon iconHD\'>') != -1
|
||||
return webpage.find('<div class=\'icon iconHD\'') != -1
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
|
@@ -47,7 +47,7 @@ class YahooIE(InfoExtractor):
|
||||
# The 'meta' field is not always in the video webpage, we request it
|
||||
# from another page
|
||||
long_id = info['id']
|
||||
return self._get_info(info['id'], video_id)
|
||||
return self._get_info(long_id, video_id)
|
||||
|
||||
def _get_info(self, long_id, video_id):
|
||||
query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
|
||||
|
@@ -7,7 +7,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class YouJizzIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
|
||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
|
||||
_TEST = {
|
||||
u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
|
||||
u'file': u'2189178.flv',
|
||||
|
@@ -388,10 +388,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
super(YoutubeIE, self).__init__(*args, **kwargs)
|
||||
self._player_cache = {}
|
||||
|
||||
def report_video_webpage_download(self, video_id):
|
||||
"""Report attempt to download video webpage."""
|
||||
self.to_screen(u'%s: Downloading video webpage' % video_id)
|
||||
|
||||
def report_video_info_webpage_download(self, video_id):
|
||||
"""Report attempt to download video info webpage."""
|
||||
self.to_screen(u'%s: Downloading video info webpage' % video_id)
|
||||
@@ -1258,15 +1254,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
video_id = self._extract_id(url)
|
||||
|
||||
# Get video webpage
|
||||
self.report_video_webpage_download(video_id)
|
||||
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
||||
request = compat_urllib_request.Request(url)
|
||||
try:
|
||||
video_webpage_bytes = compat_urllib_request.urlopen(request).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
|
||||
|
||||
video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
|
||||
video_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Attempt to extract SWF player URL
|
||||
mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
|
||||
@@ -1383,6 +1372,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
else:
|
||||
video_description = u''
|
||||
|
||||
def _extract_count(klass):
|
||||
count = self._search_regex(r'class="%s">([\d,]+)</span>' % re.escape(klass), video_webpage, klass, fatal=False)
|
||||
if count is not None:
|
||||
return int(count.replace(',', ''))
|
||||
return None
|
||||
like_count = _extract_count(u'likes-count')
|
||||
dislike_count = _extract_count(u'dislikes-count')
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||
|
||||
@@ -1515,6 +1512,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'annotations': video_annotations,
|
||||
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
})
|
||||
return results
|
||||
|
||||
@@ -1529,10 +1528,10 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
\? (?:.*?&)*? (?:p|a|list)=
|
||||
| p/
|
||||
)
|
||||
((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
|
||||
((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
|
||||
.*
|
||||
|
|
||||
((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
|
||||
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
|
||||
)"""
|
||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
|
||||
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
||||
@@ -1554,7 +1553,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
def _extract_mix(self, playlist_id):
|
||||
# The mixes are generated from a single video
|
||||
# the id of the playlist is just 'RD' + video_id
|
||||
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[2:], playlist_id)
|
||||
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
|
||||
webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
|
||||
title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
|
||||
get_element_by_attribute('class', 'title ', webpage))
|
||||
@@ -1582,7 +1581,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
else:
|
||||
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
|
||||
if len(playlist_id) == 13: # 'RD' + 11 characters for the video id
|
||||
if playlist_id.startswith('RD'):
|
||||
# Mixes require a custom extraction process
|
||||
return self._extract_mix(playlist_id)
|
||||
|
||||
|
@@ -17,7 +17,6 @@ import ssl
|
||||
import socket
|
||||
import sys
|
||||
import traceback
|
||||
import xml.etree.ElementTree
|
||||
import zlib
|
||||
|
||||
try:
|
||||
@@ -562,11 +561,14 @@ def make_HTTPS_handler(opts_no_check_certificate):
|
||||
return HTTPSHandlerV3()
|
||||
else:
|
||||
context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
|
||||
context.set_default_verify_paths()
|
||||
|
||||
context.verify_mode = (ssl.CERT_NONE
|
||||
if opts_no_check_certificate
|
||||
else ssl.CERT_REQUIRED)
|
||||
context.set_default_verify_paths()
|
||||
try:
|
||||
context.load_default_certs()
|
||||
except AttributeError:
|
||||
pass # Python < 3.4
|
||||
return compat_urllib_request.HTTPSHandler(context=context)
|
||||
|
||||
class ExtractorError(Exception):
|
||||
@@ -1021,3 +1023,7 @@ def format_bytes(bytes):
|
||||
suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
|
||||
converted = float(bytes) / float(1024 ** exponent)
|
||||
return u'%.2f%s' % (converted, suffix)
|
||||
|
||||
def str_to_int(int_str):
    """Convert a digit string with thousands separators to an int.

    Commas and dots are removed before conversion, so u'1,234' and u'1.234'
    both yield 1234. ``None`` is passed through unchanged so the result of an
    optional lookup can be fed in directly. Raises ValueError if the cleaned
    string is not a valid integer.
    """
    if int_str is None:
        return None
    return int(re.sub(r'[,\.]', u'', int_str))
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.12.04'
|
||||
__version__ = '2013.12.08'
|
||||
|
Reference in New Issue
Block a user