mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-07-21 21:01:59 -05:00
Compare commits
34 Commits
2015.02.18
...
2015.02.19
Author | SHA1 | Date | |
---|---|---|---|
a21420389e | |||
6140baf4e1 | |||
8fc642eb5b | |||
e66e1a0046 | |||
d5c69f1da4 | |||
5c8a3f862a | |||
a3b9157f49 | |||
b88ba05356 | |||
b74d505577 | |||
9e2d7dca87 | |||
d236b37ac9 | |||
e880c66bd8 | |||
383456aa29 | |||
1a13940c8d | |||
3d54788495 | |||
71d53ace2f | |||
f37e3f99f0 | |||
bd03ffc16e | |||
1ac1af9b47 | |||
3bf5705316 | |||
1c2528c8a3 | |||
7bd15b1a03 | |||
6b961a85fd | |||
7707004043 | |||
a025d3c5a5 | |||
c460bdd56b | |||
b81a359eb6 | |||
d61aefb24c | |||
d305dd73a3 | |||
93a16ba238 | |||
85d5866177 | |||
9789d7535d | |||
d8443cd3f7 | |||
d47c26e168 |
2
Makefile
2
Makefile
@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
||||
|
||||
PREFIX ?= /usr/local
|
||||
BINDIR ?= $(PREFIX)/bin
|
||||
|
@ -68,6 +68,7 @@
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **CBS**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSSports**
|
||||
- **CeskaTelevize**
|
||||
- **channel9**: Channel 9
|
||||
- **Chilloutzone**
|
||||
@ -121,6 +122,7 @@
|
||||
- **EllenTV**
|
||||
- **EllenTV:clips**
|
||||
- **ElPais**: El País
|
||||
- **Embedly**
|
||||
- **EMPFlix**
|
||||
- **Engadget**
|
||||
- **Eporner**
|
||||
@ -190,6 +192,7 @@
|
||||
- **ign.com**
|
||||
- **imdb**: Internet Movie Database trailers
|
||||
- **imdb:list**: Internet Movie Database lists
|
||||
- **Imgur**
|
||||
- **Ina**
|
||||
- **InfoQ**
|
||||
- **Instagram**
|
||||
@ -262,6 +265,7 @@
|
||||
- **myvideo**
|
||||
- **MyVidster**
|
||||
- **n-tv.de**
|
||||
- **NationalGeographic**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
@ -319,6 +323,7 @@
|
||||
- **podomatic**
|
||||
- **PornHd**
|
||||
- **PornHub**
|
||||
- **PornHubPlaylist**
|
||||
- **Pornotube**
|
||||
- **PornoXO**
|
||||
- **PromptFile**
|
||||
@ -352,6 +357,7 @@
|
||||
- **rutube:movie**: Rutube movies
|
||||
- **rutube:person**: Rutube person videos
|
||||
- **RUTV**: RUTV.RU
|
||||
- **Sandia**: Sandia National Laboratories
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
|
@ -113,6 +113,16 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
self.assertTrue(
|
||||
got.startswith(start_str),
|
||||
'field %s (value: %r) should start with %r' % (info_field, got, start_str))
|
||||
elif isinstance(expected, compat_str) and expected.startswith('contains:'):
|
||||
got = got_dict.get(info_field)
|
||||
contains_str = expected[len('contains:'):]
|
||||
self.assertTrue(
|
||||
isinstance(got, compat_str),
|
||||
'Expected a %s object, but got %s for field %s' % (
|
||||
compat_str.__name__, type(got).__name__, info_field))
|
||||
self.assertTrue(
|
||||
contains_str in got,
|
||||
'field %s (value: %r) should contain %r' % (info_field, got, contains_str))
|
||||
elif isinstance(expected, type):
|
||||
got = got_dict.get(info_field)
|
||||
self.assertTrue(isinstance(got, expected),
|
||||
|
@ -370,6 +370,10 @@ class TestUtil(unittest.TestCase):
|
||||
"playlist":[{"controls":{"all":null}}]
|
||||
}''')
|
||||
|
||||
inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
|
||||
json_code = js_to_json(inp)
|
||||
self.assertEqual(json.loads(json_code), json.loads(inp))
|
||||
|
||||
def test_js_to_json_edgecases(self):
|
||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||
|
@ -1534,29 +1534,18 @@ class YoutubeDL(object):
|
||||
return res
|
||||
|
||||
def list_formats(self, info_dict):
|
||||
def line(format, idlen=20):
|
||||
return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
|
||||
format['format_id'],
|
||||
format['ext'],
|
||||
self.format_resolution(format),
|
||||
self._format_note(format),
|
||||
))
|
||||
|
||||
formats = info_dict.get('formats', [info_dict])
|
||||
idlen = max(len('format code'),
|
||||
max(len(f['format_id']) for f in formats))
|
||||
formats_s = [
|
||||
line(f, idlen) for f in formats
|
||||
table = [
|
||||
[f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
|
||||
for f in formats
|
||||
if f.get('preference') is None or f['preference'] >= -1000]
|
||||
if len(formats) > 1:
|
||||
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
|
||||
table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
|
||||
|
||||
header_line = line({
|
||||
'format_id': 'format code', 'ext': 'extension',
|
||||
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
|
||||
header_line = ['format code', 'extension', 'resolution', 'note']
|
||||
self.to_screen(
|
||||
'[info] Available formats for %s:\n%s\n%s' %
|
||||
(info_dict['id'], header_line, '\n'.join(formats_s)))
|
||||
'[info] Available formats for %s:\n%s' %
|
||||
(info_dict['id'], render_table(header_line, table)))
|
||||
|
||||
def list_thumbnails(self, info_dict):
|
||||
thumbnails = info_dict.get('thumbnails')
|
||||
|
@ -58,6 +58,7 @@ from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .cbs import CBSIE
|
||||
from .cbsnews import CBSNewsIE
|
||||
from .cbssports import CBSSportsIE
|
||||
from .ccc import CCCIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
@ -121,6 +122,7 @@ from .ellentv import (
|
||||
EllenTVClipsIE,
|
||||
)
|
||||
from .elpais import ElPaisIE
|
||||
from .embedly import EmbedlyIE
|
||||
from .empflix import EMPFlixIE
|
||||
from .engadget import EngadgetIE
|
||||
from .eporner import EpornerIE
|
||||
@ -204,6 +206,7 @@ from .imdb import (
|
||||
ImdbIE,
|
||||
ImdbListIE
|
||||
)
|
||||
from .imgur import ImgurIE
|
||||
from .ina import InaIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE, InstagramUserIE
|
||||
@ -282,6 +285,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import NationalGeographicIE
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import (
|
||||
@ -350,7 +354,10 @@ from .playfm import PlayFMIE
|
||||
from .playvid import PlayvidIE
|
||||
from .podomatic import PodomaticIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import PornHubIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
PornHubPlaylistIE,
|
||||
)
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornoxo import PornoXOIE
|
||||
from .promptfile import PromptFileIE
|
||||
@ -386,6 +393,7 @@ from .rutube import (
|
||||
RutubePersonIE,
|
||||
)
|
||||
from .rutv import RUTVIE
|
||||
from .sandia import SandiaIE
|
||||
from .sapo import SapoIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .sbs import SBSIE
|
||||
|
@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
||||
'info_dict': {
|
||||
'title': 'Sealife',
|
||||
'id': '3550319591001',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
playlist_info = json_data['videoList']
|
||||
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
||||
|
||||
return self.playlist_result(videos, playlist_id=playlist_info['id'],
|
||||
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
||||
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
||||
|
||||
def _extract_video_info(self, video_info):
|
||||
|
@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@ -39,8 +37,7 @@ class CBSIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
real_id = self._search_regex(
|
||||
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
||||
|
30
youtube_dl/extractor/cbssports.py
Normal file
30
youtube_dl/extractor/cbssports.py
Normal file
@ -0,0 +1,30 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CBSSportsIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
|
||||
'info_dict': {
|
||||
'id': '_d5_GbO8p1sT',
|
||||
'ext': 'flv',
|
||||
'title': 'US Open flashbacks: 1990s',
|
||||
'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
section = mobj.group('section')
|
||||
video_id = mobj.group('id')
|
||||
all_videos = self._download_json(
|
||||
'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
|
||||
video_id)
|
||||
# The json file contains the info of all the videos in the section
|
||||
video_info = next(v for v in all_videos if v['pcid'] == video_id)
|
||||
return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
|
16
youtube_dl/extractor/embedly.py
Normal file
16
youtube_dl/extractor/embedly.py
Normal file
@ -0,0 +1,16 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class EmbedlyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))
|
@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor):
|
||||
IE_NAME = '5min'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
|
||||
https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
|
||||
5min:)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
|
@ -532,7 +532,7 @@ class GenericIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'Mrj4DVp2zeA',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150204',
|
||||
'upload_date': '20150212',
|
||||
'uploader': 'The National Archives UK',
|
||||
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
|
||||
'uploader_id': 'NationalArchives08',
|
||||
|
97
youtube_dl/extractor/imgur.py
Normal file
97
youtube_dl/extractor/imgur.py
Normal file
@ -0,0 +1,97 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class ImgurIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||
'info_dict': {
|
||||
'id': 'A61SaA1',
|
||||
'ext': 'mp4',
|
||||
'title': 'MRW gifv is up and running without any bugs',
|
||||
'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://imgur.com/A61SaA1',
|
||||
'info_dict': {
|
||||
'id': 'A61SaA1',
|
||||
'ext': 'mp4',
|
||||
'title': 'MRW gifv is up and running without any bugs',
|
||||
'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
width = int_or_none(self._search_regex(
|
||||
r'<param name="width" value="([0-9]+)"',
|
||||
webpage, 'width', fatal=False))
|
||||
height = int_or_none(self._search_regex(
|
||||
r'<param name="height" value="([0-9]+)"',
|
||||
webpage, 'height', fatal=False))
|
||||
|
||||
video_elements = self._search_regex(
|
||||
r'(?s)<div class="video-elements">(.*?)</div>',
|
||||
webpage, 'video elements', default=None)
|
||||
if not video_elements:
|
||||
raise ExtractorError(
|
||||
'No sources found for video %s. Maybe an image?' % video_id,
|
||||
expected=True)
|
||||
|
||||
formats = []
|
||||
for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
|
||||
formats.append({
|
||||
'format_id': m.group('type').partition('/')[2],
|
||||
'url': self._proto_relative_url(m.group('src')),
|
||||
'ext': mimetype2ext(m.group('type')),
|
||||
'acodec': 'none',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'http_headers': {
|
||||
'User-Agent': 'youtube-dl (like wget)',
|
||||
},
|
||||
})
|
||||
|
||||
gif_json = self._search_regex(
|
||||
r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
|
||||
webpage, 'GIF code', fatal=False)
|
||||
if gif_json:
|
||||
gifd = self._parse_json(
|
||||
gif_json, video_id, transform_source=js_to_json)
|
||||
formats.append({
|
||||
'format_id': 'gif',
|
||||
'preference': -10,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'ext': 'gif',
|
||||
'acodec': 'none',
|
||||
'vcodec': 'gif',
|
||||
'container': 'gif',
|
||||
'url': self._proto_relative_url(gifd['gifUrl']),
|
||||
'filesize': gifd.get('size'),
|
||||
'http_headers': {
|
||||
'User-Agent': 'youtube-dl (like wget)',
|
||||
},
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
'title': self._og_search_title(webpage),
|
||||
}
|
38
youtube_dl/extractor/nationalgeographic.py
Normal file
38
youtube_dl/extractor/nationalgeographic.py
Normal file
@ -0,0 +1,38 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class NationalGeographicIE(InfoExtractor):
|
||||
_VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
|
||||
'info_dict': {
|
||||
'id': '4DmDACA6Qtk_',
|
||||
'ext': 'flv',
|
||||
'title': 'Mating Crabs Busted by Sharks',
|
||||
'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
name = url_basename(url)
|
||||
|
||||
webpage = self._download_webpage(url, name)
|
||||
feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
|
||||
guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')
|
||||
|
||||
feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
|
||||
content = feed.find('.//{http://search.yahoo.com/mrss/}content')
|
||||
theplatform_id = url_basename(content.attrib.get('url'))
|
||||
|
||||
return self.url_result(smuggle_url(
|
||||
'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
|
||||
# For some reason, the normal links don't work and we must force the use of f4m
|
||||
{'force_smil_url': True}))
|
@ -18,13 +18,13 @@ class NBCIE(InfoExtractor):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
|
||||
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
||||
# md5 checksum is not stable
|
||||
'info_dict': {
|
||||
'id': 'bTmnLCvIbaaH',
|
||||
'id': 'c9xnCo0YPOPH',
|
||||
'ext': 'flv',
|
||||
'title': 'I Am a Firefighter',
|
||||
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
|
||||
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
||||
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor):
|
||||
'timestamp': 1344858571,
|
||||
'age_limit': 12,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Download only works from Germany',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -1,9 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
@ -11,7 +8,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class PatreonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.patreon.com/creation?hid=743933',
|
||||
@ -35,6 +32,23 @@ class PatreonIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.patreon.com/creation?hid=1682498',
|
||||
'info_dict': {
|
||||
'id': 'SU4fj_aEMVw',
|
||||
'ext': 'mp4',
|
||||
'title': 'I\'m on Patreon!',
|
||||
'uploader': 'TraciJHines',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
'upload_date': '20150211',
|
||||
'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
|
||||
'uploader_id': 'TraciJHines',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
# Currently Patreon exposes download URL via hidden CSS, so login is not
|
||||
@ -65,26 +79,29 @@ class PatreonIE(InfoExtractor):
|
||||
'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._og_search_title(webpage).strip()
|
||||
|
||||
attach_fn = self._html_search_regex(
|
||||
r'<div class="attach"><a target="_blank" href="([^"]+)">',
|
||||
webpage, 'attachment URL', default=None)
|
||||
embed = self._html_search_regex(
|
||||
r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
|
||||
webpage, 'embedded URL', default=None)
|
||||
|
||||
if attach_fn is not None:
|
||||
video_url = 'http://www.patreon.com' + attach_fn
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
|
||||
elif embed is not None:
|
||||
return self.url_result(embed)
|
||||
else:
|
||||
playlist_js = self._search_regex(
|
||||
playlist = self._parse_json(self._search_regex(
|
||||
r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
|
||||
webpage, 'playlist JSON')
|
||||
playlist_json = js_to_json(playlist_js)
|
||||
playlist = json.loads(playlist_json)
|
||||
webpage, 'playlist JSON'),
|
||||
video_id, transform_source=js_to_json)
|
||||
data = playlist[0]
|
||||
video_url = self._proto_relative_url(data['mp3'])
|
||||
thumbnail = self._proto_relative_url(data.get('cover'))
|
||||
|
@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
||||
video_uploader = self._html_search_regex(
|
||||
r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
|
||||
r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
|
||||
webpage, 'uploader', fatal=False)
|
||||
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||
if thumbnail:
|
||||
@ -110,3 +110,33 @@ class PornHubIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
|
||||
class PornHubPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/playlist/6201671',
|
||||
'info_dict': {
|
||||
'id': '6201671',
|
||||
'title': 'P0p4',
|
||||
},
|
||||
'playlist_mincount': 35,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
|
||||
for video_url in set(re.findall('href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage))
|
||||
]
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
|
||||
playlist_id)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist.get('title'), playlist.get('description'))
|
||||
|
117
youtube_dl/extractor/sandia.py
Normal file
117
youtube_dl/extractor/sandia.py
Normal file
@ -0,0 +1,117 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class SandiaIE(InfoExtractor):
|
||||
IE_DESC = 'Sandia National Laboratories'
|
||||
_VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)'
|
||||
_TEST = {
|
||||
'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
|
||||
'md5': '9422edc9b9a60151727e4b6d8bef393d',
|
||||
'info_dict': {
|
||||
'id': '24aace4429fc450fb5b38cdbf424a66e1d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Xyce Software Training - Section 1',
|
||||
'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
|
||||
'upload_date': '20120904',
|
||||
'duration': 7794,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
js_path = self._search_regex(
|
||||
r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
|
||||
webpage, 'JS code URL')
|
||||
js_url = compat_urlparse.urljoin(url, js_path)
|
||||
|
||||
js_code = self._download_webpage(
|
||||
js_url, video_id, note='Downloading player')
|
||||
|
||||
def extract_str(key, **args):
|
||||
return self._search_regex(
|
||||
r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
|
||||
js_code, key, **args)
|
||||
|
||||
def extract_data(key, **args):
|
||||
data_json = extract_str(key, **args)
|
||||
if data_json is None:
|
||||
return data_json
|
||||
return self._parse_json(
|
||||
data_json, video_id, transform_source=js_to_json)
|
||||
|
||||
formats = []
|
||||
for i in itertools.count():
|
||||
fd = extract_data('VideoUrls[%d]' % i, default=None)
|
||||
if fd is None:
|
||||
break
|
||||
formats.append({
|
||||
'format_id': '%s' % i,
|
||||
'format_note': fd['MimeType'].partition('/')[2],
|
||||
'ext': mimetype2ext(fd['MimeType']),
|
||||
'url': fd['Location'],
|
||||
'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
slide_baseurl = compat_urlparse.urljoin(
|
||||
url, extract_data('SlideBaseUrl'))
|
||||
slide_template = slide_baseurl + re.sub(
|
||||
r'\{0:D?([0-9+])\}', r'%0\1d', extract_data('SlideImageFileNameTemplate'))
|
||||
slides = []
|
||||
last_slide_time = 0
|
||||
for i in itertools.count(1):
|
||||
sd = extract_str('Slides[%d]' % i, default=None)
|
||||
if sd is None:
|
||||
break
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
|
||||
sd, 'slide %s timestamp' % i, fatal=False))
|
||||
slides.append({
|
||||
'url': slide_template % i,
|
||||
'duration': timestamp - last_slide_time,
|
||||
})
|
||||
last_slide_time = timestamp
|
||||
formats.append({
|
||||
'format_id': 'slides',
|
||||
'protocol': 'slideshow',
|
||||
'url': json.dumps(slides),
|
||||
'preference': -10000, # Downloader not yet written
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = extract_data('Title')
|
||||
description = extract_data('Description', fatal=False)
|
||||
duration = int_or_none(extract_data(
|
||||
'Duration', fatal=False), scale=1000)
|
||||
upload_date = unified_strdate(extract_data('AirDate', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
}
|
@ -25,7 +25,6 @@ class SockshareIE(InfoExtractor):
|
||||
'id': '437BE28B89D799D7',
|
||||
'title': 'big_buck_bunny_720p_surround.avi',
|
||||
'ext': 'avi',
|
||||
'thumbnail': 're:^http://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
@ -45,7 +44,7 @@ class SockshareIE(InfoExtractor):
|
||||
''', webpage, 'hash')
|
||||
|
||||
fields = {
|
||||
"hash": confirm_hash,
|
||||
"hash": confirm_hash.encode('utf-8'),
|
||||
"confirm": "Continue as Free User"
|
||||
}
|
||||
|
||||
@ -68,7 +67,7 @@ class SockshareIE(InfoExtractor):
|
||||
webpage, 'title', default=None)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<img\s+src="([^"]*)".+?name="bg"',
|
||||
webpage, 'thumbnail')
|
||||
webpage, 'thumbnail', default=None)
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
|
@ -4,11 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class TheOnionIE(InfoExtractor):
|
||||
_VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
|
||||
_VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
|
||||
_TEST = {
|
||||
'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
|
||||
'md5': '19eaa9a39cf9b9804d982e654dc791ee',
|
||||
@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
article_id = mobj.group('article_id')
|
||||
|
||||
webpage = self._download_webpage(url, article_id)
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'"videoId":\s(\d+),', webpage, 'video ID')
|
||||
@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor):
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
|
||||
if not sources:
|
||||
raise ExtractorError(
|
||||
'No sources found for video %s' % video_id, expected=True)
|
||||
|
||||
formats = []
|
||||
for src, type_ in sources:
|
||||
if type_ == 'video/mp4':
|
||||
@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor):
|
||||
})
|
||||
elif type_ == 'application/x-mpegURL':
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(src, video_id, preference=-1))
|
||||
self._extract_m3u8_formats(src, display_id, preference=-1))
|
||||
else:
|
||||
self.report_warning(
|
||||
'Encountered unexpected format: %s' % type_)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
|
@ -71,7 +71,9 @@ class ThePlatformIE(SubtitlesInfoExtractor):
|
||||
if not provider_id:
|
||||
provider_id = 'dJ5BDC'
|
||||
|
||||
if mobj.group('config'):
|
||||
if smuggled_data.get('force_smil_url', False):
|
||||
smil_url = url
|
||||
elif mobj.group('config'):
|
||||
config_url = url + '&form=json'
|
||||
config_url = config_url.replace('swf/', 'config/')
|
||||
config_url = config_url.replace('onsite/', 'onsite/config/')
|
||||
|
@ -175,7 +175,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option')
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'password': password,
|
||||
|
@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor):
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
story_filename = self._search_regex(
|
||||
r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
|
||||
speaker_id = self._search_regex(
|
||||
r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
|
||||
story_id = self._search_regex(
|
||||
r'\.storyId\((\d+)\)', webpage, 'story ID')
|
||||
speaker_type = self._search_regex(
|
||||
r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
|
||||
great_life = self._search_regex(
|
||||
r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
|
||||
embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
|
||||
r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
|
||||
webpage, 'embed params').split(',')]
|
||||
|
||||
(
|
||||
_, speaker_id, story_id, story_duration,
|
||||
speaker_type, great_life, _thumbnail, _has_subtitles,
|
||||
story_filename, _story_order) = embed_params
|
||||
|
||||
is_great_life_series = great_life == 'true'
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
|
||||
duration = int_or_none(story_duration)
|
||||
|
||||
# URL building, see: http://www.webofstories.com/scripts/player.js
|
||||
ms_prefix = ''
|
||||
|
@ -18,8 +18,8 @@ class WSJIE(InfoExtractor):
|
||||
'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150202',
|
||||
'uploader_id': 'bbright',
|
||||
'creator': 'bbright',
|
||||
'uploader_id': 'jdesai',
|
||||
'creator': 'jdesai',
|
||||
'categories': list, # a long list
|
||||
'duration': 90,
|
||||
'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
|
||||
|
@ -22,7 +22,7 @@ class XTubeIE(InfoExtractor):
|
||||
'id': 'kVTUy_G222_',
|
||||
'ext': 'mp4',
|
||||
'title': 'strange erotica',
|
||||
'description': 'http://www.xtube.com an ET kind of thing',
|
||||
'description': 'contains:an ET kind of thing',
|
||||
'uploader': 'greenshowers',
|
||||
'duration': 450,
|
||||
'age_limit': 18,
|
||||
|
@ -24,7 +24,6 @@ class YahooIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
||||
'md5': '4962b075c08be8690a922ee026d05e69',
|
||||
'info_dict': {
|
||||
'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
|
||||
'ext': 'mp4',
|
||||
|
@ -1560,8 +1560,8 @@ def js_to_json(code):
|
||||
return '"%s"' % v
|
||||
|
||||
res = re.sub(r'''(?x)
|
||||
"(?:[^"\\]*(?:\\\\|\\")?)*"|
|
||||
'(?:[^'\\]*(?:\\\\|\\')?)*'|
|
||||
"(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
|
||||
'(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
|
||||
[a-zA-Z_][.a-zA-Z_0-9]*
|
||||
''', fix_kv, code)
|
||||
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
|
||||
@ -1616,6 +1616,15 @@ def args_to_str(args):
|
||||
return ' '.join(shlex_quote(a) for a in args)
|
||||
|
||||
|
||||
def mimetype2ext(mt):
|
||||
_, _, res = mt.rpartition('/')
|
||||
|
||||
return {
|
||||
'x-ms-wmv': 'wmv',
|
||||
'x-mp4-fragmented': 'mp4',
|
||||
}.get(res, res)
|
||||
|
||||
|
||||
def urlhandle_detect_ext(url_handle):
|
||||
try:
|
||||
url_handle.headers
|
||||
@ -1631,7 +1640,7 @@ def urlhandle_detect_ext(url_handle):
|
||||
if e:
|
||||
return e
|
||||
|
||||
return getheader('Content-Type').split("/")[1]
|
||||
return mimetype2ext(getheader('Content-Type'))
|
||||
|
||||
|
||||
def age_restricted(content_limit, age_limit):
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.02.18.1'
|
||||
__version__ = '2015.02.19.3'
|
||||
|
Reference in New Issue
Block a user