Compare commits

...

34 Commits

Author SHA1 Message Date
a21420389e release 2015.02.19.3 2015-02-19 19:28:17 +01:00
6140baf4e1 [nationalgeographic] Add extractor (closes #4960) 2015-02-19 18:17:31 +01:00
8fc642eb5b [pornhub] Fix uploader regex 2015-02-19 22:15:49 +06:00
e66e1a0046 [pornhub] Add support for playlists (Closes #4995) 2015-02-19 22:15:19 +06:00
d5c69f1da4 [5min] Cover joystiq.com URLs (Closes #4962) 2015-02-19 21:47:11 +06:00
5c8a3f862a [nbc] Use a test video that works outside the US 2015-02-19 15:00:39 +01:00
a3b9157f49 [cbssports] Add extractor (closes #4996) 2015-02-19 13:06:53 +01:00
b88ba05356 [imgur] Simplify 2015-02-19 05:53:09 +01:00
b74d505577 Merge remote-tracking branch 'jbboehr/imgur-gifv-improvements' 2015-02-19 05:16:11 +01:00
9e2d7dca87 [imgur] improve error check for non-video URLs 2015-02-18 19:47:54 -08:00
d236b37ac9 [imgur] improve regex #4998 2015-02-18 19:28:19 -08:00
e880c66bd8 [theonion] Modernize 2015-02-19 04:12:40 +01:00
383456aa29 [Makefile] Also delete *.avi files in clean 2015-02-19 04:09:52 +01:00
1a13940c8d [imgur] support regular URL 2015-02-18 18:12:48 -08:00
3d54788495 [webofstories] Fix extraction 2015-02-19 02:12:08 +01:00
71d53ace2f [sockshare] Do not require thumbnail anymore
Thumbnail is not present on the website anymore.
2015-02-19 02:04:30 +01:00
f37e3f99f0 [generic] Correct test case
Video has been reuploaded / edited
2015-02-19 02:00:52 +01:00
bd03ffc16e [netzkino] Skip download in test case
Works fine from Germany, but fails from everywhere else
2015-02-19 01:58:54 +01:00
1ac1af9b47 release 2015.02.19.2 2015-02-19 01:43:28 +01:00
3bf5705316 [imgur] Add new extractor 2015-02-19 01:43:20 +01:00
1c2528c8a3 [cbs] Modernize 2015-02-19 01:22:50 +01:00
7bd15b1a03 release 2015.02.19.1 2015-02-19 01:04:24 +01:00
6b961a85fd [patreon] Add support for embedlies (fixes #4969) 2015-02-19 01:04:19 +01:00
7707004043 [patreon] Modernize 2015-02-19 00:38:05 +01:00
a025d3c5a5 release 2015.02.19 2015-02-19 00:31:23 +01:00
c460bdd56b [sandia] Add new extractor (#4974) 2015-02-19 00:31:01 +01:00
b81a359eb6 [YoutubeDL] Use render_table for format listing 2015-02-19 00:28:58 +01:00
d61aefb24c Merge remote-tracking branch 'origin/master' 2015-02-19 00:01:14 +01:00
d305dd73a3 [utils] Fix js_to_json
Previously, the runtime could be atrocious for longer inputs.
2015-02-18 23:59:51 +01:00
93a16ba238 [vimeo] Raise the ExtractorError with expected=True when no video password is given 2015-02-18 22:00:12 +01:00
85d5866177 [yahoo] Remove md5sum from test case
The md5 sum has changed repeatedly, and we check whether it looks like a video anyways nowadays.
2015-02-18 20:03:04 +01:00
9789d7535d [xtube] Fix test case 2015-02-18 19:58:41 +01:00
d8443cd3f7 [wsj] Correct test case 2015-02-18 19:56:24 +01:00
d47c26e168 [brightcove] Correct keys in playlists 2015-02-18 19:56:10 +01:00
29 changed files with 443 additions and 79 deletions

View File

@ -1,7 +1,7 @@
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin

View File

@ -68,6 +68,7 @@
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **CBS**
- **CBSNews**: CBS News
- **CBSSports**
- **CeskaTelevize**
- **channel9**: Channel 9
- **Chilloutzone**
@ -121,6 +122,7 @@
- **EllenTV**
- **EllenTV:clips**
- **ElPais**: El País
- **Embedly**
- **EMPFlix**
- **Engadget**
- **Eporner**
@ -190,6 +192,7 @@
- **ign.com**
- **imdb**: Internet Movie Database trailers
- **imdb:list**: Internet Movie Database lists
- **Imgur**
- **Ina**
- **InfoQ**
- **Instagram**
@ -262,6 +265,7 @@
- **myvideo**
- **MyVidster**
- **n-tv.de**
- **NationalGeographic**
- **Naver**
- **NBA**
- **NBC**
@ -319,6 +323,7 @@
- **podomatic**
- **PornHd**
- **PornHub**
- **PornHubPlaylist**
- **Pornotube**
- **PornoXO**
- **PromptFile**
@ -352,6 +357,7 @@
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
- **RUTV**: RUTV.RU
- **Sandia**: Sandia National Laboratories
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **SBS**: sbs.com.au

View File

@ -113,6 +113,16 @@ def expect_info_dict(self, got_dict, expected_dict):
self.assertTrue(
got.startswith(start_str),
'field %s (value: %r) should start with %r' % (info_field, got, start_str))
elif isinstance(expected, compat_str) and expected.startswith('contains:'):
got = got_dict.get(info_field)
contains_str = expected[len('contains:'):]
self.assertTrue(
isinstance(got, compat_str),
'Expected a %s object, but got %s for field %s' % (
compat_str.__name__, type(got).__name__, info_field))
self.assertTrue(
contains_str in got,
'field %s (value: %r) should contain %r' % (info_field, got, contains_str))
elif isinstance(expected, type):
got = got_dict.get(info_field)
self.assertTrue(isinstance(got, expected),

View File

@ -370,6 +370,10 @@ class TestUtil(unittest.TestCase):
"playlist":[{"controls":{"all":null}}]
}''')
inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
json_code = js_to_json(inp)
self.assertEqual(json.loads(json_code), json.loads(inp))
def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})

View File

@ -1534,29 +1534,18 @@ class YoutubeDL(object):
return res
def list_formats(self, info_dict):
def line(format, idlen=20):
return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
format['format_id'],
format['ext'],
self.format_resolution(format),
self._format_note(format),
))
formats = info_dict.get('formats', [info_dict])
idlen = max(len('format code'),
max(len(f['format_id']) for f in formats))
formats_s = [
line(f, idlen) for f in formats
table = [
[f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
for f in formats
if f.get('preference') is None or f['preference'] >= -1000]
if len(formats) > 1:
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
header_line = line({
'format_id': 'format code', 'ext': 'extension',
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
header_line = ['format code', 'extension', 'resolution', 'note']
self.to_screen(
'[info] Available formats for %s:\n%s\n%s' %
(info_dict['id'], header_line, '\n'.join(formats_s)))
'[info] Available formats for %s:\n%s' %
(info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict):
thumbnails = info_dict.get('thumbnails')

View File

@ -58,6 +58,7 @@ from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
from .cbsnews import CBSNewsIE
from .cbssports import CBSSportsIE
from .ccc import CCCIE
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
@ -121,6 +122,7 @@ from .ellentv import (
EllenTVClipsIE,
)
from .elpais import ElPaisIE
from .embedly import EmbedlyIE
from .empflix import EMPFlixIE
from .engadget import EngadgetIE
from .eporner import EpornerIE
@ -204,6 +206,7 @@ from .imdb import (
ImdbIE,
ImdbListIE
)
from .imgur import ImgurIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE, InstagramUserIE
@ -282,6 +285,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .myvidster import MyVidsterIE
from .nationalgeographic import NationalGeographicIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import (
@ -350,7 +354,10 @@ from .playfm import PlayFMIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
from .pornhub import PornHubIE
from .pornhub import (
PornHubIE,
PornHubPlaylistIE,
)
from .pornotube import PornotubeIE
from .pornoxo import PornoXOIE
from .promptfile import PromptFileIE
@ -386,6 +393,7 @@ from .rutube import (
RutubePersonIE,
)
from .rutv import RUTVIE
from .sandia import SandiaIE
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE

View File

@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor):
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
'info_dict': {
'title': 'Sealife',
'id': '3550319591001',
},
'playlist_mincount': 7,
},
@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor):
playlist_info = json_data['videoList']
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
return self.playlist_result(videos, playlist_id=playlist_info['id'],
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
def _extract_video_info(self, video_info):

View File

@ -1,7 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@ -39,8 +37,7 @@ class CBSIE(InfoExtractor):
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
real_id = self._search_regex(
r"video\.settings\.pid\s*=\s*'([^']+)';",

View File

@ -0,0 +1,30 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class CBSSportsIE(InfoExtractor):
    """Extractor for cbssports.com video player pages.

    The page URL carries a section name and a video id; the actual media is
    resolved by handing the matching ``pid`` over to the ThePlatform extractor.
    """

    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
        'info_dict': {
            'id': '_d5_GbO8p1sT',
            'ext': 'flv',
            'title': 'US Open flashbacks: 1990s',
            'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
        },
    }

    def _real_extract(self, url):
        """Look the video up in its section listing and delegate to ThePlatform.

        Raises ExtractorError when the section listing does not contain an
        entry whose ``pcid`` equals the id taken from the URL.
        """
        # Function-scope import: this module's header only imports
        # InfoExtractor, and the error type is needed only on the failure path.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        section = mobj.group('section')
        video_id = mobj.group('id')
        all_videos = self._download_json(
            'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
            video_id)
        # The json file contains the info of all the videos in the section.
        # Use a default so a missing entry becomes a readable extractor error
        # instead of a bare StopIteration escaping from next().
        video_info = next(
            (v for v in all_videos if v['pcid'] == video_id), None)
        if video_info is None:
            raise ExtractorError(
                'Unable to find video %s in section %s' % (video_id, section))
        return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')

View File

@ -0,0 +1,16 @@
# encoding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
class EmbedlyIE(InfoExtractor):
    """Resolve embedly media-widget URLs to the URL they wrap.

    The wrapped address is taken from the widget's ``url=`` query parameter,
    percent-decoded, and handed back as a URL result for another extractor.
    """

    # The host prefix is grouped as (?:(?:www|cdn)\.)? so that the dot applies
    # to both alternatives; with the dot attached to "cdn" only,
    # "www.embedly.com" would never match.
    _VALID_URL = r'https?://(?:(?:www|cdn)\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a URL result for the percent-decoded ``url=`` parameter."""
        return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))

View File

@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor):
IE_NAME = '5min'
_VALID_URL = r'''(?x)
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
5min:)
(?P<id>\d+)
'''

View File

@ -532,7 +532,7 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': 'Mrj4DVp2zeA',
'ext': 'mp4',
'upload_date': '20150204',
'upload_date': '20150212',
'uploader': 'The National Archives UK',
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
'uploader_id': 'NationalArchives08',

View File

@ -0,0 +1,97 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
ExtractorError,
)
class ImgurIE(InfoExtractor):
    """Extractor for imgur.com pages that carry <source> video elements."""

    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'

    _TESTS = [{
        'url': 'https://i.imgur.com/A61SaA1.gifv',
        'info_dict': {
            'id': 'A61SaA1',
            'ext': 'mp4',
            'title': 'MRW gifv is up and running without any bugs',
            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
        },
    }, {
        'url': 'https://imgur.com/A61SaA1',
        'info_dict': {
            'id': 'A61SaA1',
            'ext': 'mp4',
            'title': 'MRW gifv is up and running without any bugs',
            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
        },
    }]

    def _real_extract(self, url):
        """Collect one format per <source> tag plus, if present, the GIF.

        Raises ExtractorError (expected) when the page has no
        ``video-elements`` container.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Both dimensions are optional (fatal=False).
        width_text = self._search_regex(
            r'<param name="width" value="([0-9]+)"',
            webpage, 'width', fatal=False)
        width = int_or_none(width_text)
        height_text = self._search_regex(
            r'<param name="height" value="([0-9]+)"',
            webpage, 'height', fatal=False)
        height = int_or_none(height_text)

        sources_html = self._search_regex(
            r'(?s)<div class="video-elements">(.*?)</div>',
            webpage, 'video elements', default=None)
        if not sources_html:
            raise ExtractorError(
                'No sources found for video %s. Maybe an image?' % video_id,
                expected=True)

        # One format per <source src=... type=...> tag inside the container.
        formats = [{
            'format_id': source.group('type').partition('/')[2],
            'url': self._proto_relative_url(source.group('src')),
            'ext': mimetype2ext(source.group('type')),
            'acodec': 'none',
            'width': width,
            'height': height,
            'http_headers': {
                'User-Agent': 'youtube-dl (like wget)',
            },
        } for source in re.finditer(
            r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"',
            sources_html)]

        # The page may also embed a JS object describing the plain GIF;
        # it is added as a lower-preference format.
        gif_source = self._search_regex(
            r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
            webpage, 'GIF code', fatal=False)
        if gif_source:
            gif_info = self._parse_json(
                gif_source, video_id, transform_source=js_to_json)
            gif_format = {
                'format_id': 'gif',
                'preference': -10,
                'width': width,
                'height': height,
                'ext': 'gif',
                'acodec': 'none',
                'vcodec': 'gif',
                'container': 'gif',
                'url': self._proto_relative_url(gif_info['gifUrl']),
                'filesize': gif_info.get('size'),
                'http_headers': {
                    'User-Agent': 'youtube-dl (like wget)',
                },
            }
            formats.append(gif_format)

        self._sort_formats(formats)

        description = self._og_search_description(webpage)
        title = self._og_search_title(webpage)
        return {
            'id': video_id,
            'formats': formats,
            'description': description,
            'title': title,
        }

View File

@ -0,0 +1,38 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
smuggle_url,
url_basename,
)
class NationalGeographicIE(InfoExtractor):
    """Extractor for video.nationalgeographic.com; defers to ThePlatform."""

    _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'

    _TEST = {
        'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
        'info_dict': {
            'id': '4DmDACA6Qtk_',
            'ext': 'flv',
            'title': 'Mating Crabs Busted by Sharks',
            'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
        },
        'add_ie': ['ThePlatform'],
    }

    def _real_extract(self, url):
        """Resolve the page's feed entry and hand its id to ThePlatform."""
        # The last path component of the URL doubles as the display name
        # passed to the download helpers.
        display_name = url_basename(url)
        webpage = self._download_webpage(url, display_name)

        feed_url = self._search_regex(
            r'data-feed-url="([^"]+)"', webpage, 'feed url')
        guid = self._search_regex(
            r'data-video-guid="([^"]+)"', webpage, 'guid')

        feed_request = '%s?byGuid=%s' % (feed_url, guid)
        feed = self._download_xml(feed_request, display_name)
        media_content = feed.find('.//{http://search.yahoo.com/mrss/}content')
        theplatform_id = url_basename(media_content.attrib.get('url'))

        smil_url = (
            'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m'
            % theplatform_id)
        # For some reason, the normal links don't work and we must force the
        # use of f4m
        smuggled = smuggle_url(smil_url, {'force_smil_url': True})
        return self.url_result(smuggled)

View File

@ -18,13 +18,13 @@ class NBCIE(InfoExtractor):
_TESTS = [
{
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
# md5 checksum is not stable
'info_dict': {
'id': 'bTmnLCvIbaaH',
'id': 'c9xnCo0YPOPH',
'ext': 'flv',
'title': 'I Am a Firefighter',
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
},
},
{

View File

@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor):
'timestamp': 1344858571,
'age_limit': 12,
},
'params': {
'skip_download': 'Download only works from Germany',
}
}
def _real_extract(self, url):

View File

@ -1,9 +1,6 @@
# encoding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
js_to_json,
@ -11,7 +8,7 @@ from ..utils import (
class PatreonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)'
_VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
_TESTS = [
{
'url': 'http://www.patreon.com/creation?hid=743933',
@ -35,6 +32,23 @@ class PatreonIE(InfoExtractor):
'thumbnail': 're:^https?://.*$',
},
},
{
'url': 'https://www.patreon.com/creation?hid=1682498',
'info_dict': {
'id': 'SU4fj_aEMVw',
'ext': 'mp4',
'title': 'I\'m on Patreon!',
'uploader': 'TraciJHines',
'thumbnail': 're:^https?://.*$',
'upload_date': '20150211',
'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
'uploader_id': 'TraciJHines',
},
'params': {
'noplaylist': True,
'skip_download': True,
}
}
]
# Currently Patreon exposes download URL via hidden CSS, so login is not
@ -65,26 +79,29 @@ class PatreonIE(InfoExtractor):
'''
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage).strip()
attach_fn = self._html_search_regex(
r'<div class="attach"><a target="_blank" href="([^"]+)">',
webpage, 'attachment URL', default=None)
embed = self._html_search_regex(
r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
webpage, 'embedded URL', default=None)
if attach_fn is not None:
video_url = 'http://www.patreon.com' + attach_fn
thumbnail = self._og_search_thumbnail(webpage)
uploader = self._html_search_regex(
r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
elif embed is not None:
return self.url_result(embed)
else:
playlist_js = self._search_regex(
playlist = self._parse_json(self._search_regex(
r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
webpage, 'playlist JSON')
playlist_json = js_to_json(playlist_js)
playlist = json.loads(playlist_json)
webpage, 'playlist JSON'),
video_id, transform_source=js_to_json)
data = playlist[0]
video_url = self._proto_relative_url(data['mp3'])
thumbnail = self._proto_relative_url(data.get('cover'))

View File

@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor):
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
webpage, 'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
if thumbnail:
@ -110,3 +110,33 @@ class PornHubIE(InfoExtractor):
'formats': formats,
'age_limit': 18,
}
class PornHubPlaylistIE(InfoExtractor):
    """Extractor for pornhub.com playlist pages."""

    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.pornhub.com/playlist/6201671',
        'info_dict': {
            'id': '6201671',
            'title': 'P0p4',
        },
        'playlist_mincount': 35,
    }]

    def _real_extract(self, url):
        """Return a playlist result with one PornHub entry per linked video.

        Title and description come from the page's ``playlistObject`` JSON.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # De-duplicate the links while keeping the order in which they appear
        # on the page; a plain set() would make the entry order arbitrary.
        # The pattern is a raw string so that \. \? and \d are not treated as
        # (invalid) string escape sequences.
        seen = set()
        entries = []
        for video_url in re.findall(
                r'href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage):
            if video_url in seen:
                continue
            seen.add(video_url)
            entries.append(self.url_result(
                'http://www.pornhub.com/%s' % video_url, 'PornHub'))

        playlist = self._parse_json(
            self._search_regex(
                r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
            playlist_id)

        return self.playlist_result(
            entries, playlist_id, playlist.get('title'), playlist.get('description'))

View File

@ -0,0 +1,117 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
import json
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
unified_strdate,
)
class SandiaIE(InfoExtractor):
    """Extractor for Mediasite presentations on digitalops.sandia.gov."""

    IE_DESC = 'Sandia National Laboratories'
    _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)'
    _TEST = {
        'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
        'md5': '9422edc9b9a60151727e4b6d8bef393d',
        'info_dict': {
            'id': '24aace4429fc450fb5b38cdbf424a66e1d',
            'ext': 'mp4',
            'title': 'Xyce Software Training - Section 1',
            'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
            'upload_date': '20120904',
            'duration': 7794,
        }
    }

    def _real_extract(self, url):
        """Parse the player's JS manifest into formats, slides and metadata.

        The page is requested with a ``MediasitePlayerCaps`` cookie, the
        presentation script it references is downloaded, and every value is
        read from ``Mediasite.PlaybackManifest.<key> = ...;`` assignments in
        that script.
        """
        video_id = self._match_id(url)

        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
        webpage = self._download_webpage(req, video_id)

        js_path = self._search_regex(
            r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
            webpage, 'JS code URL')
        js_url = compat_urlparse.urljoin(url, js_path)

        js_code = self._download_webpage(
            js_url, video_id, note='Downloading player')

        def extract_str(key, **args):
            # Raw right-hand side of the manifest assignment for `key`.
            return self._search_regex(
                r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
                js_code, key, **args)

        def extract_data(key, **args):
            # Same as extract_str, but decoded from its JS literal form;
            # a missing (None) value is passed through unchanged.
            data_json = extract_str(key, **args)
            if data_json is None:
                return data_json
            return self._parse_json(
                data_json, video_id, transform_source=js_to_json)

        # VideoUrls[0], VideoUrls[1], ... until the first missing index.
        formats = []
        for i in itertools.count():
            fd = extract_data('VideoUrls[%d]' % i, default=None)
            if fd is None:
                break
            formats.append({
                'format_id': '%s' % i,
                'format_note': fd['MimeType'].partition('/')[2],
                'ext': mimetype2ext(fd['MimeType']),
                'url': fd['Location'],
                'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
            })
        self._sort_formats(formats)

        slide_baseurl = compat_urlparse.urljoin(
            url, extract_data('SlideBaseUrl'))
        # Turn a "{0:D4}"-style placeholder into the printf form "%04d".
        # The width is matched as [0-9]+ (one or more digits); the previous
        # class [0-9+] matched exactly one character that could be a digit
        # or a literal '+', so multi-digit widths were never converted.
        slide_template = slide_baseurl + re.sub(
            r'\{0:D?([0-9]+)\}', r'%0\1d', extract_data('SlideImageFileNameTemplate'))
        slides = []
        last_slide_time = 0
        # Slides[1], Slides[2], ... until the first missing index.
        for i in itertools.count(1):
            sd = extract_str('Slides[%d]' % i, default=None)
            if sd is None:
                break
            timestamp = int_or_none(self._search_regex(
                r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
                sd, 'slide %s timestamp' % i, fatal=False))
            # The timestamp lookup is non-fatal, so it may be None; in that
            # case the duration is unknown and the running time is kept.
            if timestamp is None:
                slide_duration = None
            else:
                slide_duration = timestamp - last_slide_time
                last_slide_time = timestamp
            slides.append({
                'url': slide_template % i,
                'duration': slide_duration,
            })
        formats.append({
            'format_id': 'slides',
            'protocol': 'slideshow',
            'url': json.dumps(slides),
            'preference': -10000,  # Downloader not yet written
        })
        self._sort_formats(formats)

        title = extract_data('Title')
        description = extract_data('Description', fatal=False)
        duration = int_or_none(extract_data(
            'Duration', fatal=False), scale=1000)
        upload_date = unified_strdate(extract_data('AirDate', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'upload_date': upload_date,
            'duration': duration,
        }

View File

@ -25,7 +25,6 @@ class SockshareIE(InfoExtractor):
'id': '437BE28B89D799D7',
'title': 'big_buck_bunny_720p_surround.avi',
'ext': 'avi',
'thumbnail': 're:^http://.*\.jpg$',
}
}
@ -45,7 +44,7 @@ class SockshareIE(InfoExtractor):
''', webpage, 'hash')
fields = {
"hash": confirm_hash,
"hash": confirm_hash.encode('utf-8'),
"confirm": "Continue as Free User"
}
@ -68,7 +67,7 @@ class SockshareIE(InfoExtractor):
webpage, 'title', default=None)
thumbnail = self._html_search_regex(
r'<img\s+src="([^"]*)".+?name="bg"',
webpage, 'thumbnail')
webpage, 'thumbnail', default=None)
formats = [{
'format_id': 'sd',

View File

@ -4,11 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class TheOnionIE(InfoExtractor):
_VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
_VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
_TEST = {
'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
'md5': '19eaa9a39cf9b9804d982e654dc791ee',
@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
article_id = mobj.group('article_id')
webpage = self._download_webpage(url, article_id)
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'"videoId":\s(\d+),', webpage, 'video ID')
@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor):
thumbnail = self._og_search_thumbnail(webpage)
sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
if not sources:
raise ExtractorError(
'No sources found for video %s' % video_id, expected=True)
formats = []
for src, type_ in sources:
if type_ == 'video/mp4':
@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor):
})
elif type_ == 'application/x-mpegURL':
formats.extend(
self._extract_m3u8_formats(src, video_id, preference=-1))
self._extract_m3u8_formats(src, display_id, preference=-1))
else:
self.report_warning(
'Encountered unexpected format: %s' % type_)
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,

View File

@ -71,7 +71,9 @@ class ThePlatformIE(SubtitlesInfoExtractor):
if not provider_id:
provider_id = 'dJ5BDC'
if mobj.group('config'):
if smuggled_data.get('force_smil_url', False):
smil_url = url
elif mobj.group('config'):
config_url = url + '&form=json'
config_url = config_url.replace('swf/', 'config/')
config_url = config_url.replace('onsite/', 'onsite/config/')

View File

@ -175,7 +175,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword', None)
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option')
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
data = compat_urllib_parse.urlencode({
'password': password,

View File

@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor):
description = self._html_search_meta('description', webpage)
thumbnail = self._og_search_thumbnail(webpage)
story_filename = self._search_regex(
r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
speaker_id = self._search_regex(
r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
story_id = self._search_regex(
r'\.storyId\((\d+)\)', webpage, 'story ID')
speaker_type = self._search_regex(
r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
great_life = self._search_regex(
r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
webpage, 'embed params').split(',')]
(
_, speaker_id, story_id, story_duration,
speaker_type, great_life, _thumbnail, _has_subtitles,
story_filename, _story_order) = embed_params
is_great_life_series = great_life == 'true'
duration = int_or_none(self._search_regex(
r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
duration = int_or_none(story_duration)
# URL building, see: http://www.webofstories.com/scripts/player.js
ms_prefix = ''

View File

@ -18,8 +18,8 @@ class WSJIE(InfoExtractor):
'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
'ext': 'mp4',
'upload_date': '20150202',
'uploader_id': 'bbright',
'creator': 'bbright',
'uploader_id': 'jdesai',
'creator': 'jdesai',
'categories': list, # a long list
'duration': 90,
'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',

View File

@ -22,7 +22,7 @@ class XTubeIE(InfoExtractor):
'id': 'kVTUy_G222_',
'ext': 'mp4',
'title': 'strange erotica',
'description': 'http://www.xtube.com an ET kind of thing',
'description': 'contains:an ET kind of thing',
'uploader': 'greenshowers',
'duration': 450,
'age_limit': 18,

View File

@ -24,7 +24,6 @@ class YahooIE(InfoExtractor):
_TESTS = [
{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
'md5': '4962b075c08be8690a922ee026d05e69',
'info_dict': {
'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
'ext': 'mp4',

View File

@ -1560,8 +1560,8 @@ def js_to_json(code):
return '"%s"' % v
res = re.sub(r'''(?x)
"(?:[^"\\]*(?:\\\\|\\")?)*"|
'(?:[^'\\]*(?:\\\\|\\')?)*'|
"(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
[a-zA-Z_][.a-zA-Z_0-9]*
''', fix_kv, code)
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
@ -1616,6 +1616,15 @@ def args_to_str(args):
return ' '.join(shlex_quote(a) for a in args)
def mimetype2ext(mt):
    """Map a MIME type such as ``video/x-ms-wmv`` to a file extension.

    The subtype (the part after the last ``/``) is returned as the extension
    unless it has an explicit mapping below. Parameters following a ``;``
    (e.g. ``; charset=utf-8``) and surrounding whitespace are ignored, and the
    subtype is lower-cased before the lookup.

    Returns None when *mt* is None, e.g. when a response carries no
    Content-Type header.
    """
    if mt is None:
        return None
    # Strip parameters first so a '/' inside a parameter value cannot be
    # mistaken for the type/subtype separator.
    bare_type = mt.partition(';')[0].strip()
    _, _, res = bare_type.rpartition('/')
    res = res.lower()

    return {
        'x-ms-wmv': 'wmv',
        'x-mp4-fragmented': 'mp4',
    }.get(res, res)
def urlhandle_detect_ext(url_handle):
try:
url_handle.headers
@ -1631,7 +1640,7 @@ def urlhandle_detect_ext(url_handle):
if e:
return e
return getheader('Content-Type').split("/")[1]
return mimetype2ext(getheader('Content-Type'))
def age_restricted(content_limit, age_limit):

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.02.18.1'
__version__ = '2015.02.19.3'