Compare commits

...

10 Commits

Author SHA1 Message Date
a025d3c5a5 release 2015.02.19 2015-02-19 00:31:23 +01:00
c460bdd56b [sandia] Add new extractor (#4974) 2015-02-19 00:31:01 +01:00
b81a359eb6 [YoutubeDL] Use render_table for format listing 2015-02-19 00:28:58 +01:00
d61aefb24c Merge remote-tracking branch 'origin/master' 2015-02-19 00:01:14 +01:00
d305dd73a3 [utils] Fix js_to_json
Previously, the runtime could be atrocious for longer inputs.
2015-02-18 23:59:51 +01:00
93a16ba238 [vimeo] Raise the ExtractorError with expected=True when no video password is given 2015-02-18 22:00:12 +01:00
85d5866177 [yahoo] Remove md5sum from test case
The md5 sum has changed repeatedly, and we check whether it looks like a video anyways nowadays.
2015-02-18 20:03:04 +01:00
9789d7535d [xtube] Fix test case 2015-02-18 19:58:41 +01:00
d8443cd3f7 [wsj] Correct test case 2015-02-18 19:56:24 +01:00
d47c26e168 [brightcove] Correct keys in playlists 2015-02-18 19:56:10 +01:00
13 changed files with 159 additions and 28 deletions

View File

@ -352,6 +352,7 @@
- **rutube:movie**: Rutube movies - **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos - **rutube:person**: Rutube person videos
- **RUTV**: RUTV.RU - **RUTV**: RUTV.RU
- **Sandia**: Sandia National Laboratories
- **Sapo**: SAPO Vídeos - **Sapo**: SAPO Vídeos
- **savefrom.net** - **savefrom.net**
- **SBS**: sbs.com.au - **SBS**: sbs.com.au

View File

@ -113,6 +113,16 @@ def expect_info_dict(self, got_dict, expected_dict):
self.assertTrue( self.assertTrue(
got.startswith(start_str), got.startswith(start_str),
'field %s (value: %r) should start with %r' % (info_field, got, start_str)) 'field %s (value: %r) should start with %r' % (info_field, got, start_str))
elif isinstance(expected, compat_str) and expected.startswith('contains:'):
got = got_dict.get(info_field)
contains_str = expected[len('contains:'):]
self.assertTrue(
isinstance(got, compat_str),
'Expected a %s object, but got %s for field %s' % (
compat_str.__name__, type(got).__name__, info_field))
self.assertTrue(
contains_str in got,
'field %s (value: %r) should contain %r' % (info_field, got, contains_str))
elif isinstance(expected, type): elif isinstance(expected, type):
got = got_dict.get(info_field) got = got_dict.get(info_field)
self.assertTrue(isinstance(got, expected), self.assertTrue(isinstance(got, expected),

View File

@ -370,6 +370,10 @@ class TestUtil(unittest.TestCase):
"playlist":[{"controls":{"all":null}}] "playlist":[{"controls":{"all":null}}]
}''') }''')
inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
json_code = js_to_json(inp)
self.assertEqual(json.loads(json_code), json.loads(inp))
def test_js_to_json_edgecases(self): def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})

View File

@ -1534,29 +1534,18 @@ class YoutubeDL(object):
return res return res
def list_formats(self, info_dict): def list_formats(self, info_dict):
def line(format, idlen=20):
return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
format['format_id'],
format['ext'],
self.format_resolution(format),
self._format_note(format),
))
formats = info_dict.get('formats', [info_dict]) formats = info_dict.get('formats', [info_dict])
idlen = max(len('format code'), table = [
max(len(f['format_id']) for f in formats)) [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
formats_s = [ for f in formats
line(f, idlen) for f in formats
if f.get('preference') is None or f['preference'] >= -1000] if f.get('preference') is None or f['preference'] >= -1000]
if len(formats) > 1: if len(formats) > 1:
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
header_line = line({ header_line = ['format code', 'extension', 'resolution', 'note']
'format_id': 'format code', 'ext': 'extension',
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
self.to_screen( self.to_screen(
'[info] Available formats for %s:\n%s\n%s' % '[info] Available formats for %s:\n%s' %
(info_dict['id'], header_line, '\n'.join(formats_s))) (info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict): def list_thumbnails(self, info_dict):
thumbnails = info_dict.get('thumbnails') thumbnails = info_dict.get('thumbnails')

View File

@ -386,6 +386,7 @@ from .rutube import (
RutubePersonIE, RutubePersonIE,
) )
from .rutv import RUTVIE from .rutv import RUTVIE
from .sandia import SandiaIE
from .sapo import SapoIE from .sapo import SapoIE
from .savefrom import SaveFromIE from .savefrom import SaveFromIE
from .sbs import SBSIE from .sbs import SBSIE

View File

@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor):
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL', 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
'info_dict': { 'info_dict': {
'title': 'Sealife', 'title': 'Sealife',
'id': '3550319591001',
}, },
'playlist_mincount': 7, 'playlist_mincount': 7,
}, },
@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor):
playlist_info = json_data['videoList'] playlist_info = json_data['videoList']
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']] videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
return self.playlist_result(videos, playlist_id=playlist_info['id'], return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
playlist_title=playlist_info['mediaCollectionDTO']['displayName']) playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
def _extract_video_info(self, video_info): def _extract_video_info(self, video_info):

View File

@ -0,0 +1,117 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
import json
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
unified_strdate,
)
class SandiaIE(InfoExtractor):
IE_DESC = 'Sandia National Laboratories'
_VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)'
_TEST = {
'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
'md5': '9422edc9b9a60151727e4b6d8bef393d',
'info_dict': {
'id': '24aace4429fc450fb5b38cdbf424a66e1d',
'ext': 'mp4',
'title': 'Xyce Software Training - Section 1',
'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
'upload_date': '20120904',
'duration': 7794,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
webpage = self._download_webpage(req, video_id)
js_path = self._search_regex(
r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
webpage, 'JS code URL')
js_url = compat_urlparse.urljoin(url, js_path)
js_code = self._download_webpage(
js_url, video_id, note='Downloading player')
def extract_str(key, **args):
return self._search_regex(
r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
js_code, key, **args)
def extract_data(key, **args):
data_json = extract_str(key, **args)
if data_json is None:
return data_json
return self._parse_json(
data_json, video_id, transform_source=js_to_json)
formats = []
for i in itertools.count():
fd = extract_data('VideoUrls[%d]' % i, default=None)
if fd is None:
break
formats.append({
'format_id': '%s' % i,
'format_note': fd['MimeType'].partition('/')[2],
'ext': mimetype2ext(fd['MimeType']),
'url': fd['Location'],
'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
})
self._sort_formats(formats)
slide_baseurl = compat_urlparse.urljoin(
url, extract_data('SlideBaseUrl'))
slide_template = slide_baseurl + re.sub(
r'\{0:D?([0-9+])\}', r'%0\1d', extract_data('SlideImageFileNameTemplate'))
slides = []
last_slide_time = 0
for i in itertools.count(1):
sd = extract_str('Slides[%d]' % i, default=None)
if sd is None:
break
timestamp = int_or_none(self._search_regex(
r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
sd, 'slide %s timestamp' % i, fatal=False))
slides.append({
'url': slide_template % i,
'duration': timestamp - last_slide_time,
})
last_slide_time = timestamp
formats.append({
'format_id': 'slides',
'protocol': 'slideshow',
'url': json.dumps(slides),
'preference': -10000, # Downloader not yet written
})
self._sort_formats(formats)
title = extract_data('Title')
description = extract_data('Description', fatal=False)
duration = int_or_none(extract_data(
'Duration', fatal=False), scale=1000)
upload_date = unified_strdate(extract_data('AirDate', fatal=False))
return {
'id': video_id,
'title': title,
'description': description,
'formats': formats,
'upload_date': upload_date,
'duration': duration,
}

View File

@ -175,7 +175,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
def _verify_video_password(self, url, video_id, webpage): def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword', None) password = self._downloader.params.get('videopassword', None)
if password is None: if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option') raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
data = compat_urllib_parse.urlencode({ data = compat_urllib_parse.urlencode({
'password': password, 'password': password,

View File

@ -18,8 +18,8 @@ class WSJIE(InfoExtractor):
'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A', 'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20150202', 'upload_date': '20150202',
'uploader_id': 'bbright', 'uploader_id': 'jdesai',
'creator': 'bbright', 'creator': 'jdesai',
'categories': list, # a long list 'categories': list, # a long list
'duration': 90, 'duration': 90,
'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo', 'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',

View File

@ -22,7 +22,7 @@ class XTubeIE(InfoExtractor):
'id': 'kVTUy_G222_', 'id': 'kVTUy_G222_',
'ext': 'mp4', 'ext': 'mp4',
'title': 'strange erotica', 'title': 'strange erotica',
'description': 'http://www.xtube.com an ET kind of thing', 'description': 'contains:an ET kind of thing',
'uploader': 'greenshowers', 'uploader': 'greenshowers',
'duration': 450, 'duration': 450,
'age_limit': 18, 'age_limit': 18,

View File

@ -24,7 +24,6 @@ class YahooIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
'md5': '4962b075c08be8690a922ee026d05e69',
'info_dict': { 'info_dict': {
'id': '2d25e626-2378-391f-ada0-ddaf1417e588', 'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
'ext': 'mp4', 'ext': 'mp4',

View File

@ -1560,8 +1560,8 @@ def js_to_json(code):
return '"%s"' % v return '"%s"' % v
res = re.sub(r'''(?x) res = re.sub(r'''(?x)
"(?:[^"\\]*(?:\\\\|\\")?)*"| "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\')?)*'| '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
[a-zA-Z_][.a-zA-Z_0-9]* [a-zA-Z_][.a-zA-Z_0-9]*
''', fix_kv, code) ''', fix_kv, code)
res = re.sub(r',(\s*\])', lambda m: m.group(1), res) res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
@ -1616,6 +1616,15 @@ def args_to_str(args):
return ' '.join(shlex_quote(a) for a in args) return ' '.join(shlex_quote(a) for a in args)
def mimetype2ext(mt):
_, _, res = mt.rpartition('/')
return {
'x-ms-wmv': 'wmv',
'x-mp4-fragmented': 'mp4',
}.get(res, res)
def urlhandle_detect_ext(url_handle): def urlhandle_detect_ext(url_handle):
try: try:
url_handle.headers url_handle.headers
@ -1631,7 +1640,7 @@ def urlhandle_detect_ext(url_handle):
if e: if e:
return e return e
return getheader('Content-Type').split("/")[1] return mimetype2ext(getheader('Content-Type'))
def age_restricted(content_limit, age_limit): def age_restricted(content_limit, age_limit):

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2015.02.18.1' __version__ = '2015.02.19'