Compare commits

...

10 Commits

7 changed files with 118 additions and 25 deletions

View File

@ -145,7 +145,8 @@ def expect_info_dict(self, expected_dict, got_dict):
info_dict_str = ''.join( info_dict_str = ''.join(
' %s: %s,\n' % (_repr(k), _repr(v)) ' %s: %s,\n' % (_repr(k), _repr(v))
for k, v in test_info_dict.items()) for k, v in test_info_dict.items())
write_string('\n"info_dict": {\n' + info_dict_str + '}\n', out=sys.stderr) write_string(
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
self.assertFalse( self.assertFalse(
missing_keys, missing_keys,
'Missing keys in test definition: %s' % ( 'Missing keys in test definition: %s' % (

View File

@ -16,6 +16,7 @@ import json
import xml.etree.ElementTree import xml.etree.ElementTree
from youtube_dl.utils import ( from youtube_dl.utils import (
clean_html,
DateRange, DateRange,
encodeFilename, encodeFilename,
find_xpath_attr, find_xpath_attr,
@ -45,6 +46,7 @@ from youtube_dl.utils import (
escape_url, escape_url,
js_to_json, js_to_json,
get_filesystem_encoding, get_filesystem_encoding,
intlist_to_bytes,
) )
@ -345,5 +347,14 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{"abc": true}') on = js_to_json('{"abc": true}')
self.assertEqual(json.loads(on), {'abc': True}) self.assertEqual(json.loads(on), {'abc': True})
def test_clean_html(self):
    """Expect clean_html to render a newline (plus a following space) as one space."""
    cases = (
        ('a:\nb', 'a: b'),
        ('a:\n "b"', 'a: "b"'),
    )
    for raw, expected in cases:
        self.assertEqual(clean_html(raw), expected)
def test_intlist_to_bytes(self):
    """Expect the integers 0, 1, 127, 128 and 255 to become the matching byte string."""
    octets = [0, 1, 127, 128, 255]
    packed = intlist_to_bytes(octets)
    self.assertEqual(packed, b'\x00\x01\x7f\x80\xff')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -324,6 +324,7 @@ from .sbs import SBSIE
from .scivee import SciVeeIE from .scivee import SciVeeIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .sexu import SexuIE
from .sexykarma import SexyKarmaIE from .sexykarma import SexyKarmaIE
from .shared import SharedIE from .shared import SharedIE
from .sharesix import ShareSixIE from .sharesix import ShareSixIE

View File

@ -0,0 +1,61 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class SexuIE(InfoExtractor):
    """Information extractor for video pages matching ``sexu.com/<numeric id>``."""

    _VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P<id>\d+)'
    _TEST = {
        'url': 'http://sexu.com/961791/',
        'md5': 'ff615aca9691053c94f8f10d96cd7884',
        'info_dict': {
            'id': '961791',
            'ext': 'mp4',
            'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
            'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54',
            'categories': list,  # NSFW
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Download the video page at *url* and build its info dict.

        Formats are read from the ``sources: [...]`` array embedded in the
        page; title, description, thumbnail and categories come from the
        page's <title>, meta tags and an ``image: "..."`` entry.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``categories``, ``formats`` and ``age_limit``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        quality_arr = self._search_regex(
            r'sources:\s*\[([^\]]+)\]', webpage, 'format string')

        formats = []
        for file_url, label in re.findall(
                r'"file":"([^"]+)","label":"([^"]+)"', quality_arr):
            # Take every leading digit of the label instead of a fixed
            # three-character slice, so that '1080p' yields 1080 (not 108)
            # and a label without leading digits yields None instead of
            # raising ValueError.
            height_match = re.match(r'\d+', label)
            formats.append({
                # The URL is embedded with backslash-escaped characters.
                'url': file_url.replace('\\', ''),
                'format_id': label,
                'height': int(height_match.group()) if height_match else None,
            })
        self._sort_formats(formats)

        title = self._html_search_regex(
            r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')

        description = self._html_search_meta(
            'description', webpage, 'description')

        thumbnail = self._html_search_regex(
            r'image:\s*"([^"]+)"',
            webpage, 'thumbnail', fatal=False)

        categories_str = self._html_search_meta(
            'keywords', webpage, 'categories')
        categories = (
            None if categories_str is None
            else categories_str.split(','))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
            'age_limit': 18,
        }

View File

@ -4,11 +4,11 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import compat_urlparse from ..compat import compat_urlparse
class SpiegelIE(InfoExtractor): class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$' _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'
_TESTS = [{ _TESTS = [{
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
'md5': '2c2754212136f35fb4b19767d242f66e', 'md5': '2c2754212136f35fb4b19767d242f66e',
@ -29,16 +29,24 @@ class SpiegelIE(InfoExtractor):
'description': 'md5:c2322b65e58f385a820c10fa03b2d088', 'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
'duration': 983, 'duration': 983,
}, },
}, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51',
'info_dict': {
'id': '1519126',
'ext': 'mp4',
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = m.group('videoID')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex( title = re.sub(r'\s+', ' ', self._html_search_regex(
r'<div class="module-title">(.*?)</div>', webpage, 'title') r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
webpage, 'title'))
description = self._html_search_meta('description', webpage, 'description') description = self._html_search_meta('description', webpage, 'description')
base_url = self._search_regex( base_url = self._search_regex(
@ -79,7 +87,7 @@ class SpiegelArticleIE(InfoExtractor):
_VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html' _VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
IE_NAME = 'Spiegel:Article' IE_NAME = 'Spiegel:Article'
IE_DESC = 'Articles on spiegel.de' IE_DESC = 'Articles on spiegel.de'
_TEST = { _TESTS = [{
'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html', 'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
'info_dict': { 'info_dict': {
'id': '1516455', 'id': '1516455',
@ -87,20 +95,34 @@ class SpiegelArticleIE(InfoExtractor):
'title': 'Faszination Badminton: Nennt es bloß nicht Federball', 'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
'description': 're:^Patrick Kämnitz gehört.{100,}', 'description': 're:^Patrick Kämnitz gehört.{100,}',
}, },
} }, {
'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
'info_dict': {
},
'playlist_count': 6,
}]
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = m.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
# Single video on top of the page
video_link = self._search_regex( video_link = self._search_regex(
r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage, r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
'video page URL') 'video page URL', default=None)
video_url = compat_urlparse.urljoin( if video_link:
self.http_scheme() + '//spiegel.de/', video_link) video_url = compat_urlparse.urljoin(
self.http_scheme() + '//spiegel.de/', video_link)
return self.url_result(video_url)
return { # Multiple embedded videos
'_type': 'url', embeds = re.findall(
'url': video_url, r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
} webpage)
entries = [
self.url_result(compat_urlparse.urljoin(
self.http_scheme() + '//spiegel.de/', embed_path))
for embed_path in embeds
]
return self.playlist_result(entries)

View File

@ -843,10 +843,7 @@ def bytes_to_intlist(bs):
def intlist_to_bytes(xs): def intlist_to_bytes(xs):
if not xs: if not xs:
return b'' return b''
if isinstance(chr(0), bytes): # Python 2 return struct.pack('%dB' % len(xs), *xs)
return ''.join([chr(x) for x in xs])
else:
return bytes(xs)
# Cross-platform file locking # Cross-platform file locking

View File

@ -1,2 +1,2 @@
__version__ = '2014.11.13' __version__ = '2014.11.13.1'