release 2014.11.13.1

[utils] Fix intlist_to_bytes in Python 2 (#4181)
[sexu] Modernize (#4171)
2025-07-21 21:01:59 -05:00 · 2014-11-13 15:42:48 +01:00 · 2014-11-13 15:28:42 +01:00 · 2014-11-13 15:20:49 +01:00 · 2014-11-13 15:18:38 +01:00 · 2014-11-13 15:02:31 +01:00
7 changed files with 118 additions and 25 deletions
--- a/test/helper.py
+++ b/test/helper.py
@@ -145,7 +145,8 @@ def expect_info_dict(self, expected_dict, got_dict):
        info_dict_str = ''.join(
            '    %s: %s,\n' % (_repr(k), _repr(v))
            for k, v in test_info_dict.items())
-        write_string('\n"info_dict": {\n' + info_dict_str + '}\n', out=sys.stderr)
+        write_string(
+            '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
        self.assertFalse(
            missing_keys,
            'Missing keys in test definition: %s' % (
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -16,6 +16,7 @@ import json
 import xml.etree.ElementTree

 from youtube_dl.utils import (
+    clean_html,
    DateRange,
    encodeFilename,
    find_xpath_attr,
@@ -45,6 +46,7 @@ from youtube_dl.utils import (
    escape_url,
    js_to_json,
    get_filesystem_encoding,
+    intlist_to_bytes,
 )


@@ -345,5 +347,14 @@ class TestUtil(unittest.TestCase):
        on = js_to_json('{"abc": true}')
        self.assertEqual(json.loads(on), {'abc': True})

+    def test_clean_html(self):
+        self.assertEqual(clean_html('a:\nb'), 'a: b')
+        self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"')
+
+    def test_intlist_to_bytes(self):
+        self.assertEqual(
+            intlist_to_bytes([0, 1, 127, 128, 255]),
+            b'\x00\x01\x7f\x80\xff')
+
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -324,6 +324,7 @@ from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
+from .sexu import SexuIE
 from .sexykarma import SexyKarmaIE
 from .shared import SharedIE
 from .sharesix import ShareSixIE
--- a/youtube_dl/extractor/sexu.py
+++ b/youtube_dl/extractor/sexu.py
@@ -0,0 +1,61 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SexuIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://sexu.com/961791/',
+        'md5': 'ff615aca9691053c94f8f10d96cd7884',
+        'info_dict': {
+            'id': '961791',
+            'ext': 'mp4',
+            'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
+            'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54',
+            'categories': list,  # NSFW
+            'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        quality_arr = self._search_regex(
+            r'sources:\s*\[([^\]]+)\]', webpage, 'forrmat string')
+        formats = [{
+            'url': fmt[0].replace('\\', ''),
+            'format_id': fmt[1],
+            'height': int(fmt[1][:3]),
+        } for fmt in re.findall(r'"file":"([^"]+)","label":"([^"]+)"', quality_arr)]
+        self._sort_formats(formats)
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')
+
+        description = self._html_search_meta(
+            'description', webpage, 'description')
+
+        thumbnail = self._html_search_regex(
+            r'image:\s*"([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+
+        categories_str = self._html_search_meta(
+            'keywords', webpage, 'categories')
+        categories = (
+            None if categories_str is None
+            else categories_str.split(','))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'formats': formats,
+            'age_limit': 18,
+        }
--- a/youtube_dl/extractor/spiegel.py
+++ b/youtube_dl/extractor/spiegel.py
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import compat_urlparse
+from ..compat import compat_urlparse


 class SpiegelIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
+    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'
    _TESTS = [{
        'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
        'md5': '2c2754212136f35fb4b19767d242f66e',
@@ -29,16 +29,24 @@ class SpiegelIE(InfoExtractor):
            'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
            'duration': 983,
        },
+    }, {
+        'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
+        'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51',
+        'info_dict': {
+            'id': '1519126',
+            'ext': 'mp4',
+            'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
+            'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
+        }
    }]

    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('videoID')
-
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

-        title = self._html_search_regex(
-            r'<div class="module-title">(.*?)</div>', webpage, 'title')
+        title = re.sub(r'\s+', ' ', self._html_search_regex(
+            r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
+            webpage, 'title'))
        description = self._html_search_meta('description', webpage, 'description')

        base_url = self._search_regex(
@@ -79,7 +87,7 @@ class SpiegelArticleIE(InfoExtractor):
    _VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
    IE_NAME = 'Spiegel:Article'
    IE_DESC = 'Articles on spiegel.de'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
        'info_dict': {
            'id': '1516455',
@@ -87,20 +95,34 @@ class SpiegelArticleIE(InfoExtractor):
            'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
            'description': 're:^Patrick Kämnitz gehört.{100,}',
        },
-    }
+    }, {
+        'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
+        'info_dict': {
+
+        },
+        'playlist_count': 6,
+    }]

    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
-
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
+
+        # Single video on top of the page
        video_link = self._search_regex(
            r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
-            'video page URL')
+            'video page URL', default=None)
+        if video_link:
            video_url = compat_urlparse.urljoin(
                self.http_scheme() + '//spiegel.de/', video_link)
+            return self.url_result(video_url)

-        return {
-            '_type': 'url',
-            'url': video_url,
-        }
+        # Multiple embedded videos
+        embeds = re.findall(
+            r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
+            webpage)
+        entries = [
+            self.url_result(compat_urlparse.urljoin(
+                self.http_scheme() + '//spiegel.de/', embed_path))
+            for embed_path in embeds
+        ]
+        return self.playlist_result(entries)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -843,10 +843,7 @@ def bytes_to_intlist(bs):
 def intlist_to_bytes(xs):
    if not xs:
        return b''
-    if isinstance(chr(0), bytes):  # Python 2
-        return ''.join([chr(x) for x in xs])
-    else:
-        return bytes(xs)
+    return struct.pack('%dB' % len(xs), *xs)


 # Cross-platform file locking
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.11.13'
+__version__ = '2014.11.13.1'
Author	SHA1	Message	Date
Philipp Hagemeister	c3d582985f	release 2014.11.13.1	2014-11-13 15:42:48 +01:00
Philipp Hagemeister	4c0924bb24	[utils] Fix intlist_to_bytes in Python 2 (#4181)	2014-11-13 15:28:42 +01:00
Philipp Hagemeister	3fa5bb3802	[sexu] Modernize (#4171)	2014-11-13 15:20:49 +01:00
Philipp Hagemeister	c47ec62b83	Merge remote-tracking branch 'peugeot/sexu'	2014-11-13 15:18:38 +01:00
Philipp Hagemeister	e4bdb37ec6	[spiegel] Add support for embeds	2014-11-13 15:02:31 +01:00
Philipp Hagemeister	3e6e4999ca	[test/helper] Improve output	2014-11-13 14:55:45 +01:00
Philipp Hagemeister	0e15e725a0	[spiegel] Modernize	2014-11-13 14:45:17 +01:00
peugeot	437f68d868	Update sexu.py	2014-11-13 14:02:53 +01:00
peugeot	d91d124081	fix python 2 test	2014-11-13 13:57:10 +01:00
peugeot	bbd5f2de5e	[sexu] initial support	2014-11-12 20:41:13 +01:00