release 2014.11.26

Merge branch 'master' of github.com:rg3/youtube-dl
[generic] Detect direct video links (Fixes #4149 , #4313 )
2025-08-02 10:30:04 -05:00 · 2014-11-26 10:46:12 +01:00 · 2014-11-26 10:45:57 +01:00 · 2014-11-26 10:44:39 +01:00 · 2014-11-25 22:24:33 +06:00 · 2014-11-25 14:34:29 +01:00
6 changed files with 120 additions and 6 deletions
--- a/1
+++ b/1
@@ -87,3 +87,4 @@ William Sewell
 Dao Hoang Son
 Oskar Jauch
 Matthew Rayfield
+t0mm0
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -373,6 +373,7 @@ from .syfy import SyfyIE
 from .sztvhu import SztvHuIE
 from .tagesschau import TagesschauIE
 from .tapely import TapelyIE
+from .tass import TassIE
 from .teachertube import (
    TeacherTubeIE,
    TeacherTubeUserIE,
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -296,9 +296,11 @@ class InfoExtractor(object):
        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
        return (content, urlh)

-    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
        content_type = urlh.headers.get('Content-Type', '')
        webpage_bytes = urlh.read()
+        if prefix is not None:
+            webpage_bytes = prefix + webpage_bytes
        m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
        if m:
            encoding = m.group(1)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -445,6 +445,30 @@ class GenericIE(InfoExtractor):
                'title': 'Rosetta #CometLanding webcast HL 10',
            }
        },
+        # LazyYT
+        {
+            'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
+            'info_dict': {
+                'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
+            },
+            'playlist_mincount': 2,
+        },
+        # Direct link with incorrect MIME type
+        {
+            'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+            'md5': '4ccbebe5f36706d85221f204d7eb5913',
+            'info_dict': {
+                'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+                'id': '5_Lennart_Poettering_-_Systemd',
+                'ext': 'webm',
+                'title': '5_Lennart_Poettering_-_Systemd',
+                'upload_date': '20141120',
+            },
+            'expected_warnings': [
+                'URL could be a direct video link, returning it as such.'
+            ]
+        }
+
    ]

    def report_following_redirect(self, new_url):
@@ -598,10 +622,28 @@ class GenericIE(InfoExtractor):
        if not self._downloader.params.get('test', False) and not is_intentional:
            self._downloader.report_warning('Falling back on generic information extractor.')

-        if full_response:
-            webpage = self._webpage_read_content(full_response, url, video_id)
-        else:
-            webpage = self._download_webpage(url, video_id)
+        if not full_response:
+            full_response = self._request_webpage(url, video_id)
+
+        # Maybe it's a direct link to a video?
+        # Be careful not to download the whole thing!
+        first_bytes = full_response.read(512)
+        if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
+            self._downloader.report_warning(
+                'URL could be a direct video link, returning it as such.')
+            upload_date = unified_strdate(
+                head_response.headers.get('Last-Modified'))
+            return {
+                'id': video_id,
+                'title': os.path.splitext(url_basename(url))[0],
+                'direct': True,
+                'url': url,
+                'upload_date': upload_date,
+            }
+
+        webpage = self._webpage_read_content(
+            full_response, url, video_id, prefix=first_bytes)
+
        self.report_extraction(video_id)

        # Is it an RSS feed?
@@ -702,6 +744,12 @@ class GenericIE(InfoExtractor):
            return _playlist_from_matches(
                matches, lambda m: unescapeHTML(m[1]))

+        # Look for lazyYT YouTube embed
+        matches = re.findall(
+            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
+        if matches:
+            return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
+
        # Look for embedded Dailymotion player
        matches = re.findall(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
--- a/youtube_dl/extractor/tass.py
+++ b/youtube_dl/extractor/tass.py
@@ -0,0 +1,62 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    qualities,
+)
+
+
+class TassIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:tass\.ru|itar-tass\.com)/[^/]+/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://tass.ru/obschestvo/1586870',
+            'md5': '3b4cdd011bc59174596b6145cda474a4',
+            'info_dict': {
+                'id': '1586870',
+                'ext': 'mp4',
+                'title': 'Посетителям московского зоопарка показали красную панду',
+                'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://itar-tass.com/obschestvo/1600009',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        sources = json.loads(js_to_json(self._search_regex(
+            r'(?s)sources\s*:\s*(\[.+?\])', webpage, 'sources')))
+
+        quality = qualities(['sd', 'hd'])
+
+        formats = []
+        for source in sources:
+            video_url = source.get('file')
+            if not video_url or not video_url.startswith('http') or not video_url.endswith('.mp4'):
+                continue
+            label = source.get('label')
+            formats.append({
+                'url': video_url,
+                'format_id': label,
+                'quality': quality(label),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'formats': formats,
+        }
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.11.25'
+__version__ = '2014.11.26'
Author	SHA1	Message	Date
Philipp Hagemeister	ddfd0f2727	release 2014.11.26	2014-11-26 10:46:12 +01:00
Philipp Hagemeister	d0720e7118	Merge branch 'master' of github.com:rg3/youtube-dl	2014-11-26 10:45:57 +01:00
Philipp Hagemeister	4e262a8838	[generic] Detect direct video links (Fixes #4149 , #4313 )	2014-11-26 10:44:39 +01:00
Sergey M․	b9ed3af343	[tass] Add extractor (Closes #4296 )	2014-11-25 22:24:33 +06:00
Philipp Hagemeister	63c9b2c1d9	release 2014.11.25.1	2014-11-25 14:34:29 +01:00
Philipp Hagemeister	65f3a228b1	[generic] Add support for LazyYT embeds (Fixes #4306 )	2014-11-25 14:34:19 +01:00
Philipp Hagemeister	3004ae2c3a	Credit @t0mm0 for xminus (#4302 )	2014-11-25 12:16:48 +01:00