release 2014.09.28.1

[vimeo:likes] Add new extractor (Fixes #3835 )
[played] Remove unused import
2025-08-02 10:30:04 -05:00 · 2014-09-28 12:14:25 +02:00 · 2014-09-28 12:14:16 +02:00 · 2014-09-28 10:56:36 +02:00 · 2014-09-28 10:55:27 +02:00 · 2014-09-28 10:52:23 +02:00
13 changed files with 279 additions and 14 deletions
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -139,7 +139,9 @@ def generator(test_case):

            if is_playlist:
                self.assertEqual(res_dict['_type'], 'playlist')
+                self.assertTrue('entries' in res_dict)
                expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
+
            if 'playlist_mincount' in test_case:
                assertGreaterEqual(
                    self,
@@ -188,7 +190,7 @@ def generator(test_case):
                expect_info_dict(self, tc.get('info_dict', {}), info_dict)
        finally:
            try_rm_tcs_files()
-            if is_playlist and res_dict is not None:
+            if is_playlist and res_dict is not None and res_dict.get('entries'):
                # Remove all other files that may have been extracted if the
                # extractor returns full results even with extract_flat
                res_tcs = [{'info_dict': e} for e in res_dict['entries']]
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -135,12 +135,14 @@ from .gametrailers import GametrailersIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
 from .godtube import GodTubeIE
+from .golem import GolemIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
 from .goshgay import GoshgayIE
 from .grooveshark import GroovesharkIE
 from .hark import HarkIE
+from .heise import HeiseIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .hornbunny import HornBunnyIE
@@ -272,6 +274,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
+from .played import PlayedIE
 from .playfm import PlayFMIE
 from .playvid import PlayvidIE
 from .podomatic import PodomaticIE
@@ -409,11 +412,12 @@ from .videoweed import VideoWeedIE
 from .vidme import VidmeIE
 from .vimeo import (
    VimeoIE,
-    VimeoChannelIE,
-    VimeoUserIE,
    VimeoAlbumIE,
+    VimeoChannelIE,
    VimeoGroupsIE,
+    VimeoLikesIE,
    VimeoReviewIE,
+    VimeoUserIE,
    VimeoWatchLaterIE,
 )
 from .vimple import VimpleIE
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -8,8 +8,6 @@ from ..utils import (
    determine_ext,
    ExtractorError,
    qualities,
-    compat_urllib_parse_urlparse,
-    compat_urllib_parse,
    int_or_none,
    parse_duration,
    unified_strdate,
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -22,6 +22,7 @@ from ..utils import (
    clean_html,
    compiled_regex_type,
    ExtractorError,
+    float_or_none,
    int_or_none,
    RegexNotFoundError,
    sanitize_filename,
@@ -720,6 +721,28 @@ class InfoExtractor(object):
        now_str = now.strftime("%Y-%m-%d %H:%M")
        return name + ' ' + now_str

+    def _int(self, v, name, fatal=False, **kwargs):
+        """
+        Convert v with int_or_none and report a failed conversion.
+
+        name only labels the value in the failure message. Any extra
+        keyword arguments are forwarded to int_or_none unchanged.
+
+        Returns whatever int_or_none returned. When that is None, an
+        ExtractorError is raised if fatal is true; otherwise a warning is
+        reported through the downloader and None is returned.
+        """
+        res = int_or_none(v, **kwargs)
+        # NOTE: a debugging print of getattr(v, kwargs['get_attr']) used to
+        # live here. It wrote to stdout on every call that passed get_attr
+        # and could raise AttributeError before the handling below ran.
+        if res is None:
+            msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+            if fatal:
+                raise ExtractorError(msg)
+            else:
+                self._downloader.report_warning(msg)
+        return res
+
+    def _float(self, v, name, fatal=False, **kwargs):
+        """
+        Convert v with float_or_none and report a failed conversion.
+
+        name only labels the value in the failure message; extra keyword
+        arguments are forwarded to float_or_none. A None result raises
+        ExtractorError when fatal is true, otherwise it is reported as a
+        warning through the downloader and None is returned.
+        """
+        parsed = float_or_none(v, **kwargs)
+        if parsed is not None:
+            return parsed
+
+        message = 'Failed to extract %s: Could not parse value %r' % (name, v)
+        if fatal:
+            raise ExtractorError(message)
+        self._downloader.report_warning(message)
+        return parsed
+

 class SearchInfoExtractor(InfoExtractor):
    """
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -397,12 +397,6 @@ class GenericIE(InfoExtractor):
        },
    ]

-    def report_download_webpage(self, video_id):
-        """Report webpage download."""
-        if not self._downloader.params.get('test', False):
-            self._downloader.report_warning('Falling back on generic information extractor.')
-        super(GenericIE, self).report_download_webpage(video_id)
-
    def report_following_redirect(self, new_url):
        """Report information extraction."""
        self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
@@ -502,6 +496,7 @@ class GenericIE(InfoExtractor):

        url, smuggled_data = unsmuggle_url(url)
        force_videoid = None
+        is_intentional = smuggled_data and smuggled_data.get('to_generic')
        if smuggled_data and 'force_videoid' in smuggled_data:
            force_videoid = smuggled_data['force_videoid']
            video_id = force_videoid
@@ -544,6 +539,9 @@ class GenericIE(InfoExtractor):
                    'upload_date': upload_date,
                }

+        if not self._downloader.params.get('test', False) and not is_intentional:
+            self._downloader.report_warning('Falling back on generic information extractor.')
+
        try:
            webpage = self._download_webpage(url, video_id)
        except ValueError:
--- a/youtube_dl/extractor/golem.py
+++ b/youtube_dl/extractor/golem.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    determine_ext,
+)
+
+
+class GolemIE(InfoExtractor):
+    """Extractor for video pages matched by _VALID_URL on video.golem.de."""
+
+    _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
+    _TEST = {
+        'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
+        'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
+        'info_dict': {
+            'id': '14095',
+            'format_id': 'high',
+            'ext': 'mp4',
+            'title': 'iPhone 6 und 6 Plus - Test',
+            'duration': 300.44,
+            'filesize': 65309548,
+        }
+    }
+
+    # Base URL against which the URLs read from the XML are joined.
+    _PREFIX = 'http://video.golem.de'
+
+    def _real_extract(self, url):
+        """
+        Download the per-video XML document and turn it into an info dict
+        with id, title, duration, formats and thumbnails.
+        """
+        video_id = self._match_id(url)
+
+        xml_url = 'https://video.golem.de/xml/{0}.xml'.format(video_id)
+        config = self._download_xml(xml_url, video_id)
+
+        # 'golem' is the fallback title when the XML has no <title>.
+        title = config.findtext('./title', 'golem')
+        duration = self._float(config.findtext('./playtime'), 'duration')
+
+        # Every direct child that carries a <url> element is one format;
+        # its tag name doubles as the format id.
+        formats = []
+        for node in config.findall('./*[url]'):
+            media_url = node.findtext('./url')
+            if media_url:
+                formats.append({
+                    'format_id': node.tag,
+                    'url': compat_urlparse.urljoin(self._PREFIX, media_url),
+                    'height': self._int(node.get('height'), 'height'),
+                    'width': self._int(node.get('width'), 'width'),
+                    'filesize': self._int(node.findtext('filesize'), 'filesize'),
+                    'ext': determine_ext(node.findtext('./filename')),
+                })
+            else:
+                self._downloader.report_warning(
+                    "{0}: url: empty, skipping".format(node.tag))
+        self._sort_formats(formats)
+
+        # <teaser> elements with a <url> child are used as thumbnails.
+        thumbnails = []
+        for teaser in config.findall('.//teaser[url]'):
+            teaser_url = teaser.findtext('./url')
+            if not teaser_url:
+                continue
+            thumbnails.append({
+                'url': compat_urlparse.urljoin(self._PREFIX, teaser_url),
+                'width': self._int(teaser.get('width'), 'thumbnail width'),
+                'height': self._int(teaser.get('height'), 'thumbnail height'),
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'duration': duration,
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }
--- a/youtube_dl/extractor/heise.py
+++ b/youtube_dl/extractor/heise.py
@@ -0,0 +1,81 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    get_meta_content,
+    parse_iso8601,
+)
+
+
+class HeiseIE(InfoExtractor):
+    """Extractor for heise.de video article pages matched by _VALID_URL."""
+
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?heise\.de/video/artikel/
+        .+?(?P<id>[0-9]+)\.html(?:$|[?#])
+    '''
+    _TEST = {
+        'url': (
+            'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
+        ),
+        'md5': 'ffed432483e922e88545ad9f2f15d30e',
+        'info_dict': {
+            'id': '2404147',
+            'ext': 'mp4',
+            'title': (
+                "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
+            ),
+            'format_id': 'mp4_720',
+            'timestamp': 1411812600,
+            'upload_date': '20140927',
+            'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
+        }
+    }
+
+    def _real_extract(self, url):
+        """
+        Read the JSON configuration URL out of the article page, download
+        it, and build the info dict from the page metadata and that JSON.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        json_url = self._search_regex(
+            r'json_url:\s*"([^"]+)"', webpage, 'json URL')
+        config = self._download_json(json_url, video_id)
+
+        info = {
+            'id': video_id,
+            'thumbnail': config.get('poster'),
+            'timestamp': parse_iso8601(get_meta_content('date', webpage)),
+            'description': self._og_search_description(webpage),
+        }
+
+        # Title sources in order of preference; each later source is only
+        # consulted when the earlier ones produced nothing.
+        info['title'] = (
+            get_meta_content('fulltitle', webpage) or
+            config.get('title') or
+            self._og_search_title(webpage))
+
+        # config['formats'] maps a type name to a mapping of
+        # height -> object carrying the media URL.
+        formats = []
+        for kind, resolutions in config['formats'].items():
+            if not (resolutions and hasattr(resolutions, 'items')):
+                self._downloader.report_warning(
+                    'formats: {0}: no resolutions'.format(kind))
+                continue
+
+            for height_str, entry in resolutions.items():
+                format_id = '{0}_{1}'.format(kind, height_str)
+
+                if entry and entry.get('url'):
+                    formats.append({
+                        'url': entry['url'],
+                        'format_id': format_id,
+                        'height': self._int(height_str, 'height'),
+                    })
+                else:
+                    self._downloader.report_warning(
+                        'formats: {0}: no url'.format(format_id))
+
+        self._sort_formats(formats)
+        info['formats'] = formats
+
+        return info
--- a/youtube_dl/extractor/muenchentv.py
+++ b/youtube_dl/extractor/muenchentv.py
@@ -22,6 +22,7 @@ class MuenchenTVIE(InfoExtractor):
            'ext': 'mp4',
            'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
+            'thumbnail': 're:^https?://.*\.jpg$'
        },
        'params': {
            'skip_download': True,
@@ -70,5 +71,6 @@ class MuenchenTVIE(InfoExtractor):
            'title': title,
            'formats': formats,
            'is_live': True,
+            'thumbnail': thumbnail,
        }

--- a/youtube_dl/extractor/played.py
+++ b/youtube_dl/extractor/played.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import os.path
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+
+
+class PlayedIE(InfoExtractor):
+    """Extractor for played.to video pages matched by _VALID_URL."""
+
+    IE_NAME = 'played.to'
+    _VALID_URL = r'https?://(?:www\.)?played\.to/(?P<id>[a-zA-Z0-9_-]+)'
+
+    _TEST = {
+        'url': 'http://played.to/j2f2sfiiukgt',
+        'md5': 'c2bd75a368e82980e7257bf500c00637',
+        'info_dict': {
+            'id': 'j2f2sfiiukgt',
+            'ext': 'flv',
+            'title': 'youtube-dl_test_video.mp4',
+        },
+    }
+
+    def _real_extract(self, url):
+        """
+        Fetch the page, re-submit its hidden form fields to the same URL
+        via POST, and read the video URL out of the response.
+        """
+        video_id = self._match_id(url)
+
+        landing_page = self._download_webpage(url, video_id)
+        # Collect every hidden form field as a name -> value mapping.
+        form_data = dict(re.findall(
+            r'type="hidden" name="([^"]+)"\s+value="([^"]+)">', landing_page))
+
+        # NOTE(review): presumably the site rejects a form that is posted
+        # back immediately - confirm why the delay is needed.
+        self._sleep(2, video_id)
+
+        request = compat_urllib_request.Request(
+            url,
+            compat_urllib_parse.urlencode(form_data),
+            {b'Content-Type': b'application/x-www-form-urlencoded'})
+        webpage = self._download_webpage(
+            request, video_id, note='Downloading video page ...')
+
+        # The title is the 'fname' form field without its file extension.
+        title, _ = os.path.splitext(form_data['fname'])
+
+        video_url = self._search_regex(
+            r'file: "?(.+?)",', webpage, 'video URL')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+        }
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -5,7 +5,6 @@ import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
-    compat_HTTPError,
    compat_urllib_request,
    ExtractorError,
 )
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -15,6 +15,7 @@ from ..utils import (
    get_element_by_attribute,
    ExtractorError,
    RegexNotFoundError,
+    smuggle_url,
    std_headers,
    unsmuggle_url,
    urlencode_postdata,
@@ -529,3 +530,35 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):

    def _real_extract(self, url):
        return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')
+
+
+class VimeoLikesIE(InfoExtractor):
+    """Extractor for a Vimeo user's likes page matched by _VALID_URL."""
+
+    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes(?:$|[?#])'
+    IE_NAME = 'vimeo:likes'
+    IE_DESC = 'Vimeo user likes'
+    _TEST = {
+        'url': 'https://vimeo.com/user20132939/likes',
+        'playlist_mincount': 4,
+        'add_ies': ['Generic'],
+        "info_dict": {
+            "description": "Videos Philipp Hagemeister likes on Vimeo.",
+            "title": "Vimeo / Philipp Hagemeister's likes",
+        },
+        'params': {
+            'extract_flat': False,
+        },
+    }
+
+    def _real_extract(self, url):
+        """
+        Return a 'url' result pointing at the user's likes RSS feed, with
+        a forced video id and the 'to_generic' flag smuggled into the URL.
+        """
+        user_id = self._match_id(url)
+        rss_url = '%s//vimeo.com/user%s/likes/rss' % (
+            self.http_scheme(), user_id)
+
+        # Data carried along inside the URL for whoever handles it next.
+        smuggled = {
+            'force_videoid': '%s_likes' % user_id,
+            'to_generic': True,
+        }
+
+        return {
+            '_type': 'url',
+            'url': smuggle_url(rss_url, smuggled),
+        }
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -2,7 +2,6 @@

 from __future__ import unicode_literals

-import json
 import math
 import random
 import re
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.09.28'
+__version__ = '2014.09.28.1'
Author	SHA1	Message	Date
Philipp Hagemeister	22dd3fad86	release 2014.09.28.1	2014-09-28 12:14:25 +02:00
Philipp Hagemeister	d6e6a42256	[vimeo:likes] Add new extractor (Fixes #3835 )	2014-09-28 12:14:16 +02:00
Philipp Hagemeister	76e7d1e74b	[played] Remove unused import	2014-09-28 10:56:36 +02:00
Philipp Hagemeister	38c4d41b74	[played] Simplify (#3798 )	2014-09-28 10:55:27 +02:00
Philipp Hagemeister	f0b8e3607d	Merge remote-tracking branch 'r4mos/played'	2014-09-28 10:52:23 +02:00
Philipp Hagemeister	51ee08c4bb	Remove unused imports	2014-09-28 10:50:43 +02:00
Philipp Hagemeister	c841789772	[muenchentv] Add thumbnail	2014-09-28 10:49:58 +02:00
Philipp Hagemeister	c121a75b36	[heise] Add support for description	2014-09-28 10:49:12 +02:00
Philipp Hagemeister	5a8b77551d	[heise] Simplify (#3842 )	2014-09-28 10:47:25 +02:00
Philipp Hagemeister	0217aee154	Merge remote-tracking branch 'd912e3/heise'	2014-09-28 10:36:44 +02:00
Philipp Hagemeister	b14f3a4c1d	[golem] Simplify (#3828 )	2014-09-28 10:35:19 +02:00
Philipp Hagemeister	92f7963f6e	Merge remote-tracking branch 'd912e3/golem'	2014-09-28 10:10:34 +02:00
Mats	7b7518124e	[heise] Don't check string type. Before Python 3 could be unicode, so don't check at all.	2014-09-27 21:12:23 +02:00
Mats	70752ccefd	[golem] Don't omit positional argument specifiers. Required by Python 2.6.	2014-09-27 19:35:55 +02:00
Mats	0155549d6c	[heise] Add new extractor	2014-09-27 19:28:01 +02:00
Mats	6a5af6acb9	[golem] Add new extractor	2014-09-25 16:25:53 +02:00
Carlos Ramos	5aa38e75b2	[played] Add new extractor	2014-09-19 22:46:57 +02:00