release 2014.08.05

Credit @matrixik for #3441
Merge pull request #3441 from matrixik/patch-1
2025-08-03 02:50:01 -05:00 · 2014-08-05 17:02:47 +02:00 · 2014-08-05 19:09:11 +07:00 · 2014-08-05 19:07:05 +07:00 · 2014-08-04 21:37:36 +02:00 · 2014-08-04 00:04:47 +07:00
31 changed files with 837 additions and 133 deletions
--- a/README.md
+++ b/README.md
@@ -46,15 +46,15 @@ which means you can modify it, redistribute it or use it however you like.
                                     an empty string (--proxy "") for direct
                                     connection
    --socket-timeout None            Time to wait before giving up, in seconds
-    --bidi-workaround                Work around terminals that lack
-                                     bidirectional text support. Requires bidiv
-                                     or fribidi executable in PATH
    --default-search PREFIX          Use this prefix for unqualified URLs. For
                                     example "gvsearch2:" downloads two videos
                                     from google videos for  youtube-dl "large
                                     apple". Use the value "auto" to let
-                                     youtube-dl guess. The default value "error"
-                                     just throws an error.
+                                     youtube-dl guess ("auto_warning" to emit a
+                                     warning when guessing). "error" just throws
+                                     an error. The default value "fixup_error"
+                                     repairs broken URLs, but emits an error if
+                                     this is not possible instead of searching.
    --ignore-config                  Do not read configuration files. When given
                                     in the global configuration file /etc
                                     /youtube-dl.conf: do not read the user
@@ -213,6 +213,9 @@ which means you can modify it, redistribute it or use it however you like.
    --add-header FIELD:VALUE         specify a custom HTTP header and its value,
                                     separated by a colon ':'. You can use this
                                     option multiple times
+    --bidi-workaround                Work around terminals that lack
+                                     bidirectional text support. Requires bidiv
+                                     or fribidi executable in PATH

 ## Video Format Options:
    -f, --format FORMAT              video format code, specify the order of
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -193,10 +193,10 @@ class TestPlaylists(unittest.TestCase):
    def test_bandcamp_album(self):
        dl = FakeYDL()
        ie = BandcampAlbumIE(dl)
-        result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
+        result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave')
        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'Nightmare Night EP')
-        assertGreaterEqual(self, len(result['entries']), 4)
+        self.assertEqual(result['title'], 'Hierophany of the Open Grave')
+        assertGreaterEqual(self, len(result['entries']), 9)
        
    def test_smotri_community(self):
        dl = FakeYDL()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -275,7 +275,7 @@ class YoutubeDL(object):
            return message

        assert hasattr(self, '_output_process')
-        assert type(message) == type('')
+        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
@@ -303,7 +303,7 @@ class YoutubeDL(object):

    def to_stderr(self, message):
        """Print message to stderr."""
-        assert type(message) == type('')
+        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
@@ -849,7 +849,7 @@ class YoutubeDL(object):
        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

-        if not 'format' in info_dict:
+        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -66,6 +66,7 @@ __authors__  = (
    'Naglis Jonaitis',
    'Charles Chen',
    'Hassaan Ali',
+    'Dobrosław Żybort',
 )

 __license__ = 'Public Domain'
@@ -252,13 +253,10 @@ def parseOpts(overrideArguments=None):
    general.add_option(
        '--socket-timeout', dest='socket_timeout',
        type=float, default=None, help=u'Time to wait before giving up, in seconds')
-    general.add_option(
-        '--bidi-workaround', dest='bidi_workaround', action='store_true',
-        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
    general.add_option(
        '--default-search',
        dest='default_search', metavar='PREFIX',
-        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.')
+        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
    general.add_option(
        '--ignore-config',
        action='store_true',
@@ -386,6 +384,9 @@ def parseOpts(overrideArguments=None):
        dest='headers', action='append',
        help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
    )
+    workarounds.add_option(
+        '--bidi-workaround', dest='bidi_workaround', action='store_true',
+        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')

    verbosity.add_option('-q', '--quiet',
            action='store_true', dest='quiet', help='activates quiet mode', default=False)
@@ -709,7 +710,7 @@ def _real_main(argv=None):
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)
-    if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
+    if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')

    # Do not download videos when there are audio-only formats
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -220,6 +220,7 @@ class F4mFD(FileDownloader):

    def real_download(self, filename, info_dict):
        man_url = info_dict['url']
+        requested_bitrate = info_dict.get('tbr')
        self.to_screen('[download] Downloading f4m manifest')
        manifest = self.ydl.urlopen(man_url).read()
        self.report_destination(filename)
@@ -233,8 +234,14 @@ class F4mFD(FileDownloader):

        doc = etree.fromstring(manifest)
        formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
-        formats = sorted(formats, key=lambda f: f[0])
-        rate, media = formats[-1]
+        if requested_bitrate is None:
+            # get the best format
+            formats = sorted(formats, key=lambda f: f[0])
+            rate, media = formats[-1]
+        else:
+            rate, media = list(filter(
+                lambda f: int(f[0]) == requested_bitrate, formats))[0]
+
        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
        bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
        metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -1,3 +1,4 @@
+from .abc import ABCIE
 from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .adultswim import AdultSwimIE
@@ -111,9 +112,11 @@ from .funnyordie import FunnyOrDieIE
 from .gamekings import GamekingsIE
 from .gameone import GameOneIE
 from .gamespot import GameSpotIE
+from .gamestar import GameStarIE
 from .gametrailers import GametrailersIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
+from .godtube import GodTubeIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
@@ -140,6 +143,7 @@ from .ivi import (
    IviIE,
    IviCompilationIE
 )
+from .izlesene import IzleseneIE
 from .jadorecettepub import JadoreCettePubIE
 from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
@@ -322,6 +326,8 @@ from .tumblr import TumblrIE
 from .tutv import TutvIE
 from .tvigle import TvigleIE
 from .tvp import TvpIE
+from .tvplay import TVPlayIE
+from .ubu import UbuIE
 from .udemy import (
    UdemyIE,
    UdemyCourseIE
@@ -343,6 +349,7 @@ from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .videott import VideoTtIE
 from .videoweed import VideoWeedIE
+from .vidme import VidmeIE
 from .vimeo import (
    VimeoIE,
    VimeoChannelIE,
--- a/youtube_dl/extractor/abc.py
+++ b/youtube_dl/extractor/abc.py
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class ABCIE(InfoExtractor):  # Extractor for news video pages under http://www.abc.net.au/news/
+    IE_NAME = 'abc.net.au'
+    _VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'  # 'id' = digits after two path segments below /news/
+
+    _TEST = {
+        'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716',
+        'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742',
+        'info_dict': {
+            'id': '5624716',
+            'ext': 'mp4',
+            'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor',
+            'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af',
+        },
+    }
+
+    def _real_extract(self, url):  # Return the info dict (id, title, formats, description, thumbnail) for the page at url
+        mobj = re.match(self._VALID_URL, url)  # NOTE(review): assumes url matches _VALID_URL; mobj would be None otherwise
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        urls_info_json = self._search_regex(  # capture the argument of the page's inlineVideoData.push(...) call
+            r'inlineVideoData\.push\((.*?)\);', webpage, 'video urls',
+            flags=re.DOTALL)
+        urls_info = json.loads(urls_info_json.replace('\'', '"'))  # rewrite ' as " before JSON parsing; NOTE(review): breaks on values containing an apostrophe
+        formats = [{  # one format dict per element of urls_info; every key is indexed directly, so a missing field raises
+            'url': url_info['url'],
+            'width': int(url_info['width']),
+            'height': int(url_info['height']),
+            'tbr': int(url_info['bitrate']),
+            'filesize': int(url_info['filesize']),
+        } for url_info in urls_info]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -8,6 +8,8 @@ from ..utils import (
    determine_ext,
    ExtractorError,
    qualities,
+    compat_urllib_parse_urlparse,
+    compat_urllib_parse,
 )


@@ -44,6 +46,9 @@ class ARDIE(InfoExtractor):
        else:
            video_id = m.group('video_id')

+        urlp = compat_urllib_parse_urlparse(url)
+        url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()
+
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
--- a/youtube_dl/extractor/blinkx.py
+++ b/youtube_dl/extractor/blinkx.py
@@ -52,7 +52,7 @@ class BlinkxIE(InfoExtractor):
                    'height': int(m['h']),
                })
            elif m['type'] == 'original':
-                duration = m['d']
+                duration = float(m['d'])
            elif m['type'] == 'youtube':
                yt_id = m['link']
                self.to_screen('Youtube video detected: %s' % yt_id)
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):

    _TEST = {
        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
-        'md5': '7bf08858ff7c203c870e8a6190e221e5',
+        # The md5 checksum changes
        'info_dict': {
            'id': 'qurhIVlJSB6hzkVi229d8g',
            'ext': 'flv',
@@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
        return {
            'id': name.split('-')[-1],
            'title': title,
-            'url': f4m_url,
-            'ext': 'flv',
+            'formats': self._extract_f4m_formats(f4m_url, name),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }
--- a/youtube_dl/extractor/br.py
+++ b/youtube_dl/extractor/br.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
+    parse_duration,
 )


@@ -22,8 +23,9 @@ class BRIE(InfoExtractor):
            'info_dict': {
                'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
                'ext': 'mp4',
-                'title': 'Am 1. und 2. August in Oberammergau',
-                'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021',
+                'title': 'Wenn das Traditions-Theater wackelt',
+                'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
+                'duration': 34,
            }
        },
        {
@@ -34,6 +36,7 @@ class BRIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Über den Pass',
                'description': 'Die Eroberung der Alpen: Über den Pass',
+                'duration': 2588,
            }
        },
        {
@@ -44,6 +47,7 @@ class BRIE(InfoExtractor):
                'ext': 'aac',
                'title': '"Keine neuen Schulden im nächsten Jahr"',
                'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
+                'duration': 64,
            }
        },
        {
@@ -54,6 +58,7 @@ class BRIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Umweltbewusster Häuslebauer',
                'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
+                'duration': 116,
            }
        },
        {
@@ -64,6 +69,7 @@ class BRIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Folge 1 - Metaphysik',
                'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
+                'duration': 893,
                'uploader': 'Eva Maria Steimle',
                'upload_date': '20140117',
            }
@@ -84,6 +90,7 @@ class BRIE(InfoExtractor):
            media = {
                'id': xml_media.get('externalId'),
                'title': xml_media.find('title').text,
+                'duration': parse_duration(xml_media.find('duration').text),
                'formats': self._extract_formats(xml_media.find('assets')),
                'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
                'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -18,6 +18,7 @@ from ..utils import (
    clean_html,
    compiled_regex_type,
    ExtractorError,
+    int_or_none,
    RegexNotFoundError,
    sanitize_filename,
    unescapeHTML,
@@ -373,7 +374,8 @@ class InfoExtractor(object):
        else:
            for p in pattern:
                mobj = re.search(p, string, flags)
-                if mobj: break
+                if mobj:
+                    break

        if os.name != 'nt' and sys.stderr.isatty():
            _name = u'\033[0;34m%s\033[0m' % name
@@ -589,6 +591,24 @@ class InfoExtractor(object):
        self.to_screen(msg)
        time.sleep(timeout)

+    def _extract_f4m_formats(self, manifest_url, video_id):  # Download the f4m manifest and return a sorted list with one format dict per <media> element
+        manifest = self._download_xml(
+            manifest_url, video_id, 'Downloading f4m manifest',
+            'Unable to download f4m manifest')
+
+        formats = []
+        for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):  # only <media> children in the f4m 1.0 namespace
+            formats.append({
+                'url': manifest_url,  # every format carries the manifest URL itself; entries differ only in tbr/width/height
+                'ext': 'flv',
+                'tbr': int_or_none(media_el.attrib.get('bitrate')),  # F4mFD.real_download matches info_dict['tbr'] against the manifest bitrate
+                'width': int_or_none(media_el.attrib.get('width')),
+                'height': int_or_none(media_el.attrib.get('height')),
+            })
+        self._sort_formats(formats)
+
+        return formats
+

 class SearchInfoExtractor(InfoExtractor):
    """
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -19,17 +19,35 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
            + video_id, video_id, 'Downloading XML config')

        manifest_url = info.find('videos/video/url').text
-        video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
-        video_url = video_url.replace('/z/', '/i/')
+        manifest_url = manifest_url.replace('/z/', '/i/')
+        
+        if manifest_url.startswith('rtmp'):
+            formats = [{'url': manifest_url, 'ext': 'flv'}]
+        else:
+            formats = []
+            available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats')
+            for index, format_descr in enumerate(available_formats.split(',')):
+                format_info = {
+                    'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index),
+                    'ext': 'mp4',
+                }
+                m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr)
+                if m_resolution is not None:
+                    format_info.update({
+                        'width': int(m_resolution.group('width')),
+                        'height': int(m_resolution.group('height')),
+                    })
+                formats.append(format_info)
+
        thumbnail_path = info.find('image').text

-        return {'id': video_id,
-                'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
-                'url': video_url,
-                'title': info.find('titre').text,
-                'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
-                'description': info.find('synopsis').text,
-                }
+        return {
+            'id': video_id,
+            'title': info.find('titre').text,
+            'formats': formats,
+            'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
+            'description': info.find('synopsis').text,
+        }


 class PluzzIE(FranceTVBaseInfoExtractor):
--- a/youtube_dl/extractor/gamestar.py
+++ b/youtube_dl/extractor/gamestar.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    str_to_int,
+    unified_strdate,
+)
+
+
+class GameStarIE(InfoExtractor):  # Extractor for video pages under http://www.gamestar.de/videos/
+    _VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'  # 'id' = digits between a ',' and '.html'
+    _TEST = {
+        'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
+        'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
+        'info_dict': {
+            'id': '76110',
+            'ext': 'mp4',
+            'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
+            'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.',
+            'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg',
+            'upload_date': '20140728',
+            'duration': 17
+        }
+    }
+
+    def _real_extract(self, url):  # Return the info dict for the video page at url
+        mobj = re.match(self._VALID_URL, url)  # NOTE(review): assumes url matches _VALID_URL; mobj would be None otherwise
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        og_title = self._og_search_title(webpage)
+        title = og_title.replace(' - Video bei GameStar.de', '').strip()  # drop the site's title suffix text
+
+        url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id  # rebinds url: from here on it is the getVideoUrl.cfm endpoint returned as the media 'url'
+
+        description = self._og_search_description(webpage).strip()
+
+        thumbnail = self._proto_relative_url(  # NOTE(review): presumably prefixes scheme-less '//...' URLs with 'http:' - confirm in common.py
+            self._og_search_thumbnail(webpage), scheme='http:')
+
+        upload_date = unified_strdate(self._html_search_regex(  # dotted numeric date following 'Datum:'
+            r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+)&nbsp;&nbsp;',
+            webpage, 'upload_date', fatal=False))
+
+        duration = parse_duration(self._html_search_regex(  # 'digits:digits' value following the page's length label
+            r'&nbsp;&nbsp;Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration',
+            fatal=False))
+
+        view_count = str_to_int(self._html_search_regex(  # digits and '.' following 'Zuschauer:'
+            r'&nbsp;&nbsp;Zuschauer: ([0-9\.]+)&nbsp;&nbsp;', webpage,
+            'view_count', fatal=False))
+
+        comment_count = int_or_none(self._html_search_regex(  # number in parentheses of the 'Kommentieren (N)' link
+            r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count',
+            fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': url,
+            'ext': 'mp4',
+            'thumbnail': thumbnail,
+            'description': description,
+            'upload_date': upload_date,
+            'duration': duration,
+            'view_count': view_count,
+            'comment_count': comment_count
+        }
--- a/youtube_dl/extractor/gdcvault.py
+++ b/youtube_dl/extractor/gdcvault.py
@@ -8,6 +8,7 @@ from ..utils import (
    compat_urllib_request,
 )

+
 class GDCVaultIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
    _TESTS = [
@@ -31,6 +32,15 @@ class GDCVaultIE(InfoExtractor):
                'skip_download': True,  # Requires rtmpdump
            }
        },
+        {
+            'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or',
+            'md5': 'a5eb77996ef82118afbbe8e48731b98e',
+            'info_dict': {
+                'id': '1015301',
+                'ext': 'flv',
+                'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
+            }
+        }
    ]

    def _parse_mp4(self, xml_description):
@@ -103,18 +113,40 @@ class GDCVaultIE(InfoExtractor):
        webpage_url = 'http://www.gdcvault.com/play/' + video_id
        start_page = self._download_webpage(webpage_url, video_id)

-        xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False)
+        direct_url = self._search_regex(
+            r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
+            start_page, 'url', default=None)
+        if direct_url:
+            video_url = 'http://www.gdcvault.com/' + direct_url
+            title = self._html_search_regex(
+                r'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>',
+                start_page, 'title')

+            return {
+                'id': video_id,
+                'url': video_url,
+                'ext': 'flv',
+                'title': title,
+            }
+
+        xml_root = self._html_search_regex(
+            r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>',
+            start_page, 'xml root', default=None)
        if xml_root is None:
            # Probably need to authenticate
-            start_page = self._login(webpage_url, video_id)
-            if start_page is None:
+            login_res = self._login(webpage_url, video_id)
+            if login_res is None:
                self.report_warning('Could not login.')
            else:
+                start_page = login_res
                # Grab the url from the authenticated page
-                xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root')
+                xml_root = self._html_search_regex(
+                    r'<iframe src="(.*?)player.html.*?".*?</iframe>',
+                    start_page, 'xml root')

-        xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False)
+        xml_name = self._html_search_regex(
+            r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
+            start_page, 'xml filename', default=None)
        if xml_name is None:
            # Fallback to the older format
            xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -383,13 +383,13 @@ class GenericIE(InfoExtractor):
        if not parsed_url.scheme:
            default_search = self._downloader.params.get('default_search')
            if default_search is None:
-                default_search = 'error'
+                default_search = 'fixup_error'

-            if default_search in ('auto', 'auto_warning'):
+            if default_search in ('auto', 'auto_warning', 'fixup_error'):
                if '/' in url:
                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                    return self.url_result('http://' + url)
-                else:
+                elif default_search != 'fixup_error':
                    if default_search == 'auto_warning':
                        if re.match(r'^(?:url|URL)$', url):
                            raise ExtractorError(
@@ -399,7 +399,8 @@ class GenericIE(InfoExtractor):
                            self._downloader.report_warning(
                                'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
                    return self.url_result('ytsearch:' + url)
-            elif default_search == 'error':
+
+            if default_search in ('error', 'fixup_error'):
                raise ExtractorError(
                    ('%r is not a valid URL. '
                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
--- a/youtube_dl/extractor/godtube.py
+++ b/youtube_dl/extractor/godtube.py
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class GodTubeIE(InfoExtractor):  # Extractor for godtube.com/watch/?v=<id> pages
+    _VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)'  # 'id' = alphanumeric value of the v parameter
+    _TESTS = [
+        {
+            'url': 'https://www.godtube.com/watch/?v=0C0CNNNU',
+            'md5': '77108c1e4ab58f48031101a1a2119789',
+            'info_dict': {
+                'id': '0C0CNNNU',
+                'ext': 'mp4',
+                'title': 'Woman at the well.',
+                'duration': 159,
+                'timestamp': 1205712000,
+                'uploader': 'beverlybmusic',
+                'upload_date': '20080317',
+                'thumbnail': r're:^https?://.*\.jpg$',  # raw string: '\.' is not a valid escape in a normal literal
+            },
+        },
+    ]
+
+    def _real_extract(self, url):  # Return the info dict built from the player-config XML and the media XML
+        mobj = re.match(self._VALID_URL, url)  # NOTE(review): assumes url matches _VALID_URL; mobj would be None otherwise
+        video_id = mobj.group('id')
+
+        config = self._download_xml(  # the player config is requested with the id lower-cased
+            'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(),
+            video_id, 'Downloading player config XML')
+
+        video_url = config.find('.//file').text  # each .find() below must match, or .text raises AttributeError
+        uploader = config.find('.//author').text
+        timestamp = parse_iso8601(config.find('.//date').text)
+        duration = parse_duration(config.find('.//duration').text)
+        thumbnail = config.find('.//image').text
+
+        media = self._download_xml(  # second document, read only for the title; uses the id as given (not lower-cased)
+            'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML')
+
+        title = media.find('.//title').text
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'duration': duration,
+        }
--- a/youtube_dl/extractor/izlesene.py
+++ b/youtube_dl/extractor/izlesene.py
@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    get_element_by_id,
+    parse_iso8601,
+    determine_ext,
+    int_or_none,
+    str_to_int,
+)
+
+
+class IzleseneIE(InfoExtractor):  # Extractor for izlesene.com video and embedplayer URLs
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'  # 'id' = numeric segment after an optional slug
+    _STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'  # filled with str.format(id=..., format=...) once per quality
+    _TEST = {
+        'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
+        'md5': '4384f9f0ea65086734b881085ee05ac2',
+        'info_dict': {
+            'id': '7599694',
+            'ext': 'mp4',
+            'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
+            'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
+            'thumbnail': r're:^http://.*\.jpg',  # raw string: '\.' is not a valid escape in a normal literal
+            'uploader_id': 'pelikzzle',
+            'timestamp': 1404298698,
+            'upload_date': '20140702',
+            'duration': 95.395,
+            'age_limit': 0,
+        }
+    }
+
+    def _real_extract(self, url):  # Return the info dict, with one format per advertised quality
+        mobj = re.match(self._VALID_URL, url)  # NOTE(review): assumes url matches _VALID_URL; mobj would be None otherwise
+        video_id = mobj.group('id')
+        url = 'http://www.izlesene.com/video/%s' % video_id  # normalise mobile/embed URLs to the www video page
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        uploader = self._html_search_regex(
+            r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
+        timestamp = parse_iso8601(self._html_search_meta(
+            'uploadDate', webpage, 'upload date', fatal=False))
+
+        duration = int_or_none(self._html_search_regex(
+            r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
+        if duration:  # skipped for None and 0
+            duration /= 1000.0  # presumably milliseconds to seconds (the test expects 95.395) - confirm
+
+        view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
+        comment_count = self._html_search_regex(  # label fixed: a failed lookup used to be reported as 'uploader'
+            r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'comment count', fatal=False)
+
+        family_friendly = self._html_search_meta(
+            'isFamilyFriendly', webpage, 'age limit', fatal=False)
+
+        content_url = self._html_search_meta(
+            'contentURL', webpage, 'content URL', fatal=False)
+        ext = determine_ext(content_url, 'mp4')
+
+        # Might be empty for some videos.
+        qualities = self._html_search_regex(
+            r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')
+
+        formats = []
+        for quality in qualities.split('|'):  # ''.split('|') == [''], so an empty value still yields one request and one 'sd' format
+            stream_info = self._download_json(  # renamed from 'json' so the local no longer shadows the stdlib module name
+                self._STREAM_URL.format(id=video_id, format=quality), video_id,
+                note='Getting video URL for "%s" quality' % quality,
+                errnote='Failed to get video URL for "%s" quality' % quality
+            )
+            formats.append({
+                'url': stream_info.get('streamurl'),
+                'ext': ext,
+                'format_id': '%sp' % quality if quality else 'sd',
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader_id': uploader,
+            'timestamp': timestamp,
+            'duration': duration,
+            'view_count': int_or_none(view_count),
+            'comment_count': int_or_none(comment_count),
+            'age_limit': 18 if family_friendly == 'False' else 0,  # 18 only when the meta value is the string 'False'
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/justintv.py
+++ b/youtube_dl/extractor/justintv.py
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals

+import itertools
 import json
 import os
 import re
@@ -43,10 +44,11 @@ class JustinTVIE(InfoExtractor):
    }

    # Return count of items, list of *valid* items
-    def _parse_page(self, url, video_id):
-        info_json = self._download_webpage(url, video_id,
-                                           'Downloading video info JSON',
-                                           'unable to download video info JSON')
+    def _parse_page(self, url, video_id, counter):
+        info_json = self._download_webpage(
+            url, video_id,
+            'Downloading video info JSON on page %d' % counter,
+            'Unable to download video info JSON %d' % counter)

        response = json.loads(info_json)
        if type(response) != list:
@@ -138,11 +140,10 @@ class JustinTVIE(InfoExtractor):
        entries = []
        offset = 0
        limit = self._JUSTIN_PAGE_LIMIT
-        while True:
-            if paged:
-                self.report_download_page(video_id, offset)
+        for counter in itertools.count(1):
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
-            page_count, page_info = self._parse_page(page_url, video_id)
+            page_count, page_info = self._parse_page(
+                page_url, video_id, counter)
            entries.extend(page_info)
            if not paged or page_count != limit:
                break
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -5,11 +5,14 @@ import json

 from .common import InfoExtractor
 from ..utils import (
+    compat_str,
    compat_urllib_parse_urlparse,
    compat_urlparse,
-    xpath_with_ns,
-    compat_str,
+    ExtractorError,
+    find_xpath_attr,
+    int_or_none,
    orderedSet,
+    xpath_with_ns,
 )


@@ -24,20 +27,82 @@ class LivestreamIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Live from Webster Hall NYC',
            'upload_date': '20121012',
+            'like_count': int,
+            'view_count': int,
+            'thumbnail': 're:^http://.*\.jpg$'
        }
    }

+    def _parse_smil(self, video_id, smil_url):
+        """Extract the formats described by a SMIL document.
+
+        Downloads smil_url non-fatally and turns every <video> child of the
+        selected <switch> node into a format dict.
+
+        Returns a list of format dicts; the list is empty when the SMIL
+        document could not be downloaded.
+        Raises ExtractorError when no <switch> node can be found.
+        """
+        formats = []
+        _SWITCH_XPATH = (
+            './/{http://www.w3.org/2001/SMIL20/Language}body/'
+            '{http://www.w3.org/2001/SMIL20/Language}switch')
+        smil_doc = self._download_xml(
+            smil_url, video_id,
+            note='Downloading SMIL information',
+            errnote='Unable to download SMIL information',
+            fatal=False)
+        if smil_doc is False:  # Download failed
+            return formats
+        # The content of <meta name="title"> is used as the id of the
+        # <switch> node to read; without it, fall back to the first <switch>.
+        title_node = find_xpath_attr(
+            smil_doc, './/{http://www.w3.org/2001/SMIL20/Language}meta',
+            'name', 'title')
+        if title_node is None:
+            self.report_warning('Cannot find SMIL id')
+            switch_node = smil_doc.find(_SWITCH_XPATH)
+        else:
+            title_id = title_node.attrib['content']
+            switch_node = find_xpath_attr(
+                smil_doc, _SWITCH_XPATH, 'id', title_id)
+        if switch_node is None:
+            raise ExtractorError('Cannot find switch node')
+        video_nodes = switch_node.findall(
+            '{http://www.w3.org/2001/SMIL20/Language}video')
+
+        for vn in video_nodes:
+            tbr = int_or_none(vn.attrib.get('system-bitrate'))
+            furl = (
+                'http://livestream-f.akamaihd.net/%s?v=3.0.3&fp=WIN%%2014,0,0,145' %
+                (vn.attrib['src']))
+            if 'clipBegin' in vn.attrib:
+                furl += '&ssek=' + vn.attrib['clipBegin']
+            formats.append({
+                'url': furl,
+                # tbr is None when the node carries no system-bitrate
+                # attribute; '%d' % None would raise TypeError, so use a
+                # plain id in that case.
+                'format_id': 'smil_%d' % tbr if tbr is not None else 'smil',
+                'ext': 'flv',
+                'tbr': tbr,
+                'preference': -1000,
+            })
+        return formats
+
    def _extract_video_info(self, video_data):
-        video_url = (
-            video_data.get('progressive_url_hd') or
-            video_data.get('progressive_url')
+        """Build the info dict for one video from its video_data dict.
+
+        'id', 'caption' and 'updated_at' are indexed directly and therefore
+        must be present; the progressive URLs, 'smil_url', 'thumbnail_url',
+        'likes' and 'views' are read with .get() and may be missing.
+        """
+        video_id = compat_str(video_data['id'])
+
+        # The 1-based position of a key in this tuple becomes the 'quality'
+        # value of its format; keys that are missing or empty are skipped.
+        FORMAT_KEYS = (
+            ('sd', 'progressive_url'),
+            ('hd', 'progressive_url_hd'),
        )
+        formats = [{
+            'format_id': format_id,
+            'url': video_data[key],
+            'quality': i + 1,
+        } for i, (format_id, key) in enumerate(FORMAT_KEYS)
+            if video_data.get(key)]
+
+        # Formats parsed from the SMIL document are added to the progressive
+        # ones before sorting.
+        smil_url = video_data.get('smil_url')
+        if smil_url:
+            formats.extend(self._parse_smil(video_id, smil_url))
+        self._sort_formats(formats)
+
        return {
-            'id': compat_str(video_data['id']),
-            'url': video_url,
+            'id': video_id,
+            'formats': formats,
            'title': video_data['caption'],
-            'thumbnail': video_data['thumbnail_url'],
+            'thumbnail': video_data.get('thumbnail_url'),
+            # Drops the dashes and keeps the first 8 characters; presumably
+            # 'updated_at' starts with a YYYY-MM-DD date - TODO confirm.
            'upload_date': video_data['updated_at'].replace('-', '')[:8],
+            'like_count': video_data.get('likes', {}).get('total'),
+            'view_count': video_data.get('views'),
        }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -32,13 +32,21 @@ class PBSIE(InfoExtractor):
        },
    }

-    def _real_extract(self, url):
+    def _extract_ids(self, url):
        mobj = re.match(self._VALID_URL, url)

        presumptive_id = mobj.group('presumptive_id')
        display_id = presumptive_id
        if presumptive_id:
            webpage = self._download_webpage(url, display_id)
+
+            # frontline video embed
+            media_id = self._search_regex(
+                r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",
+                webpage, 'frontline video ID', fatal=False, default=None)
+            if media_id:
+                return media_id, presumptive_id
+
            url = self._search_regex(
                r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
                webpage, 'player URL')
@@ -57,6 +65,11 @@ class PBSIE(InfoExtractor):
            video_id = mobj.group('id')
            display_id = video_id

+        return video_id, display_id
+
+    def _real_extract(self, url):
+        video_id, display_id = self._extract_ids(url)
+
        info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
        info = self._download_json(info_url, display_id)

--- a/youtube_dl/extractor/streamcloud.py
+++ b/youtube_dl/extractor/streamcloud.py
@@ -1,4 +1,6 @@
 # coding: utf-8
+from __future__ import unicode_literals
+
 import re
 import time

@@ -10,18 +12,18 @@ from ..utils import (


 class StreamcloudIE(InfoExtractor):
-    IE_NAME = u'streamcloud.eu'
+    IE_NAME = 'streamcloud.eu'
    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'

    _TEST = {
-        u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
-        u'file': u'skp9j99s4bpz.mp4',
-        u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
-        u'info_dict': {
-            u'title': u'youtube-dl test video  \'/\\ ä ↭',
-            u'duration': 9,
+        'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
+        'md5': '6bea4c7fa5daaacc2a946b7146286686',
+        'info_dict': {
+            'id': 'skp9j99s4bpz',
+            'ext': 'mp4',
+            'title': 'youtube-dl test video  \'/\\ ä ↭',
        },
-        u'skip': u'Only available from the EU'
+        'skip': 'Only available from the EU'
    }

    def _real_extract(self, url):
@@ -46,21 +48,17 @@ class StreamcloudIE(InfoExtractor):
        req = compat_urllib_request.Request(url, post, headers)

        webpage = self._download_webpage(
-            req, video_id, note=u'Downloading video page ...')
+            req, video_id, note='Downloading video page ...')
        title = self._html_search_regex(
-            r'<h1[^>]*>([^<]+)<', webpage, u'title')
+            r'<h1[^>]*>([^<]+)<', webpage, 'title')
        video_url = self._search_regex(
-            r'file:\s*"([^"]+)"', webpage, u'video URL')
-        duration_str = self._search_regex(
-            r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
-        duration = None if duration_str is None else int(duration_str)
+            r'file:\s*"([^"]+)"', webpage, 'video URL')
        thumbnail = self._search_regex(
-            r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
+            r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
-            'duration': duration,
            'thumbnail': thumbnail,
        }
--- a/youtube_dl/extractor/swrmediathek.py
+++ b/youtube_dl/extractor/swrmediathek.py
@@ -8,7 +8,7 @@ from ..utils import parse_duration


 class SWRMediathekIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/(?:content/)?player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'

    _TESTS = [{
        'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
@@ -52,6 +52,20 @@ class SWRMediathekIE(InfoExtractor):
            'uploader': 'SWR 2',
            'uploader_id': '284670',
        }
+    }, {
+        'url': 'http://swrmediathek.de/content/player.htm?show=52dc7e00-15c5-11e4-84bc-0026b975f2e6',
+        'md5': '881531487d0633080a8cc88d31ef896f',
+        'info_dict': {
+            'id': '52dc7e00-15c5-11e4-84bc-0026b975f2e6',
+            'ext': 'mp4',
+            'title': 'Familienspaß am Bodensee',
+            'description': 'md5:0b591225a32cfde7be1629ed49fe4315',
+            'thumbnail': 're:http://.*\.jpg',
+            'duration': 1784,
+            'upload_date': '20140727',
+            'uploader': 'SWR Fernsehen BW',
+            'uploader_id': '281130',
+        }
    }]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/tvplay.py
+++ b/youtube_dl/extractor/tvplay.py
@@ -0,0 +1,85 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    parse_iso8601,
+    qualities,
+)
+
+
+class TVPlayIE(InfoExtractor):
+    """Information extractor for tvplay.lv programme pages."""
+
+    _VALID_URL = r'http://(?:www\.)?tvplay\.lv/parraides/[^/]+/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
+            'info_dict': {
+                'id': '418113',
+                'ext': 'flv',
+                'title': 'Kādi ir īri? - Viņas melo labāk',
+                'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
+                'duration': 25,
+                'timestamp': 1406097056,
+                'upload_date': '20140723',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Extract metadata and formats for a tvplay.lv video URL.
+
+        Fetches the video metadata and the stream list from the
+        playapi.mtgx.tv JSON endpoints and builds one format per non-empty
+        stream URL.
+
+        Raises ExtractorError (expected) when the metadata marks the video
+        as geo blocked.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        video = self._download_json(
+            'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
+
+        # Only the title is treated as mandatory; every other metadata field
+        # is read with .get() so a missing key does not abort the extraction.
+        if video.get('is_geo_blocked'):
+            raise ExtractorError(
+                'This content is not available in your country due to copyright reasons', expected=True)
+
+        streams = self._download_json(
+            'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
+
+        quality = qualities(['hls', 'medium', 'high'])
+        formats = []
+        for format_id, video_url in streams['streams'].items():
+            if not video_url:
+                continue
+            fmt = {
+                'format_id': format_id,
+                'preference': quality(format_id),
+            }
+            if video_url.startswith('rtmp'):
+                # Split the RTMP URL into connection URL, app and play path;
+                # URLs that do not have this shape are skipped.
+                m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
+                if not m:
+                    continue
+                fmt.update({
+                    'ext': 'flv',
+                    'url': m.group('url'),
+                    'app': m.group('app'),
+                    'play_path': m.group('playpath'),
+                })
+            else:
+                fmt.update({
+                    'url': video_url,
+                })
+            formats.append(fmt)
+
+        self._sort_formats(formats)
+
+        created_at = video.get('created_at')
+        views = video.get('views') or {}
+
+        return {
+            'id': video_id,
+            'title': video['title'],
+            'description': video.get('description'),
+            'duration': video.get('duration'),
+            'timestamp': parse_iso8601(created_at) if created_at else None,
+            'view_count': views.get('total'),
+            'age_limit': video.get('age_limit', 0),
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/ubu.py
+++ b/youtube_dl/extractor/ubu.py
@@ -0,0 +1,56 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class UbuIE(InfoExtractor):
+    """Information extractor for ubu.com film pages."""
+
+    _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
+    _TEST = {
+        'url': 'http://ubu.com/film/her_noise.html',
+        'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9',
+        'info_dict': {
+            'id': 'her_noise',
+            'ext': 'mp4',
+            'title': 'Her Noise - The Making Of (2007)',
+            'duration': 3600,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Extract title, duration and formats from a ubu.com film page."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(
+            r'<title>.+?Film &amp; Video: ([^<]+)</title>', webpage, 'title')
+
+        # The page states the duration in whole minutes; convert to seconds.
+        minutes = int_or_none(self._html_search_regex(
+            r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None))
+        duration = minutes * 60 if minutes else minutes
+
+        # One format per pattern that matches, in the order listed here.
+        format_patterns = (
+            ('sq', r"'flashvars'\s*,\s*'file=([^']+)'"),
+            ('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'),
+        )
+        matches = [
+            (format_id, re.search(pattern, webpage))
+            for format_id, pattern in format_patterns]
+        formats = [{
+            'url': found.group(1),
+            'format_id': format_id,
+        } for format_id, found in matches if found]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'duration': duration,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -177,6 +177,7 @@ class VevoIE(InfoExtractor):
            self._downloader.report_warning(
                'Cannot download SMIL information, falling back to JSON ..')

+        self._sort_formats(formats)
        timestamp_ms = int(self._search_regex(
            r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))

--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dl/extractor/vidme.py
@@ -0,0 +1,68 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    str_to_int,
+)
+
+
+class VidmeIE(InfoExtractor):
+    """Information extractor for vid.me video pages."""
+
+    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
+    _TEST = {
+        'url': 'https://vid.me/QNB',
+        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
+        'info_dict': {
+            'id': 'QNB',
+            'ext': 'mp4',
+            'title': 'Fishing for piranha - the easy way',
+            'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
+            'duration': 119.92,
+            'timestamp': 1406313244,
+            'upload_date': '20140725',
+            'thumbnail': 're:^https?://.*\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Extract the video URL and metadata from a vid.me page."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        def og_int(prop):
+            # Optional integer read through _og_search_property.
+            return int_or_none(self._og_search_property(prop, webpage, fatal=False))
+
+        def page_count(pattern, name):
+            # Optional counter scraped from the page markup.
+            return str_to_int(self._html_search_regex(
+                pattern, webpage, name, fatal=False))
+
+        # The values are evaluated in the order written, so the lookups run
+        # (and report failures) in this order.
+        return {
+            'id': video_id,
+            'url': self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL'),
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage, default=''),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'timestamp': og_int('updated_time'),
+            'width': og_int('video:width'),
+            'height': og_int('video:height'),
+            'duration': float_or_none(self._html_search_regex(
+                r'data-duration="([^"]+)"', webpage, 'duration', fatal=False)),
+            'view_count': page_count(
+                r'<span class="video_views">\s*([\d,\.]+)\s*plays?', 'view count'),
+            'like_count': page_count(
+                r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
+                'like count'),
+            'comment_count': page_count(
+                r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
+                'comment count'),
+        }
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -121,6 +121,21 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                'videopassword': 'youtube-dl',
            },
        },
+        {
+            'url': 'http://vimeo.com/channels/keypeele/75629013',
+            'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
+            'note': 'Video is freely available via original URL '
+                    'and protected with password when accessed via http://vimeo.com/75629013',
+            'info_dict': {
+                'id': '75629013',
+                'ext': 'mp4',
+                'title': 'Key & Peele: Terrorist Interrogation',
+                'description': 'md5:8678b246399b070816b12313e8b4eb5c',
+                'uploader_id': 'atencio',
+                'uploader': 'Peter Atencio',
+                'duration': 187,
+            },
+        },
        {
            'url': 'http://vimeo.com/76979871',
            'md5': '3363dd6ffebe3784d56f4132317fd446',
@@ -196,8 +211,6 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
        video_id = mobj.group('id')
        if mobj.group('pro') or mobj.group('player'):
            url = 'http://player.vimeo.com/video/' + video_id
-        else:
-            url = 'https://vimeo.com/' + video_id

        # Retrieve video webpage to extract further information
        request = compat_urllib_request.Request(url, None, headers)
@@ -263,7 +276,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
        if video_thumbnail is None:
            video_thumbs = config["video"].get("thumbs")
            if video_thumbs and isinstance(video_thumbs, dict):
-                _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1]
+                _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]

        # Extract video description
        video_description = None
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -344,7 +344,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen(u'RTMP download detected')

-    def _extract_signature_function(self, video_id, player_url, slen):
+    def _signature_cache_id(self, example_sig):
+        """ Return the lengths of the dot-separated parts of a signature, joined by dots """
+        part_lengths = [
+            compat_str(len(chunk)) for chunk in example_sig.split('.')]
+        return u'.'.join(part_lengths)
+
+    def _extract_signature_function(self, video_id, player_url, example_sig):
        id_m = re.match(
            r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
            player_url)
@@ -354,7 +358,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        player_id = id_m.group('id')

        # Read from filesystem cache
-        func_id = '%s_%s_%d' % (player_type, player_id, slen)
+        func_id = '%s_%s_%s' % (
+            player_type, player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id
        cache_dir = get_cachedir(self._downloader.params)

@@ -388,7 +393,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):

        if cache_enabled:
            try:
-                test_string = u''.join(map(compat_chr, range(slen)))
+                test_string = u''.join(map(compat_chr, range(len(example_sig))))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                try:
@@ -404,7 +409,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):

        return res

-    def _print_sig_code(self, func, slen):
+    def _print_sig_code(self, func, example_sig):
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                starts = u'' if start == 0 else str(start)
@@ -433,11 +438,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            else:
                yield _genslice(start, i, step)

-        test_string = u''.join(map(compat_chr, range(slen)))
+        test_string = u''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = u' + '.join(gen_sig_code(cache_spec))
-        code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
+        signature_id_tuple = '(%s)' % (
+            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
+        code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
+                u'    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen(u'Extracted signature function:\n' + code)

    def _parse_sig_js(self, jscode):
@@ -465,20 +473,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        if player_url.startswith(u'//'):
            player_url = u'https:' + player_url
        try:
-            player_id = (player_url, len(s))
+            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
-                    video_id, player_url, len(s)
+                    video_id, player_url, s
                )
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
-                self._print_sig_code(func, len(s))
+                self._print_sig_code(func, s)
            return func(s)
        except Exception as e:
            tb = traceback.format_exc()
            raise ExtractorError(
-                u'Automatic signature extraction failed: ' + tb, cause=e)
+                u'Signature extraction failed: ' + tb, cause=e)

    def _get_available_subtitles(self, video_id, webpage):
        try:
@@ -806,51 +814,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            url_map = {}
            for url_data_str in encoded_url_map.split(','):
                url_data = compat_parse_qs(url_data_str)
-                if 'itag' in url_data and 'url' in url_data:
-                    url = url_data['url'][0]
-                    if 'sig' in url_data:
-                        url += '&signature=' + url_data['sig'][0]
-                    elif 's' in url_data:
-                        encrypted_sig = url_data['s'][0]
+                if 'itag' not in url_data or 'url' not in url_data:
+                    continue
+                format_id = url_data['itag'][0]
+                url = url_data['url'][0]

-                        if not age_gate:
-                            jsplayer_url_json = self._search_regex(
-                                r'"assets":.+?"js":\s*("[^"]+")',
-                                video_webpage, u'JS player URL')
-                            player_url = json.loads(jsplayer_url_json)
+                if 'sig' in url_data:
+                    url += '&signature=' + url_data['sig'][0]
+                elif 's' in url_data:
+                    encrypted_sig = url_data['s'][0]
+
+                    if not age_gate:
+                        jsplayer_url_json = self._search_regex(
+                            r'"assets":.+?"js":\s*("[^"]+")',
+                            video_webpage, u'JS player URL')
+                        player_url = json.loads(jsplayer_url_json)
+                    if player_url is None:
+                        player_url_json = self._search_regex(
+                            r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+                            video_webpage, u'age gate player URL')
+                        player_url = json.loads(player_url_json)
+
+                    if self._downloader.params.get('verbose'):
                        if player_url is None:
-                            player_url_json = self._search_regex(
-                                r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
-                                video_webpage, u'age gate player URL')
-                            player_url = json.loads(player_url_json)
-
-                        if self._downloader.params.get('verbose'):
-                            if player_url is None:
-                                player_version = 'unknown'
-                                player_desc = 'unknown'
+                            player_version = 'unknown'
+                            player_desc = 'unknown'
+                        else:
+                            if player_url.endswith('swf'):
+                                player_version = self._search_regex(
+                                    r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
+                                    u'flash player', fatal=False)
+                                player_desc = 'flash player %s' % player_version
                            else:
-                                if player_url.endswith('swf'):
-                                    player_version = self._search_regex(
-                                        r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
-                                        u'flash player', fatal=False)
-                                    player_desc = 'flash player %s' % player_version
-                                else:
-                                    player_version = self._search_regex(
-                                        r'html5player-([^/]+?)(?:/html5player)?\.js',
-                                        player_url,
-                                        'html5 player', fatal=False)
-                                    player_desc = u'html5 player %s' % player_version
+                                player_version = self._search_regex(
+                                    r'html5player-([^/]+?)(?:/html5player)?\.js',
+                                    player_url,
+                                    'html5 player', fatal=False)
+                                player_desc = u'html5 player %s' % player_version

-                            parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
-                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
-                                (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
+                        parts_sizes = self._signature_cache_id(encrypted_sig)
+                        self.to_screen(u'{%s} signature length %s, %s' %
+                            (format_id, parts_sizes, player_desc))

-                        signature = self._decrypt_signature(
-                            encrypted_sig, video_id, player_url, age_gate)
-                        url += '&signature=' + signature
-                    if 'ratebypass' not in url:
-                        url += '&ratebypass=yes'
-                    url_map[url_data['itag'][0]] = url
+                    signature = self._decrypt_signature(
+                        encrypted_sig, video_id, player_url, age_gate)
+                    url += '&signature=' + signature
+                if 'ratebypass' not in url:
+                    url += '&ratebypass=yes'
+                url_map[format_id] = url
            formats = _map_to_format_list(url_map)
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -242,8 +242,8 @@ else:
 if sys.version_info >= (2,7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
-        assert re.match(r'^[a-zA-Z]+$', key)
-        assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
+        # key and val are interpolated unescaped into the predicate built
+        # below; these asserts restrict both to a limited character set.
+        assert re.match(r'^[a-zA-Z-]+$', key)
+        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        expr = xpath + u"[@%s='%s']" % (key, val)
        return node.find(expr)
 else:
@@ -852,6 +852,8 @@ def unified_strdate(date_str):
    return upload_date

 def determine_ext(url, default_ext=u'unknown_video'):
+    if url is None:
+        return default_ext
    guess = url.partition(u'?')[0].rpartition(u'.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.07.25'
+__version__ = '2014.08.05'
Author	SHA1	Message	Date
Philipp Hagemeister	6de0595eb8	release 2014.08.05	2014-08-05 17:02:47 +02:00
Sergey M․	e48a2c646d	Credit @matrixik for #3441	2014-08-05 19:09:11 +07:00
Sergey M.	0f831a1a92	Merge pull request #3441 from matrixik/patch-1 [vimeo] Ignore video 'base' thumbnail (Closes #3438)	2014-08-05 19:07:05 +07:00
Dobrosław Żybort	3e510af38d	[vimeo] Ignore video 'base' thumbnail (Closes #3438 )	2014-08-04 21:37:36 +02:00
Sergey M․	548f31d99c	[vimeo] Use original URL for standard vimeo.com links (Closes #3428 ) Some videos that are freely accessible without password via the original URL (e.g. http://vimeo.com/channels/keypeele/75629013) ask for password when accessed via http://vimeo.com/<video_id>.	2014-08-04 00:04:47 +07:00
Sergey M․	493987fefe	[ubu] Add missing whitespace	2014-08-03 01:20:51 +07:00
Philipp Hagemeister	c97797a737	release 2014.08.02.1	2014-08-02 18:16:52 +02:00
Sergey M․	8d7d9d3452	[pbs] Add support for frontline videos (Closes #3414 #3405 )	2014-08-02 19:09:36 +07:00
Sergey M․	7a5e7b303c	[ubu] Add extractor (Close #3418 )	2014-08-02 17:56:01 +07:00
Philipp Hagemeister	61aabb9d70	release 2014.08.02	2014-08-02 12:25:40 +02:00
Philipp Hagemeister	62af3a0eb5	[youtube] Use new signature cache ID for in-memory cache as well	2014-08-02 12:23:18 +02:00
Philipp Hagemeister	60064c53f1	[youtube] Make cache ID a tuple of lengths instead of just the whole length	2014-08-02 12:21:53 +02:00
Philipp Hagemeister	98eb1c3fa2	[youtube] Clean up -v signature output	2014-08-02 11:55:20 +02:00
Philipp Hagemeister	201e9eaa0e	[youtube] Show format ID in signature deobfuscation -v output	2014-08-02 06:35:18 +02:00
Sergey M․	9afa6ede21	Merge branch 'naglis-izlesene'	2014-08-01 19:08:27 +07:00
Sergey M․	f4776371ae	[izlesene] Minor changes	2014-08-01 19:08:09 +07:00
Sergey M․	328a20bf9c	Merge branch 'izlesene' of https://github.com/naglis/youtube-dl into naglis-izlesene	2014-08-01 18:16:47 +07:00
Sergey M․	5622f29ae4	[ard] Quote path part instead of whole URL encode	2014-07-31 21:23:15 +07:00
Sergey M․	b4f23afbd1	[ard] Encode url (Closes #3412 )	2014-07-31 20:35:29 +07:00
Sergey M․	0138968a6a	[vidme] Add extractor (Closes #3404 )	2014-07-31 20:26:52 +07:00
Philipp Hagemeister	4f31d0f2b7	release 2014.07.30	2014-07-30 09:50:22 +02:00
Philipp Hagemeister	bff74bdd1a	[vevo] Sort formats (Fixes #3399 )	2014-07-30 09:49:55 +02:00
Philipp Hagemeister	10b04ff7f4	Move --bidi-workaround to workarounds option group Duh.	2014-07-29 17:19:19 +02:00
Philipp Hagemeister	1f7ccb9014	[generic] Add --default-search fixup_error This restores the ability to enter URLs without a scheme (and default to http), but still fail if the input is a search term.	2014-07-29 17:17:46 +02:00
Sergey M․	c7b3209668	[swrmediathek] Improve _VALID_URL	2014-07-29 20:43:31 +07:00
Philipp Hagemeister	895ba7d1dd	[gamestar] Use helper methods to not break if something changes (#3393 )	2014-07-29 05:59:47 +02:00
SyxbEaEQ2	a2a1b0baa2	[gamestar] Add new extractor (init)	2014-07-29 00:37:18 +02:00
SyxbEaEQ2	8646eb790e	[gamestar] Add new extractor	2014-07-29 00:31:33 +02:00
Jaime Marquínez Ferrándiz	f036a6328e	[extractor/common] _extract_f4m_formats: Use more specific messages when downloading the manifest	2014-07-28 15:42:19 +02:00
Jaime Marquínez Ferrándiz	31bb8d3f51	[bloomberg] Extract the available formats (closes #2776 ) It uses a helper method in the InfoExtractor class. The downloader will pick the requested formats using the bitrate in the info dict.	2014-07-28 15:32:38 +02:00
Jaime Marquínez Ferrándiz	4958ae2058	[francetv] Fix wrong variable name	2014-07-28 15:21:05 +02:00
Jaime Marquínez Ferrándiz	7e8d73c183	[francetv] Extract all the available formats (#3278 ) For some videos the resolution is not included in the url, we will need to look in the m3u8 manifest.	2014-07-28 14:37:13 +02:00
Sergey M․	65bc504db8	[br] Extract duration	2014-07-28 00:51:38 +07:00
Sergey M․	0fc74a0d91	[br] Fix test	2014-07-28 00:45:46 +07:00
Sergey M․	8d2cc6fbb1	[blinkx] Fix duration	2014-07-28 00:40:17 +07:00
Sergey M․	a954584f63	[bandcamp] Replace 404 playlist test	2014-07-28 00:27:27 +07:00
Sergey M․	cb3ff6fb01	[godtube] Add extractor (Closes #3367 )	2014-07-27 02:38:05 +07:00
Sergey M․	71aa656d13	[streamcloud] Remove duration and modernize (Closes #3374 )	2014-07-27 02:05:06 +07:00
Naglis Jonaitis	366b1f3cfe	[izlesene] Add new extractor. Closes #3184	2014-07-26 14:35:23 +03:00
Jaime Marquínez Ferrándiz	64ce58db38	[abc] Add extractor (closes #3361 )	2014-07-26 00:05:37 +02:00
Philipp Hagemeister	11b85ce62e	[YouTubeDL] Best practices (Closes #3370 )	2014-07-25 23:37:32 +02:00
Sergey M․	1220352ff7	[tvplay] Add extractor (Closes #3245 )	2014-07-25 21:33:29 +07:00
Philipp Hagemeister	8f3034d871	[livestream] Do not fail if SMIL download fails	2014-07-25 11:53:52 +02:00
Philipp Hagemeister	7fa547ab02	[livestream] Make clipBegin optional in SMIL	2014-07-25 11:50:10 +02:00
Philipp Hagemeister	3182f3e2dc	[justin.tv] Fix page reporting (#3352 ) youtube-dl -j http://www.twitch.tv/fang_i3anger still fails though.	2014-07-25 11:46:53 +02:00
Philipp Hagemeister	cbf915f3f6	[livestream] Parse SMIL (#2713 )	2014-07-25 11:39:17 +02:00
Philipp Hagemeister	b490b8849a	release 2014.07.25.1	2014-07-25 10:47:35 +02:00
Philipp Hagemeister	5d2519e5bf	[gdcvault] Add support for direct URL video type Fixes #3356	2014-07-25 10:45:07 +02:00
Philipp Hagemeister	c3415d1bac	[extractor/common] PEP8	2014-07-25 10:43:03 +02:00