release 2014.03.10

Merge branch 'master' of github.com:rg3/youtube-dl
[generic] Use a different URL for the generic RSS test (Closes #2532)
2025-07-22 21:31:40 -05:00 · 2014-03-10 13:04:20 +01:00 · 2014-03-10 13:03:52 +01:00 · 2014-03-10 13:03:39 +01:00 · 2014-03-10 12:59:19 +01:00 · 2014-03-10 18:52:00 +07:00
27 changed files with 470 additions and 272 deletions
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL
+from youtube_dl.extractor.common import InfoExtractor
+from youtube_dl.extractor import YoutubeIE, get_info_extractor
+
+
+class TestIE(InfoExtractor):
+    pass
+
+
+class TestInfoExtractor(unittest.TestCase):
+    def setUp(self):
+        self.ie = TestIE(FakeYDL())
+
+    def test_ie_key(self):
+        self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
+
+    def test_html_search_regex(self):
+        html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
+        search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
+        self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video')
+
+    def test_opengraph(self):
+        ie = self.ie
+        html = '''
+            <meta name="og:title" content='Foo'/>
+            <meta content="Some video's description " name="og:description"/>
+            <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&amp;key2=val2'/>
+            '''
+        self.assertEqual(ie._og_search_title(html), 'Foo')
+        self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
+        self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
+
+if __name__ == '__main__':
+    unittest.main()
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@ -99,7 +99,7 @@ class TestPlaylists(unittest.TestCase):
        result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], '5124905')
-        self.assertTrue(len(result['entries']) >= 11)
+        self.assertTrue(len(result['entries']) >= 6)

    def test_soundcloud_set(self):
        dl = FakeYDL()
@ -254,9 +254,9 @@ class TestPlaylists(unittest.TestCase):
    def test_generic_rss_feed(self):
        dl = FakeYDL()
        ie = GenericIE(dl)
-        result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml')
+        result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml')
+        self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
        self.assertEqual(result['title'], 'Zero Punctuation')
        self.assertTrue(len(result['entries']) > 10)

--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -33,6 +33,7 @@ from youtube_dl.utils import (
    unified_strdate,
    unsmuggle_url,
    url_basename,
+    urlencode_postdata,
    xpath_with_ns,
 )

@ -261,5 +262,9 @@ class TestUtil(unittest.TestCase):
            bam''')
        self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])

+    def test_urlencode_postdata(self):
+        data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
+        self.assertTrue(isinstance(data, bytes))
+
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -370,12 +370,15 @@ class YoutubeDL(object):
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
-        if self._err_file.isatty() and os.name != 'nt':
-            _msg_header = '\033[0;33mWARNING:\033[0m'
+        if self.params.get('logger') is not None:
+            self.params['logger'].warning(message)
        else:
-            _msg_header = 'WARNING:'
-        warning_message = '%s %s' % (_msg_header, message)
-        self.to_stderr(warning_message)
+            if self._err_file.isatty() and os.name != 'nt':
+                _msg_header = '\033[0;33mWARNING:\033[0m'
+            else:
+                _msg_header = 'WARNING:'
+            warning_message = '%s %s' % (_msg_header, message)
+            self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
@ -413,9 +416,9 @@ class YoutubeDL(object):
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
-                    res = '%sp' % template_dict['height']
+                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
-                    res = '?x%d' % template_dict['width']
+                    template_dict['resolution'] = '?x%d' % template_dict['width']

            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
@ -918,7 +921,7 @@ class YoutubeDL(object):
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                    try:
-                        uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
+                        uf = self.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -50,6 +50,7 @@ __authors__  = (
    'Anthony Weems',
    'David Wagner',
    'Juan C. Olivares',
+    'Mattias Harrysson',
 )

 __license__ = 'Public Domain'
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
        while count <= retries:
            # Establish connection
            try:
-                data = compat_urllib_request.urlopen(request)
+                data = self.ydl.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
@ -59,7 +59,7 @@ class HttpFD(FileDownloader):
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
-                        data = compat_urllib_request.urlopen(basic_request)
+                        data = self.ydl.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as err:
                        if err.code < 500 or err.code >= 600:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -1,5 +1,6 @@
 from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
+from .aftonbladet import AftonbladetIE
 from .anitube import AnitubeIE
 from .aparat import AparatIE
 from .appletrailers import AppleTrailersIE
--- a/youtube_dl/extractor/aftonbladet.py
+++ b/youtube_dl/extractor/aftonbladet.py
@ -0,0 +1,69 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+import re
+
+from .common import InfoExtractor
+
+
+class AftonbladetIE(InfoExtractor):
+    _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
+    _TEST = {
+        'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
+        'info_dict': {
+            'id': 'article36015',
+            'ext': 'mp4',
+            'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
+            'description': 'Jupiters måne mest aktiv av alla himlakroppar',
+            'upload_date': '20140306',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.search(self._VALID_URL, url)
+
+        video_id = mobj.group('video_id')
+        webpage = self._download_webpage(url, video_id)
+
+        # find internal video meta data
+        META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
+        internal_meta_id = self._html_search_regex(
+            r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
+        internal_meta_url = META_URL % internal_meta_id
+        internal_meta_json = self._download_json(
+            internal_meta_url, video_id, 'Downloading video meta data')
+
+        # find internal video formats
+        FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
+        internal_video_id = internal_meta_json['videoId']
+        internal_formats_url = FORMATS_URL % internal_video_id
+        internal_formats_json = self._download_json(
+            internal_formats_url, video_id, 'Downloading video formats')
+
+        formats = []
+        for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
+            p = fmt['paths'][0]
+            formats.append({
+                'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
+                'ext': 'mp4',
+                'width': fmt['width'],
+                'height': fmt['height'],
+                'tbr': fmt['bitrate'],
+                'protocol': 'http',
+            })
+        self._sort_formats(formats)
+
+        timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished'])
+        upload_date = timestamp.strftime('%Y%m%d')
+
+        return {
+            'id': video_id,
+            'title': internal_meta_json['title'],
+            'formats': formats,
+            'thumbnail': internal_meta_json['imageUrl'],
+            'description': internal_meta_json['shortPreamble'],
+            'upload_date': upload_date,
+            'duration': internal_meta_json['duration'],
+            'view_count': internal_meta_json['views'],
+        }
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@ -72,18 +72,22 @@ class ArteTvIE(InfoExtractor):
            return self._extract_liveweb(url, name, lang)

        if re.search(self._LIVE_URL, url) is not None:
-            raise ExtractorError(u'Arte live streams are not yet supported, sorry')
+            raise ExtractorError('Arte live streams are not yet supported, sorry')
            # self.extractLiveStream(url)
            # return

+        raise ExtractorError('No video found')
+
    def _extract_video(self, url, video_id, lang):
        """Extract from videos.arte.tv"""
        ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
        ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
-        ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
+        ref_xml_doc = self._download_xml(
+            ref_xml_url, video_id, note='Downloading metadata')
        config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
        config_xml_url = config_node.attrib['ref']
-        config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
+        config_xml = self._download_webpage(
+            config_xml_url, video_id, note='Downloading configuration')

        video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
        def _key(m):
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@ -35,15 +35,15 @@ class CollegeHumorIE(InfoExtractor):
    },
    # embedded youtube video
    {
-        'url': 'http://www.collegehumor.com/embed/6950457',
+        'url': 'http://www.collegehumor.com/embed/6950306',
        'info_dict': {
-            'id': 'W5gMp3ZjYg4',
+            'id': 'Z-bao9fg6Yc',
            'ext': 'mp4',
-            'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
-            'uploader': 'FunnyPlox TV',
-            'uploader_id': 'funnyploxtv',
-            'description': 'md5:7ded37421526d54afdf005e25bc2b7a3',
-            'upload_date': '20140128',
+            'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
+            'uploader': 'Mark Dice',
+            'uploader_id': 'MarkDice',
+            'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
+            'upload_date': '20140127',
        },
        'params': {
            'skip_download': True,
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@ -11,16 +11,15 @@ from ..utils import (
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
+    urlencode_postdata,

    ExtractorError,
 )


 class FacebookIE(InfoExtractor):
-    """Information Extractor for Facebook"""
-
    _VALID_URL = r'''(?x)
-        (?:https?://)?(?:\w+\.)?facebook\.com/
+        https?://(?:\w+\.)?facebook\.com/
        (?:[^#?]*\#!/)?
        (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
        (?:v|video_id)=(?P<id>[0-9]+)
@ -36,14 +35,10 @@ class FacebookIE(InfoExtractor):
            'id': '120708114770723',
            'ext': 'mp4',
            'duration': 279,
-            'title': 'PEOPLE ARE AWESOME 2013'
+            'title': 'PEOPLE ARE AWESOME 2013',
        }
    }

-    def report_login(self):
-        """Report attempt to log in."""
-        self.to_screen('Logging in')
-
    def _login(self):
        (useremail, password) = self._get_login_info()
        if useremail is None:
@ -51,8 +46,8 @@ class FacebookIE(InfoExtractor):

        login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
        login_page_req.add_header('Cookie', 'locale=en_US')
-        self.report_login()
-        login_page = self._download_webpage(login_page_req, None, note=False,
+        login_page = self._download_webpage(login_page_req, None,
+            note='Downloading login page',
            errnote='Unable to download login page')
        lsd = self._search_regex(
            r'<input type="hidden" name="lsd" value="([^"]*)"',
@ -70,23 +65,25 @@ class FacebookIE(InfoExtractor):
            'timezone': '-60',
            'trynum': '1',
            }
-        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
+        request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        try:
-            login_results = compat_urllib_request.urlopen(request).read()
+            login_results = self._download_webpage(request, None,
+                note='Logging in', errnote='unable to fetch login page')
            if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
                self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                return

            check_form = {
-                'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, 'fb_dtsg'),
+                'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
                'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
                'name_action_selected': 'dont_save',
-                'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, 'continue'),
+                'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
            }
-            check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
+            check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
            check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
-            check_response = compat_urllib_request.urlopen(check_req).read()
+            check_response = self._download_webpage(check_req, None,
+                note='Confirming login')
            if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
                self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@ -98,8 +95,6 @@ class FacebookIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
        video_id = mobj.group('id')

        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
@ -125,18 +120,14 @@ class FacebookIE(InfoExtractor):
            video_url = video_data['sd_src']
        if not video_url:
            raise ExtractorError('Cannot find video URL')
-        video_duration = int(video_data['video_duration'])
-        thumbnail = video_data['thumbnail_src']

        video_title = self._html_search_regex(
            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')

-        info = {
+        return {
            'id': video_id,
            'title': video_title,
            'url': video_url,
-            'ext': 'mp4',
-            'duration': video_duration,
-            'thumbnail': thumbnail,
+            'duration': int(video_data['video_duration']),
+            'thumbnail': video_data['thumbnail_src'],
        }
-        return [info]
--- a/youtube_dl/extractor/gamekings.py
+++ b/youtube_dl/extractor/gamekings.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@ -6,13 +8,14 @@ from .common import InfoExtractor
 class GamekingsIE(InfoExtractor):
    _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
    _TEST = {
-        u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
-        u'file': u'20130811.mp4',
+        'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
        # MD5 is flaky, seems to change regularly
-        #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
+        # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
        u'info_dict': {
-            u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
-            u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
+            'id': '20130811',
+            'ext': 'mp4',
+            'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
+            'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4',
        }
    }

--- a/youtube_dl/extractor/jukebox.py
+++ b/youtube_dl/extractor/jukebox.py
@ -1,56 +1,61 @@
-# coding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
+    RegexNotFoundError,
    unescapeHTML,
 )

+
 class JukeboxIE(InfoExtractor):
    _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
-    _IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
-    _VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
-    _TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
-    _IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"'
+    _TEST = {
+        'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
+        'md5': '5dc6477e74b1e37042ac5acedd8413e5',
+        'info_dict': {
+            'id': 'r303r',
+            'ext': 'flv',
+            'title': 'Kosheen-En Vivo Pride',
+            'uploader': 'Kosheen',
+        },
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        html = self._download_webpage(url, video_id)
-
-        mobj = re.search(self._IFRAME, html)
-        if mobj is None:
-            raise ExtractorError(u'Cannot extract iframe url')
-        iframe_url = unescapeHTML(mobj.group('iframe'))
+        iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url'))

        iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
-        mobj = re.search(r'class="jkb_waiting"', iframe_html)
-        if mobj is not None:
-            raise ExtractorError(u'Video is not available(in your country?)!')
+        if re.search(r'class="jkb_waiting"', iframe_html) is not None:
+            raise ExtractorError('Video is not available(in your country?)!')

        self.report_extraction(video_id)

-        mobj = re.search(self._VIDEO_URL, iframe_html)
-        if mobj is None:
-            mobj = re.search(self._IS_YOUTUBE, iframe_html)
-            if mobj is None:
-                raise ExtractorError(u'Cannot extract video url')
-            youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/')
-            self.to_screen(u'Youtube video detected')
-            return self.url_result(youtube_url,ie='Youtube')
-        video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/')
-        video_ext = unescapeHTML(mobj.group('video_ext'))
+        try:
+            video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
+                iframe_html, 'video url')
+            video_url = unescapeHTML(video_url).replace('\/', '/')
+        except RegexNotFoundError:
+            youtube_url = self._search_regex(
+                r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"',
+                iframe_html, 'youtube url')
+            youtube_url = unescapeHTML(youtube_url).replace('\/', '/')
+            self.to_screen('Youtube video detected')
+            return self.url_result(youtube_url, ie='Youtube')

-        mobj = re.search(self._TITLE, html)
-        if mobj is None:
-            raise ExtractorError(u'Cannot extract title')
-        title = unescapeHTML(mobj.group('title'))
-        artist = unescapeHTML(mobj.group('artist'))
+        title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
+            html, 'title')
+        artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
+            html, 'artist')

-        return [{'id': video_id,
-                 'url': video_url,
-                 'title': artist + '-' + title,
-                 'ext': video_ext
-                 }]
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': artist + '-' + title,
+            'uploader': artist,
+        }
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@ -8,7 +8,9 @@ from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
-    ExtractorError
+    ExtractorError,
+    int_or_none,
+    compat_str,
 )


@ -19,16 +21,17 @@ class LyndaIE(SubtitlesInfoExtractor):
    _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
    _NETRC_MACHINE = 'lynda'

-    _SUCCESSFUL_LOGIN_REGEX = r'<a href="https://www.lynda.com/home/userAccount/ChangeContactInfo.aspx" data-qa="eyebrow_account_menu">My account'
+    _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
    _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'

    ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'

    _TEST = {
        'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
-        'file': '114408.mp4',
        'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
        'info_dict': {
+            'id': '114408',
+            'ext': 'mp4',
            'title': 'Using the exercise files',
            'duration': 68
        }
@ -41,27 +44,44 @@ class LyndaIE(SubtitlesInfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

-        page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
-                                      video_id, 'Downloading video JSON')
+        page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
+            'Downloading video JSON')
        video_json = json.loads(page)

        if 'Status' in video_json:
            raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)

        if video_json['HasAccess'] is False:
-            raise ExtractorError('Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
+            raise ExtractorError(
+                'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)

-        video_id = video_json['ID']
+        video_id = compat_str(video_json['ID'])
        duration = video_json['DurationInSeconds']
        title = video_json['Title']

-        formats = [{'url': fmt['Url'],
+        formats = []
+
+        fmts = video_json.get('Formats')
+        if fmts:
+            formats.extend([
+                {
+                    'url': fmt['Url'],
                    'ext': fmt['Extension'],
                    'width': fmt['Width'],
                    'height': fmt['Height'],
                    'filesize': fmt['FileSize'],
                    'format_id': str(fmt['Resolution'])
-                    } for fmt in video_json['Formats']]
+                } for fmt in fmts])
+
+        prioritized_streams = video_json.get('PrioritizedStreams')
+        if prioritized_streams:
+            formats.extend([
+                {
+                    'url': video_url,
+                    'width': int_or_none(format_id),
+                    'format_id': format_id,
+                } for format_id, video_url in prioritized_streams['0'].items()
+            ])

        self._sort_formats(formats)

@ -91,7 +111,7 @@ class LyndaIE(SubtitlesInfoExtractor):
            'stayPut': 'false'
        }        
        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
-        login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
+        login_page = self._download_webpage(request, None, 'Logging in as %s' % username)

        # Not (yet) logged in
        m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
@ -116,7 +136,7 @@ class LyndaIE(SubtitlesInfoExtractor):
                    'stayPut': 'false',
                }
                request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
-                login_page = self._download_webpage(request, None, note='Confirming log in and log out from another device')
+                login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')

        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
            raise ExtractorError('Unable to log in')
@ -150,7 +170,7 @@ class LyndaIE(SubtitlesInfoExtractor):

    def _get_available_subtitles(self, video_id, webpage):
        url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
-        sub = self._download_webpage(url, None, note=False)
+        sub = self._download_webpage(url, None, False)
        sub_json = json.loads(sub)
        return {'en': url} if len(sub_json) > 0 else {}

@ -179,6 +199,9 @@ class LyndaCourseIE(InfoExtractor):
        videos = []
        (username, _) = self._get_login_info()

+        # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
+        # by single video API anymore
+
        for chapter in course_json['Chapters']:
            for video in chapter['Videos']:
                if username is None and video['HasAccess'] is False:
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@ -5,9 +5,12 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
+    compat_urllib_request,
    ExtractorError,
    find_xpath_attr,
    fix_xml_ampersands,
+    HEADRequest,
+    unescapeHTML,
    url_basename,
    RegexNotFoundError,
 )
@ -18,6 +21,7 @@ def _media_xml_tag(tag):


 class MTVServicesInfoExtractor(InfoExtractor):
+    _MOBILE_TEMPLATE = None
    @staticmethod
    def _id_from_uri(uri):
        return uri.split(':')[-1]
@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor):
        else:
            return thumb_node.attrib['url']

-    def _extract_video_formats(self, mdoc):
-        if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None:
-            raise ExtractorError('This video is not available from your country.', expected=True)
+    def _extract_mobile_video_formats(self, mtvn_id):
+        webpage_url = self._MOBILE_TEMPLATE % mtvn_id
+        req = compat_urllib_request.Request(webpage_url)
+        # Otherwise we get a webpage that would execute some javascript
+        req.add_header('Youtubedl-user-agent', 'curl/7')
+        webpage = self._download_webpage(req, mtvn_id,
+            'Downloading mobile page')
+        metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
+        req = HEADRequest(metrics_url)
+        response = self._request_webpage(req, mtvn_id, 'Resolving url')
+        url = response.geturl()
+        # Transform the url to get the best quality:
+        url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
+        return [{'url': url,'ext': 'mp4'}]
+
+    def _extract_video_formats(self, mdoc, mtvn_id):
+        if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
+            if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
+                self.to_screen('The normal version is not available from your '
+                    'country, trying with the mobile version')
+                return self._extract_mobile_video_formats(mtvn_id)
+            raise ExtractorError('This video is not available from your country.',
+                expected=True)

        formats = []
        for rendition in mdoc.findall('.//rendition'):
@ -94,9 +118,16 @@ class MTVServicesInfoExtractor(InfoExtractor):
            raise ExtractorError('Could not find video title')
        title = title.strip()

+        # This is a short id that's used in the webpage urls
+        mtvn_id = None
+        mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
+                'scheme', 'urn:mtvn:id')
+        if mtvn_id_node is not None:
+            mtvn_id = mtvn_id_node.text
+
        return {
            'title': title,
-            'formats': self._extract_video_formats(mediagen_doc),
+            'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
            'id': video_id,
            'thumbnail': self._get_thumbnail_url(uri, itemdoc),
            'description': description,
--- a/youtube_dl/extractor/myvideo.py
+++ b/youtube_dl/extractor/myvideo.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import binascii
 import base64
 import hashlib
@ -14,18 +16,16 @@ from ..utils import (
 )


-
 class MyVideoIE(InfoExtractor):
-    """Information Extractor for myvideo.de."""
-
-    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
-    IE_NAME = u'myvideo'
+    _VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
+    IE_NAME = 'myvideo'
    _TEST = {
-        u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
-        u'file': u'8229274.flv',
-        u'md5': u'2d2753e8130479ba2cb7e0a37002053e',
-        u'info_dict': {
-            u"title": u"bowling-fail-or-win"
+        'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
+        'md5': '2d2753e8130479ba2cb7e0a37002053e',
+        'info_dict': {
+            'id': '8229274',
+            'ext': 'flv',
+            'title': 'bowling-fail-or-win',
        }
    }

@ -53,10 +53,7 @@ class MyVideoIE(InfoExtractor):

    def _real_extract(self,url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'invalid URL: %s' % url)
-
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')

        GK = (
          b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
@ -74,37 +71,33 @@ class MyVideoIE(InfoExtractor):
            video_url = mobj.group(1) + '.flv'

            video_title = self._html_search_regex('<title>([^<]+)</title>',
-                webpage, u'title')
+                webpage, 'title')

-            video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
-
-            return [{
-                'id':       video_id,
-                'url':      video_url,
-                'uploader': None,
-                'upload_date':  None,
-                'title':    video_title,
-                'ext':      video_ext,
-            }]
+            return {
+                'id': video_id,
+                'url': video_url,
+                'title': video_title,
+            }

        mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
        if mobj is not None:
            request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
            response = self._download_webpage(request, video_id,
-                                              u'Downloading video info')
+                                              'Downloading video info')
            info = json.loads(base64.b64decode(response).decode('utf-8'))
-            return {'id': video_id,
-                    'title': info['title'],
-                    'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
-                    'play_path': info['filename'],
-                    'ext': 'flv',
-                    'thumbnail': info['thumbnail'][0]['url'],
-                    }
+            return {
+                'id': video_id,
+                'title': info['title'],
+                'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
+                'play_path': info['filename'],
+                'ext': 'flv',
+                'thumbnail': info['thumbnail'][0]['url'],
+            }

        # try encxml
        mobj = re.search('var flashvars={(.+?)}', webpage)
        if mobj is None:
-            raise ExtractorError(u'Unable to extract video')
+            raise ExtractorError('Unable to extract video')

        params = {}
        encxml = ''
@ -118,7 +111,7 @@ class MyVideoIE(InfoExtractor):
            params['domain'] = 'www.myvideo.de'
        xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
        if 'flash_playertype=MTV' in xmldata_url:
-            self._downloader.report_warning(u'avoiding MTV player')
+            self._downloader.report_warning('avoiding MTV player')
            xmldata_url = (
                'http://www.myvideo.de/dynamic/get_player_video_xml.php'
                '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
@ -144,7 +137,7 @@ class MyVideoIE(InfoExtractor):
            video_url = compat_urllib_parse.unquote(mobj.group(1))
            if 'myvideo2flash' in video_url:
                self.report_warning(
-                    u'Rewriting URL to use unencrypted rtmp:// ...',
+                    'Rewriting URL to use unencrypted rtmp:// ...',
                    video_id)
                video_url = video_url.replace('rtmpe://', 'rtmp://')

@ -152,39 +145,31 @@ class MyVideoIE(InfoExtractor):
            # extract non rtmp videos
            mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
            if mobj is None:
-                raise ExtractorError(u'unable to extract url')
+                raise ExtractorError('unable to extract url')
            video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))

-        video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
+        video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
        video_file = compat_urllib_parse.unquote(video_file)

        if not video_file.endswith('f4m'):
            ppath, prefix = video_file.split('.')
            video_playpath = '%s:%s' % (prefix, ppath)
-            video_hls_playlist = ''
        else:
            video_playpath = ''
-            video_hls_playlist = (
-                video_file
-            ).replace('.f4m', '.m3u8')

-        video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
+        video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
        video_swfobj = compat_urllib_parse.unquote(video_swfobj)

        video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
-            webpage, u'title')
+            webpage, 'title')

-        return [{
-            'id':                 video_id,
-            'url':                video_url,
-            'tc_url':             video_url,
-            'uploader':           None,
-            'upload_date':        None,
-            'title':              video_title,
-            'ext':                u'flv',
-            'play_path':          video_playpath,
-            'video_file':         video_file,
-            'video_hls_playlist': video_hls_playlist,
-            'player_url':         video_swfobj,
-        }]
+        return {
+            'id': video_id,
+            'url': video_url,
+            'tc_url': video_url,
+            'title': video_title,
+            'ext': 'flv',
+            'play_path': video_playpath,
+            'player_url': video_swfobj,
+        }

--- a/youtube_dl/extractor/photobucket.py
+++ b/youtube_dl/extractor/photobucket.py
@ -1,76 +1,43 @@
+from __future__ import unicode_literals
+
 import datetime
 import json
 import re

 from .common import InfoExtractor

-from ..utils import (
-    ExtractorError,
-)

 class PhotobucketIE(InfoExtractor):
-    """Information extractor for photobucket.com."""
-
-    # TODO: the original _VALID_URL was:
-    # r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
-    # Check if it's necessary to keep the old extracion process
-    _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
-    IE_NAME = u'photobucket'
+    _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
    _TEST = {
-        u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
-        u'file': u'zpsc0c3b9fa.mp4',
-        u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99',
-        u'info_dict': {
-            u"upload_date": u"20130504", 
-            u"uploader": u"rachaneronas", 
-            u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!"
+        'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
+        'file': 'zpsc0c3b9fa.mp4',
+        'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
+        'info_dict': {
+            'upload_date': '20130504',
+            'uploader': 'rachaneronas',
+            'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
        }
    }

    def _real_extract(self, url):
-        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
        video_id = mobj.group('id')
-
        video_extension = mobj.group('ext')

-        # Retrieve video webpage to extract further information
        webpage = self._download_webpage(url, video_id)

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
-        # We try first by looking the javascript code:
-        mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage)
-        if mobj is not None:
-            info = json.loads(mobj.group('json'))
-            return [{
-                'id':       video_id,
-                'url':      info[u'downloadUrl'],
-                'uploader': info[u'username'],
-                'upload_date':  datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'),
-                'title':    info[u'title'],
-                'ext':      video_extension,
-                'thumbnail': info[u'thumbUrl'],
-            }]
-
-        # We try looking in other parts of the webpage
-        video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />',
-            webpage, u'video URL')
-
-        mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract title')
-        video_title = mobj.group(1).decode('utf-8')
-        video_uploader = mobj.group(2).decode('utf-8')
-
-        return [{
-            'id':       video_id.decode('utf-8'),
-            'url':      video_url.decode('utf-8'),
-            'uploader': video_uploader,
-            'upload_date':  None,
-            'title':    video_title,
-            'ext':      video_extension.decode('utf-8'),
-        }]
+        info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
+            webpage, 'info json')
+        info = json.loads(info_json)
+        return {
+            'id': video_id,
+            'url': info['downloadUrl'],
+            'uploader': info['username'],
+            'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'),
+            'title': info['title'],
+            'ext': video_extension,
+            'thumbnail': info['thumbUrl'],
+        }
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@ -44,7 +44,7 @@ class PornHubIE(InfoExtractor):

        video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
        if webpage.find('"encrypted":true') != -1:
-            password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password').replace('+', ' ')
+            password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
            video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))

        formats = []
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor):
                'id': '47127627',
                'ext': 'mp3',
                'title': 'Goldrushed',
+                'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
                'uploader': 'The Royal Concept',
                'upload_date': '20120521',
            },
--- a/youtube_dl/extractor/spike.py
+++ b/youtube_dl/extractor/spike.py
@ -1,10 +1,15 @@
 from __future__ import unicode_literals

+import re
+
 from .mtv import MTVServicesInfoExtractor


 class SpikeIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+'
+    _VALID_URL = r'''(?x)https?://
+        (www\.spike\.com/(video-clips|episodes)/.+|
+         m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+))
+        '''
    _TEST = {
        'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
        'md5': '1a9265f32b0c375793d6c4ce45255256',
@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor):
    }

    _FEED_URL = 'http://www.spike.com/feeds/mrss/'
+    _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
+
+    def _real_extract(self, url):
+        mobj = re.search(self._VALID_URL, url)
+        mobile_id = mobj.group('mobile_id')
+        if mobile_id is not None:
+            url = 'http://www.spike.com/video-clips/%s' % mobile_id
+        return super(SpikeIE, self)._real_extract(url)
--- a/youtube_dl/extractor/vesti.py
+++ b/youtube_dl/extractor/vesti.py
@ -13,7 +13,7 @@ from ..utils import (
 class VestiIE(InfoExtractor):
    IE_NAME = 'vesti'
    IE_DESC = 'Вести.Ru'
-    _VALID_URL = r'http://(?:.+?\.)?(?:vesti\.ru|russia\.tv)/(?P<id>.+)'
+    _VALID_URL = r'http://(?:(?:.+?\.)?vesti\.ru|(?:2\.)?russia\.tv|tvkultura\.ru|rutv\.ru)/(?P<id>.+)'

    _TESTS = [
        {
@ -72,6 +72,35 @@ class VestiIE(InfoExtractor):
                'skip_download': True,
            },
        },
+        {
+            'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
+            'info_dict': {
+                'id': '766403',
+                'ext': 'mp4',
+                'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
+                'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
+                'duration': 271,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+            'skip': 'Blocked outside Russia'
+        },
+        {
+            'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
+            'info_dict': {
+                'id': '51499',
+                'ext': 'flv',
+                'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
+                'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'Translation has finished'
+        },
        {
            'url': 'http://russia.tv/video/show/brand_id/5169/episode_id/970443/video_id/975648',
            'info_dict': {
@ -101,34 +130,47 @@ class VestiIE(InfoExtractor):
            },
        },
        {
-            'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
+            'url': 'http://2.russia.tv/video/show/brand_id/48863/episode_id/972920/video_id/978667/viewtype/picture',
            'info_dict': {
-                'id': '766403',
+                'id': '775081',
                'ext': 'mp4',
-                'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
-                'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
-                'duration': 271,
+                'title': 'XXII зимние Олимпийские игры. Россияне заняли весь пьедестал в лыжных гонках',
+                'description': 'md5:15d3741dd8d04b203fbc031c6a47fb0f',
+                'duration': 101,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
-            'skip': 'Blocked outside Russia'
        },
        {
-            'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
+            'url': 'http://tvkultura.ru/video/show/brand_id/31724/episode_id/972347/video_id/978186',
            'info_dict': {
-                'id': '51499',
-                'ext': 'flv',
-                'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
-                'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
+                'id': '774471',
+                'ext': 'mp4',
+                'title': 'Монологи на все времена',
+                'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
+                'duration': 2906,
            },
            'params': {
-                # rtmp download
+                # m3u8 download
                'skip_download': True,
            },
-            'skip': 'Translation has finished'
-        }
+        },
+        {
+            'url': 'http://rutv.ru/brand/show/id/6792/channel/75',
+            'info_dict': {
+                'id': '125521',
+                'ext': 'mp4',
+                'title': 'Грустная дама червей. Х/ф',
+                'description': '',
+                'duration': 4882,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
    ]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/videodetective.py
+++ b/youtube_dl/extractor/videodetective.py
@ -1,22 +1,23 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
 from .internetvideoarchive import InternetVideoArchiveIE
-from ..utils import (
-    compat_urlparse,
-)
+from ..utils import compat_urlparse


 class VideoDetectiveIE(InfoExtractor):
    _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'

    _TEST = {
-        u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487',
-        u'file': u'194487.mp4',
-        u'info_dict': {
-            u'title': u'KICK-ASS 2',
-            u'description': u'md5:65ba37ad619165afac7d432eaded6013',
-            u'duration': 135,
+        'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
+        'info_dict': {
+            'id': '194487',
+            'ext': 'mp4',
+            'title': 'KICK-ASS 2',
+            'description': 'md5:65ba37ad619165afac7d432eaded6013',
+            'duration': 135,
        },
    }

@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        og_video = self._og_search_video_url(webpage)
        query = compat_urlparse.urlparse(og_video).query
-        return self.url_result(InternetVideoArchiveIE._build_url(query),
-            ie=InternetVideoArchiveIE.ie_key())
+        return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key())
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -8,6 +8,7 @@ import itertools
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
+    compat_HTTPError,
    compat_urllib_parse,
    compat_urllib_request,
    clean_html,
@ -172,7 +173,18 @@ class VimeoIE(SubtitlesInfoExtractor):

        # Retrieve video webpage to extract further information
        request = compat_urllib_request.Request(url, None, headers)
-        webpage = self._download_webpage(request, video_id)
+        try:
+            webpage = self._download_webpage(request, video_id)
+        except ExtractorError as ee:
+            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+                errmsg = ee.cause.read()
+                if b'Because of its privacy settings, this video cannot be played here' in errmsg:
+                    raise ExtractorError(
+                        'Cannot download embed-only video without embedding '
+                        'URL. Please call youtube-dl with the URL of the page '
+                        'that embeds this video.',
+                        expected=True)
+            raise

        # Now we begin extracting as much information as we can from what we
        # retrieved. First we extract the information common to all extractors,
--- a/youtube_dl/extractor/vube.py
+++ b/youtube_dl/extractor/vube.py
@ -13,7 +13,7 @@ class VubeIE(InfoExtractor):

    _TEST = {
        'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
-        'md5': 'f81dcf6d0448e3291f54380181695821',
+        'md5': 'db7aba89d4603dadd627e9d1973946fe',
        'info_dict': {
            'id': 'YL2qNPkqon',
            'ext': 'mp4',
@ -77,4 +77,4 @@ class VubeIE(InfoExtractor):
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
-        }
+        }
--- a/youtube_dl/extractor/xnxx.py
+++ b/youtube_dl/extractor/xnxx.py
@ -1,55 +1,49 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
-
-    ExtractorError,
 )


 class XNXXIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
-    VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
-    VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
-    VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
+    _VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)'
    _TEST = {
-        u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
-        u'file': u'1135332.flv',
-        u'md5': u'0831677e2b4761795f68d417e0b7b445',
-        u'info_dict': {
-            u"title": u"lida \u00bb Naked Funny Actress  (5)",
-            u"age_limit": 18,
+        'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
+        'md5': '0831677e2b4761795f68d417e0b7b445',
+        'info_dict': {
+            'id': '1135332',
+            'ext': 'flv',
+            'title': 'lida » Naked Funny Actress  (5)',
+            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

-        video_url = self._search_regex(self.VIDEO_URL_RE,
-            webpage, u'video URL')
+        video_url = self._search_regex(r'flv_url=(.*?)&amp;',
+            webpage, 'video URL')
        video_url = compat_urllib_parse.unquote(video_url)

-        video_title = self._html_search_regex(self.VIDEO_TITLE_RE,
-            webpage, u'title')
+        video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
+            webpage, 'title')

-        video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE,
-            webpage, u'thumbnail', fatal=False)
+        video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&amp;',
+            webpage, 'thumbnail', fatal=False)

-        return [{
+        return {
            'id': video_id,
            'url': video_url,
-            'uploader': None,
-            'upload_date': None,
            'title': video_title,
            'ext': 'flv',
            'thumbnail': video_thumbnail,
-            'description': None,
            'age_limit': 18,
-        }]
+        }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1263,3 +1263,7 @@ def read_batch_urls(batch_fd):

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
+
+
+def urlencode_postdata(*args, **kargs):
+    return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@

-__version__ = '2014.03.06'
+__version__ = '2014.03.10'
Author	SHA1	Message	Date
Philipp Hagemeister	963d7ec412	release 2014.03.10	2014-03-10 13:04:20 +01:00
Philipp Hagemeister	e712d94adf	Merge branch 'master' of github.com:rg3/youtube-dl	2014-03-10 13:03:52 +01:00
Philipp Hagemeister	6a72423955	[generic] Use a different URL for the generic RSS test (Closes #2532 )	2014-03-10 13:03:39 +01:00
Jaime Marquínez Ferrándiz	4126826b10	[photobucket] More unicode literals	2014-03-10 12:59:19 +01:00
Sergey M․	b773ead7fd	[vesti] Add support for more sites (Closes #2534 )	2014-03-10 18:52:00 +07:00
Philipp Hagemeister	855e2750bc	Credit @mharrys for aftonbladet	2014-03-10 10:30:17 +01:00
Philipp Hagemeister	805ef3c60b	Correct automatic resolution determination	2014-03-10 10:29:25 +01:00
Philipp Hagemeister	fbc2dcb40b	[aftonbladet] Modernize	2014-03-10 10:28:56 +01:00
Philipp Hagemeister	5375d7ad84	Merge remote-tracking branch 'mharrys/aftonbladet'	2014-03-10 10:23:45 +01:00
Jaime Marquínez Ferrándiz	90f3476180	[photobucket] Modernize and remove the old extraction code	2014-03-09 19:36:46 +01:00
Jaime Marquínez Ferrándiz	ee95c09333	[pornhub] Use compat_urllib_parse.unquote_plus (#2531 )	2014-03-09 19:16:25 +01:00
Jaime Marquínez Ferrándiz	75d06db9fc	Merge branch 'pornhub_unquote_password' of github.com:MikeCol/youtube-dl	2014-03-09 19:15:33 +01:00
Jaime Marquínez Ferrándiz	439a1fffcb	[myvideo] Modernize	2014-03-09 18:58:34 +01:00
Jaime Marquínez Ferrándiz	9d9d70c462	[facebook] Modernize	2014-03-09 18:42:44 +01:00
Jaime Marquínez Ferrándiz	b4a186b7be	[jukebox] Modernize and add a test	2014-03-09 18:33:17 +01:00
Jaime Marquínez Ferrándiz	bdebf51c8f	[xnxx] Modernize	2014-03-09 18:31:39 +01:00
MikeCol	264b86f9b4	Unquote password	2014-03-09 18:26:18 +01:00
Philipp Hagemeister	9e55e37a2e	Merge remote-tracking branch 'origin/master'	2014-03-09 18:08:16 +01:00
Jaime Marquínez Ferrándiz	1471956573	Add a basic test suite for the InfoExtractor class	2014-03-09 17:05:29 +01:00
Mattias Harrysson	27865b2169	[aftonbladet] add extractor for aftonbladet.se	2014-03-09 16:59:18 +01:00
Jaime Marquínez Ferrándiz	6d07ce0162	YoutubeDL: If the logger is set call its `warning` method in `report_warning`	2014-03-09 15:16:54 +01:00
Sergey M․	edb7fc5435	[videodetective] Modernize	2014-03-09 18:39:39 +07:00
Jaime Marquínez Ferrándiz	31f77343f2	[vube] Update the test's checksum	2014-03-09 12:27:38 +01:00
Jaime Marquínez Ferrándiz	63ad031583	[soundcloud] Add the description field to the second test	2014-03-09 12:26:58 +01:00
Jaime Marquínez Ferrándiz	957688cee6	[ustream:channel] Update test's number of entries	2014-03-09 12:03:49 +01:00
Jaime Marquínez Ferrándiz	806d6c2e8c	[gamekings] Modernize and update the test's description field	2014-03-09 11:57:30 +01:00
Jaime Marquínez Ferrándiz	0ef68e04d9	[mtv] Transform the urls from the mobile version to get the best quality And don't report a warning, just log a message, it allows to pass the test from Europe.	2014-03-08 22:09:42 +01:00
Sergey M․	a496524db2	[collegehumor] Replace youtube test	2014-03-09 03:21:26 +07:00
Jaime Marquínez Ferrándiz	935c7360cc	[spike] Add support for mobile urls	2014-03-08 21:10:21 +01:00
Jaime Marquínez Ferrándiz	340b046876	[spike] Add support for downloading the mobile version if the normal version is geoblocked	2014-03-08 20:59:11 +01:00
Jaime Marquínez Ferrándiz	cc1db7f9b7	[mtv] Improve detection of geoblocked videos	2014-03-08 19:46:34 +01:00
Philipp Hagemeister	a4ff6c4762	[arte] Raise a proper error when no video is found	2014-03-08 16:04:03 +01:00
Philipp Hagemeister	1060425cbb	[vimeo] Add a better error message for embed-only videos (#2527 )	2014-03-08 12:25:09 +01:00
Jaime Marquínez Ferrándiz	e9c092f125	YoutubeDL: Use its `urlopen` method for downloading the thumbnail.	2014-03-07 16:43:34 +01:00
Jaime Marquínez Ferrándiz	22ff5d2105	[http] Use the `YoutubeDL.urlopen` method	2014-03-07 16:41:42 +01:00
Sergey M․	136db7881b	[lynda] Modernize	2014-03-07 22:11:01 +07:00
Philipp Hagemeister	dae313e725	release 2014.03.07.1	2014-03-07 15:59:10 +01:00
Jaime Marquínez Ferrándiz	b74fa8cd2c	[facebook] Fix login process It was broken and didn't work in python 3. And use `_download_webpage` instead of `compat_urllib_request.urlopen`.	2014-03-07 15:25:33 +01:00
Philipp Hagemeister	94eae04c94	release 2014.03.07	2014-03-07 06:41:48 +01:00
Sergey M․	16ff7ebc77	[lynda] Fix successful login regex and fix formats extraction (Closes #2520 )	2014-03-07 06:56:48 +07:00