release 2014.01.30

[liveleak] Support multiple formats (Fixes #2262)
Merge pull request #2272 from dstftw/master
2025-08-03 02:50:01 -05:00 · 2014-01-30 04:52:54 +01:00 · 2014-01-30 04:52:50 +01:00 · 2014-01-29 14:58:14 -08:00 · 2014-01-30 04:33:00 +07:00 · 2014-01-30 04:26:46 +07:00
34 changed files with 801 additions and 495 deletions
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -120,5 +120,9 @@ class TestAllURLsMatching(unittest.TestCase):
    def test_soundcloud_not_matching_sets(self):
        self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])

+    def test_tumblr(self):
+        self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
+        self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
+
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -33,6 +33,7 @@ from youtube_dl.extractor import (
    ImdbListIE,
    KhanAcademyIE,
    EveryonesMixtapeIE,
+    RutubeChannelIE,
 )


@@ -195,11 +196,11 @@ class TestPlaylists(unittest.TestCase):
    def test_imdb_list(self):
        dl = FakeYDL()
        ie = ImdbListIE(dl)
-        result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U')
+        result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0')
        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'sMjedvGDd8U')
-        self.assertEqual(result['title'], 'Animated and Family Films')
-        self.assertTrue(len(result['entries']) >= 48)
+        self.assertEqual(result['id'], 'JFs9NWw6XI0')
+        self.assertEqual(result['title'], 'March 23, 2012 Releases')
+        self.assertEqual(len(result['entries']), 7)

    def test_khanacademy_topic(self):
        dl = FakeYDL()
@@ -219,6 +220,14 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['id'], 'm7m0jJAbMQi')
        self.assertEqual(result['title'], 'Driving')
        self.assertEqual(len(result['entries']), 24)
+        
+    def test_rutube_channel(self):
+        dl = FakeYDL()
+        ie = RutubeChannelIE(dl)
+        result = ie.extract('http://rutube.ru/tags/video/1409')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], '1409')
+        self.assertTrue(len(result['entries']) >= 34)


 if __name__ == '__main__':
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -40,6 +40,7 @@ __authors__  = (
    'Michael Orlitzky',
    'Chris Gahan',
    'Saimadhav Heblikar',
+    'Mike Col',
 )

 __license__ = 'Public Domain'
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -27,6 +27,7 @@ from .cbs import CBSIE
 from .channel9 import Channel9IE
 from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
+from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
 from .cmt import CMTIE
 from .cnn import CNNIE
@@ -47,6 +48,7 @@ from .depositfiles import DepositFilesIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .defense import DefenseGouvFrIE
+from .discovery import DiscoveryIE
 from .dropbox import DropboxIE
 from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
@@ -115,6 +117,7 @@ from .lynda import (
    LyndaCourseIE
 )
 from .macgamestore import MacGameStoreIE
+from .malemotion import MalemotionIE
 from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
@@ -158,7 +161,12 @@ from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
-from .rutube import RutubeIE
+from .rutube import (
+    RutubeIE,
+    RutubeChannelIE,
+    RutubeMovieIE,
+    RutubePersonIE,
+)
 from .servingsys import ServingSysIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -1,22 +1,28 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
    ExtractorError,
 )

+
 class ARDIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
-    _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
-    _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
+    _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+
    _TEST = {
-        u'url': u'http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640',
-        u'file': u'14077640.mp4',
-        u'md5': u'6ca8824255460c787376353f9e20bbd8',
-        u'info_dict': {
-            u"title": u"11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden"
+        'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786',
+        'file': '19288786.mp4',
+        'md5': '515bf47ce209fb3f5a61b7aad364634c',
+        'info_dict': {
+            'title': 'Edward Snowden im Interview - Held oder Verräter?',
+            'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.',
+            'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037',
        },
-        u'skip': u'Requires rtmpdump'
+        'skip': 'Blocked outside of Germany',
    }

    def _real_extract(self, url):
@@ -29,26 +35,49 @@ class ARDIE(InfoExtractor):
        else:
            video_id = m.group('video_id')

-        # determine title and media streams from webpage
-        html = self._download_webpage(url, video_id)
-        title = re.search(self._TITLE, html).group('title')
-        streams = [mo.groupdict() for mo in re.finditer(self._MEDIA_STREAM, html)]
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(
+            r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title')
+        description = self._html_search_meta(
+            'dcterms.abstract', webpage, 'description')
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        streams = [
+            mo.groupdict()
+            for mo in re.finditer(
+                r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)]
        if not streams:
-            assert '"fsk"' in html
-            raise ExtractorError(u'This video is only available after 8:00 pm')
+            if '"fsk"' in webpage:
+                raise ExtractorError('This video is only available after 20:00')

-        # choose default media type and highest quality for now
-        stream = max([s for s in streams if int(s["media_type"]) == 0],
-                     key=lambda s: int(s["quality"]))
+        formats = []
+        for s in streams:
+            format = {
+                'quality': int(s['quality']),
+            }
+            if s.get('rtmp_url'):
+                format['protocol'] = 'rtmp'
+                format['url'] = s['rtmp_url']
+                format['playpath'] = s['video_url']
+            else:
+                format['url'] = s['video_url']

-        # there's two possibilities: RTMP stream or HTTP download
-        info = {'id': video_id, 'title': title, 'ext': 'mp4'}
-        if stream['rtmp_url']:
-            self.to_screen(u'RTMP download detected')
-            assert stream['video_url'].startswith('mp4:')
-            info["url"] = stream["rtmp_url"]
-            info["play_path"] = stream['video_url']
-        else:
-            assert stream["video_url"].endswith('.mp4')
-            info["url"] = stream["video_url"]
-        return [info]
+            quality_name = self._search_regex(
+                r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'],
+                'quality name', default='NA')
+            format['format_id'] = '%s-%s-%s-%s' % (
+                determine_ext(format['url']), quality_name, s['media_type'],
+                s['quality'])
+
+            formats.append(format)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': formats,
+            'thumbnail': thumbnail,
+        }
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -24,5 +24,5 @@ class BloombergIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
-        ooyala_code = self._search_regex(r'<source src="http://player.ooyala.com/player/[^/]+/([^".]+)', webpage, u'ooyala url')
-        return OoyalaIE._build_url_result(ooyala_code)
+        ooyala_url = self._twitter_search_player(webpage)
+        return self.url_result(ooyala_url, OoyalaIE.ie_key())
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -23,7 +23,6 @@ from ..utils import (
 class BrightcoveIE(InfoExtractor):
    _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
    _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
-    _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'

    _TESTS = [
        {
@@ -70,7 +69,7 @@ class BrightcoveIE(InfoExtractor):
                'description': 'md5:363109c02998fee92ec02211bd8000df',
                'uploader': 'National Ballet of Canada',
            },
-        },
+        }
    ]

    @classmethod
@@ -131,6 +130,11 @@ class BrightcoveIE(InfoExtractor):
        """Try to extract the brightcove url from the wepbage, returns None
        if it can't be found
        """
+
+        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
+        if url_m:
+            return url_m.group(1)
+
        m_brightcove = re.search(
            r'''(?sx)<object
            (?:
@@ -183,8 +187,9 @@ class BrightcoveIE(InfoExtractor):
        return self._extract_video_info(video_info)

    def _get_playlist_info(self, player_key):
-        playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
-                                               player_key, 'Downloading playlist information')
+        info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
+        playlist_info = self._download_webpage(
+            info_url, player_key, 'Downloading playlist information')

        json_data = json.loads(playlist_info)
        if 'videoList' not in json_data:
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+from __future__ import unicode_literals

 import re

@@ -11,38 +11,38 @@ class Channel9IE(InfoExtractor):

    The type of provided URL (video or playlist) is determined according to
    meta Search.PageType from web page HTML rather than URL itself, as it is
-    not always possible to do.    
+    not always possible to do.
    '''
-    IE_DESC = u'Channel 9'
-    IE_NAME = u'channel9'
+    IE_DESC = 'Channel 9'
+    IE_NAME = 'channel9'
    _VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'

    _TESTS = [
        {
-            u'url': u'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
-            u'file': u'Events_TechEd_Australia_2013_KOS002.mp4',
-            u'md5': u'bbd75296ba47916b754e73c3a4bbdf10',
-            u'info_dict': {
-                u'title': u'Developer Kick-Off Session: Stuff We Love',
-                u'description': u'md5:c08d72240b7c87fcecafe2692f80e35f',
-                u'duration': 4576,
-                u'thumbnail': u'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
-                u'session_code': u'KOS002',
-                u'session_day': u'Day 1',
-                u'session_room': u'Arena 1A',
-                u'session_speakers': [ u'Ed Blankenship', u'Andrew Coates', u'Brady Gaster', u'Patrick Klug', u'Mads Kristensen' ],
+            'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
+            'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
+            'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
+            'info_dict': {
+                'title': 'Developer Kick-Off Session: Stuff We Love',
+                'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
+                'duration': 4576,
+                'thumbnail': 'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
+                'session_code': 'KOS002',
+                'session_day': 'Day 1',
+                'session_room': 'Arena 1A',
+                'session_speakers': [ 'Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen' ],
            },
        },
        {
-            u'url': u'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
-            u'file': u'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
-            u'md5': u'b43ee4529d111bc37ba7ee4f34813e68',
-            u'info_dict': {
-                u'title': u'Self-service BI with Power BI - nuclear testing',
-                u'description': u'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
-                u'duration': 1540,
-                u'thumbnail': u'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
-                u'authors': [ u'Mike Wilmot' ],
+            'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
+            'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
+            'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
+            'info_dict': {
+                'title': 'Self-service BI with Power BI - nuclear testing',
+                'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
+                'duration': 1540,
+                'thumbnail': 'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
+                'authors': [ 'Mike Wilmot' ],
            },
        }
    ]
@@ -60,7 +60,7 @@ class Channel9IE(InfoExtractor):
            return 0
        units = m.group('units')
        try:
-            exponent = [u'B', u'KB', u'MB', u'GB', u'TB', u'PB', u'EB', u'ZB', u'YB'].index(units.upper())
+            exponent = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'].index(units.upper())
        except ValueError:
            return 0
        size = float(m.group('size'))
@@ -80,7 +80,7 @@ class Channel9IE(InfoExtractor):
            'url': x.group('url'),
            'format_id': x.group('quality'),
            'format_note': x.group('note'),
-            'format': u'%s (%s)' % (x.group('quality'), x.group('note')),
+            'format': '%s (%s)' % (x.group('quality'), x.group('note')),
            'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
            'preference': self._known_formats.index(x.group('quality')),
            'vcodec': 'none' if x.group('note') == 'Audio only' else None,
@@ -91,10 +91,10 @@ class Channel9IE(InfoExtractor):
        return formats

    def _extract_title(self, html):
-        title = self._html_search_meta(u'title', html, u'title')
+        title = self._html_search_meta('title', html, 'title')
        if title is None:           
            title = self._og_search_title(html)
-            TITLE_SUFFIX = u' (Channel 9)'
+            TITLE_SUFFIX = ' (Channel 9)'
            if title is not None and title.endswith(TITLE_SUFFIX):
                title = title[:-len(TITLE_SUFFIX)]
        return title
@@ -110,7 +110,7 @@ class Channel9IE(InfoExtractor):
        m = re.search(DESCRIPTION_REGEX, html)
        if m is not None:
            return m.group('description')
-        return self._html_search_meta(u'description', html, u'description')
+        return self._html_search_meta('description', html, 'description')

    def _extract_duration(self, html):
        m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
@@ -172,7 +172,7 @@ class Channel9IE(InfoExtractor):

        # Nothing to download
        if len(formats) == 0 and slides is None and zip_ is None:
-            self._downloader.report_warning(u'None of recording, slides or zip are available for %s' % content_path)
+            self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path)
            return

        # Extract meta
@@ -244,7 +244,7 @@ class Channel9IE(InfoExtractor):
        return contents

    def _extract_list(self, content_path):
-        rss = self._download_xml(self._RSS_URL % content_path, content_path, u'Downloading RSS')
+        rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
        entries = [self.url_result(session_url.text, 'Channel9')
                   for session_url in rss.findall('./channel/item/link')]
        title_text = rss.find('./channel/title').text
@@ -254,11 +254,11 @@ class Channel9IE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        content_path = mobj.group('contentpath')

-        webpage = self._download_webpage(url, content_path, u'Downloading web page')
+        webpage = self._download_webpage(url, content_path, 'Downloading web page')

        page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage)
        if page_type_m is None:
-            raise ExtractorError(u'Search.PageType not found, don\'t know how to process this page', expected=True)
+            raise ExtractorError('Search.PageType not found, don\'t know how to process this page', expected=True)

        page_type = page_type_m.group('pagetype')
        if page_type == 'List':         # List page, may contain list of 'item'-like objects
@@ -268,4 +268,4 @@ class Channel9IE(InfoExtractor):
        elif page_type == 'Session':    # Event session page, may contain downloadable content
            return self._extract_session(webpage, content_path)
        else:
-            raise ExtractorError(u'Unexpected Search.PageType %s' % page_type, expected=True)
+            raise ExtractorError('Unexpected Search.PageType %s' % page_type, expected=True)
--- a/youtube_dl/extractor/cliphunter.py
+++ b/youtube_dl/extractor/cliphunter.py
@@ -0,0 +1,59 @@
+from __future__ import unicode_literals
+
+import re
+import string
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+)
+
+translation_table = {
+    'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
+    'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
+    'y': 'l', 'z': 'i',
+    '$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
+}
+
+
+class CliphunterIE(InfoExtractor):
+    IE_NAME = 'cliphunter'
+
+    _VALID_URL = r'''(?x)http://(?:www\.)?cliphunter\.com/w/
+        (?P<id>[0-9]+)/
+        (?P<seo>.+?)(?:$|[#\?])
+    '''
+    _TEST = {
+        'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
+        'file': '1012420.flv',
+        'md5': '15e7740f30428abf70f4223478dc1225',
+        'info_dict': {
+            'title': 'Fun Jynx Maze solo',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        pl_fiji = self._search_regex(
+            r'pl_fiji = \'([^\']+)\'', webpage, 'video data')
+        pl_c_qual = self._search_regex(
+            r'pl_c_qual = "(.)"', webpage, 'video quality')
+        video_title = self._search_regex(
+            r'mediaTitle = "([^"]+)"', webpage, 'title')
+
+        video_url = ''.join(translation_table.get(c, c) for c in pl_fiji)
+
+        formats = [{
+            'url': video_url,
+            'format_id': pl_c_qual,
+        }]
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -14,7 +14,7 @@ from ..utils import (


 class ComedyCentralIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/
+    _VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
        (video-clips|episodes|cc-studios|video-collections)
        /(?P<title>.*)'''
    _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
@@ -86,7 +86,7 @@ class ComedyCentralShowsIE(InfoExtractor):

    @staticmethod
    def _transform_rtmp_url(rtmp_video_url):
-        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
+        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
        if not m:
            raise ExtractorError('Cannot transform RTMP url')
        base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -465,7 +465,14 @@ class InfoExtractor(object):
        }
        return RATING_TABLE.get(rating.lower(), None)

+    def _twitter_search_player(self, html):
+        return self._html_search_meta('twitter:player', html,
+            'twitter card player')
+
    def _sort_formats(self, formats):
+        if not formats:
+            raise ExtractorError(u'No video formats found')
+
        def _formats_key(f):
            # TODO remove the following workaround
            from ..utils import determine_ext
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -0,0 +1,46 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class DiscoveryIE(InfoExtractor):
+    _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
+    _TEST = {
+        'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
+        'file': '614784.mp4',
+        'md5': 'e12614f9ee303a6ccef415cb0793eba2',
+        'info_dict': {
+            'title': 'MythBusters: Mission Impossible Outtakes',
+            'description': ('Watch Jamie Hyneman and Adam Savage practice being'
+                ' each other -- to the point of confusing Jamie\'s dog -- and '
+                'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
+                ' back.'),
+            'duration': 156,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
+            webpage, 'video list', flags=re.DOTALL)
+        video_list = json.loads(video_list_json)
+        info = video_list['clips'][0]
+        formats = []
+        for f in info['mp4']:
+            formats.append(
+                {'url': f['src'], r'ext': r'mp4', 'tbr': int(f['bitrate'][:-1])})
+
+        return {
+            'id': info['contentId'],
+            'title': video_list['name'],
+            'formats': formats,
+            'description': info['videoCaption'],
+            'thumbnail': info.get('videoStillURL') or info.get('thumbnailURL'),
+            'duration': info['duration'],
+        }
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -6,13 +8,16 @@ from .common import InfoExtractor
 class FunnyOrDieIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
    _TEST = {
-        u'url': u'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
-        u'file': u'0732f586d7.mp4',
-        u'md5': u'f647e9e90064b53b6e046e75d0241fbd',
-        u'info_dict': {
-            u"description": u"Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.", 
-            u"title": u"Heart-Shaped Box: Literal Video Version"
-        }
+        'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
+        'file': '0732f586d7.mp4',
+        'md5': 'f647e9e90064b53b6e046e75d0241fbd',
+        'info_dict': {
+            'description': ('Lyrics changed to match the video. Spoken cameo '
+                'by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a '
+                'concept by Dustin McLean (DustFilms.com). Performed, edited, '
+                'and written by David A. Scott.'),
+            'title': 'Heart-Shaped Box: Literal Video Version',
+        },
    }

    def _real_extract(self, url):
@@ -23,13 +28,12 @@ class FunnyOrDieIE(InfoExtractor):

        video_url = self._search_regex(
            [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
-            webpage, u'video URL', flags=re.DOTALL)
+            webpage, 'video URL', flags=re.DOTALL)

-        info = {
+        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
        }
-        return [info]
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -78,6 +78,18 @@ class GenericIE(InfoExtractor):
                'skip_download': True,
            },
        },
+        {
+            # https://github.com/rg3/youtube-dl/issues/2253
+            'url': 'http://bcove.me/i6nfkrc3',
+            'file': '3101154703001.mp4',
+            'md5': '0ba9446db037002366bab3b3eb30c88c',
+            'info_dict': {
+                'title': 'Still no power',
+                'uploader': 'thestar.com',
+                'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
+            },
+            'add_ie': ['Brightcove'],
+        },
        # Direct link to a video
        {
            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
@@ -242,7 +254,7 @@ class GenericIE(InfoExtractor):

        # Look for embedded (iframe) Vimeo player
        mobj = re.search(
-            r'<iframe[^>]+?src="((?:https?:)?//player.vimeo.com/video/.+?)"', webpage)
+            r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage)
        if mobj:
            player_url = unescapeHTML(mobj.group(1))
            surl = smuggle_url(player_url, {'Referer': url})
@@ -250,7 +262,7 @@ class GenericIE(InfoExtractor):

        # Look for embedded (swf embed) Vimeo player
        mobj = re.search(
-            r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage)
+            r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
        if mobj:
            return self.url_result(mobj.group(1), 'Vimeo')

@@ -320,7 +332,7 @@ class GenericIE(InfoExtractor):
            return self.url_result(mobj.group(1), 'Aparat')

        # Look for MPORA videos
-        mobj = re.search(r'<iframe .*?src="(http://mpora\.com/videos/[^"]+)"', webpage)
+        mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
        if mobj is not None:
            return self.url_result(mobj.group(1), 'Mpora')

@@ -338,7 +350,7 @@ class GenericIE(InfoExtractor):

        # Look for embedded Huffington Post player
        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'HuffPost')

--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -69,12 +69,9 @@ class ImdbListIE(InfoExtractor):
        list_id = mobj.group('id')

        webpage = self._download_webpage(url, list_id)
-        list_code = self._search_regex(
-            r'(?s)<div\s+class="list\sdetail">(.*?)class="see-more"',
-            webpage, 'list code')
        entries = [
            self.url_result('http://www.imdb.com' + m, 'Imdb')
-            for m in re.findall(r'href="(/video/imdb/vi[^"]+)"', webpage)]
+            for m in re.findall(r'href="(/video/imdb/vi[^"]+)"\s+data-type="playlist"', webpage)]

        list_title = self._html_search_regex(
            r'<h1 class="header">(.*?)</h1>', webpage, 'list title')
--- a/youtube_dl/extractor/infoq.py
+++ b/youtube_dl/extractor/infoq.py
@@ -1,27 +1,27 @@
+from __future__ import unicode_literals
+
 import base64
 import re

 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
-
-    ExtractorError,
 )


 class InfoQIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
    _TEST = {
-        u"name": u"InfoQ",
-        u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
-        u"file": u"12-jan-pythonthings.mp4",
-        u"info_dict": {
-            u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
-            u"title": u"A Few of My Favorite [Python] Things"
+        "name": "InfoQ",
+        "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
+        "file": "12-jan-pythonthings.mp4",
+        "info_dict": {
+            "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
+            "title": "A Few of My Favorite [Python] Things",
+        },
+        "params": {
+            "skip_download": True,
        },
-        u"params": {
-            u"skip_download": True
-        }
    }

    def _real_extract(self, url):
@@ -31,32 +31,25 @@ class InfoQIE(InfoExtractor):
        self.report_extraction(url)

        # Extract video URL
-        mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract video url')
-        real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
+        encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
+        real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
        video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id

        # Extract title
        video_title = self._search_regex(r'contentTitle = "(.*?)";',
-            webpage, u'title')
+            webpage, 'title')

        # Extract description
        video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
-            webpage, u'description', fatal=False)
+            webpage, 'description', fatal=False)

        video_filename = video_url.split('/')[-1]
        video_id, extension = video_filename.split('.')

-        info = {
+        return {
            'id': video_id,
            'url': video_url,
-            'uploader': None,
-            'upload_date': None,
            'title': video_title,
            'ext': extension, # Extension is always(?) mp4, but seems to be flv
-            'thumbnail': None,
            'description': video_description,
        }
-
-        return [info]
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@@ -1,4 +1,5 @@
 # encoding: utf-8
+from __future__ import unicode_literals

 import re
 import json
@@ -11,38 +12,38 @@ from ..utils import (


 class IviIE(InfoExtractor):
-    IE_DESC = u'ivi.ru'
-    IE_NAME = u'ivi'
+    IE_DESC = 'ivi.ru'
+    IE_NAME = 'ivi'
    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'

    _TESTS = [
        # Single movie
        {
-            u'url': u'http://www.ivi.ru/watch/53141',
-            u'file': u'53141.mp4',
-            u'md5': u'6ff5be2254e796ed346251d117196cf4',
-            u'info_dict': {
-                u'title': u'Иван Васильевич меняет профессию',
-                u'description': u'md5:14d8eda24e9d93d29b5857012c6d6346',
-                u'duration': 5498,
-                u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
+            'url': 'http://www.ivi.ru/watch/53141',
+            'file': '53141.mp4',
+            'md5': '6ff5be2254e796ed346251d117196cf4',
+            'info_dict': {
+                'title': 'Иван Васильевич меняет профессию',
+                'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
+                'duration': 5498,
+                'thumbnail': 'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
            },
-            u'skip': u'Only works from Russia',
+            'skip': 'Only works from Russia',
        },
        # Serial's serie
        {
-            u'url': u'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
-            u'file': u'74791.mp4',
-            u'md5': u'3e6cc9a848c1d2ebcc6476444967baa9',
-            u'info_dict': {
-                u'title': u'Дежурный ангел - 1 серия',
-                u'duration': 2490,
-                u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
+            'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
+            'file': '74791.mp4',
+            'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
+            'info_dict': {
+                'title': 'Дежурный ангел - 1 серия',
+                'duration': 2490,
+                'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
            },
-            u'skip': u'Only works from Russia',
+            'skip': 'Only works from Russia',
         }
    ]
-    
+
    # Sorted by quality
    _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']

@@ -54,7 +55,7 @@ class IviIE(InfoExtractor):
        return m.group('description') if m is not None else None

    def _extract_comment_count(self, html):
-        m = re.search(u'(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
+        m = re.search('(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
        return int(m.group('commentcount')) if m is not None else 0

    def _real_extract(self, url):
@@ -63,49 +64,49 @@ class IviIE(InfoExtractor):

        api_url = 'http://api.digitalaccess.ru/api/json/'

-        data = {u'method': u'da.content.get',
-                u'params': [video_id, {u'site': u's183',
-                                       u'referrer': u'http://www.ivi.ru/watch/%s' % video_id,
-                                       u'contentid': video_id
-                                    }
-                            ]
+        data = {'method': 'da.content.get',
+                'params': [video_id, {'site': 's183',
+                                      'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
+                                      'contentid': video_id
+                                      }
+                           ]
                }

        request = compat_urllib_request.Request(api_url, json.dumps(data))

-        video_json_page = self._download_webpage(request, video_id, u'Downloading video JSON')
+        video_json_page = self._download_webpage(request, video_id, 'Downloading video JSON')
        video_json = json.loads(video_json_page)

-        if u'error' in video_json:
-            error = video_json[u'error']
-            if error[u'origin'] == u'NoRedisValidData':
-                raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
-            raise ExtractorError(u'Unable to download video %s: %s' % (video_id, error[u'message']), expected=True)
+        if 'error' in video_json:
+            error = video_json['error']
+            if error['origin'] == 'NoRedisValidData':
+                raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+            raise ExtractorError('Unable to download video %s: %s' % (video_id, error['message']), expected=True)

-        result = video_json[u'result']
+        result = video_json['result']

        formats = [{
-            'url': x[u'url'],
-            'format_id': x[u'content_format'],
-            'preference': self._known_formats.index(x[u'content_format']),
-        } for x in result[u'files'] if x[u'content_format'] in self._known_formats]
+            'url': x['url'],
+            'format_id': x['content_format'],
+            'preference': self._known_formats.index(x['content_format']),
+        } for x in result['files'] if x['content_format'] in self._known_formats]

        self._sort_formats(formats)

        if not formats:
-            raise ExtractorError(u'No media links available for %s' % video_id)
+            raise ExtractorError('No media links available for %s' % video_id)

-        duration = result[u'duration']
-        compilation = result[u'compilation']
-        title = result[u'title']
+        duration = result['duration']
+        compilation = result['compilation']
+        title = result['title']

        title = '%s - %s' % (compilation, title) if compilation is not None else title  

-        previews = result[u'preview']
+        previews = result['preview']
        previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
-        thumbnail = previews[-1][u'url'] if len(previews) > 0 else None
+        thumbnail = previews[-1]['url'] if len(previews) > 0 else None

-        video_page = self._download_webpage(url, video_id, u'Downloading video page')
+        video_page = self._download_webpage(url, video_id, 'Downloading video page')
        description = self._extract_description(video_page)
        comment_count = self._extract_comment_count(video_page)

@@ -121,8 +122,8 @@ class IviIE(InfoExtractor):


 class IviCompilationIE(InfoExtractor):
-    IE_DESC = u'ivi.ru compilations'
-    IE_NAME = u'ivi:compilation'
+    IE_DESC = 'ivi.ru compilations'
+    IE_NAME = 'ivi:compilation'
    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'

    def _extract_entries(self, html, compilation_id):
@@ -135,22 +136,23 @@ class IviCompilationIE(InfoExtractor):
        season_id = mobj.group('seasonid')

        if season_id is not None: # Season link
-            season_page = self._download_webpage(url, compilation_id, u'Downloading season %s web page' % season_id)
+            season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id)
            playlist_id = '%s/season%s' % (compilation_id, season_id)
-            playlist_title = self._html_search_meta(u'title', season_page, u'title')
+            playlist_title = self._html_search_meta('title', season_page, 'title')
            entries = self._extract_entries(season_page, compilation_id)
        else: # Compilation link            
-            compilation_page = self._download_webpage(url, compilation_id, u'Downloading compilation web page')
+            compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
            playlist_id = compilation_id
-            playlist_title = self._html_search_meta(u'title', compilation_page, u'title')
+            playlist_title = self._html_search_meta('title', compilation_page, 'title')
            seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
            if len(seasons) == 0: # No seasons in this compilation
                entries = self._extract_entries(compilation_page, compilation_id)
            else:
                entries = []
                for season_id in seasons:
-                    season_page = self._download_webpage('http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
-                                                         compilation_id, u'Downloading season %s web page' % season_id)
+                    season_page = self._download_webpage(
+                        'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
+                        compilation_id, 'Downloading season %s web page' % season_id)
                    entries.extend(self._extract_entries(season_page, compilation_id))

        return self.playlist_result(entries, playlist_id, playlist_title)
--- a/youtube_dl/extractor/keek.py
+++ b/youtube_dl/extractor/keek.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -5,36 +7,34 @@ from .common import InfoExtractor

 class KeekIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
-    IE_NAME = u'keek'
+    IE_NAME = 'keek'
    _TEST = {
-        u'url': u'https://www.keek.com/ytdl/keeks/NODfbab',
-        u'file': u'NODfbab.mp4',
-        u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83',
-        u'info_dict': {
-            u"uploader": u"ytdl", 
-            u"title": u"test chars: \"'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
-        }
+        'url': 'https://www.keek.com/ytdl/keeks/NODfbab',
+        'file': 'NODfbab.mp4',
+        'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
+        'info_dict': {
+            'uploader': 'ytdl',
+            'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .',
+        },
    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')

-        video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
-        thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
+        video_url = 'http://cdn.keek.com/keek/video/%s' % video_id
+        thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
        webpage = self._download_webpage(url, video_id)

-        video_title = self._og_search_title(webpage)
+        uploader = self._html_search_regex(
+            r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
+            webpage, 'uploader', fatal=False)

-        uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
-            webpage, u'uploader', fatal=False)
-
-        info = {
-                'id': video_id,
-                'url': video_url,
-                'ext': 'mp4',
-                'title': video_title,
-                'thumbnail': thumbnail,
-                'uploader': uploader
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': self._og_search_title(webpage),
+            'thumbnail': thumbnail,
+            'uploader': uploader
        }
-        return [info]
--- a/youtube_dl/extractor/la7.py
+++ b/youtube_dl/extractor/la7.py
@@ -10,7 +10,13 @@ from ..utils import (

 class LA7IE(InfoExtractor):
    IE_NAME = 'la7.tv'
-    _VALID_URL = r'https?://(?:www\.)?la7\.tv/richplayer/\?assetid=(?P<id>[0-9]+)'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?la7\.tv/
+        (?:
+            richplayer/\?assetid=|
+            \?contentId=
+        )
+        (?P<id>[0-9]+)'''

    _TEST = {
        'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
@@ -20,7 +26,8 @@ class LA7IE(InfoExtractor):
            'title': 'IL DIVO',
            'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci',
            'duration': 6254,
-        }
+        },
+        'skip': 'Blocked in the US',
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/liveleak.py
+++ b/youtube_dl/extractor/liveleak.py
@@ -1,3 +1,6 @@
+from __future__ import unicode_literals
+
+import json
 import re

 from .common import InfoExtractor
@@ -7,46 +10,43 @@ from ..utils import (


 class LiveLeakIE(InfoExtractor):
-
    _VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
-    IE_NAME = u'liveleak'
    _TEST = {
-        u'url': u'http://www.liveleak.com/view?i=757_1364311680',
-        u'file': u'757_1364311680.mp4',
-        u'md5': u'0813c2430bea7a46bf13acf3406992f4',
-        u'info_dict': {
-            u"description": u"extremely bad day for this guy..!", 
-            u"uploader": u"ljfriel2", 
-            u"title": u"Most unlucky car accident"
+        'url': 'http://www.liveleak.com/view?i=757_1364311680',
+        'file': '757_1364311680.mp4',
+        'md5': '0813c2430bea7a46bf13acf3406992f4',
+        'info_dict': {
+            'description': 'extremely bad day for this guy..!',
+            'uploader': 'ljfriel2',
+            'title': 'Most unlucky car accident'
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('video_id')
-
        webpage = self._download_webpage(url, video_id)
+        sources_raw = self._search_regex(
+            r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs')
+        sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
+        sources = json.loads(sources_json)

-        video_url = self._search_regex(r'file: "(.*?)",',
-            webpage, u'video URL')
+        formats = [{
+            'format_note': s.get('label'),
+            'url': s['file'],
+        } for s in sources]
+        self._sort_formats(formats)

        video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
-
        video_description = self._og_search_description(webpage)
+        video_uploader = self._html_search_regex(
+            r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)

-        video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
-            webpage, u'uploader', fatal=False)
-
-        info = {
-            'id':  video_id,
-            'url': video_url,
-            'ext': 'mp4',
+        return {
+            'id': video_id,
            'title': video_title,
            'description': video_description,
-            'uploader': video_uploader
+            'uploader': video_uploader,
+            'formats': formats,
        }
-
-        return [info]
--- a/youtube_dl/extractor/malemotion.py
+++ b/youtube_dl/extractor/malemotion.py
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+)
+
+class MalemotionIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
+    _TEST = {
+        'url': 'http://malemotion.com/video/bien-dur.10ew',
+        'file': '10ew.mp4',
+        'md5': 'b3cc49f953b107e4a363cdff07d100ce',
+        'info_dict': {
+            "title": "Bien dur",
+            "age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group("id")
+
+        webpage = self._download_webpage(url, video_id)
+
+        self.report_extraction(video_id)
+
+        # Extract video URL
+        video_url = compat_urllib_parse.unquote(
+            self._search_regex(r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
+
+        # Extract title
+        video_title = self._html_search_regex(
+            r'<title>(.*?)</title', webpage, 'title')
+
+        # Extract video thumbnail
+        video_thumbnail = self._search_regex(
+            r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)
+
+        formats = [{
+            'url': video_url,
+            'ext': 'mp4',
+            'format_id': 'mp4',
+            'preference': 1,
+        }]
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'uploader': None,
+            'upload_date': None,
+            'title': video_title,
+            'thumbnail': video_thumbnail,
+            'description': None,
+            'age_limit': 18,
+        }
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@@ -1,3 +1,4 @@
+from __future__ import unicode_literals
 import os.path

 from .common import InfoExtractor
@@ -11,13 +12,13 @@ from ..utils import (
 class MySpassIE(InfoExtractor):
    _VALID_URL = r'http://www\.myspass\.de/.*'
    _TEST = {
-        u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
-        u'file': u'11741.mp4',
-        u'md5': u'0b49f4844a068f8b33f4b7c88405862b',
-        u'info_dict': {
-            u"description": u"Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?", 
-            u"title": u"Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
-        }
+        'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
+        'file': '11741.mp4',
+        'md5': '0b49f4844a068f8b33f4b7c88405862b',
+        'info_dict': {
+            "description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
+            "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2",
+        },
    }

    def _real_extract(self, url):
@@ -37,12 +38,11 @@ class MySpassIE(InfoExtractor):
        # extract values from metadata
        url_flv_el = metadata.find('url_flv')
        if url_flv_el is None:
-            raise ExtractorError(u'Unable to extract download url')
+            raise ExtractorError('Unable to extract download url')
        video_url = url_flv_el.text
-        extension = os.path.splitext(video_url)[1][1:]
        title_el = metadata.find('title')
        if title_el is None:
-            raise ExtractorError(u'Unable to extract title')
+            raise ExtractorError('Unable to extract title')
        title = title_el.text
        format_id_el = metadata.find('format_id')
        if format_id_el is None:
@@ -59,13 +59,12 @@ class MySpassIE(InfoExtractor):
            thumbnail = imagePreview_el.text
        else:
            thumbnail = None
-        info = {
+
+        return {
            'id': video_id,
            'url': video_url,
            'title': title,
-            'ext': extension,
            'format': format,
            'thumbnail': thumbnail,
-            'description': description
+            'description': description,
        }
-        return [info]
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -1,48 +1,39 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)


 class NBAIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
    _TEST = {
-        u'url': u'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
-        u'file': u'0021200253-okc-bkn-recap.nba.mp4',
-        u'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
-        u'info_dict': {
-            u"description": u"Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.", 
-            u"title": u"Thunder vs. Nets"
-        }
+        'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
+        'file': u'0021200253-okc-bkn-recap.nba.mp4',
+        'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
+        'info_dict': {
+            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
+            'title': 'Thunder vs. Nets',
+        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
        video_id = mobj.group(1)

        webpage = self._download_webpage(url, video_id)

-        video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
+        video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'

        shortened_video_id = video_id.rpartition('/')[2]
        title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')

-        # It isn't there in the HTML it returns to us
-        # uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
-
        description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)

-        info = {
+        return {
            'id': shortened_video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
-            # 'uploader_date': uploader_date,
            'description': description,
        }
-        return [info]
--- a/youtube_dl/extractor/ninegag.py
+++ b/youtube_dl/extractor/ninegag.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import json
 import re

@@ -9,13 +11,13 @@ class NineGagIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'

    _TEST = {
-        u"url": u"http://9gag.tv/v/1912",
-        u"file": u"1912.mp4",
-        u"info_dict": {
-            u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
-            u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
+        "url": "http://9gag.tv/v/1912",
+        "file": "1912.mp4",
+        "info_dict": {
+            "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
+            "title": "\"People Are Awesome 2013\" Is Absolutely Awesome"
        },
-        u'add_ie': [u'Youtube']
+        'add_ie': ['Youtube']
    }

    def _real_extract(self, url):
@@ -25,7 +27,7 @@ class NineGagIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        data_json = self._html_search_regex(r'''(?x)
            <div\s*id="tv-video"\s*data-video-source="youtube"\s*
-                data-video-meta="([^"]+)"''', webpage, u'video metadata')
+                data-video-meta="([^"]+)"''', webpage, 'video metadata')

        data = json.loads(data_json)

--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 from ..utils import unescapeHTML

 class OoyalaIE(InfoExtractor):
-    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'
+    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'

    _TEST = {
        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -7,12 +9,12 @@ from ..utils import compat_urllib_parse
 class PornHdIE(InfoExtractor):
    _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
    _TEST = {
-        u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
-        u'file': u'1962.flv',
-        u'md5': u'35272469887dca97abd30abecc6cdf75',
-        u'info_dict': {
-            u"title": u"sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
-            u"age_limit": 18,
+        'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
+        'file': '1962.flv',
+        'md5': '35272469887dca97abd30abecc6cdf75',
+        'info_dict': {
+            "title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
+            "age_limit": 18,
        }
    }

@@ -24,9 +26,13 @@ class PornHdIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

-        video_url = self._html_search_regex(
-            r'&hd=(http.+?)&', webpage, u'video URL')
-        video_url = compat_urllib_parse.unquote(video_url)
+        next_url = self._html_search_regex(
+            r'&hd=(http.+?)&', webpage, 'video URL')
+        next_url = compat_urllib_parse.unquote(next_url)
+
+        video_url = self._download_webpage(
+            next_url, video_id, note='Retrieving video URL',
+            errnote='Could not retrieve video URL')
        age_limit = 18

        return {
--- a/youtube_dl/extractor/rbmaradio.py
+++ b/youtube_dl/extractor/rbmaradio.py
@@ -1,3 +1,6 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
 import json
 import re

@@ -12,16 +15,16 @@ from ..utils import (
 class RBMARadioIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
    _TEST = {
-        u'url': u'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
-        u'file': u'ford-lopatin-live-at-primavera-sound-2011.mp3',
-        u'md5': u'6bc6f9bcb18994b4c983bc3bf4384d95',
-        u'info_dict': {
-            u"uploader_id": u"ford-lopatin", 
-            u"location": u"Spain", 
-            u"description": u"Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.", 
-            u"uploader": u"Ford & Lopatin", 
-            u"title": u"Live at Primavera Sound 2011"
-        }
+        'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
+        'file': 'ford-lopatin-live-at-primavera-sound-2011.mp3',
+        'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
+        'info_dict': {
+            "uploader_id": "ford-lopatin",
+            "location": "Spain",
+            "description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
+            "uploader": "Ford & Lopatin",
+            "title": "Live at Primavera Sound 2011",
+        },
    }

    def _real_extract(self, url):
@@ -31,26 +34,24 @@ class RBMARadioIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)

        json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
-            webpage, u'json data', flags=re.MULTILINE)
+            webpage, 'json data', flags=re.MULTILINE)

        try:
            data = json.loads(json_data)
        except ValueError as e:
-            raise ExtractorError(u'Invalid JSON: ' + str(e))
+            raise ExtractorError('Invalid JSON: ' + str(e))

        video_url = data['akamai_url'] + '&cbr=256'
        url_parts = compat_urllib_parse_urlparse(video_url)
-        video_ext = url_parts.path.rpartition('.')[2]
-        info = {
-                'id': video_id,
-                'url': video_url,
-                'ext': video_ext,
-                'title': data['title'],
-                'description': data.get('teaser_text'),
-                'location': data.get('country_of_origin'),
-                'uploader': data.get('host', {}).get('name'),
-                'uploader_id': data.get('host', {}).get('slug'),
-                'thumbnail': data.get('image', {}).get('large_url_2x'),
-                'duration': data.get('duration'),
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': data['title'],
+            'description': data.get('teaser_text'),
+            'location': data.get('country_of_origin'),
+            'uploader': data.get('host', {}).get('name'),
+            'uploader_id': data.get('host', {}).get('slug'),
+            'thumbnail': data.get('image', {}).get('large_url_2x'),
+            'duration': data.get('duration'),
        }
-        return [info]
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -1,58 +1,124 @@
 # encoding: utf-8
+from __future__ import unicode_literals
+
 import re
 import json
+import itertools

 from .common import InfoExtractor
 from ..utils import (
-    compat_urlparse,
    compat_str,
+    unified_strdate,
    ExtractorError,
 )


 class RutubeIE(InfoExtractor):
-    _VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)'
+    IE_NAME = 'rutube'
+    IE_DESC = 'Rutube videos'
+    _VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'

    _TEST = {
-        u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
-        u'file': u'3eac3b4561676c17df9132a9a1e62e3e.mp4',
-        u'info_dict': {
-            u'title': u'Раненный кенгуру забежал в аптеку',
-            u'uploader': u'NTDRussian',
-            u'uploader_id': u'29790',
+        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
+        'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4',
+        'info_dict': {
+            'title': 'Раненный кенгуру забежал в аптеку',
+            'description': 'http://www.ntdtv.ru ',
+            'duration': 80,
+            'uploader': 'NTDRussian',
+            'uploader_id': '29790',
+            'upload_date': '20131016',
        },
-        u'params': {
+        'params': {
            # It requires ffmpeg (m3u8 download)
-            u'skip_download': True,
+            'skip_download': True,
        },
    }

-    def _get_api_response(self, short_id, subpath):
-        api_url = 'http://rutube.ru/api/play/%s/%s/?format=json' % (subpath, short_id)
-        response_json = self._download_webpage(api_url, short_id,
-            u'Downloading %s json' % subpath)
-        return json.loads(response_json)
-
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        long_id = mobj.group('long_id')
-        webpage = self._download_webpage(url, long_id)
-        og_video = self._og_search_video_url(webpage)
-        short_id = compat_urlparse.urlparse(og_video).path[1:]
-        options = self._get_api_response(short_id, 'options')
-        trackinfo = self._get_api_response(short_id, 'trackinfo')
+        video_id = mobj.group('id')
+        
+        api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id,
+                                              video_id, 'Downloading video JSON')
+        video = json.loads(api_response)
+        
+        api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
+                                              video_id, 'Downloading trackinfo JSON')
+        trackinfo = json.loads(api_response)
+        
        # Some videos don't have the author field
        author = trackinfo.get('author') or {}
        m3u8_url = trackinfo['video_balancer'].get('m3u8')
        if m3u8_url is None:
-            raise ExtractorError(u'Couldn\'t find m3u8 manifest url')
+            raise ExtractorError('Couldn\'t find m3u8 manifest url')

        return {
-            'id': trackinfo['id'],
-            'title': trackinfo['title'],
+            'id': video['id'],
+            'title': video['title'],
+            'description': video['description'],
+            'duration': video['duration'],
+            'view_count': video['hits'],
            'url': m3u8_url,
            'ext': 'mp4',
-            'thumbnail': options['thumbnail_url'],
+            'thumbnail': video['thumbnail_url'],
            'uploader': author.get('name'),
            'uploader_id': compat_str(author['id']) if author else None,
+            'upload_date': unified_strdate(video['created_ts']),
+            'age_limit': 18 if video['is_adult'] else 0,
        }
+
+
+class RutubeChannelIE(InfoExtractor):
+    IE_NAME = 'rutube:channel'
+    IE_DESC = 'Rutube channels'
+    _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
+
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
+
+    def _extract_videos(self, channel_id, channel_title=None):
+        entries = []
+        for pagenum in itertools.count(1):
+            api_response = self._download_webpage(
+                self._PAGE_TEMPLATE % (channel_id, pagenum),
+                channel_id, 'Downloading page %s' % pagenum)
+            page = json.loads(api_response)
+            results = page['results']
+            if not results:
+                break
+            entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results)
+            if not page['has_next']:
+                break
+        return self.playlist_result(entries, channel_id, channel_title)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        channel_id = mobj.group('id')
+        return self._extract_videos(channel_id)
+
+
+class RutubeMovieIE(RutubeChannelIE):
+    IE_NAME = 'rutube:movie'
+    IE_DESC = 'Rutube movies'
+    _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
+
+    _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        movie_id = mobj.group('id')
+        api_response = self._download_webpage(
+            self._MOVIE_TEMPLATE % movie_id, movie_id,
+            'Downloading movie JSON')
+        movie = json.loads(api_response)
+        movie_name = movie['name']
+        return self._extract_videos(movie_id, movie_name)
+
+
+class RutubePersonIE(RutubeChannelIE):
+    IE_NAME = 'rutube:person'
+    IE_DESC = 'Rutube person videos'
+    _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
+
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -1,4 +1,5 @@
 # encoding: utf-8
+from __future__ import unicode_literals

 import os.path
 import re
@@ -16,76 +17,76 @@ from ..utils import (


 class SmotriIE(InfoExtractor):
-    IE_DESC = u'Smotri.com'
-    IE_NAME = u'smotri'
+    IE_DESC = 'Smotri.com'
+    IE_NAME = 'smotri'
    _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'

    _TESTS = [
        # real video id 2610366
        {
-            u'url': u'http://smotri.com/video/view/?id=v261036632ab',
-            u'file': u'v261036632ab.mp4',
-            u'md5': u'2a7b08249e6f5636557579c368040eb9',
-            u'info_dict': {
-                u'title': u'катастрофа с камер видеонаблюдения',
-                u'uploader': u'rbc2008',
-                u'uploader_id': u'rbc08',
-                u'upload_date': u'20131118',
-                u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
-                u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
+            'url': 'http://smotri.com/video/view/?id=v261036632ab',
+            'file': 'v261036632ab.mp4',
+            'md5': '2a7b08249e6f5636557579c368040eb9',
+            'info_dict': {
+                'title': 'катастрофа с камер видеонаблюдения',
+                'uploader': 'rbc2008',
+                'uploader_id': 'rbc08',
+                'upload_date': '20131118',
+                'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
+                'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
            },
        },
        # real video id 57591
        {
-            u'url': u'http://smotri.com/video/view/?id=v57591cb20',
-            u'file': u'v57591cb20.flv',
-            u'md5': u'830266dfc21f077eac5afd1883091bcd',
-            u'info_dict': {
-                u'title': u'test',
-                u'uploader': u'Support Photofile@photofile',
-                u'uploader_id': u'support-photofile',
-                u'upload_date': u'20070704',
-                u'description': u'test, видео test',
-                u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
+            'url': 'http://smotri.com/video/view/?id=v57591cb20',
+            'file': 'v57591cb20.flv',
+            'md5': '830266dfc21f077eac5afd1883091bcd',
+            'info_dict': {
+                'title': 'test',
+                'uploader': 'Support Photofile@photofile',
+                'uploader_id': 'support-photofile',
+                'upload_date': '20070704',
+                'description': 'test, видео test',
+                'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
            },
        },
        # video-password
        {
-            u'url': u'http://smotri.com/video/view/?id=v1390466a13c',
-            u'file': u'v1390466a13c.mp4',
-            u'md5': u'f6331cef33cad65a0815ee482a54440b',
-            u'info_dict': {
-                u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
-                u'uploader': u'timoxa40',
-                u'uploader_id': u'timoxa40',
-                u'upload_date': u'20100404',
-                u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
-                u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
+            'url': 'http://smotri.com/video/view/?id=v1390466a13c',
+            'file': 'v1390466a13c.mp4',
+            'md5': 'f6331cef33cad65a0815ee482a54440b',
+            'info_dict': {
+                'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
+                'uploader': 'timoxa40',
+                'uploader_id': 'timoxa40',
+                'upload_date': '20100404',
+                'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
+                'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
            },
-            u'params': {
-                u'videopassword': u'qwerty',
+            'params': {
+                'videopassword': 'qwerty',
            },
        },
        # age limit + video-password
        {
-            u'url': u'http://smotri.com/video/view/?id=v15408898bcf',
-            u'file': u'v15408898bcf.flv',
-            u'md5': u'91e909c9f0521adf5ee86fbe073aad70',
-            u'info_dict': {
-                u'title': u'этот ролик не покажут по ТВ',
-                u'uploader': u'zzxxx',
-                u'uploader_id': u'ueggb',
-                u'upload_date': u'20101001',
-                u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
-                u'age_limit': 18,
-                u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
+            'url': 'http://smotri.com/video/view/?id=v15408898bcf',
+            'file': 'v15408898bcf.flv',
+            'md5': '91e909c9f0521adf5ee86fbe073aad70',
+            'info_dict': {
+                'title': 'этот ролик не покажут по ТВ',
+                'uploader': 'zzxxx',
+                'uploader_id': 'ueggb',
+                'upload_date': '20101001',
+                'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
+                'age_limit': 18,
+                'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
            },
-            u'params': {
-                u'videopassword': u'333'
+            'params': {
+                'videopassword': '333'
            }
        }
    ]
-    
+
    _SUCCESS = 0
    _PASSWORD_NOT_VERIFIED = 1
    _PASSWORD_DETECTED = 2
@@ -106,71 +107,71 @@ class SmotriIE(InfoExtractor):

        # Download video JSON data
        video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
-        video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON')
+        video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON')
        video_json = json.loads(video_json_page)
-        
+
        status = video_json['status']
        if status == self._VIDEO_NOT_FOUND:
-            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
-        elif status == self._PASSWORD_DETECTED:  # The video is protected by a password, retry with
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+        elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
                                                # video-password set
            video_password = self._downloader.params.get('videopassword', None)
            if not video_password:
-                raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True)
+                raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
            video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
-            video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)')
+            video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON (video-password set)')
            video_json = json.loads(video_json_page)
            status = video_json['status']
            if status == self._PASSWORD_NOT_VERIFIED:
-                raise ExtractorError(u'Video password is invalid', expected=True)
-        
+                raise ExtractorError('Video password is invalid', expected=True)
+
        if status != self._SUCCESS:
-            raise ExtractorError(u'Unexpected status value %s' % status)
-        
+            raise ExtractorError('Unexpected status value %s' % status)
+
        # Extract the URL of the video
        video_url = video_json['file_data']
-        
+
        # Video JSON does not provide enough meta data
        # We will extract some from the video web page instead
        video_page_url = 'http://' + mobj.group('url')
-        video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')
+        video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')

        # Warning if video is unavailable
        warning = self._html_search_regex(
            r'<div class="videoUnModer">(.*?)</div>', video_page,
-            u'warning message', default=None)
+            'warning message', default=None)
        if warning is not None:
            self._downloader.report_warning(
-                u'Video %s may not be available; smotri said: %s ' %
+                'Video %s may not be available; smotri said: %s ' %
                (video_id, warning))

        # Adult content
-        if re.search(u'EroConfirmText">', video_page) is not None:
+        if re.search('EroConfirmText">', video_page) is not None:
            self.report_age_confirmation()
            confirm_string = self._html_search_regex(
                r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
-                video_page, u'confirm string')
+                video_page, 'confirm string')
            confirm_url = video_page_url + '&confirm=%s' % confirm_string
-            video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)')
+            video_page = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
            adult_content = True
        else:
            adult_content = False
-        
+
        # Extract the rest of meta data
-        video_title = self._search_meta(u'name', video_page, u'title')
+        video_title = self._search_meta('name', video_page, 'title')
        if not video_title:
            video_title = os.path.splitext(url_basename(video_url))[0]

-        video_description = self._search_meta(u'description', video_page)
-        END_TEXT = u' на сайте Smotri.com'
+        video_description = self._search_meta('description', video_page)
+        END_TEXT = ' на сайте Smotri.com'
        if video_description and video_description.endswith(END_TEXT):
            video_description = video_description[:-len(END_TEXT)]
-        START_TEXT = u'Смотреть онлайн ролик '
+        START_TEXT = 'Смотреть онлайн ролик '
        if video_description and video_description.startswith(START_TEXT):
            video_description = video_description[len(START_TEXT):]
-        video_thumbnail = self._search_meta(u'thumbnail', video_page)
+        video_thumbnail = self._search_meta('thumbnail', video_page)

-        upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
+        upload_date_str = self._search_meta('uploadDate', video_page, 'upload date')
        if upload_date_str:
            upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
            video_upload_date = (
@@ -183,8 +184,8 @@ class SmotriIE(InfoExtractor):
            )
        else:
            video_upload_date = None
-        
-        duration_str = self._search_meta(u'duration', video_page)
+
+        duration_str = self._search_meta('duration', video_page)
        if duration_str:
            duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
            video_duration = (
@@ -197,19 +198,19 @@ class SmotriIE(InfoExtractor):
            )
        else:
            video_duration = None
-        
+
        video_uploader = self._html_search_regex(
-            u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
-            video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
-        
+            '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
+            video_page, 'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
+
        video_uploader_id = self._html_search_regex(
-            u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
-            video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
-        
+            '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
+            video_page, 'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
+
        video_view_count = self._html_search_regex(
-            u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
-            video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
-                
+            'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
+            video_page, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
+
        return {
            'id': video_id,
            'url': video_url,
@@ -227,8 +228,8 @@ class SmotriIE(InfoExtractor):


 class SmotriCommunityIE(InfoExtractor):
-    IE_DESC = u'Smotri.com community videos'
-    IE_NAME = u'smotri:community'
+    IE_DESC = 'Smotri.com community videos'
+    IE_NAME = 'smotri:community'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
    
    def _real_extract(self, url):
@@ -236,21 +237,21 @@ class SmotriCommunityIE(InfoExtractor):
        community_id = mobj.group('communityid')

        url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
-        rss = self._download_xml(url, community_id, u'Downloading community RSS')
+        rss = self._download_xml(url, community_id, 'Downloading community RSS')

        entries = [self.url_result(video_url.text, 'Smotri')
                   for video_url in rss.findall('./channel/item/link')]

        description_text = rss.find('./channel/description').text
        community_title = self._html_search_regex(
-            u'^Видео сообщества "([^"]+)"$', description_text, u'community title')
+            '^Видео сообщества "([^"]+)"$', description_text, 'community title')

        return self.playlist_result(entries, community_id, community_title)


 class SmotriUserIE(InfoExtractor):
-    IE_DESC = u'Smotri.com user videos'
-    IE_NAME = u'smotri:user'
+    IE_DESC = 'Smotri.com user videos'
+    IE_NAME = 'smotri:user'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'

    def _real_extract(self, url):
@@ -258,22 +259,22 @@ class SmotriUserIE(InfoExtractor):
        user_id = mobj.group('userid')

        url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
-        rss = self._download_xml(url, user_id, u'Downloading user RSS')
+        rss = self._download_xml(url, user_id, 'Downloading user RSS')

        entries = [self.url_result(video_url.text, 'Smotri')
                   for video_url in rss.findall('./channel/item/link')]

        description_text = rss.find('./channel/description').text
        user_nickname = self._html_search_regex(
-            u'^Видео режиссера (.*)$', description_text,
-            u'user nickname')
+            '^Видео режиссера (.*)$', description_text,
+            'user nickname')

        return self.playlist_result(entries, user_id, user_nickname)


 class SmotriBroadcastIE(InfoExtractor):
-    IE_DESC = u'Smotri.com broadcasts'
-    IE_NAME = u'smotri:broadcast'
+    IE_DESC = 'Smotri.com broadcasts'
+    IE_NAME = 'smotri:broadcast'
    _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'

    def _real_extract(self, url):
@@ -281,46 +282,40 @@ class SmotriBroadcastIE(InfoExtractor):
        broadcast_id = mobj.group('broadcastid')

        broadcast_url = 'http://' + mobj.group('url')
-        broadcast_page = self._download_webpage(broadcast_url, broadcast_id, u'Downloading broadcast page')
+        broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')

-        if re.search(u'>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
-            raise ExtractorError(u'Broadcast %s does not exist' % broadcast_id, expected=True)
+        if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
+            raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)

        # Adult content
-        if re.search(u'EroConfirmText">', broadcast_page) is not None:
+        if re.search('EroConfirmText">', broadcast_page) is not None:

            (username, password) = self._get_login_info()
            if username is None:
-                raise ExtractorError(u'Erotic broadcasts allowed only for registered users, '
-                    u'use --username and --password options to provide account credentials.', expected=True)
+                raise ExtractorError('Erotic broadcasts allowed only for registered users, '
+                    'use --username and --password options to provide account credentials.', expected=True)

-            # Log in
-            login_form_strs = {
-                u'login-hint53': '1',
-                u'confirm_erotic': '1',
-                u'login': username,
-                u'password': password,
+            login_form = {
+                'login-hint53': '1',
+                'confirm_erotic': '1',
+                'login': username,
+                'password': password,
            }
-            # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
-            # chokes on unicode
-            login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
-            login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
-            login_url = broadcast_url + '/?no_redirect=1'
-            request = compat_urllib_request.Request(login_url, login_data)
-            request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-            broadcast_page = self._download_webpage(
-                request, broadcast_id, note=u'Logging in and confirming age')

-            if re.search(u'>Неверный логин или пароль<', broadcast_page) is not None:
-                raise ExtractorError(u'Unable to log in: bad username or password', expected=True)
+            request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
+            request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+            broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')
+
+            if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
+                raise ExtractorError('Unable to log in: bad username or password', expected=True)

            adult_content = True
        else:
            adult_content = False

        ticket = self._html_search_regex(
-            u'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
-            broadcast_page, u'broadcast ticket')
+            'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
+            broadcast_page, 'broadcast ticket')

        url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket

@@ -328,22 +323,22 @@ class SmotriBroadcastIE(InfoExtractor):
        if broadcast_password:
            url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()

-        broadcast_json_page = self._download_webpage(url, broadcast_id, u'Downloading broadcast JSON')
+        broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')

        try:
            broadcast_json = json.loads(broadcast_json_page)

            protected_broadcast = broadcast_json['_pass_protected'] == 1
            if protected_broadcast and not broadcast_password:
-                raise ExtractorError(u'This broadcast is protected by a password, use the --video-password option', expected=True)
+                raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)

            broadcast_offline = broadcast_json['is_play'] == 0
            if broadcast_offline:
-                raise ExtractorError(u'Broadcast %s is offline' % broadcast_id, expected=True)
+                raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)

            rtmp_url = broadcast_json['_server']
            if not rtmp_url.startswith('rtmp://'):
-                raise ExtractorError(u'Unexpected broadcast rtmp URL')
+                raise ExtractorError('Unexpected broadcast rtmp URL')

            broadcast_playpath = broadcast_json['_streamName']
            broadcast_thumbnail = broadcast_json['_imgURL']
@@ -354,8 +349,8 @@ class SmotriBroadcastIE(InfoExtractor):
            rtmp_conn = 'S:%s' % uuid.uuid4().hex
        except KeyError:
            if protected_broadcast:
-                raise ExtractorError(u'Bad broadcast password', expected=True)
-            raise ExtractorError(u'Unexpected broadcast JSON')
+                raise ExtractorError('Bad broadcast password', expected=True)
+            raise ExtractorError('Unexpected broadcast JSON')

        return {
            'id': broadcast_id,
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -7,13 +9,13 @@ from ..utils import (


 class TumblrIE(InfoExtractor):
-    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
+    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)($|/)'
    _TEST = {
-        u'url': u'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
-        u'file': u'54196191430.mp4',
-        u'md5': u'479bb068e5b16462f5176a6828829767',
-        u'info_dict': {
-            u"title": u"tatiana maslany news"
+        'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
+        'file': '54196191430.mp4',
+        'md5': '479bb068e5b16462f5176a6828829767',
+        'info_dict': {
+            "title": "tatiana maslany news"
        }
    }

@@ -28,18 +30,20 @@ class TumblrIE(InfoExtractor):
        re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
        video = re.search(re_video, webpage)
        if video is None:
-           raise ExtractorError(u'Unable to extract video')
+            raise ExtractorError('Unable to extract video')
        video_url = video.group('video_url')
        ext = video.group('ext')

-        video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
-            webpage, u'thumbnail', fatal=False)  # We pick the first poster
-        if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
+        video_thumbnail = self._search_regex(
+            r'posters.*?\[\\x22(.*?)\\x22',
+            webpage, 'thumbnail', fatal=False)  # We pick the first poster
+        if video_thumbnail:
+            video_thumbnail = video_thumbnail.replace('\\\\/', '/')

        # The only place where you can get a title, it's not complete,
        # but searching in other places doesn't work for all videos
        video_title = self._html_search_regex(r'<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
-            webpage, u'title', flags=re.DOTALL)
+            webpage, 'title', flags=re.DOTALL)

        return [{'id': video_id,
                 'url': video_url,
--- a/youtube_dl/extractor/tutv.py
+++ b/youtube_dl/extractor/tutv.py
@@ -1,3 +1,4 @@
+from __future__ import unicode_literals
 import base64
 import re

@@ -6,15 +7,16 @@ from ..utils import (
    compat_parse_qs,
 )

+
 class TutvIE(InfoExtractor):
-    _VALID_URL=r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
+    _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
    _TEST = {
-        u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
-        u'file': u'2742556.flv',
-        u'md5': u'5eb766671f69b82e528dc1e7769c5cb2',
-        u'info_dict': {
-            u"title": u"Noah en pabellon cuahutemoc"
-        }
+        'url': 'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
+        'file': '2742556.flv',
+        'md5': '5eb766671f69b82e528dc1e7769c5cb2',
+        'info_dict': {
+            'title': 'Noah en pabellon cuahutemoc',
+        },
    }

    def _real_extract(self, url):
@@ -22,18 +24,15 @@ class TutvIE(InfoExtractor):
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
-        internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID')
+        internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')

-        data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
-        data_content = self._download_webpage(data_url, video_id, note=u'Downloading video info')
+        data_url = 'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
+        data_content = self._download_webpage(data_url, video_id, note='Downloading video info')
        data = compat_parse_qs(data_content)
        video_url = base64.b64decode(data['kpt'][0]).decode('utf-8')
-        ext = video_url.partition(u'?')[0].rpartition(u'.')[2]

-        info = {
+        return {
            'id': internal_id,
            'url': video_url,
-            'ext': ext,
            'title': self._og_search_title(webpage),
        }
-        return [info]
--- a/youtube_dl/extractor/youjizz.py
+++ b/youtube_dl/extractor/youjizz.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -9,12 +11,12 @@ from ..utils import (
 class YouJizzIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
    _TEST = {
-        u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
-        u'file': u'2189178.flv',
-        u'md5': u'07e15fa469ba384c7693fd246905547c',
-        u'info_dict': {
-            u"title": u"Zeichentrick 1",
-            u"age_limit": 18,
+        'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
+        'file': '2189178.flv',
+        'md5': '07e15fa469ba384c7693fd246905547c',
+        'info_dict': {
+            "title": "Zeichentrick 1",
+            "age_limit": 18,
        }
    }

@@ -30,12 +32,12 @@ class YouJizzIE(InfoExtractor):

        # Get the video title
        video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
-            webpage, u'title').strip()
+            webpage, 'title').strip()

        # Get the embed page
        result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
        if result is None:
-            raise ExtractorError(u'ERROR: unable to extract embed page')
+            raise ExtractorError('ERROR: unable to extract embed page')

        embed_page_url = result.group(0).strip()
        video_id = result.group('videoid')
@@ -47,23 +49,23 @@ class YouJizzIE(InfoExtractor):
        if m_playlist is not None:
            playlist_url = m_playlist.group('playlist')
            playlist_page = self._download_webpage(playlist_url, video_id,
-                                                   u'Downloading playlist page')
+                                                   'Downloading playlist page')
            m_levels = list(re.finditer(r'<level bitrate="(\d+?)" file="(.*?)"', playlist_page))
            if len(m_levels) == 0:
-                raise ExtractorError(u'Unable to extract video url')
+                raise ExtractorError('Unable to extract video url')
            videos = [(int(m.group(1)), m.group(2)) for m in m_levels]
            (_, video_url) = sorted(videos)[0]
            video_url = video_url.replace('%252F', '%2F')
        else:
            video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
-                                           webpage, u'video URL')
+                                           webpage, 'video URL')

-        info = {'id': video_id,
-                'url': video_url,
-                'title': video_title,
-                'ext': 'flv',
-                'format': 'flv',
-                'player_url': embed_page_url,
-                'age_limit': age_limit}
-
-        return [info]
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title,
+            'ext': 'flv',
+            'format': 'flv',
+            'player_url': embed_page_url,
+            'age_limit': age_limit,
+        }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1662,7 +1662,7 @@ class YoutubeUserIE(InfoExtractor):
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
-                    'id': 'video_id',
+                    'id': video_id,
                    'title': title,
                }
        url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.01.27'
+__version__ = '2014.01.30'
Author	SHA1	Message	Date
Philipp Hagemeister	ed9cc2f1e0	release 2014.01.30	2014-01-30 04:52:54 +01:00
Philipp Hagemeister	975fa541c2	[liveleak] Support multiple formats (Fixes #2262 )	2014-01-30 04:52:50 +01:00
Jaime Marquínez Ferrándiz	251974e44c	Merge pull request #2272 from dstftw/master Improve some regexes	2014-01-29 14:58:14 -08:00
dst	57b6288358	[comedycentral] Improve regexes	2014-01-30 04:33:00 +07:00
dst	c3f51436bf	Improve some regexes for embedded players	2014-01-30 04:26:46 +07:00
Jaime Marquínez Ferrándiz	0c708f11cb	[bloomberg] Fix ooyala url extraction Added a helper method to InfoExtractor for searching the ‘twitter:player’ meta property. Now the OoyalaIE also recognizes the ‘ec’ parameter in the url as the embed code.	2014-01-29 18:03:32 +01:00
Jaime Marquínez Ferrándiz	fb2a706d11	[myspass] Simplify and use unicode_literals	2014-01-29 16:59:22 +01:00
Jaime Marquínez Ferrándiz	0b76600deb	[youjizz] Simplify and use unicode_literals	2014-01-29 16:59:21 +01:00
Jaime Marquínez Ferrándiz	245b612a36	[rbmaradio] Simplify and use unicode_literals	2014-01-29 16:59:10 +01:00
Jaime Marquínez Ferrándiz	d882161d5a	[infoq] Simplify and use unicode_literals	2014-01-29 15:34:35 +01:00
Jaime Marquínez Ferrándiz	d4a21e0b49	[tutv] Simplify and use unicode_literals	2014-01-29 15:22:41 +01:00
Jaime Marquínez Ferrándiz	26a78d4bbf	[nba] Simplify and use unicode_literals Remove the commented parts for extracting the upload date	2014-01-29 15:16:18 +01:00
Philipp Hagemeister	8db69786c2	release 2014.01.29	2014-01-29 11:16:28 +01:00
Philipp Hagemeister	b11cec4162	[youtube:user] Fix id key (Fixes #1745 )	2014-01-29 11:16:12 +01:00
Philipp Hagemeister	7eeb5bef24	[liveleak] Simplify	2014-01-28 21:57:38 +01:00
Philipp Hagemeister	9d2032932c	Merge remote-tracking branch 'dstftw/ivi'	2014-01-28 21:47:05 +01:00
Philipp Hagemeister	6490306017	Merge remote-tracking branch 'dstftw/channel9'	2014-01-28 21:46:42 +01:00
dst	ceb2b7d257	[ivi] Fix test and use unicode literals	2014-01-29 02:20:48 +07:00
dst	459a53c2c2	[channel9] Remove unnecessary coding cookie	2014-01-29 02:07:29 +07:00
dst	adc267eebf	[channel9] Use unicode literals	2014-01-29 02:00:56 +07:00
dst	ffe8f62d27	[smotri] Simplify login and use unicode literals	2014-01-29 01:52:57 +07:00
Jaime Marquínez Ferrándiz	ed85007039	[ninegag] Use unicode_literals	2014-01-28 18:55:06 +01:00
Jaime Marquínez Ferrándiz	5aaca50d60	[keek] Simplify and use unicode_literals	2014-01-28 18:47:31 +01:00
Jaime Marquínez Ferrándiz	869baf3565	[funnyordie] Simplify and use unicode_literals	2014-01-28 18:41:39 +01:00
Philipp Hagemeister	e299f6d27f	[pornhd] Fix	2014-01-28 03:53:00 +01:00
Philipp Hagemeister	4a192f817e	release 2014.01.28.1	2014-01-28 03:44:19 +01:00
Philipp Hagemeister	bc1d1a5a71	release 2014.01.28	2014-01-28 03:37:42 +01:00
Philipp Hagemeister	456895d9cf	[tumblr] Test new URL format (#2255 )	2014-01-28 03:37:38 +01:00
Philipp Hagemeister	218c15ab59	Merge remote-tracking branch 'mike/tumblr-url'	2014-01-28 03:35:52 +01:00
Philipp Hagemeister	17ab4d3b5e	[brightcove] Move test to generic	2014-01-28 03:35:32 +01:00
Philipp Hagemeister	31ef0ff038	Merge remote-tracking branch 'dstftw/rutube-channel'	2014-01-28 03:32:22 +01:00
Philipp Hagemeister	37e3b90d59	[rutube] Simplify	2014-01-28 03:32:07 +01:00
dst	00ff8f92a5	[rutube] Update test	2014-01-28 09:31:14 +07:00
Philipp Hagemeister	4857beba3a	Merge remote-tracking branch 'dstftw/rutube-channel'	2014-01-28 03:30:21 +01:00
Philipp Hagemeister	c1e60cc2bf	Merge remote-tracking branch 'dstftw/master'	2014-01-28 03:29:10 +01:00
dst	98669ed79c	[imdb] Fix playlist test	2014-01-28 09:13:08 +07:00
dst	a3978a6159	[imdb] Fix duplicated entries bug	2014-01-28 09:12:23 +07:00
dst	e3a9f32f52	[rutube] Add support for user videos	2014-01-28 08:47:17 +07:00
dst	87fac3238d	[rutube] Add channel test	2014-01-28 08:25:56 +07:00
dst	a2fb2a2134	[rutube] Improve video extractor	2014-01-28 08:19:45 +07:00
MikeCol	9e8ee54553	VALID_URL changed to match different kinds of Tumblr-URLs	2014-01-28 01:41:18 +01:00
Philipp Hagemeister	117bec936c	[brightcove] Parse URL from meta element if available (Fixes #2253 )	2014-01-28 01:01:23 +01:00
dst	1547c8cc88	[rutube] Add support for channels and movies	2014-01-28 06:56:09 +07:00
Philipp Hagemeister	075911d48e	[la7] Skip test on travis	2014-01-27 23:47:22 +01:00
Philipp Hagemeister	b21a918984	release 2014.01.27.2	2014-01-27 19:22:45 +01:00
Philipp Hagemeister	f9b8549609	[ard] Support multiple formats (Closes #2247 )	2014-01-27 18:40:10 +01:00
Jaime Marquínez Ferrándiz	e2ba07024f	Merge remote-tracking branch 'origin/master'	2014-01-27 12:45:59 +01:00
Jaime Marquínez Ferrándiz	9b05bd42e5	[discovery] Extract more info and simplify	2014-01-27 12:41:30 +01:00
Philipp Hagemeister	b6d3a99678	[cliphunter] Simplify (#2233 )	2014-01-27 12:39:39 +01:00
Jaime Marquínez Ferrándiz	96d7b8873a	Merge remote-tracking branch 'sahutd/master'	2014-01-27 12:21:00 +01:00
Philipp Hagemeister	efc867775e	[cliphunter] Simplify	2014-01-27 07:55:30 +01:00
Philipp Hagemeister	5ab772f09c	Merge branch 'cliphunter' of https://github.com/pornophage/youtube-dl	2014-01-27 07:48:51 +01:00
Philipp Hagemeister	2a89386232	Credit @MikeCol for malemotion IE	2014-01-27 07:43:41 +01:00
MikeCol	4d9be98dbc	Malemotion extractor	2014-01-27 07:43:02 +01:00
Mike Col	6737907826	[tumblr] Fix thumbnail extraction Signed-off-by: Philipp Hagemeister <phihag@phihag.de>	2014-01-27 07:38:55 +01:00
Philipp Hagemeister	c060b77446	[tumblr] Use unicode_literals	2014-01-27 07:36:18 +01:00
Philipp Hagemeister	7e8caf30c0	Throw an error if no video formats are found	2014-01-27 07:31:54 +01:00
Philipp Hagemeister	ca3e054750	release 2014.01.27.1	2014-01-27 07:09:55 +01:00
Philipp Hagemeister	1da1558f46	[la7] Support more URLs	2014-01-27 07:08:01 +01:00
sahutd	53bfd6b24c	Added support for Discovery Issue #2227	2014-01-26 14:05:34 +05:30
Pornophage	bacb5e4f44	Minor fixes Remove empty description Set correct md5 test	2014-01-25 02:34:08 +01:00
Pornophage	008af8660b	Add cliphunter extractor	2014-01-25 01:46:52 +01:00