release 2013.07.25

Merge remote-tracking branch 'pishposhmcgee/patch-2'
[ina] Fix URL detection (Fixes #1121 )
2025-08-02 02:20:59 -05:00 · 2013-07-25 09:35:25 +02:00 · 2013-07-25 09:34:56 +02:00 · 2013-07-25 09:34:12 +02:00 · 2013-07-24 21:51:08 -05:00 · 2013-07-24 22:27:33 +02:00
24 changed files with 528 additions and 263 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,6 +9,7 @@ notifications:
    - filippo.valsorda@gmail.com
    - phihag@phihag.de
    - jaime.marquinez.ferrandiz+travis@gmail.com
+    - yasoob.khld@gmail.com
 #  irc:
 #    channels:
 #      - "irc.freenode.org#youtube-dl"
--- a/README.md
+++ b/README.md
@@ -16,7 +16,9 @@ which means you can modify it, redistribute it or use it however you like.
 # OPTIONS
    -h, --help                 print this help text and exit
    --version                  print program version and exit
-    -U, --update               update this program to latest version
+    -U, --update               update this program to latest version. Make sure
+                               that you have sufficient permissions (run with
+                               sudo if needed)
    -i, --ignore-errors        continue on download errors
    --dump-user-agent          display the current browser identification
    --user-agent UA            specify a custom user agent
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -14,15 +14,15 @@ tests = [
    # 88
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
-    # 87
+    # 87 - vflART1Nf 2013/07/24
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
-     "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
+     "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
    # 86 - vfl_ymO4Z 2013/06/27
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
     "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
-    # 85
+    # 85 - vflSAFCP9 2013/07/19
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
-     "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
+     "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
    # 84
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
--- a/test/test_youtube_sig.py
+++ b/test/test_youtube_sig.py
@@ -30,7 +30,7 @@ class TestYoutubeSig(unittest.TestCase):

    def test_87(self):
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
-        right = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
+        right = "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"
        self.assertEqual(sig(wrong), right)

    def test_86(self):
@@ -40,7 +40,7 @@ class TestYoutubeSig(unittest.TestCase):

    def test_85(self):
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
-        right = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
+        right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"
        self.assertEqual(sig(wrong), right)

    def test_84(self):
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -129,7 +129,7 @@ def parseOpts(overrideArguments=None):
    general.add_option('-v', '--version',
            action='version', help='print program version and exit')
    general.add_option('-U', '--update',
-            action='store_true', dest='update_self', help='update this program to latest version')
+            action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
    general.add_option('-i', '--ignore-errors',
            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
    general.add_option('--dump-user-agent',
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -19,6 +19,7 @@ from .dreisat import DreiSatIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
+from .exfm import ExfmIE
 from .facebook import FacebookIE
 from .flickr import FlickrIE
 from .freesound import FreesoundIE
@@ -51,6 +52,7 @@ from .pornotube import PornotubeIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .ringtv import RingTVIE
+from .sina import SinaIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
@@ -70,7 +72,9 @@ from .veoh import VeohIE
 from .vevo import VevoIE
 from .vimeo import VimeoIE
 from .vine import VineIE
+from .c56 import C56IE
 from .wat import WatIE
+from .weibo import WeiboIE
 from .wimp import WimpIE
 from .worldstarhiphop import WorldStarHipHopIE
 from .xhamster import XHamsterIE
@@ -88,6 +92,9 @@ from .youtube import (
    YoutubeChannelIE,
    YoutubeShowIE,
    YoutubeSubscriptionsIE,
+    YoutubeRecommendedIE,
+    YoutubeWatchLaterIE,
+    YoutubeFavouritesIE,
 )
 from .zdf import ZDFIE

--- a/youtube_dl/extractor/breakcom.py
+++ b/youtube_dl/extractor/breakcom.py
@@ -1,6 +1,8 @@
 import re
+import json

 from .common import InfoExtractor
+from ..utils import determine_ext


 class BreakIE(InfoExtractor):
@@ -17,17 +19,20 @@ class BreakIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1).split("-")[-1]
-        webpage = self._download_webpage(url, video_id)
-        video_url = re.search(r"videoPath: '(.+?)',",webpage).group(1)
-        key = re.search(r"icon: '(.+?)',",webpage).group(1)
-        final_url = str(video_url)+"?"+str(key)
-        thumbnail_url = re.search(r"thumbnailURL: '(.+?)'",webpage).group(1)
-        title = re.search(r"sVidTitle: '(.+)',",webpage).group(1)
-        ext = video_url.split('.')[-1]
+        embed_url = 'http://www.break.com/embed/%s' % video_id
+        webpage = self._download_webpage(embed_url, video_id)
+        info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
+                                       u'info json', flags=re.DOTALL)
+        info = json.loads(info_json)
+        video_url = info['videoUri']
+        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
+        if m_youtube is not None:
+            return self.url_result(m_youtube.group(1), 'Youtube')
+        final_url = video_url + '?' + info['AuthToken']
        return [{
            'id':        video_id,
            'url':       final_url,
-            'ext':       ext,
-            'title':     title,
-            'thumbnail': thumbnail_url,
+            'ext':       determine_ext(final_url),
+            'title':     info['contentName'],
+            'thumbnail': info['thumbUri'],
        }]
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@@ -0,0 +1,36 @@
+# coding: utf-8
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+class C56IE(InfoExtractor):
+    _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
+    IE_NAME = u'56.com'
+
+    _TEST ={
+        u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
+        u'file': u'93440716.mp4',
+        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
+        u'info_dict': {
+            u'title': u'网事知多少 第32期：车怒',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+        text_id = mobj.group('textid')
+        info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
+                                           text_id, u'Downloading video info')
+        info = json.loads(info_page)['info']
+        best_format = sorted(info['rfiles'], key=lambda f: int(f['filesize']))[-1]
+        video_url = best_format['url']
+
+        return {'id': info['vid'],
+                'title': info['Subject'],
+                'url': video_url,
+                'ext': determine_ext(video_url),
+                'thumbnail': info.get('bimg') or info.get('img'),
+                }
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@@ -1,26 +1,26 @@
 import re
-import socket
 import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
-    compat_http_client,
-    compat_str,
-    compat_urllib_error,
    compat_urllib_parse_urlparse,
-    compat_urllib_request,

    ExtractorError,
 )


 class CollegeHumorIE(InfoExtractor):
-    _WORKING = False
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'

-    def report_manifest(self, video_id):
-        """Report information extraction."""
-        self.to_screen(u'%s: Downloading XML manifest' % video_id)
+    _TEST = {
+        u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
+        u'file': u'6902724.mp4',
+        u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
+        u'info_dict': {
+            u'title': u'Comic-Con Cosplay Catastrophe',
+            u'description': u'Fans get creative this year at San Diego.  Too creative.  And yes, that\'s really Joss Whedon.',
+        },
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -36,14 +36,16 @@ class CollegeHumorIE(InfoExtractor):

        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
-        try:
-            metaXml = compat_urllib_request.urlopen(xmlUrl).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
+        metaXml = self._download_webpage(xmlUrl, video_id,
+                                         u'Downloading info XML',
+                                         u'Unable to download video info XML')

        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
+            youtubeIdNode = videoNode.find('./youtubeID')
+            if youtubeIdNode is not None:
+                return self.url_result(youtubeIdNode.text, 'Youtube')
            info['description'] = videoNode.findall('./description')[0].text
            info['title'] = videoNode.findall('./caption')[0].text
            info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
@@ -52,11 +54,9 @@ class CollegeHumorIE(InfoExtractor):
            raise ExtractorError(u'Invalid metadata XML file')

        manifest_url += '?hdcore=2.10.3'
-        self.report_manifest(video_id)
-        try:
-            manifestXml = compat_urllib_request.urlopen(manifest_url).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
+        manifestXml = self._download_webpage(manifest_url, video_id,
+                                             u'Downloading XML manifest',
+                                             u'Unable to download video info XML')

        adoc = xml.etree.ElementTree.fromstring(manifestXml)
        try:
@@ -66,9 +66,8 @@ class CollegeHumorIE(InfoExtractor):
        except IndexError as err:
            raise ExtractorError(u'Invalid manifest file')

-        url_pr = compat_urllib_parse_urlparse(manifest_url)
-        url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'
+        url_pr = compat_urllib_parse_urlparse(info['thumbnail'])

-        info['url'] = url
-        info['ext'] = 'f4f'
+        info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
+        info['ext'] = 'mp4'
        return [info]
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -24,7 +24,9 @@ class ComedyCentralIE(InfoExtractor):
                         (full-episodes/(?P<episode>.*)|
                          (?P<clip>
                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
-                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
+                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
+                          (?P<interview>
+                              extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
                     $"""
    _TEST = {
        u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
@@ -87,6 +89,9 @@ class ComedyCentralIE(InfoExtractor):
            else:
                epTitle = mobj.group('cntitle')
            dlNewest = False
+        elif mobj.group('interview'):
+            epTitle = mobj.group('interview_title')
+            dlNewest = False
        else:
            dlNewest = not mobj.group('episode')
            if dlNewest:
--- a/youtube_dl/extractor/exfm.py
+++ b/youtube_dl/extractor/exfm.py
@@ -0,0 +1,54 @@
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class ExfmIE(InfoExtractor):
+    IE_NAME = u'exfm'
+    IE_DESC = u'ex.fm'
+    _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
+    _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
+    _TESTS = [
+        {
+            u'url': u'http://ex.fm/song/1bgtzg',
+            u'file': u'95223130.mp3',
+            u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
+            u'info_dict': {
+                u"title": u"We Can't Stop - Miley Cyrus",
+                u"uploader": u"Miley Cyrus",
+                u'upload_date': u'20130603',
+                u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
+            },
+            u'note': u'Soundcloud song',
+        },
+        {
+            u'url': u'http://ex.fm/song/wddt8',
+            u'file': u'wddt8.mp3',
+            u'md5': u'966bd70741ac5b8570d8e45bfaed3643',
+            u'info_dict': {
+                u'title': u'Safe and Sound',
+                u'uploader': u'Capital Cities',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        song_id = mobj.group(1)
+        info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
+        webpage = self._download_webpage(info_url, song_id)
+        info = json.loads(webpage)
+        song_url = info['song']['url']
+        if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
+            self.to_screen('Soundcloud song detected')
+            return self.url_result(song_url.replace('/stream',''), 'Soundcloud')
+        return [{
+            'id':          song_id,
+            'url':         song_url,
+            'ext':         'mp3',
+            'title':       info['song']['title'],
+            'thumbnail':   info['song']['image']['large'],
+            'uploader':    info['song']['artist'],
+            'view_count':  info['song']['loved_count'],
+        }]
--- a/youtube_dl/extractor/ina.py
+++ b/youtube_dl/extractor/ina.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor

 class InaIE(InfoExtractor):
    """Information Extractor for Ina.fr"""
-    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
+    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>[A-F0-9]+)/.*'
    _TEST = {
        u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
        u'file': u'I12055569.mp4',
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -10,7 +10,8 @@ class InstagramIE(InfoExtractor):
        u'md5': u'0d2da106a9d2631273e192b372806516',
        u'info_dict': {
            u"uploader_id": u"naomipq", 
-            u"title": u"Video by naomipq"
+            u"title": u"Video by naomipq",
+            u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
        }
    }

@@ -18,20 +19,17 @@ class InstagramIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
-        html_title = self._html_search_regex(
-            r'<title>(.+?)</title>',
-            webpage, u'title', flags=re.DOTALL)
-        title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
-        uploader_id = self._html_search_regex(
-            r'<div class="media-user" id="media_user">.*?<h2><a href="[^"]*">([^<]*)</a></h2>',
-            webpage, u'uploader id', fatal=False, flags=re.DOTALL)
-        ext = 'mp4'
+        uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
+            webpage, u'uploader id', fatal=False)
+        desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description',
+            fatal=False)

        return [{
            'id':        video_id,
            'url':       self._og_search_video_url(webpage),
-            'ext':       ext,
-            'title':     title,
+            'ext':       'mp4',
+            'title':     u'Video by %s' % uploader_id,
            'thumbnail': self._og_search_thumbnail(webpage),
-            'uploader_id' : uploader_id
+            'uploader_id' : uploader_id,
+            'description': desc,
        }]
--- a/youtube_dl/extractor/sina.py
+++ b/youtube_dl/extractor/sina.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+
+
+class SinaIE(InfoExtractor):
+    _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
+                        (
+                            (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=))(?P<id>\d+?)($|&))))
+                            |
+                            # This is used by external sites like Weibo
+                            (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf)
+                        )
+                  '''
+
+    _TEST = {
+        u'url': u'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
+        u'file': u'110028898.flv',
+        u'md5': u'd65dd22ddcf44e38ce2bf58a10c3e71f',
+        u'info_dict': {
+            u'title': u'《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
+        }
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
+
+    def _extract_video(self, video_id):
+        data = compat_urllib_parse.urlencode({'vid': video_id})
+        url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
+            video_id, u'Downloading video url')
+        image_page = self._download_webpage(
+            'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
+            video_id, u'Downloading thumbnail info')
+        url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
+
+        return {'id': video_id,
+                'url': url_doc.find('./durl/url').text,
+                'ext': 'flv',
+                'title': url_doc.find('./vname').text,
+                'thumbnail': image_page.split('=')[1],
+                }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+        video_id = mobj.group('id')
+        if mobj.group('token') is not None:
+            # The video id is in the redirected url
+            self.to_screen(u'Getting video id')
+            request = compat_urllib_request.Request(url)
+            request.get_method = lambda: 'HEAD'
+            (_, urlh) = self._download_webpage_handle(request, 'NA', False)
+            return self._real_extract(urlh.geturl())
+        elif video_id is None:
+            pseudo_id = mobj.group('pseudo_id')
+            webpage = self._download_webpage(url, pseudo_id)
+            video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, u'video id')
+
+        return self._extract_video(video_id)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -19,7 +19,11 @@ class SoundcloudIE(InfoExtractor):
       of the stream token and uid
     """

-    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)(?:[?].*)?$'
+    _VALID_URL = r'''^(?:https?://)?
+                    (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
+                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
+                    )
+                    '''
    IE_NAME = u'soundcloud'
    _TEST = {
        u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
@@ -33,59 +37,65 @@ class SoundcloudIE(InfoExtractor):
        }
    }

+    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
+
+    @classmethod
+    def suitable(cls, url):
+        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
+
    def report_resolve(self, video_id):
        """Report information extraction."""
        self.to_screen(u'%s: Resolving id' % video_id)

-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+    @classmethod
+    def _resolv_url(cls, url):
+        return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID

-        # extract uploader (which is in the url)
-        uploader = mobj.group(1)
-        # extract simple title (uploader + slug of song title)
-        slug_title =  mobj.group(2)
-        full_title = '%s/%s' % (uploader, slug_title)
-
-        self.report_resolve(full_title)
-
-        url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
-        resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
-        info_json = self._download_webpage(resolv_url, full_title, u'Downloading info JSON')
-
-        info = json.loads(info_json)
+    def _extract_info_dict(self, info, full_title=None):
        video_id = info['id']
-        self.report_extraction(full_title)
+        name = full_title or video_id
+        self.report_extraction(name)

-        streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
-        stream_json = self._download_webpage(streams_url, full_title,
-                                             u'Downloading stream definitions',
-                                             u'unable to download stream definitions')
-
-        streams = json.loads(stream_json)
-        mediaURL = streams['http_mp3_128_url']
-        upload_date = unified_strdate(info['created_at'])
-
-        return [{
+        thumbnail = info['artwork_url']
+        if thumbnail is not None:
+            thumbnail = thumbnail.replace('-large', '-t500x500')
+        return {
            'id':       info['id'],
-            'url':      mediaURL,
+            'url':      info['stream_url'] + '?client_id=' + self._CLIENT_ID,
            'uploader': info['user']['username'],
-            'upload_date': upload_date,
+            'upload_date': unified_strdate(info['created_at']),
            'title':    info['title'],
            'ext':      u'mp3',
            'description': info['description'],
-        }]
+            'thumbnail': thumbnail,
+        }

-class SoundcloudSetIE(InfoExtractor):
-    """Information extractor for soundcloud.com sets
-       To access the media, the uid of the song and a stream token
-       must be extracted from the page source and the script must make
-       a request to media.soundcloud.com/crossdomain.xml. Then
-       the media can be grabbed by requesting from an url composed
-       of the stream token and uid
-     """
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)

+        track_id = mobj.group('track_id')
+        if track_id is not None:
+            info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
+            full_title = track_id
+        else:
+            # extract uploader (which is in the url)
+            uploader = mobj.group(1)
+            # extract simple title (uploader + slug of song title)
+            slug_title =  mobj.group(2)
+            full_title = '%s/%s' % (uploader, slug_title)
+    
+            self.report_resolve(full_title)
+    
+            url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
+            info_json_url = self._resolv_url(url)
+        info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')
+
+        info = json.loads(info_json)
+        return self._extract_info_dict(info, full_title)
+
+class SoundcloudSetIE(SoundcloudIE):
    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
    IE_NAME = u'soundcloud:set'
    _TEST = {
@@ -153,10 +163,6 @@ class SoundcloudSetIE(InfoExtractor):
        ]
    }

-    def report_resolve(self, video_id):
-        """Report information extraction."""
-        self.to_screen(u'%s: Resolving id' % video_id)
-
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
@@ -171,7 +177,7 @@ class SoundcloudSetIE(InfoExtractor):
        self.report_resolve(full_title)

        url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
-        resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
+        resolv_url = self._resolv_url(url)
        info_json = self._download_webpage(resolv_url, full_title)

        videos = []
@@ -182,23 +188,8 @@ class SoundcloudSetIE(InfoExtractor):
            return

        self.report_extraction(full_title)
-        for track in info['tracks']:
-            video_id = track['id']
-
-            streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
-            stream_json = self._download_webpage(streams_url, video_id, u'Downloading track info JSON')
-
-            self.report_extraction(video_id)
-            streams = json.loads(stream_json)
-            mediaURL = streams['http_mp3_128_url']
-
-            videos.append({
-                'id':       video_id,
-                'url':      mediaURL,
-                'uploader': track['user']['username'],
-                'upload_date':  unified_strdate(track['created_at']),
-                'title':    track['title'],
-                'ext':      u'mp3',
-                'description': track['description'],
-            })
-        return videos
+        return {'_type': 'playlist',
+                'entries': [self._extract_info_dict(track) for track in info['tracks']],
+                'id': info['id'],
+                'title': info['title'],
+                }
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -33,7 +33,7 @@ class TeamcocoIE(InfoExtractor):
        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_webpage(data_url, video_id, 'Downloading data webpage')

-        video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
+        video_url = self._html_search_regex(r'<file [^>]*type="high".*?>(.*?)</file>',
            data, u'video URL')

        return [{
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -67,7 +67,7 @@ class TEDIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
        self.report_extraction(video_name)
        # If the url includes the language we get the title translated
-        title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>',
+        title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
                                        webpage, 'title')
        json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
                                    webpage, 'json data')
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@@ -10,6 +10,7 @@ class TF1IE(InfoExtractor):
    TF1 uses the wat.tv player, currently it can only download videos with the
    html5 player enabled, it cannot download HD videos.
    """
+    _WORKING = False
    _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
    _TEST = {
        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
--- a/youtube_dl/extractor/traileraddict.py
+++ b/youtube_dl/extractor/traileraddict.py
@@ -4,11 +4,11 @@ from .common import InfoExtractor


 class TrailerAddictIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/trailer/([^/]+)/(?:trailer|feature-trailer)'
+    _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
    _TEST = {
        u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
        u'file': u'76184.mp4',
-        u'md5': u'41365557f3c8c397d091da510e73ceb4',
+        u'md5': u'57e39dbcf4142ceb8e1f242ff423fd71',
        u'info_dict': {
            u"title": u"Prince Avalanche Trailer",
            u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind."
@@ -17,24 +17,30 @@ class TrailerAddictIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
-        webpage = self._download_webpage(url, video_id)
-        
+        name = mobj.group('movie') + '/' + mobj.group('trailer_name')
+        webpage = self._download_webpage(url, name)
+
        title = self._search_regex(r'<title>(.+?)</title>',
                webpage, 'video title').replace(' - Trailer Addict','')
        view_count = self._search_regex(r'Views: (.+?)<br />',
                webpage, 'Views Count')
        video_id = self._og_search_property('video', webpage, 'Video id').split('=')[1]

-        info_url = "http://www.traileraddict.com/fvar.php?tid=%s" %(str(video_id))
+        # Presence of (no)watchplus function indicates HD quality is available
+        if re.search(r'function (no)?watchplus()', webpage):
+            fvar = "fvarhd"
+        else:
+            fvar = "fvar"
+
+        info_url = "http://www.traileraddict.com/%s.php?tid=%s" % (fvar, str(video_id))
        info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage")
-        
+
        final_url = self._search_regex(r'&fileurl=(.+)',
                info_webpage, 'Download url').replace('%3F','?')
        thumbnail_url = self._search_regex(r'&image=(.+?)&',
                info_webpage, 'thumbnail url')
        ext = final_url.split('.')[-1].split('?')[0]
-        
+
        return [{
            'id'          : video_id,
            'url'         : final_url,
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@@ -12,6 +12,7 @@ from ..utils import (


 class WatIE(InfoExtractor):
+    _WORKING = False
    _VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
    IE_NAME = 'wat.tv'
    _TEST = {
--- a/youtube_dl/extractor/weibo.py
+++ b/youtube_dl/extractor/weibo.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+
+import re
+import json
+
+from .common import InfoExtractor
+
+class WeiboIE(InfoExtractor):
+    """
+    The videos in Weibo come from different sites, this IE just finds the link
+    to the external video and returns it.
+    """
+    _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
+
+    _TEST = {
+        u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
+        u'file': u'98322879.flv',
+        u'info_dict': {
+            u'title': u'魔声耳机最新广告“All Eyes On Us”',
+        },
+        u'note': u'Sina video',
+        u'params': {
+            u'skip_download': True,
+        },
+    }
+
+    # Additional example videos from different sites
+    # Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm
+    # 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+        video_id = mobj.group('id')
+        info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
+        info_page = self._download_webpage(info_url, video_id)
+        info = json.loads(info_page)
+
+        videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
+        #Prefer sina video since they have thumbnails
+        videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u)
+        player_url = videos_urls[-1]
+        m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url)
+        if m_sina is not None:
+            self.to_screen('Sina video detected')
+            sina_id = m_sina.group(1)
+            player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
+        return self.url_result(player_url)
+
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -13,7 +13,7 @@ from ..utils import (


 class YoukuIE(InfoExtractor):
-    _VALID_URL =  r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
+    _VALID_URL =  r'(?:http://)?(v|player)\.youku\.com/(v_show/id_|player\.php/sid/)(?P<ID>[A-Za-z0-9]+)(\.html|/v.swf)'
    _TEST =   {
        u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
        u"file": u"XNDgyMDQ2NTQw_part00.flv",
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -23,8 +23,114 @@ from ..utils import (
    orderedSet,
 )

+class YoutubeBaseInfoExtractor(InfoExtractor):
+    """Provide base functions for Youtube extractors"""
+    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
+    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
+    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
+    _NETRC_MACHINE = 'youtube'
+    # If True it will raise an error if no login info is provided
+    _LOGIN_REQUIRED = False

-class YoutubeIE(InfoExtractor):
+    def report_lang(self):
+        """Report attempt to set language."""
+        self.to_screen(u'Setting language')
+
+    def _set_language(self):
+        request = compat_urllib_request.Request(self._LANG_URL)
+        try:
+            self.report_lang()
+            compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
+            return False
+        return True
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        # No authentication to be performed
+        if username is None:
+            if self._LOGIN_REQUIRED:
+                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+            return False
+
+        request = compat_urllib_request.Request(self._LOGIN_URL)
+        try:
+            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
+            return False
+
+        galx = None
+        dsh = None
+        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
+        if match:
+          galx = match.group(1)
+        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
+        if match:
+          dsh = match.group(1)
+
+        # Log in
+        login_form_strs = {
+                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
+                u'Email': username,
+                u'GALX': galx,
+                u'Passwd': password,
+                u'PersistentCookie': u'yes',
+                u'_utf8': u'霱',
+                u'bgresponse': u'js_disabled',
+                u'checkConnection': u'',
+                u'checkedDomains': u'youtube',
+                u'dnConn': u'',
+                u'dsh': dsh,
+                u'pstMsg': u'0',
+                u'rmShown': u'1',
+                u'secTok': u'',
+                u'signIn': u'Sign in',
+                u'timeStmp': u'',
+                u'service': u'youtube',
+                u'uilel': u'3',
+                u'hl': u'en_US',
+        }
+        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
+        # chokes on unicode
+        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
+        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
+        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
+        try:
+            self.report_login()
+            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
+            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
+                self._downloader.report_warning(u'unable to log in: bad username or password')
+                return False
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
+            return False
+        return True
+
+    def _confirm_age(self):
+        age_form = {
+                'next_url':     '/',
+                'action_confirm':   'Confirm',
+                }
+        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
+        try:
+            self.report_age_confirmation()
+            compat_urllib_request.urlopen(request).read().decode('utf-8')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
+        return True
+
+    def _real_initialize(self):
+        if self._downloader is None:
+            return
+        if not self._set_language():
+            return
+        if not self._login():
+            return
+        self._confirm_age()
+
+class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com'
    _VALID_URL = r"""^
                     (
@@ -45,11 +151,7 @@ class YoutubeIE(InfoExtractor):
                     ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
-    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
-    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
-    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
-    _NETRC_MACHINE = 'youtube'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
@@ -139,10 +241,6 @@ class YoutubeIE(InfoExtractor):
        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

-    def report_lang(self):
-        """Report attempt to set language."""
-        self.to_screen(u'Setting language')
-
    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self.to_screen(u'%s: Downloading video webpage' % video_id)
@@ -186,11 +284,11 @@ class YoutubeIE(InfoExtractor):
        elif len(s) == 88:
            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
        elif len(s) == 87:
-            return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1]
+            return s[4:23] + s[86] + s[24:85]
        elif len(s) == 86:
            return s[2:63] + s[82] + s[64:82] + s[63]
        elif len(s) == 85:
-            return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1]
+            return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
        elif len(s) == 84:
            return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
        elif len(s) == 83:
@@ -306,91 +404,6 @@ class YoutubeIE(InfoExtractor):
        for x in formats:
            print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))

-    def _real_initialize(self):
-        if self._downloader is None:
-            return
-
-        # Set language
-        request = compat_urllib_request.Request(self._LANG_URL)
-        try:
-            self.report_lang()
-            compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
-            return
-
-        (username, password) = self._get_login_info()
-
-        # No authentication to be performed
-        if username is None:
-            return
-
-        request = compat_urllib_request.Request(self._LOGIN_URL)
-        try:
-            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
-            return
-
-        galx = None
-        dsh = None
-        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
-        if match:
-          galx = match.group(1)
-
-        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
-        if match:
-          dsh = match.group(1)
-
-        # Log in
-        login_form_strs = {
-                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
-                u'Email': username,
-                u'GALX': galx,
-                u'Passwd': password,
-                u'PersistentCookie': u'yes',
-                u'_utf8': u'霱',
-                u'bgresponse': u'js_disabled',
-                u'checkConnection': u'',
-                u'checkedDomains': u'youtube',
-                u'dnConn': u'',
-                u'dsh': dsh,
-                u'pstMsg': u'0',
-                u'rmShown': u'1',
-                u'secTok': u'',
-                u'signIn': u'Sign in',
-                u'timeStmp': u'',
-                u'service': u'youtube',
-                u'uilel': u'3',
-                u'hl': u'en_US',
-        }
-        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
-        # chokes on unicode
-        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
-        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
-        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
-        try:
-            self.report_login()
-            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
-            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
-                self._downloader.report_warning(u'unable to log in: bad username or password')
-                return
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
-            return
-
-        # Confirm age
-        age_form = {
-                'next_url':     '/',
-                'action_confirm':   'Confirm',
-                }
-        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
-        try:
-            self.report_age_confirmation()
-            compat_urllib_request.urlopen(request).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
-
    def _extract_id(self, url):
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
@@ -670,10 +683,10 @@ class YoutubePlaylistIE(InfoExtractor):
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
-                        ((?:PL|EC|UU)?[0-9A-Za-z-_]{10,})
+                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
-                        ((?:PL|EC|UU)[0-9A-Za-z-_]{10,})
+                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
@@ -692,11 +705,14 @@ class YoutubePlaylistIE(InfoExtractor):

        # Download playlist videos from API
        playlist_id = mobj.group(1) or mobj.group(2)
-        page_num = 1
        videos = []

-        while True:
-            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, self._MAX_RESULTS * (page_num - 1) + 1)
+        for page_num in itertools.count(1):
+            start_index = self._MAX_RESULTS * (page_num - 1) + 1
+            if start_index >= 1000:
+                self._downloader.report_warning(u'Max number of results reached')
+                break
+            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            try:
@@ -716,10 +732,6 @@ class YoutubePlaylistIE(InfoExtractor):
                if 'media$group' in entry and 'media$player' in entry['media$group']:
                    videos.append((index, entry['media$group']['media$player']['url']))

-            if len(response['feed']['entry']) < self._MAX_RESULTS:
-                break
-            page_num += 1
-
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
@@ -731,7 +743,7 @@ class YoutubeChannelIE(InfoExtractor):
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
-    _MORE_PAGES_URL = 'http://www.youtube.com/channel_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
+    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
@@ -762,9 +774,7 @@ class YoutubeChannelIE(InfoExtractor):

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
-            while True:
-                pagenum = pagenum + 1
-
+            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)
@@ -807,9 +817,8 @@ class YoutubeUserIE(InfoExtractor):
        # all of them.

        video_ids = []
-        pagenum = 0

-        while True:
+        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
@@ -834,8 +843,6 @@ class YoutubeUserIE(InfoExtractor):
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

-            pagenum += 1
-
        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
@@ -898,38 +905,75 @@ class YoutubeShowIE(InfoExtractor):
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]


-class YoutubeSubscriptionsIE(YoutubeIE):
-    """It's a subclass of YoutubeIE because we need to login"""
-    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
-    IE_NAME = u'youtube:subscriptions'
-    _FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
+class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
+    """
+    Base class for extractors that fetch info from
+    http://www.youtube.com/feed_ajax
+    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
+    """
+    _LOGIN_REQUIRED = True
    _PAGING_STEP = 30
+    # use action_load_personal_feed instead of action_load_system_feed
+    _PERSONAL_FEED = False

-    # Overwrite YoutubeIE properties we don't want
-    _TESTS = []
-    @classmethod
-    def suitable(cls, url):
-        return re.match(cls._VALID_URL, url) is not None
+    @property
+    def _FEED_TEMPLATE(self):
+        action = 'action_load_system_feed'
+        if self._PERSONAL_FEED:
+            action = 'action_load_personal_feed'
+        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
+
+    @property
+    def IE_NAME(self):
+        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
-        (username, password) = self._get_login_info()
-        if username is None:
-            raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True)
-        super(YoutubeSubscriptionsIE, self)._real_initialize()
+        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
-            info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed',
+            info = self._download_webpage(self._FEED_TEMPLATE % paging,
+                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
-            m_ids = re.finditer(r'"/watch\?v=(.*?)"', feed_html)
+            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            if info['paging'] is None:
                break
-        return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')
+        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
+
+class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
+    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
+    _FEED_NAME = 'subscriptions'
+    _PLAYLIST_TITLE = u'Youtube Subscriptions'
+
+class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
+    _FEED_NAME = 'recommended'
+    _PLAYLIST_TITLE = u'Youtube Recommended videos'
+
+class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
+    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
+    _FEED_NAME = 'watch_later'
+    _PLAYLIST_TITLE = u'Youtube Watch Later'
+    _PAGING_STEP = 100
+    _PERSONAL_FEED = True
+
+class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
+    IE_NAME = u'youtube:favorites'
+    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
+    _LOGIN_REQUIRED = True
+
+    def _real_extract(self, url):
+        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
+        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
+        return self.url_result(playlist_id, 'YoutubePlaylist')
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.07.18'
+__version__ = '2013.07.25'
Author	SHA1	Message	Date
Philipp Hagemeister	d0866f0bb4	release 2013.07.25	2013-07-25 09:35:25 +02:00
Philipp Hagemeister	09eeb75130	Merge remote-tracking branch 'pishposhmcgee/patch-2'	2013-07-25 09:34:56 +02:00
Philipp Hagemeister	0a99956f71	[ina] Fix URL detection (Fixes #1121 )	2013-07-25 09:34:12 +02:00
pishposhmcgee	12ef6aefa8	changed video_url regex Some older videos contain an extra properties such as 'embed' before 'type'.	2013-07-24 21:51:08 -05:00
Jaime Marquínez Ferrándiz	755eb0320e	[youtube] use itertools.count instead of a "while True" loop and a manual counter	2013-07-24 22:27:33 +02:00
Jaime Marquínez Ferrándiz	43ba5456b1	[youtube] add an extractor for the "Watch Later" list	2013-07-24 22:13:39 +02:00
Philipp Hagemeister	156d5ad6da	release 2013.07.24.2	2013-07-24 21:18:41 +02:00
Jaime Marquínez Ferrándiz	c626a3d9fa	Add an extractor for downloading the Youtube favorite videos(closes #127 )	2013-07-24 20:45:19 +02:00
Jaime Marquínez Ferrándiz	b2e8bc1b20	YoutubeIE: Move the code from _real_initialize to a base class This allows to reuse the code in other IEs without having to overwrite some parts.	2013-07-24 20:40:12 +02:00
Jaime Marquínez Ferrándiz	771822ebb8	YoutubePlaylistIE: break only if there's no entry field in the response Otherwise the Favorite videos playlist cannot be downloaded complete. Also break if it reach the maximum value of the start-index.	2013-07-24 20:14:55 +02:00
Jaime Marquínez Ferrándiz	eb6a41ba0f	ExfmIE: extract Soundcloud songs using SoundcloudIE Now SouncloudIE accepts api urls.	2013-07-24 14:39:21 +02:00
Jaime Marquínez Ferrándiz	7d2392691c	[soundcloud]: Some improvements Extract thumbnails. Make SoundcloudSetIE a subclass of SoundcloudIE to reuse some code. Directly extract the file url without downloading an extra page.	2013-07-24 14:15:12 +02:00
Philipp Hagemeister	c216c1894d	release 2013.07.24.1	2013-07-24 13:52:55 +02:00
Jaime Marquínez Ferrándiz	3e1ad508eb	Add Youtube player info for length 87	2013-07-24 12:48:25 +02:00
Jaime Marquínez Ferrándiz	a052c1d785	Merge pull request #1114 from alexvh/traileraddict_hd [traileraddict] Obtain hd quality stream if available Updated md5 checksum of the test video.	2013-07-24 10:52:24 +02:00
Jaime Marquínez Ferrándiz	16484d4923	[traileraddict]: Support clips urls and more trailer urls	2013-07-24 10:43:44 +02:00
Jaime Marquínez Ferrándiz	32a09b4382	Merge pull request #1113 from alexvh/master [traileraddict] Allow all types of trailer URLs	2013-07-24 10:37:52 +02:00
Philipp Hagemeister	870a7e6156	release 2013.07.24	2013-07-24 10:29:34 +02:00
patrickslin	239e3e0cca	YoutubeIE: new algo for length 87 (fixes #1105 ) Squashed commit from the pull requests #1107, #1109 and #1110.	2013-07-24 10:20:52 +02:00
Alex Van't Hof	b1ca5e3ffa	[traileraddict] Obtain hd quality stream if available No clear method for determining if hd is available so opted to just check for presence of hd toggle function.	2013-07-24 02:42:32 -04:00
Alex Van't Hof	b9a1252c96	[traileraddict] Allow all types of trailer URLs Valid url regex for traileraddict.com is too strict. Need to allow, e.g. theatrical-trailer, teaser-trailer, feature-read-band-trailer, etc.	2013-07-24 00:48:11 -04:00
Philipp Hagemeister	fc492de31d	release 2013.07.23.1	2013-07-23 18:37:52 +02:00
Philipp Hagemeister	a9c0f9bc63	Merge branch 'master' of github.com:rg3/youtube-dl	2013-07-23 18:37:09 +02:00
Philipp Hagemeister	b7cc9f5026	[soundcloud] Support URLs with a slash at the end (Fixes #1104 )	2013-07-23 18:35:52 +02:00
Jaime Marquínez Ferrándiz	252580c561	YoutubeChannelE: switch ajax query from channel_ajax to c4_browse_ajax It wasn't detecting when there aren't more videos	2013-07-23 14:58:01 +02:00
Jaime Marquínez Ferrándiz	acc47c1a3f	Mark WatIE and TF1IE as broken (related #1103 )	2013-07-23 14:29:30 +02:00
Jaime Marquínez Ferrándiz	70fa830e4d	CollegeHumorIE: support Youtube videos and embed urls (fixes #1094 )	2013-07-23 14:29:29 +02:00
Philipp Hagemeister	a7af0ebaf5	release 2013.07.23	2013-07-23 14:20:52 +02:00
Jaime Marquínez Ferrándiz	67ae7b4760	Fix BreakIE Also detect videos that come from Youtube	2013-07-23 11:41:05 +02:00
Jaime Marquínez Ferrándiz	de48addae2	Fix CollegHumorIE Now it downloads the video over http in one file, it doesn't downloads in fragments Added a test and use the methods in InfoExtractor for downloading webpages	2013-07-23 11:14:11 +02:00
Jaime Marquínez Ferrándiz	ddbfd0f0c5	ComedyCentralIE: support the extended interviews urls (fixes #1079 )	2013-07-21 11:04:56 +02:00
Jaime Marquínez Ferrándiz	d7ae0639b4	[youtube] Add an extractor for Youtube recommended videos (":ytrec" keyword) (closes #476 ) The new extractor and YoutubeSubscriptionsIE are subclasses of YoutubeFeedsInfoExtractor, which allows to fetch videos from http://www.youtube.com/feed_ajax	2013-07-20 19:33:40 +02:00
Philipp Hagemeister	0382435990	[exfm] Add IE_* descriptions	2013-07-20 11:26:36 +02:00
Philipp Hagemeister	b390d85d95	Merge remote-tracking branch 'yasoob/master'	2013-07-20 11:23:56 +02:00
Philipp Hagemeister	be925dc64c	release 2013.07.19	2013-07-19 23:42:29 +02:00
Jaime Marquínez Ferrándiz	de7a91bfe3	WeiboIE: extract the player urls from a json webpage Also extract a Sina url that doesn't require to follow a redirection.	2013-07-19 20:43:44 +02:00
Jaime Marquínez Ferrándiz	a4358cbabd	YoutubeIE: new algo for length 85 (closes #1080 ), thanks to @patrickslin	2013-07-19 17:12:40 +02:00
Jaime Marquínez Ferrándiz	177ed935a9	TEDIE: fix the title extraction	2013-07-19 16:13:31 +02:00
Jaime Marquínez Ferrándiz	c364f15ff1	Add WeiboIE (closes #1039 ) It just embed video from other sites. Modified the _VALID_URL of Youku to catch embed urls.	2013-07-19 16:09:14 +02:00
Jaime Marquínez Ferrándiz	e1f6e61e6a	Add an extractor for 56.com (related #1039 )	2013-07-19 15:17:34 +02:00
Jaime Marquínez Ferrándiz	0932300e3a	Add SinaIE (related #1039 ): extractor for video.sina.com.cn	2013-07-18 15:31:50 +02:00
Jaime Marquínez Ferrándiz	3f40217704	InstagramIE: fix the extraction of the uploader_id and the title The page title is now 'Instagram', so we build it. Also extract the description	2013-07-18 13:12:27 +02:00
Philipp Hagemeister	f631c3311a	Hint that --update may need sudo	2013-07-18 12:53:24 +02:00
M.Yasoob Khalid	8e5e059d7d	forgot to import json json	2013-07-18 12:40:56 +05:00
M.Yasoob Khalid	2b1b511f6b	removed some unnecessary imports	2013-07-18 12:37:47 +05:00
M.Yasoob Khalid	233ad24ecf	corrected a typo and added myself to travis notifications.	2013-07-18 12:37:02 +05:00
M.Yasoob Khalid	c4949c50f9	added test for ex.fm	2013-07-18 12:33:31 +05:00
M.Yasoob Khalid	b6ef402905	added an IE for ex.fm	2013-07-18 12:30:21 +05:00