release 2013.09.05

[youtube] Playlists: extract the video ids from ['media$group']['yt$videoid'] (fixes #1374)
'media$player' is not defined for private videos.
2025-08-02 10:30:04 -05:00 · 2013-09-05 22:30:50 +02:00 · 2013-09-05 21:40:04 +02:00 · 2013-09-05 18:02:17 +02:00 · 2013-09-05 10:53:40 +02:00 · 2013-09-05 10:08:17 +02:00
24 changed files with 450 additions and 55 deletions
--- a/README.md
+++ b/README.md
@@ -113,7 +113,8 @@ which means you can modify it, redistribute it or use it however you like.

 ## Video Format Options:
    -f, --format FORMAT        video format code, specifiy the order of
-                               preference using slashes: "-f 22/17/18"
+                               preference using slashes: "-f 22/17/18". "-f mp4"
+                               and "-f flv" are also supported
    --all-formats              download all available video formats
    --prefer-free-formats      prefer free video formats unless a specific one
                               is requested
--- a/devscripts/bash-completion.in
+++ b/devscripts/bash-completion.in
@@ -4,8 +4,12 @@ __youtube-dl()
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    opts="{{flags}}"
+    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"

-    if [[ ${cur} == * ]] ; then
+    if [[ ${cur} =~ : ]]; then
+        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
+        return 0
+    elif [[ ${cur} == * ]] ; then
        COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
        return 0
    fi
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import textwrap
+
+# We must be able to import youtube_dl
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+import youtube_dl
+
+def main():
+    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
+        template = tmplf.read()
+
+    ie_htmls = []
+    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
+        ie_html = '<b>{}</b>'.format(ie.IE_NAME)
+        try:
+            ie_html += ': {}'.format(ie.IE_DESC)
+        except AttributeError:
+            pass
+        if ie.working() == False:
+            ie_html += ' (Currently broken)'
+        ie_htmls.append('<li>{}</li>'.format(ie_html))
+
+    template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
+
+    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
+        sitesf.write(template)
+
+if __name__ == '__main__':
+    main()
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -85,6 +85,7 @@ ROOT=$(pwd)
    "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
    "$ROOT/devscripts/gh-pages/generate-download.py"
    "$ROOT/devscripts/gh-pages/update-copyright.py"
+    "$ROOT/devscripts/gh-pages/update-sites.py"
    git add *.html *.html.in update
    git commit -m "release $version"
    git show HEAD
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -20,15 +20,15 @@ tests = [
    # 87
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
     "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
-    # 86 - vflh9ybst 2013/08/23
+    # 86 - vflHOr_nV 2013/08/30
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
-     "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
+     "?;}|[{=+._)(*&^%$#@!MNBqCXZASDFGHJKLPOIUYTREWQ<987654321mnbvcxzasdfghjklpoiuytrew"),
    # 85
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
     ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
-    # 84 - vflh9ybst 2013/08/23 (sporadic)
+    # 84 - vflg0g8PQ 2013/08/29 (sporadic)
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
-     "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),
+     ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
    # 83
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
     ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -127,12 +127,11 @@ def generator(test_case):
                    info_dict = json.load(infof)
                for (info_field, expected) in tc.get('info_dict', {}).items():
                    if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                        self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
+                        got = 'md5:' + md5(info_dict.get(info_field))
                    else:
                        got = info_dict.get(info_field)
-                        self.assertEqual(
-                            expected, got,
-                            u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+                    self.assertEqual(expected, got,
+                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))

                # If checkable fields are missing from the test case, print the info_dict
                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -28,6 +28,7 @@ __authors__  = (
    'Axel Noack',
    'Albert Kim',
    'Pierre Rudloff',
+    'Huarong Huo',
 )

 __license__ = 'Public Domain'
@@ -192,7 +193,7 @@ def parseOpts(overrideArguments=None):

    video_format.add_option('-f', '--format',
            action='store', dest='format', metavar='FORMAT',
-            help='video format code, specifiy the order of preference using slashes: "-f 22/17/18"')
+            help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
    video_format.add_option('--all-formats',
            action='store_const', dest='format', help='download all available video formats', const='all')
    video_format.add_option('--prefer-free-formats',
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -18,9 +18,11 @@ from .condenast import CondeNastIE
 from .criterion import CriterionIE
 from .cspan import CSpanIE
 from .dailymotion import DailymotionIE, DailymotionPlaylistIE
+from .daum import DaumIE
 from .depositfiles import DepositFilesIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
+from .defense import DefenseGouvFrIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
@@ -56,9 +58,11 @@ from .mtv import MTVIE
 from .muzu import MuzuTVIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
+from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
 from .ooyala import OoyalaIE
+from .orf import ORFIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .pornotube import PornotubeIE
@@ -88,6 +92,7 @@ from .tutv import TutvIE
 from .unistra import UnistraIE
 from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
+from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .videofyme import VideofyMeIE
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -150,7 +150,7 @@ class InfoExtractor(object):
        if m:
            encoding = m.group(1)
        else:
-            m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]',
+            m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
                          webpage_bytes[:1024])
            if m:
                encoding = m.group(1).decode('ascii')
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -55,7 +55,8 @@ class DailymotionIE(InfoExtractor):
        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id,
                                            u'Downloading embed page')
-        info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
+        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
+            'video info', flags=re.MULTILINE)
        info = json.loads(info)

        # TODO: support choosing qualities
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -0,0 +1,74 @@
+# encoding: utf-8
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    determine_ext,
+)
+
+
+class DaumIE(InfoExtractor):
+    _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
+    IE_NAME = u'daum.net'
+
+    _TEST = {
+        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
+        u'file': u'52554690.mp4',
+        u'info_dict': {
+            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
+            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
+            u'upload_date': u'20130831',
+            u'duration': 3868,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(1)
+        canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
+        webpage = self._download_webpage(canonical_url, video_id)
+        full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
+            webpage, u'full id')
+        query = compat_urllib_parse.urlencode({'vid': full_id})
+        info_xml = self._download_webpage(
+            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
+            u'Downloading video info')
+        urls_xml = self._download_webpage(
+            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
+            video_id, u'Downloading video formats info')
+        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+        urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
+
+        self.to_screen(u'%s: Getting video urls' % video_id)
+        formats = []
+        for format_el in urls.findall('result/output_list/output_list'):
+            profile = format_el.attrib['profile']
+            format_query = compat_urllib_parse.urlencode({
+                'vid': full_id,
+                'profile': profile,
+            })
+            url_xml = self._download_webpage(
+                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
+                video_id, note=False)
+            url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
+            format_url = url_doc.find('result/url').text
+            formats.append({
+                'url': format_url,
+                'ext': determine_ext(format_url),
+                'format_id': profile,
+            })
+
+        info = {
+            'id': video_id,
+            'title': info.find('TITLE').text,
+            'formats': formats,
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'description': info.find('CONTENTS').text,
+            'duration': int(info.find('DURATION').text),
+            'upload_date': info.find('REGDTTM').text[:8],
+        }
+        # TODO: Remove when #980 has been merged
+        info.update(formats[-1])
+        return info
--- a/youtube_dl/extractor/defense.py
+++ b/youtube_dl/extractor/defense.py
@@ -0,0 +1,39 @@
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class DefenseGouvFrIE(InfoExtractor):
+    _IE_NAME = 'defense.gouv.fr'
+    _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
+        r'ligthboxvideo/base-de-medias/webtv/(.*)')
+
+    _TEST = {
+        u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/'
+        u'base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1'),
+        u'file': u'11213.mp4',
+        u'md5': u'75bba6124da7e63d2d60b5244ec9430c',
+        "info_dict": {
+            "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
+        }
+    }
+
+    def _real_extract(self, url):
+        title = re.match(self._VALID_URL, url).group(1)
+        webpage = self._download_webpage(url, title)
+        video_id = self._search_regex(
+            r"flashvars.pvg_id=\"(\d+)\";",
+            webpage, 'ID')
+        
+        json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
+            + video_id)
+        info = self._download_webpage(json_url, title,
+                                                  'Downloading JSON config')
+        video_url = json.loads(info)['renditions'][0]['url']
+        
+        return {'id': video_id,
+                'ext': 'mp4',
+                'url': video_url,
+                'title': title,
+                }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -153,7 +153,7 @@ class GenericIE(InfoExtractor):
                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
        if mobj is None:
            # HTML5 video
-            mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
+            mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

@@ -162,9 +162,9 @@ class GenericIE(InfoExtractor):
        if mobj.group(1) is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

-        video_url = compat_urllib_parse.unquote(mobj.group(1))
+        video_url = mobj.group(1)
        video_url = compat_urlparse.urljoin(url, video_url)
-        video_id = os.path.basename(video_url)
+        video_id = compat_urllib_parse.unquote(os.path.basename(video_url))

        # here's a fun little line of code for you:
        video_extension = os.path.splitext(video_id)[1][1:]
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@@ -13,7 +13,7 @@ class IGNIE(InfoExtractor):
    Some videos of it.ign.com are also supported
    """

-    _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
+    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'
    IE_NAME = u'ign.com'

    _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
@@ -41,7 +41,11 @@ class IGNIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        name_or_id = mobj.group('name_or_id')
+        page_type = mobj.group('type')
        webpage = self._download_webpage(url, name_or_id)
+        if page_type == 'articles':
+            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
+            return self.url_result(video_url, ie='IGN')
        video_id = self._find_video_id(webpage)
        result = self._get_video_info(video_id)
        description = self._html_search_regex(self._DESCRIPTION_RE,
@@ -68,7 +72,7 @@ class IGNIE(InfoExtractor):
 class OneUPIE(IGNIE):
    """Extractor for 1up.com, it uses the ign videos system."""

-    _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
+    _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
    IE_NAME = '1up.com'

    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -122,7 +122,7 @@ class MetacafeIE(InfoExtractor):
        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
        description = self._og_search_description(webpage)
        video_uploader = self._html_search_regex(
-                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
+                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                webpage, u'uploader nickname', fatal=False)

        return {
--- a/youtube_dl/extractor/mit.py
+++ b/youtube_dl/extractor/mit.py
@@ -25,23 +25,21 @@ class TechTVMITIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        webpage = self._download_webpage(
+        raw_page = self._download_webpage(
            'http://techtv.mit.edu/videos/%s' % video_id, video_id)
-        embed_page = self._download_webpage(
-            'http://techtv.mit.edu/embeds/%s/' % video_id, video_id,
-            note=u'Downloading embed page')
+        clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)

        base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
-            embed_page, u'base url')
-        formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page,
+            raw_page, u'base url')
+        formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
            u'video formats')
        formats = json.loads(formats_json)
        formats = sorted(formats, key=lambda f: f['bitrate'])

-        title = get_element_by_id('edit-title', webpage)
-        description = clean_html(get_element_by_id('edit-description', webpage))
+        title = get_element_by_id('edit-title', clean_page)
+        description = clean_html(get_element_by_id('edit-description', clean_page))
        thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
-            embed_page, u'thumbnail', flags=re.DOTALL)
+            raw_page, u'thumbnail', flags=re.DOTALL)

        return {'id': video_id,
                'title': title,
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -0,0 +1,73 @@
+# encoding: utf-8
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    ExtractorError,
+)
+
+
+class NaverIE(InfoExtractor):
+    _VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://tvcast.naver.com/v/81652',
+        u'file': u'81652.mp4',
+        u'info_dict': {
+            u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
+            u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
+            u'upload_date': u'20130903',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(1)
+        webpage = self._download_webpage(url, video_id)
+        m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
+            webpage)
+        if m_id is None:
+            raise ExtractorError(u'couldn\'t extract vid and key')
+        vid = m_id.group(1)
+        key = m_id.group(2)
+        query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
+        query_urls = compat_urllib_parse.urlencode({
+            'masterVid': vid,
+            'protocol': 'p2p',
+            'inKey': key,
+        })
+        info_xml = self._download_webpage(
+            'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
+            video_id, u'Downloading video info')
+        urls_xml = self._download_webpage(
+            'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
+            video_id, u'Downloading video formats info')
+        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+        urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
+
+        formats = []
+        for format_el in urls.findall('EncodingOptions/EncodingOption'):
+            domain = format_el.find('Domain').text
+            if domain.startswith('rtmp'):
+                continue
+            formats.append({
+                'url': domain + format_el.find('uri').text,
+                'ext': 'mp4',
+                'width': int(format_el.find('width').text),
+                'height': int(format_el.find('height').text),
+            })
+
+        info = {
+            'id': video_id,
+            'title': info.find('Subject').text,
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'upload_date': info.find('WriteDate').text.replace('.', ''),
+            'view_count': int(info.find('PlayCount').text),
+        }
+        # TODO: Remove when #980 has been merged
+        info.update(formats[-1])
+        return info
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -0,0 +1,54 @@
+# coding: utf-8
+
+import re
+import xml.etree.ElementTree
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    ExtractorError,
+    find_xpath_attr,
+)
+
+class ORFIE(InfoExtractor):
+    _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        webpage = self._download_webpage(url, playlist_id)
+
+        flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml')
+        flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0]
+        flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))
+        playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"')
+        playlist = json.loads(playlist_json)
+
+        videos = []
+        ns = '{http://tempuri.org/XMLSchema.xsd}'
+        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
+        webpage_description = self._og_search_description(webpage)
+        for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
+            # Get best quality url
+            rtmp_url = None
+            for q in ['Q6A', 'Q4A', 'Q1A']:
+                video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
+                if video_url is not None:
+                    rtmp_url = video_url.text
+                    break
+            if rtmp_url is None:
+                raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
+            description = self._html_search_regex(
+                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
+                u'description', default=webpage_description, flags=re.DOTALL)
+            videos.append({
+                '_type': 'video',
+                'id': info['id'],
+                'title': info['title'],
+                'url': rtmp_url,
+                'ext': 'flv',
+                'description': description,
+                })
+
+        return videos
--- a/youtube_dl/extractor/unistra.py
+++ b/youtube_dl/extractor/unistra.py
@@ -11,7 +11,7 @@ class UnistraIE(InfoExtractor):
        u'md5': u'736f605cfdc96724d55bb543ab3ced24',
        u'info_dict': {
            u'title': u'M!ss Yella',
-            u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc',
+            u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
        },
    }

--- a/youtube_dl/extractor/veehd.py
+++ b/youtube_dl/extractor/veehd.py
@@ -0,0 +1,56 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    get_element_by_id,
+    clean_html,
+)
+
+class VeeHDIE(InfoExtractor):
+    _VALID_URL = r'https?://veehd.com/video/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://veehd.com/video/4686958',
+        u'file': u'4686958.mp4',
+        u'info_dict': {
+            u'title': u'Time Lapse View from Space ( ISS)',
+            u'uploader_id': u'spotted',
+            u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        player_path = self._search_regex(r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
+            webpage, u'player path')
+        player_url = compat_urlparse.urljoin(url, player_path)
+        player_page = self._download_webpage(player_url, video_id,
+            u'Downloading player page')
+        config_json = self._search_regex(r'value=\'config=({.+?})\'',
+            player_page, u'config json')
+        config = json.loads(config_json)
+
+        video_url = compat_urlparse.unquote(config['clip']['url'])
+        title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
+        uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
+            webpage, u'uploader')
+        thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
+            webpage, u'thumbnail')
+        description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
+            webpage, u'description', flags=re.DOTALL)
+
+        return {
+            '_type': 'video',
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'ext': 'mp4',
+            'uploader_id': uploader_id,
+            'thumbnail': thumbnail,
+            'description': description,
+        }
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -44,6 +44,16 @@ class VimeoIE(InfoExtractor):
                u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
            },
        },
+        {
+            u'url': u'http://player.vimeo.com/video/54469442',
+            u'file': u'54469442.mp4',
+            u'md5': u'619b811a4417aa4abe78dc653becf511',
+            u'note': u'Videos that embed the url in the player page',
+            u'info_dict': {
+                u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
+                u'uploader': u'The BLN & Business of Software',
+            },
+        },
    ]

    def _login(self):
@@ -112,7 +122,8 @@ class VimeoIE(InfoExtractor):

        # Extract the config JSON
        try:
-            config = webpage.split(' = {config:')[1].split(',assets:')[0]
+            config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
+                webpage, u'info section', flags=re.DOTALL)
            config = json.loads(config)
        except:
            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
@@ -132,12 +143,22 @@ class VimeoIE(InfoExtractor):
        video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None

        # Extract video thumbnail
-        video_thumbnail = config["video"]["thumbnail"]
+        video_thumbnail = config["video"].get("thumbnail")
+        if video_thumbnail is None:
+            _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]

        # Extract video description
-        video_description = get_element_by_attribute("itemprop", "description", webpage)
-        if video_description: video_description = clean_html(video_description)
-        else: video_description = u''
+        video_description = None
+        try:
+            video_description = get_element_by_attribute("itemprop", "description", webpage)
+            if video_description: video_description = clean_html(video_description)
+        except AssertionError as err:
+            # On some pages (e.g. http://player.vimeo.com/video/54469442) the
+            # HTML tags are not closed, which Python 2.6 cannot handle
+            if err.args[0] == 'we should not get here!':
+                pass
+            else:
+                raise

        # Extract upload date
        video_upload_date = None
@@ -154,14 +175,15 @@ class VimeoIE(InfoExtractor):
        # TODO bind to format param
        codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
        files = { 'hd': [], 'sd': [], 'other': []}
+        config_files = config["video"].get("files") or config["request"].get("files")
        for codec_name, codec_extension in codecs:
-            if codec_name in config["video"]["files"]:
-                if 'hd' in config["video"]["files"][codec_name]:
+            if codec_name in config_files:
+                if 'hd' in config_files[codec_name]:
                    files['hd'].append((codec_name, codec_extension, 'hd'))
-                elif 'sd' in config["video"]["files"][codec_name]:
+                elif 'sd' in config_files[codec_name]:
                    files['sd'].append((codec_name, codec_extension, 'sd'))
                else:
-                    files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
+                    files['other'].append((codec_name, codec_extension, config_files[codec_name][0]))

        for quality in ('hd', 'sd', 'other'):
            if len(files[quality]) > 0:
@@ -173,8 +195,12 @@ class VimeoIE(InfoExtractor):
        else:
            raise ExtractorError(u'No known codec found')

-        video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
-                    %(video_id, sig, timestamp, video_quality, video_codec.upper())
+        video_url = None
+        if isinstance(config_files[video_codec], dict):
+            video_url = config_files[video_codec][video_quality].get("url")
+        if video_url is None:
+            video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
+                        %(video_id, sig, timestamp, video_quality, video_codec.upper())

        return [{
            'id':       video_id,
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -153,8 +153,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     $"""
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
-    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
-                          '95', '94', '93', '92', '132', '151',
+    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
+                          # Apple HTTP Live Streaming
+                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
@@ -163,8 +164,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
-    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
-                                      '95', '94', '93', '92', '132', '151',
+    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
+                                      # Apple HTTP Live Streaming
+                                      '96', '95', '94', '93', '92', '132', '151',
+                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
@@ -172,11 +175,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
+    _video_formats_map = {
+        'flv': ['35', '34', '6', '5'],
+        '3gp': ['36', '17', '13'],
+        'mp4': ['38', '37', '22', '18'],
+        'webm': ['46', '45', '44', '43'],
+    }
    _video_extensions = {
        '13': '3gp',
-        '17': 'mp4',
+        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
+        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
@@ -193,7 +203,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        '101': 'webm',
        '102': 'webm',

-        # videos that use m3u8
+        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
@@ -234,6 +244,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
+        '36': '240x320',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
@@ -335,7 +346,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
-                u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
+                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
@@ -423,11 +434,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        elif len(s) == 87:
            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
        elif len(s) == 86:
-            return s[5:40] + s[3] + s[41:48] + s[0] + s[49:86]
+            return s[81:73:-1] + s[84] + s[72:58:-1] + s[0] + s[57:35:-1] + s[85] + s[34:0:-1]
        elif len(s) == 85:
            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
        elif len(s) == 84:
-            return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]
+            return s[81:36:-1] + s[0] + s[35:2:-1]
        elif len(s) == 83:
            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
        elif len(s) == 82:
@@ -597,13 +608,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        else:
            # Specific formats. We pick the first in a slash-delimeted sequence.
-            # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
+            # A format can be given as an itag or as a container name ('mp4', 'flv', etc.);
+            # for a container name we pick the highest quality itag available in it. For example:
+            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2';
+            # if '1/mp4/3/4' is requested and '1' and '5' (an mp4) are available, we pick '1';
+            # if '1/mp4/3/4' is requested and '4' and '5' (an mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                if rf in url_map:
                    video_url_list = [(rf, url_map[rf])]
                    break
+                if rf in self._video_formats_map:
+                    for srf in self._video_formats_map[rf]:
+                        if srf in url_map:
+                            video_url_list = [(srf, url_map[srf])]
+                            break
+                    else:
+                        continue
+                    break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
@@ -920,8 +943,11 @@ class YoutubePlaylistIE(InfoExtractor):

            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
-                if 'media$group' in entry and 'media$player' in entry['media$group']:
-                    videos.append((index, entry['media$group']['media$player']['url']))
+                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
+                    videos.append((
+                        index,
+                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
+                    ))

        videos = [v[1] for v in sorted(videos)]

@@ -1161,7 +1187,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
+    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -213,7 +213,7 @@ if sys.version_info >= (2,7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z]+$', key)
-        assert re.match(r'^[a-zA-Z@\s]*$', val)
+        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
        expr = xpath + u"[@%s='%s']" % (key, val)
        return node.find(expr)
 else:
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.08.28.1'
+__version__ = '2013.09.05'
Author	SHA1	Message	Date
Philipp Hagemeister	a636203ea5	release 2013.09.05	2013-09-05 22:30:50 +02:00
Jaime Marquínez Ferrándiz	c215217e39	[youtube] Playlists: extract the videos id from ['media$group']['yt$videoid'] (fixes #1374 ) 'media$player' is not defined for private videos.	2013-09-05 21:40:04 +02:00
Jaime Marquínez Ferrándiz	08e291b54d	[generic] Recognize html5 video in the format '<video src=".+?"' and only unquote the url when extracting the id (fixes #1372 )	2013-09-05 18:02:17 +02:00
Jaime Marquínez Ferrándiz	6b95b065be	Add extractor for tvcast.naver.com (closes #1331 )	2013-09-05 10:53:40 +02:00
Jaime Marquínez Ferrándiz	9363169b67	[daum] Get the video page from a canonical url to extract the full id (fixes #1373 ) and extract description.	2013-09-05 10:08:17 +02:00
Jaime Marquínez Ferrándiz	085bea4513	Credit @Huarong for tv.sohu.com	2013-09-04 22:09:22 +02:00
Jaime Marquínez Ferrándiz	150f20828b	Add extractor for daum.net (closes #1330 )	2013-09-04 22:06:50 +02:00
Philipp Hagemeister	08523ee20a	release 2013.09.04	2013-09-04 14:33:32 +02:00
Philipp Hagemeister	5d5171d26a	Merge pull request #1341 from xanadu/master add support for "-f mp4" for YouTube	2013-09-03 18:52:12 -07:00
Allan Zhou	96fb5605b2	AHLS -> Apple HTTP Live Streaming	2013-09-03 18:49:35 -07:00
Philipp Hagemeister	7011de0bc2	Merge pull request #1363 from Rudloff/defense defense.gouv.fr	2013-09-03 18:23:08 -07:00
Allan Zhou	c3dd69eab4	Merge remote-tracking branch 'upstream/master'	2013-09-03 12:22:29 -07:00
Pierre Rudloff	025171c476	Suggested by @phihag	2013-09-03 12:03:19 +02:00
Jaime Marquínez Ferrándiz	c8dbccde30	[orf] Remove the test video, they seem to expire in one week	2013-09-03 11:51:01 +02:00
Jaime Marquínez Ferrándiz	4ff7a0f1f6	[dailymotion] improve the regex for extracting the video info	2013-09-03 11:33:59 +02:00
Jaime Marquínez Ferrándiz	9c2ade40de	[vimeo] Handle Assertions Error when trying to get the description In some pages the html tags are not closed, python 2.6 cannot handle it.	2013-09-03 11:11:36 +02:00
Jaime Marquínez Ferrándiz	aa32314d09	[vimeo] add support for videos that embed the download url in the player page (fixes #1364 )	2013-09-03 10:48:56 +02:00
Pierre Rudloff	52afe99665	Extractor for defense.gouv.fr	2013-09-03 01:51:17 +02:00
Pierre Rudloff	b0446d6a33	Merge remote-tracking branch 'upstream/master'	2013-09-03 01:27:49 +02:00
Jaime Marquínez Ferrándiz	8e4e89f1c2	Add an extractor for VeeHD (closes #1359 )	2013-09-02 11:54:09 +02:00
Jaime Marquínez Ferrándiz	6c758d79de	[metacafe] Add more cases for detecting the uploader detection (reported in #1343 )	2013-08-31 22:35:39 +02:00
Jaime Marquínez Ferrándiz	691008087b	Add an automatic page generator for the supported sites (related #156 ) They are listed in the "supportedsites.html" page.	2013-08-31 15:18:52 +02:00
Allan Zhou	85f03346eb	Merge remote-tracking branch 'upstream/master'	2013-08-30 17:51:59 -07:00
Allan Zhou	bdc6b3fc64	add support for "-f mp4" for YouTube	2013-08-30 17:51:50 -07:00
Pierre Rudloff	847f582290	Merge remote-tracking branch 'upstream/master'	2013-08-31 00:37:29 +02:00
Philipp Hagemeister	10f5c016ec	release 2013.08.30	2013-08-30 21:02:07 +02:00
Jaime Marquínez Ferrándiz	2e756879f1	[youtube] update algo for length 86	2013-08-30 20:49:51 +02:00
Jaime Marquínez Ferrándiz	c7a7750d3b	[youtube] Fix typo in the _VALID_URL for YoutubeFavouritesIE, it was intended to also match :ytfavourites	2013-08-30 20:13:05 +02:00
Jaime Marquínez Ferrándiz	9193c1eede	Add youtube keywords to the bash completion script	2013-08-30 20:11:53 +02:00
Philipp Hagemeister	3243d0f7b6	release 2013.08.29	2013-08-29 23:29:34 +02:00
Jaime Marquínez Ferrándiz	23b00bc0e4	[youtube] update algo for length 84 Only appears sometimes, nearly identical to length 86.	2013-08-29 22:44:29 +02:00
Jaime Marquínez Ferrándiz	52e1eea18b	[youtube] update algo for length 86 (fixes #1349 )	2013-08-29 22:33:58 +02:00
Jaime Marquínez Ferrándiz	ee80d66727	[ign] update 1up extractor to work with the updated IGNIE	2013-08-29 21:51:09 +02:00
Jaime Marquínez Ferrándiz	f1fb2d12b3	[ign] extract videos from articles pages	2013-08-29 21:39:36 +02:00
Jaime Marquínez Ferrándiz	deb2c73212	Merge pull request #1347 from whydoubt/fix_orf_at Fix orf.at extractor by adding file coding mark	2013-08-29 11:05:38 -07:00
Jeff Smith	8928491074	Fix orf.at extractor by adding file coding mark	2013-08-29 12:51:38 -05:00
Jaime Marquínez Ferrándiz	545434670b	Add an extractor for orf.at (closes #1346 ) Make find_xpath_attr also accept numbers in the value	2013-08-29 19:16:07 +02:00
Jaime Marquínez Ferrándiz	54fda45bac	Merge pull request #1342 from whydoubt/fix_mit_26 Fix MIT extractor for Python 2.6	2013-08-29 13:42:08 +02:00
Jaime Marquínez Ferrándiz	c7bf7366bc	Update descriptions checksum for some test for Unistra and Youtube	2013-08-29 13:41:59 +02:00
Jaime Marquínez Ferrándiz	b7052e5087	Also print the field that fails if it is a md5 checksum	2013-08-29 12:15:45 +02:00
Jaime Marquínez Ferrándiz	0d75ae2ce3	Fix detection of the webpage charset if it's declared using ' instead of " Like in "<meta charset='utf-8'/>"	2013-08-29 11:35:15 +02:00
Jeff Smith	b5ba7b9dcf	Fix MIT extractor for Python 2.6 The HTML for the MIT page does not parse cleanly for Python 2.6 due to script tags within an actual script element. The offending piece is inside a comment block, so removing all such comment blocks fixes the parsing.	2013-08-28 14:24:42 -05:00
Allan Zhou	483e0ddd4d	Merge remote-tracking branch 'upstream/master'	2013-08-28 10:19:28 -07:00
Allan Zhou	591078babf	Merge remote-tracking branch 'upstream/master'	2013-08-28 09:57:28 -07:00
Pierre Rudloff	cd9c100963	Merge remote-tracking branch 'upstream/master'	2013-08-28 12:20:12 +02:00
Pierre Rudloff	b7f89fe692	Merge remote-tracking branch 'upstream/master'	2013-08-28 12:10:34 +02:00
Pierre Rudloff	1301a0dd42	Merge remote-tracking branch 'upstream/master'	2013-08-28 11:02:12 +02:00
Pierre Rudloff	c5b921b597	Merge remote-tracking branch 'upstream/master'	2013-08-27 10:47:47 +02:00
Allan Zhou	99859d436c	Merge remote-tracking branch 'upstream/master'	2013-08-26 15:16:13 -07:00
Pierre Rudloff	7a20e2e1f8	Merge remote-tracking branch 'upstream/master'	2013-08-26 03:16:42 +02:00
Pierre Rudloff	5c6658d4dd	Merge remote-tracking branch 'upstream/master'	2013-08-24 23:01:39 +02:00
Allan Zhou	39c6f507df	Merge remote-tracking branch 'upstream/master'	2013-08-23 15:33:36 -07:00
Allan Zhou	614d9c19c1	Merge remote-tracking branch 'upstream/master'	2013-08-22 17:02:41 -07:00
Allan Zhou	b4e60dac23	Merge remote-tracking branch 'upstream/master'	2013-08-22 10:43:51 -07:00
Pierre Rudloff	adeb9c73d6	Merge remote-tracking branch 'upstream/master'	2013-08-22 14:04:30 +02:00
Allan Zhou	5af7e056a7	Merge remote-tracking branch 'upstream/master'	2013-08-21 10:53:42 -07:00
Allan Zhou	a3f62b8255	Merge remote-tracking branch 'upstream/master'	2013-08-21 00:07:03 -07:00