Compare commits


15 Commits

Author SHA1 Message Date
eaaafc59c2 release 2013.11.24 2013-11-24 07:30:34 +01:00
382ed50e0e [viki] Add extractor (fixes #1813) 2013-11-24 07:30:05 +01:00
66ec019240 [youtube] do not use variable name twice 2013-11-24 06:54:26 +01:00
bd49928f7a [niconico] Clarify download 2013-11-24 06:53:50 +01:00
23e6d50d73 [bandcamp] Remove unused variable 2013-11-24 06:52:53 +01:00
2e767313e4 [update] fix error 2013-11-24 06:52:21 +01:00
38b2db6a66 Credit @takuya0301 for niconico 2013-11-24 06:39:49 +01:00
13ebea791f [niconico] Simplify and make work with old Python versions
The website requires SSLv3, otherwise it just times out during SSL negotiation.
2013-11-24 06:39:10 +01:00
4c9c57428f Merge remote-tracking branch 'takuya0301/niconico' 2013-11-24 06:09:11 +01:00
8bf9319e9c Simplify logger code(#1811) 2013-11-24 06:08:11 +01:00
4914120727 Merge remote-tracking branch 'iTaybb/master' 2013-11-24 06:07:12 +01:00
36de0a0e1a [brightcove] Set the 'videoPlayer' value to the 'videoId' if it's missing in the parameters (fixes #1815) 2013-11-23 23:27:15 +01:00
e5c146d586 [streamcloud] skip test on travis 2013-11-23 15:57:42 +01:00
52ad14aeb0 Add support for niconico 2013-11-23 18:19:44 +09:00
43afe28588 Log to an external logger (fixes #1810)
Sadly, applications using youtube-dl's Python sources can't directly
access its log stream; it's pretty much limited to stdout and stderr only.

It should log to the logging.Logger instance passed via YoutubeDL's params
dictionary.
2013-11-23 10:22:18 +02:00
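This last commit lets embedding applications capture youtube-dl's output through the standard logging module; the actual routing is in the youtube_dl/YoutubeDL.py hunks below. A minimal usage sketch — the logger name and URL are illustrative, and the import path and add_default_info_extractors() call are assumptions about how the library is typically driven at this point in its history:

import logging

from youtube_dl import YoutubeDL

# Any logging.Logger works: once 'logger' is present in the params dict,
# screen messages go to logger.debug() and error messages to logger.error().
log = logging.getLogger('my_app.youtube_dl')
log.setLevel(logging.DEBUG)
log.addHandler(logging.StreamHandler())

ydl = YoutubeDL({'logger': log})
ydl.add_default_info_extractors()
ydl.download(['http://www.viki.com/videos/1023585v-heirs-episode-14'])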
12 changed files with 271 additions and 18 deletions

View File: youtube_dl/YoutubeDL.py

@@ -97,6 +97,7 @@ class YoutubeDL(object):
    playlistend: Playlist item to end at.
    matchtitle: Download only matching titles.
    rejecttitle: Reject downloads for matching titles.
    logger: Log messages to a logging.Logger instance.
    logtostderr: Log messages to stderr instead of stdout.
    writedescription: Write the video description to a .description file
    writeinfojson: Write the video description to a .info.json file
@@ -192,7 +193,9 @@ class YoutubeDL(object):
    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        if not self.params.get('quiet', False):
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not self.params.get('quiet', False):
            terminator = [u'\n', u''][skip_eol]
            output = message + terminator
            write_string(output, self._screen_file)
@@ -200,10 +203,13 @@
    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type(u'')
        output = message + u'\n'
        if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3:  # Python 2 lies about the mode of sys.stdout/sys.stderr
            output = output.encode(preferredencoding())
        sys.stderr.write(output)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            output = message + u'\n'
            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3:  # Python 2 lies about the mode of sys.stdout/sys.stderr
                output = output.encode(preferredencoding())
            sys.stderr.write(output)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):

View File: youtube_dl/__init__.py

@@ -35,6 +35,7 @@ __authors__ = (
    'Jelle van der Waa',
    'Marcin Cieślak',
    'Anton Larionov',
    'Takuya Tsuchida',
)

__license__ = 'Public Domain'

View File: youtube_dl/extractor/__init__.py

@@ -98,6 +98,7 @@ from .nba import NBAIE
from .nbc import NBCNewsIE
from .newgrounds import NewgroundsIE
from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE
from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE
from .orf import ORFIE
@@ -156,6 +157,7 @@ from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
from .viki import VikiIE
from .vk import VKIE
from .wat import WatIE
from .websurg import WeBSurgIE
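With the two imports above in place, the new extractors are picked up by the normal registration machinery. A quick check, assuming the gen_extractors() helper that lives in this module:

from youtube_dl.extractor import gen_extractors

# IE_NAME values come straight from the new extractor classes further down.
names = [ie.IE_NAME for ie in gen_extractors()]
assert u'niconico' in names
assert u'viki' in names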

View File: youtube_dl/extractor/bandcamp.py

@@ -34,7 +34,6 @@ class BandcampIE(InfoExtractor):
            json_code = m_trackinfo.group(1)
            data = json.loads(json_code)
            entries = []
            for d in data:
                formats = [{
                    'format_id': 'format_id',

View File: youtube_dl/extractor/brightcove.py

@@ -75,14 +75,17 @@ class BrightcoveIE(InfoExtractor):
        params = {'flashID': object_doc.attrib['id'],
                  'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
                  }
        playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
        def find_param(name):
            return find_xpath_attr(object_doc, './param', 'name', name)
        playerKey = find_param('playerKey')
        # Not all pages define this value
        if playerKey is not None:
            params['playerKey'] = playerKey.attrib['value']
        videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
        # The three fields hold the id of the video
        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
        if videoPlayer is not None:
            params['@videoPlayer'] = videoPlayer.attrib['value']
        linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
        linkBase = find_param('linkBaseURL')
        if linkBase is not None:
            params['linkBaseURL'] = linkBase.attrib['value']
        data = compat_urllib_parse.urlencode(params)
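To illustrate what the new find_param() helper sees, a small sketch against made-up embed markup; the param names are the ones the hunk falls back across, but the markup and values themselves are invented:

import xml.etree.ElementTree as ET

from youtube_dl.utils import find_xpath_attr

# Invented Brightcove-style embed: this page supplies the id only as "videoId".
object_doc = ET.fromstring(
    '<object id="myExperience">'
    '<param name="playerID" value="1234567890"/>'
    '<param name="videoId" value="2222333344445555"/>'
    '</object>'
)

def find_param(name):
    return find_xpath_attr(object_doc, './param', 'name', name)

print(find_param('@videoPlayer'))             # None -- not defined on this page
print(find_param('videoId').attrib['value'])  # 2222333344445555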

View File: youtube_dl/extractor/niconico.py (new file)

@@ -0,0 +1,131 @@
# encoding: utf-8

import re
import socket
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import (
    compat_http_client,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
    compat_str,
    ExtractorError,
    unified_strdate,
)


class NiconicoIE(InfoExtractor):
    IE_NAME = u'niconico'
    IE_DESC = u'ニコニコ動画'

    _TEST = {
        u'url': u'http://www.nicovideo.jp/watch/sm22312215',
        u'file': u'sm22312215.mp4',
        u'md5': u'd1a75c0823e2f629128c43e1212760f9',
        u'info_dict': {
            u'title': u'Big Buck Bunny',
            u'uploader': u'takuya0301',
            u'uploader_id': u'2698420',
            u'upload_date': u'20131123',
            u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
        },
        u'params': {
            u'username': u'ydl.niconico@gmail.com',
            u'password': u'youtube-dl',
        },
    }

    _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
    _NETRC_MACHINE = 'niconico'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        self._login()

    def _login(self):
        (username, password) = self._get_login_info()

        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        # Log in
        login_form_strs = {
            u'mail': username,
            u'password': password,
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(
            u'https://secure.nicovideo.jp/secure/login', login_data)
        login_results = self._download_webpage(
            request, u'', note=u'Logging in', errnote=u'Unable to log in')
        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
        return True

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        # Get video webpage. We are not actually interested in it, but need
        # the cookies in order to be able to download the info webpage
        self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)

        video_info_webpage = self._download_webpage(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
            note=u'Downloading video info page')

        # Get flv info
        flv_info_webpage = self._download_webpage(
            u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
            video_id, u'Downloading flv info')
        video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]

        # Start extracting information
        video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
        video_title = video_info.find('.//title').text
        video_extension = video_info.find('.//movie_type').text
        video_format = video_extension.upper()
        video_thumbnail = video_info.find('.//thumbnail_url').text
        video_description = video_info.find('.//description').text
        video_uploader_id = video_info.find('.//user_id').text
        video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
        video_view_count = video_info.find('.//view_counter').text
        video_webpage_url = video_info.find('.//watch_url').text

        # uploader
        video_uploader = video_uploader_id
        url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
        try:
            user_info_webpage = self._download_webpage(
                url, video_id, note=u'Downloading user information')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
        else:
            user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
            video_uploader = user_info.find('.//nickname').text

        return {
            'id': video_id,
            'url': video_real_url,
            'title': video_title,
            'ext': video_extension,
            'format': video_format,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'view_count': video_view_count,
            'webpage_url': video_webpage_url,
        }
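Because _LOGIN_REQUIRED is True, the extractor refuses to run without credentials. Two ways a caller can supply them, sketched from the u'params' block in _TEST and the _NETRC_MACHINE setting above; the account details here are placeholders:

from youtube_dl import YoutubeDL

# Option 1: credentials in the params dict, mirroring the test configuration.
ydl = YoutubeDL({'username': 'you@example.com', 'password': 'correct horse'})
ydl.add_default_info_extractors()
ydl.download(['http://www.nicovideo.jp/watch/sm22312215'])

# Option 2 (assumed from _NETRC_MACHINE = 'niconico'): set 'usenetrc': True in
# the params and keep a "machine niconico login ... password ..." line in ~/.netrc.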

View File: youtube_dl/extractor/streamcloud.py

@@ -21,6 +21,7 @@ class StreamcloudIE(InfoExtractor):
            u'title': u'youtube-dl test video \'/\\ ä ↭',
            u'duration': 9,
        },
        u'skip': u'Only available from the EU'
    }

    def _real_extract(self, url):

View File: youtube_dl/extractor/viki.py (new file)

@@ -0,0 +1,91 @@
import re

from ..utils import (
    unified_strdate,
)
from .subtitles import SubtitlesInfoExtractor


class VikiIE(SubtitlesInfoExtractor):
    IE_NAME = u'viki'

    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TEST = {
        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
        u'file': u'1023585v.mp4',
        u'md5': u'a21454021c2646f5433514177e2caa5f',
        u'info_dict': {
            u'title': u'Heirs Episode 14',
            u'uploader': u'SBS',
            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            u'upload_date': u'20131121',
            u'age_limit': 13,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        uploader = self._html_search_regex(
            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
            u'uploader')
        if uploader is not None:
            uploader = uploader.strip()

        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            u'rating information', default='').strip()
        RATINGS = {
            'G': 0,
            'PG': 10,
            'PG-13': 13,
            'R': 16,
            'NC': 18,
        }
        age_limit = RATINGS.get(rating_str)

        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
        info_webpage = self._download_webpage(info_url, video_id)

        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')

        upload_date_str = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, u'upload date')
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str is not None
            else None
        )

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, info_webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, info_webpage)
            return

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'uploader': uploader,
            'subtitles': video_subtitles,
            'upload_date': upload_date,
        }
    def _get_available_subtitles(self, video_id, info_webpage):
        res = {}
        for sturl in re.findall(r'<track src="([^"]+)"/>', info_webpage):
            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
            if not m:
                continue
            res[m.group('lang')] = sturl
        return res
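For reference, how the subtitle discovery above behaves on made-up player-fragment markup; the URLs are invented, while the real info_webpage comes from the player5_fragment endpoint used in _real_extract:

import re

info_webpage = (
    '<track src="http://example.com/subs/en.vtt"/>'
    '<track src="http://example.com/subs/es.vtt"/>'
)

res = {}
for sturl in re.findall(r'<track src="([^"]+)"/>', info_webpage):
    m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
    if m:
        res[m.group('lang')] = sturl

print(res)  # {'en': 'http://example.com/subs/en.vtt', 'es': 'http://example.com/subs/es.vtt'}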

View File: youtube_dl/extractor/youtube.py

@@ -1571,8 +1571,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
        playlist_title = self._og_search_title(page)

        url_results = [self.url_result(video_id, 'Youtube', video_id=video_id)
                       for video_id in ids]
        url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
                       for vid_id in ids]

        return self.playlist_result(url_results, playlist_id, playlist_title)

View File: youtube_dl/update.py

@@ -86,7 +86,7 @@ def update_self(to_screen, verbose):

    def version_tuple(version_str):
        return tuple(map(int, version_str.split('.')))

    if version_tuple(__version__) >= version_tuple(version_str):
    if version_tuple(__version__) >= version_tuple(version_id):
        to_screen(u'youtube-dl is up to date (%s)' % __version__)
        return
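The helper relies on Python's element-wise tuple comparison, which also orders dotted versions of different lengths correctly, as with the release below:

def version_tuple(version_str):
    return tuple(map(int, version_str.split('.')))

# (2013, 11, 24) vs (2013, 11, 22, 2): the third element already decides.
assert version_tuple('2013.11.24') >= version_tuple('2013.11.22.2')
assert not (version_tuple('2013.11.22.2') >= version_tuple('2013.11.24'))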

View File: youtube_dl/utils.py

@@ -12,6 +12,7 @@ import os
import pipes
import platform
import re
import ssl
import socket
import sys
import traceback
@@ -535,13 +536,31 @@ def formatSeconds(secs):
    else:
        return '%d' % secs

def make_HTTPS_handler(opts):
    if sys.version_info < (3,2):
        # Python's 2.x handler is very simplistic
        return compat_urllib_request.HTTPSHandler()
    if sys.version_info < (3, 2):
        import httplib

        class HTTPSConnectionV3(httplib.HTTPSConnection):
            def __init__(self, *args, **kwargs):
                httplib.HTTPSConnection.__init__(self, *args, **kwargs)

            def connect(self):
                sock = socket.create_connection((self.host, self.port), self.timeout)
                if self._tunnel_host:
                    self.sock = sock
                    self._tunnel()
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
                except ssl.SSLError as e:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)

        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
            def https_open(self, req):
                return self.do_open(HTTPSConnectionV3, req)
        return HTTPSHandlerV3()
    else:
        import ssl
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
        context.set_default_verify_paths()
        context.verify_mode = (ssl.CERT_NONE
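As the niconico commit message notes, the site only negotiates SSLv3, so this handler has to end up on the opener youtube-dl uses for its requests. A rough sketch of that wiring; the FakeOpts shape and the no_check_certificate attribute are assumptions, and only make_HTTPS_handler itself comes from this diff:

import collections

from youtube_dl.utils import compat_urllib_request, make_HTTPS_handler

# Assumed minimal stand-in for the optparse options object youtube-dl passes in;
# the Python 2 branch above never reads it.
FakeOpts = collections.namedtuple('FakeOpts', ['no_check_certificate'])

https_handler = make_HTTPS_handler(FakeOpts(no_check_certificate=False))
opener = compat_urllib_request.build_opener(https_handler)
compat_urllib_request.install_opener(opener)
# Later HTTPS requests through urlopen() now try SSLv3 first (Python 2 path,
# with an SSLv23 fallback) instead of timing out during negotiation on niconico.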

View File: youtube_dl/version.py

@@ -1,2 +1,2 @@
__version__ = '2013.11.22.2'
__version__ = '2013.11.24'