release 2018.08.04

[ChangeLog] Actualize
[ci skip]
2025-08-02 18:39:51 -05:00 · 2018-08-04 01:23:24 +07:00 · 2018-08-04 01:21:23 +07:00 · 2018-08-04 00:26:58 +07:00 · 2018-08-03 22:44:31 +07:00 · 2018-08-03 22:43:23 +07:00
12 changed files with 203 additions and 37 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.29**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.08.04*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.08.04**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2018.07.29
+[debug] youtube-dl version 2018.08.04
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+version 2018.08.04
+
+Extractors
+* [funk:channel] Improve byChannelAlias extraction (#17142)
+* [twitch] Fix authentication (#17024, #17126)
+* [twitch:vod] Improve URL regular expression (#17135)
+* [watchbox] Fix extraction (#17107)
+* [pbs] Fix extraction (#17109)
+* [theplatform] Relax URL regular expression (#16181, #17097)
++ [viqeo] Add support for viqeo.tv (#17066)
+
+
 version 2018.07.29

 Extractors
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -1001,6 +1001,7 @@
 - **Vimple**: Vimple - one-click video hosting
 - **Vine**
 - **vine:user**
+ - **Viqeo**
 - **Viu**
 - **viu:ott**
 - **viu:playlist**
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1291,6 +1291,7 @@ from .viki import (
    VikiIE,
    VikiChannelIE,
 )
+from .viqeo import ViqeoIE
 from .viu import (
    ViuIE,
    ViuPlaylistIE,
--- a/youtube_dl/extractor/funk.py
+++ b/youtube_dl/extractor/funk.py
@@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import itertools
 import re

 from .common import InfoExtractor
@@ -125,17 +126,31 @@ class FunkChannelIE(FunkBaseIE):
        # Id-based channels are currently broken on their side: webplayer
        # tries to process them via byChannelAlias endpoint and fails
        # predictably.
-        by_channel_alias = self._download_json(
-            'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
-            % channel_id,
-            'Downloading byChannelAlias JSON', headers=headers, query={
-                'size': 100,
-            }, fatal=False)
-        if by_channel_alias:
+        for page_num in itertools.count():
+            by_channel_alias = self._download_json(
+                'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
+                % channel_id,
+                'Downloading byChannelAlias JSON page %d' % (page_num + 1),
+                headers=headers, query={
+                    'filterFsk': 'false',
+                    'sort': 'creationDate,desc',
+                    'size': 100,
+                    'page': page_num,
+                }, fatal=False)
+            if not by_channel_alias:
+                break
            video_list = try_get(
                by_channel_alias, lambda x: x['_embedded']['videoList'], list)
-            if video_list:
+            if not video_list:
+                break
+            try:
                video = next(r for r in video_list if r.get('alias') == alias)
+                break
+            except StopIteration:
+                pass
+            if not try_get(
+                    by_channel_alias, lambda x: x['_links']['next']):
+                break

        if not video:
            by_id_list = self._download_json(
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -113,6 +113,7 @@ from .peertube import PeerTubeIE
 from .indavideo import IndavideoEmbedIE
 from .apa import APAIE
 from .foxnews import FoxNewsIE
+from .viqeo import ViqeoIE


 class GenericIE(InfoExtractor):
@@ -2060,6 +2061,15 @@ class GenericIE(InfoExtractor):
            },
            'skip': 'TODO: fix nested playlists processing in tests',
        },
+        {
+            # Viqeo embeds
+            'url': 'https://viqeo.tv/',
+            'info_dict': {
+                'id': 'viqeo',
+                'title': 'All-new video platform',
+            },
+            'playlist_count': 6,
+        },
        # {
        #     # TODO: find another test
        #     # http://schema.org/VideoObject
@@ -3094,6 +3104,11 @@ class GenericIE(InfoExtractor):
            return self.playlist_from_matches(
                sharevideos_urls, video_id, video_title)

+        viqeo_urls = ViqeoIE._extract_urls(webpage)
+        if viqeo_urls:
+            return self.playlist_from_matches(
+                viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
+
        # Look for HTML5 media
        entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
        if entries:
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -15,6 +15,7 @@ from ..utils import (
    strip_jsonp,
    strip_or_none,
    unified_strdate,
+    url_or_none,
    US_RATINGS,
 )

@@ -557,6 +558,13 @@ class PBSIE(InfoExtractor):
                if redirect_url and redirect_url not in redirect_urls:
                    redirects.append(redirect)
                    redirect_urls.add(redirect_url)
+            encodings = info.get('encodings')
+            if isinstance(encodings, list):
+                for encoding in encodings:
+                    encoding_url = url_or_none(encoding)
+                    if encoding_url and encoding_url not in redirect_urls:
+                        redirects.append({'url': encoding_url})
+                        redirect_urls.add(encoding_url)

        chapters = []
        # Player pages may also serve different qualities
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -310,7 +310,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):

 class ThePlatformFeedIE(ThePlatformBaseIE):
    _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
-    _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))'
+    _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[^&]+))'
    _TESTS = [{
        # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
        'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
@@ -327,6 +327,9 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
            'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
            'uploader': 'NBCU-NEWS',
        },
+    }, {
+        'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byGuid=nn_netcast_180306.Copy.01',
+        'only_matching': True,
    }]

    def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -4,10 +4,10 @@ from __future__ import unicode_literals
 import itertools
 import re
 import random
+import json

 from .common import InfoExtractor
 from ..compat import (
-    compat_HTTPError,
    compat_kwargs,
    compat_parse_qs,
    compat_str,
@@ -26,7 +26,6 @@ from ..utils import (
    try_get,
    unified_timestamp,
    update_url_query,
-    urlencode_postdata,
    url_or_none,
    urljoin,
 )
@@ -37,8 +36,9 @@ class TwitchBaseIE(InfoExtractor):

    _API_BASE = 'https://api.twitch.tv'
    _USHER_BASE = 'https://usher.ttvnw.net'
-    _LOGIN_URL = 'https://www.twitch.tv/login'
-    _CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6'
+    _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
+    _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
+    _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
    _NETRC_MACHINE = 'twitch'

    def _handle_error(self, response):
@@ -77,22 +77,21 @@ class TwitchBaseIE(InfoExtractor):
            page_url = urlh.geturl()
            post_url = self._search_regex(
                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
-                'post url', default=page_url, group='url')
+                'post url', default=self._LOGIN_POST_URL, group='url')
            post_url = urljoin(page_url, post_url)

-            headers = {'Referer': page_url}
+            headers = {
+                'Referer': page_url,
+                'Origin': page_url,
+                'Content-Type': 'text/plain;charset=UTF-8',
+            }

-            try:
-                response = self._download_json(
-                    post_url, None, note,
-                    data=urlencode_postdata(form),
-                    headers=headers)
-            except ExtractorError as e:
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
-                    response = self._parse_json(
-                        e.cause.read().decode('utf-8'), None)
-                    fail(response.get('message') or response['errors'][0])
-                raise
+            response = self._download_json(
+                post_url, None, note, data=json.dumps(form).encode(),
+                headers=headers, expected_status=400)
+            error = response.get('error_description') or response.get('error_code')
+            if error:
+                fail(error)

            if 'Authenticated successfully' in response.get('message', ''):
                return None, None
@@ -105,7 +104,7 @@ class TwitchBaseIE(InfoExtractor):
                headers=headers)

        login_page, handle = self._download_webpage_handle(
-            self._LOGIN_URL, None, 'Downloading login page')
+            self._LOGIN_FORM_URL, None, 'Downloading login page')

        # Some TOR nodes and public proxies are blocked completely
        if 'blacklist_message' in login_page:
@@ -115,6 +114,7 @@ class TwitchBaseIE(InfoExtractor):
            login_page, handle, 'Logging in', {
                'username': username,
                'password': password,
+                'client_id': self._CLIENT_ID,
            })

        # Successful login
@@ -240,7 +240,7 @@ class TwitchVodIE(TwitchItemBaseIE):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
-                            (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
+                            (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
                            player\.twitch\.tv/\?.*?\bvideo=v
                        )
                        (?P<id>\d+)
@@ -296,6 +296,9 @@ class TwitchVodIE(TwitchItemBaseIE):
    }, {
        'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
        'only_matching': True,
+    }, {
+        'url': 'https://www.twitch.tv/northernlion/video/291940395',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/viqeo.py
+++ b/youtube_dl/extractor/viqeo.py
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    str_or_none,
+    url_or_none,
+)
+
+
+class ViqeoIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                        (?:
+                            viqeo:|
+                            https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
+                            https?://api\.viqeo\.tv/v\d+/data/startup?.*?\bvideo(?:%5B%5D|\[\])=
+                        )
+                        (?P<id>[\da-f]+)
+                    '''
+    _TESTS = [{
+        'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
+        'md5': 'a169dd1a6426b350dca4296226f21e76',
+        'info_dict': {
+            'id': 'cde96f09d25f39bee837',
+            'ext': 'mp4',
+            'title': 'cde96f09d25f39bee837',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 76,
+        },
+    }, {
+        'url': 'viqeo:cde96f09d25f39bee837',
+        'only_matching': True,
+    }, {
+        'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return [
+            mobj.group('url')
+            for mobj in re.finditer(
+                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1',
+                webpage)]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)
+
+        data = self._parse_json(
+            self._search_regex(
+                r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
+            video_id)
+
+        formats = []
+        thumbnails = []
+        for media_file in data['mediaFiles']:
+            if not isinstance(media_file, dict):
+                continue
+            media_url = url_or_none(media_file.get('url'))
+            if not media_url or not media_url.startswith(('http', '//')):
+                continue
+            media_type = str_or_none(media_file.get('type'))
+            if not media_type:
+                continue
+            media_kind = media_type.split('/')[0].lower()
+            f = {
+                'url': media_url,
+                'width': int_or_none(media_file.get('width')),
+                'height': int_or_none(media_file.get('height')),
+            }
+            format_id = str_or_none(media_file.get('quality'))
+            if media_kind == 'image':
+                f['id'] = format_id
+                thumbnails.append(f)
+            elif media_kind in ('video', 'audio'):
+                is_audio = media_kind == 'audio'
+                f.update({
+                    'format_id': 'audio' if is_audio else format_id,
+                    'fps': int_or_none(media_file.get('fps')),
+                    'vcodec': 'none' if is_audio else None,
+                })
+                formats.append(f)
+        self._sort_formats(formats)
+
+        duration = int_or_none(data.get('duration'))
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'duration': duration,
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/watchbox.py
+++ b/youtube_dl/extractor/watchbox.py
@@ -10,6 +10,7 @@ from ..utils import (
    js_to_json,
    strip_or_none,
    try_get,
+    unescapeHTML,
    unified_timestamp,
 )

@@ -67,12 +68,20 @@ class WatchBoxIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

-        source = (self._parse_json(
+        player_config = self._parse_json(
            self._search_regex(
-                r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
-                default='{}'),
-            video_id, transform_source=js_to_json,
-            fatal=False) or {}).get('source') or {}
+                r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage,
+                'player config', default='{}', group='data'),
+            video_id, transform_source=unescapeHTML, fatal=False)
+
+        if not player_config:
+            player_config = self._parse_json(
+                self._search_regex(
+                    r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
+                    default='{}'),
+                video_id, transform_source=js_to_json, fatal=False) or {}
+
+        source = player_config.get('source') or {}

        video_id = compat_str(source.get('videoId') or video_id)

--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2018.07.29'
+__version__ = '2018.08.04'
Author	SHA1	Message	Date
Sergey M․	81cc22bab6	release 2018.08.04	2018-08-04 01:23:24 +07:00
Sergey M․	20f96f64bd	[ChangeLog] Actualize [ci skip]	2018-08-04 01:21:23 +07:00
Sergey M․	af322eb830	[funk:channel] Improve byChannelAlias extraction (closes #17142 )	2018-08-04 00:26:58 +07:00
Sergey M․	cb1c3a3c07	[twitch] Update cliend id and modernize (closes #17126 )	2018-08-03 22:44:31 +07:00
Tim Broder	48afc6ca3e	[twitch] Fix authentication (closes #17024 )	2018-08-03 22:43:23 +07:00
Sergey M․	644921b372	[twitch:vod] Improve _VALID_URL (closes #17135 )	2018-08-02 23:16:15 +07:00
Sergey M․	19b9de13c4	[watchbox] Fix extraction (closes #17107 )	2018-07-30 23:28:44 +07:00
Sergey M․	6f2d82a5a0	[pbs] Fix extraction (closes #17109 )	2018-07-30 23:10:40 +07:00
Giuseppe Fabiano	7ff129d3ea	[theplatform] Relax _VALID_URL (closes #16181 )	2018-07-30 03:15:06 +07:00
Sergey M․	9d1b213845	[viqeo] Add extractor (closes #17066 )	2018-07-30 03:05:36 +07:00