mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-08-02 10:30:04 -05:00
Compare commits
35 Commits
2016.05.16
...
2016.05.21
Author | SHA1 | Date | |
---|---|---|---|
![]() |
c8cc3745fb | ||
![]() |
4c718d3c50 | ||
![]() |
115c65793a | ||
![]() |
661d46b28f | ||
![]() |
5ce3d5bd1b | ||
![]() |
612b5f403e | ||
![]() |
9f54e692d2 | ||
![]() |
7b2fcbfd4e | ||
![]() |
16da9bbc29 | ||
![]() |
c8602b2f9b | ||
![]() |
b219f5e51b | ||
![]() |
1846e9ade0 | ||
![]() |
6756602be6 | ||
![]() |
6c114b1210 | ||
![]() |
7ded6545ed | ||
![]() |
aa5957ac49 | ||
![]() |
64413f7563 | ||
![]() |
45f160a43c | ||
![]() |
36ca2c55db | ||
![]() |
f0c96af9cb | ||
![]() |
31a70191e7 | ||
![]() |
ad96b4c8f5 | ||
![]() |
043dc9d36f | ||
![]() |
52f7c75cff | ||
![]() |
f6e588afc0 | ||
![]() |
a001296703 | ||
![]() |
2cbd8c6781 | ||
![]() |
8585dc4cdc | ||
![]() |
dd81769c62 | ||
![]() |
46bc9b7d7c | ||
![]() |
b78531a36a | ||
![]() |
11e6a0b641 | ||
![]() |
15cda1ef77 | ||
![]() |
055f0d3d06 | ||
![]() |
cdd94c2eae |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.16**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.05.16
|
||||
[debug] youtube-dl version 2016.05.21
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
2
Makefile
2
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
|
@@ -16,6 +16,8 @@
|
||||
- **9gag**
|
||||
- **abc.net.au**
|
||||
- **Abc7News**
|
||||
- **abcnews**
|
||||
- **abcnews:video**
|
||||
- **AcademicEarth:Course**
|
||||
- **acast**
|
||||
- **acast:channel**
|
||||
@@ -104,6 +106,7 @@
|
||||
- **CBCPlayer**
|
||||
- **CBS**
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
@@ -213,6 +216,7 @@
|
||||
- **Flickr**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **FootyRoom**
|
||||
- **Formula1**
|
||||
- **FOX**
|
||||
- **Foxgay**
|
||||
- **FoxNews**: Fox News and Fox Business Video
|
||||
@@ -316,6 +320,7 @@
|
||||
- **la7.tv**
|
||||
- **Laola1Tv**
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
- **Lemonde**
|
||||
- **LePlaylist**
|
||||
@@ -331,6 +336,7 @@
|
||||
- **livestream**
|
||||
- **livestream:original**
|
||||
- **LnkGo**
|
||||
- **LocalNews8**
|
||||
- **LoveHomePorn**
|
||||
- **lrt.lt**
|
||||
- **lynda**: lynda.com videos
|
||||
@@ -556,6 +562,7 @@
|
||||
- **ScreenJunkies**
|
||||
- **ScreenwaveMedia**
|
||||
- **SenateISVP**
|
||||
- **SendtoNews**
|
||||
- **ServingSys**
|
||||
- **Sexu**
|
||||
- **Shahid**
|
||||
|
@@ -50,6 +50,8 @@ from youtube_dl.utils import (
|
||||
sanitize_path,
|
||||
prepend_extension,
|
||||
replace_extension,
|
||||
remove_start,
|
||||
remove_end,
|
||||
remove_quotes,
|
||||
shell_quote,
|
||||
smuggle_url,
|
||||
@@ -215,6 +217,16 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
||||
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
||||
|
||||
def test_remove_start(self):
    # remove_start() must hand None straight back, strip a matching
    # prefix, and leave strings without that prefix untouched.
    prefix = 'A - '
    for value, expected in (
            (None, None),
            ('A - B', 'B'),
            ('B - A', 'B - A')):
        self.assertEqual(remove_start(value, prefix), expected)
|
||||
|
||||
def test_remove_end(self):
    # remove_end() must hand None straight back, strip a matching
    # suffix, and leave strings without that suffix untouched.
    suffix = ' - B'
    for value, expected in (
            (None, None),
            ('A - B', 'A'),
            ('B - A', 'B - A')):
        self.assertEqual(remove_end(value, suffix), expected)
|
||||
|
||||
def test_remove_quotes(self):
|
||||
self.assertEqual(remove_quotes(None), None)
|
||||
self.assertEqual(remove_quotes('"'), '"')
|
||||
|
135
youtube_dl/extractor/abcnews.py
Normal file
135
youtube_dl/extractor/abcnews.py
Normal file
@@ -0,0 +1,135 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import re
|
||||
import time
|
||||
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class AbcNewsVideoIE(AMPIE):
    """Extractor for abcnews.go.com ``/<section>/video/<slug>-<id>`` pages.

    The numeric id at the end of the URL is used to build the item feed URL,
    which is passed to ``self._extract_feed_info`` (not defined in this
    class; presumably supplied by the AMPIE base class).
    """
    IE_NAME = 'abcnews:video'
    # Raw string: ``\d`` and ``\.`` must reach the regex engine as written
    # (in a plain literal they are invalid escape sequences).  The dots of
    # the host name are escaped so they only match a literal dot, and both
    # http and https URLs are accepted.
    _VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
        'info_dict': {
            'id': '20411932',
            'ext': 'mp4',
            'display_id': 'week-exclusive-irans-foreign-minister-zarif',
            'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
            'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
            'duration': 180,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the feed info for *url*, tagged with its id and display id."""
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        video_id = mobj.group('id')
        info_dict = self._extract_feed_info(
            'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
        # Override whatever id the feed reported with the ids from the URL.
        info_dict.update({
            'id': video_id,
            'display_id': display_id,
        })
        return info_dict
|
||||
|
||||
|
||||
class AbcNewsIE(InfoExtractor):
    """Extractor for abcnews.go.com story pages (``.../<slug>/story?id=<id>``).

    The story page is scraped for the embedded ABC video URL, which is
    delegated to ``AbcNewsVideoIE``.  If the page also embeds a YouTube
    iframe, both videos are returned as a playlist.
    """
    IE_NAME = 'abcnews'
    # Raw string: ``\.``, ``\?`` and ``\d`` are regex escapes, not string
    # escapes, and are invalid escape sequences in a plain literal.
    _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
        'info_dict': {
            'id': '10498713',
            'ext': 'flv',
            'display_id': 'dramatic-video-rare-death-job-america',
            'title': 'Occupational Hazards',
            'description': 'Nightline investigates the dangers that lurk at various jobs.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20100428',
            'timestamp': 1272412800,
        },
        'add_ie': ['AbcNewsVideo'],
    }, {
        'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
        'info_dict': {
            'id': '39125818',
            'ext': 'mp4',
            'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
            'title': 'Justin Timberlake Drops Hints For Secret Single',
            'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
            'upload_date': '20160515',
            'timestamp': 1463329500,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
            # The embedded YouTube video is blocked due to copyright issues
            'playlist_items': '1',
        },
        'add_ie': ['AbcNewsVideo'],
    }, {
        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a url_transparent entry (or a two-item playlist) for *url*."""
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(
            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
        # The page may carry a relative URL; resolve it against the story URL.
        full_video_url = compat_urlparse.urljoin(url, video_url)

        youtube_url = self._html_search_regex(
            r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
            webpage, 'YouTube URL', default=None)

        timestamp = None
        date_str = self._html_search_regex(
            r'<span[^>]+class="timestamp">([^<]+)</span>',
            webpage, 'timestamp', fatal=False)
        if date_str:
            tz_offset = 0
            if date_str.endswith(' ET'):  # Eastern Time
                # NOTE: a fixed UTC-5 is applied; daylight saving time is
                # not taken into account.
                tz_offset = -5
                date_str = date_str[:-3]
            date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
            for date_format in date_formats:
                try:
                    timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
                except ValueError:
                    continue
                # First format that parses wins; no need to try the rest.
                break
            if timestamp is not None:
                # Convert the local wall-clock value to UTC.
                timestamp -= tz_offset * 3600

        entry = {
            '_type': 'url_transparent',
            'ie_key': AbcNewsVideoIE.ie_key(),
            'url': full_video_url,
            'id': video_id,
            'display_id': display_id,
            'timestamp': timestamp,
        }

        if youtube_url:
            entries = [entry, self.url_result(youtube_url, 'Youtube')]
            return self.playlist_result(entries)

        return entry
|
@@ -52,7 +52,7 @@ class AMPIE(InfoExtractor):
|
||||
for media_data in media_content:
|
||||
media = media_data['@attributes']
|
||||
media_type = media['type']
|
||||
if media_type == 'video/f4m':
|
||||
if media_type in ('video/f4m', 'application/f4m+xml'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
@@ -61,7 +61,7 @@ class AMPIE(InfoExtractor):
|
||||
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data['media-category']['@attributes']['label'],
|
||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||
'url': media['url'],
|
||||
'tbr': int_or_none(media.get('bitrate')),
|
||||
'filesize': int_or_none(media.get('fileSize')),
|
||||
|
224
youtube_dl/extractor/anvato.py
Normal file
224
youtube_dl/extractor/anvato.py
Normal file
@@ -0,0 +1,224 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_encrypt
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
determine_ext,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
strip_jsonp,
|
||||
)
|
||||
|
||||
|
||||
def md5_text(s):
    """Return the hexadecimal MD5 digest of *s*.

    Values that are not already ``compat_str`` are converted with
    ``compat_str()`` first; the text is hashed as UTF-8.
    """
    text = s if isinstance(s, compat_str) else compat_str(s)
    return hashlib.md5(text.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
class AnvatoIE(InfoExtractor):
|
||||
# Copied from anvplayer.min.js
|
||||
_ANVACK_TABLE = {
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
|
||||
'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA',
|
||||
'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP',
|
||||
'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv',
|
||||
'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7',
|
||||
'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR',
|
||||
'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg',
|
||||
'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto',
|
||||
'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY',
|
||||
'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh',
|
||||
'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK',
|
||||
'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D',
|
||||
'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad',
|
||||
'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp',
|
||||
'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih',
|
||||
'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR',
|
||||
'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW',
|
||||
'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su',
|
||||
'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q',
|
||||
'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5',
|
||||
'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3',
|
||||
'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI',
|
||||
'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s',
|
||||
'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz',
|
||||
'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg',
|
||||
'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x',
|
||||
'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH',
|
||||
'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX',
|
||||
'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc',
|
||||
'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK',
|
||||
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7',
|
||||
'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C',
|
||||
'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e',
|
||||
'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1',
|
||||
'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re',
|
||||
'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51',
|
||||
'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho',
|
||||
'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9',
|
||||
'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH',
|
||||
'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F',
|
||||
'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo',
|
||||
'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR',
|
||||
'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa',
|
||||
'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk',
|
||||
'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ',
|
||||
'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ',
|
||||
'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m',
|
||||
'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b',
|
||||
'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3',
|
||||
'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK',
|
||||
'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
|
||||
'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
|
||||
'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F',
|
||||
'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx',
|
||||
'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ',
|
||||
'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH',
|
||||
'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm',
|
||||
'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt',
|
||||
'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl',
|
||||
'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b',
|
||||
'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV',
|
||||
'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg',
|
||||
'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk',
|
||||
'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT',
|
||||
'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa',
|
||||
'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv',
|
||||
'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k',
|
||||
'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI',
|
||||
'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr',
|
||||
'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw',
|
||||
'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K',
|
||||
'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH',
|
||||
'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK',
|
||||
'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu',
|
||||
'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
|
||||
'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
|
||||
'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK',
|
||||
'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n',
|
||||
'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD',
|
||||
'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk',
|
||||
'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
|
||||
'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
|
||||
'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
|
||||
}
|
||||
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
|
||||
|
||||
def __init__(self, *args, **kwargs):
    # Forward everything to the base class, then start with an empty
    # server-time cache; _server_time() fills it on first use.
    super(AnvatoIE, self).__init__(*args, **kwargs)
    self.__server_time = None
|
||||
|
||||
def _server_time(self, access_key, video_id):
|
||||
if self.__server_time is not None:
|
||||
return self.__server_time
|
||||
|
||||
self.__server_time = int(self._download_json(
|
||||
self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id,
|
||||
note='Fetching server time')['server_time'])
|
||||
|
||||
return self.__server_time
|
||||
|
||||
def _api_prefix(self, access_key):
|
||||
return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage')
|
||||
|
||||
def _get_video_json(self, access_key, video_id):
    """Download the video metadata JSON for *video_id*.

    Builds the ``mcp/video`` URL, appends an ``X-Anvato-Adst-Auth`` value
    derived from the server time and the URL, and passes a JSON payload
    carrying a request id, a token hash and the server time as ``data``.
    """
    # See et() in anvplayer.min.js, which is an alias of getVideoJSON()
    request_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
    now = self._server_time(access_key, video_id)

    # "<time>~md5(url)~md5(time)"; only the first 64 characters are encrypted.
    plain_text = '~'.join(('%d' % now, md5_text(request_url), md5_text(now)))
    plain_ints = bytes_to_intlist(plain_text[:64])
    key_ints = bytes_to_intlist(self._AUTH_KEY)
    encrypted = intlist_to_bytes(aes_encrypt(plain_ints, key_ints))

    request_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(encrypted).decode('ascii')

    # Pseudo-random request id: first 30 hex digits of an MD5 hash.
    request_id = md5_text(time.time() * 1000 * random.random())[:30]
    token = md5_text('%s|%s|%d|%s' % (
        access_key, request_id, now, self._ANVACK_TABLE[access_key]))
    body = {
        'api': {
            'anvrid': request_id,
            'anvstk': token,
            'anvts': now,
        },
    }

    return self._download_json(
        request_url, video_id, transform_source=strip_jsonp,
        data=json.dumps(body).encode('utf-8'))
|
||||
|
||||
def _extract_anvato_videos(self, webpage, video_id):
    """Build an info dict from the Anvato player data embedded in *webpage*.

    The ``data-anvp`` attribute of a ``<script>`` tag supplies the video id
    and access key; the metadata fetched with them supplies the formats,
    subtitles, title, description, categories and thumbnail.
    """
    player_json = self._html_search_regex(
        r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
        'Anvato player data')
    player = self._parse_json(player_json, video_id)

    # From here on the id reported by the player data replaces the caller's.
    video_id = player['video']
    access_key = player['accessKey']

    video_data = self._get_video_json(access_key, video_id)

    formats = []
    for item in video_data['published_urls']:
        media_url = item['embed_url']
        ext = determine_ext(media_url)

        if ext == 'smil':
            formats.extend(self._extract_smil_formats(media_url, video_id))
            continue

        tbr = int_or_none(item.get('kbps'))
        cdn_name = item.get('cdn_name')
        fmt = {
            'url': media_url,
            'format_id': ('http-' + cdn_name if cdn_name else 'http').lower(),
            # A bitrate of 0 is reported as unknown.
            'tbr': tbr or None,
        }

        if ext == 'm3u8':
            # Not using _extract_m3u8_formats here as individual media
            # playlists are also included in published_urls.
            if tbr is None:
                formats.append(self._m3u8_meta_format(media_url, ext='mp4', m3u8_id='hls'))
                continue
            fmt['format_id'] = 'hls-' + compat_str(tbr)
            fmt['ext'] = 'mp4'
        elif ext == 'mp3':
            fmt['vcodec'] = 'none'
        else:
            fmt['width'] = int_or_none(item.get('width'))
            fmt['height'] = int_or_none(item.get('height'))
        formats.append(fmt)

    self._sort_formats(formats)

    # Group the caption tracks by language.
    subtitles = {}
    for caption in video_data.get('captions', []):
        track = {
            'url': caption['url'],
            'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
        }
        subtitles.setdefault(caption['language'], []).append(track)

    info = {
        'id': video_id,
        'formats': formats,
        'title': video_data.get('def_title'),
        'description': video_data.get('def_description'),
        'categories': video_data.get('categories'),
        'thumbnail': video_data.get('thumbnail'),
        'subtitles': subtitles,
    }
    return info
|
@@ -444,6 +444,10 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
# non numeric ref: prefixed video id
|
||||
'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# unavailable video without message but with error_code
|
||||
'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -514,8 +518,9 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)
|
||||
raise ExtractorError(json_data[0]['message'], expected=True)
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
||||
raise ExtractorError(
|
||||
json_data.get('message') or json_data['error_code'], expected=True)
|
||||
raise
|
||||
|
||||
title = json_data['name'].strip()
|
||||
|
@@ -4,65 +4,66 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class CBCIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
# with mediaId
|
||||
'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
|
||||
'md5': '97e24d09672fc4cf56256d6faa6c25bc',
|
||||
'info_dict': {
|
||||
'id': '2682904050',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Don Cherry – All-Stars',
|
||||
'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
|
||||
'timestamp': 1454475540,
|
||||
'timestamp': 1454463000,
|
||||
'upload_date': '20160203',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}, {
|
||||
# with clipId
|
||||
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||
'md5': '0274a90b51a9b4971fe005c63f592f12',
|
||||
'info_dict': {
|
||||
'id': '2487345465',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Robin Williams freestyles on 90 Minutes Live',
|
||||
'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
|
||||
'upload_date': '19700101',
|
||||
'upload_date': '19780210',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'timestamp': 255977160,
|
||||
},
|
||||
}, {
|
||||
# multiple iframes
|
||||
'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
|
||||
'playlist': [{
|
||||
'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
|
||||
'info_dict': {
|
||||
'id': '2680832926',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
|
||||
'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
|
||||
'upload_date': '19700101',
|
||||
'upload_date': '20160201',
|
||||
'timestamp': 1454342820,
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}, {
|
||||
'md5': '415a0e3f586113894174dfb31aa5bb1a',
|
||||
'info_dict': {
|
||||
'id': '2658915080',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fly like an eagle!',
|
||||
'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
|
||||
'upload_date': '19700101',
|
||||
'upload_date': '20150315',
|
||||
'timestamp': 1426443984,
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -91,24 +92,54 @@ class CBCIE(InfoExtractor):
|
||||
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||
'md5': '64d25f841ddf4ddb28a235338af32e2c',
|
||||
'info_dict': {
|
||||
'id': '2683190193',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gerry Runs a Sweat Shop',
|
||||
'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
|
||||
'timestamp': 1455067800,
|
||||
'timestamp': 1455071400,
|
||||
'upload_date': '20160210',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}, {
|
||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||
'url': 'http://www.cbc.ca/player/play/2657631896',
|
||||
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
|
||||
'info_dict': {
|
||||
'id': '2657631896',
|
||||
'ext': 'mp3',
|
||||
'title': 'CBC Montreal is organizing its first ever community hackathon!',
|
||||
'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
|
||||
'timestamp': 1425704400,
|
||||
'upload_date': '20150307',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# available only when we add `formats=MPEG4,FLV,MP3` to theplatform url
|
||||
'url': 'http://www.cbc.ca/player/play/2164402062',
|
||||
'md5': '17a61eb813539abea40618d6323a7f82',
|
||||
'info_dict': {
|
||||
'id': '2164402062',
|
||||
'ext': 'flv',
|
||||
'title': 'Cancer survivor four times over',
|
||||
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||
'timestamp': 1320410746,
|
||||
'upload_date': '20111104',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id,
|
||||
'ThePlatformFeed', video_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, {
|
||||
'force_smil_url': True
|
||||
}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
84
youtube_dl/extractor/cbslocal.py
Normal file
84
youtube_dl/extractor/cbslocal.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
|
||||
from .anvato import AnvatoIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class CBSLocalIE(AnvatoIE):
    """Extractor for article pages on the *.cbslocal.com station sites.

    A page either embeds a SendtoNews player, in which case a
    ``url_transparent`` result pointing at the embed URL is returned, or the
    page is passed to ``self._extract_anvato_videos``.  In both cases the
    article's display id and release timestamp are added to the result.
    """
    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'

    _TESTS = [{
        # Anvato backend
        'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
        'md5': 'f0ee3081e3843f575fccef901199b212',
        'info_dict': {
            'id': '3401037',
            'ext': 'mp4',
            'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
            'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
            'thumbnail': 're:^https?://.*',
            'timestamp': 1463440500,
            'upload_date': '20160516',
            'subtitles': {
                'en': 'mincount:5',
            },
            'categories': [
                'Stations\\Spoken Word\\KCBSTV',
                'Syndication\\MSN',
                'Syndication\\NDN',
                'Syndication\\AOL',
                'Syndication\\Yahoo',
                'Syndication\\Tribune',
                'Syndication\\Curb.tv',
                'Content\\News'
            ],
        },
    }, {
        # SendtoNews embed
        'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588',
            'ext': 'mp4',
            'title': 'Recap: CLE 15, CIN 6',
            'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
            'upload_date': '20160516',
            'timestamp': 1463433840,
            'duration': 49,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the article at *url*.

        The release timestamp is optional metadata: it is left as ``None``
        when the "entry-date" element is missing or its text does not match
        the expected date layout.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        sendtonews_url = SendtoNewsIE._extract_url(webpage)
        if sendtonews_url:
            info_dict = {
                '_type': 'url_transparent',
                # the embed URL may be protocol-relative; resolve it
                # against the article URL
                'url': compat_urlparse.urljoin(url, sendtonews_url),
            }
        else:
            info_dict = self._extract_anvato_videos(webpage, display_id)

        time_str = self._html_search_regex(
            r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
        timestamp = None
        if time_str:
            try:
                released = datetime.datetime.strptime(
                    time_str, '%b %d, %Y %I:%M %p')
            except ValueError:
                # Unexpected date layout: the timestamp is not essential,
                # so do not abort the whole extraction because of it.
                released = None
            if released is not None:
                timestamp = calendar.timegm(released.timetuple())

        info_dict.update({
            'display_id': display_id,
            'timestamp': timestamp,
        })

        return info_dict
|
@@ -1058,12 +1058,8 @@ class InfoExtractor(object):
|
||||
})
|
||||
return formats
|
||||
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
entry_protocol='m3u8', preference=None,
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True, live=False):
|
||||
|
||||
formats = [{
|
||||
def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None):
|
||||
return {
|
||||
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
|
||||
'url': m3u8_url,
|
||||
'ext': ext,
|
||||
@@ -1071,7 +1067,14 @@ class InfoExtractor(object):
|
||||
'preference': preference - 1 if preference else -1,
|
||||
'resolution': 'multiple',
|
||||
'format_note': 'Quality selection URL',
|
||||
}]
|
||||
}
|
||||
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
entry_protocol='m3u8', preference=None,
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True, live=False):
|
||||
|
||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
@@ -1138,7 +1141,7 @@ class InfoExtractor(object):
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
|
||||
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None
|
||||
# Despite specification does not mention NAME attribute for
|
||||
# EXT-X-STREAM-INF it still sometimes may be present
|
||||
stream_name = last_info.get('NAME') or last_media_name
|
||||
@@ -1278,21 +1281,21 @@ class InfoExtractor(object):
|
||||
m3u8_count = 0
|
||||
|
||||
srcs = []
|
||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||
for video in videos:
|
||||
src = video.get('src')
|
||||
media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
|
||||
for medium in media:
|
||||
src = medium.get('src')
|
||||
if not src or src in srcs:
|
||||
continue
|
||||
srcs.append(src)
|
||||
|
||||
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
filesize = int_or_none(video.get('size') or video.get('fileSize'))
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
proto = video.get('proto')
|
||||
ext = video.get('ext')
|
||||
bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
|
||||
filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
|
||||
width = int_or_none(medium.get('width'))
|
||||
height = int_or_none(medium.get('height'))
|
||||
proto = medium.get('proto')
|
||||
ext = medium.get('ext')
|
||||
src_ext = determine_ext(src)
|
||||
streamer = video.get('streamer') or base
|
||||
streamer = medium.get('streamer') or base
|
||||
|
||||
if proto == 'rtmp' or streamer.startswith('rtmp'):
|
||||
rtmp_count += 1
|
||||
|
@@ -3,6 +3,10 @@ from __future__ import unicode_literals
|
||||
|
||||
from .abc import ABCIE
|
||||
from .abc7news import Abc7NewsIE
|
||||
from .abcnews import (
|
||||
AbcNewsIE,
|
||||
AbcNewsVideoIE,
|
||||
)
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .acast import (
|
||||
ACastIE,
|
||||
@@ -107,6 +111,7 @@ from .cbc import (
|
||||
CBCPlayerIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
from .cbsinteractive import CBSInteractiveIE
|
||||
from .cbsnews import (
|
||||
CBSNewsIE,
|
||||
@@ -238,6 +243,7 @@ from .fktv import FKTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
from .formula1 import Formula1IE
|
||||
from .fourtube import FourTubeIE
|
||||
from .fox import FOXIE
|
||||
from .foxgay import FoxgayIE
|
||||
@@ -365,6 +371,7 @@ from .kuwo import (
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lemonde import LemondeIE
|
||||
from .leeco import (
|
||||
@@ -390,6 +397,7 @@ from .livestream import (
|
||||
LivestreamShortenerIE,
|
||||
)
|
||||
from .lnkgo import LnkGoIE
|
||||
from .localnews8 import LocalNews8IE
|
||||
from .lovehomeporn import LoveHomePornIE
|
||||
from .lrt import LRTIE
|
||||
from .lynda import (
|
||||
@@ -663,6 +671,7 @@ from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenjunkies import ScreenJunkiesIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sexu import SexuIE
|
||||
from .shahid import ShahidIE
|
||||
|
25
youtube_dl/extractor/formula1.py
Normal file
25
youtube_dl/extractor/formula1.py
Normal file
@@ -0,0 +1,25 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class Formula1IE(InfoExtractor):
    """Extractor for formula1.com video pages.

    The page is only used to find the embed code in its ``data-videoid``
    attribute; the result is a ``url_result`` for the matching ``ooyala:`` URL.
    """
    _VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
    _TEST = {
        'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
        'md5': '8c79e54be72078b26b89e0e111c0502b',
        'info_dict': {
            'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
            'ext': 'flv',
            'title': 'Race highlights - Spain 2016',
        }
    }

    def _real_extract(self, url):
        """Look up the embed code on the page and delegate to ``ooyala:<code>``."""
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)
        embed_code = self._search_regex(
            r'data-videoid="([^"]+)"', html, 'ooyala embed code')
        ooyala_url = 'ooyala:%s' % embed_code
        return self.url_result(ooyala_url, 'Ooyala', embed_code)
|
@@ -717,15 +717,18 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
# Wistia embed
|
||||
{
|
||||
'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
|
||||
'md5': '8788b683c777a5cf25621eaf286d0c23',
|
||||
'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
|
||||
'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
|
||||
'info_dict': {
|
||||
'id': '1cfaf6b7ea',
|
||||
'id': '6e2wtrbdaf',
|
||||
'ext': 'mov',
|
||||
'title': 'md5:51364a8d3d009997ba99656004b5e20d',
|
||||
'duration': 643.0,
|
||||
'filesize': 182808282,
|
||||
'uploader': 'education-portal.com',
|
||||
'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
|
||||
'description': 'a Paywall Videos video from Remilon',
|
||||
'duration': 644.072,
|
||||
'uploader': 'study.com',
|
||||
'timestamp': 1459678540,
|
||||
'upload_date': '20160403',
|
||||
'filesize': 24687186,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -734,14 +737,30 @@ class GenericIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'uxjb0lwrcz',
|
||||
'ext': 'mp4',
|
||||
'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
|
||||
'title': 'Conversation about Hexagonal Rails Part 1',
|
||||
'description': 'a Martin Fowler video from ThoughtWorks',
|
||||
'duration': 1715.0,
|
||||
'uploader': 'thoughtworks.wistia.com',
|
||||
'upload_date': '20140603',
|
||||
'timestamp': 1401832161,
|
||||
'upload_date': '20140603',
|
||||
},
|
||||
},
|
||||
# Wistia standard embed (async)
|
||||
{
|
||||
'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
|
||||
'info_dict': {
|
||||
'id': '807fafadvk',
|
||||
'ext': 'mp4',
|
||||
'title': 'Drip Brennan Dunn Workshop',
|
||||
'description': 'a JV Webinars video from getdrip-1',
|
||||
'duration': 4986.95,
|
||||
'timestamp': 1463607249,
|
||||
'upload_date': '20160518',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# Soundcloud embed
|
||||
{
|
||||
'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
|
||||
@@ -1548,21 +1567,26 @@ class GenericIE(InfoExtractor):
|
||||
'url': embed_url,
|
||||
'ie_key': 'Wistia',
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
|
||||
if match:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
|
||||
'url': 'wistia:%s' % match.group('id'),
|
||||
'ie_key': 'Wistia',
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'id': match.group('id')
|
||||
}
|
||||
|
||||
match = re.search(
|
||||
r'''(?sx)
|
||||
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
|
||||
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
|
||||
''', webpage)
|
||||
if match:
|
||||
return self.url_result(self._proto_relative_url(
|
||||
'wistia:%s' % match.group('id')), 'Wistia')
|
||||
|
||||
# Look for SVT player
|
||||
svt_url = SVTIE._extract_url(webpage)
|
||||
if svt_url:
|
||||
|
@@ -5,33 +5,50 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class JWPlatformBaseIE(InfoExtractor):
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True):
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None):
|
||||
video_data = jwplayer_data['playlist'][0]
|
||||
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
source_type = source.get('type') or ''
|
||||
if source_type in ('application/vnd.apple.mpegurl', 'hls'):
|
||||
if source_type in ('application/vnd.apple.mpegurl', 'hls') or determine_ext(source_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', 'm3u8_native', fatal=False))
|
||||
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||
elif source_type.startswith('audio'):
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
a_format = {
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': int_or_none(source.get('height')),
|
||||
})
|
||||
}
|
||||
if source_url.startswith('rtmp'):
|
||||
# Must be a plain string: with a trailing comma this statement would
# store the 1-tuple ('flv',) as the extension.
a_format['ext'] = 'flv'
|
||||
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
'url': rtmp_url,
|
||||
'play_path': prefix + play_path,
|
||||
})
|
||||
if rtmp_params:
|
||||
a_format.update(rtmp_params)
|
||||
formats.append(a_format)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
33
youtube_dl/extractor/learnr.py
Normal file
33
youtube_dl/extractor/learnr.py
Normal file
@@ -0,0 +1,33 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class LearnrIE(InfoExtractor):
    """Extractor for learnr.pro video pages.

    The page supplies a ``videoId`` value that is returned as the target of
    a ``url_transparent`` result, keeping the learnr.pro numeric id as ``id``.
    """
    _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript',
        'md5': '3719fdf0a68397f49899e82c308a89de',
        'info_dict': {
            'id': '51624',
            'ext': 'mp4',
            'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript',
            'description': 'md5:b36dbfa92350176cdf12b4d388485503',
            'uploader': 'LearnCode.academy',
            'uploader_id': 'learncodeacademy',
            'upload_date': '20131021',
        },
        'add_ie': ['Youtube'],
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the ``videoId`` found on the page."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The embedded player's id is the only thing taken from the page.
        embedded_id = self._search_regex(
            r"videoId\s*:\s*'([^']+)'", page, 'youtube id')

        result = {'_type': 'url_transparent'}
        result['url'] = embedded_id
        result['id'] = video_id
        return result
|
47
youtube_dl/extractor/localnews8.py
Normal file
47
youtube_dl/extractor/localnews8.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class LocalNews8IE(InfoExtractor):
    """Extractor for localnews8.com article pages.

    The page carries a ``partnerId`` and a ``kaltura:<id>`` video id string;
    both are combined into a ``kaltura:<partner>:<id>`` URL that is returned
    as a ``url_transparent`` result for the Kaltura extractor.
    """
    _VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304',
        'md5': 'be4d48aea61aa2bde7be2ee47691ad20',
        'info_dict': {
            'id': '35183304',
            'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings',
            'ext': 'mp4',
            'title': 'Rexburg business turns carbon fiber scraps into wedding ring',
            'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.',
            'duration': 153,
            'timestamp': 1441844822,
            'upload_date': '20150910',
            'uploader_id': 'api',
        }
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the page's Kaltura video.

        Both page lookups are mandatory; ``_search_regex`` is called without
        ``fatal=False`` for either of them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        partner_id = self._search_regex(
            r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1',
            webpage, 'partner id', group='id')
        # The label below is the field name given to _search_regex
        # (previously misspelled 'videl id').
        kaltura_id = self._search_regex(
            r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1',
            webpage, 'video id', group='id')

        return {
            '_type': 'url_transparent',
            'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
            'ie_key': 'Kaltura',
            'id': video_id,
            'display_id': display_id,
        }
|
@@ -1,19 +1,18 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
month_by_name,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class NDTVIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710',
|
||||
'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710',
|
||||
'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
|
||||
'info_dict': {
|
||||
'id': '300710',
|
||||
@@ -22,7 +21,7 @@ class NDTVIE(InfoExtractor):
|
||||
'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
|
||||
'upload_date': '20131208',
|
||||
'duration': 1327,
|
||||
'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg',
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -30,36 +29,19 @@ class NDTVIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - NDTV')
|
||||
|
||||
filename = self._search_regex(
|
||||
r"__filename='([^']+)'", webpage, 'video filename')
|
||||
video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
|
||||
filename)
|
||||
video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename
|
||||
|
||||
duration = int_or_none(self._search_regex(
|
||||
r"__duration='([^']+)'", webpage, 'duration', fatal=False))
|
||||
|
||||
date_m = re.search(r'''(?x)
|
||||
<p\s+class="vod_dateline">\s*
|
||||
Published\s+On:\s*
|
||||
(?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
|
||||
''', webpage)
|
||||
upload_date = None
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'publish-date', webpage, 'upload date', fatal=False))
|
||||
|
||||
if date_m is not None:
|
||||
month = month_by_name(date_m.group('monthname'))
|
||||
if month is not None:
|
||||
upload_date = '%s%02d%02d' % (
|
||||
date_m.group('year'), month, int(date_m.group('day')))
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
READ_MORE = ' (Read more)'
|
||||
if description.endswith(READ_MORE):
|
||||
description = description[:-len(READ_MORE)]
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
TITLE_SUFFIX = ' - NDTV'
|
||||
if title.endswith(TITLE_SUFFIX):
|
||||
title = title[:-len(TITLE_SUFFIX)]
|
||||
description = remove_end(self._og_search_description(webpage), ' (Read more)')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -2,8 +2,12 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
qualities,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
@@ -16,12 +20,12 @@ class NFBIE(InfoExtractor):
|
||||
'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
|
||||
'info_dict': {
|
||||
'id': 'qallunaat_why_white_people_are_funny',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Qallunaat! Why White People Are Funny ',
|
||||
'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
|
||||
'description': 'md5:6b8e32dde3abf91e58857b174916620c',
|
||||
'duration': 3128,
|
||||
'creator': 'Mark Sandiford',
|
||||
'uploader': 'Mark Sandiford',
|
||||
'uploader_id': 'mark-sandiford',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@@ -31,65 +35,78 @@ class NFBIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page = self._download_webpage(
|
||||
'https://www.nfb.ca/film/%s' % video_id, video_id,
|
||||
'Downloading film page')
|
||||
|
||||
uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
|
||||
page, 'director id', fatal=False)
|
||||
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
|
||||
page, 'director name', fatal=False)
|
||||
|
||||
request = sanitized_Request(
|
||||
config = self._download_xml(
|
||||
'https://www.nfb.ca/film/%s/player_config' % video_id,
|
||||
urlencode_postdata({'getConfig': 'true'}))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
|
||||
video_id, 'Downloading player config XML',
|
||||
data=urlencode_postdata({'getConfig': 'true'}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf'
|
||||
})
|
||||
|
||||
config = self._download_xml(request, video_id, 'Downloading player config XML')
|
||||
|
||||
title = None
|
||||
description = None
|
||||
thumbnail = None
|
||||
duration = None
|
||||
formats = []
|
||||
|
||||
def extract_thumbnail(media):
|
||||
thumbnails = {}
|
||||
for asset in media.findall('assets/asset'):
|
||||
thumbnails[asset.get('quality')] = asset.find('default/url').text
|
||||
if not thumbnails:
|
||||
return None
|
||||
if 'high' in thumbnails:
|
||||
return thumbnails['high']
|
||||
return list(thumbnails.values())[0]
|
||||
title, description, thumbnail, duration, uploader, author = [None] * 6
|
||||
# Two independent lists: `[[]] * 2` would bind both names to the same list
# object, so every format appended below would also show up in `thumbnails`
# whenever no posterImage media rebinds it.
thumbnails, formats = [], []
|
||||
subtitles = {}
|
||||
|
||||
for media in config.findall('./player/stream/media'):
|
||||
if media.get('type') == 'posterImage':
|
||||
thumbnail = extract_thumbnail(media)
|
||||
elif media.get('type') == 'video':
|
||||
duration = int(media.get('duration'))
|
||||
title = media.find('title').text
|
||||
description = media.find('description').text
|
||||
# It seems assets always go from lower to better quality, so no need to sort
|
||||
quality_key = qualities(('low', 'high'))
|
||||
thumbnails = []
|
||||
for asset in media.findall('assets/asset'):
|
||||
for x in asset:
|
||||
asset_url = xpath_text(asset, 'default/url', default=None)
|
||||
if not asset_url:
|
||||
continue
|
||||
quality = asset.get('quality')
|
||||
thumbnails.append({
|
||||
'url': asset_url,
|
||||
'id': quality,
|
||||
'preference': quality_key(quality),
|
||||
})
|
||||
elif media.get('type') == 'video':
|
||||
title = xpath_text(media, 'title', fatal=True)
|
||||
for asset in media.findall('assets/asset'):
|
||||
quality = asset.get('quality')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', quality or '', 'height', default=None))
|
||||
for node in asset:
|
||||
streamer = xpath_text(node, 'streamerURI', default=None)
|
||||
if not streamer:
|
||||
continue
|
||||
play_path = xpath_text(node, 'url', default=None)
|
||||
if not play_path:
|
||||
continue
|
||||
formats.append({
|
||||
'url': x.find('streamerURI').text,
|
||||
'app': x.find('streamerURI').text.split('/', 3)[3],
|
||||
'play_path': x.find('url').text,
|
||||
'url': streamer,
|
||||
'app': streamer.split('/', 3)[3],
|
||||
'play_path': play_path,
|
||||
'rtmp_live': False,
|
||||
'ext': 'mp4',
|
||||
'format_id': '%s-%s' % (x.tag, asset.get('quality')),
|
||||
'ext': 'flv',
|
||||
'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
description = clean_html(xpath_text(media, 'description'))
|
||||
uploader = xpath_text(media, 'author')
|
||||
duration = int_or_none(media.get('duration'))
|
||||
for subtitle in media.findall('./subtitles/subtitle'):
|
||||
subtitle_url = xpath_text(subtitle, 'url', default=None)
|
||||
if not subtitle_url:
|
||||
continue
|
||||
lang = xpath_text(subtitle, 'lang', default='en')
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
'creator': uploader,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@@ -55,7 +55,9 @@ class NRKBaseIE(InfoExtractor):
|
||||
for subtitle in ('webVtt', 'timedText'):
|
||||
subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('no', []).append({'url': subtitle_url})
|
||||
subtitles.setdefault('no', []).append({
|
||||
'url': compat_urllib_parse_unquote(subtitle_url)
|
||||
})
|
||||
entries.append({
|
||||
'id': asset.get('carrierId') or entry_id,
|
||||
'title': entry_title,
|
||||
|
@@ -64,7 +64,7 @@ def _decrypt_url(png):
|
||||
class RTVEALaCartaIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:alacarta'
|
||||
IE_DESC = 'RTVE a la carta'
|
||||
_VALID_URL = r'https?://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://www\.rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
|
||||
@@ -87,6 +87,9 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
|
86
youtube_dl/extractor/sendtonews.py
Normal file
86
youtube_dl/extractor/sendtonews.py
Normal file
@@ -0,0 +1,86 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class SendtoNewsIE(JWPlatformBaseIE):
    """Extractor for embed.sendtonews.com player embeds.

    The embed page configures a JWPlayer instance; its setup data is parsed
    with ``_parse_jwplayer_data`` and complemented with the title,
    description and duration scraped from the surrounding markup.
    """
    _VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P<query>[^#]+)'

    _TEST = {
        # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
        'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588',
            'ext': 'mp4',
            'title': 'Recap: CLE 15, CIN 6',
            'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
            'duration': 49,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    _URL_TEMPLATE = '//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s'

    @classmethod
    def _extract_url(cls, webpage):
        """Return the protocol-relative embed URL found in *webpage*, or None.

        The responsive embed script carries an ``SC`` parameter of the form
        ``<SK>-<MK>-<PK>``.  None is returned when no such script is present
        or when ``SC`` does not consist of exactly three dash-separated
        parts, so a malformed value cannot raise in the calling extractor.
        """
        mobj = re.search(r'''(?x)<script[^>]+src=([\'"])
            (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
                .*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
            \1>''', webpage)
        if not mobj:
            return None
        parts = mobj.group('SC').split('-')
        if len(parts) != 3:
            return None
        sk, mk, pk = parts
        return cls._URL_TEMPLATE % (sk, mk, pk)

    def _real_extract(self, url):
        """Extract formats and metadata from a SendtoNews embed page.

        Raises an expected ExtractorError when any of the SK, MK or PK query
        parameters is missing from *url*.
        """
        mobj = re.match(self._VALID_URL, url)
        params = compat_parse_qs(mobj.group('query'))

        if 'SK' not in params or 'MK' not in params or 'PK' not in params:
            raise ExtractorError('Invalid URL', expected=True)

        video_id = '-'.join([params['SK'][0], params['MK'][0], params['PK'][0]])

        webpage = self._download_webpage(url, video_id)

        jwplayer_data_str = self._search_regex(
            r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data')
        # The setup call references these JavaScript variables by name;
        # substitute literal values so the string can be parsed as JSON.
        js_vars = {
            'w': 1024,
            'h': 768,
            'modeVar': 'html5',
        }
        for name, val in js_vars.items():
            js_val = '%d' % val if isinstance(val, int) else '"%s"' % val
            jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val)

        info_dict = self._parse_jwplayer_data(
            self._parse_json(jwplayer_data_str, video_id),
            video_id, require_title=False, rtmp_params={'no_resume': True})

        title = self._html_search_regex(
            r'<div[^>]+class="embedTitle">([^<]+)</div>', webpage, 'title')
        description = self._html_search_regex(
            r'<div[^>]+class="embedSubTitle">([^<]+)</div>', webpage,
            'description', fatal=False)
        duration = parse_duration(self._html_search_regex(
            r'<div[^>]+class="embedDetails">([0-9:]+)', webpage,
            'duration', fatal=False))

        info_dict.update({
            'title': title,
            'description': description,
            'duration': duration,
        })

        return info_dict
|
@@ -47,7 +47,8 @@ class TwentyFourVideoIE(InfoExtractor):
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_regex(
|
||||
r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False)
|
||||
r'<(p|span)[^>]+itemprop="description"[^>]*>(?P<description>[^<]+)</\1>',
|
||||
webpage, 'description', fatal=False, group='description')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = int_or_none(self._og_search_property(
|
||||
'duration', webpage, 'duration', fatal=False))
|
||||
|
@@ -3,16 +3,17 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class WistiaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
|
||||
_API_URL = 'http://fast.wistia.com/embed/medias/{0:}.json'
|
||||
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)'
|
||||
_API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
|
||||
_IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
|
||||
'md5': 'cafeb56ec0c53c18c97405eecb3133df',
|
||||
'info_dict': {
|
||||
@@ -24,36 +25,54 @@ class WistiaIE(InfoExtractor):
|
||||
'timestamp': 1386185018,
|
||||
'duration': 117,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'wistia:sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# with hls video
|
||||
'url': 'wistia:807fafadvk',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = sanitized_Request(self._API_URL.format(video_id))
|
||||
request.add_header('Referer', url) # Some videos require this.
|
||||
data_json = self._download_json(request, video_id)
|
||||
data_json = self._download_json(
|
||||
self._API_URL % video_id, video_id,
|
||||
# Some videos require this.
|
||||
headers={
|
||||
'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id,
|
||||
})
|
||||
|
||||
if data_json.get('error'):
|
||||
raise ExtractorError('Error while getting the playlist',
|
||||
expected=True)
|
||||
raise ExtractorError(
|
||||
'Error while getting the playlist', expected=True)
|
||||
|
||||
data = data_json['media']
|
||||
title = data['name']
|
||||
|
||||
formats = []
|
||||
thumbnails = []
|
||||
for a in data['assets']:
|
||||
aurl = a.get('url')
|
||||
if not aurl:
|
||||
continue
|
||||
astatus = a.get('status')
|
||||
atype = a.get('type')
|
||||
if (astatus is not None and astatus != 2) or atype == 'preview':
|
||||
if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'):
|
||||
continue
|
||||
elif atype in ('still', 'still_image'):
|
||||
thumbnails.append({
|
||||
'url': a['url'],
|
||||
'resolution': '%dx%d' % (a['width'], a['height']),
|
||||
'url': aurl,
|
||||
'width': int_or_none(a.get('width')),
|
||||
'height': int_or_none(a.get('height')),
|
||||
})
|
||||
else:
|
||||
aext = a.get('ext')
|
||||
is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8'
|
||||
formats.append({
|
||||
'format_id': atype,
|
||||
'url': a['url'],
|
||||
'url': aurl,
|
||||
'tbr': int_or_none(a.get('bitrate')),
|
||||
'vbr': int_or_none(a.get('opt_vbitrate')),
|
||||
'width': int_or_none(a.get('width')),
|
||||
@@ -61,7 +80,8 @@ class WistiaIE(InfoExtractor):
|
||||
'filesize': int_or_none(a.get('size')),
|
||||
'vcodec': a.get('codec'),
|
||||
'container': a.get('container'),
|
||||
'ext': a.get('ext'),
|
||||
'ext': 'mp4' if is_m3u8 else aext,
|
||||
'protocol': 'm3u8' if is_m3u8 else None,
|
||||
'preference': 1 if atype == 'original' else None,
|
||||
})
|
||||
|
||||
@@ -73,6 +93,6 @@ class WistiaIE(InfoExtractor):
|
||||
'description': data.get('seoDescription'),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'duration': int_or_none(data.get('duration')),
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
'timestamp': int_or_none(data.get('createdAt')),
|
||||
}
|
||||
|
@@ -883,12 +883,17 @@ def make_socks_conn_class(base_class, socks_proxy):
|
||||
elif url_components.scheme.lower() == 'socks4a':
|
||||
socks_type = ProxyType.SOCKS4A
|
||||
|
||||
def unquote_if_non_empty(s):
|
||||
if not s:
|
||||
return s
|
||||
return compat_urllib_parse_unquote_plus(s)
|
||||
|
||||
proxy_args = (
|
||||
socks_type,
|
||||
url_components.hostname, url_components.port or 1080,
|
||||
True, # Remote DNS
|
||||
compat_urllib_parse_unquote_plus(url_components.username),
|
||||
compat_urllib_parse_unquote_plus(url_components.password),
|
||||
unquote_if_non_empty(url_components.username),
|
||||
unquote_if_non_empty(url_components.password),
|
||||
)
|
||||
|
||||
class SocksConnection(base_class):
|
||||
@@ -1544,15 +1549,11 @@ def setproctitle(title):
|
||||
|
||||
|
||||
def remove_start(s, start):
    """Return s with the leading start stripped, if present.

    s is returned unchanged when it is None or does not begin with start.
    """
    # None is passed through rather than raising AttributeError.
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
|
||||
|
||||
|
||||
def remove_end(s, end):
    """Return s with the trailing end stripped, if present.

    s is returned unchanged when it is None, when end is empty, or when
    s does not finish with end.
    """
    # An empty suffix must be a no-op: every string "ends with" '', and
    # s[:-0] is s[:0], which would wrongly yield an empty string.
    if s is None or not end or not s.endswith(end):
        return s
    return s[:-len(end)]
|
||||
|
||||
|
||||
def remove_quotes(s):
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.05.16'
|
||||
__version__ = '2016.05.21'
|
||||
|
Reference in New Issue
Block a user