Compare commits

..

13 Commits

Author SHA1 Message Date
Sergey M․
5275efe30d release 2016.07.22 2016-07-22 23:11:28 +07:00
Sergey M․
b13647cf3c [eporner] Fix extraction (Closes #10139) 2016-07-22 23:04:13 +07:00
Sergey M․
add7d2a0e2 [pornhub] Make error regex less ambiguous (Closes #10138) 2016-07-22 21:24:09 +07:00
Sergey M․
e298d3a08c [youtube] Fix authentication (Closes #10140) 2016-07-22 21:05:39 +07:00
Sergey M․
fd8c8c7dcd [youtube:shared] Relax _VALID_URL 2016-07-21 22:58:34 +07:00
Sergey M․
9158af16cc [bbc.co.uk:iplayer:playlist] Add support for group URLs 2016-07-21 22:37:36 +07:00
Sergey M․
c6668e4ad1 [bbc.co.uk:iplayer:playlist] Skip unavailable test 2016-07-21 22:34:55 +07:00
Sergey M․
84e8cca48b [youjizz] Relax _VALID_URL (Closes #10131) 2016-07-20 22:41:13 +07:00
Sergey M․
790b06b7d4 [odatv] Improve (Closes #9285) 2016-07-20 21:43:22 +07:00
skacurt
740d7c49c2 [odatv] Add extractor 2016-07-20 21:42:05 +07:00
Sergey M․
4e51ec5f57 [extractors] Add import for comedycentral.tv 2016-07-19 22:50:37 +07:00
Sergey M․
05087d1b4c [bbc] Improve extraction from sxml playlists 2016-07-19 22:49:38 +07:00
Sergey M․
a66a73ee90 [ard] Add test for rbb-online 2016-07-18 02:25:31 +07:00
11 changed files with 177 additions and 41 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.17**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.22**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.07.17
[debug] youtube-dl version 2016.07.22
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -142,6 +142,7 @@
- **ComCarCoff**
- **ComedyCentral**
- **ComedyCentralShows**: The Daily Show / The Colbert Report
- **ComedyCentralTV**
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
- **Coub**
- **Cracked**
@@ -477,6 +478,7 @@
- **NYTimes**
- **NYTimesArticle**
- **ocw.mit.edu**
- **OdaTV**
- **Odnoklassniki**
- **OktoberfestTV**
- **on.aol.com**

View File

@@ -62,6 +62,17 @@ class ARDMediathekIE(InfoExtractor):
}, {
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
'only_matching': True,
}, {
# audio
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
'md5': '4e8f00631aac0395fee17368ac0e9867',
'info_dict': {
'id': '30796318',
'ext': 'mp3',
'title': 'Vor dem Fest',
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
'duration': 3287,
},
}]
def _extract_media_info(self, media_info_url, webpage, video_id):

View File

@@ -589,7 +589,8 @@ class BBCIE(BBCCoUkIE):
'info_dict': {
'id': '150615_telabyad_kentin_cogu',
'ext': 'mp4',
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi",
'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
'timestamp': 1434397334,
'upload_date': '20150615',
},
@@ -603,6 +604,7 @@ class BBCIE(BBCCoUkIE):
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
'ext': 'mp4',
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
'timestamp': 1434713142,
'upload_date': '20150619',
},
@@ -818,8 +820,20 @@ class BBCIE(BBCCoUkIE):
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
if playlist:
entries.append(self._extract_from_playlist_sxml(
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
for key in ('progressiveDownload', 'streaming'):
playlist_url = playlist.get('%sUrl' % key)
if not playlist_url:
continue
try:
entries.append(self._extract_from_playlist_sxml(
playlist_url, playlist_id, timestamp))
except Exception as e:
# Some playlist URL may fail with 500, at the same time
# the other one may work fine (e.g.
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
continue
raise
if entries:
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
@@ -998,10 +1012,10 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
IE_NAME = 'bbc.co.uk:iplayer:playlist'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
_URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
_VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
_TEST = {
_TESTS = [{
'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
'info_dict': {
'id': 'b05rcz9v',
@@ -1009,7 +1023,17 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
'description': 'French thriller serial about a missing teenager.',
},
'playlist_mincount': 6,
}
'skip': 'This programme is not currently available on BBC iPlayer',
}, {
# Available for over a year unlike 30 days for most other programmes
'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
'info_dict': {
'id': 'p02tcc32',
'title': 'Bohemian Icons',
'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
},
'playlist_mincount': 10,
}]
def _extract_title_and_description(self, webpage):
title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)

View File

@@ -4,19 +4,23 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
encode_base_n,
ExtractorError,
int_or_none,
parse_duration,
str_to_int,
)
class EpornerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
_TESTS = [{
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
'md5': '39d486f046212d8e1b911c52ab4691f8',
'info_dict': {
'id': '95008',
'id': 'qlDUmNsj6VS',
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
'ext': 'mp4',
'title': 'Infamous Tiffany Teen Strip Tease Video',
@@ -28,34 +32,72 @@ class EpornerIE(InfoExtractor):
# New (May 2016) URL layout
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
'only_matching': True,
}, {
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, display_id)
title = self._html_search_regex(
r'<title>(.*?) - EPORNER', webpage, 'title')
webpage, urlh = self._download_webpage_handle(url, display_id)
redirect_url = 'http://www.eporner.com/config5/%s' % video_id
player_code = self._download_webpage(
redirect_url, display_id, note='Downloading player config')
video_id = self._match_id(compat_str(urlh.geturl()))
sources = self._search_regex(
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources')
hash = self._search_regex(
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
r'<title>(.+?) - EPORNER', webpage, 'title')
# Reverse engineered from vjs.js
def calc_hash(s):
return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8)))
video = self._download_json(
'http://www.eporner.com/xhr/video/%s' % video_id,
display_id, note='Downloading video JSON',
query={
'hash': calc_hash(hash),
'device': 'generic',
'domain': 'www.eporner.com',
'fallback': 'false',
})
if video.get('available') is False:
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, video['message']), expected=True)
sources = video['sources']
formats = []
for video_url, format_id in re.findall(r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources):
fmt = {
'url': video_url,
'format_id': format_id,
}
m = re.search(r'^(\d+)', format_id)
if m:
fmt['height'] = int(m.group(1))
formats.append(fmt)
for kind, formats_dict in sources.items():
if not isinstance(formats_dict, dict):
continue
for format_id, format_dict in formats_dict.items():
if not isinstance(format_dict, dict):
continue
src = format_dict.get('src')
if not isinstance(src, compat_str) or not src.startswith('http'):
continue
if kind == 'hls':
formats.extend(self._extract_m3u8_formats(
src, display_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=kind, fatal=False))
else:
height = int_or_none(self._search_regex(
r'(\d+)[pP]', format_id, 'height', default=None))
fps = int_or_none(self._search_regex(
r'(\d+)fps', format_id, 'fps', default=None))
formats.append({
'url': src,
'format_id': format_id,
'height': height,
'fps': fps,
})
self._sort_formats(formats)
duration = parse_duration(self._html_search_meta('duration', webpage))

View File

@@ -156,7 +156,11 @@ from .cnn import (
)
from .coub import CoubIE
from .collegerama import CollegeRamaIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .comedycentral import (
ComedyCentralIE,
ComedyCentralShowsIE,
ComedyCentralTVIE,
)
from .comcarcoff import ComCarCoffIE
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .commonprotocols import RtmpIE
@@ -583,6 +587,7 @@ from .nytimes import (
NYTimesArticleIE,
)
from .nuvid import NuvidIE
from .odatv import OdaTVIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
from .onet import (

View File

@@ -0,0 +1,50 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
NO_DEFAULT,
remove_start
)
class OdaTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?.*\bid=(?P<id>[^&]+)'
_TESTS = [{
'url': 'http://odatv.com/vid_video.php?id=8E388',
'md5': 'dc61d052f205c9bf2da3545691485154',
'info_dict': {
'id': '8E388',
'ext': 'mp4',
'title': 'Artık Davutoğlu ile devam edemeyiz'
}
}, {
# mobile URL
'url': 'http://odatv.com/mob_video.php?id=8E388',
'only_matching': True,
}, {
# no video
'url': 'http://odatv.com/mob_video.php?id=8E900',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
no_video = 'NO VIDEO!' in webpage
video_url = self._search_regex(
r'mp4\s*:\s*(["\'])(?P<url>http.+?)\1', webpage, 'video url',
default=None if no_video else NO_DEFAULT, group='url')
if no_video:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
return {
'id': video_id,
'url': video_url,
'title': remove_start(self._og_search_title(webpage), 'Video: '),
'thumbnail': self._og_search_thumbnail(webpage),
}

View File

@@ -111,7 +111,7 @@ class PornHubIE(InfoExtractor):
webpage = self._download_webpage(req, video_id)
error_msg = self._html_search_regex(
r'(?s)<div[^>]+class=(["\']).*?\b(?:removed|userMessageSection)\b.*?\1[^>]*>(?P<error>.+?)</div>',
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
webpage, 'error message', default=None, group='error')
if error_msg:
error_msg = re.sub(r'\s+', ' ', error_msg)

View File

@@ -9,8 +9,8 @@ from ..utils import (
class YouJizzIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/[^/#?]+-(?P<id>[0-9]+)\.html(?:$|[?#])'
_TEST = {
_VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])'
_TESTS = [{
'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
'md5': '07e15fa469ba384c7693fd246905547c',
'info_dict': {
@@ -19,7 +19,10 @@ class YouJizzIE(InfoExtractor):
'title': 'Zeichentrick 1',
'age_limit': 18,
}
}
}, {
'url': 'http://www.youjizz.com/videos/-2189178.html',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -53,6 +53,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
_PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
@@ -116,12 +117,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'hl': 'en_US',
}
login_data = urlencode_postdata(login_form_strs)
req = sanitized_Request(self._LOGIN_URL, login_data)
login_results = self._download_webpage(
req, None,
note='Logging in', errnote='unable to log in', fatal=False)
self._PASSWORD_CHALLENGE_URL, None,
note='Logging in', errnote='unable to log in', fatal=False,
data=urlencode_postdata(login_form_strs))
if login_results is False:
return False
@@ -1736,7 +1735,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
class YoutubeSharedVideoIE(InfoExtractor):
_VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?ci=(?P<id>[0-9A-Za-z_-]{11})'
_VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})'
IE_NAME = 'youtube:shared'
_TEST = {

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.07.17'
__version__ = '2016.07.22'