Compare commits

...

37 Commits

Author SHA1 Message Date
Philipp Hagemeister
d77650525d release 2014.03.21.5 2014-03-21 14:52:57 +01:00
Philipp Hagemeister
3e50c29984 release 2014.03.21.4 2014-03-21 14:38:55 +01:00
Philipp Hagemeister
64e7ad6045 [videolectures] (New extractor) 2014-03-21 14:38:41 +01:00
Philipp Hagemeister
23f4a93bb4 [daum] Modernize 2014-03-21 14:38:41 +01:00
Jaime Marquínez Ferrándiz
6f13b055f1 [cspan] Fix typo in a comment 2014-03-21 08:01:20 +01:00
Philipp Hagemeister
1f91bd15c3 release 2014.03.21.3 2014-03-21 02:10:35 +01:00
Philipp Hagemeister
11a15be4ce [cspan] Add support for newer videos (Fixes #2577) 2014-03-21 02:10:24 +01:00
Philipp Hagemeister
14e17e18cb release 2014.03.21.2 2014-03-21 01:42:45 +01:00
Philipp Hagemeister
1b124d1942 [parliamentliveuk] Add extractor 2014-03-21 01:42:28 +01:00
Philipp Hagemeister
747373d4ae release 2014.03.21.1 2014-03-21 01:00:27 +01:00
Philipp Hagemeister
18d367c0a5 Remove legacy InfoExtractors file 2014-03-21 01:00:06 +01:00
Philipp Hagemeister
a1a530b067 [pbs] Add support for video ratings 2014-03-21 00:59:51 +01:00
Philipp Hagemeister
cb9722cb3f [viki] Modernize 2014-03-21 00:53:18 +01:00
Philipp Hagemeister
773c0b4bb8 [pbs] Add support for widget URLs (Fixes #2594) 2014-03-21 00:46:32 +01:00
Philipp Hagemeister
23c322a531 release 2014.03.21 2014-03-21 00:37:23 +01:00
Philipp Hagemeister
7e8c0af004 Add --prefer-insecure option (Fixes #2364) 2014-03-21 00:37:10 +01:00
Philipp Hagemeister
d2983ccb25 [ninegag] Modernize and remove unused import 2014-03-21 00:37:10 +01:00
Philipp Hagemeister
f24e9833dc [youporn] Modernize 2014-03-21 00:37:10 +01:00
Sergey M․
bc2bdf5709 [kontrtube] Modernize 2014-03-20 23:05:57 +07:00
Philipp Hagemeister
627a209f74 release 2014.03.20 2014-03-20 16:35:54 +01:00
Philipp Hagemeister
1a4895453a [YoutubeDL] Improve error message 2014-03-20 16:33:46 +01:00
Philipp Hagemeister
aab74fa106 [ted] Simplify embed code (#2587) 2014-03-20 16:33:23 +01:00
Philipp Hagemeister
2bd9efd4c2 Merge remote-tracking branch 'anovicecodemonkey/TEDIEimprovements' 2014-03-20 16:24:34 +01:00
Jaime Marquínez Ferrándiz
39a743fb9b [arte] Modernize tests and fix _VALID_REGEX 2014-03-20 09:14:43 +01:00
Jaime Marquínez Ferrándiz
4966a0b22d [arte] Add extractor for concert.arte.tv (closes #2588) 2014-03-20 09:11:47 +01:00
anovicecodemonkey
fc26023120 [TEDIE] Add support for embedded TED video URLs 2014-03-20 01:04:21 +10:30
anovicecodemonkey
8d7c0cca13 [generic] Add support for embedded TED videos 2014-03-20 00:56:32 +10:30
Sergey M․
f66ede4328 [arte.tv:+7] Fix _VALID_URL 2014-03-19 21:23:55 +07:00
Philipp Hagemeister
cc88b90ec8 [devscripts/release] Bump the number of password tries to accommodate stubby-fingered @phihag 2014-03-18 15:02:37 +01:00
Philipp Hagemeister
b6c5fa9a0b release 2014.03.18.1 2014-03-18 14:42:59 +01:00
Philipp Hagemeister
dff10eaa77 release 2014.03.18 2014-03-18 14:31:03 +01:00
Philipp Hagemeister
4e6f9aeca1 Fix typo 2014-03-18 14:28:53 +01:00
Philipp Hagemeister
e68301af21 Fix getpass on Windows (Fixes #2547) 2014-03-18 14:27:42 +01:00
Sergey M․
17286a96f2 [iprima] Fix permission check regex 2014-03-18 19:33:28 +07:00
Jaime Marquínez Ferrándiz
0892363e6d Merge pull request #2580 from ericpardee/patch-1
Update to comedycentral.py (cc.com)
2014-03-18 08:14:39 +01:00
ericpardee
f102372b5f Update to comedycentral.py (cc.com)
Added cc.com, since it is the same site as comedycentral.com and is in use, e.g. http://www.cc.com/video-clips/fmyq0m/broad-city-a-beautiful-railroad-style-apartment
2014-03-17 18:01:26 -07:00
Jaime Marquínez Ferrándiz
ecbe1ad207 [generic] Fix access to removed function in python 3.4
The `Request.get_origin_req_host` method was deprecated in 3.3, use the
 `origin_req_host` property if it's not available, see http://docs.python.org/3.3/library/urllib.request.html#urllib.request.Request.get_origin_req_host.
2014-03-17 21:59:21 +01:00
24 changed files with 325 additions and 93 deletions

View File

@@ -36,6 +36,9 @@ which means you can modify it, redistribute it or use it however you like.
an empty string (--proxy "") for direct
connection
--no-check-certificate Suppress HTTPS certificate validation.
--prefer-insecure Use an unencrypted connection to retrieve
information about the video. (Currently
supported only for YouTube)
--cache-dir DIR Location in the filesystem where youtube-dl
can store some downloaded information
permanently. By default $XDG_CACHE_HOME

View File

@@ -70,7 +70,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
git checkout HEAD -- youtube-dl youtube-dl.exe
/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"

View File

@@ -141,6 +141,7 @@ class TestAllURLsMatching(unittest.TestCase):
def test_pbs(self):
# https://github.com/rg3/youtube-dl/issues/2350
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
if __name__ == '__main__':
unittest.main()

View File

@@ -1,4 +0,0 @@
# Legacy file for backwards compatibility, use youtube_dl.extractor instead!
from .extractor.common import InfoExtractor, SearchInfoExtractor
from .extractor import gen_extractors, get_info_extractor

View File

@@ -148,6 +148,8 @@ class YoutubeDL(object):
again.
cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use
socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text
@@ -533,7 +535,7 @@ class YoutubeDL(object):
else:
raise
else:
self.report_error('no suitable InfoExtractor: %s' % url)
self.report_error('no suitable InfoExtractor for URL %s' % url)
def process_ie_result(self, ie_result, download=True, extra_info={}):
"""

View File

@@ -56,7 +56,6 @@ __authors__ = (
__license__ = 'Public Domain'
import codecs
import getpass
import io
import locale
import optparse
@@ -68,6 +67,7 @@ import sys
from .utils import (
compat_getpass,
compat_print,
DateRange,
decodeOption,
@@ -237,6 +237,9 @@ def parseOpts(overrideArguments=None):
'--proxy', dest='proxy', default=None, metavar='URL',
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option(
'--prefer-insecure', action='store_true', dest='prefer_insecure',
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
general.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
@@ -257,7 +260,6 @@ def parseOpts(overrideArguments=None):
action='store_true',
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
selection.add_option(
'--playlist-start',
dest='playliststart', metavar='NUMBER', default=1, type=int,
@@ -611,7 +613,7 @@ def _real_main(argv=None):
if opts.usetitle and opts.useid:
parser.error(u'using title conflicts with using video ID')
if opts.username is not None and opts.password is None:
opts.password = getpass.getpass(u'Type account password and press return:')
opts.password = compat_getpass(u'Type account password and press [Return]: ')
if opts.ratelimit is not None:
numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
if numeric_limit is None:
@@ -756,6 +758,7 @@ def _real_main(argv=None):
'download_archive': download_archive_fn,
'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate,
'prefer_insecure': opts.prefer_insecure,
'proxy': opts.proxy,
'socket_timeout': opts.socket_timeout,
'bidi_workaround': opts.bidi_workaround,

View File

@@ -10,6 +10,7 @@ from .arte import (
ArteTvIE,
ArteTVPlus7IE,
ArteTVCreativeIE,
ArteTVConcertIE,
ArteTVFutureIE,
ArteTVDDCIE,
)
@@ -173,6 +174,7 @@ from .nowness import NownessIE
from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE
from .orf import ORFIE
from .parliamentliveuk import ParliamentLiveUKIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .playvid import PlayvidIE
@@ -258,6 +260,7 @@ from .vice import ViceIE
from .viddler import ViddlerIE
from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE
from .videolecturesnet import VideoLecturesNetIE
from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
from .vimeo import (

View File

@@ -131,7 +131,7 @@ class ArteTvIE(InfoExtractor):
class ArteTVPlus7IE(InfoExtractor):
IE_NAME = 'arte.tv:+7'
_VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
@classmethod
def _extract_url_info(cls, url):
@@ -202,6 +202,8 @@ class ArteTVPlus7IE(InfoExtractor):
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
# The version with sourds/mal subtitles has also lower relevance
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
# Prefer http downloads over m3u8
0 if f['url'].endswith('m3u8') else 1,
)
formats = sorted(formats, key=sort_key)
def _format(format_info):
@@ -242,8 +244,9 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
_TEST = {
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
'file': '050489-002.mp4',
'info_dict': {
'id': '050489-002',
'ext': 'mp4',
'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
},
}
@@ -255,8 +258,9 @@ class ArteTVFutureIE(ArteTVPlus7IE):
_TEST = {
'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
'file': '050940-003.mp4',
'info_dict': {
'id': '050940-003',
'ext': 'mp4',
'title': 'Les champignons au secours de la planète',
},
}
@@ -270,7 +274,7 @@ class ArteTVFutureIE(ArteTVPlus7IE):
class ArteTVDDCIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:ddc'
_VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
def _real_extract(self, url):
video_id, lang = self._extract_url_info(url)
@@ -284,3 +288,19 @@ class ArteTVDDCIE(ArteTVPlus7IE):
javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
return self._extract_from_json_url(json_url, video_id, lang)
# Extractor entry for concert.arte.tv pages. The class only declares its
# name, URL pattern and test case; everything else is inherited from
# ArteTVPlus7IE.
class ArteTVConcertIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:concert'
# Captures the language prefix (de or fr) and the remainder of the path as
# the "id" group.
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'
_TEST = {
'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
'md5': '9ea035b7bd69696b67aa2ccaaa218161',
'info_dict': {
# NOTE(review): the expected id differs from the path captured by
# _VALID_URL, so the final id presumably comes from the inherited
# extraction code -- confirm against ArteTVPlus7IE.
'id': '186',
'ext': 'mp4',
'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
'upload_date': '20140128',
},
}

View File

@@ -14,7 +14,7 @@ from ..utils import (
class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
_VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/
(video-clips|episodes|cc-studios|video-collections)
/(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'

View File

@@ -10,9 +10,9 @@ from ..utils import (
class CSpanIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
_VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
IE_DESC = 'C-SPAN'
_TEST = {
_TESTS = [{
'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
'md5': '8e44ce11f0f725527daccc453f553eb0',
'info_dict': {
@@ -22,13 +22,24 @@ class CSpanIE(InfoExtractor):
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
},
'skip': 'Regularly fails on travis, for unknown reasons',
}
}, {
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
# For whatever reason, the served video alternates between
# two different ones
#'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
'info_dict': {
'id': '340723',
'ext': 'mp4',
'title': 'International Health Care Models',
'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_id = mobj.group('id')
webpage = self._download_webpage(url, page_id)
video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id')
video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id')
description = self._html_search_regex(
[

View File

@@ -1,25 +1,28 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
determine_ext,
)
class DaumIE(InfoExtractor):
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
IE_NAME = u'daum.net'
IE_NAME = 'daum.net'
_TEST = {
u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
u'file': u'52554690.mp4',
u'info_dict': {
u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
u'upload_date': u'20130831',
u'duration': 3868,
'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
'info_dict': {
'id': '52554690',
'ext': 'mp4',
'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
'upload_date': '20130831',
'duration': 3868,
},
}
@@ -30,14 +33,14 @@ class DaumIE(InfoExtractor):
webpage = self._download_webpage(canonical_url, video_id)
full_id = self._search_regex(
r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
webpage, u'full id')
webpage, 'full id')
query = compat_urllib_parse.urlencode({'vid': full_id})
info = self._download_xml(
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
u'Downloading video info')
'Downloading video info')
urls = self._download_xml(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
video_id, u'Downloading video formats info')
video_id, 'Downloading video formats info')
self.to_screen(u'%s: Getting video urls' % video_id)
formats = []
@@ -53,7 +56,6 @@ class DaumIE(InfoExtractor):
format_url = url_doc.find('result/url').text
formats.append({
'url': format_url,
'ext': determine_ext(format_url),
'format_id': profile,
})

View File

@@ -159,6 +159,18 @@ class GenericIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
},
# Embedded TED video
{
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
'info_dict': {
'id': '981',
'ext': 'mp4',
'title': 'My web playroom',
'uploader': 'Ze Frank',
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
}
}
]
@@ -185,9 +197,14 @@ class GenericIE(InfoExtractor):
newurl = newurl.replace(' ', '%20')
newheaders = dict((k,v) for k,v in req.headers.items()
if k.lower() not in ("content-length", "content-type"))
try:
# This function was deprecated in python 3.3 and removed in 3.4
origin_req_host = req.get_origin_req_host()
except AttributeError:
origin_req_host = req.origin_req_host
return HEADRequest(newurl,
headers=newheaders,
origin_req_host=req.get_origin_req_host(),
origin_req_host=origin_req_host,
unverifiable=True)
else:
raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
@@ -482,6 +499,13 @@ class GenericIE(InfoExtractor):
if mobj is None:
# Broaden the search a little bit: JWPlayer JS loader
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
# Look for embedded TED player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'TED')
if mobj is None:
# Try to find twitter cards info
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)

View File

@@ -48,7 +48,7 @@ class IPrimaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
if re.search(r'Nemáte oprávnění přistupovat na tuto stránku.\s*</div>', webpage):
if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage):
raise ExtractorError(
'%s said: You do not have permission to access this page' % self.IE_NAME, expected=True)

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class KontrTubeIE(InfoExtractor):
@@ -32,27 +33,26 @@ class KontrTubeIE(InfoExtractor):
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
'video title')
title = self._html_search_regex(
r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title')
description = self._html_search_meta('description', webpage, 'video description')
mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
webpage)
mobj = re.search(
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
'view count', fatal=False)
view_count = int(view_count) if view_count is not None else None
view_count = self._html_search_regex(
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
comment_count = None
comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
fatal=False)
comment_str = self._html_search_regex(
r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
if comment_str.startswith('комментариев нет'):
comment_count = 0
else:
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
if mobj:
comment_count = int(mobj.group('total'))
comment_count = mobj.group('total')
return {
'id': video_id,
@@ -61,6 +61,6 @@ class KontrTubeIE(InfoExtractor):
'title': title,
'description': description,
'duration': duration,
'view_count': view_count,
'comment_count': comment_count,
'view_count': int_or_none(view_count),
'comment_count': int_or_none(comment_count),
}

View File

@@ -1,6 +1,5 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
@@ -12,8 +11,9 @@ class NineGagIE(InfoExtractor):
_TEST = {
"url": "http://9gag.tv/v/1912",
"file": "1912.mp4",
"info_dict": {
"id": "1912",
"ext": "mp4",
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
"view_count": int,

View File

@@ -0,0 +1,57 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate,
)
class ParliamentLiveUKIE(InfoExtractor):
    """Extractor for meeting recordings on parliamentlive.tv."""

    IE_NAME = 'parliamentlive.tv'
    IE_DESC = 'UK parliament videos'
    _VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia',
        'info_dict': {
            'id': '15121',
            'ext': 'asf',
            'title': 'hoc home affairs committee, 18 mar 2014.pm',
            'description': 'md5:033b3acdf83304cd43946b2d5e5798d1',
        },
        'params': {
            'skip_download': True,  # Requires mplayer (mms)
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the meeting referenced by *url*.

        The player page is downloaded first; it supplies the address of an
        ASX metadata document (whose first REF element carries the stream
        URL) as well as the title and the description.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # The embedded media player points at the ASX metadata document.
        metadata_url = self._html_search_regex(
            r'embed.*?src="([^"]+)" name="MediaPlayer"', page,
            'metadata URL')
        asx_doc = self._download_xml(
            metadata_url, video_id, 'Downloading ASX metadata')
        ref_el = asx_doc.find('.//REF')
        stream_url = ref_el.attrib['HREF']

        # The capture spans two adjacent quoted arguments of
        # player.setClipDetails(...); the '", "' between them is turned
        # into a plain ', ' afterwards.
        raw_title = self._search_regex(
            r'''(?x)player\.setClipDetails\(
                (?:(?:[0-9]+|"[^"]+"),\s*){2}
                "([^"]+",\s*"[^"]+)"
                ''',
            page, 'title')
        title = raw_title.replace('", "', ', ')

        description = self._html_search_regex(
            r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>',
            page, 'description')

        info = {
            'id': video_id,
            'ext': 'asf',
            'url': stream_url,
            'title': title,
            'description': description,
        }
        return info

View File

@@ -3,6 +3,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
US_RATINGS,
)
class PBSIE(InfoExtractor):
@@ -13,7 +16,7 @@ class PBSIE(InfoExtractor):
# Article with embedded player
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
# Player
video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
)
'''
@@ -57,6 +60,11 @@ class PBSIE(InfoExtractor):
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info = self._download_json(info_url, display_id)
rating_str = info.get('rating')
if rating_str is not None:
rating_str = rating_str.rpartition('-')[2]
age_limit = US_RATINGS.get(rating_str)
return {
'id': video_id,
'title': info['title'],
@@ -65,4 +73,5 @@ class PBSIE(InfoExtractor):
'description': info['program'].get('description'),
'thumbnail': info.get('image_url'),
'duration': info.get('duration'),
'age_limit': age_limit,
}

View File

@@ -11,7 +11,9 @@ from ..utils import (
class TEDIE(SubtitlesInfoExtractor):
_VALID_URL = r'''(?x)http://www\.ted\.com/
_VALID_URL = r'''(?x)
(?P<proto>https?://)
(?P<type>www|embed)(?P<urlmain>\.ted\.com/
(
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
@@ -19,6 +21,7 @@ class TEDIE(SubtitlesInfoExtractor):
)
(/lang/(.*?))? # The url may contain the language
/(?P<name>\w+) # Here goes the name and then ".html"
.*)$
'''
_TEST = {
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
@@ -48,6 +51,9 @@ class TEDIE(SubtitlesInfoExtractor):
def _real_extract(self, url):
m = re.match(self._VALID_URL, url, re.VERBOSE)
if m.group('type') == 'embed':
desktop_url = m.group('proto') + 'www' + m.group('urlmain')
return self.url_result(desktop_url, 'TED')
name = m.group('name')
if m.group('type_talk'):
return self._talk_info(url, name)

View File

@@ -0,0 +1,67 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
int_or_none,
parse_duration,
unified_strdate,
)
class VideoLecturesNetIE(InfoExtractor):
    """Extractor for lectures on videolectures.net.

    Metadata and the list of downloadable formats are read from the SMIL
    document published for each lecture.
    """

    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
    IE_NAME = 'videolectures.net'

    _TEST = {
        'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
        'info_dict': {
            'id': 'promogram_igor_mekjavic_eng',
            'ext': 'mp4',
            'title': 'Automatics, robotics and biocybernetics',
            'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
            'upload_date': '20130627',
            'duration': 565,
            # Raw literal: "\." is not a valid escape sequence in a plain
            # string. The resulting value is unchanged.
            'thumbnail': r're:http://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the lecture referenced by *url*.

        Title, abstract and date come from the SMIL <meta> elements;
        duration, thumbnail and formats come from its <switch> element.
        Only <video> entries whose ``proto`` attribute is ``http`` are
        offered as formats.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
        smil = self._download_xml(smil_url, video_id)

        title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
        description = find_xpath_attr(smil, './/meta', 'name', 'abstract').attrib['content']
        upload_date = unified_strdate(
            find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])

        switch = smil.find('.//switch')
        duration = parse_duration(switch.attrib.get('dur'))
        thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
        thumbnail = (
            None if thumbnail_el is None else thumbnail_el.attrib.get('src'))

        formats = []
        for v in switch.findall('./video'):
            if v.attrib.get('proto') != 'http':
                continue
            # systemBitrate is optional; dividing a missing (None) value
            # would raise TypeError, so only scale it when present.
            bitrate = int_or_none(v.attrib.get('systemBitrate'))
            formats.append({
                'url': v.attrib['src'],
                'width': int_or_none(v.attrib.get('width')),
                'height': int_or_none(v.attrib.get('height')),
                'filesize': int_or_none(v.attrib.get('size')),
                'tbr': None if bitrate is None else bitrate / 1000.0,
                'ext': v.attrib.get('ext'),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'duration': duration,
            'thumbnail': thumbnail,
            'formats': formats,
        }

View File

@@ -1,29 +1,33 @@
from __future__ import unicode_literals
import re
from ..utils import (
ExtractorError,
unescapeHTML,
unified_strdate,
US_RATINGS,
)
from .subtitles import SubtitlesInfoExtractor
class VikiIE(SubtitlesInfoExtractor):
IE_NAME = u'viki'
IE_NAME = 'viki'
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
_TEST = {
u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
u'file': u'1023585v.mp4',
u'md5': u'a21454021c2646f5433514177e2caa5f',
u'info_dict': {
u'title': u'Heirs Episode 14',
u'uploader': u'SBS',
u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
u'upload_date': u'20131121',
u'age_limit': 13,
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
'md5': 'a21454021c2646f5433514177e2caa5f',
'info_dict': {
'id': '1023585v',
'ext': 'mp4',
'title': 'Heirs Episode 14',
'uploader': 'SBS',
'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
'upload_date': '20131121',
'age_limit': 13,
},
u'skip': u'Blocked in the US',
'skip': 'Blocked in the US',
}
def _real_extract(self, url):
@@ -44,28 +48,21 @@ class VikiIE(SubtitlesInfoExtractor):
rating_str = self._html_search_regex(
r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
u'rating information', default='').strip()
RATINGS = {
'G': 0,
'PG': 10,
'PG-13': 13,
'R': 16,
'NC': 18,
}
age_limit = RATINGS.get(rating_str)
'rating information', default='').strip()
age_limit = US_RATINGS.get(rating_str)
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
info_webpage = self._download_webpage(
info_url, video_id, note=u'Downloading info page')
info_url, video_id, note='Downloading info page')
if re.match(r'\s*<div\s+class="video-error', info_webpage):
raise ExtractorError(
u'Video %s is blocked from your location.' % video_id,
'Video %s is blocked from your location.' % video_id,
expected=True)
video_url = self._html_search_regex(
r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')
upload_date_str = self._html_search_regex(
r'"created_at":"([^"]+)"', info_webpage, u'upload date')
r'"created_at":"([^"]+)"', info_webpage, 'upload date')
upload_date = (
unified_strdate(upload_date_str)
if upload_date_str is not None

View File

@@ -1,3 +1,6 @@
from __future__ import unicode_literals
import json
import re
import sys
@@ -17,24 +20,25 @@ from ..aes import (
class YouPornIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
_VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
_TEST = {
u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
u'file': u'505835.mp4',
u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
u'info_dict': {
u"upload_date": u"20101221",
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
u"uploader": u"Ask Dan And Jennifer",
u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
u"age_limit": 18,
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
'info_dict': {
'id': '505835',
'ext': 'mp4',
'upload_date': '20101221',
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
'uploader': 'Ask Dan And Jennifer',
'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
url = mobj.group('proto') + 'www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
@@ -42,7 +46,7 @@ class YouPornIE(InfoExtractor):
age_limit = self._rta_search(webpage)
# Get JSON parameters
json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
try:
params = json.loads(json_params)
except:
@@ -61,7 +65,7 @@ class YouPornIE(InfoExtractor):
# Get all of the links from the page
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
webpage, u'download list').strip()
webpage, 'download list').strip()
LINK_RE = r'<a href="([^"]+)">'
links = re.findall(LINK_RE, download_list_html)
@@ -86,7 +90,7 @@ class YouPornIE(InfoExtractor):
resolution = format_parts[0]
height = int(resolution[:-len('p')])
bitrate = int(format_parts[1][:-len('k')])
format = u'-'.join(format_parts) + u'-' + dn
format = '-'.join(format_parts) + '-' + dn
formats.append({
'url': video_url,

View File

@@ -1130,14 +1130,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
def _real_extract(self, url):
proto = (
u'http' if self._downloader.params.get('prefer_insecure', False)
else u'https')
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
mobj = re.search(self._NEXT_URL_RE, url)
if mobj:
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
video_id = self.extract_id(url)
# Get video webpage
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
video_webpage = self._download_webpage(url, video_id)
# Attempt to extract SWF player URL
@@ -1162,7 +1166,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'asv': 3,
'sts':'1588',
})
video_info_url = 'https://www.youtube.com/get_video_info?' + data
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False,
errnote='unable to download video info webpage')
@@ -1170,7 +1174,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
age_gate = False
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (video_id, el_type))
video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False,
@@ -1445,7 +1449,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,

View File

@@ -6,6 +6,7 @@ import ctypes
import datetime
import email.utils
import errno
import getpass
import gzip
import itertools
import io
@@ -762,6 +763,10 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
def unified_strdate(date_str):
"""Return a string with the date in the format YYYYMMDD"""
if date_str is None:
return None
upload_date = None
#Replace commas
date_str = date_str.replace(',', ' ')
@@ -1279,3 +1284,21 @@ def parse_xml(s):
parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
# Unless overridden below, the standard-library prompt is used as-is.
compat_getpass = getpass.getpass

if sys.platform == 'win32' and sys.version_info < (3, 0):
    def compat_getpass(prompt, *args, **kwargs):
        """Prompt for a password like getpass.getpass.

        A text (compat_str) prompt is first encoded with the preferred
        encoding; any other prompt is passed through unchanged. Remaining
        arguments are forwarded to getpass.getpass untouched.
        """
        encoded_prompt = (
            prompt.encode(preferredencoding())
            if isinstance(prompt, compat_str) else prompt)
        return getpass.getpass(encoded_prompt, *args, **kwargs)
# Maps a US content-rating label to the numeric age limit that extractors
# report as 'age_limit'. Callers look labels up with .get(), so a label that
# is not listed here yields None.
US_RATINGS = {
'G': 0,
'PG': 10,
'PG-13': 13,
'R': 16,
'NC': 18,
}

View File

@@ -1,2 +1,2 @@
__version__ = '2014.03.17'
__version__ = '2014.03.21.5'