mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-08-03 02:50:01 -05:00
Compare commits
17 Commits
2013.08.27
...
2013.08.28
Author | SHA1 | Date | |
---|---|---|---|
![]() |
1619e22f40 | ||
![]() |
88a79ce6a6 | ||
![]() |
acebc9cd6b | ||
![]() |
443c12a703 | ||
![]() |
7f3c4f4f65 | ||
![]() |
0bc56fa66a | ||
![]() |
1a582dd49d | ||
![]() |
e86ea47c02 | ||
![]() |
aa5a63a5b5 | ||
![]() |
2a7b4da9b2 | ||
![]() |
069d098f84 | ||
![]() |
ff2424595a | ||
![]() |
cd0abcc0bb | ||
![]() |
05a2926c5c | ||
![]() |
7070b83687 | ||
![]() |
8d212e604a | ||
![]() |
943f7f7a39 |
@@ -430,6 +430,10 @@ def _real_main(argv=None):
|
||||
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
||||
https_handler = make_HTTPS_handler(opts)
|
||||
opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||
opener.addheaders =[]
|
||||
compat_urllib_request.install_opener(opener)
|
||||
socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
|
||||
|
||||
|
@@ -8,6 +8,8 @@ from .breakcom import BreakIE
|
||||
from .brightcove import BrightcoveIE
|
||||
from .c56 import C56IE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .cnn import CNNIE
|
||||
from .collegehumor import CollegeHumorIE
|
||||
from .comedycentral import ComedyCentralIE
|
||||
from .condenast import CondeNastIE
|
||||
@@ -52,6 +54,7 @@ from .muzu import MuzuTVIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import NBCNewsIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .pbs import PBSIE
|
||||
from .photobucket import PhotobucketIE
|
||||
|
35
youtube_dl/extractor/canalc2.py
Normal file
35
youtube_dl/extractor/canalc2.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# coding: utf-8
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class Canalc2IE(InfoExtractor):
    """Information extractor for canalc2.tv video pages.

    The numeric ``idVideo`` query parameter of the page URL is used as the
    video id.  The media file name is read from the page's
    ``so.addVariable('file', ...)`` call and appended to a fixed host to
    build the download URL.
    """

    # Spelled ``IE_NAME`` (no leading underscore) to match the attribute
    # name used by the other extractors, e.g. CanalplusIE.
    IE_NAME = 'canalc2.tv'
    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'

    _TEST = {
        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
        u'file': u'12163.mp4',
        u'md5': u'060158428b650f896c542dfbb3d6487f',
        u'info_dict': {
            u'title': u'Terrasses du Numérique'
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, ext, url, title) for *url*.

        *url* is expected to match ``_VALID_URL``; its first capture group
        is the video id.
        """
        video_id = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, video_id)

        # The page passes the media file name to its player through
        # so.addVariable('file', '<name>');
        file_name = self._search_regex(
            r"so\.addVariable\('file','(.*?)'\);",
            webpage, 'file name')
        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name

        title = self._html_search_regex(
            r'class="evenement8">(.*?)</a>', webpage, u'title')

        return {'id': video_id,
                'ext': 'mp4',
                'url': video_url,
                'title': title,
                }
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
class CanalplusIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
|
||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
|
||||
IE_NAME = u'canalplus.fr'
|
||||
|
||||
|
47
youtube_dl/extractor/cnn.py
Normal file
47
youtube_dl/extractor/cnn.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
class CNNIE(InfoExtractor):
    """Information extractor for cnn.com / edition.cnn.com video pages.

    Metadata is read from the ``index.xml`` document that CNN serves for
    the video path found in the page URL.
    """

    _VALID_URL = r'https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/(?P<path>.+?/(?P<title>[^/]+?)\.cnn)'

    _TEST = {
        u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
        u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
        u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
        u'info_dict': {
            u'title': u'Nadal wins 8th French Open title',
            u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the CNN video at *url*.

        The best format is the ``files/file`` entry with the largest
        (width, height, bitrate) tuple parsed from its ``bitrate``
        attribute.  The ``thumbnail`` value is ``None`` when the XML lists
        no images.

        Raises ExtractorError when no ``files/file`` entry has a parsable
        ``bitrate`` attribute.
        """
        mobj = re.match(self._VALID_URL, url)
        path = mobj.group('path')
        page_title = mobj.group('title')
        info_xml = self._download_webpage(
            'http://cnn.com/video/data/3.0/%s/index.xml' % path, page_title)
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        formats = []
        for f in info.findall('files/file'):
            # The bitrate attribute looks like "<width>x<height>" with an
            # optional "_<n>k" suffix; entries in any other form are skipped.
            mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?', f.attrib['bitrate'])
            if mf is not None:
                formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
        if not formats:
            # Fail with a descriptive extraction error instead of a bare
            # IndexError from indexing an empty list below.
            from ..utils import ExtractorError
            raise ExtractorError(u'No video formats found')
        formats = sorted(formats)
        (_, _, _, video_path) = formats[-1]
        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path

        # Sorted by (height, width) so the last entry is the largest image.
        thumbnails = sorted(
            ((int(t.attrib['height']), int(t.attrib['width'])), t.text)
            for t in info.findall('images/image'))
        thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]

        return {'id': info.attrib['id'],
                'title': info.find('headline').text,
                'url': video_url,
                'ext': determine_ext(video_url),
                # Thumbnails are optional metadata: do not abort the
                # extraction when the XML lists none.
                'thumbnail': thumbnails[-1][1] if thumbnails else None,
                'thumbnails': thumbs_dict,
                'description': info.find('description').text,
                }
|
@@ -57,8 +57,8 @@ class GooglePlusIE(InfoExtractor):
|
||||
webpage, 'title', default=u'NA')
|
||||
|
||||
# Step 2, Simulate clicking the image box to launch video
|
||||
DOMAIN = 'https://plus.google.com'
|
||||
video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
|
||||
DOMAIN = 'https://plus.google.com/'
|
||||
video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
|
||||
webpage, u'video page URL')
|
||||
if not video_page.startswith(DOMAIN):
|
||||
video_page = DOMAIN + video_page
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
@@ -12,24 +13,25 @@ class HarkIE(InfoExtractor):
|
||||
u'file': u'mmbzyhkgny.mp3',
|
||||
u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
|
||||
u'info_dict': {
|
||||
u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ",
|
||||
u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
|
||||
u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
|
||||
u'duration': 11,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
embed_url = "http://www.hark.com/clips/%s/homepage_embed" %(video_id)
|
||||
webpage = self._download_webpage(embed_url, video_id)
|
||||
|
||||
final_url = self._search_regex(r'src="(.+?).mp3"',
|
||||
webpage, 'video url')+'.mp3'
|
||||
title = self._html_search_regex(r'<title>(.+?)</title>',
|
||||
webpage, 'video title').replace(' Sound Clip and Quote - Hark','').replace(
|
||||
'Sound Clip , Quote, MP3, and Ringtone - Hark','')
|
||||
json_url = "http://www.hark.com/clips/%s.json" %(video_id)
|
||||
info_json = self._download_webpage(json_url, video_id)
|
||||
info = json.loads(info_json)
|
||||
final_url = info['url']
|
||||
|
||||
return {'id': video_id,
|
||||
'url' : final_url,
|
||||
'title': title,
|
||||
'title': info['name'],
|
||||
'ext': determine_ext(final_url),
|
||||
'description': info['description'],
|
||||
'thumbnail': info['image_original'],
|
||||
'duration': info['duration'],
|
||||
}
|
||||
|
33
youtube_dl/extractor/nbc.py
Normal file
33
youtube_dl/extractor/nbc.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import find_xpath_attr, compat_str
|
||||
|
||||
|
||||
class NBCNewsIE(InfoExtractor):
    """Information extractor for nbcnews.com video pages.

    Metadata comes from the XML document served at
    ``/id/<video id>/displaymode/1219``.
    """

    _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
        u'file': u'52753292.flv',
        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
        u'info_dict': {
            u'title': u'Crew emerges after four-month Mars food study',
            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the NBC News video at *url*."""
        video_id = re.match(self._VALID_URL, url).group('id')

        # Download and parse the XML description of the video.
        metadata_url = 'http://www.nbcnews.com/id/%s/displaymode/1219' % video_id
        raw_xml = self._download_webpage(metadata_url, video_id)
        root = xml.etree.ElementTree.fromstring(raw_xml.encode('utf-8'))
        video = root.find('video')

        # Look the fields up in the same order as the result dict lists them.
        headline = video.find('headline').text
        media_url = find_xpath_attr(video, 'media', 'type', 'flashVideo').text
        caption = compat_str(video.find('caption').text)
        thumbnail_url = find_xpath_attr(video, 'media', 'type', 'thumbnail').text

        return {
            'id': video_id,
            'title': headline,
            'ext': 'flv',
            'url': media_url,
            'description': caption,
            'thumbnail': thumbnail_url,
        }
|
@@ -476,7 +476,7 @@ def formatSeconds(secs):
|
||||
def make_HTTPS_handler(opts):
|
||||
if sys.version_info < (3,2):
|
||||
# Python's 2.x handler is very simplistic
|
||||
return YoutubeDLHandlerHTTPS()
|
||||
return compat_urllib_request.HTTPSHandler()
|
||||
else:
|
||||
import ssl
|
||||
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
|
||||
@@ -485,7 +485,7 @@ def make_HTTPS_handler(opts):
|
||||
context.verify_mode = (ssl.CERT_NONE
|
||||
if opts.no_check_certificate
|
||||
else ssl.CERT_REQUIRED)
|
||||
return YoutubeDLHandlerHTTPS(context=context)
|
||||
return compat_urllib_request.HTTPSHandler(context=context)
|
||||
|
||||
class ExtractorError(Exception):
|
||||
"""Error during info extraction."""
|
||||
@@ -569,8 +569,7 @@ class ContentTooShortError(Exception):
|
||||
self.downloaded = downloaded
|
||||
self.expected = expected
|
||||
|
||||
|
||||
class YoutubeDLHandler_Template: # Old-style class, like HTTPHandler
|
||||
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
"""Handler for HTTP requests and responses.
|
||||
|
||||
This class, when installed with an OpenerDirector, automatically adds
|
||||
@@ -603,8 +602,8 @@ class YoutubeDLHandler_Template: # Old-style class, like HTTPHandler
|
||||
ret.code = code
|
||||
return ret
|
||||
|
||||
def _http_request(self, req):
|
||||
for h, v in std_headers.items():
|
||||
def http_request(self, req):
|
||||
for h,v in std_headers.items():
|
||||
if h in req.headers:
|
||||
del req.headers[h]
|
||||
req.add_header(h, v)
|
||||
@@ -619,7 +618,7 @@ class YoutubeDLHandler_Template: # Old-style class, like HTTPHandler
|
||||
del req.headers['Youtubedl-user-agent']
|
||||
return req
|
||||
|
||||
def _http_response(self, req, resp):
|
||||
def http_response(self, req, resp):
|
||||
old_resp = resp
|
||||
# gzip
|
||||
if resp.headers.get('Content-encoding', '') == 'gzip':
|
||||
@@ -633,16 +632,8 @@ class YoutubeDLHandler_Template: # Old-style class, like HTTPHandler
|
||||
resp.msg = old_resp.msg
|
||||
return resp
|
||||
|
||||
|
||||
class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler):
|
||||
http_request = YoutubeDLHandler_Template._http_request
|
||||
http_response = YoutubeDLHandler_Template._http_response
|
||||
|
||||
|
||||
class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler):
|
||||
https_request = YoutubeDLHandler_Template._http_request
|
||||
https_response = YoutubeDLHandler_Template._http_response
|
||||
|
||||
https_request = http_request
|
||||
https_response = http_response
|
||||
|
||||
def unified_strdate(date_str):
|
||||
"""Return a string with the date in the format YYYYMMDD"""
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.08.27'
|
||||
__version__ = '2013.08.28'
|
||||
|
Reference in New Issue
Block a user