mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-08-02 18:39:51 -05:00
Compare commits
21 Commits
2013.07.07
...
2013.07.10
Author | SHA1 | Date | |
---|---|---|---|
![]() |
0a1be1e997 | ||
![]() |
c93898dae9 | ||
![]() |
ebdf2af727 | ||
![]() |
c108eb73cc | ||
![]() |
3a1375dacf | ||
![]() |
41bece30b4 | ||
![]() |
16ea58cbda | ||
![]() |
99e350d902 | ||
![]() |
13e06d298c | ||
![]() |
81f0259b9e | ||
![]() |
fefcb5d314 | ||
![]() |
345b0c9b46 | ||
![]() |
20c3893f0e | ||
![]() |
29293c1e09 | ||
![]() |
5fe3a3c3fb | ||
![]() |
b04621d155 | ||
![]() |
b227060388 | ||
![]() |
d93e4dcbb7 | ||
![]() |
73e79f2a1b | ||
![]() |
fc79158de2 | ||
![]() |
7763b04e5f |
@@ -20,9 +20,9 @@ tests = [
|
||||
# 84
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
||||
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
|
||||
# 83
|
||||
# 83 - vfl26ng3K 2013/07/10
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
||||
"D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"),
|
||||
"qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>"),
|
||||
# 82
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
|
||||
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
|
||||
|
@@ -45,7 +45,7 @@ class TestYoutubeSig(unittest.TestCase):
|
||||
|
||||
def test_83(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
|
||||
right = "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"
|
||||
right = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_82(self):
|
||||
|
@@ -1,4 +1,4 @@
|
||||
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .ard import ARDIE
|
||||
from .arte import ArteTvIE
|
||||
from .auengine import AUEngineIE
|
||||
@@ -11,6 +11,8 @@ from .comedycentral import ComedyCentralIE
|
||||
from .cspan import CSpanIE
|
||||
from .dailymotion import DailymotionIE
|
||||
from .depositfiles import DepositFilesIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .escapist import EscapistIE
|
||||
from .facebook import FacebookIE
|
||||
@@ -56,6 +58,7 @@ from .tumblr import TumblrIE
|
||||
from .tutv import TutvIE
|
||||
from .ustream import UstreamIE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veoh import VeohIE
|
||||
from .vevo import VevoIE
|
||||
from .vimeo import VimeoIE
|
||||
from .vine import VineIE
|
||||
|
66
youtube_dl/extractor/archiveorg.py
Normal file
66
youtube_dl/extractor/archiveorg.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class ArchiveOrgIE(InfoExtractor):
    """Information extractor for videos hosted on archive.org.

    The item's ``/details/`` URL is requested with ``output=json`` and the
    returned JSON supplies both the metadata and the list of files.
    """
    IE_NAME = 'archive.org'
    IE_DESC = 'archive.org videos'
    # The dot of the domain is escaped so that it only matches a literal '.'
    _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
    _TEST = {
        u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
        u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
        u'md5': u'8af1d4cf447933ed3c7f4871162602db',
        u'info_dict': {
            u"title": u"1968 Demo - FJCC Conference Presentation Reel #1",
            u"description": u"Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
            u"upload_date": u"19681210",
            u"uploader": u"SRI International"
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the item addressed by url.

        The first entry of the 'title', 'description', 'creator' and 'date'
        metadata lists is used. Every file whose 'format' contains 'Video'
        becomes a format entry; the entries are sorted by size and the
        largest one also provides the top-level 'url' and 'ext'.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Extend an existing query string with '&', otherwise start one with '?'
        json_url = url + (u'&' if u'?' in url else u'?') + u'output=json'
        json_data = self._download_webpage(json_url, video_id)
        data = json.loads(json_data)

        title = data['metadata']['title'][0]
        description = data['metadata']['description'][0]
        uploader = data['metadata']['creator'][0]
        upload_date = unified_strdate(data['metadata']['date'][0])

        formats = [{
                'format': fdata['format'],
                'url': 'http://' + data['server'] + data['dir'] + fn,
                'file_size': int(fdata['size']),
            }
            for fn, fdata in data['files'].items()
            if 'Video' in fdata['format']]
        # Ascending by size, so the largest file ends up last
        formats.sort(key=lambda fdata: fdata['file_size'])

        info = {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'upload_date': upload_date,
        }
        # The thumbnail is optional: 'misc' or its 'image' entry may be absent
        thumbnail = data.get('misc', {}).get('image')
        if thumbnail:
            info['thumbnail'] = thumbnail

        # TODO: Remove when #980 has been merged
        info['url'] = formats[-1]['url']
        info['ext'] = determine_ext(formats[-1]['url'])

        return self.video_result(info)
|
@@ -32,7 +32,7 @@ class ARDIE(InfoExtractor):
|
||||
# determine title and media streams from webpage
|
||||
html = self._download_webpage(url, video_id)
|
||||
title = re.search(self._TITLE, html).group('title')
|
||||
streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
|
||||
streams = [mo.groupdict() for mo in re.finditer(self._MEDIA_STREAM, html)]
|
||||
if not streams:
|
||||
assert '"fsk"' in html
|
||||
raise ExtractorError(u'This video is only available after 8:00 pm')
|
||||
|
@@ -4,9 +4,6 @@ import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
# This is used by the not implemented extractLiveStream method
|
||||
compat_urllib_parse,
|
||||
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
@@ -28,6 +25,7 @@ class ArteTvIE(InfoExtractor):
|
||||
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))
|
||||
|
||||
# TODO implement Live Stream
|
||||
# from ..utils import compat_urllib_parse
|
||||
# def extractLiveStream(self, url):
|
||||
# video_lang = url.split('/')[-4]
|
||||
# info = self.grep_webpage(
|
||||
@@ -57,7 +55,6 @@ class ArteTvIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._EMISSION_URL, url)
|
||||
if mobj is not None:
|
||||
name = mobj.group('name')
|
||||
lang = mobj.group('lang')
|
||||
# This is not a real id, it can be for example AJT for the news
|
||||
# http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
|
||||
@@ -77,12 +74,8 @@ class ArteTvIE(InfoExtractor):
|
||||
|
||||
def _extract_emission(self, url, video_id, lang):
|
||||
"""Extract from www.arte.tv/guide"""
|
||||
if video_id.replace('-','').isdigit():
|
||||
json_url = 'http://org-www.arte.tv/papi/tvguide/videos/stream/player/F/%s_PLUS7-F/ALL/ALL.json' % video_id
|
||||
else:
|
||||
# We don't know the real id of the video, we have to search in the webpage
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
|
||||
|
||||
json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
|
||||
self.report_extraction(video_id)
|
||||
|
@@ -189,5 +189,5 @@ class BlipTVUserIE(InfoExtractor):
|
||||
pagenum += 1
|
||||
|
||||
urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
|
||||
url_entries = [self.url_result(url, 'BlipTV') for url in urls]
|
||||
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
|
||||
return [self.playlist_result(url_entries, playlist_title = username)]
|
||||
|
@@ -3,6 +3,7 @@ import os
|
||||
import re
|
||||
import socket
|
||||
import sys
|
||||
import netrc
|
||||
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
@@ -36,6 +37,8 @@ class InfoExtractor(object):
|
||||
The following fields are optional:
|
||||
|
||||
format: The video format, defaults to ext (used for --get-format)
|
||||
thumbnails: A list of dictionaries (with the entries "resolution" and
|
||||
"url") for the varying thumbnails
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: One-line video description.
|
||||
uploader: Full name of the video uploader.
|
||||
@@ -161,6 +164,10 @@ class InfoExtractor(object):
|
||||
"""Report attempt to confirm age."""
|
||||
self.to_screen(u'Confirming age')
|
||||
|
||||
def report_login(self):
    """Tell the user that a login attempt is being made."""
    message = u'Logging in'
    self.to_screen(message)
|
||||
|
||||
#Methods for following #608
|
||||
#They set the correct value of the '_type' key
|
||||
def video_result(self, video_info):
|
||||
@@ -225,6 +232,36 @@ class InfoExtractor(object):
|
||||
else:
|
||||
return res
|
||||
|
||||
def _get_login_info(self):
|
||||
"""
|
||||
Get the the login info as (username, password)
|
||||
It will look in the netrc file using the _NETRC_MACHINE value
|
||||
If there's no info available, return (None, None)
|
||||
"""
|
||||
if self._downloader is None:
|
||||
return (None, None)
|
||||
|
||||
username = None
|
||||
password = None
|
||||
downloader_params = self._downloader.params
|
||||
|
||||
# Attempt to use provided username and password or .netrc data
|
||||
if downloader_params.get('username', None) is not None:
|
||||
username = downloader_params['username']
|
||||
password = downloader_params['password']
|
||||
elif downloader_params.get('usenetrc', False):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
||||
if info is not None:
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
|
||||
|
||||
return (username, password)
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
Base class for paged search queries extractors.
|
||||
|
41
youtube_dl/extractor/dotsub.py
Normal file
41
youtube_dl/extractor/dotsub.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import re
|
||||
import json
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class DotsubIE(InfoExtractor):
    """Information extractor for dotsub.com, based on the site's media metadata API."""
    # http and https page URLs are accepted; the id stops at '/', '?' or '#'
    # so that a query string or fragment never leaks into the API URL
    _VALID_URL = r'(?:https?://)?(?:www\.)?dotsub\.com/view/([^/?#]+)'
    _TEST = {
        u'url': u'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
        u'file': u'aed3b8b2-1889-4df5-ae63-ad85f5572f27.flv',
        u'md5': u'0914d4d69605090f623b7ac329fea66e',
        u'info_dict': {
            u"title": u"Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary",
            u"uploader": u"4v4l0n42",
            u'description': u'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
            u'thumbnail': u'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
            u'upload_date': u'20101213',
        }
    }

    def _real_extract(self, url):
        """Download the metadata JSON of the video and map it to an info dictionary.

        Returns a list with a single dictionary; the extension is always 'flv'.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        info_url = "https://dotsub.com/api/media/%s/metadata" % (video_id)
        webpage = self._download_webpage(info_url, video_id)
        info = json.loads(webpage)
        date = time.gmtime(info['dateCreated']/1000)  # The timestamp is in milliseconds

        return [{
            'id': video_id,
            'url': info['mediaURI'],
            'ext': 'flv',
            'title': info['title'],
            'thumbnail': info['screenshotURI'],
            'description': info['description'],
            'uploader': info['user'],
            'view_count': info['numberOfViews'],
            'upload_date': u'%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
        }]
|
84
youtube_dl/extractor/dreisat.py
Normal file
84
youtube_dl/extractor/dreisat.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class DreiSatIE(InfoExtractor):
    """Information extractor for the 3sat Mediathek, based on its XML details service."""
    IE_NAME = '3sat'
    # The dots of '3sat.de' and 'index.php' are escaped so they only match a literal '.'
    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
    _TEST = {
        u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
        u'file': u'36983.webm',
        u'md5': u'57c97d0469d71cf874f6815aa2b7c944',
        u'info_dict': {
            u"title": u"Kaffeeland Schweiz",
            u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
            u"uploader": u"3sat",
            u"upload_date": u"20130622"
        }
    }

    def _real_extract(self, url):
        """Fetch the details XML of the video and build its info dictionary.

        Formats are sorted ascending by (quality name, rtmp flag, video
        bitrate); the last entry also provides the top-level 'url' and 'ext'.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
        details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))

        thumbnail_els = details_doc.findall('.//teaserimage')
        # The 'key' attribute is split at 'x' into width and height (kept as strings)
        thumbnails = [{
            'width': te.attrib['key'].partition('x')[0],
            'height': te.attrib['key'].partition('x')[2],
            'url': te.text,
        } for te in thumbnail_els]

        information_el = details_doc.find('.//information')
        video_title = information_el.find('./title').text
        video_description = information_el.find('./detail').text

        details_el = details_doc.find('.//details')
        video_uploader = details_el.find('./channel').text
        upload_date = unified_strdate(details_el.find('./airtime').text)

        format_els = details_doc.findall('.//formitaet')
        # Entries pointing to metafilegenerator.de are left out
        formats = [{
            'format_id': fe.attrib['basetype'],
            'width': int(fe.find('./width').text),
            'height': int(fe.find('./height').text),
            'url': fe.find('./url').text,
            'filesize': int(fe.find('./filesize').text),
            'video_bitrate': int(fe.find('./videoBitrate').text),
            '3sat_qualityname': fe.find('./quality').text,
        } for fe in format_els
            if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')]

        def _sortkey(fmt):
            qidx = ['low', 'med', 'high', 'veryhigh'].index(fmt['3sat_qualityname'])
            # NOTE(review): with an ascending sort and the last entry chosen,
            # this ranks rtmp URLs above http ones despite the name - confirm intent
            prefer_http = 1 if 'rtmp' in fmt['url'] else 0
            return (qidx, prefer_http, fmt['video_bitrate'])
        formats.sort(key=_sortkey)

        info = {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'description': video_description,
            'thumbnails': thumbnails,
            'uploader': video_uploader,
            'upload_date': upload_date,
        }
        # Only set a default thumbnail when the XML listed at least one teaser image
        if thumbnails:
            info['thumbnail'] = thumbnails[-1]['url']

        # TODO: Remove when #980 has been merged
        info['url'] = formats[-1]['url']
        info['ext'] = determine_ext(formats[-1]['url'])

        return self.video_result(info)
|
@@ -4,14 +4,15 @@ import xml.etree.ElementTree
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
class GameSpotIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/([^/]+)/videos/([^/]+)-([^/d]+)/'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
|
||||
_TEST = {
|
||||
u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
|
||||
u"file": u"6410818.mp4",
|
||||
u"md5": u"5569d64ca98db01f0177c934fe8c1e9b",
|
||||
u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
|
||||
u"info_dict": {
|
||||
u"title": u"Arma III - Community Guide: SITREP I",
|
||||
u"upload_date": u"20130627",
|
||||
@@ -21,13 +22,22 @@ class GameSpotIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(3).split("-")[-1]
|
||||
info_url = "http://www.gamespot.com/pages/video_player/xml.php?id="+str(video_id)
|
||||
page_id = mobj.group('page_id')
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
video_id = self._html_search_regex([r'"og:video" content=".*?\?id=(\d+)"',
|
||||
r'http://www\.gamespot\.com/videoembed/(\d+)'],
|
||||
webpage, 'video id')
|
||||
data = compat_urllib_parse.urlencode({'id': video_id, 'newplayer': '1'})
|
||||
info_url = 'http://www.gamespot.com/pages/video_player/xml.php?' + data
|
||||
info_xml = self._download_webpage(info_url, video_id)
|
||||
doc = xml.etree.ElementTree.fromstring(info_xml)
|
||||
clip_el = doc.find('./playList/clip')
|
||||
|
||||
video_url = clip_el.find('./URI').text
|
||||
http_urls = [{'url': node.find('filePath').text,
|
||||
'rate': int(node.find('rate').text)}
|
||||
for node in clip_el.find('./httpURI')]
|
||||
best_quality = sorted(http_urls, key=lambda f: f['rate'])[-1]
|
||||
video_url = best_quality['url']
|
||||
title = clip_el.find('./title').text
|
||||
ext = video_url.rpartition('.')[2]
|
||||
thumbnail_url = clip_el.find('./screenGrabURI').text
|
||||
|
47
youtube_dl/extractor/veoh.py
Normal file
47
youtube_dl/extractor/veoh.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
class VeohIE(InfoExtractor):
    """Information extractor for veoh.com watch pages."""
    _VALID_URL = r'http://www\.veoh\.com/watch/v(?P<id>\d*)'

    _TEST = {
        u'url': u'http://www.veoh.com/watch/v56314296nk7Zdmz3',
        u'file': u'56314296.mp4',
        u'md5': u'620e68e6a3cff80086df3348426c9ca3',
        u'info_dict': {
            u'title': u'Straight Backs Are Stronger',
            u'uploader': u'LUMOback',
            u'description': u'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
        }
    }

    def _real_extract(self, url):
        """Extract the video described by the watch page at url.

        A page that references a youtube.com/v/ URL is handed over to the
        'Youtube' extractor; otherwise the page's videoDetailsJSON object
        is parsed and turned into an info dictionary.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        youtube_match = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage)
        if youtube_match is not None:
            self.to_screen(u'%s: detected Youtube video.' % video_id)
            return self.url_result(youtube_match.group(1), 'Youtube')

        self.report_extraction(video_id)
        details_json = self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info')
        details = json.loads(details_json)

        # The high quality variant wins, the low quality one is the fallback
        video_url = details.get('fullPreviewHashHighPath') or details.get('fullPreviewHashLowPath')
        thumbnail = details.get('highResImage') or details.get('medResImage')

        return {
            'id': details['videoId'],
            'title': details['title'],
            'ext': determine_ext(video_url),
            'url': video_url,
            'uploader': details['username'],
            'thumbnail': thumbnail,
            'description': details['description'],
            'view_count': details['views'],
        }
|
@@ -17,6 +17,7 @@ class VimeoIE(InfoExtractor):
|
||||
|
||||
# _VALID_URL matches Vimeo URLs
|
||||
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
IE_NAME = u'vimeo'
|
||||
_TEST = {
|
||||
u'url': u'http://vimeo.com/56015672',
|
||||
@@ -31,6 +32,25 @@ class VimeoIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
def _login(self):
    """Log in to Vimeo with the configured credentials.

    Does nothing when _get_login_info() provides no username. Otherwise the
    login page is fetched for its 'xsrft' token, which is sent back both as
    a form field and as a cookie together with the credentials.
    """
    (username, password) = self._get_login_info()
    if username is None:
        return
    self.report_login()
    login_url = 'https://vimeo.com/log_in'
    webpage = self._download_webpage(login_url, None, False)
    # re.search(...).group(1) died with an AttributeError when the page had no
    # token; _search_regex is presumably fatal by default (other callers pass
    # fatal=False to opt out) and names what was being looked for
    token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
    data = compat_urllib_parse.urlencode({'email': username,
                                          'password': password,
                                          'action': 'login',
                                          'service': 'vimeo',
                                          'token': token,
                                          })
    # POST data has to be bytes on Python 3; urlencode() output is plain ASCII
    login_request = compat_urllib_request.Request(login_url, data.encode('ascii'))
    login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
    login_request.add_header('Cookie', 'xsrft=%s' % token)
    self._download_webpage(login_request, None, False, u'Wrong login info')
|
||||
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
@@ -50,6 +70,9 @@ class VimeoIE(InfoExtractor):
|
||||
u'Verifying the password',
|
||||
u'Wrong password')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url, new_video=True):
|
||||
# Extract ID from URL
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -117,7 +117,19 @@ class YoutubeIE(InfoExtractor):
|
||||
u"uploader": u"IconaPop",
|
||||
u"uploader_id": u"IconaPop"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
|
||||
u"file": u"07FYdnEawAQ.mp4",
|
||||
u"note": u"Test VEVO video with age protection (#956)",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20130703",
|
||||
u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
|
||||
u"description": u"md5:64249768eec3bc4276236606ea996373",
|
||||
u"uploader": u"justintimberlakeVEVO",
|
||||
u"uploader_id": u"justintimberlakeVEVO"
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -131,10 +143,6 @@ class YoutubeIE(InfoExtractor):
|
||||
"""Report attempt to set language."""
|
||||
self.to_screen(u'Setting language')
|
||||
|
||||
def report_login(self):
|
||||
"""Report attempt to log in."""
|
||||
self.to_screen(u'Logging in')
|
||||
|
||||
def report_video_webpage_download(self, video_id):
|
||||
"""Report attempt to download video webpage."""
|
||||
self.to_screen(u'%s: Downloading video webpage' % video_id)
|
||||
@@ -182,7 +190,7 @@ class YoutubeIE(InfoExtractor):
|
||||
elif len(s) == 84:
|
||||
return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
|
||||
elif len(s) == 83:
|
||||
return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36]
|
||||
return s[:81]
|
||||
elif len(s) == 82:
|
||||
return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
|
||||
|
||||
@@ -296,26 +304,6 @@ class YoutubeIE(InfoExtractor):
|
||||
if self._downloader is None:
|
||||
return
|
||||
|
||||
username = None
|
||||
password = None
|
||||
downloader_params = self._downloader.params
|
||||
|
||||
# Attempt to use provided username and password or .netrc data
|
||||
if downloader_params.get('username', None) is not None:
|
||||
username = downloader_params['username']
|
||||
password = downloader_params['password']
|
||||
elif downloader_params.get('usenetrc', False):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
||||
if info is not None:
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
|
||||
return
|
||||
|
||||
# Set language
|
||||
request = compat_urllib_request.Request(self._LANG_URL)
|
||||
try:
|
||||
@@ -325,6 +313,8 @@ class YoutubeIE(InfoExtractor):
|
||||
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
|
||||
return
|
||||
|
||||
(username, password) = self._get_login_info()
|
||||
|
||||
# No authentication to be performed
|
||||
if username is None:
|
||||
return
|
||||
@@ -432,15 +422,35 @@ class YoutubeIE(InfoExtractor):
|
||||
|
||||
# Get video info
|
||||
self.report_video_info_webpage_download(video_id)
|
||||
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
|
||||
video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
|
||||
% (video_id, el_type))
|
||||
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||
self.report_age_confirmation()
|
||||
age_gate = True
|
||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||
# this can be viewed without login into Youtube
|
||||
data = compat_urllib_parse.urlencode({'video_id': video_id,
|
||||
'el': 'embedded',
|
||||
'gl': 'US',
|
||||
'hl': 'en',
|
||||
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
||||
'asv': 3,
|
||||
'sts':'1588',
|
||||
})
|
||||
video_info_url = 'https://www.youtube.com/get_video_info?' + data
|
||||
video_info_webpage = self._download_webpage(video_info_url, video_id,
|
||||
note=False,
|
||||
errnote='unable to download video info webpage')
|
||||
video_info = compat_parse_qs(video_info_webpage)
|
||||
if 'token' in video_info:
|
||||
break
|
||||
else:
|
||||
age_gate = False
|
||||
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
|
||||
video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
|
||||
% (video_id, el_type))
|
||||
video_info_webpage = self._download_webpage(video_info_url, video_id,
|
||||
note=False,
|
||||
errnote='unable to download video info webpage')
|
||||
video_info = compat_parse_qs(video_info_webpage)
|
||||
if 'token' in video_info:
|
||||
break
|
||||
if 'token' not in video_info:
|
||||
if 'reason' in video_info:
|
||||
raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
|
||||
@@ -473,7 +483,12 @@ class YoutubeIE(InfoExtractor):
|
||||
video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
|
||||
|
||||
# thumbnail image
|
||||
if 'thumbnail_url' not in video_info:
|
||||
# We try first to get a high quality image:
|
||||
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
|
||||
video_webpage, re.DOTALL)
|
||||
if m_thumb is not None:
|
||||
video_thumbnail = m_thumb.group(1)
|
||||
elif 'thumbnail_url' not in video_info:
|
||||
self._downloader.report_warning(u'unable to extract video thumbnail')
|
||||
video_thumbnail = ''
|
||||
else: # don't panic if we can't find it
|
||||
@@ -562,9 +577,15 @@ class YoutubeIE(InfoExtractor):
|
||||
elif 's' in url_data:
|
||||
if self._downloader.params.get('verbose'):
|
||||
s = url_data['s'][0]
|
||||
player = self._search_regex(r'html5player-(.+?)\.js', video_webpage,
|
||||
'html5 player', fatal=False)
|
||||
self.to_screen('encrypted signature length %d (%d.%d), itag %s, html5 player %s' %
|
||||
if age_gate:
|
||||
player_version = self._search_regex(r'ad3-(.+?)\.swf',
|
||||
video_info['ad3_module'][0], 'flash player',
|
||||
fatal=False)
|
||||
player = 'flash player %s' % player_version
|
||||
else:
|
||||
player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
|
||||
'html5 player', fatal=False)
|
||||
self.to_screen('encrypted signature length %d (%d.%d), itag %s, %s' %
|
||||
(len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player))
|
||||
signature = self._decrypt_signature(url_data['s'][0])
|
||||
url += '&signature=' + signature
|
||||
@@ -692,7 +713,7 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
|
||||
videos = [v[1] for v in sorted(videos)]
|
||||
|
||||
url_results = [self.url_result(url, 'Youtube') for url in videos]
|
||||
url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
|
||||
return [self.playlist_result(url_results, playlist_id, playlist_title)]
|
||||
|
||||
|
||||
@@ -750,7 +771,7 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
||||
|
||||
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
|
||||
url_entries = [self.url_result(url, 'Youtube') for url in urls]
|
||||
url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
|
||||
return [self.playlist_result(url_entries, channel_id)]
|
||||
|
||||
|
||||
@@ -807,7 +828,7 @@ class YoutubeUserIE(InfoExtractor):
|
||||
pagenum += 1
|
||||
|
||||
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
|
||||
url_results = [self.url_result(url, 'Youtube') for url in urls]
|
||||
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
|
||||
return [self.playlist_result(url_results, playlist_title = username)]
|
||||
|
||||
class YoutubeSearchIE(SearchInfoExtractor):
|
||||
@@ -882,6 +903,12 @@ class YoutubeSubscriptionsIE(YoutubeIE):
|
||||
def suitable(cls, url):
|
||||
return re.match(cls._VALID_URL, url) is not None
|
||||
|
||||
def _real_initialize(self):
    """Refuse to run without login info, then run the parent class initialization."""
    username, _password = self._get_login_info()
    if username is None:
        raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True)
    super(YoutubeSubscriptionsIE, self)._real_initialize()
|
||||
|
||||
def _real_extract(self, url):
|
||||
feed_entries = []
|
||||
# The step argument is available only in 2.7 or higher
|
||||
|
@@ -623,7 +623,7 @@ def unified_strdate(date_str):
|
||||
date_str = date_str.replace(',',' ')
|
||||
# %z (UTC offset) is only supported in python>=3.2
|
||||
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
|
||||
format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S']
|
||||
format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
|
||||
for expression in format_expressions:
|
||||
try:
|
||||
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
|
||||
@@ -631,6 +631,13 @@ def unified_strdate(date_str):
|
||||
pass
|
||||
return upload_date
|
||||
|
||||
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from a URL.

    Everything from the first '?' on is ignored; the text after the last
    '.' of the remainder is the candidate extension.

    url         -- the URL to inspect (None is accepted)
    default_ext -- value returned when no plausible extension is found

    Returns the candidate if it consists of ASCII letters and digits only,
    default_ext otherwise.
    """
    if url is None:
        return default_ext
    _, dot, guess = url.partition(u'?')[0].rpartition(u'.')
    # Without any dot rpartition() puts the whole string into the last slot,
    # which must not be mistaken for an extension
    if dot and re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    return default_ext
|
||||
|
||||
def date_from_str(date_str):
|
||||
"""
|
||||
Return a datetime object from a string in the format YYYYMMDD or
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.07.07.01'
|
||||
__version__ = '2013.07.10'
|
||||
|
Reference in New Issue
Block a user