mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-08-02 10:30:04 -05:00
Compare commits
7 Commits
2014.11.25
...
2014.11.26
Author | SHA1 | Date | |
---|---|---|---|
![]() |
ddfd0f2727 | ||
![]() |
d0720e7118 | ||
![]() |
4e262a8838 | ||
![]() |
b9ed3af343 | ||
![]() |
63c9b2c1d9 | ||
![]() |
65f3a228b1 | ||
![]() |
3004ae2c3a |
1
AUTHORS
1
AUTHORS
@@ -87,3 +87,4 @@ William Sewell
|
||||
Dao Hoang Son
|
||||
Oskar Jauch
|
||||
Matthew Rayfield
|
||||
t0mm0
|
||||
|
@@ -373,6 +373,7 @@ from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
from .tapely import TapelyIE
|
||||
from .tass import TassIE
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
TeacherTubeUserIE,
|
||||
|
@@ -296,9 +296,11 @@ class InfoExtractor(object):
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
|
||||
return (content, urlh)
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
|
||||
content_type = urlh.headers.get('Content-Type', '')
|
||||
webpage_bytes = urlh.read()
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
||||
if m:
|
||||
encoding = m.group(1)
|
||||
|
@@ -445,6 +445,30 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Rosetta #CometLanding webcast HL 10',
|
||||
}
|
||||
},
|
||||
# LazyYT
|
||||
{
|
||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
||||
'info_dict': {
|
||||
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
},
|
||||
# Direct link with incorrect MIME type
|
||||
{
|
||||
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
|
||||
'md5': '4ccbebe5f36706d85221f204d7eb5913',
|
||||
'info_dict': {
|
||||
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
|
||||
'id': '5_Lennart_Poettering_-_Systemd',
|
||||
'ext': 'webm',
|
||||
'title': '5_Lennart_Poettering_-_Systemd',
|
||||
'upload_date': '20141120',
|
||||
},
|
||||
'expected_warnings': [
|
||||
'URL could be a direct video link, returning it as such.'
|
||||
]
|
||||
}
|
||||
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -598,10 +622,28 @@ class GenericIE(InfoExtractor):
|
||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||
self._downloader.report_warning('Falling back on generic information extractor.')
|
||||
|
||||
if full_response:
|
||||
webpage = self._webpage_read_content(full_response, url, video_id)
|
||||
else:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if not full_response:
|
||||
full_response = self._request_webpage(url, video_id)
|
||||
|
||||
# Maybe it's a direct link to a video?
|
||||
# Be careful not to download the whole thing!
|
||||
first_bytes = full_response.read(512)
|
||||
if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
|
||||
self._downloader.report_warning(
|
||||
'URL could be a direct video link, returning it as such.')
|
||||
upload_date = unified_strdate(
|
||||
head_response.headers.get('Last-Modified'))
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': os.path.splitext(url_basename(url))[0],
|
||||
'direct': True,
|
||||
'url': url,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
webpage = self._webpage_read_content(
|
||||
full_response, url, video_id, prefix=first_bytes)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# Is it an RSS feed?
|
||||
@@ -702,6 +744,12 @@ class GenericIE(InfoExtractor):
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]))
|
||||
|
||||
# Look for lazyYT YouTube embed
|
||||
matches = re.findall(
|
||||
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
||||
|
62
youtube_dl/extractor/tass.py
Normal file
62
youtube_dl/extractor/tass.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class TassIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:tass\.ru|itar-tass\.com)/[^/]+/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://tass.ru/obschestvo/1586870',
|
||||
'md5': '3b4cdd011bc59174596b6145cda474a4',
|
||||
'info_dict': {
|
||||
'id': '1586870',
|
||||
'ext': 'mp4',
|
||||
'title': 'Посетителям московского зоопарка показали красную панду',
|
||||
'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://itar-tass.com/obschestvo/1600009',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
sources = json.loads(js_to_json(self._search_regex(
|
||||
r'(?s)sources\s*:\s*(\[.+?\])', webpage, 'sources')))
|
||||
|
||||
quality = qualities(['sd', 'hd'])
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
video_url = source.get('file')
|
||||
if not video_url or not video_url.startswith('http') or not video_url.endswith('.mp4'):
|
||||
continue
|
||||
label = source.get('label')
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': label,
|
||||
'quality': quality(label),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
}
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.11.25'
|
||||
__version__ = '2014.11.26'
|
||||
|
Reference in New Issue
Block a user