Compare commits

...

67 Commits

Author SHA1 Message Date
f4d66a99cf release 2014.07.10 2014-07-10 14:49:16 +02:00
411f691b21 [mpora] Fix player regex 2014-07-09 19:12:42 +07:00
6e1e0e4b5b [veoh] Skip deleted test video 2014-07-08 20:22:27 +07:00
1aac03797e [ninegag] Fix extraction 2014-07-07 20:12:59 +07:00
459af43494 [arte] Manually set the rtmp play_path (fix #3198)
rtmpdump doesn't parse it right
2014-07-07 14:10:57 +02:00
f4f7e3cf41 Merge branch 'master' of github.com:rg3/youtube-dl 2014-07-06 20:57:05 +02:00
1fd015516e [newstube] Replace test 2014-07-06 19:32:13 +07:00
76bafa8ffe [newstube] Capture error message 2014-07-06 18:53:31 +07:00
8d5797b00f [YoutubeDL] Show download URL when -v is set
This will allow us to debug issues like #3204
2014-07-06 11:28:51 +02:00
7571c02c8a [generic] Set default-search to error
This prevents users from submitting bug reports where they mistyped a URL, and prevents me from getting a weird video when holding shift and thus searching for :Tds
2014-07-06 11:22:44 +02:00
49cbe7c8e3 [allocine] add extractor for allocine.fr (fixes #3189) 2014-07-05 14:42:26 +02:00
ba4133c9eb Credit @hakatashi for #3181 #3182 2014-07-04 22:30:43 +07:00
b67f1840a1 [niconico] Remove unused import 2014-07-04 22:26:56 +07:00
165c46690f Merge pull request #3180 from hakatashi/niconico-without-authentication
[niconico] Download without authentication
2014-07-04 22:25:05 +07:00
16bc9ab601 Merge branch 'hakatashi-niconico-channel-video' 2014-07-04 22:06:31 +07:00
15ce1338b4 [niconico] Extract more metadata and simplify (Closes #3181) 2014-07-04 22:05:46 +07:00
0ff30c5333 Merge branch 'niconico-channel-video' of https://github.com/hakatashi/youtube-dl into hakatashi-niconico-channel-video 2014-07-04 21:39:54 +07:00
6feb2d5e80 [youtube:search_url] Update regexes 2014-07-04 19:21:19 +07:00
1e07fea200 [teachertube] Add support for new video URL format 2014-07-03 21:11:56 +07:00
7aeb67b39b [teachertube:user:collection] Update media regex 2014-07-03 21:08:44 +07:00
93881db22a [anitube] Modernize 2014-07-02 19:24:01 +07:00
64ed7a38f9 [niconico] Add support for channel video 2014-07-02 03:13:12 +09:00
2fd466fcfc [niconico] Download without authentication 2014-07-02 02:32:54 +09:00
dc2fc73691 [youtube:truncated_url] Move test to extractor 2014-07-01 15:49:34 +02:00
c4808c6009 [youtube_truncated_url] Add support for truncated watch URLs with annotations (#3178) 2014-07-01 15:49:16 +02:00
c67f584eb3 [rai] Skip test 2014-07-01 19:24:18 +07:00
29f6ed78e8 [tagesschau] replace 404 test 2014-07-01 10:35:49 +02:00
7807ee664d [wdr] fix test 2014-07-01 09:59:57 +02:00
d518d06efd [vk] Skip georestricted ivi embed test 2014-06-30 03:16:31 +07:00
25a0cc44b9 [teachertube:user] fix regex 2014-06-29 20:33:46 +02:00
825cdcec3c Merge branch 'master' of github.com:rg3/youtube-dl 2014-06-29 16:44:37 +02:00
41b610acab [GooglePlus] fix video title extraction 2014-06-29 16:43:31 +02:00
0364fa8b65 [generic] Add support for ivi.ru embedded player 2014-06-29 20:18:23 +07:00
849086a1ae [vk] Better support for embeds 2014-06-29 20:07:59 +07:00
36fbc6887f [ivi] Add support for embedded URLs 2014-06-29 20:06:47 +07:00
a8a98e43f2 [vk] Add support for mobile URLs 2014-06-29 19:51:00 +07:00
57bdc730e2 [vk] Add support for more URL formats (#3172) 2014-06-29 19:33:39 +07:00
31a196d7f5 [TeacherTube] add user + collection, removed classrooms 2014-06-29 13:45:10 +02:00
9b27e6c3b4 [Tumblr] fix encoding (PEP0263) 2014-06-29 09:32:53 +02:00
62f1f9507f [Tumblr] fix test + add description 2014-06-29 09:08:46 +02:00
ee8dda41ae [Toypics] support https urls 2014-06-29 08:21:23 +02:00
01ba178097 [vk] Update test 2014-06-29 04:51:47 +07:00
78ff59d052 [Motherless] simplify 2014-06-28 20:02:02 +02:00
f3f1cd6b3b Merge pull request #3167 from Schnouki/motherless
* mother/motherless:
  [Motherless] Add new extractor
2014-06-28 19:12:31 +02:00
803540e811 [drtv] Add missing extractor import 2014-06-28 17:36:13 +07:00
458ade6361 [ArteTVFuture] fix empty formats list 2014-06-28 10:22:53 +02:00
a69969ee05 [Motherless] Add new extractor 2014-06-27 18:12:11 +02:00
f2b8db57eb [drtv] Add extractor for DR TV (Closes #3126) 2014-06-27 20:53:59 +07:00
331ae266ff [npo] Add extractor (closes #3145) 2014-06-26 20:30:44 +02:00
4242001863 release 2014.06.26 2014-06-26 16:44:01 +02:00
78338f71ca [livestream:original] Add support for folder urls (closes #2631)
The webpage only contains shortened links for the videos, since the server
doesn't support HEAD requests, we use an specific extractor for them.
2014-06-26 16:34:36 +02:00
f5172a3084 [teachertube] Add support for new URL formats 2014-06-26 20:01:59 +07:00
c7df67edbd [teachertube] Improve extraction 2014-06-26 20:00:47 +07:00
d410fee91d [VideoTt] fix ValueError (#3161) 2014-06-26 07:35:47 +02:00
ba7aa464de [soundgasm] PEP8 and add a display_id (#3155) 2014-06-25 23:47:38 +02:00
8333034dce Merge remote-tracking branch 'pachacamac/soundgasm' 2014-06-25 23:45:03 +02:00
637b6af80f release 2014.06.25 2014-06-25 21:24:01 +02:00
1044f8afd2 [Soundgasm] Add new extractor 2014-06-25 18:07:23 +02:00
2f775107f9 Merge branch 'master' of github.com:rg3/youtube-dl 2014-06-25 17:45:24 +02:00
85342674b2 [Dailymotion] fix uploader name (fixes #3153) 2014-06-25 17:44:19 +02:00
fd69098a45 [rutube] Update playlist tests 2014-06-25 19:06:11 +07:00
8867f908fc Merge pull request #3148 from crazedpsyc/master
[BlipTV] Allow plus sign in video ID
2014-06-25 07:14:04 +02:00
b7c33124c8 [BlipTV] Allow plus sign in video ID 2014-06-24 17:55:08 -06:00
89a8c423c7 Merge pull request #3146 from pvdl/patch-1
[discovery] Change default url
2014-06-24 18:11:26 +02:00
cea2582df2 [discovery] Change default url
URL does a redirect from dsc.discovery.com to www.discovery.com
This commit fixes the correct URL.
2014-06-24 17:41:53 +02:00
e423e0baaa [wistia] Add duration and modernize 2014-06-24 19:34:39 +07:00
60b2dd1285 [comedycentral] Correct handling when latest tds episode is a special-episode instead of a regular one 2014-06-24 10:50:41 +02:00
39 changed files with 741 additions and 141 deletions

View File

@ -70,8 +70,9 @@ which means you can modify it, redistribute it or use it however you like.
--default-search PREFIX Use this prefix for unqualified URLs. For
example "gvsearch2:" downloads two videos
from google videos for youtube-dl "large
apple". By default (with value "auto")
youtube-dl guesses.
apple". Use the value "auto" to let
youtube-dl guess. The default value "error"
just throws an error.
--ignore-config Do not read configuration files. When given
in the global configuration file /etc
/youtube-dl.conf: do not read the user

View File

@ -69,9 +69,6 @@ class TestAllURLsMatching(unittest.TestCase):
def test_youtube_show_matching(self):
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
def test_youtube_truncated(self):
self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
def test_youtube_search_matching(self):
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])

View File

@ -28,8 +28,9 @@ from youtube_dl.extractor import (
SoundcloudSetIE,
SoundcloudUserIE,
SoundcloudPlaylistIE,
TeacherTubeClassroomIE,
TeacherTubeUserIE,
LivestreamIE,
LivestreamOriginalIE,
NHLVideocenterIE,
BambuserChannelIE,
BandcampAlbumIE,
@ -40,6 +41,7 @@ from youtube_dl.extractor import (
KhanAcademyIE,
EveryonesMixtapeIE,
RutubeChannelIE,
RutubePersonIE,
GoogleSearchIE,
GenericIE,
TEDIE,
@ -154,6 +156,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], 'TEDCity2.0 (English)')
self.assertTrue(len(result['entries']) >= 4)
def test_livestreamoriginal_folder(self):
dl = FakeYDL()
ie = LivestreamOriginalIE(dl)
result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
self.assertTrue(len(result['entries']) >= 28)
def test_nhl_videocenter(self):
dl = FakeYDL()
ie = NHLVideocenterIE(dl)
@ -256,10 +266,18 @@ class TestPlaylists(unittest.TestCase):
def test_rutube_channel(self):
dl = FakeYDL()
ie = RutubeChannelIE(dl)
result = ie.extract('http://rutube.ru/tags/video/1409')
result = ie.extract('http://rutube.ru/tags/video/1800/')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '1409')
self.assertTrue(len(result['entries']) >= 34)
self.assertEqual(result['id'], '1800')
self.assertTrue(len(result['entries']) >= 68)
def test_rutube_person(self):
dl = FakeYDL()
ie = RutubePersonIE(dl)
result = ie.extract('http://rutube.ru/video/person/313878/')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '313878')
self.assertTrue(len(result['entries']) >= 37)
def test_multiple_brightcove_videos(self):
# https://github.com/rg3/youtube-dl/issues/2283
@ -361,13 +379,13 @@ class TestPlaylists(unittest.TestCase):
result['title'], 'Brace Yourself - Today\'s Weirdest News')
self.assertTrue(len(result['entries']) >= 10)
def test_TeacherTubeClassroom(self):
def test_TeacherTubeUser(self):
dl = FakeYDL()
ie = TeacherTubeClassroomIE(dl)
result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2')
ie = TeacherTubeUserIE(dl)
result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'rbhagwati2')
self.assertTrue(len(result['entries']) >= 20)
self.assertTrue(len(result['entries']) >= 179)
if __name__ == '__main__':
unittest.main()

View File

@ -993,6 +993,8 @@ class YoutubeDL(object):
fd = get_suitable_downloader(info)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info)
if info_dict.get('requested_formats') is not None:
downloaded = []

View File

@ -59,6 +59,7 @@ __authors__ = (
'Adam Thalhammer',
'Georg Jähnig',
'Ralf Haring',
'Koki Takahashi',
)
__license__ = 'Public Domain'
@ -269,7 +270,7 @@ def parseOpts(overrideArguments=None):
general.add_option(
'--default-search',
dest='default_search', metavar='PREFIX',
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.')
general.add_option(
'--ignore-config',
action='store_true',

View File

@ -3,6 +3,7 @@ from .addanime import AddAnimeIE
from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE
from .aol import AolIE
from .allocine import AllocineIE
from .aparat import AparatIE
from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE
@ -63,6 +64,7 @@ from .dailymotion import (
from .daum import DaumIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .drtv import DRTVIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
@ -147,7 +149,11 @@ from .ku6 import Ku6IE
from .la7 import LA7IE
from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE, LivestreamOriginalIE
from .livestream import (
LivestreamIE,
LivestreamOriginalIE,
LivestreamShortenerIE,
)
from .lynda import (
LyndaIE,
LyndaCourseIE
@ -165,6 +171,7 @@ from .mpora import MporaIE
from .mofosex import MofosexIE
from .mooshare import MooshareIE
from .morningstar import MorningstarIE
from .motherless import MotherlessIE
from .motorsport import MotorsportIE
from .moviezine import MoviezineIE
from .movshare import MovShareIE
@ -197,6 +204,7 @@ from .normalboots import NormalbootsIE
from .novamov import NovaMovIE
from .nowness import NownessIE
from .nowvideo import NowVideoIE
from .npo import NPOIE
from .nrk import (
NRKIE,
NRKTVIE,
@ -255,6 +263,7 @@ from .soundcloud import (
SoundcloudUserIE,
SoundcloudPlaylistIE
)
from .soundgasm import SoundgasmIE
from .southparkstudios import (
SouthParkStudiosIE,
SouthparkDeIE,
@ -274,7 +283,7 @@ from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE
from .teachertube import (
TeacherTubeIE,
TeacherTubeClassroomIE,
TeacherTubeUserIE,
)
from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE

View File

@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_str,
qualities,
determine_ext,
)
class AllocineIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?'
_TESTS = [{
'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
'md5': '0c9fcf59a841f65635fa300ac43d8269',
'info_dict': {
'id': '19546517',
'ext': 'mp4',
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
'info_dict': {
'id': '19540403',
'ext': 'mp4',
'title': 'Planes 2 Bande-annonce VF',
'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
'md5': '101250fb127ef9ca3d73186ff22a47ce',
'info_dict': {
'id': '19544709',
'ext': 'mp4',
'title': 'Dragons 2 - Bande annonce finale VF',
'description': 'md5:e74a4dc750894bac300ece46c7036490',
'thumbnail': 're:http://.*\.jpg',
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
typ = mobj.group('typ')
display_id = mobj.group('id')
webpage = self._download_webpage(url, display_id)
if typ == 'film':
video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
else:
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player')
player_data = json.loads(player)
video_id = compat_str(player_data['refMedia'])
xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
video = xml.find('.//AcVisionVideo').attrib
quality = qualities(['ld', 'md', 'hd'])
formats = []
for k, v in video.items():
if re.match(r'.+_path', k):
format_id = k.split('_')[0]
formats.append({
'format_id': format_id,
'quality': quality(format_id),
'url': v,
'ext': determine_ext(v),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video['videoTitle'],
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
'description': self._og_search_description(webpage),
}

View File

@ -1,22 +1,24 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class AnitubeIE(InfoExtractor):
IE_NAME = u'anitube.se'
IE_NAME = 'anitube.se'
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.anitube.se/video/36621',
u'md5': u'59d0eeae28ea0bc8c05e7af429998d43',
u'file': u'36621.mp4',
u'info_dict': {
u'id': u'36621',
u'ext': u'mp4',
u'title': u'Recorder to Randoseru 01',
'url': 'http://www.anitube.se/video/36621',
'md5': '59d0eeae28ea0bc8c05e7af429998d43',
'info_dict': {
'id': '36621',
'ext': 'mp4',
'title': 'Recorder to Randoseru 01',
'duration': 180.19,
},
u'skip': u'Blocked in the US',
'skip': 'Blocked in the US',
}
def _real_extract(self, url):
@ -24,13 +26,15 @@ class AnitubeIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
webpage, u'key')
key = self._html_search_regex(
r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key')
config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
key)
config_xml = self._download_xml(
'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
video_title = config_xml.find('title').text
thumbnail = config_xml.find('image').text
duration = float(config_xml.find('duration').text)
formats = []
video_url = config_xml.find('file')
@ -49,5 +53,7 @@ class AnitubeIE(InfoExtractor):
return {
'id': video_id,
'title': video_title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats
}

View File

@ -39,7 +39,10 @@ class ArteTvIE(InfoExtractor):
formats = [{
'forma_id': q.attrib['quality'],
'url': q.text,
# The playpath starts at 'mp4:', if we don't manually
# split the url, rtmpdump will incorrectly parse them
'url': q.text.split('mp4:', 1)[0],
'play_path': 'mp4:' + q.text.split('mp4:', 1)[1],
'ext': 'flv',
'quality': 2 if q.attrib['quality'] == 'hd' else 1,
} for q in config.findall('./urls/url')]
@ -111,7 +114,7 @@ class ArteTVPlus7IE(InfoExtractor):
if not formats:
# Some videos are only available in the 'Originalversion'
# they aren't tagged as being in French or German
if all(f['versionCode'] == 'VO' for f in all_formats):
if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats):
formats = all_formats
else:
raise ExtractorError(u'The formats list is empty')
@ -189,9 +192,10 @@ class ArteTVFutureIE(ArteTVPlus7IE):
_TEST = {
'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
'info_dict': {
'id': '050940-003',
'id': '5201',
'ext': 'mp4',
'title': 'Les champignons au secours de la planète',
'upload_date': '20131101',
},
}

View File

@ -15,7 +15,7 @@ from ..utils import (
class BlipTVIE(SubtitlesInfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z]+)))'
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))'
_TESTS = [
{

View File

@ -130,7 +130,7 @@ class ComedyCentralShowsIE(InfoExtractor):
raise ExtractorError('Invalid redirected URL: ' + url)
if mobj.group('episode') == '':
raise ExtractorError('Redirected URL is still not specific: ' + url)
epTitle = mobj.group('episode').rpartition('/')[-1]
epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1]
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
if len(mMovieParams) == 0:

View File

@ -459,6 +459,9 @@ class InfoExtractor(object):
if secure: regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs)
def _og_search_url(self, html, **kargs):
return self._og_search_property('url', html, **kargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False):
if display_name is None:
display_name = name

View File

@ -150,7 +150,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
return {
'id': video_id,
'formats': formats,
'uploader': info['owner_screenname'],
'uploader': info['owner.screenname'],
'upload_date': video_upload_date,
'title': self._og_search_title(webpage),
'subtitles': video_subtitles,

View File

@ -7,9 +7,9 @@ from .common import InfoExtractor
class DiscoveryIE(InfoExtractor):
_VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
_VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
_TEST = {
'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
'md5': 'e12614f9ee303a6ccef415cb0793eba2',
'info_dict': {
'id': '614784',

View File

@ -0,0 +1,91 @@
from __future__ import unicode_literals
import re
from .subtitles import SubtitlesInfoExtractor
from .common import ExtractorError
from ..utils import parse_iso8601
class DRTVIE(SubtitlesInfoExtractor):
_VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/[^/]+/(?P<id>[\da-z-]+)'
_TEST = {
'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
'md5': '4a7e1dd65cdb2643500a3f753c942f25',
'info_dict': {
'id': 'partiets-mand-7-8',
'ext': 'mp4',
'title': 'Partiets mand (7:8)',
'description': 'md5:a684b90a8f9336cd4aab94b7647d7862',
'timestamp': 1403047940,
'upload_date': '20140617',
'duration': 1299.040,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
programcard = self._download_json(
'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON')
data = programcard['Data'][0]
title = data['Title']
description = data['Description']
timestamp = parse_iso8601(data['CreatedTime'][:-5])
thumbnail = None
duration = None
restricted_to_denmark = False
formats = []
subtitles = {}
for asset in data['Assets']:
if asset['Kind'] == 'Image':
thumbnail = asset['Uri']
elif asset['Kind'] == 'VideoResource':
duration = asset['DurationInMilliseconds'] / 1000.0
restricted_to_denmark = asset['RestrictedToDenmark']
for link in asset['Links']:
target = link['Target']
uri = link['Uri']
formats.append({
'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri,
'format_id': target,
'ext': link['FileFormat'],
'preference': -1 if target == 'HDS' else -2,
})
subtitles_list = asset.get('SubtitlesList')
if isinstance(subtitles_list, list):
LANGS = {
'Danish': 'dk',
}
for subs in subtitles_list:
lang = subs['Language']
subtitles[LANGS.get(lang, lang)] = subs['Uri']
if not formats and restricted_to_denmark:
raise ExtractorError(
'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True)
self._sort_formats(formats)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
'subtitles': self.extract_subtitles(video_id, subtitles),
}

View File

@ -383,7 +383,7 @@ class GenericIE(InfoExtractor):
if not parsed_url.scheme:
default_search = self._downloader.params.get('default_search')
if default_search is None:
default_search = 'auto_warning'
default_search = 'error'
if default_search in ('auto', 'auto_warning'):
if '/' in url:
@ -397,8 +397,13 @@ class GenericIE(InfoExtractor):
expected=True)
else:
self._downloader.report_warning(
'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url)
'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
return self.url_result('ytsearch:' + url)
elif default_search == 'error':
raise ExtractorError(
('%r is not a valid URL. '
'Set --default-search "ytseach" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
) % (url, url), expected=True)
else:
assert ':' in default_search
return self.url_result(default_search + url)
@ -620,6 +625,11 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'VK')
# Look for embedded ivi player
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Ivi')
# Look for embedded Huffington Post player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)

View File

@ -52,8 +52,7 @@ class GooglePlusIE(InfoExtractor):
# Extract title
# Get the first line for title
video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
webpage, 'title', default='NA')
video_title = self._og_search_description(webpage).splitlines()[0]
# Step 2, Simulate clicking the image box to launch video
DOMAIN = 'https://plus.google.com/'

View File

@ -14,7 +14,7 @@ from ..utils import (
class IviIE(InfoExtractor):
IE_DESC = 'ivi.ru'
IE_NAME = 'ivi'
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)'
_TESTS = [
# Single movie

View File

@ -9,6 +9,7 @@ from ..utils import (
compat_urlparse,
xpath_with_ns,
compat_str,
orderedSet,
)
@ -64,7 +65,10 @@ class LivestreamIE(InfoExtractor):
# The original version of Livestream uses a different system
class LivestreamOriginalIE(InfoExtractor):
IE_NAME = 'livestream:original'
_VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
_VALID_URL = r'''(?x)https?://www\.livestream\.com/
(?P<user>[^/]+)/(?P<type>video|folder)
(?:\?.*?Id=|/)(?P<id>.*?)(&|$)
'''
_TEST = {
'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
'info_dict': {
@ -78,10 +82,7 @@ class LivestreamOriginalIE(InfoExtractor):
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
user = mobj.group('user')
def _extract_video(self, user, video_id):
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
info = self._download_xml(api_url, video_id)
@ -99,3 +100,44 @@ class LivestreamOriginalIE(InfoExtractor):
'ext': 'flv',
'thumbnail': thumbnail_url,
}
def _extract_folder(self, url, folder_id):
webpage = self._download_webpage(url, folder_id)
urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
return {
'_type': 'playlist',
'id': folder_id,
'entries': [{
'_type': 'url',
'url': video_url,
} for video_url in urls],
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
id = mobj.group('id')
user = mobj.group('user')
url_type = mobj.group('type')
if url_type == 'folder':
return self._extract_folder(url, id)
else:
return self._extract_video(user, id)
# The server doesn't support HEAD request, the generic extractor can't detect
# the redirection
class LivestreamShortenerIE(InfoExtractor):
IE_NAME = 'livestream:shortener'
IE_DESC = False # Do not list
_VALID_URL = r'https?://livestre\.am/(?P<id>.+)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
id = mobj.group('id')
webpage = self._download_webpage(url, id)
return {
'_type': 'url',
'url': self._og_search_url(webpage),
}

View File

@ -0,0 +1,87 @@
from __future__ import unicode_literals
import datetime
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
)
class MotherlessIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?motherless\.com/(?P<id>[A-Z0-9]+)'
_TESTS = [
{
'url': 'http://motherless.com/AC3FFE1',
'md5': '5527fef81d2e529215dad3c2d744a7d9',
'info_dict': {
'id': 'AC3FFE1',
'ext': 'flv',
'title': 'Fucked in the ass while playing PS3',
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
'upload_date': '20100913',
'uploader_id': 'famouslyfuckedup',
'thumbnail': 're:http://.*\.jpg',
'age_limit': 18,
}
},
{
'url': 'http://motherless.com/532291B',
'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
'info_dict': {
'id': '532291B',
'ext': 'mp4',
'title': 'Amazing girl playing the omegle game, PERFECT!',
'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
'upload_date': '20140622',
'uploader_id': 'Sulivana7x',
'thumbnail': 're:http://.*\.jpg',
'age_limit': 18,
}
}
]
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
video_url = self._html_search_regex(r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video_url')
age_limit = self._rta_search(webpage)
view_count = self._html_search_regex(r'<strong>Views</strong>\s+([^<]+)<', webpage, 'view_count')
upload_date = self._html_search_regex(r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload_date')
if 'Ago' in upload_date:
days = int(re.search(r'([0-9]+)', upload_date).group(1))
upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
else:
upload_date = unified_strdate(upload_date)
like_count = self._html_search_regex(r'<strong>Favorited</strong>\s+([^<]+)<', webpage, 'like_count')
comment_count = webpage.count('class="media-comment-contents"')
uploader_id = self._html_search_regex(r'"thumb-member-username">\s+<a href="/m/([^"]+)"', webpage, 'uploader_id')
categories = self._html_search_meta('keywords', webpage)
if categories:
categories = [cat.strip() for cat in categories.split(',')]
return {
'id': video_id,
'title': title,
'upload_date': upload_date,
'uploader_id': uploader_id,
'thumbnail': self._og_search_thumbnail(webpage),
'categories': categories,
'view_count': int_or_none(view_count.replace(',', '')),
'like_count': int_or_none(like_count.replace(',', '')),
'comment_count': comment_count,
'age_limit': age_limit,
'url': video_url,
}

View File

@ -28,7 +28,7 @@ class MporaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
data_json = self._search_regex(
r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json')
r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
data = json.loads(data_json)

View File

@ -4,18 +4,19 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class NewstubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
_TEST = {
'url': 'http://newstube.ru/media/na-korable-progress-prodolzhaetsya-testirovanie-sistemy-kurs',
'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
'info_dict': {
'id': 'd156a237-a6e9-4111-a682-039995f721f1',
'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
'ext': 'flv',
'title': 'На корабле «Прогресс» продолжается тестирование системы «Курс»',
'description': 'md5:d0cbe7b4a6f600552617e48548d5dc77',
'duration': 20.04,
'title': 'Телеканал CNN переместил город Славянск в Крым',
'description': 'md5:419a8c9f03442bc0b0a794d689360335',
'duration': 31.05,
},
'params': {
# rtmp download
@ -40,6 +41,10 @@ class NewstubeIE(InfoExtractor):
def ns(s):
return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'}
error_message = player.find(ns('./ErrorMessage'))
if error_message is not None:
raise ExtractorError('%s returned error: %s' % (self.IE_NAME, error_message.text), expected=True)
session_id = player.find(ns('./SessionId')).text
media_info = player.find(ns('./Medias/MediaInfo'))
title = media_info.find(ns('./Name')).text

View File

@ -8,10 +8,9 @@ from ..utils import (
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
compat_str,
ExtractorError,
unified_strdate,
parse_duration,
int_or_none,
)
@ -30,6 +29,7 @@ class NiconicoIE(InfoExtractor):
'uploader_id': '2698420',
'upload_date': '20131123',
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'duration': 33,
},
'params': {
'username': 'ydl.niconico@gmail.com',
@ -37,17 +37,20 @@ class NiconicoIE(InfoExtractor):
},
}
_VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico'
# Determine whether the downloader uses authentication to download video
_AUTHENTICATE = False
def _real_initialize(self):
self._login()
if self._downloader.params.get('username', None) is not None:
self._AUTHENTICATE = True
if self._AUTHENTICATE:
self._login()
def _login(self):
(username, password) = self._get_login_info()
if username is None:
# Login is required
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
# Log in
login_form_strs = {
@ -79,44 +82,66 @@ class NiconicoIE(InfoExtractor):
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
note='Downloading video info page')
# Get flv info
flv_info_webpage = self._download_webpage(
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
video_id, 'Downloading flv info')
if self._AUTHENTICATE:
# Get flv info
flv_info_webpage = self._download_webpage(
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
video_id, 'Downloading flv info')
else:
# Get external player info
ext_player_info = self._download_webpage(
'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id)
thumb_play_key = self._search_regex(
r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey')
# Get flv info
flv_info_data = compat_urllib_parse.urlencode({
'k': thumb_play_key,
'v': video_id
})
flv_info_request = compat_urllib_request.Request(
'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
{'Content-Type': 'application/x-www-form-urlencoded'})
flv_info_webpage = self._download_webpage(
flv_info_request, video_id,
note='Downloading flv info', errnote='Unable to download flv info')
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information
video_title = video_info.find('.//title').text
video_extension = video_info.find('.//movie_type').text
video_format = video_extension.upper()
video_thumbnail = video_info.find('.//thumbnail_url').text
video_description = video_info.find('.//description').text
video_uploader_id = video_info.find('.//user_id').text
video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
video_view_count = video_info.find('.//view_counter').text
video_webpage_url = video_info.find('.//watch_url').text
title = video_info.find('.//title').text
extension = video_info.find('.//movie_type').text
video_format = extension.upper()
thumbnail = video_info.find('.//thumbnail_url').text
description = video_info.find('.//description').text
upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
view_count = int_or_none(video_info.find('.//view_counter').text)
comment_count = int_or_none(video_info.find('.//comment_num').text)
duration = parse_duration(video_info.find('.//length').text)
webpage_url = video_info.find('.//watch_url').text
# uploader
video_uploader = video_uploader_id
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
try:
user_info = self._download_xml(
url, video_id, note='Downloading user information')
video_uploader = user_info.find('.//nickname').text
except ExtractorError as err:
self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err))
if video_info.find('.//ch_id') is not None:
uploader_id = video_info.find('.//ch_id').text
uploader = video_info.find('.//ch_name').text
elif video_info.find('.//user_id') is not None:
uploader_id = video_info.find('.//user_id').text
uploader = video_info.find('.//user_nickname').text
else:
uploader_id = uploader = None
return {
'id': video_id,
'url': video_real_url,
'title': video_title,
'ext': video_extension,
'title': title,
'ext': extension,
'format': video_format,
'thumbnail': video_thumbnail,
'description': video_description,
'uploader': video_uploader,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'view_count': video_view_count,
'webpage_url': video_webpage_url,
'thumbnail': thumbnail,
'description': description,
'uploader': uploader,
'upload_date': upload_date,
'uploader_id': uploader_id,
'view_count': view_count,
'comment_count': comment_count,
'duration': duration,
'webpage_url': webpage_url,
}

View File

@ -47,7 +47,7 @@ class NineGagIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
post_view = json.loads(self._html_search_regex(
r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view'))
r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view'))
youtube_id = post_view['videoExternalId']
title = post_view['title']

View File

@ -0,0 +1,62 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
unified_strdate,
)
class NPOIE(InfoExtractor):
IE_NAME = 'npo.nl'
_VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
_TEST = {
'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
'md5': '4b3f9c429157ec4775f2c9cb7b911016',
'info_dict': {
'id': 'VPWON_1220719',
'ext': 'mp4',
'title': 'Nieuwsuur',
'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
'upload_date': '20140622',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
metadata = self._download_json(
'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
video_id,
# We have to remove the javascript callback
transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j)
)
token_page = self._download_webpage(
'http://ida.omroep.nl/npoplayer/i.js',
video_id,
note='Downloading token'
)
token = self._search_regex(r'npoplayer.token = "(.+?)"', token_page, 'token')
streams_info = self._download_json(
'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token),
video_id
)
stream_info = self._download_json(
streams_info['streams'][0] + '&type=json',
video_id,
'Downloading stream info'
)
return {
'id': video_id,
'title': metadata['titel'],
'ext': 'mp4',
'url': stream_info['url'],
'description': metadata['info'],
'thumbnail': metadata['images'][-1]['url'],
'upload_date': unified_strdate(metadata['gidsdatum']),
}

View File

@ -35,7 +35,8 @@ class RaiIE(SubtitlesInfoExtractor):
'description': '',
'upload_date': '20140612',
'duration': 1758,
}
},
'skip': 'Error 404',
},
{
'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',

View File

@ -0,0 +1,40 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class SoundgasmIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
_TEST = {
'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
'md5': '010082a2c802c5275bb00030743e75ad',
'info_dict': {
'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
'ext': 'm4a',
'title': 'ytdl_Piano-sample',
'description': 'Royalty Free Sample Music'
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('title')
audio_title = mobj.group('user') + '_' + mobj.group('title')
webpage = self._download_webpage(url, display_id)
audio_url = self._html_search_regex(
r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
audio_id = re.split('\/|\.', audio_url)[-2]
description = self._html_search_regex(
r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
fatal=False)
return {
'id': audio_id,
'display_id': display_id,
'url': audio_url,
'title': audio_title,
'description': description
}

View File

@ -20,13 +20,13 @@ class TagesschauIE(InfoExtractor):
'thumbnail': 're:^http:.*\.jpg$',
},
}, {
'url': 'http://www.tagesschau.de/multimedia/video/video-196.html',
'md5': '8aaa8bf3ae1ca2652309718c03019128',
'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html',
'md5': '66652566900963a3f962333579eeffcf',
'info_dict': {
'id': '196',
'id': '5964',
'ext': 'mp4',
'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt',
'description': 'md5:f22e4af75821d174fa6c977349682691',
'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland',
'description': 'md5:07bfc78c48eec3145ed4805299a1900a',
'thumbnail': 're:http://.*\.jpg',
},
}]

View File

@ -14,7 +14,7 @@ class TeacherTubeIE(InfoExtractor):
IE_NAME = 'teachertube'
IE_DESC = 'teachertube.com videos'
_VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=)(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
@ -45,6 +45,15 @@ class TeacherTubeIE(InfoExtractor):
'title': 'PER ASPERA AD ASTRA',
'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P',
},
}, {
'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790',
'md5': '9c79fbb2dd7154823996fc28d4a26998',
'info_dict': {
'id': '297790',
'ext': 'mp4',
'title': 'Intro Video - Schleicher',
'description': 'Intro Video - Why to flip, how flipping will',
},
}]
def _real_extract(self, url):
@ -66,6 +75,7 @@ class TeacherTubeIE(InfoExtractor):
media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage)
media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage))
media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage))
formats = [
{
@ -79,28 +89,36 @@ class TeacherTubeIE(InfoExtractor):
return {
'id': video_id,
'title': title,
'thumbnail': self._html_search_regex(r'var\s+thumbUrl\s*=\s*"([^"]+)"', webpage, 'thumbnail'),
'thumbnail': self._html_search_regex(r'\'image\'\s*:\s*["\']([^"\']+)["\']', webpage, 'thumbnail'),
'formats': formats,
'description': description,
}
class TeacherTubeClassroomIE(InfoExtractor):
IE_NAME = 'teachertube:classroom'
IE_DESC = 'teachertube.com online classrooms'
class TeacherTubeUserIE(InfoExtractor):
IE_NAME = 'teachertube:user:collection'
IE_DESC = 'teachertube.com user and collection videos'
_VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P<user>[0-9a-zA-Z]+)'
_VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'
_MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+</div>.+?<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user_id = mobj.group('user')
rss = self._download_xml(
'http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id,
user_id, 'Downloading classroom RSS')
urls = []
webpage = self._download_webpage(url, user_id)
urls.extend(re.findall(self._MEDIA_RE, webpage))
pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1]
for p in pages:
more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1))
urls.extend(re.findall(self._MEDIA_RE, webpage))
entries = []
for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'):
entries.append(self.url_result(url.attrib['url'], 'TeacherTube'))
for url in urls:
entries.append(self.url_result(url, 'TeacherTube'))
return self.playlist_result(entries, user_id)

View File

@ -1,10 +1,13 @@
# -*- coding:utf-8 -*-
from __future__ import unicode_literals
from .common import InfoExtractor
import re
class ToypicsIE(InfoExtractor):
IE_DESC = 'Toypics user profile'
_VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
_VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
_TEST = {
'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
'md5': '16e806ad6d6f58079d210fe30985e08b',
@ -61,7 +64,7 @@ class ToypicsUserIE(InfoExtractor):
note='Downloading page %d/%d' % (n, page_count))
urls.extend(
re.findall(
r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">',
r'<p class="video-entry-title">\s+<a href="(https?://videos.toypics.net/view/[^"]+)">',
lpage))
return {

View File

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
@ -10,14 +11,27 @@ from ..utils import (
class TumblrIE(InfoExtractor):
_VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)($|/)'
_TEST = {
_TESTS = [{
'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
'file': '54196191430.mp4',
'md5': '479bb068e5b16462f5176a6828829767',
'info_dict': {
"title": "tatiana maslany news"
'id': '54196191430',
'ext': 'mp4',
'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...',
'description': 'md5:dfac39636969fe6bf1caa2d50405f069',
'thumbnail': 're:http://.*\.jpg',
}
}
}, {
'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all',
'md5': 'bf348ef8c0ef84fbf1cbd6fa6e000359',
'info_dict': {
'id': '90208453769',
'ext': 'mp4',
'title': '5SOS STRUM ;)',
'description': 'md5:dba62ac8639482759c8eb10ce474586a',
'thumbnail': 're:http://.*\.jpg',
}
}]
def _real_extract(self, url):
m_url = re.match(self._VALID_URL, url)
@ -48,6 +62,7 @@ class TumblrIE(InfoExtractor):
return [{'id': video_id,
'url': video_url,
'title': video_title,
'description': self._html_search_meta('description', webpage),
'thumbnail': video_thumbnail,
'ext': ext
}]

View File

@ -49,6 +49,7 @@ class VeohIE(InfoExtractor):
'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
'uploader': 'newsy-videos',
},
'skip': 'This video has been deleted.',
},
]

View File

@ -4,7 +4,10 @@ import re
import base64
from .common import InfoExtractor
from ..utils import unified_strdate
from ..utils import (
unified_strdate,
int_or_none,
)
class VideoTtIE(InfoExtractor):
@ -50,9 +53,9 @@ class VideoTtIE(InfoExtractor):
'thumbnail': settings['config']['thumbnail'],
'upload_date': unified_strdate(video['added']),
'uploader': video['owner'],
'view_count': int(video['view_count']),
'comment_count': int(video['comment_count']),
'like_count': int(video['liked']),
'dislike_count': int(video['disliked']),
'view_count': int_or_none(video['view_count']),
'comment_count': None if video.get('comment_count') == '--' else int_or_none(video['comment_count']),
'like_count': int_or_none(video['liked']),
'dislike_count': int_or_none(video['disliked']),
'formats': formats,
}

View File

@ -16,7 +16,7 @@ from ..utils import (
class VKIE(InfoExtractor):
IE_NAME = 'vk.com'
_VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
_VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
_NETRC_MACHINE = 'vk'
_TESTS = [
@ -27,7 +27,7 @@ class VKIE(InfoExtractor):
'id': '162222515',
'ext': 'flv',
'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 'Noize MC',
'uploader': 're:Noize MC.*',
'duration': 195,
},
},
@ -62,11 +62,47 @@ class VKIE(InfoExtractor):
'id': '164049491',
'ext': 'mp4',
'uploader': 'Триллеры',
'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]\u00a0',
'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
'duration': 8352,
},
'skip': 'Requires vk account credentials',
},
{
'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
'md5': 'd82c22e449f036282d1d3f7f4d276869',
'info_dict': {
'id': '166094326',
'ext': 'mp4',
'uploader': 'Киномания - лучшее из мира кино',
'title': 'Запах женщины (1992)',
'duration': 9392,
},
'skip': 'Requires vk account credentials',
},
{
'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
'md5': '4d7a5ef8cf114dfa09577e57b2993202',
'info_dict': {
'id': '168067957',
'ext': 'mp4',
'uploader': 'Киномания - лучшее из мира кино',
'title': ' ',
'duration': 7291,
},
'skip': 'Requires vk account credentials',
},
{
'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
'note': 'ivi.ru embed',
'info_dict': {
'id': '60690',
'ext': 'mp4',
'title': 'Книга Илая',
'duration': 6771,
},
'skip': 'Only works from Russia',
},
]
def _login(self):
@ -110,6 +146,16 @@ class VKIE(InfoExtractor):
if m_yt is not None:
self.to_screen('Youtube video detected')
return self.url_result(m_yt.group(1), 'Youtube')
m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
if m_opts:
m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))
if m_opts_url:
opts_url = m_opts_url.group(1)
if opts_url.startswith('//'):
opts_url = 'http:' + opts_url
return self.url_result(opts_url)
data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
data = json.loads(data_json)

View File

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
@ -54,14 +55,14 @@ class WDRIE(InfoExtractor):
},
},
{
'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html',
'md5': 'cfff440d4ee64114083ac44676df5d15',
'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html',
'md5': '24e83813e832badb0a8d7d1ef9ef0691',
'info_dict': {
'id': 'mdb-363068',
'id': 'mdb-463528',
'ext': 'mp3',
'title': 'Grenzenlos lecker - Baklava',
'title': 'Süpersong: Soul Bossa Nova',
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
'upload_date': '20140311',
'upload_date': '20140630',
},
},
]
@ -127,9 +128,10 @@ class WDRMobileIE(InfoExtractor):
'info_dict': {
'title': '4283021',
'id': '421735',
'ext': 'mp4',
'age_limit': 0,
},
'_skip': 'Will be depublicized shortly'
'skip': 'Problems with loading data.'
}
def _real_extract(self, url):
@ -139,6 +141,7 @@ class WDRMobileIE(InfoExtractor):
'title': mobj.group('title'),
'age_limit': int(mobj.group('age_limit')),
'url': url,
'ext': determine_ext(url),
'user_agent': 'mobile',
}

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import json
import re
@ -5,14 +7,16 @@ from .common import InfoExtractor
class WistiaIE(InfoExtractor):
_VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
_VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
_TEST = {
u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt",
u"file": u"sh7fpupwlt.mov",
u"md5": u"cafeb56ec0c53c18c97405eecb3133df",
u"info_dict": {
u"title": u"cfh_resourceful_zdkh_final_1"
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
'md5': 'cafeb56ec0c53c18c97405eecb3133df',
'info_dict': {
'id': 'sh7fpupwlt',
'ext': 'mov',
'title': 'Being Resourceful',
'duration': 117,
},
}
@ -22,7 +26,7 @@ class WistiaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
data_json = self._html_search_regex(
r'Wistia.iframeInit\((.*?), {}\);', webpage, u'video data')
r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data')
data = json.loads(data_json)
@ -54,4 +58,5 @@ class WistiaIE(InfoExtractor):
'title': data['name'],
'formats': formats,
'thumbnails': thumbnails,
'duration': data.get('duration'),
}

View File

@ -1698,14 +1698,14 @@ class YoutubeSearchURLIE(InfoExtractor):
webpage = self._download_webpage(url, query)
result_code = self._search_regex(
r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')
r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML')
part_codes = re.findall(
r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
entries = []
for part_code in part_codes:
part_title = self._html_search_regex(
r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)
[r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
part_url_snippet = self._html_search_regex(
r'(?s)href="([^"]+)"', part_code, 'item URL')
part_url = compat_urlparse.urljoin(
@ -1825,10 +1825,21 @@ class YoutubeTruncatedURLIE(InfoExtractor):
IE_NAME = 'youtube:truncated_url'
IE_DESC = False # Do not list
_VALID_URL = r'''(?x)
(?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$|
(?:https?://)?[^/]+/watch\?(?:
feature=[a-z_]+|
annotation_id=annotation_[^&]+
)?$|
(?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
'''
_TESTS = [{
'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
'only_matching': True,
}, {
'url': 'http://www.youtube.com/watch?',
'only_matching': True,
}]
def _real_extract(self, url):
raise ExtractorError(
u'Did you forget to quote the URL? Remember that & is a meta '

View File

@ -816,6 +816,9 @@ def unified_strdate(date_str):
'%d %b %Y',
'%B %d %Y',
'%b %d %Y',
'%b %dst %Y %I:%M%p',
'%b %dnd %Y %I:%M%p',
'%b %dth %Y %I:%M%p',
'%Y-%m-%d',
'%d.%m.%Y',
'%d/%m/%Y',

View File

@ -1,2 +1,2 @@
__version__ = '2014.06.24.1'
__version__ = '2014.07.10'