Compare commits

..

31 Commits

Author SHA1 Message Date
Philipp Hagemeister
37709fae89 release 2014.08.29 2014-08-29 01:07:30 +02:00
Philipp Hagemeister
a81e4eb69d [promptfile] Remove quality=1 and leave it to default 2014-08-29 01:07:18 +02:00
Naglis Jonaitis
8e72edfb19 [promptfile] Add new extractor 2014-08-29 00:20:10 +03:00
Philipp Hagemeister
863f08a92e release 2014.08.28.2 2014-08-28 18:03:29 +02:00
Sergey M․
de2d9f5f1b [rtlnl] Add support for progressive videos (Closes #3603) 2014-08-28 22:54:06 +07:00
Philipp Hagemeister
a520c11241 release 2014.08.28.1 2014-08-28 14:01:01 +02:00
Philipp Hagemeister
b94744d157 [dropbox] Make sure ?dl=0 is ignore (Fixes #3605) 2014-08-28 14:00:55 +02:00
Philipp Hagemeister
753727cded [test_download] Expect a minimum file size
This should detect when we're downloading a small text file by accident.
2014-08-28 13:59:45 +02:00
Philipp Hagemeister
daebaab692 [extractor/common] Correct typo 2014-08-28 13:04:49 +02:00
Philipp Hagemeister
3524cc25ca [sportdeutschland] Add support for more plain videos 2014-08-28 10:55:32 +02:00
Philipp Hagemeister
29a7e1f261 release 2014.08.28 2014-08-28 01:38:15 +02:00
Philipp Hagemeister
df53b6358d [veehd] Modernize 2014-08-28 01:37:57 +02:00
Philipp Hagemeister
f1a9d64eea [extractor/common] Modernize 2014-08-28 01:04:43 +02:00
Philipp Hagemeister
27f774862d Merge remote-tracking branch 'origin/master' 2014-08-28 01:01:04 +02:00
Philipp Hagemeister
b759a0d4d2 fix up imports 2014-08-28 01:00:59 +02:00
Philipp Hagemeister
22a6f15061 Move playlist tests to extractors.
From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).
2014-08-28 00:58:24 +02:00
Philipp Hagemeister
259454525f [nhl] Modernize 2014-08-27 19:52:55 +02:00
Philipp Hagemeister
ce6815aad3 [dailymotion] Correct test case 2014-08-27 19:20:20 +02:00
Philipp Hagemeister
4e408e479b [utils] Modernize tests 2014-08-27 19:11:45 +02:00
Philipp Hagemeister
a9d3890fcb [ted] Remove md5sum from test
This failed multiple times, and the md5sum is not that important anyways.
2014-08-27 17:12:13 +02:00
Philipp Hagemeister
2857084000 [test_download] Improve playlist handling 2014-08-27 17:11:45 +02:00
Jaime Marquínez Ferrándiz
7798fad535 [downloader/rtmp] Use check_exectuble 2014-08-27 15:56:27 +02:00
Jaime Marquínez Ferrándiz
baf2907501 [downloader/hls] Return False if ffmpeg or avconv couldn't be found 2014-08-27 15:50:47 +02:00
Jaime Marquínez Ferrándiz
3c765c68c4 [downloader/hls] Use check_executable 2014-08-27 15:49:07 +02:00
Jaime Marquínez Ferrándiz
29153f49b3 [downloader/hls] Use the correct program when reporting an error 2014-08-27 15:40:02 +02:00
Sergey M․
071a236c5a [yahoo] Add one more pattern for content id 2014-08-27 18:59:28 +07:00
Sergey M․
10437550f4 [cbs] Update test video id 2014-08-27 18:41:24 +07:00
Philipp Hagemeister
2929b3e71d [vimeo] Move all testcases to extractors and clean up
Previously, these extractors all defined their own suitable methods.
No test_all_urls tests that everything is in order, automatically :)
2014-08-27 11:36:02 +02:00
Philipp Hagemeister
22449fa624 Improve test_all_urls output 2014-08-27 11:35:43 +02:00
Philipp Hagemeister
d36d3f420c [vimeo] Move test case to where it belongs 2014-08-27 11:13:42 +02:00
Philipp Hagemeister
60ac04e57c [facebook] Match video.php URLs 2014-08-27 11:08:47 +02:00
45 changed files with 735 additions and 722 deletions

View File

@@ -103,7 +103,8 @@ def expect_info_dict(self, expected_dict, got_dict):
self.assertTrue(
isinstance(got, compat_str),
'Expected a %r object, but got %r' % (compat_str, type(got)))
u'Expected a %s object, but got %s for field %s' % (
compat_str.__name__, type(got).__name__, info_field))
self.assertTrue(
match_rex.match(got),
u'field %s (value: %r) should match %r' % (info_field, got, match_str))

View File

@@ -27,7 +27,6 @@
"rejecttitle": null,
"retries": 10,
"simulate": false,
"skip_download": false,
"subtitleslang": null,
"subtitlesformat": "srt",
"test": true,

View File

@@ -109,7 +109,9 @@ class TestAllURLsMatching(unittest.TestCase):
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
else:
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
self.assertFalse(
ie.suitable(url),
'%s should not match URL %r . That URL belongs to %s.' % (type(ie).__name__, url, tc['name']))
def test_keywords(self):
self.assertMatch(':ytsubs', ['youtube:subscriptions'])

View File

@@ -28,6 +28,7 @@ from youtube_dl.utils import (
compat_HTTPError,
DownloadError,
ExtractorError,
format_bytes,
UnavailableVideoError,
)
from youtube_dl.extractor import get_info_extractor
@@ -103,8 +104,11 @@ def generator(test_case):
def get_tc_filename(tc):
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
def try_rm_tcs_files():
for tc in test_cases:
res_dict = None
def try_rm_tcs_files(tcs=None):
if tcs is None:
tcs = test_cases
for tc in tcs:
tc_filename = get_tc_filename(tc)
try_rm(tc_filename)
try_rm(tc_filename + '.part')
@@ -148,7 +152,15 @@ def generator(test_case):
self.assertEqual(
len(res_dict['entries']),
test_case['playlist_count'],
'Expected at %d in playlist %s, but got %d.')
'Expected %d entries in playlist %s, but got %d.' % (
test_case['playlist_count'],
test_case['url'],
len(res_dict['entries']),
))
if 'playlist_duration_sum' in test_case:
got_duration = sum(e['duration'] for e in res_dict['entries'])
self.assertEqual(
test_case['playlist_duration_sum'], got_duration)
for tc in test_cases:
tc_filename = get_tc_filename(tc)
@@ -160,12 +172,27 @@ def generator(test_case):
if 'md5' in tc:
md5_for_file = _file_md5(tc_filename)
self.assertEqual(md5_for_file, tc['md5'])
expected_minsize = tc.get('file_minsize', 10000)
if expected_minsize is not None:
if params.get('test'):
expected_minsize = max(expected_minsize, 10000)
got_fsize = os.path.getsize(tc_filename)
assertGreaterEqual(
self, got_fsize, expected_minsize,
'Expected %s to be at least %s, but it\'s only %s ' %
(tc_filename, format_bytes(expected_minsize),
format_bytes(got_fsize)))
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
expect_info_dict(self, tc.get('info_dict', {}), info_dict)
finally:
try_rm_tcs_files()
if is_playlist and res_dict is not None:
# Remove all other files that may have been extracted if the
# extractor returns full results even with extract_flat
res_tcs = [{'info_dict': e} for e in res_dict['entries']]
try_rm_tcs_files(res_tcs)
return test_template

View File

@@ -1,395 +0,0 @@
#!/usr/bin/env python
# encoding: utf-8
## DEPRECATED FILE!
# Add new tests to the extractors themselves, like this:
# _TEST = {
# 'url': 'http://example.com/playlist/42',
# 'playlist_mincount': 99,
# 'info_dict': {
# 'id': '42',
# 'title': 'Playlist number forty-two',
# }
# }
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import (
assertRegexpMatches,
assertGreaterEqual,
expect_info_dict,
FakeYDL,
)
from youtube_dl.extractor import (
AcademicEarthCourseIE,
DailymotionPlaylistIE,
DailymotionUserIE,
VimeoChannelIE,
VimeoUserIE,
VimeoAlbumIE,
VimeoGroupsIE,
VineUserIE,
UstreamChannelIE,
SoundcloudSetIE,
SoundcloudUserIE,
SoundcloudPlaylistIE,
TeacherTubeUserIE,
LivestreamIE,
LivestreamOriginalIE,
NHLVideocenterIE,
BambuserChannelIE,
BandcampAlbumIE,
SmotriCommunityIE,
SmotriUserIE,
IviCompilationIE,
ImdbListIE,
KhanAcademyIE,
EveryonesMixtapeIE,
RutubeChannelIE,
RutubePersonIE,
GoogleSearchIE,
GenericIE,
TEDIE,
ToypicsUserIE,
XTubeUserIE,
InstagramUserIE,
CSpanIE,
AolIE,
GameOnePlaylistIE,
)
class TestPlaylists(unittest.TestCase):
def assertIsPlaylist(self, info):
"""Make sure the info has '_type' set to 'playlist'"""
self.assertEqual(info['_type'], 'playlist')
def test_dailymotion_playlist(self):
dl = FakeYDL()
ie = DailymotionPlaylistIE(dl)
result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'SPORT')
self.assertTrue(len(result['entries']) > 20)
def test_dailymotion_user(self):
dl = FakeYDL()
ie = DailymotionUserIE(dl)
result = ie.extract('https://www.dailymotion.com/user/nqtv')
self.assertIsPlaylist(result)
assertGreaterEqual(self, len(result['entries']), 100)
self.assertEqual(result['title'], 'Rémi Gaillard')
def test_vimeo_channel(self):
dl = FakeYDL()
ie = VimeoChannelIE(dl)
result = ie.extract('http://vimeo.com/channels/tributes')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'Vimeo Tributes')
self.assertTrue(len(result['entries']) > 24)
def test_vimeo_user(self):
dl = FakeYDL()
ie = VimeoUserIE(dl)
result = ie.extract('http://vimeo.com/nkistudio/videos')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'Nki')
self.assertTrue(len(result['entries']) > 65)
def test_vimeo_album(self):
dl = FakeYDL()
ie = VimeoAlbumIE(dl)
result = ie.extract('http://vimeo.com/album/2632481')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'Staff Favorites: November 2013')
self.assertTrue(len(result['entries']) > 12)
def test_vimeo_groups(self):
dl = FakeYDL()
ie = VimeoGroupsIE(dl)
result = ie.extract('http://vimeo.com/groups/rolexawards')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'Rolex Awards for Enterprise')
self.assertTrue(len(result['entries']) > 72)
def test_vine_user(self):
dl = FakeYDL()
ie = VineUserIE(dl)
result = ie.extract('https://vine.co/Visa')
self.assertIsPlaylist(result)
assertGreaterEqual(self, len(result['entries']), 47)
def test_ustream_channel(self):
dl = FakeYDL()
ie = UstreamChannelIE(dl)
result = ie.extract('http://www.ustream.tv/channel/channeljapan')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '10874166')
assertGreaterEqual(self, len(result['entries']), 54)
def test_soundcloud_set(self):
dl = FakeYDL()
ie = SoundcloudSetIE(dl)
result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'The Royal Concept EP')
assertGreaterEqual(self, len(result['entries']), 6)
def test_soundcloud_user(self):
dl = FakeYDL()
ie = SoundcloudUserIE(dl)
result = ie.extract('https://soundcloud.com/the-concept-band')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '9615865')
assertGreaterEqual(self, len(result['entries']), 12)
def test_soundcloud_likes(self):
dl = FakeYDL()
ie = SoundcloudUserIE(dl)
result = ie.extract('https://soundcloud.com/the-concept-band/likes')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '9615865')
assertGreaterEqual(self, len(result['entries']), 1)
def test_soundcloud_playlist(self):
dl = FakeYDL()
ie = SoundcloudPlaylistIE(dl)
result = ie.extract('http://api.soundcloud.com/playlists/4110309')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '4110309')
self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]')
assertRegexpMatches(
self, result['description'], r'.*?TILT Brass - Bowery Poetry Club')
self.assertEqual(len(result['entries']), 6)
def test_livestream_event(self):
dl = FakeYDL()
ie = LivestreamIE(dl)
result = ie.extract('http://new.livestream.com/tedx/cityenglish')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'TEDCity2.0 (English)')
assertGreaterEqual(self, len(result['entries']), 4)
def test_livestreamoriginal_folder(self):
dl = FakeYDL()
ie = LivestreamOriginalIE(dl)
result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
assertGreaterEqual(self, len(result['entries']), 28)
def test_nhl_videocenter(self):
dl = FakeYDL()
ie = NHLVideocenterIE(dl)
result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '999')
self.assertEqual(result['title'], 'Highlights')
self.assertEqual(len(result['entries']), 12)
def test_bambuser_channel(self):
dl = FakeYDL()
ie = BambuserChannelIE(dl)
result = ie.extract('http://bambuser.com/channel/pixelversity')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'pixelversity')
assertGreaterEqual(self, len(result['entries']), 60)
def test_bandcamp_album(self):
dl = FakeYDL()
ie = BandcampAlbumIE(dl)
result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'Hierophany of the Open Grave')
assertGreaterEqual(self, len(result['entries']), 9)
def test_smotri_community(self):
dl = FakeYDL()
ie = SmotriCommunityIE(dl)
result = ie.extract('http://smotri.com/community/video/kommuna')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'kommuna')
self.assertEqual(result['title'], 'КПРФ')
assertGreaterEqual(self, len(result['entries']), 4)
def test_smotri_user(self):
dl = FakeYDL()
ie = SmotriUserIE(dl)
result = ie.extract('http://smotri.com/user/inspector')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'inspector')
self.assertEqual(result['title'], 'Inspector')
assertGreaterEqual(self, len(result['entries']), 9)
def test_AcademicEarthCourse(self):
dl = FakeYDL()
ie = AcademicEarthCourseIE(dl)
result = ie.extract('http://academicearth.org/playlists/laws-of-nature/')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'laws-of-nature')
self.assertEqual(result['title'], 'Laws of Nature')
self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')# u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
self.assertEqual(len(result['entries']), 4)
def test_ivi_compilation(self):
dl = FakeYDL()
ie = IviCompilationIE(dl)
result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dvoe_iz_lartsa')
self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)')
assertGreaterEqual(self, len(result['entries']), 24)
def test_ivi_compilation_season(self):
dl = FakeYDL()
ie = IviCompilationIE(dl)
result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa/season1')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон')
assertGreaterEqual(self, len(result['entries']), 12)
def test_imdb_list(self):
dl = FakeYDL()
ie = ImdbListIE(dl)
result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'JFs9NWw6XI0')
self.assertEqual(result['title'], 'March 23, 2012 Releases')
self.assertEqual(len(result['entries']), 7)
def test_khanacademy_topic(self):
dl = FakeYDL()
ie = KhanAcademyIE(dl)
result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'cryptography')
self.assertEqual(result['title'], 'Journey into cryptography')
self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
assertGreaterEqual(self, len(result['entries']), 3)
def test_EveryonesMixtape(self):
dl = FakeYDL()
ie = EveryonesMixtapeIE(dl)
result = ie.extract('http://everyonesmixtape.com/#/mix/m7m0jJAbMQi')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'm7m0jJAbMQi')
self.assertEqual(result['title'], 'Driving')
self.assertEqual(len(result['entries']), 24)
def test_rutube_channel(self):
dl = FakeYDL()
ie = RutubeChannelIE(dl)
result = ie.extract('http://rutube.ru/tags/video/1800/')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '1800')
assertGreaterEqual(self, len(result['entries']), 68)
def test_rutube_person(self):
dl = FakeYDL()
ie = RutubePersonIE(dl)
result = ie.extract('http://rutube.ru/video/person/313878/')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '313878')
assertGreaterEqual(self, len(result['entries']), 37)
def test_multiple_brightcove_videos(self):
# https://github.com/rg3/youtube-dl/issues/2283
dl = FakeYDL()
ie = GenericIE(dl)
result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
self.assertEqual(len(result['entries']), 3)
def test_ted_playlist(self):
dl = FakeYDL()
ie = TEDIE(dl)
result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '10')
self.assertEqual(result['title'], 'Who are the hackers?')
assertGreaterEqual(self, len(result['entries']), 6)
def test_toypics_user(self):
dl = FakeYDL()
ie = ToypicsUserIE(dl)
result = ie.extract('http://videos.toypics.net/Mikey')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'Mikey')
assertGreaterEqual(self, len(result['entries']), 17)
def test_xtube_user(self):
dl = FakeYDL()
ie = XTubeUserIE(dl)
result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'greenshowers')
assertGreaterEqual(self, len(result['entries']), 155)
def test_InstagramUser(self):
dl = FakeYDL()
ie = InstagramUserIE(dl)
result = ie.extract('http://instagram.com/porsche')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'porsche')
assertGreaterEqual(self, len(result['entries']), 2)
test_video = next(
e for e in result['entries']
if e['id'] == '614605558512799803_462752227')
dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
dl.process_video_result(test_video, download=False)
EXPECTED = {
'id': '614605558512799803_462752227',
'ext': 'mp4',
'title': '#Porsche Intelligent Performance.',
'thumbnail': 're:^https?://.*\.jpg',
'uploader': 'Porsche',
'uploader_id': 'porsche',
'timestamp': 1387486713,
'upload_date': '20131219',
}
expect_info_dict(self, EXPECTED, test_video)
def test_CSpan_playlist(self):
dl = FakeYDL()
ie = CSpanIE(dl)
result = ie.extract(
'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '342759')
self.assertEqual(
result['title'], 'General Motors Ignition Switch Recall')
whole_duration = sum(e['duration'] for e in result['entries'])
self.assertEqual(whole_duration, 14855)
def test_aol_playlist(self):
dl = FakeYDL()
ie = AolIE(dl)
result = ie.extract(
'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '152147')
self.assertEqual(
result['title'], 'Brace Yourself - Today\'s Weirdest News')
assertGreaterEqual(self, len(result['entries']), 10)
def test_TeacherTubeUser(self):
dl = FakeYDL()
ie = TeacherTubeUserIE(dl)
result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'rbhagwati2')
assertGreaterEqual(self, len(result['entries']), 179)
if __name__ == '__main__':
unittest.main()

View File

@@ -1,6 +1,8 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
@@ -13,7 +15,6 @@ import io
import json
import xml.etree.ElementTree
#from youtube_dl.utils import htmlentity_transform
from youtube_dl.utils import (
DateRange,
encodeFilename,
@@ -41,11 +42,6 @@ from youtube_dl.utils import (
uppercase_escape,
)
if sys.version_info < (3, 0):
_compat_str = lambda b: b.decode('unicode-escape')
else:
_compat_str = lambda s: s
class TestUtil(unittest.TestCase):
def test_timeconvert(self):
@@ -67,9 +63,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual('this - that', sanitize_filename('this: that'))
self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
aumlaut = _compat_str('\xe4')
aumlaut = 'ä'
self.assertEqual(sanitize_filename(aumlaut), aumlaut)
tests = _compat_str('\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430')
tests = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430'
self.assertEqual(sanitize_filename(tests), tests)
forbidden = '"\0\\/'
@@ -91,9 +87,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
tests = _compat_str('a\xe4b\u4e2d\u56fd\u7684c')
tests = 'a\xe4b\u4e2d\u56fd\u7684c'
self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c')
self.assertTrue(sanitize_filename(_compat_str('\xf6'), restricted=True) != '') # No empty filename
self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename
forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
for fc in forbidden:
@@ -101,8 +97,8 @@ class TestUtil(unittest.TestCase):
self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
# Handle a common case more neatly
self.assertEqual(sanitize_filename(_compat_str('\u5927\u58f0\u5e26 - Song'), restricted=True), 'Song')
self.assertEqual(sanitize_filename(_compat_str('\u603b\u7edf: Speech'), restricted=True), 'Speech')
self.assertEqual(sanitize_filename('\u5927\u58f0\u5e26 - Song', restricted=True), 'Song')
self.assertEqual(sanitize_filename('\u603b\u7edf: Speech', restricted=True), 'Speech')
# .. but make sure the file name is never empty
self.assertTrue(sanitize_filename('-', restricted=True) != '')
self.assertTrue(sanitize_filename(':', restricted=True) != '')
@@ -120,7 +116,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1])
def test_unescape_html(self):
self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;'))
self.assertEqual(unescapeHTML('%20;'), '%20;')
self.assertEqual(
unescapeHTML('&eacute;'), 'é')
def test_daterange(self):
_20century = DateRange("19000101","20000101")
@@ -138,7 +136,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
def test_find_xpath_attr(self):
testxml = u'''<root>
testxml = '''<root>
<node/>
<node x="a"/>
<node x="a" y="c" />
@@ -151,18 +149,18 @@ class TestUtil(unittest.TestCase):
self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
def test_meta_parser(self):
testhtml = u'''
testhtml = '''
<head>
<meta name="description" content="foo &amp; bar">
<meta content='Plato' name='author'/>
</head>
'''
get_meta = lambda name: get_meta_content(name, testhtml)
self.assertEqual(get_meta('description'), u'foo & bar')
self.assertEqual(get_meta('description'), 'foo & bar')
self.assertEqual(get_meta('author'), 'Plato')
def test_xpath_with_ns(self):
testxml = u'''<root xmlns:media="http://example.com/">
testxml = '''<root xmlns:media="http://example.com/">
<media:song>
<media:author>The Author</media:author>
<url>http://server.com/download.mp3</url>
@@ -171,8 +169,8 @@ class TestUtil(unittest.TestCase):
doc = xml.etree.ElementTree.fromstring(testxml)
find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
self.assertTrue(find('media:song') is not None)
self.assertEqual(find('media:song/media:author').text, u'The Author')
self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3')
self.assertEqual(find('media:song/media:author').text, 'The Author')
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
def test_smuggle_url(self):
data = {u"ö": u"ö", u"abc": [3]}
@@ -187,22 +185,22 @@ class TestUtil(unittest.TestCase):
self.assertEqual(res_data, None)
def test_shell_quote(self):
args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456)
def test_url_basename(self):
self.assertEqual(url_basename(u'http://foo.de/'), u'')
self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz')
self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz')
self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz')
self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz')
self.assertEqual(url_basename('http://foo.de/'), '')
self.assertEqual(url_basename('http://foo.de/bar/baz'), 'baz')
self.assertEqual(url_basename('http://foo.de/bar/baz?x=y'), 'baz')
self.assertEqual(url_basename('http://foo.de/bar/baz#x=y'), 'baz')
self.assertEqual(url_basename('http://foo.de/bar/baz/'), 'baz')
self.assertEqual(
url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'),
u'trailer.mp4')
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
'trailer.mp4')
def test_parse_duration(self):
self.assertEqual(parse_duration(None), None)
@@ -256,16 +254,16 @@ class TestUtil(unittest.TestCase):
testPL(5, 2, (20, 99), [])
def test_struct_unpack(self):
self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,))
self.assertEqual(struct_unpack('!B', b'\x00'), (0,))
def test_read_batch_urls(self):
f = io.StringIO(u'''\xef\xbb\xbf foo
f = io.StringIO('''\xef\xbb\xbf foo
bar\r
baz
# More after this line\r
; or after this
bam''')
self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
self.assertEqual(read_batch_urls(f), ['foo', 'bar', 'baz', 'bam'])
def test_urlencode_postdata(self):
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
@@ -282,8 +280,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(d, [{"id": "532cb", "x": 3}])
def test_uppercase_escape(self):
self.assertEqual(uppercase_escape(u''), u'')
self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
self.assertEqual(uppercase_escape(''), '')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
if __name__ == '__main__':
unittest.main()

View File

@@ -3,6 +3,7 @@ import subprocess
from .common import FileDownloader
from ..utils import (
check_executable,
encodeFilename,
)
@@ -19,13 +20,11 @@ class HlsFD(FileDownloader):
encodeFilename(tmpfilename, for_subprocess=True)]
for program in ['avconv', 'ffmpeg']:
try:
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
if check_executable(program, ['-version']):
break
except (OSError, IOError):
pass
else:
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
return False
cmd = [program] + args
retval = subprocess.call(cmd)
@@ -42,5 +41,5 @@ class HlsFD(FileDownloader):
return True
else:
self.to_stderr(u"\n")
self.report_error(u'ffmpeg exited with code %d' % retval)
self.report_error(u'%s exited with code %d' % (program, retval))
return False

View File

@@ -8,9 +8,10 @@ import time
from .common import FileDownloader
from ..utils import (
check_executable,
compat_str,
encodeFilename,
format_bytes,
compat_str,
)
@@ -103,9 +104,7 @@ class RtmpFD(FileDownloader):
test = self.params.get('test', False)
# Check for rtmpdump first
try:
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
if not check_executable('rtmpdump', ['-h']):
self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
return False

View File

@@ -257,6 +257,7 @@ from .podomatic import PodomaticIE
from .pornhd import PornHdIE
from .pornhub import PornHubIE
from .pornotube import PornotubeIE
from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
from .pyvideo import PyvideoIE
from .radiofrance import RadioFranceIE

View File

@@ -7,6 +7,15 @@ from .common import InfoExtractor
class AcademicEarthCourseIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
IE_NAME = 'AcademicEarth:Course'
_TEST = {
'url': 'http://academicearth.org/playlists/laws-of-nature/',
'info_dict': {
'id': 'laws-of-nature',
'title': 'Laws of Nature',
'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
},
'playlist_count': 4,
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)

View File

@@ -21,7 +21,7 @@ class AolIE(InfoExtractor):
(?:$|\?)
'''
_TEST = {
_TESTS = [{
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
'md5': '18ef68f48740e86ae94b98da815eec42',
'info_dict': {
@@ -30,7 +30,14 @@ class AolIE(InfoExtractor):
'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
},
'add_ie': ['FiveMin'],
}
}, {
'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
'info_dict': {
'id': '152147',
'title': 'Brace Yourself - Today\'s Weirdest News',
},
'playlist_mincount': 10,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -59,6 +59,13 @@ class BambuserChannelIE(InfoExtractor):
_VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
# The maximum number we can get with each request
_STEP = 50
_TEST = {
'url': 'http://bambuser.com/channel/pixelversity',
'info_dict': {
'title': 'pixelversity',
},
'playlist_mincount': 60,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -73,10 +80,10 @@ class BambuserChannelIE(InfoExtractor):
req = compat_urllib_request.Request(req_url)
# Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
info_json = self._download_webpage(req, user,
'Downloading page %d' % i)
results = json.loads(info_json)['result']
if len(results) == 0:
data = self._download_json(
req, user, 'Downloading page %d' % i)
results = data['result']
if not results:
break
last_id = results[-1]['vid']
urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)

View File

@@ -96,7 +96,7 @@ class BandcampAlbumIE(InfoExtractor):
IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'
_TEST = {
_TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
'playlist': [
{
@@ -118,7 +118,13 @@ class BandcampAlbumIE(InfoExtractor):
'playlistend': 2
},
'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
}
}, {
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
'info_dict': {
'title': 'Hierophany of the Open Grave',
},
'playlist_mincount': 9,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -25,7 +25,7 @@ class CBSIE(InfoExtractor):
}, {
'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
'info_dict': {
'id': 'P9gjWjelt6iP',
'id': 'WWF_5KqY3PK1',
'ext': 'flv',
'title': 'Live on Letterman - St. Vincent',
'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import base64
import hashlib
import json
@@ -202,17 +204,17 @@ class InfoExtractor(object):
self.report_download_webpage(video_id)
elif note is not False:
if video_id is None:
self.to_screen(u'%s' % (note,))
self.to_screen('%s' % (note,))
else:
self.to_screen(u'%s: %s' % (video_id, note))
self.to_screen('%s: %s' % (video_id, note))
try:
return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is False:
return False
if errnote is None:
errnote = u'Unable to download webpage'
errmsg = u'%s: %s' % (errnote, compat_str(err))
errnote = 'Unable to download webpage'
errmsg = '%s: %s' % (errnote, compat_str(err))
if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
else:
@@ -249,7 +251,7 @@ class InfoExtractor(object):
url = url_or_request.get_full_url()
except AttributeError:
url = url_or_request
self.to_screen(u'Dumping request to ' + url)
self.to_screen('Dumping request to ' + url)
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
if self._downloader.params.get('write_pages', False):
@@ -259,11 +261,11 @@ class InfoExtractor(object):
url = url_or_request
basen = '%s_%s' % (video_id, url)
if len(basen) > 240:
h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
basen = basen[:240 - len(h)] + h
raw_filename = basen + '.dump'
filename = sanitize_filename(raw_filename, restricted=True)
self.to_screen(u'Saving request to ' + filename)
self.to_screen('Saving request to ' + filename)
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
@@ -272,14 +274,14 @@ class InfoExtractor(object):
except LookupError:
content = webpage_bytes.decode('utf-8', 'replace')
if (u'<title>Access to this site is blocked</title>' in content and
u'Websense' in content[:512]):
msg = u'Access to this webpage has been blocked by Websense filtering software in your network.'
if ('<title>Access to this site is blocked</title>' in content and
'Websense' in content[:512]):
msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
blocked_iframe = self._html_search_regex(
r'<iframe src="([^"]+)"', content,
u'Websense information URL', default=None)
'Websense information URL', default=None)
if blocked_iframe:
msg += u' Visit %s for more details' % blocked_iframe
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
return (content, urlh)
@@ -294,7 +296,7 @@ class InfoExtractor(object):
return content
def _download_xml(self, url_or_request, video_id,
note=u'Downloading XML', errnote=u'Unable to download XML',
note='Downloading XML', errnote='Unable to download XML',
transform_source=None, fatal=True):
"""Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(
@@ -306,8 +308,8 @@ class InfoExtractor(object):
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
def _download_json(self, url_or_request, video_id,
note=u'Downloading JSON metadata',
errnote=u'Unable to download JSON metadata',
note='Downloading JSON metadata',
errnote='Unable to download JSON metadata',
transform_source=None,
fatal=True):
json_string = self._download_webpage(
@@ -322,29 +324,29 @@ class InfoExtractor(object):
raise ExtractorError('Failed to download JSON', cause=ve)
def report_warning(self, msg, video_id=None):
idstr = u'' if video_id is None else u'%s: ' % video_id
idstr = '' if video_id is None else '%s: ' % video_id
self._downloader.report_warning(
u'[%s] %s%s' % (self.IE_NAME, idstr, msg))
'[%s] %s%s' % (self.IE_NAME, idstr, msg))
def to_screen(self, msg):
"""Print msg to screen, prefixing it with '[ie_name]'"""
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))
def report_extraction(self, id_or_name):
"""Report information extraction."""
self.to_screen(u'%s: Extracting information' % id_or_name)
self.to_screen('%s: Extracting information' % id_or_name)
def report_download_webpage(self, video_id):
"""Report webpage download."""
self.to_screen(u'%s: Downloading webpage' % video_id)
self.to_screen('%s: Downloading webpage' % video_id)
def report_age_confirmation(self):
"""Report attempt to confirm age."""
self.to_screen(u'Confirming age')
self.to_screen('Confirming age')
def report_login(self):
"""Report attempt to log in."""
self.to_screen(u'Logging in')
self.to_screen('Logging in')
#Methods for following #608
@staticmethod
@@ -384,7 +386,7 @@ class InfoExtractor(object):
break
if os.name != 'nt' and sys.stderr.isatty():
_name = u'\033[0;34m%s\033[0m' % name
_name = '\033[0;34m%s\033[0m' % name
else:
_name = name
@@ -394,10 +396,10 @@ class InfoExtractor(object):
elif default is not _NO_DEFAULT:
return default
elif fatal:
raise RegexNotFoundError(u'Unable to extract %s' % _name)
raise RegexNotFoundError('Unable to extract %s' % _name)
else:
self._downloader.report_warning(u'unable to extract %s; '
u'please report this issue on http://yt-dl.org/bug' % _name)
self._downloader.report_warning('unable to extract %s; '
'please report this issue on http://yt-dl.org/bug' % _name)
return None
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
@@ -436,7 +438,7 @@ class InfoExtractor(object):
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError) as err:
self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
self._downloader.report_warning('parsing .netrc: %s' % compat_str(err))
return (username, password)
@@ -476,7 +478,7 @@ class InfoExtractor(object):
return unescapeHTML(escaped)
def _og_search_thumbnail(self, html, **kargs):
return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs)
return self._og_search_property('image', html, 'thumbnail url', fatal=False, **kargs)
def _og_search_description(self, html, **kargs):
return self._og_search_property('description', html, fatal=False, **kargs)
@@ -535,7 +537,7 @@ class InfoExtractor(object):
def _sort_formats(self, formats):
if not formats:
raise ExtractorError(u'No video formats found')
raise ExtractorError('No video formats found')
def _formats_key(f):
# TODO remove the following workaround
@@ -555,9 +557,9 @@ class InfoExtractor(object):
if f.get('vcodec') == 'none': # audio only
if self._downloader.params.get('prefer_free_formats'):
ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus']
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
else:
ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a']
ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
ext_preference = 0
try:
audio_ext_preference = ORDER.index(f['ext'])
@@ -565,9 +567,9 @@ class InfoExtractor(object):
audio_ext_preference = -1
else:
if self._downloader.params.get('prefer_free_formats'):
ORDER = [u'flv', u'mp4', u'webm']
ORDER = ['flv', 'mp4', 'webm']
else:
ORDER = [u'webm', u'flv', u'mp4']
ORDER = ['webm', 'flv', 'mp4']
try:
ext_preference = ORDER.index(f['ext'])
except ValueError:
@@ -609,7 +611,7 @@ class InfoExtractor(object):
def _sleep(self, timeout, video_id, msg_template=None):
if msg_template is None:
msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds'
msg_template = '%(video_id)s: Waiting for %(timeout)s seconds'
msg = msg_template % {'video_id': video_id, 'timeout': timeout}
self.to_screen(msg)
time.sleep(timeout)
@@ -662,6 +664,9 @@ class InfoExtractor(object):
elif line.startswith('#') or not line.strip():
continue
else:
if last_info is None:
formats.append({'url': line})
continue
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
f = {
@@ -704,7 +709,7 @@ class SearchInfoExtractor(InfoExtractor):
def _real_extract(self, query):
mobj = re.match(self._make_valid_url(), query)
if mobj is None:
raise ExtractorError(u'Invalid search query "%s"' % query)
raise ExtractorError('Invalid search query "%s"' % query)
prefix = mobj.group('prefix')
query = mobj.group('query')
@@ -715,9 +720,9 @@ class SearchInfoExtractor(InfoExtractor):
else:
n = int(prefix)
if n <= 0:
raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query))
raise ExtractorError('invalid download number %s for query "%s"' % (n, query))
elif n > self._MAX_RESULTS:
self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
n = self._MAX_RESULTS
return self._get_n_results(query, n)

View File

@@ -34,6 +34,13 @@ class CSpanIE(InfoExtractor):
'title': 'International Health Care Models',
'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
}
}, {
'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
'info_dict': {
'id': '342759',
'title': 'General Motors Ignition Switch Recall',
},
'playlist_duration_sum': 14855,
}]
def _real_extract(self, url):

View File

@@ -1,3 +1,6 @@
#coding: utf-8
from __future__ import unicode_literals
import re
import json
import itertools
@@ -28,51 +31,53 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
IE_NAME = u'dailymotion'
IE_NAME = 'dailymotion'
_FORMATS = [
(u'stream_h264_ld_url', u'ld'),
(u'stream_h264_url', u'standard'),
(u'stream_h264_hq_url', u'hq'),
(u'stream_h264_hd_url', u'hd'),
(u'stream_h264_hd1080_url', u'hd180'),
('stream_h264_ld_url', 'ld'),
('stream_h264_url', 'standard'),
('stream_h264_hq_url', 'hq'),
('stream_h264_hd_url', 'hd'),
('stream_h264_hd1080_url', 'hd180'),
]
_TESTS = [
{
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
u'file': u'x33vw9.mp4',
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
u'info_dict': {
u"uploader": u"Amphora Alex and Van .",
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
'md5': '392c4b85a60a90dc4792da41ce3144eb',
'info_dict': {
'id': 'x33vw9',
'ext': 'mp4',
'uploader': 'Amphora Alex and Van .',
'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
}
},
# Vevo video
{
u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
u'file': u'USUV71301934.mp4',
u'info_dict': {
u'title': u'Roar (Official)',
u'uploader': u'Katy Perry',
u'upload_date': u'20130905',
'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
'info_dict': {
'title': 'Roar (Official)',
'id': 'USUV71301934',
'ext': 'mp4',
'uploader': 'Katy Perry',
'upload_date': '20130905',
},
u'params': {
u'skip_download': True,
'params': {
'skip_download': True,
},
u'skip': u'VEVO is only available in some countries',
'skip': 'VEVO is only available in some countries',
},
# age-restricted video
{
u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
u'file': u'xyh2zz.mp4',
u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
u'info_dict': {
u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
u'uploader': 'HotWaves1012',
u'age_limit': 18,
'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
'md5': '0d667a7b9cebecc3c89ee93099c4159d',
'info_dict': {
'id': 'xyh2zz',
'ext': 'mp4',
'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
'uploader': 'HotWaves1012',
'age_limit': 18,
}
}
]
@@ -97,8 +102,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
webpage)
if m_vevo is not None:
vevo_id = m_vevo.group('id')
self.to_screen(u'Vevo video detected: %s' % vevo_id)
return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')
self.to_screen('Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
age_limit = self._rta_search(webpage)
@@ -109,7 +114,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
embed_page = self._download_webpage(embed_url, video_id,
u'Downloading embed page')
'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE)
info = json.loads(info)
@@ -134,7 +139,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
'height': height,
})
if not formats:
raise ExtractorError(u'Unable to extract video URL')
raise ExtractorError('Unable to extract video URL')
# subtitles
video_subtitles = self.extract_subtitles(video_id, webpage)
@@ -143,7 +148,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
return
view_count = self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, 'view count', fatal=False)
if view_count is not None:
view_count = str_to_int(view_count)
@@ -165,28 +170,35 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
video_id, note=False)
except ExtractorError as err:
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
return {}
info = json.loads(sub_list)
if (info['total'] > 0):
sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
return sub_lang_list
self._downloader.report_warning(u'video doesn\'t have subtitles')
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = u'dailymotion:playlist'
IE_NAME = 'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
_TESTS = [{
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
'info_dict': {
'title': 'SPORT',
},
'playlist_mincount': 20,
}]
def _extract_entries(self, id):
video_ids = []
for pagenum in itertools.count(1):
request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum)
id, 'Downloading page %s' % pagenum)
video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
@@ -209,9 +221,17 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = u'dailymotion:user'
IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': {
'id': 'nqtv',
'title': 'Rémi Gaillard',
},
'playlist_mincount': 100,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -219,7 +239,7 @@ class DailymotionUserIE(DailymotionPlaylistIE):
webpage = self._download_webpage(url, user)
full_user = unescapeHTML(self._html_search_regex(
r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
webpage, u'user', flags=re.DOTALL))
webpage, 'user'))
return {
'_type': 'playlist',

View File

@@ -11,8 +11,7 @@ from ..utils import compat_urllib_parse_unquote
class DropboxIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
_TEST = {
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4',
'md5': '8a3d905427a6951ccb9eb292f154530b',
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
'info_dict': {
'id': 'nelirfsxnmcfbfh',
'ext': 'mp4',
@@ -25,7 +24,9 @@ class DropboxIE(InfoExtractor):
video_id = mobj.group('id')
fn = compat_urllib_parse_unquote(mobj.group('title'))
title = os.path.splitext(fn)[0]
video_url = url + '?dl=1'
video_url = (
re.sub(r'[?&]dl=0', '', url) +
('?' if '?' in url else '&') + 'dl=1')
return {
'id': video_id,

View File

@@ -12,10 +12,11 @@ from ..utils import (
class EveryonesMixtapeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
_TEST = {
_TESTS = [{
'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
'file': '5bfseWNmlds.mp4',
"info_dict": {
'id': '5bfseWNmlds',
'ext': 'mp4',
"title": "Passion Pit - \"Sleepyhead\" (Official Music Video)",
"uploader": "FKR.TV",
"uploader_id": "frenchkissrecords",
@@ -25,7 +26,14 @@ class EveryonesMixtapeIE(InfoExtractor):
'params': {
'skip_download': True, # This is simply YouTube
}
}
}, {
'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
'info_dict': {
'id': 'm7m0jJAbMQi',
'title': 'Driving',
},
'playlist_count': 24
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -21,14 +21,14 @@ class FacebookIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://(?:\w+\.)?facebook\.com/
(?:[^#]*?\#!/)?
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
(?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?)
(?:v|video_id)=(?P<id>[0-9]+)
(?:.*)'''
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
_NETRC_MACHINE = 'facebook'
IE_NAME = 'facebook'
_TEST = {
_TESTS = [{
'url': 'https://www.facebook.com/photo.php?v=120708114770723',
'md5': '48975a41ccc4b7a581abd68651c1a5a8',
'info_dict': {
@@ -37,7 +37,10 @@ class FacebookIE(InfoExtractor):
'duration': 279,
'title': 'PEOPLE ARE AWESOME 2013',
}
}
}, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True,
}]
def _login(self):
(useremail, password) = self._get_login_info()

View File

@@ -12,6 +12,7 @@ from ..utils import (
compat_urlparse,
compat_xml_parse_error,
determine_ext,
ExtractorError,
float_or_none,
HEADRequest,
@@ -351,6 +352,20 @@ class GenericIE(InfoExtractor):
'description': 're:'
},
'playlist_mincount': 11,
},
# Multiple brightcove videos
# https://github.com/rg3/youtube-dl/issues/2283
{
'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
'info_dict': {
'id': 'always-never',
'title': 'Always / Never - The New Yorker',
},
'playlist_count': 3,
'params': {
'extract_flat': False,
'skip_download': True,
}
}
]

View File

@@ -63,6 +63,14 @@ class ImdbListIE(InfoExtractor):
IE_NAME = 'imdb:list'
IE_DESC = 'Internet Movie Database lists'
_VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
_TEST = {
'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
'info_dict': {
'id': 'JFs9NWw6XI0',
'title': 'March 23, 2012 Releases',
},
'playlist_count': 7,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -46,6 +46,30 @@ class InstagramUserIE(InfoExtractor):
_VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
IE_DESC = 'Instagram user profile'
IE_NAME = 'instagram:user'
_TEST = {
'url': 'http://instagram.com/porsche',
'info_dict': {
'id': 'porsche',
'title': 'porsche',
},
'playlist_mincount': 2,
'playlist': [{
'info_dict': {
'id': '614605558512799803_462752227',
'ext': 'mp4',
'title': '#Porsche Intelligent Performance.',
'thumbnail': 're:^https?://.*\.jpg',
'uploader': 'Porsche',
'uploader_id': 'porsche',
'timestamp': 1387486713,
'upload_date': '20131219',
},
}],
'params': {
'extract_flat': True,
'skip_download': True,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -127,6 +127,21 @@ class IviCompilationIE(InfoExtractor):
IE_DESC = 'ivi.ru compilations'
IE_NAME = 'ivi:compilation'
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
_TESTS = [{
'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
'info_dict': {
'id': 'dvoe_iz_lartsa',
'title': 'Двое из ларца (2006 - 2008)',
},
'playlist_mincount': 24,
}, {
'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
'info_dict': {
'id': 'dvoe_iz_lartsa/season1',
'title': 'Двое из ларца (2006 - 2008) 1 сезон',
},
'playlist_mincount': 12,
}]
def _extract_entries(self, html, compilation_id):
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')

View File

@@ -12,18 +12,27 @@ class KhanAcademyIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
IE_NAME = 'KhanAcademy'
_TEST = {
_TESTS = [{
'url': 'http://www.khanacademy.org/video/one-time-pad',
'file': 'one-time-pad.mp4',
'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
'info_dict': {
'id': 'one-time-pad',
'ext': 'mp4',
'title': 'The one-time pad',
'description': 'The perfect cipher',
'duration': 176,
'uploader': 'Brit Cruise',
'upload_date': '20120411',
}
}
}, {
'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
'info_dict': {
'id': 'cryptography',
'title': 'Journey into cryptography',
'description': 'How have humans protected their secret messages through history? What has changed today?',
},
'playlist_mincount': 3,
}]
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)

View File

@@ -19,7 +19,7 @@ from ..utils import (
class LivestreamIE(InfoExtractor):
IE_NAME = 'livestream'
_VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
_TEST = {
_TESTS = [{
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
'md5': '53274c76ba7754fb0e8d072716f2292b',
'info_dict': {
@@ -31,7 +31,13 @@ class LivestreamIE(InfoExtractor):
'view_count': int,
'thumbnail': 're:^http://.*\.jpg$'
}
}
}, {
'url': 'http://new.livestream.com/tedx/cityenglish',
'info_dict': {
'title': 'TEDCity2.0 (English)',
},
'playlist_mincount': 4,
}]
def _parse_smil(self, video_id, smil_url):
formats = []
@@ -111,34 +117,37 @@ class LivestreamIE(InfoExtractor):
event_name = mobj.group('event_name')
webpage = self._download_webpage(url, video_id or event_name)
og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None)
if og_video is None:
config_json = self._search_regex(
r'window.config = ({.*?});', webpage, 'window config')
info = json.loads(config_json)['event']
def is_relevant(vdata, vid):
result = vdata['type'] == 'video'
if video_id is not None:
result = result and compat_str(vdata['data']['id']) == vid
return result
videos = [self._extract_video_info(video_data['data'])
for video_data in info['feed']['data']
if is_relevant(video_data, video_id)]
if video_id is None:
# This is an event page:
return self.playlist_result(videos, info['id'], info['full_name'])
else:
if videos:
return videos[0]
else:
og_video = self._og_search_video_url(
webpage, 'player url', fatal=False, default=None)
if og_video is not None:
query_str = compat_urllib_parse_urlparse(og_video).query
query = compat_urlparse.parse_qs(query_str)
api_url = query['play_url'][0].replace('.smil', '')
info = json.loads(self._download_webpage(
api_url, video_id, 'Downloading video info'))
return self._extract_video_info(info)
if 'play_url' in query:
api_url = query['play_url'][0].replace('.smil', '')
info = json.loads(self._download_webpage(
api_url, video_id, 'Downloading video info'))
return self._extract_video_info(info)
config_json = self._search_regex(
r'window.config = ({.*?});', webpage, 'window config')
info = json.loads(config_json)['event']
def is_relevant(vdata, vid):
result = vdata['type'] == 'video'
if video_id is not None:
result = result and compat_str(vdata['data']['id']) == vid
return result
videos = [self._extract_video_info(video_data['data'])
for video_data in info['feed']['data']
if is_relevant(video_data, video_id)]
if video_id is None:
# This is an event page:
return self.playlist_result(videos, info['id'], info['full_name'])
else:
if not videos:
raise ExtractorError('Cannot find video %s' % video_id)
return videos[0]
# The original version of Livestream uses a different system
@@ -148,7 +157,7 @@ class LivestreamOriginalIE(InfoExtractor):
(?P<user>[^/]+)/(?P<type>video|folder)
(?:\?.*?Id=|/)(?P<id>.*?)(&|$)
'''
_TEST = {
_TESTS = [{
'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
'info_dict': {
'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
@@ -159,7 +168,13 @@ class LivestreamOriginalIE(InfoExtractor):
# rtmp
'skip_download': True,
},
}
}, {
'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
'info_dict': {
'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
},
'playlist_mincount': 4,
}]
def _extract_video(self, user, video_id):
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
@@ -182,15 +197,19 @@ class LivestreamOriginalIE(InfoExtractor):
def _extract_folder(self, url, folder_id):
webpage = self._download_webpage(url, folder_id)
urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
paths = orderedSet(re.findall(
r'''(?x)(?:
<li\s+class="folder">\s*<a\s+href="|
<a\s+href="(?=https?://livestre\.am/)
)([^"]+)"''', webpage))
return {
'_type': 'playlist',
'id': folder_id,
'entries': [{
'_type': 'url',
'url': video_url,
} for video_url in urls],
'url': compat_urlparse.urljoin(url, p),
} for p in paths],
}
def _real_extract(self, url):

View File

@@ -1,11 +1,9 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
strip_jsonp,
parse_duration,
unified_strdate,
)

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
import json
@@ -25,8 +27,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
})
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
path_doc = self._download_xml(path_url, video_id,
u'Downloading final video url')
path_doc = self._download_xml(
path_url, video_id, 'Downloading final video url')
video_url = path_doc.find('path').text
join = compat_urlparse.urljoin
@@ -43,17 +45,18 @@ class NHLBaseInfoExtractor(InfoExtractor):
class NHLIE(NHLBaseInfoExtractor):
IE_NAME = u'nhl.com'
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)'
IE_NAME = 'nhl.com'
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)'
_TEST = {
u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
u'file': u'453614.mp4',
u'info_dict': {
u'title': u'Quick clip: Weise 4-3 goal vs Flames',
u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.',
u'duration': 18,
u'upload_date': u'20131006',
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
'info_dict': {
'id': '453614',
'ext': 'mp4',
'title': 'Quick clip: Weise 4-3 goal vs Flames',
'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
'duration': 18,
'upload_date': '20131006',
},
}
@@ -61,23 +64,23 @@ class NHLIE(NHLBaseInfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
info_json = self._download_webpage(json_url, video_id,
u'Downloading info json')
info_json = self._fix_json(info_json)
info = json.loads(info_json)[0]
return self._extract_video(info)
data = self._download_json(
json_url, video_id, transform_source=self._fix_json)
return self._extract_video(data[0])
class NHLVideocenterIE(NHLBaseInfoExtractor):
IE_NAME = u'nhl.com:videocenter'
IE_DESC = u'NHL videocenter category'
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
@classmethod
def suitable(cls, url):
if NHLIE.suitable(url):
return False
return super(NHLVideocenterIE, cls).suitable(url)
IE_NAME = 'nhl.com:videocenter'
IE_DESC = 'NHL videocenter category'
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
_TEST = {
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
'info_dict': {
'id': '999',
'title': 'Highlights',
},
'playlist_count': 12,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -86,10 +89,10 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
cat_id = self._search_regex(
[r'var defaultCatId = "(.+?)";',
r'{statusIndex:0,index:0,.*?id:(.*?),'],
webpage, u'category id')
webpage, 'category id')
playlist_title = self._html_search_regex(
r'tab0"[^>]*?>(.*?)</td>',
webpage, u'playlist title', flags=re.DOTALL).lower().capitalize()
webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()
data = compat_urllib_parse.urlencode({
'cid': cat_id,
@@ -104,7 +107,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
response = self._fix_json(response)
if not response.strip():
self._downloader.report_warning(u'Got an empty reponse, trying '
u'adding the "newvideos" parameter')
'adding the "newvideos" parameter')
response = self._download_webpage(request_url + '&newvideos=true',
playlist_title)
response = self._fix_json(response)
@@ -114,5 +117,5 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
'_type': 'playlist',
'title': playlist_title,
'id': cat_id,
'entries': [self._extract_video(i) for i in videos],
'entries': [self._extract_video(v) for v in videos],
}

View File

@@ -0,0 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
compat_urllib_parse,
compat_urllib_request,
)
class PromptFileIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
_FILE_NOT_FOUND_REGEX = r'<div.+id="not_found_msg".+>.+</div>[^-]'
_TEST = {
'url': 'http://www.promptfile.com/l/D21B4746E9-F01462F0FF',
'md5': 'd1451b6302da7215485837aaea882c4c',
'info_dict': {
'id': 'D21B4746E9-F01462F0FF',
'ext': 'mp4',
'title': 'Birds.mp4',
'thumbnail': 're:^https?://.*\.jpg$',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id,
expected=True)
fields = dict(re.findall(r'''(?x)type="hidden"\s+
name="(.+?)"\s+
value="(.*?)"
''', webpage))
post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage(
req, video_id, 'Downloading video page')
url = self._html_search_regex(r'url:\s*\'([^\']+)\'', webpage, 'URL')
title = self._html_search_regex(
r'<span.+title="([^"]+)">', webpage, 'title')
thumbnail = self._html_search_regex(
r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"',
webpage, 'thumbnail', fatal=False, flags=re.DOTALL)
formats = [{
'format_id': 'sd',
'url': url,
'ext': determine_ext(title),
}]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}

View File

@@ -12,22 +12,16 @@ class RtlXlIE(InfoExtractor):
_TEST = {
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
'md5': 'cc16baa36a6c169391f0764fa6b16654',
'info_dict': {
'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
'ext': 'flv',
'ext': 'mp4',
'title': 'RTL Nieuws - Laat',
'description': 'Dagelijks het laatste nieuws uit binnen- en '
'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van '
'onze mobiele apps.',
'description': 'md5:6b61f66510c8889923b11f2778c72dc5',
'timestamp': 1408051800,
'upload_date': '20140814',
'duration': 576.880,
},
'params': {
# We download the first bytes of the first fragment, it can't be
# processed by the f4m downloader beacuse it isn't complete
'skip_download': True,
},
}
def _real_extract(self, url):
@@ -41,14 +35,32 @@ class RtlXlIE(InfoExtractor):
material = info['material'][0]
episode_info = info['episodes'][0]
f4m_url = 'http://manifest.us.rtl.nl' + material['videopath']
progname = info['abstracts'][0]['name']
subtitle = material['title'] or info['episodes'][0]['name']
videopath = material['videopath']
f4m_url = 'http://manifest.us.rtl.nl' + videopath
formats = self._extract_f4m_formats(f4m_url, uuid)
video_urlpart = videopath.split('/flash/')[1][:-4]
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
formats.extend([
{
'url': PG_URL_TEMPLATE % ('a2m', video_urlpart),
'format_id': 'pg-sd',
},
{
'url': PG_URL_TEMPLATE % ('a3m', video_urlpart),
'format_id': 'pg-hd',
}
])
return {
'id': uuid,
'title': '%s - %s' % (progname, subtitle),
'formats': self._extract_f4m_formats(f4m_url, uuid),
'formats': formats,
'timestamp': material['original_date'],
'description': episode_info['synopsis'],
'duration': parse_duration(material.get('duration')),

View File

@@ -74,6 +74,13 @@ class RutubeChannelIE(InfoExtractor):
IE_NAME = 'rutube:channel'
IE_DESC = 'Rutube channels'
_VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://rutube.ru/tags/video/1800/',
'info_dict': {
'id': '1800',
},
'playlist_mincount': 68,
}]
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
@@ -101,6 +108,7 @@ class RutubeMovieIE(RutubeChannelIE):
IE_NAME = 'rutube:movie'
IE_DESC = 'Rutube movies'
_VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
_TESTS = []
_MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
_PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
@@ -119,5 +127,12 @@ class RutubePersonIE(RutubeChannelIE):
IE_NAME = 'rutube:person'
IE_DESC = 'Rutube person videos'
_VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
_TESTS = [{
'url': 'http://rutube.ru/video/person/313878/',
'info_dict': {
'id': '313878',
},
'playlist_mincount': 37,
}]
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'

View File

@@ -267,6 +267,14 @@ class SmotriCommunityIE(InfoExtractor):
IE_DESC = 'Smotri.com community videos'
IE_NAME = 'smotri:community'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
_TEST = {
'url': 'http://smotri.com/community/video/kommuna',
'info_dict': {
'id': 'kommuna',
'title': 'КПРФ',
},
'playlist_mincount': 4,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -289,6 +297,14 @@ class SmotriUserIE(InfoExtractor):
IE_DESC = 'Smotri.com user videos'
IE_NAME = 'smotri:user'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
_TESTS = [{
'url': 'http://smotri.com/user/inspector',
'info_dict': {
'id': 'inspector',
'title': 'Inspector',
},
'playlist_mincount': 9,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -28,7 +28,8 @@ class SoundcloudIE(InfoExtractor):
_VALID_URL = r'''(?x)^(?:https?://)?
(?:(?:(?:www\.|m\.)?soundcloud\.com/
(?P<uploader>[\w\d-]+)/
(?!sets/)(?P<title>[\w\d-]+)/?
(?!sets/|likes/?(?:$|[?#]))
(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
@@ -221,13 +222,16 @@ class SoundcloudIE(InfoExtractor):
class SoundcloudSetIE(SoundcloudIE):
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
IE_NAME = 'soundcloud:set'
# it's in tests/test_playlists.py
_TESTS = []
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
'info_dict': {
'title': 'The Royal Concept EP',
},
'playlist_mincount': 6,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
# extract uploader (which is in the url)
uploader = mobj.group(1)
@@ -246,20 +250,32 @@ class SoundcloudSetIE(SoundcloudIE):
self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
return
self.report_extraction(full_title)
return {'_type': 'playlist',
'entries': [self._extract_info_dict(track) for track in info['tracks']],
'id': info['id'],
'title': info['title'],
}
return {
'_type': 'playlist',
'entries': [self._extract_info_dict(track) for track in info['tracks']],
'id': info['id'],
'title': info['title'],
}
class SoundcloudUserIE(SoundcloudIE):
_VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
IE_NAME = 'soundcloud:user'
# it's in tests/test_playlists.py
_TESTS = []
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band',
'info_dict': {
'id': '9615865',
'title': 'The Royal Concept',
},
'playlist_mincount': 12
}, {
'url': 'https://soundcloud.com/the-concept-band/likes',
'info_dict': {
'id': '9615865',
'title': 'The Royal Concept',
},
'playlist_mincount': 1,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -301,9 +317,18 @@ class SoundcloudUserIE(SoundcloudIE):
class SoundcloudPlaylistIE(SoundcloudIE):
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'
IE_NAME = 'soundcloud:playlist'
_TESTS = [
# it's in tests/test_playlists.py
_TESTS = []
{
'url': 'http://api.soundcloud.com/playlists/4110309',
'info_dict': {
'id': '4110309',
'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
'description': 're:.*?TILT Brass - Bowery Poetry Club',
},
'playlist_count': 6,
}
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -12,7 +12,7 @@ from ..utils import (
class SportDeutschlandIE(InfoExtractor):
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
_TEST = {
_TESTS = [{
'url': 'http://sportdeutschland.tv/badminton/live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
'info_dict': {
'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
@@ -20,15 +20,28 @@ class SportDeutschlandIE(InfoExtractor):
'title': 'LIVE: Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
'categories': ['Badminton'],
'view_count': int,
'thumbnail': 're:^https?://.*\.jpg',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 're:^Die Badminton-WM 2014 aus Kopenhagen LIVE',
'timestamp': 1409043600,
'upload_date': '20140826',
'timestamp': int,
'upload_date': 're:^201408[23][0-9]$',
},
'params': {
'skip_download': 'Live stream',
},
}
}, {
'url': 'http://sportdeutschland.tv/li-ning-badminton-wm-2014/lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
'info_dict': {
'id': 'lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
'ext': 'mp4',
'upload_date': '20140825',
'description': 'md5:60a20536b57cee7d9a4ec005e8687504',
'timestamp': 1408976060,
'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. Keun Lee',
'thumbnail': 're:^https?://.*\.jpg$',
'view_count': int,
'categories': ['Li-Ning Badminton WM 2014'],
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -46,21 +59,27 @@ class SportDeutschlandIE(InfoExtractor):
categories = list(data.get('section', {}).get('tags', {}).values())
asset = data['asset']
formats = []
smil_url = asset['video']
m3u8_url = smil_url.replace('.smil', '.m3u8')
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
if '.smil' in smil_url:
m3u8_url = smil_url.replace('.smil', '.m3u8')
formats.extend(
self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))
smil_doc = self._download_xml(
smil_url, video_id, note='Downloading SMIL metadata')
base_url = smil_doc.find('./head/meta').attrib['base']
formats.extend([{
'format_id': 'rmtp',
'url': base_url,
'play_path': n.attrib['src'],
'ext': 'flv',
'preference': -100,
'format_note': 'Seems to fail at example stream',
} for n in smil_doc.findall('./body/video')])
else:
formats.append({'url': smil_url})
smil_doc = self._download_xml(
smil_url, video_id, note='Downloading SMIL metadata')
base_url = smil_doc.find('./head/meta').attrib['base']
formats.extend([{
'format_id': 'rmtp',
'url': base_url,
'play_path': n.attrib['src'],
'ext': 'flv',
'preference': -100,
'format_note': 'Seems to fail at example stream',
} for n in smil_doc.findall('./body/video')])
self._sort_formats(formats)
return {
@@ -71,7 +90,7 @@ class SportDeutschlandIE(InfoExtractor):
'description': asset.get('teaser'),
'categories': categories,
'view_count': asset.get('views'),
'rtmp_live': asset['live'],
'rtmp_live': asset.get('live'),
'timestamp': parse_iso8601(asset.get('date')),
}

View File

@@ -106,6 +106,13 @@ class TeacherTubeUserIE(InfoExtractor):
\s*
<a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
'''
_TEST = {
'url': 'http://www.teachertube.com/user/profile/rbhagwati2',
'info_dict': {
'id': 'rbhagwati2'
},
'playlist_mincount': 179,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -51,7 +51,6 @@ class TEDIE(SubtitlesInfoExtractor):
}
}, {
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
'md5': '49144e345a899b8cb34d315f3b9cfeeb',
'info_dict': {
'id': '1972',
'ext': 'mp4',
@@ -59,6 +58,13 @@ class TEDIE(SubtitlesInfoExtractor):
'uploader': 'Gabby Giffords and Mark Kelly',
'description': 'md5:5174aed4d0f16021b704120360f72b92',
},
}, {
'url': 'http://www.ted.com/playlists/who_are_the_hackers',
'info_dict': {
'id': '10',
'title': 'Who are the hackers?',
},
'playlist_mincount': 6,
}]
_NATIVE_FORMATS = {

View File

@@ -42,6 +42,13 @@ class ToypicsIE(InfoExtractor):
class ToypicsUserIE(InfoExtractor):
IE_DESC = 'Toypics user profile'
_VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
_TEST = {
'url': 'http://videos.toypics.net/Mikey',
'info_dict': {
'id': 'Mikey',
},
'playlist_mincount': 9917,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -1,6 +1,5 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
@@ -68,21 +67,36 @@ class UstreamIE(InfoExtractor):
class UstreamChannelIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
IE_NAME = 'ustream:channel'
_TEST = {
'url': 'http://www.ustream.tv/channel/channeljapan',
'info_dict': {
'id': '10874166',
},
'playlist_mincount': 54,
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
slug = m.group('slug')
webpage = self._download_webpage(url, slug)
display_id = m.group('slug')
webpage = self._download_webpage(url, display_id)
channel_id = get_meta_content('ustream:channel_id', webpage)
BASE = 'http://www.ustream.tv'
next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
video_ids = []
while next_url:
reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
reply = self._download_json(
compat_urlparse.urljoin(BASE, next_url), display_id,
note='Downloading video information (next: %d)' % (len(video_ids) + 1))
video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
next_url = reply['nextUrl']
urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
return self.playlist_result(url_entries, channel_id)
entries = [
self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
for vid in video_ids]
return {
'_type': 'playlist',
'id': channel_id,
'display_id': display_id,
'entries': entries,
}

View File

@@ -16,8 +16,9 @@ class VeeHDIE(InfoExtractor):
_TEST = {
'url': 'http://veehd.com/video/4686958',
'file': '4686958.mp4',
'info_dict': {
'id': '4686958',
'ext': 'mp4',
'title': 'Time Lapse View from Space ( ISS)',
'uploader_id': 'spotted',
'description': 'md5:f0094c4cf3a72e22bc4e4239ef767ad7',

View File

@@ -57,6 +57,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
(?P<proto>(?:https?:)?//)?
(?:(?:www|(?P<player>player))\.)?
vimeo(?P<pro>pro)?\.com/
(?!channels/[^/?#]+/?(?:$|[?#])|album/)
(?:.*?/)?
(?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
(?:videos?/)?
@@ -151,30 +152,8 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
'duration': 62,
}
},
{
'note': 'video player needs Referer',
'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
'md5': '6295fdab8f4bf6a002d058b2c6dce276',
'info_dict': {
'id': '91613211',
'ext': 'mp4',
'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn',
'uploader': 'DevWeek Events',
'duration': 2773,
'thumbnail': 're:^https?://.*\.jpg$',
}
}
]
@classmethod
def suitable(cls, url):
if VimeoChannelIE.suitable(url):
# Otherwise channel urls like http://vimeo.com/channels/31259 would
# match
return False
else:
return super(VimeoIE, cls).suitable(url)
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword', None)
if password is None:
@@ -393,9 +372,16 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
class VimeoChannelIE(InfoExtractor):
IE_NAME = 'vimeo:channel'
_VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)/?(\?.*)?$'
_VALID_URL = r'https?://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
_TESTS = [{
'url': 'http://vimeo.com/channels/tributes',
'info_dict': {
'title': 'Vimeo Tributes',
},
'playlist_mincount': 25,
}]
def _page_url(self, base_url, pagenum):
return '%s/videos/page:%d/' % (base_url, pagenum)
@@ -429,14 +415,15 @@ class VimeoChannelIE(InfoExtractor):
class VimeoUserIE(VimeoChannelIE):
IE_NAME = 'vimeo:user'
_VALID_URL = r'(?:https?://)?vimeo\.com/(?P<name>[^/]+)(?:/videos|[#?]|$)'
_VALID_URL = r'https?://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
@classmethod
def suitable(cls, url):
if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url) or VimeoAlbumIE.suitable(url) or VimeoGroupsIE.suitable(url):
return False
return super(VimeoUserIE, cls).suitable(url)
_TESTS = [{
'url': 'http://vimeo.com/nkistudio/videos',
'info_dict': {
'title': 'Nki',
},
'playlist_mincount': 66,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -446,8 +433,15 @@ class VimeoUserIE(VimeoChannelIE):
class VimeoAlbumIE(VimeoChannelIE):
IE_NAME = 'vimeo:album'
_VALID_URL = r'(?:https?://)?vimeo\.com/album/(?P<id>\d+)'
_VALID_URL = r'https?://vimeo\.com/album/(?P<id>\d+)'
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
_TESTS = [{
'url': 'http://vimeo.com/album/2632481',
'info_dict': {
'title': 'Staff Favorites: November 2013',
},
'playlist_mincount': 13,
}]
def _page_url(self, base_url, pagenum):
return '%s/page:%d/' % (base_url, pagenum)
@@ -461,6 +455,13 @@ class VimeoAlbumIE(VimeoChannelIE):
class VimeoGroupsIE(VimeoAlbumIE):
IE_NAME = 'vimeo:group'
_VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)'
_TESTS = [{
'url': 'http://vimeo.com/groups/rolexawards',
'info_dict': {
'title': 'Rolex Awards for Enterprise',
},
'playlist_mincount': 73,
}]
def _extract_list_title(self, webpage):
return self._og_search_title(webpage)
@@ -474,8 +475,8 @@ class VimeoGroupsIE(VimeoAlbumIE):
class VimeoReviewIE(InfoExtractor):
IE_NAME = 'vimeo:review'
IE_DESC = 'Review pages on vimeo'
_VALID_URL = r'(?:https?://)?vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
_TEST = {
_VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
'file': '75524534.mp4',
'md5': 'c507a72f780cacc12b2248bb4006d253',
@@ -483,7 +484,19 @@ class VimeoReviewIE(InfoExtractor):
'title': "DICK HARDWICK 'Comedian'",
'uploader': 'Richard Hardwick',
}
}
}, {
'note': 'video player needs Referer',
'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
'md5': '6295fdab8f4bf6a002d058b2c6dce276',
'info_dict': {
'id': '91613211',
'ext': 'mp4',
'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn',
'uploader': 'DevWeek Events',
'duration': 2773,
'thumbnail': 're:^https?://.*\.jpg$',
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -498,6 +511,10 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
_VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater'
_LOGIN_REQUIRED = True
_TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
_TESTS = [{
'url': 'http://vimeo.com/home/watchlater',
'only_matching': True,
}]
def _real_initialize(self):
self._login()

View File

@@ -65,6 +65,13 @@ class VineUserIE(InfoExtractor):
IE_NAME = 'vine:user'
_VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
_VINE_BASE_URL = "https://vine.co/"
_TEST = {
'url': 'https://vine.co/Visa',
'info_dict': {
'id': 'Visa',
},
'playlist_mincount': 47,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -77,9 +77,17 @@ class XTubeIE(InfoExtractor):
'age_limit': 18,
}
class XTubeUserIE(InfoExtractor):
IE_DESC = 'XTube user profile'
_VALID_URL = r'https?://(?:www\.)?xtube\.com/community/profile\.php\?(.*?)user=(?P<username>[^&#]+)(?:$|[&#])'
_TEST = {
'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
'info_dict': {
'id': 'greenshowers',
},
'playlist_mincount': 155,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -71,7 +71,8 @@ class YahooIE(InfoExtractor):
if items_json is None:
CONTENT_ID_REGEXES = [
r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"'
r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
r'"first_videoid"\s*:\s*"([^"]+)"',
]
long_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
video_id = long_id

View File

@@ -298,30 +298,6 @@ def xpath_with_ns(path, ns_map):
replaced.append('{%s}%s' % (ns_map[ns], tag))
return '/'.join(replaced)
def htmlentity_transform(matchobj):
"""Transforms an HTML entity to a character.
This function receives a match object and is intended to be used with
the re.sub() function.
"""
entity = matchobj.group(1)
# Known non-numeric HTML entity
if entity in compat_html_entities.name2codepoint:
return compat_chr(compat_html_entities.name2codepoint[entity])
mobj = re.match(u'(?u)#(x?\\d+)', entity)
if mobj is not None:
numstr = mobj.group(1)
if numstr.startswith(u'x'):
base = 16
numstr = u'0%s' % numstr
else:
base = 10
return compat_chr(int(numstr, base))
# Unknown entity in name, return its literal representation
return (u'&%s;' % entity)
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class BaseHTMLParser(compat_html_parser.HTMLParser):
@@ -543,13 +519,33 @@ def orderedSet(iterable):
return res
def _htmlentity_transform(entity):
"""Transforms an HTML entity to a character."""
# Known non-numeric HTML entity
if entity in compat_html_entities.name2codepoint:
return compat_chr(compat_html_entities.name2codepoint[entity])
mobj = re.match(r'#(x?[0-9]+)', entity)
if mobj is not None:
numstr = mobj.group(1)
if numstr.startswith(u'x'):
base = 16
numstr = u'0%s' % numstr
else:
base = 10
return compat_chr(int(numstr, base))
# Unknown entity in name, return its literal representation
return (u'&%s;' % entity)
def unescapeHTML(s):
if s is None:
return None
assert type(s) == compat_str
result = re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
return result
return re.sub(
r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
def encodeFilename(s, for_subprocess=False):

View File

@@ -1,2 +1,2 @@
__version__ = '2014.08.27.1'
__version__ = '2014.08.29'