mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-12-13 01:22:44 +01:00
Compare commits
52 Commits
2013.11.18
...
2013.11.22
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
50123be421 | ||
|
|
3f8ced5144 | ||
|
|
00ea0f11eb | ||
|
|
0b63aed8df | ||
|
|
15c3adbb16 | ||
|
|
f143a42fe6 | ||
|
|
241650c7ff | ||
|
|
bfe7439a20 | ||
|
|
cffa6aa107 | ||
|
|
02e4ebbbad | ||
|
|
ab009f59ef | ||
|
|
0980426559 | ||
|
|
b1c9c66936 | ||
|
|
a6a173c2fd | ||
|
|
2bb683c201 | ||
|
|
64bb5187f5 | ||
|
|
9e4f50a8ae | ||
|
|
0190eecc00 | ||
|
|
ca872a4c0b | ||
|
|
f2e87ef4fa | ||
|
|
0ad97bbc05 | ||
|
|
c4864091a1 | ||
|
|
9a98a466b3 | ||
|
|
f99e0f1ed6 | ||
|
|
d323bcb152 | ||
|
|
da6a795fdb | ||
|
|
c5edcde21f | ||
|
|
15ff3c831e | ||
|
|
100959a6d9 | ||
|
|
0a120f74b2 | ||
|
|
8f05351984 | ||
|
|
4eb92208a3 | ||
|
|
71791f414c | ||
|
|
f3682997d7 | ||
|
|
cc13cc0251 | ||
|
|
86bd5f2ca9 | ||
|
|
8694c60000 | ||
|
|
9d1538182f | ||
|
|
5904088811 | ||
|
|
69545c2aff | ||
|
|
495da337ae | ||
|
|
34b3afc7be | ||
|
|
00373a4c5d | ||
|
|
cb7dfeeac4 | ||
|
|
efd6c574a2 | ||
|
|
4113e6ab56 | ||
|
|
ba3881dffd | ||
|
|
08bc37cdd0 | ||
|
|
9771cceb2c | ||
|
|
880e1c529d | ||
|
|
dcbb45803f | ||
|
|
0bd59f3723 |
@@ -123,6 +123,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--get-description simulate, quiet but print video description
|
||||
--get-filename simulate, quiet but print output filename
|
||||
--get-format simulate, quiet but print output format
|
||||
-j, --dump-json simulate, quiet but print JSON information
|
||||
--newline output progress bar as new lines
|
||||
--no-progress do not print progress bar
|
||||
--console-title display progress in console titlebar
|
||||
|
||||
2
setup.py
2
setup.py
@@ -48,7 +48,7 @@ else:
|
||||
'data_files': [ # Installing system-wide would require sudo...
|
||||
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
|
||||
('share/doc/youtube_dl', ['README.txt']),
|
||||
('share/man/man1/', ['youtube-dl.1'])
|
||||
('share/man/man1', ['youtube-dl.1'])
|
||||
]
|
||||
}
|
||||
if setuptools_available:
|
||||
|
||||
@@ -24,7 +24,7 @@ def _download_restricted(url, filename, age):
|
||||
}
|
||||
ydl = YoutubeDL(params)
|
||||
ydl.add_default_info_extractors()
|
||||
json_filename = filename + '.info.json'
|
||||
json_filename = os.path.splitext(filename)[0] + '.info.json'
|
||||
try_rm(json_filename)
|
||||
ydl.download([url])
|
||||
res = os.path.exists(json_filename)
|
||||
|
||||
@@ -103,7 +103,7 @@ def generator(test_case):
|
||||
tc_filename = get_tc_filename(tc)
|
||||
try_rm(tc_filename)
|
||||
try_rm(tc_filename + '.part')
|
||||
try_rm(tc_filename + '.info.json')
|
||||
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
|
||||
try_rm_tcs_files()
|
||||
try:
|
||||
try_num = 1
|
||||
@@ -130,11 +130,12 @@ def generator(test_case):
|
||||
if not test_case.get('params', {}).get('skip_download', False):
|
||||
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
|
||||
self.assertTrue(tc_filename in finished_hook_called)
|
||||
self.assertTrue(os.path.exists(tc_filename + '.info.json'))
|
||||
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
|
||||
self.assertTrue(os.path.exists(info_json_fn))
|
||||
if 'md5' in tc:
|
||||
md5_for_file = _file_md5(tc_filename)
|
||||
self.assertEqual(md5_for_file, tc['md5'])
|
||||
with io.open(tc_filename + '.info.json', encoding='utf-8') as infof:
|
||||
with io.open(info_json_fn, encoding='utf-8') as infof:
|
||||
info_dict = json.load(infof)
|
||||
for (info_field, expected) in tc.get('info_dict', {}).items():
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
|
||||
@@ -22,6 +22,7 @@ from youtube_dl.extractor import (
|
||||
LivestreamIE,
|
||||
NHLVideocenterIE,
|
||||
BambuserChannelIE,
|
||||
BandcampAlbumIE
|
||||
)
|
||||
|
||||
|
||||
@@ -103,5 +104,13 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['title'], u'pixelversity')
|
||||
self.assertTrue(len(result['entries']) >= 66)
|
||||
|
||||
def test_bandcamp_album(self):
|
||||
dl = FakeYDL()
|
||||
ie = BandcampAlbumIE(dl)
|
||||
result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], u'Nightmare Night EP')
|
||||
self.assertTrue(len(result['entries']) >= 4)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -24,6 +24,8 @@ from youtube_dl.utils import (
|
||||
xpath_with_ns,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
shell_quote,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
@@ -170,6 +172,10 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(res_url, url)
|
||||
self.assertEqual(res_data, None)
|
||||
|
||||
def test_shell_quote(self):
|
||||
args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
|
||||
self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -31,7 +31,7 @@ params = get_params({
|
||||
|
||||
|
||||
TEST_ID = 'BaW_jenozKc'
|
||||
INFO_JSON_FILE = TEST_ID + '.mp4.info.json'
|
||||
INFO_JSON_FILE = TEST_ID + '.info.json'
|
||||
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
|
||||
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
def test_youtube_playlist(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'ytdl test PL')
|
||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||
@@ -44,13 +44,13 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
def test_issue_673(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('PLBB231211A4F62143')[0]
|
||||
result = ie.extract('PLBB231211A4F62143')
|
||||
self.assertTrue(len(result['entries']) > 25)
|
||||
|
||||
def test_youtube_playlist_long(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertTrue(len(result['entries']) >= 799)
|
||||
|
||||
@@ -58,7 +58,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
#651
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
||||
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
||||
@@ -66,7 +66,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
def test_youtube_playlist_empty(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(len(result['entries']), 0)
|
||||
|
||||
@@ -74,7 +74,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
# TODO find a > 100 (paginating?) videos course
|
||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
|
||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
entries = result['entries']
|
||||
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||
self.assertEqual(len(entries), 25)
|
||||
@@ -99,7 +99,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
def test_youtube_safe_search(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
|
||||
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
|
||||
self.assertEqual(len(result['entries']), 2)
|
||||
|
||||
def test_youtube_show(self):
|
||||
|
||||
@@ -5,6 +5,7 @@ from __future__ import absolute_import
|
||||
|
||||
import errno
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
@@ -84,6 +85,7 @@ class YoutubeDL(object):
|
||||
forcethumbnail: Force printing thumbnail URL.
|
||||
forcedescription: Force printing description.
|
||||
forcefilename: Force printing final filename.
|
||||
forcejson: Force printing info_dict as JSON.
|
||||
simulate: Do not download the video files.
|
||||
format: Video format code.
|
||||
format_limit: Highest quality format to try.
|
||||
@@ -217,13 +219,15 @@ class YoutubeDL(object):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if 'TERM' in os.environ:
|
||||
write_string(u'\033[22t', self._screen_file)
|
||||
# Save the title on stack
|
||||
write_string(u'\033[22;0t', self._screen_file)
|
||||
|
||||
def restore_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if 'TERM' in os.environ:
|
||||
write_string(u'\033[23t', self._screen_file)
|
||||
# Restore the title from stack
|
||||
write_string(u'\033[23;0t', self._screen_file)
|
||||
|
||||
def __enter__(self):
|
||||
self.save_console_title()
|
||||
@@ -635,7 +639,7 @@ class YoutubeDL(object):
|
||||
|
||||
# Forced printings
|
||||
if self.params.get('forcetitle', False):
|
||||
compat_print(info_dict['title'])
|
||||
compat_print(info_dict['fulltitle'])
|
||||
if self.params.get('forceid', False):
|
||||
compat_print(info_dict['id'])
|
||||
if self.params.get('forceurl', False):
|
||||
@@ -649,6 +653,8 @@ class YoutubeDL(object):
|
||||
compat_print(filename)
|
||||
if self.params.get('forceformat', False):
|
||||
compat_print(info_dict['format'])
|
||||
if self.params.get('forcejson', False):
|
||||
compat_print(json.dumps(info_dict))
|
||||
|
||||
# Do nothing else if in simulate mode
|
||||
if self.params.get('simulate', False):
|
||||
@@ -711,7 +717,7 @@ class YoutubeDL(object):
|
||||
return
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = filename + u'.info.json'
|
||||
infofn = os.path.splitext(filename)[0] + u'.info.json'
|
||||
self.report_writeinfojson(infofn)
|
||||
try:
|
||||
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
|
||||
|
||||
@@ -34,6 +34,7 @@ __authors__ = (
|
||||
'Andras Elso',
|
||||
'Jelle van der Waa',
|
||||
'Marcin Cieślak',
|
||||
'Anton Larionov',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
@@ -306,6 +307,9 @@ def parseOpts(overrideArguments=None):
|
||||
verbosity.add_option('--get-format',
|
||||
action='store_true', dest='getformat',
|
||||
help='simulate, quiet but print output format', default=False)
|
||||
verbosity.add_option('-j', '--dump-json',
|
||||
action='store_true', dest='dumpjson',
|
||||
help='simulate, quiet but print JSON information', default=False)
|
||||
verbosity.add_option('--newline',
|
||||
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
|
||||
verbosity.add_option('--no-progress',
|
||||
@@ -608,7 +612,7 @@ def _real_main(argv=None):
|
||||
'username': opts.username,
|
||||
'password': opts.password,
|
||||
'videopassword': opts.videopassword,
|
||||
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
||||
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
|
||||
'forceurl': opts.geturl,
|
||||
'forcetitle': opts.gettitle,
|
||||
'forceid': opts.getid,
|
||||
@@ -616,8 +620,9 @@ def _real_main(argv=None):
|
||||
'forcedescription': opts.getdescription,
|
||||
'forcefilename': opts.getfilename,
|
||||
'forceformat': opts.getformat,
|
||||
'forcejson': opts.dumpjson,
|
||||
'simulate': opts.simulate,
|
||||
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
||||
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
|
||||
'format': opts.format,
|
||||
'format_limit': opts.format_limit,
|
||||
'listformats': opts.listformats,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from .appletrailers import AppleTrailersIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .anitube import AnitubeIE
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .ard import ARDIE
|
||||
from .arte import (
|
||||
@@ -10,7 +11,7 @@ from .arte import (
|
||||
)
|
||||
from .auengine import AUEngineIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .breakcom import BreakIE
|
||||
@@ -25,6 +26,7 @@ from .comedycentral import ComedyCentralIE
|
||||
from .condenast import CondeNastIE
|
||||
from .criterion import CriterionIE
|
||||
from .cspan import CSpanIE
|
||||
from .d8 import D8IE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionPlaylistIE,
|
||||
@@ -126,12 +128,14 @@ from .spiegel import SpiegelIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .statigram import StatigramIE
|
||||
from .steam import SteamIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .tf1 import TF1IE
|
||||
from .thisav import ThisAVIE
|
||||
from .toutv import TouTvIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .tube8 import Tube8IE
|
||||
|
||||
55
youtube_dl/extractor/anitube.py
Normal file
55
youtube_dl/extractor/anitube.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AnitubeIE(InfoExtractor):
|
||||
IE_NAME = u'anitube.se'
|
||||
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.anitube.se/video/36621',
|
||||
u'md5': u'59d0eeae28ea0bc8c05e7af429998d43',
|
||||
u'file': u'36621.mp4',
|
||||
u'info_dict': {
|
||||
u'id': u'36621',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Recorder to Randoseru 01',
|
||||
},
|
||||
u'skip': u'Blocked in the US',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
|
||||
webpage, u'key')
|
||||
|
||||
webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
|
||||
key)
|
||||
config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
|
||||
|
||||
video_title = config_xml.find('title').text
|
||||
|
||||
formats = []
|
||||
video_url = config_xml.find('file')
|
||||
if video_url is not None:
|
||||
formats.append({
|
||||
'format_id': 'sd',
|
||||
'url': video_url.text,
|
||||
})
|
||||
video_url = config_xml.find('filehd')
|
||||
if video_url is not None:
|
||||
formats.append({
|
||||
'format_id': 'hd',
|
||||
'url': video_url.text,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats
|
||||
}
|
||||
@@ -44,7 +44,6 @@ class AUEngineIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
@@ -3,13 +3,16 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
IE_NAME = u'Bandcamp'
|
||||
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
u'file': u'1812978515.mp3',
|
||||
u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
|
||||
@@ -17,7 +20,29 @@ class BandcampIE(InfoExtractor):
|
||||
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
|
||||
},
|
||||
u'skip': u'There is a limit of 200 free downloads / month for the test song'
|
||||
}
|
||||
}, {
|
||||
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
u'playlist': [
|
||||
{
|
||||
u'file': u'1353101989.mp3',
|
||||
u'md5': u'39bc1eded3476e927c724321ddf116cf',
|
||||
u'info_dict': {
|
||||
u'title': u'Intro',
|
||||
}
|
||||
},
|
||||
{
|
||||
u'file': u'38097443.mp3',
|
||||
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
|
||||
u'info_dict': {
|
||||
u'title': u'Kero One - Keep It Alive (Blazo remix)',
|
||||
}
|
||||
},
|
||||
],
|
||||
u'params': {
|
||||
u'playlistend': 2
|
||||
},
|
||||
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -26,6 +51,26 @@ class BandcampIE(InfoExtractor):
|
||||
# We get the link to the free download page
|
||||
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
||||
if m_download is None:
|
||||
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)
|
||||
|
||||
entries = []
|
||||
for d in data:
|
||||
formats = [{
|
||||
'format_id': 'format_id',
|
||||
'url': format_url,
|
||||
'ext': format_id.partition('-')[0]
|
||||
} for format_id, format_url in sorted(d['file'].items())]
|
||||
entries.append({
|
||||
'id': compat_str(d['id']),
|
||||
'title': d['title'],
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, title, title)
|
||||
else:
|
||||
raise ExtractorError(u'No free songs found')
|
||||
|
||||
download_link = m_download.group(1)
|
||||
@@ -61,3 +106,25 @@ class BandcampIE(InfoExtractor):
|
||||
}
|
||||
|
||||
return [track_info]
|
||||
|
||||
|
||||
class BandcampAlbumIE(InfoExtractor):
|
||||
IE_NAME = u'Bandcamp:album'
|
||||
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
||||
if not tracks_paths:
|
||||
raise ExtractorError(u'The page doesn\'t contain any track')
|
||||
entries = [
|
||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||
for t_path in tracks_paths]
|
||||
title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title')
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import xml.etree.ElementTree
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class CanalplusIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
|
||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
|
||||
@@ -25,7 +26,7 @@ class CanalplusIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = mobj.groupdict().get('id')
|
||||
if video_id is None:
|
||||
webpage = self._download_webpage(url, mobj.group('path'))
|
||||
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
||||
|
||||
@@ -71,10 +71,8 @@ class CollegeHumorIE(InfoExtractor):
|
||||
|
||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||
try:
|
||||
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
|
||||
node_id = media_node.attrib['url']
|
||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||
except IndexError as err:
|
||||
except IndexError:
|
||||
raise ExtractorError(u'Invalid manifest file')
|
||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||
|
||||
@@ -350,6 +350,17 @@ class InfoExtractor(object):
|
||||
if secure: regexes = self._og_regexes('video:secure_url') + regexes
|
||||
return self._html_search_regex(regexes, html, name, **kargs)
|
||||
|
||||
def _html_search_meta(self, name, html, display_name=None):
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
return self._html_search_regex(
|
||||
r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
|
||||
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
|
||||
html, display_name, fatal=False)
|
||||
|
||||
def _dc_search_uploader(self, html):
|
||||
return self._html_search_meta('dc.creator', html, 'uploader')
|
||||
|
||||
def _rta_search(self, html):
|
||||
# See http://www.rtalabel.org/index.php?content=howtofaq#single
|
||||
if re.search(r'(?ix)<meta\s+name="rating"\s+'
|
||||
@@ -358,6 +369,23 @@ class InfoExtractor(object):
|
||||
return 18
|
||||
return 0
|
||||
|
||||
def _media_rating_search(self, html):
|
||||
# See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
|
||||
rating = self._html_search_meta('rating', html)
|
||||
|
||||
if not rating:
|
||||
return None
|
||||
|
||||
RATING_TABLE = {
|
||||
'safe for kids': 0,
|
||||
'general': 8,
|
||||
'14 years': 14,
|
||||
'mature': 17,
|
||||
'restricted': 19,
|
||||
}
|
||||
return RATING_TABLE.get(rating.lower(), None)
|
||||
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
||||
22
youtube_dl/extractor/d8.py
Normal file
22
youtube_dl/extractor/d8.py
Normal file
@@ -0,0 +1,22 @@
|
||||
# encoding: utf-8
|
||||
from .canalplus import CanalplusIE
|
||||
|
||||
|
||||
class D8IE(CanalplusIE):
|
||||
_VALID_URL = r'https?://www\.d8\.tv/.*?/(?P<path>.*)'
|
||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/d8/%s'
|
||||
IE_NAME = u'd8.tv'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
|
||||
u'file': u'966289.flv',
|
||||
u'info_dict': {
|
||||
u'title': u'Campagne intime - Documentaire exceptionnel',
|
||||
u'description': u'md5:d2643b799fb190846ae09c61e59a859f',
|
||||
u'upload_date': u'20131108',
|
||||
},
|
||||
u'params': {
|
||||
# rtmp
|
||||
u'skip_download': True,
|
||||
},
|
||||
}
|
||||
@@ -1,4 +1,3 @@
|
||||
import itertools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
|
||||
@@ -11,11 +11,11 @@ from ..utils import (
|
||||
|
||||
|
||||
class EscapistIE(InfoExtractor):
|
||||
_VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
|
||||
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
|
||||
_TEST = {
|
||||
u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||
u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
|
||||
u'md5': u'c6793dbda81388f4264c1ba18684a74d',
|
||||
u'md5': u'ab3a706c681efca53f0a35f1415cf0d1',
|
||||
u'info_dict': {
|
||||
u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
||||
u"uploader": u"the-escapist-presents",
|
||||
@@ -25,50 +25,60 @@ class EscapistIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
showName = mobj.group('showname')
|
||||
videoId = mobj.group('episode')
|
||||
|
||||
self.report_extraction(videoId)
|
||||
webpage = self._download_webpage(url, videoId)
|
||||
|
||||
videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
|
||||
videoDesc = self._html_search_regex(
|
||||
r'<meta name="description" content="([^"]*)"',
|
||||
webpage, u'description', fatal=False)
|
||||
|
||||
playerUrl = self._og_search_video_url(webpage, name='player url')
|
||||
playerUrl = self._og_search_video_url(webpage, name=u'player URL')
|
||||
|
||||
title = self._html_search_regex('<meta name="title" content="([^"]*)"',
|
||||
webpage, u'player url').split(' : ')[-1]
|
||||
title = self._html_search_regex(
|
||||
r'<meta name="title" content="([^"]*)"',
|
||||
webpage, u'title').split(' : ')[-1]
|
||||
|
||||
configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')
|
||||
configUrl = self._search_regex('config=(.*)$', playerUrl, u'config URL')
|
||||
configUrl = compat_urllib_parse.unquote(configUrl)
|
||||
|
||||
configJSON = self._download_webpage(configUrl, videoId,
|
||||
u'Downloading configuration',
|
||||
u'unable to download configuration')
|
||||
formats = []
|
||||
|
||||
# Technically, it's JavaScript, not JSON
|
||||
configJSON = configJSON.replace("'", '"')
|
||||
def _add_format(name, cfgurl):
|
||||
configJSON = self._download_webpage(
|
||||
cfgurl, videoId,
|
||||
u'Downloading ' + name + ' configuration',
|
||||
u'Unable to download ' + name + ' configuration')
|
||||
|
||||
# Technically, it's JavaScript, not JSON
|
||||
configJSON = configJSON.replace("'", '"')
|
||||
|
||||
try:
|
||||
config = json.loads(configJSON)
|
||||
except (ValueError,) as err:
|
||||
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
|
||||
playlist = config['playlist']
|
||||
formats.append({
|
||||
'url': playlist[1]['url'],
|
||||
'format_id': name,
|
||||
})
|
||||
|
||||
_add_format(u'normal', configUrl)
|
||||
hq_url = (configUrl +
|
||||
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
|
||||
try:
|
||||
config = json.loads(configJSON)
|
||||
except (ValueError,) as err:
|
||||
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
|
||||
_add_format(u'hq', hq_url)
|
||||
except ExtractorError:
|
||||
pass # That's fine, we'll just use normal quality
|
||||
|
||||
playlist = config['playlist']
|
||||
videoUrl = playlist[1]['url']
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': videoId,
|
||||
'url': videoUrl,
|
||||
'formats': formats,
|
||||
'uploader': showName,
|
||||
'upload_date': None,
|
||||
'title': title,
|
||||
'ext': 'mp4',
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': videoDesc,
|
||||
'player_url': playerUrl,
|
||||
}
|
||||
|
||||
return [info]
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import json
|
||||
import netrc
|
||||
import re
|
||||
import socket
|
||||
|
||||
|
||||
@@ -39,7 +39,6 @@ class FKTVIE(InfoExtractor):
|
||||
for i, _ in enumerate(files, 1):
|
||||
video_id = '%04d%d' % (episode, i)
|
||||
video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
|
||||
video_title = 'Fernsehkritik %d.%d' % (episode, i)
|
||||
videos.append({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
|
||||
@@ -24,7 +24,7 @@ class GameSpotIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_id = video_id = mobj.group('page_id')
|
||||
page_id = mobj.group('page_id')
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video')
|
||||
data_video = json.loads(unescapeHTML(data_video_json))
|
||||
|
||||
@@ -199,7 +199,8 @@ class GenericIE(InfoExtractor):
|
||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||
if mobj is not None:
|
||||
burl = unescapeHTML(mobj.group(1))
|
||||
return self.url_result(burl, 'Bandcamp')
|
||||
# Don't set the extractor because it can be a track url or an album
|
||||
return self.url_result(burl)
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
|
||||
@@ -22,7 +22,7 @@ class JeuxVideoIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = re.match(self._VALID_URL, url).group(1)
|
||||
title = mobj.group(1)
|
||||
webpage = self._download_webpage(url, title)
|
||||
xml_link = self._html_search_regex(
|
||||
r'<param name="flashvars" value="config=(.*?)" />',
|
||||
|
||||
@@ -6,9 +6,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
get_meta_content,
|
||||
xpath_with_ns,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -59,7 +59,6 @@ class MTVIE(InfoExtractor):
|
||||
if '/error_country_block.swf' in metadataXml:
|
||||
raise ExtractorError(u'This video is not available from your country.', expected=True)
|
||||
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
|
||||
renditions = mdoc.findall('.//rendition')
|
||||
|
||||
formats = []
|
||||
for rendition in mdoc.findall('.//rendition'):
|
||||
|
||||
@@ -72,7 +72,7 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
|
||||
class NHLVideocenterIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = u'nhl.com:videocenter'
|
||||
IE_DESC = u'Download the first 12 videos from a videocenter category'
|
||||
IE_DESC = u'NHL videocenter category'
|
||||
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -6,7 +6,6 @@ from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
unescapeHTML,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_decrypt_text
|
||||
|
||||
@@ -59,6 +59,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
]
|
||||
|
||||
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
@@ -83,7 +84,6 @@ class SoundcloudIE(InfoExtractor):
|
||||
thumbnail = thumbnail.replace('-large', '-t500x500')
|
||||
result = {
|
||||
'id': track_id,
|
||||
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
||||
'uploader': info['user']['username'],
|
||||
'upload_date': unified_strdate(info['created_at']),
|
||||
'title': info['title'],
|
||||
@@ -92,19 +92,29 @@ class SoundcloudIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
if info.get('downloadable', False):
|
||||
# We can build a direct link to the song
|
||||
result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
|
||||
if not info.get('streamable', False):
|
||||
# We have to get the rtmp url
|
||||
else:
|
||||
# We have to retrieve the url
|
||||
stream_json = self._download_webpage(
|
||||
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._CLIENT_ID),
|
||||
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
|
||||
track_id, u'Downloading track url')
|
||||
rtmp_url = json.loads(stream_json)['rtmp_mp3_128_url']
|
||||
# The url doesn't have an rtmp app, we have to extract the playpath
|
||||
url, path = rtmp_url.split('mp3:', 1)
|
||||
result.update({
|
||||
'url': url,
|
||||
'play_path': 'mp3:' + path,
|
||||
})
|
||||
# There should be only one entry in the dictionary
|
||||
key, stream_url = list(json.loads(stream_json).items())[0]
|
||||
if key.startswith(u'http'):
|
||||
result['url'] = stream_url
|
||||
elif key.startswith(u'rtmp'):
|
||||
# The url doesn't have an rtmp app, we have to extract the playpath
|
||||
url, path = stream_url.split('mp3:', 1)
|
||||
result.update({
|
||||
'url': url,
|
||||
'play_path': 'mp3:' + path,
|
||||
})
|
||||
else:
|
||||
# We fallback to the stream_url in the original info, this
|
||||
# cannot be always used, sometimes it can give an HTTP 404 error
|
||||
result['url'] = info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
||||
|
||||
return result
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -158,7 +168,6 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
resolv_url = self._resolv_url(url)
|
||||
info_json = self._download_webpage(resolv_url, full_title)
|
||||
|
||||
videos = []
|
||||
info = json.loads(info_json)
|
||||
if 'errors' in info:
|
||||
for err in info['errors']:
|
||||
|
||||
@@ -6,7 +6,6 @@ from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
unescapeHTML,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_decrypt_text
|
||||
@@ -36,11 +35,12 @@ class SpankwireIE(InfoExtractor):
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
|
||||
video_uploader = self._html_search_regex(r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
|
||||
thumbnail = self._html_search_regex(r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
|
||||
description = self._html_search_regex(r'>\s*Description:</div>\s*<[^>]*>([^<]+)', webpage, u'description', fatal=False)
|
||||
if len(description) == 0:
|
||||
description = None
|
||||
video_uploader = self._html_search_regex(
|
||||
r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
|
||||
description = self._html_search_regex(
|
||||
r'<div\s+id="descriptionContent">([^<]+)<', webpage, u'description', fatal=False)
|
||||
|
||||
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
|
||||
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
||||
|
||||
@@ -2,7 +2,6 @@ import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class SpiegelIE(InfoExtractor):
|
||||
|
||||
65
youtube_dl/extractor/streamcloud.py
Normal file
65
youtube_dl/extractor/streamcloud.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# coding: utf-8
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class StreamcloudIE(InfoExtractor):
|
||||
IE_NAME = u'streamcloud.eu'
|
||||
_VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
|
||||
u'file': u'skp9j99s4bpz.mp4',
|
||||
u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
|
||||
u'info_dict': {
|
||||
u'title': u'youtube-dl test video \'/\\ ä ↭',
|
||||
u'duration': 9,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
orig_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
fields = re.findall(r'''(?x)<input\s+
|
||||
type="(?:hidden|submit)"\s+
|
||||
name="([^"]+)"\s+
|
||||
(?:id="[^"]+"\s+)?
|
||||
value="([^"]*)"
|
||||
''', orig_webpage)
|
||||
post = compat_urllib_parse.urlencode(fields)
|
||||
|
||||
self.to_screen('%s: Waiting for timeout' % video_id)
|
||||
time.sleep(12)
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = compat_urllib_request.Request(url, post, headers)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note=u'Downloading video page ...')
|
||||
title = self._html_search_regex(
|
||||
r'<h1[^>]*>([^<]+)<', webpage, u'title')
|
||||
video_url = self._search_regex(
|
||||
r'file:\s*"([^"]+)"', webpage, u'video URL')
|
||||
duration_str = self._search_regex(
|
||||
r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
|
||||
duration = None if duration_str is None else int(duration_str)
|
||||
thumbnail = self._search_regex(
|
||||
r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
@@ -15,7 +15,8 @@ class SztvHuIE(InfoExtractor):
|
||||
u'info_dict': {
|
||||
u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
|
||||
u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
|
||||
}
|
||||
},
|
||||
u'skip': u'Service temporarily disabled as of 2013-11-20'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -60,7 +60,7 @@ class TeamcocoIE(InfoExtractor):
|
||||
return -1
|
||||
formats.sort(key=sort_key)
|
||||
if not formats:
|
||||
raise RegexNotFoundError(u'Unable to extract video URL')
|
||||
raise ExtractorError(u'Unable to extract video URL')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -4,7 +4,6 @@ import re
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
RegexNotFoundError,
|
||||
)
|
||||
|
||||
@@ -113,6 +112,6 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
|
||||
sub_lang_list[l] = url
|
||||
return sub_lang_list
|
||||
except RegexNotFoundError as err:
|
||||
except RegexNotFoundError:
|
||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||
return {}
|
||||
|
||||
74
youtube_dl/extractor/toutv.py
Normal file
74
youtube_dl/extractor/toutv.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class TouTvIE(InfoExtractor):
|
||||
IE_NAME = u'tou.tv'
|
||||
_VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.tou.tv/30-vies/S04E41',
|
||||
u'file': u'30-vies_S04E41.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'30 vies Saison 4 / Épisode 41',
|
||||
u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
|
||||
u'age_limit': 8,
|
||||
u'uploader': u'Groupe des Nouveaux Médias',
|
||||
u'duration': 1296,
|
||||
u'upload_date': u'20131118',
|
||||
u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True, # Requires rtmpdump
|
||||
},
|
||||
u'skip': 'Only available in Canada'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mediaId = self._search_regex(
|
||||
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
|
||||
|
||||
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
|
||||
streams_webpage = self._download_webpage(
|
||||
streams_url, video_id, note=u'Downloading stream list')
|
||||
|
||||
streams_doc = xml.etree.ElementTree.fromstring(
|
||||
streams_webpage.encode('utf-8'))
|
||||
video_url = next(n.text
|
||||
for n in streams_doc.findall('.//choice/url')
|
||||
if u'//ad.doubleclick' not in n.text)
|
||||
if video_url.endswith('/Unavailable.flv'):
|
||||
raise ExtractorError(
|
||||
u'Access to this video is blocked from outside of Canada',
|
||||
expected=True)
|
||||
|
||||
duration_str = self._html_search_meta(
|
||||
'video:duration', webpage, u'duration')
|
||||
duration = int(duration_str) if duration_str else None
|
||||
upload_date_str = self._html_search_meta(
|
||||
'video:release_date', webpage, u'upload date')
|
||||
upload_date = unified_strdate(upload_date_str) if upload_date_str else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'url': video_url,
|
||||
'description': self._og_search_description(webpage),
|
||||
'uploader': self._dc_search_uploader(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'age_limit': self._media_rating_search(webpage),
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'ext': 'mp4',
|
||||
}
|
||||
@@ -5,8 +5,6 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
unescapeHTML,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_decrypt_text
|
||||
|
||||
@@ -24,12 +24,16 @@ class VideoPremiumIE(InfoExtractor):
|
||||
webpage_url = 'http://videopremium.tv/' + video_id
|
||||
webpage = self._download_webpage(webpage_url, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
if re.match(r"^<html><head><script[^>]*>window.location\s*=", webpage):
|
||||
# Download again, we need a cookie
|
||||
webpage = self._download_webpage(
|
||||
webpage_url, video_id,
|
||||
note=u'Downloading webpage again (with cookie)')
|
||||
|
||||
video_title = self._html_search_regex(r'<h2(?:.*?)>\s*(.+?)\s*<',
|
||||
webpage, u'video title')
|
||||
video_title = self._html_search_regex(
|
||||
r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, u'video title')
|
||||
|
||||
return [{
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16),
|
||||
'play_path': "mp4:%s.f4v" % video_id,
|
||||
@@ -37,4 +41,4 @@ class VideoPremiumIE(InfoExtractor):
|
||||
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
|
||||
'ext': 'f4v',
|
||||
'title': video_title,
|
||||
}]
|
||||
}
|
||||
@@ -151,7 +151,7 @@ class VimeoIE(InfoExtractor):
|
||||
config = json.loads(config_json)
|
||||
except RegexNotFoundError:
|
||||
# For pro videos or player.vimeo.com urls
|
||||
config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
|
||||
config = self._search_regex([r' = {config:({.+?}),assets:', r'(?:c|b)=({.+?});'],
|
||||
webpage, u'info section', flags=re.DOTALL)
|
||||
config = json.loads(config)
|
||||
except Exception as e:
|
||||
|
||||
@@ -5,7 +5,6 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
class XTubeIE(InfoExtractor):
|
||||
|
||||
@@ -139,10 +139,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
IE_DESC = u'YouTube.com'
|
||||
_VALID_URL = r"""(?xi)^
|
||||
_VALID_URL = r"""(?x)^
|
||||
(
|
||||
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
|
||||
(?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
|
||||
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
|
||||
tube\.majestyc\.net/|
|
||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||
@@ -1510,7 +1510,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
})
|
||||
return results
|
||||
|
||||
class YoutubePlaylistIE(InfoExtractor):
|
||||
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = u'YouTube.com playlists'
|
||||
_VALID_URL = r"""(?:
|
||||
(?:https?://)?
|
||||
@@ -1526,8 +1526,9 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
|
|
||||
((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
|
||||
)"""
|
||||
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
|
||||
_MAX_RESULTS = 50
|
||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
|
||||
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
||||
_VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&'
|
||||
IE_NAME = u'youtube:playlist'
|
||||
|
||||
@classmethod
|
||||
@@ -1535,6 +1536,9 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
"""Receives a URL and returns True if suitable for this IE."""
|
||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract playlist id
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
@@ -1552,41 +1556,23 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
else:
|
||||
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
|
||||
# Download playlist videos from API
|
||||
videos = []
|
||||
# Extract the video ids from the playlist pages
|
||||
ids = []
|
||||
|
||||
for page_num in itertools.count(1):
|
||||
start_index = self._MAX_RESULTS * (page_num - 1) + 1
|
||||
if start_index >= 1000:
|
||||
self._downloader.report_warning(u'Max number of results reached')
|
||||
break
|
||||
url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
|
||||
url = self._TEMPLATE_URL % (playlist_id, page_num)
|
||||
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
||||
# The ids are duplicated
|
||||
new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
|
||||
ids.extend(new_ids)
|
||||
|
||||
try:
|
||||
response = json.loads(page)
|
||||
except ValueError as err:
|
||||
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
|
||||
|
||||
if 'feed' not in response:
|
||||
raise ExtractorError(u'Got a malformed response from YouTube API')
|
||||
playlist_title = response['feed']['title']['$t']
|
||||
if 'entry' not in response['feed']:
|
||||
# Number of videos is a multiple of self._MAX_RESULTS
|
||||
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
||||
break
|
||||
|
||||
for entry in response['feed']['entry']:
|
||||
index = entry['yt$position']['$t']
|
||||
if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
|
||||
videos.append((
|
||||
index,
|
||||
'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
|
||||
))
|
||||
playlist_title = self._og_search_title(page)
|
||||
|
||||
videos = [v[1] for v in sorted(videos)]
|
||||
|
||||
url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
|
||||
return [self.playlist_result(url_results, playlist_id, playlist_title)]
|
||||
url_results = [self.url_result(vid, 'Youtube') for vid in ids]
|
||||
return self.playlist_result(url_results, playlist_id, playlist_title)
|
||||
|
||||
|
||||
class YoutubeChannelIE(InfoExtractor):
|
||||
|
||||
@@ -53,7 +53,7 @@ class ZDFIE(InfoExtractor):
|
||||
video_id,
|
||||
u'Get stream URL')
|
||||
|
||||
MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
|
||||
#MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
|
||||
RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
|
||||
|
||||
mobj = re.search(self._MEDIA_STREAM, media_link)
|
||||
|
||||
@@ -109,7 +109,7 @@ def update_self(to_screen, verbose):
|
||||
urlh = compat_urllib_request.urlopen(version['exe'][0])
|
||||
newcontent = urlh.read()
|
||||
urlh.close()
|
||||
except (IOError, OSError) as err:
|
||||
except (IOError, OSError):
|
||||
if verbose: to_screen(compat_str(traceback.format_exc()))
|
||||
to_screen(u'ERROR: unable to download latest version')
|
||||
return
|
||||
@@ -122,7 +122,7 @@ def update_self(to_screen, verbose):
|
||||
try:
|
||||
with open(exe + '.new', 'wb') as outf:
|
||||
outf.write(newcontent)
|
||||
except (IOError, OSError) as err:
|
||||
except (IOError, OSError):
|
||||
if verbose: to_screen(compat_str(traceback.format_exc()))
|
||||
to_screen(u'ERROR: unable to write the new version')
|
||||
return
|
||||
@@ -141,7 +141,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
|
||||
|
||||
subprocess.Popen([bat]) # Continues to run in the background
|
||||
return # Do not show premature success messages
|
||||
except (IOError, OSError) as err:
|
||||
except (IOError, OSError):
|
||||
if verbose: to_screen(compat_str(traceback.format_exc()))
|
||||
to_screen(u'ERROR: unable to overwrite current version')
|
||||
return
|
||||
@@ -152,7 +152,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
|
||||
urlh = compat_urllib_request.urlopen(version['bin'][0])
|
||||
newcontent = urlh.read()
|
||||
urlh.close()
|
||||
except (IOError, OSError) as err:
|
||||
except (IOError, OSError):
|
||||
if verbose: to_screen(compat_str(traceback.format_exc()))
|
||||
to_screen(u'ERROR: unable to download latest version')
|
||||
return
|
||||
@@ -165,7 +165,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
|
||||
try:
|
||||
with open(filename, 'wb') as outf:
|
||||
outf.write(newcontent)
|
||||
except (IOError, OSError) as err:
|
||||
except (IOError, OSError):
|
||||
if verbose: to_screen(compat_str(traceback.format_exc()))
|
||||
to_screen(u'ERROR: unable to overwrite current version')
|
||||
return
|
||||
|
||||
@@ -734,6 +734,8 @@ def unified_strdate(date_str):
|
||||
'%Y/%m/%d %H:%M:%S',
|
||||
'%d.%m.%Y %H:%M',
|
||||
'%Y-%m-%dT%H:%M:%SZ',
|
||||
'%Y-%m-%dT%H:%M:%S.%fZ',
|
||||
'%Y-%m-%dT%H:%M:%S.%f0Z',
|
||||
'%Y-%m-%dT%H:%M:%S',
|
||||
]
|
||||
for expression in format_expressions:
|
||||
@@ -949,7 +951,16 @@ class locked_file(object):
|
||||
|
||||
|
||||
def shell_quote(args):
|
||||
return ' '.join(map(pipes.quote, args))
|
||||
quoted_args = []
|
||||
encoding = sys.getfilesystemencoding()
|
||||
if encoding is None:
|
||||
encoding = 'utf-8'
|
||||
for a in args:
|
||||
if isinstance(a, bytes):
|
||||
# We may get a filename encoded with 'encodeFilename'
|
||||
a = a.decode(encoding)
|
||||
quoted_args.append(pipes.quote(a))
|
||||
return u' '.join(quoted_args)
|
||||
|
||||
|
||||
def takewhile_inclusive(pred, seq):
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.11.18.1'
|
||||
__version__ = '2013.11.22.1'
|
||||
|
||||
Reference in New Issue
Block a user