Mirror of https://github.com/ytdl-org/youtube-dl.git (synced 2025-12-23 00:10:30 +01:00)
Compare commits: 151 commits, 2015.05.03...2015.05.15
Commit SHA1s in this range:

c4fc559f45, 2bc4330303, 12675275a1, 3a105f7b20, 1ae72fb23d, 7ec676bb3d, 29ea57283e, 5488973961,
96d45a5489, 7a012d5a16, fa6a16996e, 82245a6de7, ff28ede2d1, 98b8ec8616, 88f9d8748c, 7d57d2e18b,
38caa00d18, c827d4cfdb, 509c630db8, fbff30d2db, 86c7fdb17c, 62bd6589c7, 2cc6d13547, bb8ca1d112,
8e59539752, 372744c544, 83880949a1, 3749e36e9f, 0b4253fa37, 86ec1e487c, fd4eefed39, b480e7874b,
41333b97b9, c1c924abfe, 1c7e2e64f6, 7dff03636a, 5332fd91bf, d4b963d0a6, 6d3f5935e5, 968ee17677,
81ed3bb9c0, 5115652828, 1f92865494, e41f450f28, 97fcf1bbd0, 13763ce599, 7fcb605b82, 70484b9f8a,
69b46b3d95, 95c5534f8e, 370b39e8ec, 3da8038918, a6762c4a22, 98c2c0febc, 63cbd19f50, 1934f3a0ea,
a909e6ad43, 1dcb52188d, 28ebef0b1b, f03a8a3c4e, 03f760b1c0, f900dc3fb9, 95eb1adda8, c6ddbdb66c,
3800b908b1, 69fe3a5f09, 754270313a, 057ebeaca3, 480065172d, f2e0056579, 32fffff2cc, 3c47824d6b,
0892090a56, d592b42f5c, 3b5f65a64c, 5c0b2c16a8, d39e0f05db, 6d14d08e06, 32060c6d6b, 3dbec410a0,
de765f6c31, dc455a5f88, bab19a8e91, 322915014f, 79998cd5af, 50b9013064, bb03fdae0d, 4384cf9e7d,
d47e980d0d, fe373287eb, cbe443362f, 2c0c9dc46c, 0ceab84749, 34e7dc81a9, 4e6e9d21bd, d1feb30811,
43837189c1, 249962ffa2, 541168039d, 7ef00afe9d, 156fc83a55, 46be82b811, 09b412dafa, 5268a05e47,
406224be52, 3799834dcf, 553e412bda, f22834a372, bd349a8704, bc08873cff, aafe273990, c09593c04e,
84bf31aaf8, 05d5392cda, d9a743d917, ac6c358c2a, ad0c0ad3b4, 1ed34f3dd6, 6a8f9cd22e, e8b9ab8957,
74f728249f, d6a1738892, b326b07adc, 07d2921c6d, 22e462c97a, dcf8077906, 3408f6e64a, e10dc0e1f0,
ce5c1ae517, bbe718c97f, 01e4b1ee14, 815ac0293e, 6568382d6f, f943b7ddce, ff9d68e7be, 7212560f4d,
1aa43d77c0, e038d5c4e3, dfad3aac98, df8418ffcf, 50aa43b3ae, a90552663e, 883340c107, 0fe2ff78e6,
dc1eed93be, b2f82360d7, 782e0568ef, 90b4b0eabe, cec04ef3a6, 71fa56b887, b9b3ab45ea
@@ -269,7 +269,7 @@ The simplest case is requesting a specific format, for example `-f 22`. You can
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`.
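The same selectors can be used when embedding youtube-dl as a Python library; a minimal sketch (the video URL is only an example):

```python
import youtube_dl

ydl_opts = {
    # up to 720p (or unknown height) with a total bitrate above 500 KBit/s
    'format': '[height <=? 720][tbr>500]',
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```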
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file.
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
@@ -64,6 +64,8 @@
- **BR**: Bayerischer Rundfunk Mediathek
- **Break**
- **Brightcove**
- **bt:article**: Bergens Tidende Articles
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
- **BuzzFeed**
- **BYUtv**
- **Camdemy**

@@ -240,6 +242,7 @@
- **LetvPlaylist**
- **LetvTv**
- **Libsyn**
- **life:embed**
- **lifenews**: LIFE | NEWS
- **LiveLeak**
- **livestream**

@@ -288,6 +291,7 @@
- **MySpass**
- **myvideo**
- **MyVidster**
- **N-JOY**
- **n-tv.de**
- **NationalGeographic**
- **Naver**

@@ -328,6 +332,7 @@
- **ntv.ru**
- **Nuvid**
- **NYTimes**
- **NYTimesArticle**
- **ocw.mit.edu**
- **Odnoklassniki**
- **OktoberfestTV**

@@ -364,9 +369,10 @@
- **prosiebensat1**: ProSiebenSat.1 Digital
- **Puls4**
- **Pyvideo**
- **QQMusic**
- **QQMusicAlbum**
- **QQMusicSinger**
- **qqmusic**
- **qqmusic:album**
- **qqmusic:singer**
- **qqmusic:toplist**
- **QuickVid**
- **R7**
- **radio.de**

@@ -434,6 +440,8 @@
- **southpark.cc.com**
- **southpark.cc.com:español**
- **southpark.de**
- **southpark.nl**
- **southparkstudios.dk**
- **Space**
- **SpankBang**
- **Spankwire**

@@ -453,6 +461,7 @@
- **StreamCZ**
- **StreetVoice**
- **SunPorno**
- **SVT**
- **SVTPlay**: SVT Play and Öppet arkiv
- **SWRMediathek**
- **Syfy**

@@ -486,6 +495,7 @@
- **tlc.com**
- **tlc.de**
- **TMZ**
- **TMZArticle**
- **TNAFlix**
- **tou.tv**
- **Toypics**: Toypics user profile

@@ -529,7 +539,7 @@
- **Vessel**
- **Vesti**: Вести.Ru
- **Vevo**
- **VGTV**
- **VGTV**: VGTV and BTTV
- **vh1.com**
- **Vice**
- **Viddler**

@@ -563,6 +573,7 @@
- **vk.com**
- **vk.com:user-videos**: vk.com:All of a user's videos
- **Vodlocker**
- **VoiceRepublic**
- **Vporn**
- **VRT**
- **vube**: Vube.com

@@ -587,6 +598,7 @@
- **XHamster**
- **XMinus**
- **XNXX**
- **Xstream**
- **XTube**
- **XTubeUser**: XTube user profile
- **Xuite**
@@ -40,7 +40,6 @@ from youtube_dl.utils import (
|
||||
read_batch_urls,
|
||||
sanitize_filename,
|
||||
sanitize_path,
|
||||
sanitize_url_path_consecutive_slashes,
|
||||
prepend_extension,
|
||||
replace_extension,
|
||||
shell_quote,
|
||||
@@ -53,6 +52,7 @@ from youtube_dl.utils import (
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
uppercase_escape,
|
||||
lowercase_escape,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
version_tuple,
|
||||
@@ -175,26 +175,6 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(sanitize_path('./abc'), 'abc')
|
||||
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
||||
|
||||
def test_sanitize_url_path_consecutive_slashes(self):
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/foo//bar/filename.html'),
|
||||
'http://hostname/foo/bar/filename.html')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname//foo/bar/filename.html'),
|
||||
'http://hostname/foo/bar/filename.html')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname//'),
|
||||
'http://hostname/')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/foo/bar/filename.html'),
|
||||
'http://hostname/foo/bar/filename.html')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/'),
|
||||
'http://hostname/')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/abc//'),
|
||||
'http://hostname/abc/')
|
||||
|
||||
def test_prepend_extension(self):
|
||||
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
|
||||
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
|
||||
@@ -418,6 +398,10 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
|
||||
def test_lowercase_escape(self):
|
||||
self.assertEqual(lowercase_escape('aä'), 'aä')
|
||||
self.assertEqual(lowercase_escape('\\u0026'), '&')
|
||||
|
||||
def test_limit_length(self):
|
||||
self.assertEqual(limit_length(None, 12), None)
|
||||
self.assertEqual(limit_length('foo', 12), 'foo')
|
||||
@@ -616,7 +600,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||
<div xml:lang="en">
|
||||
<p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
|
||||
<p begin="1" end="2">第二行<br/>♪♪</p>
|
||||
<p begin="2" end="3"><span>Third<br/>Line</span></p>
|
||||
<p begin="2" dur="1"><span>Third<br/>Line</span></p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''
|
||||
|
||||
@@ -260,7 +260,6 @@ class YoutubeDL(object):
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
otherwise prefer avconv.
|
||||
exec_cmd: Arbitrary command to run after downloading
|
||||
"""
|
||||
|
||||
params = None
|
||||
@@ -1086,8 +1085,11 @@ class YoutubeDL(object):
|
||||
req_format = self.params.get('format')
|
||||
if req_format is None:
|
||||
req_format_list = []
|
||||
if info_dict['extractor'] in ['youtube', 'ted'] and FFmpegMergerPP(self).available:
|
||||
req_format_list.append('bestvideo+bestaudio')
|
||||
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
||||
info_dict['extractor'] in ['youtube', 'ted']):
|
||||
merger = FFmpegMergerPP(self)
|
||||
if merger.available and merger.can_merge():
|
||||
req_format_list.append('bestvideo+bestaudio')
|
||||
req_format_list.append('best')
|
||||
req_format = '/'.join(req_format_list)
|
||||
formats_to_download = []
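A condensed sketch of how that default selector string is assembled; names like `merger_usable` are stand-ins for the checks above, not variables from the code:

```python
req_format_list = []
outtmpl = '%(title)s-%(id)s.%(ext)s'  # i.e. not '-', so output is not piped to stdout
extractor = 'youtube'
merger_usable = True                  # stands in for FFmpegMergerPP being available and able to merge

if outtmpl != '-' and extractor in ['youtube', 'ted'] and merger_usable:
    req_format_list.append('bestvideo+bestaudio')
req_format_list.append('best')
print('/'.join(req_format_list))      # prints bestvideo+bestaudio/best
```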
|
||||
@@ -1847,7 +1849,7 @@ class YoutubeDL(object):
|
||||
thumb_ext = determine_ext(t['url'], 'jpg')
|
||||
suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
|
||||
thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
|
||||
thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
|
||||
t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
|
||||
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
|
||||
self.to_screen('[%s] %s: Thumbnail %sis already present' %
|
||||
|
||||
@@ -240,13 +240,18 @@ def _real_main(argv=None):
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
if opts.embedthumbnail:
|
||||
postprocessors.append({'key': 'EmbedThumbnail'})
|
||||
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
|
||||
postprocessors.append({
|
||||
'key': 'EmbedThumbnail',
|
||||
'already_have_thumbnail': already_have_thumbnail
|
||||
})
|
||||
if not already_have_thumbnail:
|
||||
opts.writethumbnail = True
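The same wiring is available when embedding youtube-dl as a library; a minimal sketch (it assumes the external tools the post-processor needs are installed, and the URL is only an example):

```python
import youtube_dl

ydl_opts = {
    'writethumbnail': True,  # download the thumbnail file so it can be embedded
    'postprocessors': [{
        'key': 'EmbedThumbnail',
        'already_have_thumbnail': True,  # thumbnail was requested explicitly, so keep the file
    }],
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```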
|
||||
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
|
||||
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
|
||||
if opts.exec_cmd:
|
||||
postprocessors.append({
|
||||
'key': 'ExecAfterDownload',
|
||||
'verboseOutput': opts.verbose,
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
})
|
||||
if opts.xattr_set_filesize:
|
||||
@@ -345,7 +350,6 @@ def _real_main(argv=None):
|
||||
'default_search': opts.default_search,
|
||||
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
|
||||
'encoding': opts.encoding,
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
'extract_flat': opts.extract_flat,
|
||||
'merge_output_format': opts.merge_output_format,
|
||||
'postprocessors': postprocessors,
|
||||
|
||||
@@ -131,7 +131,7 @@ class RtmpFD(FileDownloader):
|
||||
if play_path is not None:
|
||||
basic_args += ['--playpath', play_path]
|
||||
if tc_url is not None:
|
||||
basic_args += ['--tcUrl', url]
|
||||
basic_args += ['--tcUrl', tc_url]
|
||||
if test:
|
||||
basic_args += ['--stop', '1']
|
||||
if flash_version is not None:
|
||||
|
||||
@@ -258,7 +258,10 @@ from .letv import (
|
||||
LetvPlaylistIE
|
||||
)
|
||||
from .libsyn import LibsynIE
|
||||
from .lifenews import LifeNewsIE
|
||||
from .lifenews import (
|
||||
LifeNewsIE,
|
||||
LifeEmbedIE,
|
||||
)
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import (
|
||||
LivestreamIE,
|
||||
@@ -321,7 +324,10 @@ from .nbc import (
|
||||
NBCSportsIE,
|
||||
NBCSportsVPlayerIE,
|
||||
)
|
||||
from .ndr import NDRIE
|
||||
from .ndr import (
|
||||
NDRIE,
|
||||
NJoyIE,
|
||||
)
|
||||
from .ndtv import NDTVIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
@@ -363,7 +369,10 @@ from .nrk import (
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
from .ntvru import NTVRuIE
|
||||
from .nytimes import NYTimesIE
|
||||
from .nytimes import (
|
||||
NYTimesIE,
|
||||
NYTimesArticleIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
@@ -405,6 +414,7 @@ from .qqmusic import (
|
||||
QQMusicIE,
|
||||
QQMusicSingerIE,
|
||||
QQMusicAlbumIE,
|
||||
QQMusicToplistIE,
|
||||
)
|
||||
from .quickvid import QuickVidIE
|
||||
from .r7 import R7IE
|
||||
@@ -480,8 +490,10 @@ from .soundgasm import (
|
||||
)
|
||||
from .southpark import (
|
||||
SouthParkIE,
|
||||
SouthParkDeIE,
|
||||
SouthParkDkIE,
|
||||
SouthParkEsIE,
|
||||
SouthparkDeIE,
|
||||
SouthParkNlIE
|
||||
)
|
||||
from .space import SpaceIE
|
||||
from .spankbang import SpankBangIE
|
||||
@@ -501,7 +513,10 @@ from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
from .sunporno import SunPornoIE
|
||||
from .svtplay import SVTPlayIE
|
||||
from .svt import (
|
||||
SVTIE,
|
||||
SVTPlayIE,
|
||||
)
|
||||
from .swrmediathek import SWRMediathekIE
|
||||
from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
@@ -530,7 +545,10 @@ from .thesixtyone import TheSixtyOneIE
|
||||
from .thisav import ThisAVIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcIE, TlcDeIE
|
||||
from .tmz import TMZIE
|
||||
from .tmz import (
|
||||
TMZIE,
|
||||
TMZArticleIE,
|
||||
)
|
||||
from .tnaflix import TNAFlixIE
|
||||
from .thvideo import (
|
||||
THVideoIE,
|
||||
@@ -583,7 +601,11 @@ from .veoh import VeohIE
|
||||
from .vessel import VesselIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import VevoIE
|
||||
from .vgtv import VGTVIE
|
||||
from .vgtv import (
|
||||
BTArticleIE,
|
||||
BTVestlendingenIE,
|
||||
VGTVIE,
|
||||
)
|
||||
from .vh1 import VH1IE
|
||||
from .vice import ViceIE
|
||||
from .viddler import ViddlerIE
|
||||
@@ -620,6 +642,7 @@ from .vk import (
|
||||
VKUserVideosIE,
|
||||
)
|
||||
from .vodlocker import VodlockerIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vube import VubeIE
|
||||
@@ -646,9 +669,10 @@ from .xboxclips import XboxClipsIE
|
||||
from .xhamster import XHamsterIE
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xstream import XstreamIE
|
||||
from .xtube import XTubeUserIE, XTubeIE
|
||||
from .xuite import XuiteIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xxxymovies import XXXYMoviesIE
|
||||
from .yahoo import (
|
||||
YahooIE,
|
||||
|
||||
@@ -1,21 +1,11 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
xpath_with_ns,
|
||||
xpath_text,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class AftenpostenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
|
||||
'md5': 'fd828cd29774a729bf4d4425fe192972',
|
||||
@@ -30,69 +20,4 @@ class AftenpostenIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_xml(
|
||||
'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
|
||||
|
||||
NS_MAP = {
|
||||
'atom': 'http://www.w3.org/2005/Atom',
|
||||
'xt': 'http://xstream.dk/',
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
}
|
||||
|
||||
entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
|
||||
|
||||
title = xpath_text(
|
||||
entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
|
||||
description = xpath_text(
|
||||
entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
|
||||
timestamp = parse_iso8601(xpath_text(
|
||||
entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
|
||||
|
||||
formats = []
|
||||
media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
|
||||
for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
|
||||
media_url = media_content.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
tbr = int_or_none(media_content.get('bitrate'))
|
||||
mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
|
||||
if mobj:
|
||||
formats.append({
|
||||
'url': mobj.group('url'),
|
||||
'play_path': 'mp4:%s' % mobj.group('playpath'),
|
||||
'app': mobj.group('app'),
|
||||
'ext': 'flv',
|
||||
'tbr': tbr,
|
||||
'format_id': 'rtmp-%d' % tbr,
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
link = find_xpath_attr(
|
||||
entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
|
||||
if link is not None:
|
||||
formats.append({
|
||||
'url': link.get('href'),
|
||||
'format_id': link.get('rel'),
|
||||
})
|
||||
|
||||
thumbnails = [{
|
||||
'url': splash.get('url'),
|
||||
'width': int_or_none(splash.get('width')),
|
||||
'height': int_or_none(splash.get('height')),
|
||||
} for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
return self.url_result('xstream:ap:%s' % self._match_id(url), 'Xstream')
|
||||
|
||||
@@ -33,7 +33,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
json_url = url + ('?' if '?' in url else '&') + 'output=json'
|
||||
json_url = url + ('&' if '?' in url else '?') + 'output=json'
|
||||
data = self._download_json(json_url, video_id)
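A small sketch of the corrected query-string handling (the helper name and URLs are only illustrative): append with `&` when the URL already has a query string, with `?` otherwise.

```python
def with_output_json(url):
    return url + ('&' if '?' in url else '?') + 'output=json'

print(with_output_json('http://archive.org/details/example'))
# http://archive.org/details/example?output=json
print(with_output_json('http://archive.org/details/example?start=0'))
# http://archive.org/details/example?start=0&output=json
```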
|
||||
|
||||
def get_optional(data_dict, field):
|
||||
|
||||
@@ -16,11 +16,11 @@ class BetIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
|
||||
'info_dict': {
|
||||
'id': '740ab250-bb94-4a8a-8787-fe0de7c74471',
|
||||
'id': 'news/national/2014/a-conversation-with-president-obama',
|
||||
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
|
||||
'ext': 'flv',
|
||||
'title': 'BET News Presents: A Conversation With President Obama',
|
||||
'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
|
||||
'title': 'A Conversation With President Obama',
|
||||
'description': 'md5:699d0652a350cf3e491cd15cc745b5da',
|
||||
'duration': 1534,
|
||||
'timestamp': 1418075340,
|
||||
'upload_date': '20141208',
|
||||
@@ -35,7 +35,7 @@ class BetIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
|
||||
'info_dict': {
|
||||
'id': 'bcd1b1df-673a-42cf-8d01-b282db608f2d',
|
||||
'id': 'news/national/2014/justice-for-ferguson-a-community-reacts',
|
||||
'display_id': 'justice-for-ferguson-a-community-reacts',
|
||||
'ext': 'flv',
|
||||
'title': 'Justice for Ferguson: A Community Reacts',
|
||||
@@ -61,6 +61,9 @@ class BetIE(InfoExtractor):
|
||||
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
|
||||
webpage, 'media URL'))
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'/video/(.*)/_jcr_content/', media_url, 'video id')
|
||||
|
||||
mrss = self._download_xml(media_url, display_id)
|
||||
|
||||
item = mrss.find('./channel/item')
|
||||
@@ -75,8 +78,6 @@ class BetIE(InfoExtractor):
|
||||
description = xpath_text(
|
||||
item, './description', 'description', fatal=False)
|
||||
|
||||
video_id = xpath_text(item, './guid', 'video id', fatal=False)
|
||||
|
||||
timestamp = parse_iso8601(xpath_text(
|
||||
item, xpath_with_ns('./dc:date', NS_MAP),
|
||||
'upload date', fatal=False))
|
||||
|
||||
@@ -2,7 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
fix_xml_ampersands,
|
||||
)
|
||||
|
||||
|
||||
class BildIE(InfoExtractor):
|
||||
@@ -15,7 +18,7 @@ class BildIE(InfoExtractor):
|
||||
'id': '38184146',
|
||||
'ext': 'mp4',
|
||||
'title': 'BILD hat sie getestet',
|
||||
'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 196,
|
||||
'description': 'Mit dem iPad Air 2 und dem iPad Mini 3 hat Apple zwei neue Tablet-Modelle präsentiert. BILD-Reporter Sven Stein durfte die Geräte bereits testen. ',
|
||||
}
|
||||
@@ -25,7 +28,7 @@ class BildIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
xml_url = url.split(".bild.html")[0] + ",view=xml.bild.xml"
|
||||
doc = self._download_xml(xml_url, video_id)
|
||||
doc = self._download_xml(xml_url, video_id, transform_source=fix_xml_ampersands)
|
||||
|
||||
duration = int_or_none(doc.attrib.get('duration'), scale=1000)
|
||||
|
||||
|
||||
@@ -16,27 +16,38 @@ class BRIE(InfoExtractor):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.br.de/mediathek/video/sendungen/heimatsound/heimatsound-festival-2014-trailer-100.html',
|
||||
'md5': '93556dd2bcb2948d9259f8670c516d59',
|
||||
'url': 'http://www.br.de/mediathek/video/sendungen/abendschau/betriebliche-altersvorsorge-104.html',
|
||||
'md5': '83a0477cf0b8451027eb566d88b51106',
|
||||
'info_dict': {
|
||||
'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
|
||||
'id': '48f656ef-287e-486f-be86-459122db22cc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wenn das Traditions-Theater wackelt',
|
||||
'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
|
||||
'duration': 34,
|
||||
'uploader': 'BR',
|
||||
'upload_date': '20140802',
|
||||
'title': 'Die böse Überraschung',
|
||||
'description': 'Betriebliche Altersvorsorge: Die böse Überraschung',
|
||||
'duration': 180,
|
||||
'uploader': 'Reinhard Weber',
|
||||
'upload_date': '20150422',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html',
|
||||
'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820',
|
||||
'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
|
||||
'md5': 'a44396d73ab6a68a69a568fae10705bb',
|
||||
'info_dict': {
|
||||
'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab',
|
||||
'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
|
||||
'ext': 'mp4',
|
||||
'title': 'Manfred Schreiber ist tot',
|
||||
'description': 'Abendschau kompakt: Manfred Schreiber ist tot',
|
||||
'duration': 26,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.br.de/radio/br-klassik/sendungen/allegro/premiere-urauffuehrung-the-land-2015-dance-festival-muenchen-100.html',
|
||||
'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
|
||||
'info_dict': {
|
||||
'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
|
||||
'ext': 'aac',
|
||||
'title': '"Keine neuen Schulden im nächsten Jahr"',
|
||||
'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
|
||||
'duration': 64,
|
||||
'title': 'Kurzweilig und sehr bewegend',
|
||||
'description': '"The Land" von Peeping Tom: Kurzweilig und sehr bewegend',
|
||||
'duration': 296,
|
||||
}
|
||||
},
|
||||
{
|
||||
|
||||
@@ -16,7 +16,7 @@ class BYUtvIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:5438d33774b6bdc662f9485a340401cc',
|
||||
'title': 'Season 5 Episode 5',
|
||||
'thumbnail': 're:^https?://.*promo.*'
|
||||
'thumbnail': 're:^https?://.*\.jpg$'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
||||
@@ -25,14 +25,14 @@ class CanalplusIE(InfoExtractor):
|
||||
}
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
|
||||
'md5': '3db39fb48b9685438ecf33a1078023e4',
|
||||
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092',
|
||||
'md5': 'b3481d7ca972f61e37420798d0a9d934',
|
||||
'info_dict': {
|
||||
'id': '922470',
|
||||
'id': '1263092',
|
||||
'ext': 'flv',
|
||||
'title': 'Zapping - 26/08/13',
|
||||
'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
|
||||
'upload_date': '20130826',
|
||||
'title': 'Le Zapping - 13/05/15',
|
||||
'description': 'md5:09738c0d06be4b5d06a0940edb0da73f',
|
||||
'upload_date': '20150513',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
|
||||
@@ -56,7 +56,7 @@ class CanalplusIE(InfoExtractor):
|
||||
'skip': 'videos get deleted after a while',
|
||||
}, {
|
||||
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
|
||||
'md5': '65aa83ad62fe107ce29e564bb8712580',
|
||||
'md5': 'f3a46edcdf28006598ffaf5b30e6a2d4',
|
||||
'info_dict': {
|
||||
'id': '1213714',
|
||||
'ext': 'flv',
|
||||
|
||||
@@ -764,7 +764,7 @@ class InfoExtractor(object):
|
||||
f.get('fps') if f.get('fps') is not None else -1,
|
||||
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
|
||||
f.get('source_preference') if f.get('source_preference') is not None else -1,
|
||||
f.get('format_id'),
|
||||
f.get('format_id') if f.get('format_id') is not None else '',
|
||||
)
|
||||
formats.sort(key=_formats_key)
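A short sketch of the failure mode the `''` fallback avoids: on Python 3, `None` does not compare with `str`, so a format without a `format_id` would break the sort (the sample data is hypothetical).

```python
formats = [{'format_id': 'hls-high'}, {'url': 'http://example.com/video.mp4'}]

def format_id_key(f):
    return f.get('format_id') if f.get('format_id') is not None else ''

print(sorted(formats, key=format_id_key))
# the entry without a format_id sorts first, instead of raising a TypeError on Python 3
```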
|
||||
|
||||
@@ -896,7 +896,7 @@ class InfoExtractor(object):
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
last_media_name = last_media.get('NAME') if last_media else None
|
||||
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
|
||||
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
@@ -1072,9 +1072,6 @@ class InfoExtractor(object):
|
||||
def _get_automatic_captions(self, *args, **kwargs):
|
||||
raise NotImplementedError("This method must be implemented by subclasses")
|
||||
|
||||
def _subtitles_timecode(self, seconds):
|
||||
return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
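A worked evaluation of that timecode formula (sketch):

```python
seconds = 3661.5
print('%02d:%02d:%02d.%03d' % (
    seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000))
# 01:01:01.500  (1 hour, 1 minute, 1.5 seconds)
```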
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
||||
@@ -52,6 +52,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'uploader': 'IGN',
|
||||
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
|
||||
'upload_date': '20150306',
|
||||
}
|
||||
},
|
||||
# Vevo video
|
||||
@@ -106,9 +107,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
video_upload_date = None
|
||||
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
|
||||
mobj = re.search(r'<meta property="video:release_date" content="([0-9]{4})-([0-9]{2})-([0-9]{2}).+?"/>', webpage)
|
||||
if mobj is not None:
|
||||
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
|
||||
video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
|
||||
|
||||
embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id
|
||||
embed_request = self._build_request(embed_url)
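A worked example of the new upload-date extraction; the page snippet is hypothetical, and the result matches the `upload_date` in the test above:

```python
import re

webpage = '<meta property="video:release_date" content="2015-03-06T12:00:00+01:00"/>'
mobj = re.search(
    r'<meta property="video:release_date" content="([0-9]{4})-([0-9]{2})-([0-9]{2}).+?"/>',
    webpage)
if mobj is not None:
    print(mobj.group(1) + mobj.group(2) + mobj.group(3))  # 20150306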
|
||||
|
||||
@@ -26,7 +26,7 @@ class DumpertIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Cookie', 'nsfw=1')
|
||||
req.add_header('Cookie', 'nsfw=1; cpc=10')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
files_base64 = self._search_regex(
|
||||
|
||||
@@ -4,7 +4,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML
|
||||
)
|
||||
|
||||
|
||||
class EroProfileIE(InfoExtractor):
|
||||
@@ -75,8 +78,8 @@ class EroProfileIE(InfoExtractor):
|
||||
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
||||
webpage, 'video id', default=None)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'<source src="([^"]+)', webpage, 'video url')
|
||||
video_url = unescapeHTML(self._search_regex(
|
||||
r'<source src="([^"]+)', webpage, 'video url'))
|
||||
title = self._html_search_regex(
|
||||
r'Title:</th><td>([^<]+)</td>', webpage, 'title')
|
||||
thumbnail = self._search_regex(
|
||||
|
||||
@@ -8,7 +8,8 @@ from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
clean_html,
|
||||
qualities,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,10 +37,10 @@ def _decrypt_config(key, string):
|
||||
|
||||
|
||||
class EscapistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
|
||||
_VALID_URL = r'https?://?(?:www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||
'md5': 'c6793dbda81388f4264c1ba18684a74d',
|
||||
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
|
||||
'info_dict': {
|
||||
'id': '6618',
|
||||
'ext': 'mp4',
|
||||
@@ -47,10 +48,11 @@ class EscapistIE(InfoExtractor):
|
||||
'title': "Breaking Down Baldur's Gate",
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 264,
|
||||
'uploader': 'The Escapist',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer',
|
||||
'md5': 'cf8842a8a46444d241f9a9980d7874f2',
|
||||
'md5': '9e8c437b0dbb0387d3bd3255ca77f6bf',
|
||||
'info_dict': {
|
||||
'id': '10044',
|
||||
'ext': 'mp4',
|
||||
@@ -58,6 +60,7 @@ class EscapistIE(InfoExtractor):
|
||||
'title': 'Evolve - One vs Multiplayer',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 304,
|
||||
'uploader': 'The Escapist',
|
||||
}
|
||||
}]
|
||||
|
||||
@@ -65,35 +68,33 @@ class EscapistIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
imsVideo = self._parse_json(
|
||||
ims_video = self._parse_json(
|
||||
self._search_regex(
|
||||
r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'),
|
||||
video_id)
|
||||
video_id = imsVideo['videoID']
|
||||
key = imsVideo['hash']
|
||||
video_id = ims_video['videoID']
|
||||
key = ims_video['hash']
|
||||
|
||||
quality = qualities(['lq', 'hq', 'hd'])
|
||||
config_req = compat_urllib_request.Request(
|
||||
'http://www.escapistmagazine.com/videos/'
|
||||
'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
|
||||
config_req.add_header('Referer', url)
|
||||
config = self._download_webpage(config_req, video_id, 'Downloading video config')
|
||||
|
||||
formats = []
|
||||
for q in ['lq', 'hq', 'hd']:
|
||||
config_req = compat_urllib_request.Request(
|
||||
'http://www.escapistmagazine.com/videos/'
|
||||
'vidconfig.php?videoID=%s&hash=%s&quality=%s' % (video_id, key, 'mp4_' + q))
|
||||
config_req.add_header('Referer', url)
|
||||
config = self._download_webpage(config_req, video_id, 'Downloading video config ' + q.upper())
|
||||
data = json.loads(_decrypt_config(key, config))
|
||||
|
||||
data = json.loads(_decrypt_config(key, config))
|
||||
video_data = data['videoData']
|
||||
|
||||
title = clean_html(data['videoData']['title'])
|
||||
duration = data['videoData']['duration'] / 1000
|
||||
title = clean_html(video_data['title'])
|
||||
duration = float_or_none(video_data.get('duration'), 1000)
|
||||
uploader = video_data.get('publisher')
|
||||
|
||||
for i, v in enumerate(data['files']['videos']):
|
||||
|
||||
formats.append({
|
||||
'url': v,
|
||||
'format_id': determine_ext(v) + '_' + q + str(i),
|
||||
'quality': quality(q),
|
||||
})
|
||||
formats = [{
|
||||
'url': video['src'],
|
||||
'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']),
|
||||
'height': int_or_none(video.get('res')),
|
||||
} for video in data['files']['videos']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -102,4 +103,5 @@ class EscapistIE(InfoExtractor):
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ from ..utils import smuggle_url
|
||||
|
||||
|
||||
class FoxSportsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/video\?vid=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.foxsports.com/video?vid=432609859715',
|
||||
|
||||
@@ -37,6 +37,7 @@ from .condenast import CondeNastIE
|
||||
from .udn import UDNEmbedIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .bliptv import BlipTVIE
|
||||
from .svt import SVTIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -413,19 +414,6 @@ class GenericIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
# MLB articles
|
||||
{
|
||||
'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
|
||||
'md5': 'b190e70141fb9a1552a85426b4da1b5d',
|
||||
'info_dict': {
|
||||
'id': '75609783',
|
||||
'ext': 'mp4',
|
||||
'title': 'Must C: Pillar climbs for catch',
|
||||
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
|
||||
'timestamp': 1429124820,
|
||||
'upload_date': '20150415',
|
||||
}
|
||||
},
|
||||
# Wistia embed
|
||||
{
|
||||
'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
|
||||
@@ -658,6 +646,17 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Facebook Creates "On This Day" | Crunch Report',
|
||||
},
|
||||
},
|
||||
# SVT embed
|
||||
{
|
||||
'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
|
||||
'info_dict': {
|
||||
'id': '2900353',
|
||||
'ext': 'flv',
|
||||
'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
|
||||
'duration': 27,
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
# RSS feed with enclosure
|
||||
{
|
||||
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||
@@ -1091,6 +1090,11 @@ class GenericIE(InfoExtractor):
|
||||
if bliptv_url:
|
||||
return self.url_result(bliptv_url, 'BlipTV')
|
||||
|
||||
# Look for SVT player
|
||||
svt_url = SVTIE._extract_url(webpage)
|
||||
if svt_url:
|
||||
return self.url_result(svt_url, 'SVT')
|
||||
|
||||
# Look for embedded condenast player
|
||||
matches = re.findall(
|
||||
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
|
||||
@@ -1453,7 +1457,7 @@ class GenericIE(InfoExtractor):
|
||||
if refresh_header:
|
||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||
if found:
|
||||
new_url = found.group(1)
|
||||
new_url = compat_urlparse.urljoin(url, found.group(1))
|
||||
self.report_following_redirect(new_url)
|
||||
return {
|
||||
'_type': 'url',
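A minimal sketch of why the `urljoin` is needed: a refresh redirect may carry a URL that is relative to the page being parsed (both URLs here are hypothetical).

```python
from youtube_dl.compat import compat_urlparse

page_url = 'http://example.com/videos/page.html'
refresh_target = '/watch?v=123'
print(compat_urlparse.urljoin(page_url, refresh_target))
# http://example.com/watch?v=123
```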
|
||||
|
||||
@@ -85,7 +85,8 @@ class GigaIE(InfoExtractor):
|
||||
r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False)
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False))
|
||||
r'<span class="views"><strong>([\d.,]+)</strong>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -35,13 +35,7 @@ class GorillaVidIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
|
||||
'md5': 'c9e293ca74d46cad638e199c3f3fe604',
|
||||
'info_dict': {
|
||||
'id': 'z08zf8le23c6',
|
||||
'ext': 'mp4',
|
||||
'title': 'Say something nice',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://daclips.in/3rso4kdn6f9m',
|
||||
'md5': '1ad8fd39bb976eeb66004d3a4895f106',
|
||||
|
||||
@@ -25,7 +25,8 @@ class HistoricFilmsIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
tape_id = self._search_regex(
|
||||
r'class="tapeId">([^<]+)<', webpage, 'tape id')
|
||||
[r'class="tapeId"[^>]*>([^<]+)<', r'tapeId\s*:\s*"([^"]+)"'],
|
||||
webpage, 'tape id')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
srt_subtitles_timecode,
|
||||
)
|
||||
|
||||
|
||||
@@ -39,8 +40,8 @@ class KanalPlayIE(InfoExtractor):
|
||||
'%s\r\n%s --> %s\r\n%s'
|
||||
% (
|
||||
num,
|
||||
self._subtitles_timecode(item['startMillis'] / 1000.0),
|
||||
self._subtitles_timecode(item['endMillis'] / 1000.0),
|
||||
srt_subtitles_timecode(item['startMillis'] / 1000.0),
|
||||
srt_subtitles_timecode(item['endMillis'] / 1000.0),
|
||||
item['text'],
|
||||
) for num, item in enumerate(subs, 1))
|
||||
|
||||
|
||||
@@ -4,7 +4,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
@@ -14,7 +16,7 @@ from ..utils import (
|
||||
class LifeNewsIE(InfoExtractor):
|
||||
IE_NAME = 'lifenews'
|
||||
IE_DESC = 'LIFE | NEWS'
|
||||
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
|
||||
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://lifenews.ru/news/126342',
|
||||
@@ -39,17 +41,34 @@ class LifeNewsIE(InfoExtractor):
|
||||
'upload_date': '20150402',
|
||||
'uploader': 'embed.life.ru',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://lifenews.ru/news/153461',
|
||||
'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
|
||||
'info_dict': {
|
||||
'id': '153461',
|
||||
'ext': 'mp4',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'upload_date': '20150505',
|
||||
'uploader': 'embed.life.ru',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://lifenews.ru/video/13035',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
section = mobj.group('section')
|
||||
|
||||
webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')
|
||||
webpage = self._download_webpage(
|
||||
'http://lifenews.ru/%s/%s' % (section, video_id),
|
||||
video_id, 'Downloading page')
|
||||
|
||||
videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
|
||||
iframe_link = self._html_search_regex(
|
||||
'<iframe[^>]+src="([^"]+)', webpage, 'iframe link', default=None)
|
||||
'<iframe[^>]+src=["\']([^"\']+)["\']', webpage, 'iframe link', default=None)
|
||||
if not videos and not iframe_link:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
@@ -88,6 +107,7 @@ class LifeNewsIE(InfoExtractor):
|
||||
return cur_info
|
||||
|
||||
if iframe_link:
|
||||
iframe_link = self._proto_relative_url(iframe_link, 'http:')
|
||||
cur_info = dict(common_info)
|
||||
cur_info.update({
|
||||
'_type': 'url_transparent',
|
||||
@@ -101,3 +121,49 @@ class LifeNewsIE(InfoExtractor):
|
||||
return make_entry(video_id, videos[0])
|
||||
else:
|
||||
return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]
|
||||
|
||||
|
||||
class LifeEmbedIE(InfoExtractor):
|
||||
IE_NAME = 'life:embed'
|
||||
_VALID_URL = r'http://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
|
||||
'md5': 'b889715c9e49cb1981281d0e5458fbbe',
|
||||
'info_dict': {
|
||||
'id': 'e50c2dec2867350528e2574c899b8291',
|
||||
'ext': 'mp4',
|
||||
'title': 'e50c2dec2867350528e2574c899b8291',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = []
|
||||
for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='m3u8'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': ext,
|
||||
'preference': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -194,23 +194,19 @@ class LivestreamIE(InfoExtractor):
|
||||
# The original version of Livestream uses a different system
|
||||
class LivestreamOriginalIE(InfoExtractor):
|
||||
IE_NAME = 'livestream:original'
|
||||
_VALID_URL = r'''(?x)https?://www\.livestream\.com/
|
||||
_VALID_URL = r'''(?x)https?://original\.livestream\.com/
|
||||
(?P<user>[^/]+)/(?P<type>video|folder)
|
||||
(?:\?.*?Id=|/)(?P<id>.*?)(&|$)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||
'url': 'http://original.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||
'info_dict': {
|
||||
'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
|
||||
},
|
||||
'params': {
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
|
||||
'url': 'https://original.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
|
||||
'info_dict': {
|
||||
'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
|
||||
},
|
||||
@@ -221,19 +217,17 @@ class LivestreamOriginalIE(InfoExtractor):
|
||||
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
|
||||
|
||||
info = self._download_xml(api_url, video_id)
|
||||
# this url is used on mobile devices
|
||||
stream_url = 'http://x{0}x.api.channel.livestream.com/3.0/getstream.json?id={1}'.format(user, video_id)
|
||||
stream_info = self._download_json(stream_url, video_id)
|
||||
item = info.find('channel').find('item')
|
||||
ns = {'media': 'http://search.yahoo.com/mrss'}
|
||||
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
|
||||
# Remove the extension and number from the path (like 1.jpg)
|
||||
path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': item.find('title').text,
|
||||
'url': 'rtmp://extondemand.livestream.com/ondemand',
|
||||
'play_path': 'trans/dv15/mogulus-{0}'.format(path),
|
||||
'player_url': 'http://static.livestream.com/chromelessPlayer/v21/playerapi.swf?hash=5uetk&v=0803&classid=D27CDB6E-AE6D-11cf-96B8-444553540000&jsEnabled=false&wmode=opaque',
|
||||
'ext': 'flv',
|
||||
'url': stream_info['progressiveUrl'],
|
||||
'thumbnail': thumbnail_url,
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,21 @@ from ..utils import (
|
||||
|
||||
|
||||
class MLBIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/(?:embed|m-internal-embed)\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:[\da-z_-]+\.)*mlb\.com/
|
||||
(?:
|
||||
(?:
|
||||
(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|
|
||||
(?:
|
||||
shared/video/embed/(?:embed|m-internal-embed)\.html|
|
||||
(?:[^/]+/)+(?:play|index)\.jsp|
|
||||
)\?.*?\bcontent_id=
|
||||
)
|
||||
(?P<id>n?\d+)|
|
||||
(?:[^/]+/)*(?P<path>[^/]+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
||||
@@ -68,6 +82,18 @@ class MLBIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
|
||||
'md5': 'b190e70141fb9a1552a85426b4da1b5d',
|
||||
'info_dict': {
|
||||
'id': '75609783',
|
||||
'ext': 'mp4',
|
||||
'title': 'Must C: Pillar climbs for catch',
|
||||
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
|
||||
'timestamp': 1429124820,
|
||||
'upload_date': '20150415',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
|
||||
'only_matching': True,
|
||||
@@ -88,6 +114,10 @@ class MLBIE(InfoExtractor):
|
||||
# From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer
|
||||
'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://washington.nationals.mlb.com/mlb/gameday/index.jsp?c_id=was&gid=2015_05_09_atlmlb_wasmlb_1&lang=en&content_id=108309983&mode=video#',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -95,6 +125,12 @@ class MLBIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
if not video_id:
|
||||
video_path = mobj.group('path')
|
||||
webpage = self._download_webpage(url, video_path)
|
||||
video_id = self._search_regex(
|
||||
[r'data-video-?id="(\d+)"', r'content_id=(\d+)'], webpage, 'video id')
|
||||
|
||||
detail = self._download_xml(
|
||||
'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
|
||||
% (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
|
||||
|
||||
@@ -10,6 +10,8 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
lowercase_escape,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
@@ -37,14 +39,32 @@ class NBCIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Only works from US',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
|
||||
'info_dict': {
|
||||
'id': '8iUuyzWDdYUZ',
|
||||
'ext': 'flv',
|
||||
'title': 'Star Wars Teaser',
|
||||
'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
|
||||
},
|
||||
'skip': 'Only works from US',
|
||||
},
|
||||
{
|
||||
# This video has expired but with an escaped embedURL
|
||||
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
|
||||
'skip': 'Expired'
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
theplatform_url = self._search_regex(
|
||||
'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
|
||||
webpage, 'theplatform url').replace('_no_endcard', '')
|
||||
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
|
||||
[
|
||||
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
|
||||
r'"embedURL"\s*:\s*"([^"]+)"'
|
||||
],
|
||||
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
||||
if theplatform_url.startswith('//'):
|
||||
theplatform_url = 'http:' + theplatform_url
|
||||
return self.url_result(theplatform_url)
|
||||
|
||||
@@ -8,41 +8,11 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class NDRIE(InfoExtractor):
|
||||
IE_NAME = 'ndr'
|
||||
IE_DESC = 'NDR.de - Mediathek'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
|
||||
'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
|
||||
'note': 'Video file',
|
||||
'info_dict': {
|
||||
'id': '25866',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kartoffeltage in der Lewitz',
|
||||
'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
|
||||
'duration': 166,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ndr.de/info/audio51535.html',
|
||||
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
||||
'note': 'Audio file',
|
||||
'info_dict': {
|
||||
'id': '51535',
|
||||
'ext': 'mp3',
|
||||
'title': 'La Valette entgeht der Hinrichtung',
|
||||
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
||||
'duration': 884,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
class NDRBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
@@ -54,7 +24,11 @@ class NDRIE(InfoExtractor):
|
||||
if description:
|
||||
description = description.strip()
|
||||
|
||||
duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', fatal=False))
|
||||
duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', default=None))
|
||||
if not duration:
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'(<span class="min">\d+</span>:<span class="sec">\d+</span>)',
|
||||
page, 'duration', default=None))
|
||||
|
||||
formats = []
|
||||
|
||||
@@ -92,3 +66,65 @@ class NDRIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class NDRIE(NDRBaseIE):
|
||||
IE_NAME = 'ndr'
|
||||
IE_DESC = 'NDR.de - Mediathek'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
|
||||
'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
|
||||
'note': 'Video file',
|
||||
'info_dict': {
|
||||
'id': '25866',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kartoffeltage in der Lewitz',
|
||||
'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
|
||||
'duration': 166,
|
||||
},
|
||||
'skip': '404 Not found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
|
||||
'md5': 'dadc003c55ae12a5d2f6bd436cd73f59',
|
||||
'info_dict': {
|
||||
'id': '988',
|
||||
'ext': 'mp4',
|
||||
'title': 'Party, Pötte und Parade',
|
||||
'description': 'Hunderttausende feiern zwischen Speicherstadt und St. Pauli den 826. Hafengeburtstag. Die NDR Sondersendung zeigt die schönsten und spektakulärsten Bilder vom Auftakt.',
|
||||
'duration': 3498,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ndr.de/info/audio51535.html',
|
||||
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
||||
'note': 'Audio file',
|
||||
'info_dict': {
|
||||
'id': '51535',
|
||||
'ext': 'mp3',
|
||||
'title': 'La Valette entgeht der Hinrichtung',
|
||||
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
||||
'duration': 884,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
class NJoyIE(NDRBaseIE):
|
||||
IE_NAME = 'N-JOY'
|
||||
_VALID_URL = r'https?://www\.n-joy\.de/.+?(?P<id>\d+)\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
|
||||
'md5': 'cb63be60cd6f9dd75218803146d8dc67',
|
||||
'info_dict': {
|
||||
'id': '2480',
|
||||
'ext': 'mp4',
|
||||
'title': 'Benaissa beim NDR Comedy Contest',
|
||||
'description': 'Von seinem sehr "behaarten" Leben lässt sich Benaissa trotz aller Schwierigkeiten nicht unterkriegen.',
|
||||
'duration': 654,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -49,7 +49,7 @@ class NetzkinoIE(InfoExtractor):
            'http://www.netzkino.de/beta/dist/production.min.js', video_id,
            note='Downloading player code')
        avo_js = self._search_regex(
            r'window\.avoCore\s*=.*?urlTemplate:\s*(\{.*?"\})',
            r'var urlTemplate=(\{.*?"\})',
            production_js, 'URL templates')
        templates = self._parse_json(
            avo_js, video_id, transform_source=js_to_json)

@@ -21,6 +21,9 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
return json_string.replace('\\\'', '\'')
|
||||
|
||||
def _real_extract_video(self, video_id):
|
||||
vid_parts = video_id.split(',')
|
||||
if len(vid_parts) == 3:
|
||||
video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0'))
|
||||
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
|
||||
data = self._download_json(
|
||||
json_url, video_id, transform_source=self._fix_json)
|
||||
@@ -47,7 +50,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
video_url = initial_video_url
|
||||
|
||||
join = compat_urlparse.urljoin
|
||||
return {
|
||||
ret = {
|
||||
'id': video_id,
|
||||
'title': info['name'],
|
||||
'url': video_url,
|
||||
@@ -56,11 +59,20 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
|
||||
'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
|
||||
}
|
||||
if video_url.startswith('rtmp:'):
|
||||
mobj = re.match(r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)', video_url)
|
||||
ret.update({
|
||||
'tc_url': mobj.group('tc_url'),
|
||||
'play_path': mobj.group('play_path'),
|
||||
'app': mobj.group('app'),
|
||||
'no_resume': True,
|
||||
})
|
||||
return ret
|
||||
|
||||
|
||||
class NHLIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)(?:id|hlg)=(?P<id>[-0-9a-zA-Z,]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
||||
@@ -101,6 +113,29 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://video.nhl.com/videocenter/?id=736722',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en',
|
||||
'md5': '076fcb88c255154aacbf0a7accc3f340',
|
||||
'info_dict': {
|
||||
'id': '2014020299-X-h',
|
||||
'ext': 'mp4',
|
||||
'title': 'Penguins at Islanders / Game Highlights',
|
||||
'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014',
|
||||
'duration': 268,
|
||||
'upload_date': '20141122',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4',
|
||||
'info_dict': {
|
||||
'id': '691469',
|
||||
'ext': 'mp4',
|
||||
'title': 'RAW | Craig MacTavish Full Press Conference',
|
||||
'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.',
|
||||
'upload_date': '20141205',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -14,7 +14,9 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,21 +27,38 @@ class NocoIE(InfoExtractor):
|
||||
_SUB_LANG_TEMPLATE = '&sub_lang=%s'
|
||||
_NETRC_MACHINE = 'noco'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
|
||||
'md5': '0a993f0058ddbcd902630b2047ef710e',
|
||||
'info_dict': {
|
||||
'id': '11538',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ami Ami Idol - Hello! France',
|
||||
'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
|
||||
'upload_date': '20140412',
|
||||
'uploader': 'Nolife',
|
||||
'uploader_id': 'NOL',
|
||||
'duration': 2851.2,
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
|
||||
'md5': '0a993f0058ddbcd902630b2047ef710e',
|
||||
'info_dict': {
|
||||
'id': '11538',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ami Ami Idol - Hello! France',
|
||||
'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
|
||||
'upload_date': '20140412',
|
||||
'uploader': 'Nolife',
|
||||
'uploader_id': 'NOL',
|
||||
'duration': 2851.2,
|
||||
},
|
||||
'skip': 'Requires noco account',
|
||||
},
|
||||
'skip': 'Requires noco account',
|
||||
}
|
||||
{
|
||||
'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call',
|
||||
'md5': 'c190f1f48e313c55838f1f412225934d',
|
||||
'info_dict': {
|
||||
'id': '12610',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Guild #1 - Wake-Up Call',
|
||||
'timestamp': 1403863200,
|
||||
'upload_date': '20140627',
|
||||
'uploader': 'LBL42',
|
||||
'uploader_id': 'LBL',
|
||||
'duration': 233.023,
|
||||
},
|
||||
'skip': 'Requires noco account',
|
||||
}
|
||||
]
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@@ -90,51 +109,66 @@ class NocoIE(InfoExtractor):
|
||||
'shows/%s/medias' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
show = self._call_api(
|
||||
'shows/by_id/%s' % video_id,
|
||||
video_id, 'Downloading show JSON')[0]
|
||||
|
||||
options = self._call_api(
|
||||
'users/init', video_id,
|
||||
'Downloading user options JSON')['options']
|
||||
audio_lang_pref = options.get('audio_language') or options.get('language', 'fr')
|
||||
|
||||
if audio_lang_pref == 'original':
|
||||
audio_lang_pref = show['original_lang']
|
||||
if len(medias) == 1:
|
||||
audio_lang_pref = list(medias.keys())[0]
|
||||
elif audio_lang_pref not in medias:
|
||||
audio_lang_pref = 'fr'
|
||||
|
||||
qualities = self._call_api(
|
||||
'qualities',
|
||||
video_id, 'Downloading qualities JSON')
|
||||
|
||||
formats = []
|
||||
|
||||
for lang, lang_dict in medias['fr']['video_list'].items():
|
||||
for format_id, fmt in lang_dict['quality_list'].items():
|
||||
format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
|
||||
for audio_lang, audio_lang_dict in medias.items():
|
||||
preference = 1 if audio_lang == audio_lang_pref else 0
|
||||
for sub_lang, lang_dict in audio_lang_dict['video_list'].items():
|
||||
for format_id, fmt in lang_dict['quality_list'].items():
|
||||
format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id)
|
||||
|
||||
video = self._call_api(
|
||||
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
|
||||
video_id, 'Downloading %s video JSON' % format_id_extended,
|
||||
lang if lang != 'none' else None)
|
||||
video = self._call_api(
|
||||
'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang),
|
||||
video_id, 'Downloading %s video JSON' % format_id_extended,
|
||||
sub_lang if sub_lang != 'none' else None)
|
||||
|
||||
file_url = video['file']
|
||||
if not file_url:
|
||||
continue
|
||||
file_url = video['file']
|
||||
if not file_url:
|
||||
continue
|
||||
|
||||
if file_url in ['forbidden', 'not found']:
|
||||
popmessage = video['popmessage']
|
||||
self._raise_error(popmessage['title'], popmessage['message'])
|
||||
if file_url in ['forbidden', 'not found']:
|
||||
popmessage = video['popmessage']
|
||||
self._raise_error(popmessage['title'], popmessage['message'])
|
||||
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'format_id': format_id_extended,
|
||||
'width': fmt['res_width'],
|
||||
'height': fmt['res_lines'],
|
||||
'abr': fmt['audiobitrate'],
|
||||
'vbr': fmt['videobitrate'],
|
||||
'filesize': fmt['filesize'],
|
||||
'format_note': qualities[format_id]['quality_name'],
|
||||
'preference': qualities[format_id]['priority'],
|
||||
})
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'format_id': format_id_extended,
|
||||
'width': int_or_none(fmt.get('res_width')),
|
||||
'height': int_or_none(fmt.get('res_lines')),
|
||||
'abr': int_or_none(fmt.get('audiobitrate')),
|
||||
'vbr': int_or_none(fmt.get('videobitrate')),
|
||||
'filesize': int_or_none(fmt.get('filesize')),
|
||||
'format_note': qualities[format_id].get('quality_name'),
|
||||
'quality': qualities[format_id].get('priority'),
|
||||
'preference': preference,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
show = self._call_api(
|
||||
'shows/by_id/%s' % video_id,
|
||||
video_id, 'Downloading show JSON')[0]
|
||||
|
||||
upload_date = unified_strdate(show['online_date_start_utc'])
|
||||
uploader = show['partner_name']
|
||||
uploader_id = show['partner_key']
|
||||
duration = show['duration_ms'] / 1000.0
|
||||
timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ')
|
||||
uploader = show.get('partner_name')
|
||||
uploader_id = show.get('partner_key')
|
||||
duration = float_or_none(show.get('duration_ms'), 1000)
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_key, thumbnail_url in show.items():
|
||||
@@ -166,7 +200,7 @@ class NocoIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnails': thumbnails,
|
||||
'upload_date': upload_date,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
|
||||
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    float_or_none,
@@ -200,20 +199,10 @@ class NRKTVIE(InfoExtractor):
        url = "%s%s" % (baseurl, subtitlesurl)
        self._debug_print('%s: Subtitle url: %s' % (video_id, url))
        captions = self._download_xml(
            url, video_id, 'Downloading subtitles',
            transform_source=lambda s: s.replace(r'<br />', '\r\n'))
            url, video_id, 'Downloading subtitles')
        lang = captions.get('lang', 'no')
        ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
        srt = ''
        for pos, p in enumerate(ps):
            begin = parse_duration(p.get('begin'))
            duration = parse_duration(p.get('dur'))
            starttime = self._subtitles_timecode(begin)
            endtime = self._subtitles_timecode(begin + duration)
            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
        return {lang: [
            {'ext': 'ttml', 'url': url},
            {'ext': 'srt', 'data': srt},
        ]}

    def _extract_f4m(self, manifest_url, video_id):

@@ -8,30 +8,8 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class NYTimesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
||||
'md5': '18a525a510f942ada2720db5f31644c0',
|
||||
'info_dict': {
|
||||
'id': '100000002847155',
|
||||
'ext': 'mov',
|
||||
'title': 'Verbatim: What Is a Photocopier?',
|
||||
'description': 'md5:93603dada88ddbda9395632fdc5da260',
|
||||
'timestamp': 1398631707,
|
||||
'upload_date': '20140427',
|
||||
'uploader': 'Brett Weiner',
|
||||
'duration': 419,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
class NYTimesBaseIE(InfoExtractor):
|
||||
def _extract_video_from_id(self, video_id):
|
||||
video_data = self._download_json(
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
@@ -81,3 +59,59 @@ class NYTimesIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
|
||||
class NYTimesIE(NYTimesBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
||||
'md5': '18a525a510f942ada2720db5f31644c0',
|
||||
'info_dict': {
|
||||
'id': '100000002847155',
|
||||
'ext': 'mov',
|
||||
'title': 'Verbatim: What Is a Photocopier?',
|
||||
'description': 'md5:93603dada88ddbda9395632fdc5da260',
|
||||
'timestamp': 1398631707,
|
||||
'upload_date': '20140427',
|
||||
'uploader': 'Brett Weiner',
|
||||
'duration': 419,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
return self._extract_video_from_id(video_id)
|
||||
|
||||
|
||||
class NYTimesArticleIE(NYTimesBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
|
||||
'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
|
||||
'info_dict': {
|
||||
'id': '100000003628438',
|
||||
'ext': 'mov',
|
||||
'title': 'New Minimum Wage: $70,000 a Year',
|
||||
'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.',
|
||||
'timestamp': 1429033037,
|
||||
'upload_date': '20150414',
|
||||
'uploader': 'Matthew Williams',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._html_search_regex(r'data-videoid="(\d+)"', webpage, 'video id')
|
||||
|
||||
return self._extract_video_from_id(video_id)
|
||||
|
||||
@@ -6,6 +6,7 @@ from ..utils import (
    unified_strdate,
    int_or_none,
    qualities,
    unescapeHTML,
)


@@ -36,8 +37,8 @@ class OdnoklassnikiIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)

        player = self._parse_json(
            self._search_regex(
                r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'),
            unescapeHTML(self._search_regex(
                r'data-attributes="([^"]+)"', webpage, 'player')),
            video_id)

        metadata = self._parse_json(player['flashvars']['metadata'], video_id)

@@ -1,11 +1,14 @@
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import json
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -32,6 +35,17 @@ class OoyalaIE(InfoExtractor):
|
||||
'description': '',
|
||||
},
|
||||
},
|
||||
{
|
||||
# Information available only through SAS api
|
||||
# From http://community.plm.automation.siemens.com/t5/News-NX-Manufacturing/Tool-Path-Divide/ba-p/4187
|
||||
'url': 'http://player.ooyala.com/player.js?embedCode=FiOG81ZTrvckcchQxmalf4aQj590qTEx',
|
||||
'md5': 'a84001441b35ea492bc03736e59e7935',
|
||||
'info_dict': {
|
||||
'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ooyala video',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
@@ -44,11 +58,21 @@ class OoyalaIE(InfoExtractor):
|
||||
ie=cls.ie_key())
|
||||
|
||||
def _extract_result(self, info, more_info):
|
||||
embedCode = info['embedCode']
|
||||
video_url = info.get('ipad_url') or info['url']
|
||||
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4')
|
||||
else:
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': info['embedCode'],
|
||||
'ext': 'mp4',
|
||||
'id': embedCode,
|
||||
'title': unescapeHTML(info['title']),
|
||||
'url': info.get('ipad_url') or info['url'],
|
||||
'formats': formats,
|
||||
'description': unescapeHTML(more_info['description']),
|
||||
'thumbnail': more_info['promo'],
|
||||
}
|
||||
@@ -77,6 +101,36 @@ class OoyalaIE(InfoExtractor):
|
||||
mobile_player, 'info', fatal=False, default=None)
|
||||
if videos_info:
|
||||
break
|
||||
|
||||
if not videos_info:
|
||||
formats = []
|
||||
auth_data = self._download_json(
|
||||
'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (embedCode, embedCode),
|
||||
embedCode)
|
||||
|
||||
cur_auth_data = auth_data['authorization_data'][embedCode]
|
||||
|
||||
for stream in cur_auth_data['streams']:
|
||||
formats.append({
|
||||
'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'),
|
||||
'ext': stream.get('delivery_type'),
|
||||
'format': stream.get('video_codec'),
|
||||
'format_id': stream.get('profile'),
|
||||
'width': int_or_none(stream.get('width')),
|
||||
'height': int_or_none(stream.get('height')),
|
||||
'abr': int_or_none(stream.get('audio_bitrate')),
|
||||
'vbr': int_or_none(stream.get('video_bitrate')),
|
||||
})
|
||||
if formats:
|
||||
return {
|
||||
'id': embedCode,
|
||||
'formats': formats,
|
||||
'title': 'Ooyala video',
|
||||
}
|
||||
|
||||
if not cur_auth_data['authorized']:
|
||||
raise ExtractorError(cur_auth_data['message'], expected=True)
|
||||
|
||||
if not videos_info:
|
||||
raise ExtractorError('Unable to extract info')
|
||||
videos_info = videos_info.replace('\\"', '"')
|
||||
|
||||
@@ -187,6 +187,7 @@ class PBSIE(InfoExtractor):
            else:
                formats.append({
                    'url': format_url,
                    'format_id': redirect.get('eeid'),
                })
        self._sort_formats(formats)


@@ -9,11 +9,13 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
js_to_json,
|
||||
)
|
||||
from ..compat import compat_urllib_request
|
||||
|
||||
|
||||
class QQMusicIE(InfoExtractor):
|
||||
IE_NAME = 'qqmusic'
|
||||
_VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
|
||||
@@ -96,6 +98,7 @@ class QQPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class QQMusicSingerIE(QQPlaylistBaseIE):
|
||||
IE_NAME = 'qqmusic:singer'
|
||||
_VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
_TEST = {
|
||||
'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
|
||||
@@ -139,6 +142,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
|
||||
|
||||
|
||||
class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||
IE_NAME = 'qqmusic:album'
|
||||
_VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
|
||||
_TEST = {
|
||||
@@ -168,3 +172,67 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||
album_page, 'album details', default=None)
|
||||
|
||||
return self.playlist_result(entries, mid, album_name, album_detail)
|
||||
|
||||
|
||||
class QQMusicToplistIE(QQPlaylistBaseIE):
|
||||
IE_NAME = 'qqmusic:toplist'
|
||||
_VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://y.qq.com/#type=toplist&p=global_12',
|
||||
'info_dict': {
|
||||
'id': 'global_12',
|
||||
'title': 'itunes榜',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}, {
|
||||
'url': 'http://y.qq.com/#type=toplist&p=top_6',
|
||||
'info_dict': {
|
||||
'id': 'top_6',
|
||||
'title': 'QQ音乐巅峰榜·欧美',
|
||||
},
|
||||
'playlist_count': 100,
|
||||
}, {
|
||||
'url': 'http://y.qq.com/#type=toplist&p=global_5',
|
||||
'info_dict': {
|
||||
'id': 'global_5',
|
||||
'title': '韩国mnet排行榜',
|
||||
},
|
||||
'playlist_count': 50,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def strip_qq_jsonp(code):
|
||||
return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code))
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
|
||||
list_type, num_id = list_id.split("_")
|
||||
|
||||
list_page = self._download_webpage(
|
||||
"http://y.qq.com/y/static/toplist/index/%s.html" % list_id,
|
||||
list_id, 'Download toplist page')
|
||||
|
||||
entries = []
|
||||
if list_type == 'top':
|
||||
jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id
|
||||
else:
|
||||
jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id
|
||||
|
||||
toplist_json = self._download_json(
|
||||
jsonp_url, list_id, note='Retrieve toplist json',
|
||||
errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp)
|
||||
|
||||
for song in toplist_json['l']:
|
||||
s = song['s']
|
||||
song_mid = s.split("|")[20]
|
||||
entries.append(self.url_result(
|
||||
'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
|
||||
song_mid))
|
||||
|
||||
list_name = self._html_search_regex(
|
||||
r'<h2 id="top_name">([^\']+)</h2>', list_page, 'top list name',
|
||||
default=None)
|
||||
|
||||
return self.playlist_result(entries, list_id, list_name)
|
||||
|
||||
@@ -87,7 +87,7 @@ class RUTVIE(InfoExtractor):
|
||||
'skip': 'Translation has finished',
|
||||
},
|
||||
{
|
||||
'url': 'http://live.russia.tv/index/index/channel_id/3',
|
||||
'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/',
|
||||
'info_dict': {
|
||||
'id': '21',
|
||||
'ext': 'mp4',
|
||||
@@ -128,8 +128,10 @@ class RUTVIE(InfoExtractor):
|
||||
elif video_path.startswith('index/iframe/cast_id'):
|
||||
video_type = 'live'
|
||||
|
||||
is_live = video_type == 'live'
|
||||
|
||||
json_data = self._download_json(
|
||||
'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
|
||||
'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if is_live else '', video_id),
|
||||
video_id, 'Downloading JSON')
|
||||
|
||||
if json_data['errors']:
|
||||
@@ -156,6 +158,7 @@ class RUTVIE(InfoExtractor):
|
||||
|
||||
for transport, links in media['sources'].items():
|
||||
for quality, url in links.items():
|
||||
preference = -1 if priority_transport == transport else -2
|
||||
if transport == 'rtmp':
|
||||
mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
|
||||
if not mobj:
|
||||
@@ -169,9 +172,11 @@ class RUTVIE(InfoExtractor):
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
'vbr': int(quality),
|
||||
'preference': preference,
|
||||
}
|
||||
elif transport == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4'))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
url, video_id, 'mp4', preference=preference, m3u8_id='hls'))
|
||||
continue
|
||||
else:
|
||||
fmt = {
|
||||
@@ -181,17 +186,11 @@ class RUTVIE(InfoExtractor):
|
||||
'width': width,
|
||||
'height': height,
|
||||
'format_id': '%s-%s' % (transport, quality),
|
||||
'preference': -1 if priority_transport == transport else -2,
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
is_live = video_type == 'live'
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
|
||||
@@ -11,7 +11,7 @@ from ..utils import (


class ScreenwaveMediaIE(InfoExtractor):
    _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
    _VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'

    _TESTS = [{
        'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
@@ -20,7 +20,10 @@ class ScreenwaveMediaIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
        playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')

        playerdata = self._download_webpage(
            'http://player.screenwavemedia.com/play/player.php?id=%s' % video_id,
            video_id, 'Downloading player webpage')

        vidtitle = self._search_regex(
            r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
@@ -99,7 +102,7 @@ class TeamFourIE(InfoExtractor):
        webpage = self._download_webpage(url, display_id)

        playerdata_url = self._search_regex(
            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
            r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
            webpage, 'player data URL')

        video_title = self._html_search_regex(

@@ -8,7 +8,7 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request
|
||||
)
|
||||
from ..utils import sanitize_url_path_consecutive_slashes
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class SohuIE(InfoExtractor):
|
||||
@@ -28,7 +28,7 @@ class SohuIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
|
||||
'md5': '699060e75cf58858dd47fb9c03c42cfb',
|
||||
'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a',
|
||||
'info_dict': {
|
||||
'id': '409385080',
|
||||
'ext': 'mp4',
|
||||
@@ -36,7 +36,7 @@ class SohuIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
|
||||
'md5': '9bf34be48f2f4dadcb226c74127e203c',
|
||||
'md5': '49308ff6dafde5ece51137d04aec311e',
|
||||
'info_dict': {
|
||||
'id': '78693464',
|
||||
'ext': 'mp4',
|
||||
@@ -50,7 +50,7 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'bdbfb8f39924725e6589c146bc1883ad',
|
||||
'md5': '492923eac023ba2f13ff69617c32754a',
|
||||
'info_dict': {
|
||||
'id': '78910339_part1',
|
||||
'ext': 'mp4',
|
||||
@@ -58,7 +58,7 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
}
|
||||
}, {
|
||||
'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
|
||||
'md5': 'de604848c0e8e9c4a4dde7e1347c0637',
|
||||
'info_dict': {
|
||||
'id': '78910339_part2',
|
||||
'ext': 'mp4',
|
||||
@@ -66,7 +66,7 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
}
|
||||
}, {
|
||||
'md5': '8407e634175fdac706766481b9443450',
|
||||
'md5': '93584716ee0657c0b205b8aa3d27aa13',
|
||||
'info_dict': {
|
||||
'id': '78910339_part3',
|
||||
'ext': 'mp4',
|
||||
@@ -117,6 +117,15 @@ class SohuIE(InfoExtractor):
|
||||
r'var vid ?= ?["\'](\d+)["\']',
|
||||
webpage, 'video path')
|
||||
vid_data = _fetch_data(vid, mytv)
|
||||
if vid_data['play'] != 1:
|
||||
if vid_data.get('status') == 12:
|
||||
raise ExtractorError(
|
||||
'Sohu said: There\'s something wrong in the video.',
|
||||
expected=True)
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Sohu said: The video is only licensed to users in Mainland China.',
|
||||
expected=True)
|
||||
|
||||
formats_json = {}
|
||||
for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
|
||||
@@ -132,24 +141,21 @@ class SohuIE(InfoExtractor):
|
||||
for i in range(part_count):
|
||||
formats = []
|
||||
for format_id, format_data in formats_json.items():
|
||||
allot = format_data['allot']
|
||||
prot = format_data['prot']
|
||||
|
||||
data = format_data['data']
|
||||
clips_url = data['clipsURL']
|
||||
su = data['su']
|
||||
|
||||
part_str = self._download_webpage(
|
||||
'http://%s/?prot=%s&file=%s&new=%s' %
|
||||
(allot, prot, clips_url[i], su[i]),
|
||||
video_id,
|
||||
'Downloading %s video URL part %d of %d'
|
||||
% (format_id, i + 1, part_count))
|
||||
|
||||
part_info = part_str.split('|')
|
||||
|
||||
video_url = sanitize_url_path_consecutive_slashes(
|
||||
'%s%s?key=%s' % (part_info[0], su[i], part_info[3]))
|
||||
# URLs starts with http://newflv.sohu.ccgslb.net/ is not usable
|
||||
# so retry until got a working URL
|
||||
video_url = 'newflv.sohu.ccgslb.net'
|
||||
retries = 0
|
||||
while 'newflv.sohu.ccgslb.net' in video_url and retries < 5:
|
||||
download_note = 'Download information from CDN gateway for format ' + format_id
|
||||
if retries > 0:
|
||||
download_note += ' (retry #%d)' % retries
|
||||
retries += 1
|
||||
cdn_info = self._download_json(
|
||||
'http://data.vod.itc.cn/cdnList?new=' + data['su'][i],
|
||||
video_id, download_note)
|
||||
video_url = cdn_info['url']
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
||||
@@ -336,7 +336,7 @@ class SoundcloudUserIE(SoundcloudIE):
            if len(new_entries) == 0:
                self.to_screen('%s: End page received' % uploader)
                break
            entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries)
            entries.extend(self.url_result(e['permalink_url'], 'Soundcloud') for e in new_entries)

        return {
            '_type': 'playlist',

@@ -32,7 +32,7 @@ class SouthParkEsIE(SouthParkIE):
    }]


class SouthparkDeIE(SouthParkIE):
class SouthParkDeIE(SouthParkIE):
    IE_NAME = 'southpark.de'
    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
    _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
@@ -46,3 +46,25 @@ class SouthparkDeIE(SouthParkIE):
            'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
        },
    }]


class SouthParkNlIE(SouthParkIE):
    IE_NAME = 'southpark.nl'
    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
    _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/'

    _TESTS = [{
        'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free',
        'playlist_count': 4,
    }]


class SouthParkDkIE(SouthParkIE):
    IE_NAME = 'southparkstudios.dk'
    _VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.dk/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
    _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/'

    _TESTS = [{
        'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop',
        'playlist_count': 4,
    }]

@@ -9,41 +9,9 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class SVTPlayIE(InfoExtractor):
|
||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
|
||||
'md5': 'ade3def0643fa1c40587a422f98edfd9',
|
||||
'info_dict': {
|
||||
'id': '2609989',
|
||||
'ext': 'flv',
|
||||
'title': 'SM veckan vinter, Örebro - Rally, final',
|
||||
'duration': 4500,
|
||||
'thumbnail': 're:^https?://.*[\.-]jpg$',
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
|
||||
'md5': 'c3101a17ce9634f4c1f9800f0746c187',
|
||||
'info_dict': {
|
||||
'id': '1058509',
|
||||
'ext': 'flv',
|
||||
'title': 'Farlig kryssning',
|
||||
'duration': 2566,
|
||||
'thumbnail': 're:^https?://.*[\.-]jpg$',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'skip': 'Only works from Sweden',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
host = mobj.group('host')
|
||||
|
||||
info = self._download_json(
|
||||
'http://www.%s.se/video/%s?output=json' % (host, video_id), video_id)
|
||||
class SVTBaseIE(InfoExtractor):
|
||||
def _extract_video(self, url, video_id):
|
||||
info = self._download_json(url, video_id)
|
||||
|
||||
title = info['context']['title']
|
||||
thumbnail = info['context'].get('thumbnailImage')
|
||||
@@ -80,3 +48,70 @@ class SVTPlayIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
|
||||
class SVTIE(SVTBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.svt.se/wd?widgetId=23991§ionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
|
||||
'md5': '9648197555fc1b49e3dc22db4af51d46',
|
||||
'info_dict': {
|
||||
'id': '2900353',
|
||||
'ext': 'flv',
|
||||
'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
|
||||
'duration': 27,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
widget_id = mobj.group('widget_id')
|
||||
article_id = mobj.group('id')
|
||||
return self._extract_video(
|
||||
'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
|
||||
article_id)
|
||||
|
||||
|
||||
class SVTPlayIE(SVTBaseIE):
|
||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
|
||||
'md5': 'ade3def0643fa1c40587a422f98edfd9',
|
||||
'info_dict': {
|
||||
'id': '2609989',
|
||||
'ext': 'flv',
|
||||
'title': 'SM veckan vinter, Örebro - Rally, final',
|
||||
'duration': 4500,
|
||||
'thumbnail': 're:^https?://.*[\.-]jpg$',
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
|
||||
'md5': 'c3101a17ce9634f4c1f9800f0746c187',
|
||||
'info_dict': {
|
||||
'id': '1058509',
|
||||
'ext': 'flv',
|
||||
'title': 'Farlig kryssning',
|
||||
'duration': 2566,
|
||||
'thumbnail': 're:^https?://.*[\.-]jpg$',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'skip': 'Only works from Sweden',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
host = mobj.group('host')
|
||||
return self._extract_video(
|
||||
'http://www.%s.se/video/%s?output=json' % (host, video_id),
|
||||
video_id)
|
||||
@@ -2,13 +2,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
qualities,
|
||||
)
|
||||
from ..compat import compat_ord
|
||||
|
||||
|
||||
class TeamcocoIE(InfoExtractor):
|
||||
@@ -59,33 +62,49 @@ class TeamcocoIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
display_id = mobj.group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
if 'src=expired' in urlh.geturl():
|
||||
raise ExtractorError('This video is expired.', expected=True)
|
||||
|
||||
video_id = mobj.group('video_id')
|
||||
if not video_id:
|
||||
video_id = self._html_search_regex(
|
||||
self._VIDEO_ID_REGEXES, webpage, 'video id')
|
||||
|
||||
preload = None
|
||||
preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage)
|
||||
if preloads:
|
||||
preload = max([(len(p), p) for p in preloads])[1]
|
||||
data = None
|
||||
|
||||
if not preload:
|
||||
preload = ''.join(re.findall(r'this\.push\("([^"]+)"\);', webpage))
|
||||
preload_codes = self._html_search_regex(
|
||||
r'(function.+)setTimeout\(function\(\)\{playlist',
|
||||
webpage, 'preload codes')
|
||||
base64_fragments = re.findall(r'"([a-zA-z0-9+/=]+)"', preload_codes)
|
||||
base64_fragments.remove('init')
|
||||
|
||||
if not preload:
|
||||
preload = self._html_search_regex([
|
||||
r'player,\[?"([^"]+)"\]?', r'player.init\(\[?"([^"]+)"\]?\)'
|
||||
], webpage.replace('","', ''), 'preload data', default=None)
|
||||
def _check_sequence(cur_fragments):
|
||||
if not cur_fragments:
|
||||
return
|
||||
for i in range(len(cur_fragments)):
|
||||
cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii')
|
||||
try:
|
||||
raw_data = base64.b64decode(cur_sequence)
|
||||
if compat_ord(raw_data[0]) == compat_ord('{'):
|
||||
return json.loads(raw_data.decode('utf-8'))
|
||||
except (TypeError, binascii.Error, UnicodeDecodeError, ValueError):
|
||||
continue
|
||||
|
||||
if not preload:
|
||||
def _check_data():
|
||||
for i in range(len(base64_fragments) + 1):
|
||||
for j in range(i, len(base64_fragments) + 1):
|
||||
data = _check_sequence(base64_fragments[:i] + base64_fragments[j:])
|
||||
if data:
|
||||
return data
|
||||
|
||||
self.to_screen('Try to compute possible data sequence. This may take some time.')
|
||||
data = _check_data()
|
||||
|
||||
if not data:
|
||||
raise ExtractorError(
|
||||
'Preload information could not be extracted', expected=True)
|
||||
|
||||
data = self._parse_json(
|
||||
base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id)
|
||||
|
||||
formats = []
|
||||
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
|
||||
for filed in data['files']:
|
||||
|
||||
@@ -30,3 +30,31 @@ class TMZIE(InfoExtractor):
            'description': self._og_search_description(webpage),
            'thumbnail': self._html_search_meta('ThumbURL', webpage),
        }


class TMZArticleIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
    _TEST = {
        'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
        'md5': 'e482a414a38db73087450e3a6ce69d00',
        'info_dict': {
            'id': '0_6snoelag',
            'ext': 'mp4',
            'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
            'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        embedded_video_info_str = self._html_search_regex(
            r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info')

        embedded_video_info = self._parse_json(
            embedded_video_info_str, video_id,
            transform_source=lambda s: s.replace('\\', ''))

        return self.url_result(
            'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])

@@ -38,9 +38,13 @@ class VesselIE(InfoExtractor):
        return req

    @staticmethod
    def find_assets(data, asset_type):
    def find_assets(data, asset_type, asset_id=None):
        for asset in data.get('assets', []):
            if asset.get('type') == asset_type:
            if not asset.get('type') == asset_type:
                continue
            elif asset_id is not None and not asset.get('id') == asset_id:
                continue
            else:
                yield asset

    def _check_access_rights(self, data):
@@ -82,11 +86,13 @@ class VesselIE(InfoExtractor):
        req = VesselIE.make_json_request(
            self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
        data = self._download_json(req, video_id)
        video_asset_id = data.get('main_video_asset')

        self._check_access_rights(data)

        try:
            video_asset = next(VesselIE.find_assets(data, 'video'))
            video_asset = next(
                VesselIE.find_assets(data, 'video', asset_id=video_asset_id))
        except StopIteration:
            raise ExtractorError('No video assets found')

@@ -8,7 +8,19 @@ from ..utils import float_or_none
|
||||
|
||||
|
||||
class VGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/[^/]+/(?P<id>[0-9]+)'
|
||||
IE_DESC = 'VGTV and BTTV'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
vgtv:|
|
||||
http://(?:www\.)?
|
||||
)
|
||||
(?P<host>vgtv|bt)
|
||||
(?:
|
||||
:|
|
||||
\.no/(?:tv/)?\#!/(?:video|live)/
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
# streamType: vod
|
||||
@@ -64,12 +76,25 @@ class VGTVIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
host = mobj.group('host')
|
||||
|
||||
HOST_WEBSITES = {
|
||||
'vgtv': 'vgtv',
|
||||
'bt': 'bttv',
|
||||
}
|
||||
|
||||
data = self._download_json(
|
||||
'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id,
|
||||
'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website'
|
||||
% (host, video_id, HOST_WEBSITES[host]),
|
||||
video_id, 'Downloading media JSON')
|
||||
|
||||
streams = data['streamUrls']
|
||||
@@ -78,11 +103,14 @@ class VGTVIE(InfoExtractor):
|
||||
|
||||
hls_url = streams.get('hls')
|
||||
if hls_url:
|
||||
formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4'))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
hls_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
|
||||
hds_url = streams.get('hds')
|
||||
if hds_url:
|
||||
formats.extend(self._extract_f4m_formats(hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
video_id, f4m_id='hds'))
|
||||
|
||||
mp4_url = streams.get('mp4')
|
||||
if mp4_url:
|
||||
@@ -115,3 +143,51 @@ class VGTVIE(InfoExtractor):
|
||||
'view_count': data['displays'],
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class BTArticleIE(InfoExtractor):
|
||||
IE_NAME = 'bt:article'
|
||||
IE_DESC = 'Bergens Tidende Articles'
|
||||
_VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
|
||||
'md5': 'd055e8ee918ef2844745fcfd1a4175fb',
|
||||
'info_dict': {
|
||||
'id': '23199',
|
||||
'ext': 'mp4',
|
||||
'title': 'Alrekstad internat',
|
||||
'description': 'md5:dc81a9056c874fedb62fc48a300dac58',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 191,
|
||||
'timestamp': 1289991323,
|
||||
'upload_date': '20101117',
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, self._match_id(url))
|
||||
video_id = self._search_regex(
|
||||
r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id')
|
||||
return self.url_result('vgtv:bt:%s' % video_id, 'VGTV')
|
||||
|
||||
|
||||
class BTVestlendingenIE(InfoExtractor):
|
||||
IE_NAME = 'bt:vestlendingen'
|
||||
IE_DESC = 'Bergens Tidende - Vestlendingen'
|
||||
_VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
|
||||
'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
|
||||
'info_dict': {
|
||||
'id': '86588',
|
||||
'ext': 'mov',
|
||||
'title': 'Otto Wollertsen',
|
||||
'description': 'Vestlendingen Otto Fredrik Wollertsen',
|
||||
'timestamp': 1430473209,
|
||||
'upload_date': '20150501',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result('xstream:btno:%s' % self._match_id(url), 'Xstream')
|
||||
|
||||
@@ -31,7 +31,6 @@ class ViceIE(InfoExtractor):
                r'embedCode=([^&\'"]+)', webpage,
                'ooyala embed code')
            ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
            print(ooyala_url)
        except ExtractorError:
            raise ExtractorError('The page doesn\'t contain a video', expected=True)
        return self.url_result(ooyala_url, ie='Ooyala')

@@ -75,7 +75,7 @@ class VineIE(InfoExtractor):
        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'alt_title': self._og_search_description(webpage),
            'alt_title': self._og_search_description(webpage, default=None),
            'description': data['description'],
            'thumbnail': data['thumbnailUrl'],
            'upload_date': unified_strdate(data['created']),

youtube_dl/extractor/voicerepublic.py (new file, 99 lines)
@@ -0,0 +1,99 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class VoiceRepublicIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
|
||||
'md5': '0554a24d1657915aa8e8f84e15dc9353',
|
||||
'info_dict': {
|
||||
'id': '2296',
|
||||
'display_id': 'watching-the-watchers-building-a-sousveillance-state',
|
||||
'ext': 'm4a',
|
||||
'title': 'Watching the Watchers: Building a Sousveillance State',
|
||||
'description': 'md5:715ba964958afa2398df615809cfecb1',
|
||||
'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
|
||||
'duration': 1800,
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(
|
||||
compat_urlparse.urljoin(url, '/talks/%s' % display_id))
|
||||
# Older versions of Firefox get redirected to an "upgrade browser" page
|
||||
req.add_header('User-Agent', 'youtube-dl')
|
||||
webpage = self._download_webpage(req, display_id)
|
||||
|
||||
if '>Queued for processing, please stand by...<' in webpage:
|
||||
raise ExtractorError(
|
||||
'Audio is still queued for processing', expected=True)
|
||||
|
||||
config = self._search_regex(
|
||||
r'(?s)return ({.+?});\s*\n', webpage,
|
||||
'data', default=None)
|
||||
data = self._parse_json(config, display_id, fatal=False) if config else None
|
||||
if data:
|
||||
title = data['title']
|
||||
description = data.get('teaser')
|
||||
talk_id = data.get('talk_id') or display_id
|
||||
talk = data['talk']
|
||||
duration = int_or_none(talk.get('duration'))
|
||||
formats = [{
|
||||
'url': compat_urlparse.urljoin(url, talk_url),
|
||||
'format_id': format_id,
|
||||
'ext': determine_ext(talk_url) or format_id,
|
||||
'vcodec': 'none',
|
||||
} for format_id, talk_url in talk['links'].items()]
|
||||
else:
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_regex(
|
||||
r"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>",
|
||||
webpage, 'description', fatal=False)
|
||||
talk_id = self._search_regex(
|
||||
[r"id='jc-(\d+)'", r"data-shareable-id='(\d+)'"],
|
||||
webpage, 'talk id', default=None) or display_id
|
||||
duration = None
|
||||
player = self._search_regex(
|
||||
r"class='vr-player jp-jplayer'([^>]+)>", webpage, 'player')
|
||||
formats = [{
|
||||
'url': compat_urlparse.urljoin(url, talk_url),
|
||||
'format_id': format_id,
|
||||
'ext': determine_ext(talk_url) or format_id,
|
||||
'vcodec': 'none',
|
||||
} for format_id, talk_url in re.findall(r"data-([^=]+)='([^']+)'", player)]
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r"class='play-count[^']*'>\s*(\d+) plays",
|
||||
webpage, 'play count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': talk_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -6,8 +6,8 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class WorldStarHipHopIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P<id>.*)'
|
||||
_TESTS = [{
|
||||
"url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
|
||||
"md5": "9d04de741161603bf7071bbf4e883186",
|
||||
"info_dict": {
|
||||
@@ -15,7 +15,15 @@ class WorldStarHipHopIE(InfoExtractor):
|
||||
"ext": "mp4",
|
||||
"title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
|
||||
'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',
|
||||
'info_dict': {
|
||||
'id': 'wshh6a7q1ny0G34ZwuIO',
|
||||
'ext': 'mp4',
|
||||
"title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -26,19 +34,22 @@ class WorldStarHipHopIE(InfoExtractor):
|
||||
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'so\.addVariable\("file","(.*?)"\)', webpage, 'video URL')
|
||||
[r'so\.addVariable\("file","(.*?)"\)',
|
||||
r'<div class="artlist">\s*<a[^>]+href="([^"]+)">'],
|
||||
webpage, 'video URL')
|
||||
|
||||
if 'youtube' in video_url:
|
||||
return self.url_result(video_url, ie='Youtube')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
|
||||
[r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
|
||||
r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
|
||||
webpage, 'title')
|
||||
|
||||
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
|
||||
thumbnail = self._html_search_regex(
|
||||
r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',
|
||||
fatal=False)
|
||||
default=None)
|
||||
if not thumbnail:
|
||||
_title = r'candytitles.*>(.*)</span>'
|
||||
mobj = re.search(_title, webpage)
|
||||
|
||||
youtube_dl/extractor/xstream.py (new file, 115 lines)
@@ -0,0 +1,115 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
xpath_with_ns,
|
||||
xpath_text,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class XstreamIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
xstream:|
|
||||
https?://frontend\.xstream\.(?:dk|net)/
|
||||
)
|
||||
(?P<partner_id>[^/]+)
|
||||
(?:
|
||||
:|
|
||||
/feed/video/\?.*?\bid=
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
|
||||
'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
|
||||
'info_dict': {
|
||||
'id': '86588',
|
||||
'ext': 'mov',
|
||||
'title': 'Otto Wollertsen',
|
||||
'description': 'Vestlendingen Otto Fredrik Wollertsen',
|
||||
'timestamp': 1430473209,
|
||||
'upload_date': '20150501',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
partner_id = mobj.group('partner_id')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
data = self._download_xml(
|
||||
'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
|
||||
% (partner_id, video_id),
|
||||
video_id)
|
||||
|
||||
NS_MAP = {
|
||||
'atom': 'http://www.w3.org/2005/Atom',
|
||||
'xt': 'http://xstream.dk/',
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
}
|
||||
|
||||
entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
|
||||
|
||||
title = xpath_text(
|
||||
entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
|
||||
description = xpath_text(
|
||||
entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
|
||||
timestamp = parse_iso8601(xpath_text(
|
||||
entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
|
||||
|
||||
formats = []
|
||||
media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
|
||||
for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
|
||||
media_url = media_content.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
tbr = int_or_none(media_content.get('bitrate'))
|
||||
mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
|
||||
if mobj:
|
||||
formats.append({
|
||||
'url': mobj.group('url'),
|
||||
'play_path': 'mp4:%s' % mobj.group('playpath'),
|
||||
'app': mobj.group('app'),
|
||||
'ext': 'flv',
|
||||
'tbr': tbr,
|
||||
'format_id': 'rtmp-%d' % tbr,
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
link = find_xpath_attr(
|
||||
entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
|
||||
if link is not None:
|
||||
formats.append({
|
||||
'url': link.get('href'),
|
||||
'format_id': link.get('rel'),
|
||||
})
|
||||
|
||||
thumbnails = [{
|
||||
'url': splash.get('url'),
|
||||
'width': int_or_none(splash.get('width')),
|
||||
'height': int_or_none(splash.get('height')),
|
||||
} for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
@@ -9,6 +9,7 @@ from ..utils import (
    float_or_none,
    month_by_abbreviation,
    ExtractorError,
    get_element_by_attribute,
)


@@ -23,6 +24,7 @@ class YamIE(InfoExtractor):
            'id': '2283921',
            'ext': 'mp3',
            'title': '發現 - 趙薇 京華煙雲主題曲',
            'description': '發現 - 趙薇 京華煙雲主題曲',
            'uploader_id': 'princekt',
            'upload_date': '20080807',
            'duration': 313.0,

@@ -55,6 +57,17 @@ class YamIE(InfoExtractor):
            'ext': 'mp4',
        },
        'skip': 'invalid YouTube URL',
    }, {
        'url': 'http://mymedia.yam.com/m/2373534',
        'md5': '7ff74b91b7a817269d83796f8c5890b1',
        'info_dict': {
            'id': '2373534',
            'ext': 'mp3',
            'title': '林俊傑&蔡卓妍-小酒窩',
            'description': 'md5:904003395a0fcce6cfb25028ff468420',
            'upload_date': '20080928',
            'uploader_id': 'onliner2',
        }
    }]

    def _real_extract(self, url):

@@ -75,15 +88,19 @@ class YamIE(InfoExtractor):
        if youtube_url:
            return self.url_result(youtube_url, 'Youtube')

        title = self._html_search_regex(
            r'<h1[^>]+class="heading"[^>]*>\s*(.+)\s*</h1>', page, 'title')

        api_page = self._download_webpage(
            'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id,
            note='Downloading API page')
        api_result_obj = compat_urlparse.parse_qs(api_page)

        info_table = get_element_by_attribute('class', 'info', page)
        uploader_id = self._html_search_regex(
            r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z]+)"',
            page, 'uploader id', fatal=False)
        mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2}) ' +
            r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z0-9]+)"',
            info_table, 'uploader id', fatal=False)
        mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2})\s+' +
            r'(?P<day>\d{1,2}), (?P<year>\d{4})', page)
        if mobj:
            upload_date = '%s%02d%02d' % (

@@ -97,7 +114,8 @@ class YamIE(InfoExtractor):
        return {
            'id': video_id,
            'url': api_result_obj['mp3file'][0],
            'title': self._html_search_meta('description', page),
            'title': title,
            'description': self._html_search_meta('description', page),
            'duration': duration,
            'uploader_id': uploader_id,
            'upload_date': upload_date,

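Aside, as a hedged illustration (not part of the diff): the upload_date above is assembled from the month abbreviation, day and year captured by the regex; assuming month_by_abbreviation('Sep') returns 9, as the surrounding code implies, the second test's date comes out as:

from youtube_dl.utils import month_by_abbreviation

# 'Sep 28, 2008' as matched by the <!-- 發表於 --> regex above
upload_date = '%s%02d%02d' % ('2008', month_by_abbreviation('Sep'), int('28'))
print(upload_date)  # 20080928
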
@@ -1667,13 +1667,42 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE):
        return self._extract_playlist('WL')


class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
class YoutubeHistoryIE(YoutubePlaylistIE):
    IE_NAME = 'youtube:history'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = 'Youtube Watch History'
    _TESTS = []

    def _real_extract(self, url):
        title = 'Youtube History'
        page = self._download_webpage('https://www.youtube.com/feed/history', title)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page

        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
            new_ids = orderedSet(matches)
            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), title,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return {
            '_type': 'playlist',
            'title': title,
            'entries': self._ids_to_results(ids),
        }


class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):

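Side note (an illustrative sketch, not from the diff): orderedSet, used above on the scraped video ids, removes duplicates while keeping first-seen order, so ids repeated across "load more" pages yield a single playlist entry each; its behaviour is roughly:

def ordered_set(iterable):
    # Same idea as youtube_dl.utils.orderedSet: drop duplicates, keep order.
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res

print(ordered_set(['dQw4w9WgXcQ', 'jNQXAC9IVRw', 'dQw4w9WgXcQ']))
# ['dQw4w9WgXcQ', 'jNQXAC9IVRw']
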
@@ -4,12 +4,18 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
from ..utils import ExtractorError


class ZingMp3BaseInfoExtractor(InfoExtractor):

    @staticmethod
    def _extract_item(item):
    def _extract_item(self, item):
        error_message = item.find('./errormessage').text
        if error_message:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error_message),
                expected=True)

        title = item.find('./title').text.strip()
        source = item.find('./source').text
        extension = item.attrib['type']

@@ -7,12 +7,9 @@ import subprocess

from .ffmpeg import FFmpegPostProcessor

from ..compat import (
    compat_urlretrieve,
)
from ..utils import (
    determine_ext,
    check_executable,
    encodeArgument,
    encodeFilename,
    PostProcessingError,
    prepend_extension,

@@ -25,26 +22,30 @@ class EmbedThumbnailPPError(PostProcessingError):


class EmbedThumbnailPP(FFmpegPostProcessor):
    def __init__(self, downloader=None, already_have_thumbnail=False):
        super(EmbedThumbnailPP, self).__init__(downloader)
        self._already_have_thumbnail = already_have_thumbnail

    def run(self, info):
        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')
        temp_thumbnail = filename + '.' + determine_ext(info['thumbnail'])

        if not info.get('thumbnail'):
        if not info.get('thumbnails'):
            raise EmbedThumbnailPPError('Thumbnail was not found. Nothing to do.')

        compat_urlretrieve(info['thumbnail'], temp_thumbnail)
        thumbnail_filename = info['thumbnails'][-1]['filename']

        if info['ext'] == 'mp3':
            options = [
                '-i', temp_thumbnail, '-c', 'copy', '-map', '0', '-map', '1',
                '-c', 'copy', '-map', '0', '-map', '1',
                '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"']

            self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)

            self.run_ffmpeg(filename, temp_filename, options)
            self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)

            os.remove(encodeFilename(temp_thumbnail))
            if not self._already_have_thumbnail:
                os.remove(encodeFilename(thumbnail_filename))
            os.remove(encodeFilename(filename))
            os.rename(encodeFilename(temp_filename), encodeFilename(filename))

@@ -52,7 +53,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
            if not check_executable('AtomicParsley', ['-v']):
                raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')

            cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename]
            cmd = [encodeFilename('AtomicParsley', True),
                   encodeFilename(filename, True),
                   encodeArgument('--artwork'),
                   encodeFilename(thumbnail_filename, True),
                   encodeArgument('-o'),
                   encodeFilename(temp_filename, True)]

            self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)

@@ -66,7 +72,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
                msg = stderr.decode('utf-8', 'replace').strip()
                raise EmbedThumbnailPPError(msg)

            os.remove(encodeFilename(temp_thumbnail))
            if not self._already_have_thumbnail:
                os.remove(encodeFilename(thumbnail_filename))
            # for formats that don't support thumbnails (like 3gp) AtomicParsley
            # won't create to the temporary file
            if b'No changes' in stdout:

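For orientation only (not part of the diff; file names are made up): with the new options list, the mp3 branch ends up invoking ffmpeg via run_ffmpeg_multiple_files roughly as follows:

# Approximate equivalent command line:
#   ffmpeg -y -i song.mp3 -i cover.jpg -c copy -map 0 -map 1 \
#          -metadata:s:v title="Album cover" -metadata:s:v comment="Cover (Front)" \
#          song.temp.mp3
# i.e. stream 0 (the audio) and stream 1 (the cover image) are both copied into
# the temporary output, which then replaces the original file.
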
@@ -8,8 +8,8 @@ from ..utils import PostProcessingError


class ExecAfterDownloadPP(PostProcessor):
    def __init__(self, downloader=None, verboseOutput=None, exec_cmd=None):
        self.verboseOutput = verboseOutput
    def __init__(self, downloader, exec_cmd):
        super(ExecAfterDownloadPP, self).__init__(downloader)
        self.exec_cmd = exec_cmd

    def run(self, information):

@@ -591,6 +591,23 @@ class FFmpegMergerPP(FFmpegPostProcessor):
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
        return info['__files_to_merge'], info

    def can_merge(self):
        # TODO: figure out merge-capable ffmpeg version
        if self.basename != 'avconv':
            return True

        required_version = '10-0'
        if is_outdated_version(
                self._versions[self.basename], required_version):
            warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
                       'youtube-dl will download single file media. '
                       'Update %s to version %s or newer to fix this.') % (
                           self.basename, self.basename, required_version)
            if self._downloader:
                self._downloader.report_warning(warning)
            return False
        return True


class FFmpegFixupStretchedPP(FFmpegPostProcessor):
    def run(self, info):

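A hedged usage sketch (not in the diff; the exact call site in YoutubeDL.py may differ): can_merge() lets the caller fall back to a single-file format when the muxer is an avconv too old to merge separate streams:

merger = FFmpegMergerPP(ydl)  # ydl: a YoutubeDL instance, assumed here
if merger.available and merger.can_merge():
    format_spec = 'bestvideo+bestaudio/best'
else:
    format_spec = 'best'
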
@@ -3,18 +3,34 @@ from __future__ import unicode_literals
import os
import subprocess
import sys
import errno

from .common import PostProcessor
from ..compat import (
    subprocess_check_output
)
from ..utils import (
    check_executable,
    hyphenate_date,
    version_tuple,
    PostProcessingError,
    encodeArgument,
    encodeFilename,
)


class XAttrMetadataError(PostProcessingError):
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
                'No space left' in self.msg or 'Disk quota excedded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrMetadataPP(PostProcessor):

    #

@@ -51,7 +67,10 @@ class XAttrMetadataPP(PostProcessor):
                raise ImportError

            def write_xattr(path, key, value):
                return xattr.setxattr(path, key, value)
                try:
                    xattr.set(path, key, value)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)

        except ImportError:
            if os.name == 'nt':

@@ -62,8 +81,11 @@ class XAttrMetadataPP(PostProcessor):
                    assert os.path.exists(path)

                    ads_fn = path + ":" + key
                    with open(ads_fn, "wb") as f:
                        f.write(value)
                    try:
                        with open(ads_fn, "wb") as f:
                            f.write(value)
                    except EnvironmentError as e:
                        raise XAttrMetadataError(e.errno, e.strerror)
            else:
                user_has_setfattr = check_executable("setfattr", ['--version'])
                user_has_xattr = check_executable("xattr", ['-h'])

@@ -71,12 +93,27 @@ class XAttrMetadataPP(PostProcessor):
                if user_has_setfattr or user_has_xattr:

                    def write_xattr(path, key, value):
                        value = value.decode('utf-8')
                        if user_has_setfattr:
                            cmd = ['setfattr', '-n', key, '-v', value, path]
                            executable = 'setfattr'
                            opts = ['-n', key, '-v', value]
                        elif user_has_xattr:
                            cmd = ['xattr', '-w', key, value, path]
                            executable = 'xattr'
                            opts = ['-w', key, value]

                        subprocess_check_output(cmd)
                        cmd = ([encodeFilename(executable, True)] +
                               [encodeArgument(o) for o in opts] +
                               [encodeFilename(path, True)])

                        try:
                            p = subprocess.Popen(
                                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                        except EnvironmentError as e:
                            raise XAttrMetadataError(e.errno, e.strerror)
                        stdout, stderr = p.communicate()
                        stderr = stderr.decode('utf-8', 'replace')
                        if p.returncode != 0:
                            raise XAttrMetadataError(p.returncode, stderr)

                else:
                    # On Unix, and can't find pyxattr, setfattr, or xattr.

@@ -121,6 +158,19 @@ class XAttrMetadataPP(PostProcessor):

            return [], info

        except (subprocess.CalledProcessError, OSError):
            self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")
        except XAttrMetadataError as e:
            if e.reason == 'NO_SPACE':
                self._downloader.report_warning(
                    'There\'s no disk space left or disk quota exceeded. ' +
                    'Extended attributes are not written.')
            elif e.reason == 'VALUE_TOO_LONG':
                self._downloader.report_warning(
                    'Unable to write extended attributes due to too long values.')
            else:
                msg = 'This filesystem doesn\'t support extended attributes. '
                if os.name == 'nt':
                    msg += 'You need to use NTFS.'
                else:
                    msg += '(You may have to enable them in your /etc/fstab)'
                self._downloader.report_error(msg)
        return [], info

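Aside (illustrative, not from the diff): on Linux with Python 3.3+ the written attributes can be read back with os.getxattr; the file name below is hypothetical:

import os

# One of the keys XAttrMetadataPP writes when --xattrs is used.
print(os.getxattr('video.mp4', 'user.xdg.referrer.url'))
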
@@ -327,13 +327,6 @@ def sanitize_path(s):
    return os.path.join(*sanitized_path)


def sanitize_url_path_consecutive_slashes(url):
    """Collapses consecutive slashes in URLs' path"""
    parsed_url = list(compat_urlparse.urlparse(url))
    parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
    return compat_urlparse.urlunparse(parsed_url)


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []

@@ -1380,7 +1373,7 @@ def get_exe_version(exe, args=['--version'],
    or False if the executable is not present """
    try:
        out, _ = subprocess.Popen(
            [exe] + args,
            [encodeArgument(exe)] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False

@@ -1486,6 +1479,14 @@ def uppercase_escape(s):
        s)


def lowercase_escape(s):
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: unicode_escape(m.group(0))[0],
        s)


def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):

@@ -1834,12 +1835,8 @@ def parse_dfxp_time_expr(time_expr):
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))


def format_srt_time(seconds):
    (mins, secs) = divmod(seconds, 60)
    (hours, mins) = divmod(mins, 60)
    millisecs = (secs - int(secs)) * 1000
    secs = int(secs)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs)
def srt_subtitles_timecode(seconds):
    return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)


def dfxp2srt(dfxp_data):

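For illustration (not part of the diff): both the removed helper and the new one-liner render a float number of seconds as an SRT timecode, for example:

def srt_subtitles_timecode(seconds):
    # The one-liner from the hunk above, reproduced to show its output.
    return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)

print(srt_subtitles_timecode(3661.5))  # 01:01:01,500
print(srt_subtitles_timecode(0.25))    # 00:00:00,250
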
@@ -1865,10 +1862,14 @@ def dfxp2srt(dfxp_data):
    paras = dfxp.findall(_x('.//ttml:p'))

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib['begin'])
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        if not end_time:
            end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur'])
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))),
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))),
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)

@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '2015.05.03'
__version__ = '2015.05.15'