mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-12-24 17:00:30 +01:00
Compare commits
216 Commits
2015.04.28
...
2015.05.15
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c4fc559f45 | ||
|
|
2bc4330303 | ||
|
|
12675275a1 | ||
|
|
3a105f7b20 | ||
|
|
1ae72fb23d | ||
|
|
7ec676bb3d | ||
|
|
29ea57283e | ||
|
|
5488973961 | ||
|
|
96d45a5489 | ||
|
|
7a012d5a16 | ||
|
|
fa6a16996e | ||
|
|
82245a6de7 | ||
|
|
ff28ede2d1 | ||
|
|
98b8ec8616 | ||
|
|
88f9d8748c | ||
|
|
7d57d2e18b | ||
|
|
38caa00d18 | ||
|
|
c827d4cfdb | ||
|
|
509c630db8 | ||
|
|
fbff30d2db | ||
|
|
86c7fdb17c | ||
|
|
62bd6589c7 | ||
|
|
2cc6d13547 | ||
|
|
bb8ca1d112 | ||
|
|
8e59539752 | ||
|
|
372744c544 | ||
|
|
83880949a1 | ||
|
|
3749e36e9f | ||
|
|
0b4253fa37 | ||
|
|
86ec1e487c | ||
|
|
fd4eefed39 | ||
|
|
b480e7874b | ||
|
|
41333b97b9 | ||
|
|
c1c924abfe | ||
|
|
1c7e2e64f6 | ||
|
|
7dff03636a | ||
|
|
5332fd91bf | ||
|
|
d4b963d0a6 | ||
|
|
6d3f5935e5 | ||
|
|
968ee17677 | ||
|
|
81ed3bb9c0 | ||
|
|
5115652828 | ||
|
|
1f92865494 | ||
|
|
e41f450f28 | ||
|
|
97fcf1bbd0 | ||
|
|
13763ce599 | ||
|
|
7fcb605b82 | ||
|
|
70484b9f8a | ||
|
|
69b46b3d95 | ||
|
|
95c5534f8e | ||
|
|
370b39e8ec | ||
|
|
3da8038918 | ||
|
|
a6762c4a22 | ||
|
|
98c2c0febc | ||
|
|
63cbd19f50 | ||
|
|
1934f3a0ea | ||
|
|
a909e6ad43 | ||
|
|
1dcb52188d | ||
|
|
28ebef0b1b | ||
|
|
f03a8a3c4e | ||
|
|
03f760b1c0 | ||
|
|
f900dc3fb9 | ||
|
|
95eb1adda8 | ||
|
|
c6ddbdb66c | ||
|
|
3800b908b1 | ||
|
|
69fe3a5f09 | ||
|
|
754270313a | ||
|
|
057ebeaca3 | ||
|
|
480065172d | ||
|
|
f2e0056579 | ||
|
|
32fffff2cc | ||
|
|
3c47824d6b | ||
|
|
0892090a56 | ||
|
|
d592b42f5c | ||
|
|
3b5f65a64c | ||
|
|
5c0b2c16a8 | ||
|
|
d39e0f05db | ||
|
|
6d14d08e06 | ||
|
|
32060c6d6b | ||
|
|
3dbec410a0 | ||
|
|
de765f6c31 | ||
|
|
dc455a5f88 | ||
|
|
bab19a8e91 | ||
|
|
322915014f | ||
|
|
79998cd5af | ||
|
|
50b9013064 | ||
|
|
bb03fdae0d | ||
|
|
4384cf9e7d | ||
|
|
d47e980d0d | ||
|
|
fe373287eb | ||
|
|
cbe443362f | ||
|
|
2c0c9dc46c | ||
|
|
0ceab84749 | ||
|
|
34e7dc81a9 | ||
|
|
4e6e9d21bd | ||
|
|
d1feb30811 | ||
|
|
43837189c1 | ||
|
|
249962ffa2 | ||
|
|
541168039d | ||
|
|
7ef00afe9d | ||
|
|
156fc83a55 | ||
|
|
46be82b811 | ||
|
|
09b412dafa | ||
|
|
5268a05e47 | ||
|
|
406224be52 | ||
|
|
3799834dcf | ||
|
|
553e412bda | ||
|
|
f22834a372 | ||
|
|
bd349a8704 | ||
|
|
bc08873cff | ||
|
|
aafe273990 | ||
|
|
c09593c04e | ||
|
|
84bf31aaf8 | ||
|
|
05d5392cda | ||
|
|
d9a743d917 | ||
|
|
ac6c358c2a | ||
|
|
ad0c0ad3b4 | ||
|
|
1ed34f3dd6 | ||
|
|
6a8f9cd22e | ||
|
|
e8b9ab8957 | ||
|
|
74f728249f | ||
|
|
d6a1738892 | ||
|
|
b326b07adc | ||
|
|
07d2921c6d | ||
|
|
22e462c97a | ||
|
|
dcf8077906 | ||
|
|
3408f6e64a | ||
|
|
e10dc0e1f0 | ||
|
|
ce5c1ae517 | ||
|
|
bbe718c97f | ||
|
|
01e4b1ee14 | ||
|
|
815ac0293e | ||
|
|
6568382d6f | ||
|
|
f943b7ddce | ||
|
|
ff9d68e7be | ||
|
|
7212560f4d | ||
|
|
1aa43d77c0 | ||
|
|
e038d5c4e3 | ||
|
|
dfad3aac98 | ||
|
|
df8418ffcf | ||
|
|
50aa43b3ae | ||
|
|
a90552663e | ||
|
|
883340c107 | ||
|
|
0fe2ff78e6 | ||
|
|
dc1eed93be | ||
|
|
b2f82360d7 | ||
|
|
782e0568ef | ||
|
|
90b4b0eabe | ||
|
|
cec04ef3a6 | ||
|
|
71fa56b887 | ||
|
|
b9b3ab45ea | ||
|
|
957b794c26 | ||
|
|
8001607e90 | ||
|
|
3e7202c1bc | ||
|
|
848edeab89 | ||
|
|
1748d67aea | ||
|
|
5477ca8239 | ||
|
|
d0fd305023 | ||
|
|
8dab1e9072 | ||
|
|
963aea5279 | ||
|
|
0a64aa7355 | ||
|
|
0669c89c55 | ||
|
|
2699da8041 | ||
|
|
98727e123f | ||
|
|
b29e0000e6 | ||
|
|
b3ed15b760 | ||
|
|
666a9a2b95 | ||
|
|
a4bcaad773 | ||
|
|
e65e4c8874 | ||
|
|
21f6330274 | ||
|
|
38c6902b90 | ||
|
|
2ddcd88129 | ||
|
|
dd8920653c | ||
|
|
c938c35f95 | ||
|
|
2eb0192155 | ||
|
|
d948e09b61 | ||
|
|
89966a5aea | ||
|
|
8e3df9dfee | ||
|
|
5890eef6b0 | ||
|
|
083c1bb960 | ||
|
|
861e65eb05 | ||
|
|
650cfd0cb0 | ||
|
|
e68ae99a41 | ||
|
|
8683b4d8d9 | ||
|
|
1dbd717eb4 | ||
|
|
6a8422b942 | ||
|
|
cb202fd286 | ||
|
|
67fc8ecd53 | ||
|
|
df8301fef5 | ||
|
|
4070b458ec | ||
|
|
ffbc3901d2 | ||
|
|
7a03280df4 | ||
|
|
482a1258de | ||
|
|
cd298882cd | ||
|
|
e01c56f9e1 | ||
|
|
4d72df4031 | ||
|
|
f7f1df1d82 | ||
|
|
c4a21bc9db | ||
|
|
621ffe7bf4 | ||
|
|
8dd5418803 | ||
|
|
965cb8d530 | ||
|
|
b2e8e7dab5 | ||
|
|
59d814f793 | ||
|
|
bb865f3a5e | ||
|
|
9ee53a49f0 | ||
|
|
79adb09baa | ||
|
|
cf0649f8b7 | ||
|
|
f8690631e2 | ||
|
|
5456d78f0c | ||
|
|
cbbece96a2 | ||
|
|
9d8ba307ef | ||
|
|
ec7c1e85e0 | ||
|
|
e70c7568c0 | ||
|
|
39b62db116 | ||
|
|
2edce52584 | ||
|
|
10831b5ec9 |
@@ -133,7 +133,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--no-mtime Do not use the Last-modified header to set the file modification time
|
||||
--write-description Write video description to a .description file
|
||||
--write-info-json Write video metadata to a .info.json file
|
||||
--write-annotations Write video annotations to a .annotation file
|
||||
--write-annotations Write video annotations to a .annotations.xml file
|
||||
--load-info FILE JSON file containing the video information (created with the "--write-info-json" option)
|
||||
--cookies FILE File to read cookies from and dump cookie jar in
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
|
||||
@@ -216,7 +216,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
|
||||
-k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default
|
||||
--no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default
|
||||
--embed-subs Embed subtitles in the video (only for mp4 videos)
|
||||
--embed-subs Embed subtitles in the video (only for mkv and mp4 videos)
|
||||
--embed-thumbnail Embed thumbnail in the audio as cover art
|
||||
--add-metadata Write metadata to the video file
|
||||
--metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
|
||||
@@ -269,7 +269,7 @@ The simplest case is requesting a specific format, for example `-f 22`. You can
|
||||
|
||||
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`.
|
||||
|
||||
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file.
|
||||
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
|
||||
|
||||
If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
|
||||
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
- **audiomack**
|
||||
- **audiomack:album**
|
||||
- **Azubu**
|
||||
- **BaiduVideo**
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
- **Bandcamp**
|
||||
@@ -63,6 +64,8 @@
|
||||
- **BR**: Bayerischer Rundfunk Mediathek
|
||||
- **Break**
|
||||
- **Brightcove**
|
||||
- **bt:article**: Bergens Tidende Articles
|
||||
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
|
||||
- **BuzzFeed**
|
||||
- **BYUtv**
|
||||
- **Camdemy**
|
||||
@@ -155,6 +158,7 @@
|
||||
- **FootyRoom**
|
||||
- **Foxgay**
|
||||
- **FoxNews**
|
||||
- **FoxSports**
|
||||
- **france2.fr:generation-quoi**
|
||||
- **FranceCulture**
|
||||
- **FranceInter**
|
||||
@@ -184,7 +188,6 @@
|
||||
- **Golem**
|
||||
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net
|
||||
- **Goshgay**
|
||||
- **Grooveshark**
|
||||
- **Groupon**
|
||||
- **Hark**
|
||||
- **HearThisAt**
|
||||
@@ -239,6 +242,7 @@
|
||||
- **LetvPlaylist**
|
||||
- **LetvTv**
|
||||
- **Libsyn**
|
||||
- **life:embed**
|
||||
- **lifenews**: LIFE | NEWS
|
||||
- **LiveLeak**
|
||||
- **livestream**
|
||||
@@ -287,6 +291,7 @@
|
||||
- **MySpass**
|
||||
- **myvideo**
|
||||
- **MyVidster**
|
||||
- **N-JOY**
|
||||
- **n-tv.de**
|
||||
- **NationalGeographic**
|
||||
- **Naver**
|
||||
@@ -327,6 +332,7 @@
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
- **NYTimes**
|
||||
- **NYTimesArticle**
|
||||
- **ocw.mit.edu**
|
||||
- **Odnoklassniki**
|
||||
- **OktoberfestTV**
|
||||
@@ -363,9 +369,10 @@
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **Puls4**
|
||||
- **Pyvideo**
|
||||
- **QQMusic**
|
||||
- **QQMusicAlbum**
|
||||
- **QQMusicSinger**
|
||||
- **qqmusic**
|
||||
- **qqmusic:album**
|
||||
- **qqmusic:singer**
|
||||
- **qqmusic:toplist**
|
||||
- **QuickVid**
|
||||
- **R7**
|
||||
- **radio.de**
|
||||
@@ -433,6 +440,8 @@
|
||||
- **southpark.cc.com**
|
||||
- **southpark.cc.com:español**
|
||||
- **southpark.de**
|
||||
- **southpark.nl**
|
||||
- **southparkstudios.dk**
|
||||
- **Space**
|
||||
- **SpankBang**
|
||||
- **Spankwire**
|
||||
@@ -452,6 +461,7 @@
|
||||
- **StreamCZ**
|
||||
- **StreetVoice**
|
||||
- **SunPorno**
|
||||
- **SVT**
|
||||
- **SVTPlay**: SVT Play and Öppet arkiv
|
||||
- **SWRMediathek**
|
||||
- **Syfy**
|
||||
@@ -485,6 +495,7 @@
|
||||
- **tlc.com**
|
||||
- **tlc.de**
|
||||
- **TMZ**
|
||||
- **TMZArticle**
|
||||
- **TNAFlix**
|
||||
- **tou.tv**
|
||||
- **Toypics**: Toypics user profile
|
||||
@@ -528,7 +539,7 @@
|
||||
- **Vessel**
|
||||
- **Vesti**: Вести.Ru
|
||||
- **Vevo**
|
||||
- **VGTV**
|
||||
- **VGTV**: VGTV and BTTV
|
||||
- **vh1.com**
|
||||
- **Vice**
|
||||
- **Viddler**
|
||||
@@ -562,6 +573,7 @@
|
||||
- **vk.com**
|
||||
- **vk.com:user-videos**: vk.com:All of a user's videos
|
||||
- **Vodlocker**
|
||||
- **VoiceRepublic**
|
||||
- **Vporn**
|
||||
- **VRT**
|
||||
- **vube**: Vube.com
|
||||
@@ -586,6 +598,7 @@
|
||||
- **XHamster**
|
||||
- **XMinus**
|
||||
- **XNXX**
|
||||
- **Xstream**
|
||||
- **XTube**
|
||||
- **XTubeUser**: XTube user profile
|
||||
- **Xuite**
|
||||
|
||||
@@ -237,7 +237,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
f2['url'] = 'url:' + f2id
|
||||
|
||||
info_dict = _make_result([f1, f2], extractor='youtube')
|
||||
ydl = YDL()
|
||||
ydl = YDL({'format': 'best/bestvideo'})
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
@@ -245,7 +245,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
self.assertEqual(downloaded['format_id'], f1id)
|
||||
|
||||
info_dict = _make_result([f2, f1], extractor='youtube')
|
||||
ydl = YDL()
|
||||
ydl = YDL({'format': 'best/bestvideo'})
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
|
||||
@@ -40,7 +40,8 @@ from youtube_dl.utils import (
|
||||
read_batch_urls,
|
||||
sanitize_filename,
|
||||
sanitize_path,
|
||||
sanitize_url_path_consecutive_slashes,
|
||||
prepend_extension,
|
||||
replace_extension,
|
||||
shell_quote,
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
@@ -51,6 +52,7 @@ from youtube_dl.utils import (
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
uppercase_escape,
|
||||
lowercase_escape,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
version_tuple,
|
||||
@@ -173,25 +175,21 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(sanitize_path('./abc'), 'abc')
|
||||
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
||||
|
||||
def test_sanitize_url_path_consecutive_slashes(self):
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/foo//bar/filename.html'),
|
||||
'http://hostname/foo/bar/filename.html')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname//foo/bar/filename.html'),
|
||||
'http://hostname/foo/bar/filename.html')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname//'),
|
||||
'http://hostname/')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/foo/bar/filename.html'),
|
||||
'http://hostname/foo/bar/filename.html')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/'),
|
||||
'http://hostname/')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/abc//'),
|
||||
'http://hostname/abc/')
|
||||
def test_prepend_extension(self):
|
||||
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
|
||||
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
|
||||
self.assertEqual(prepend_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
|
||||
self.assertEqual(prepend_extension('abc', 'temp'), 'abc.temp')
|
||||
self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
|
||||
self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
|
||||
|
||||
def test_replace_extension(self):
|
||||
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
|
||||
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
|
||||
self.assertEqual(replace_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
|
||||
self.assertEqual(replace_extension('abc', 'temp'), 'abc.temp')
|
||||
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
||||
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
||||
|
||||
def test_ordered_set(self):
|
||||
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
|
||||
@@ -400,6 +398,10 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
|
||||
def test_lowercase_escape(self):
|
||||
self.assertEqual(lowercase_escape('aä'), 'aä')
|
||||
self.assertEqual(lowercase_escape('\\u0026'), '&')
|
||||
|
||||
def test_limit_length(self):
|
||||
self.assertEqual(limit_length(None, 12), None)
|
||||
self.assertEqual(limit_length('foo', 12), 'foo')
|
||||
@@ -598,7 +600,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||
<div xml:lang="en">
|
||||
<p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
|
||||
<p begin="1" end="2">第二行<br/>♪♪</p>
|
||||
<p begin="2" end="3"><span>Third<br/>Line</span></p>
|
||||
<p begin="2" dur="1"><span>Third<br/>Line</span></p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''
|
||||
|
||||
@@ -71,6 +71,7 @@ from .utils import (
|
||||
write_string,
|
||||
YoutubeDLHandler,
|
||||
prepend_extension,
|
||||
replace_extension,
|
||||
args_to_str,
|
||||
age_restricted,
|
||||
)
|
||||
@@ -259,7 +260,6 @@ class YoutubeDL(object):
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
otherwise prefer avconv.
|
||||
exec_cmd: Arbitrary command to run after downloading
|
||||
"""
|
||||
|
||||
params = None
|
||||
@@ -914,15 +914,16 @@ class YoutubeDL(object):
|
||||
if not available_formats:
|
||||
return None
|
||||
|
||||
if format_spec == 'best' or format_spec is None:
|
||||
return available_formats[-1]
|
||||
elif format_spec == 'worst':
|
||||
if format_spec in ['best', 'worst', None]:
|
||||
format_idx = 0 if format_spec == 'worst' else -1
|
||||
audiovideo_formats = [
|
||||
f for f in available_formats
|
||||
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
||||
if audiovideo_formats:
|
||||
return audiovideo_formats[0]
|
||||
return available_formats[0]
|
||||
return audiovideo_formats[format_idx]
|
||||
# for audio only urls, select the best/worst audio format
|
||||
elif all(f.get('acodec') != 'none' for f in available_formats):
|
||||
return available_formats[format_idx]
|
||||
elif format_spec == 'bestaudio':
|
||||
audio_formats = [
|
||||
f for f in available_formats
|
||||
@@ -1084,8 +1085,11 @@ class YoutubeDL(object):
|
||||
req_format = self.params.get('format')
|
||||
if req_format is None:
|
||||
req_format_list = []
|
||||
if info_dict['extractor'] in ['youtube', 'ted'] and FFmpegMergerPP(self).available:
|
||||
req_format_list.append('bestvideo+bestaudio')
|
||||
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
||||
info_dict['extractor'] in ['youtube', 'ted']):
|
||||
merger = FFmpegMergerPP(self)
|
||||
if merger.available and merger.can_merge():
|
||||
req_format_list.append('bestvideo+bestaudio')
|
||||
req_format_list.append('best')
|
||||
req_format = '/'.join(req_format_list)
|
||||
formats_to_download = []
|
||||
@@ -1269,7 +1273,7 @@ class YoutubeDL(object):
|
||||
return
|
||||
|
||||
if self.params.get('writedescription', False):
|
||||
descfn = filename + '.description'
|
||||
descfn = replace_extension(filename, 'description', info_dict.get('ext'))
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
|
||||
self.to_screen('[info] Video description is already present')
|
||||
elif info_dict.get('description') is None:
|
||||
@@ -1284,7 +1288,7 @@ class YoutubeDL(object):
|
||||
return
|
||||
|
||||
if self.params.get('writeannotations', False):
|
||||
annofn = filename + '.annotations.xml'
|
||||
annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
|
||||
self.to_screen('[info] Video annotations are already present')
|
||||
else:
|
||||
@@ -1331,13 +1335,13 @@ class YoutubeDL(object):
|
||||
return
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = os.path.splitext(filename)[0] + '.info.json'
|
||||
infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
|
||||
self.to_screen('[info] Video description metadata is already present')
|
||||
else:
|
||||
self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
|
||||
try:
|
||||
write_json_file(info_dict, infofn)
|
||||
write_json_file(self.filter_requested_info(info_dict), infofn)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write metadata to JSON file ' + infofn)
|
||||
return
|
||||
@@ -1381,11 +1385,18 @@ class YoutubeDL(object):
|
||||
# TODO: Check acodec/vcodec
|
||||
return False
|
||||
|
||||
filename_real_ext = os.path.splitext(filename)[1][1:]
|
||||
filename_wo_ext = (
|
||||
os.path.splitext(filename)[0]
|
||||
if filename_real_ext == info_dict['ext']
|
||||
else filename)
|
||||
requested_formats = info_dict['requested_formats']
|
||||
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
|
||||
filename = os.path.splitext(filename)[0] + '.mkv'
|
||||
self.report_warning('You have requested formats uncompatible for merge. '
|
||||
info_dict['ext'] = 'mkv'
|
||||
self.report_warning('You have requested formats incompatible for merge. '
|
||||
'The formats will be merged into mkv')
|
||||
# Ensure filename always has a correct extension for successful merge
|
||||
filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
|
||||
if os.path.exists(encodeFilename(filename)):
|
||||
self.to_screen(
|
||||
'[download] %s has already been downloaded and '
|
||||
@@ -1395,7 +1406,7 @@ class YoutubeDL(object):
|
||||
new_info = dict(info_dict)
|
||||
new_info.update(f)
|
||||
fname = self.prepare_filename(new_info)
|
||||
fname = prepend_extension(fname, 'f%s' % f['format_id'])
|
||||
fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
|
||||
downloaded.append(fname)
|
||||
partial_success = dl(fname, new_info)
|
||||
success = success and partial_success
|
||||
@@ -1487,7 +1498,7 @@ class YoutubeDL(object):
|
||||
[info_filename], mode='r',
|
||||
openhook=fileinput.hook_encoded('utf-8'))) as f:
|
||||
# FileInput doesn't have a read method, we can't call json.load
|
||||
info = json.loads('\n'.join(f))
|
||||
info = self.filter_requested_info(json.loads('\n'.join(f)))
|
||||
try:
|
||||
self.process_ie_result(info, download=True)
|
||||
except DownloadError:
|
||||
@@ -1499,6 +1510,12 @@ class YoutubeDL(object):
|
||||
raise
|
||||
return self._download_retcode
|
||||
|
||||
@staticmethod
|
||||
def filter_requested_info(info_dict):
|
||||
return dict(
|
||||
(k, v) for k, v in info_dict.items()
|
||||
if k not in ['requested_formats', 'requested_subtitles'])
|
||||
|
||||
def post_process(self, filename, ie_info):
|
||||
"""Run all the postprocessors on the given file."""
|
||||
info = dict(ie_info)
|
||||
@@ -1832,7 +1849,7 @@ class YoutubeDL(object):
|
||||
thumb_ext = determine_ext(t['url'], 'jpg')
|
||||
suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
|
||||
thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
|
||||
thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
|
||||
t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
|
||||
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
|
||||
self.to_screen('[%s] %s: Thumbnail %sis already present' %
|
||||
|
||||
@@ -240,13 +240,18 @@ def _real_main(argv=None):
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
if opts.embedthumbnail:
|
||||
postprocessors.append({'key': 'EmbedThumbnail'})
|
||||
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
|
||||
postprocessors.append({
|
||||
'key': 'EmbedThumbnail',
|
||||
'already_have_thumbnail': already_have_thumbnail
|
||||
})
|
||||
if not already_have_thumbnail:
|
||||
opts.writethumbnail = True
|
||||
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
|
||||
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
|
||||
if opts.exec_cmd:
|
||||
postprocessors.append({
|
||||
'key': 'ExecAfterDownload',
|
||||
'verboseOutput': opts.verbose,
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
})
|
||||
if opts.xattr_set_filesize:
|
||||
@@ -345,7 +350,6 @@ def _real_main(argv=None):
|
||||
'default_search': opts.default_search,
|
||||
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
|
||||
'encoding': opts.encoding,
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
'extract_flat': opts.extract_flat,
|
||||
'merge_output_format': opts.merge_output_format,
|
||||
'postprocessors': postprocessors,
|
||||
|
||||
@@ -46,11 +46,6 @@ try:
|
||||
except ImportError: # Python 2
|
||||
import htmlentitydefs as compat_html_entities
|
||||
|
||||
try:
|
||||
import html.parser as compat_html_parser
|
||||
except ImportError: # Python 2
|
||||
import HTMLParser as compat_html_parser
|
||||
|
||||
try:
|
||||
import http.client as compat_http_client
|
||||
except ImportError: # Python 2
|
||||
@@ -404,7 +399,6 @@ __all__ = [
|
||||
'compat_getenv',
|
||||
'compat_getpass',
|
||||
'compat_html_entities',
|
||||
'compat_html_parser',
|
||||
'compat_http_client',
|
||||
'compat_http_server',
|
||||
'compat_kwargs',
|
||||
|
||||
@@ -28,13 +28,8 @@ class HttpFD(FileDownloader):
|
||||
add_headers = info_dict.get('http_headers')
|
||||
if add_headers:
|
||||
headers.update(add_headers)
|
||||
data = info_dict.get('http_post_data')
|
||||
http_method = info_dict.get('http_method')
|
||||
basic_request = compat_urllib_request.Request(url, data, headers)
|
||||
request = compat_urllib_request.Request(url, data, headers)
|
||||
if http_method is not None:
|
||||
basic_request.get_method = lambda: http_method
|
||||
request.get_method = lambda: http_method
|
||||
basic_request = compat_urllib_request.Request(url, None, headers)
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
|
||||
is_test = self.params.get('test', False)
|
||||
|
||||
|
||||
@@ -131,7 +131,7 @@ class RtmpFD(FileDownloader):
|
||||
if play_path is not None:
|
||||
basic_args += ['--playpath', play_path]
|
||||
if tc_url is not None:
|
||||
basic_args += ['--tcUrl', url]
|
||||
basic_args += ['--tcUrl', tc_url]
|
||||
if test:
|
||||
basic_args += ['--stop', '1']
|
||||
if flash_version is not None:
|
||||
|
||||
@@ -32,6 +32,7 @@ from .atresplayer import AtresPlayerIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||
from .azubu import AzubuIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
@@ -161,6 +162,7 @@ from .footyroom import FootyRoomIE
|
||||
from .fourtube import FourTubeIE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .foxsports import FoxSportsIE
|
||||
from .franceculture import FranceCultureIE
|
||||
from .franceinter import FranceInterIE
|
||||
from .francetv import (
|
||||
@@ -198,7 +200,6 @@ from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .gorillavid import GorillaVidIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .grooveshark import GroovesharkIE
|
||||
from .groupon import GrouponIE
|
||||
from .hark import HarkIE
|
||||
from .hearthisat import HearThisAtIE
|
||||
@@ -257,7 +258,10 @@ from .letv import (
|
||||
LetvPlaylistIE
|
||||
)
|
||||
from .libsyn import LibsynIE
|
||||
from .lifenews import LifeNewsIE
|
||||
from .lifenews import (
|
||||
LifeNewsIE,
|
||||
LifeEmbedIE,
|
||||
)
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import (
|
||||
LivestreamIE,
|
||||
@@ -320,7 +324,10 @@ from .nbc import (
|
||||
NBCSportsIE,
|
||||
NBCSportsVPlayerIE,
|
||||
)
|
||||
from .ndr import NDRIE
|
||||
from .ndr import (
|
||||
NDRIE,
|
||||
NJoyIE,
|
||||
)
|
||||
from .ndtv import NDTVIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
@@ -362,7 +369,10 @@ from .nrk import (
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
from .ntvru import NTVRuIE
|
||||
from .nytimes import NYTimesIE
|
||||
from .nytimes import (
|
||||
NYTimesIE,
|
||||
NYTimesArticleIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
@@ -404,6 +414,7 @@ from .qqmusic import (
|
||||
QQMusicIE,
|
||||
QQMusicSingerIE,
|
||||
QQMusicAlbumIE,
|
||||
QQMusicToplistIE,
|
||||
)
|
||||
from .quickvid import QuickVidIE
|
||||
from .r7 import R7IE
|
||||
@@ -479,8 +490,10 @@ from .soundgasm import (
|
||||
)
|
||||
from .southpark import (
|
||||
SouthParkIE,
|
||||
SouthParkDeIE,
|
||||
SouthParkDkIE,
|
||||
SouthParkEsIE,
|
||||
SouthparkDeIE,
|
||||
SouthParkNlIE
|
||||
)
|
||||
from .space import SpaceIE
|
||||
from .spankbang import SpankBangIE
|
||||
@@ -500,7 +513,10 @@ from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
from .sunporno import SunPornoIE
|
||||
from .svtplay import SVTPlayIE
|
||||
from .svt import (
|
||||
SVTIE,
|
||||
SVTPlayIE,
|
||||
)
|
||||
from .swrmediathek import SWRMediathekIE
|
||||
from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
@@ -529,7 +545,10 @@ from .thesixtyone import TheSixtyOneIE
|
||||
from .thisav import ThisAVIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcIE, TlcDeIE
|
||||
from .tmz import TMZIE
|
||||
from .tmz import (
|
||||
TMZIE,
|
||||
TMZArticleIE,
|
||||
)
|
||||
from .tnaflix import TNAFlixIE
|
||||
from .thvideo import (
|
||||
THVideoIE,
|
||||
@@ -582,7 +601,11 @@ from .veoh import VeohIE
|
||||
from .vessel import VesselIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import VevoIE
|
||||
from .vgtv import VGTVIE
|
||||
from .vgtv import (
|
||||
BTArticleIE,
|
||||
BTVestlendingenIE,
|
||||
VGTVIE,
|
||||
)
|
||||
from .vh1 import VH1IE
|
||||
from .vice import ViceIE
|
||||
from .viddler import ViddlerIE
|
||||
@@ -619,6 +642,7 @@ from .vk import (
|
||||
VKUserVideosIE,
|
||||
)
|
||||
from .vodlocker import VodlockerIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vube import VubeIE
|
||||
@@ -645,9 +669,10 @@ from .xboxclips import XboxClipsIE
|
||||
from .xhamster import XHamsterIE
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xstream import XstreamIE
|
||||
from .xtube import XTubeUserIE, XTubeIE
|
||||
from .xuite import XuiteIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xxxymovies import XXXYMoviesIE
|
||||
from .yahoo import (
|
||||
YahooIE,
|
||||
|
||||
@@ -1,21 +1,11 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
xpath_with_ns,
|
||||
xpath_text,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class AftenpostenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
|
||||
'md5': 'fd828cd29774a729bf4d4425fe192972',
|
||||
@@ -30,69 +20,4 @@ class AftenpostenIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_xml(
|
||||
'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
|
||||
|
||||
NS_MAP = {
|
||||
'atom': 'http://www.w3.org/2005/Atom',
|
||||
'xt': 'http://xstream.dk/',
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
}
|
||||
|
||||
entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
|
||||
|
||||
title = xpath_text(
|
||||
entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
|
||||
description = xpath_text(
|
||||
entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
|
||||
timestamp = parse_iso8601(xpath_text(
|
||||
entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
|
||||
|
||||
formats = []
|
||||
media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
|
||||
for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
|
||||
media_url = media_content.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
tbr = int_or_none(media_content.get('bitrate'))
|
||||
mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
|
||||
if mobj:
|
||||
formats.append({
|
||||
'url': mobj.group('url'),
|
||||
'play_path': 'mp4:%s' % mobj.group('playpath'),
|
||||
'app': mobj.group('app'),
|
||||
'ext': 'flv',
|
||||
'tbr': tbr,
|
||||
'format_id': 'rtmp-%d' % tbr,
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
link = find_xpath_attr(
|
||||
entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
|
||||
if link is not None:
|
||||
formats.append({
|
||||
'url': link.get('href'),
|
||||
'format_id': link.get('rel'),
|
||||
})
|
||||
|
||||
thumbnails = [{
|
||||
'url': splash.get('url'),
|
||||
'width': int_or_none(splash.get('width')),
|
||||
'height': int_or_none(splash.get('height')),
|
||||
} for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
return self.url_result('xstream:ap:%s' % self._match_id(url), 'Xstream')
|
||||
|
||||
@@ -33,7 +33,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
json_url = url + ('?' if '?' in url else '&') + 'output=json'
|
||||
json_url = url + ('&' if '?' in url else '?') + 'output=json'
|
||||
data = self._download_json(json_url, video_id)
|
||||
|
||||
def get_optional(data_dict, field):
|
||||
|
||||
68
youtube_dl/extractor/baidu.py
Normal file
68
youtube_dl/extractor/baidu.py
Normal file
@@ -0,0 +1,68 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class BaiduVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
|
||||
_TESTS = [{
|
||||
'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
|
||||
'info_dict': {
|
||||
'id': '1069',
|
||||
'title': '中华小当家 TV版 (全52集)',
|
||||
'description': 'md5:395a419e41215e531c857bb037bbaf80',
|
||||
},
|
||||
'playlist_count': 52,
|
||||
}, {
|
||||
'url': 'http://v.baidu.com/show/11595.htm?frp=bdbrand',
|
||||
'info_dict': {
|
||||
'id': '11595',
|
||||
'title': 're:^奔跑吧兄弟',
|
||||
'description': 'md5:1bf88bad6d850930f542d51547c089b8',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
category = category2 = mobj.group('type')
|
||||
if category == 'show':
|
||||
category2 = 'tvshow'
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
playlist_title = self._html_search_regex(
|
||||
r'title\s*:\s*(["\'])(?P<title>[^\']+)\1', webpage,
|
||||
'playlist title', group='title')
|
||||
playlist_description = self._html_search_regex(
|
||||
r'<input[^>]+class="j-data-intro"[^>]+value="([^"]+)"/>', webpage,
|
||||
playlist_id, 'playlist description')
|
||||
|
||||
site = self._html_search_regex(
|
||||
r'filterSite\s*:\s*["\']([^"]*)["\']', webpage,
|
||||
'primary provider site')
|
||||
api_result = self._download_json(
|
||||
'http://v.baidu.com/%s_intro/?dtype=%sPlayUrl&id=%s&site=%s' % (
|
||||
category, category2, playlist_id, site),
|
||||
playlist_id, 'Get playlist links')
|
||||
|
||||
entries = []
|
||||
for episode in api_result[0]['episodes']:
|
||||
episode_id = '%s_%s' % (playlist_id, episode['episode'])
|
||||
|
||||
redirect_page = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, episode['url']), episode_id,
|
||||
note='Download Baidu redirect page')
|
||||
real_url = self._html_search_regex(
|
||||
r'location\.replace\("([^"]+)"\)', redirect_page, 'real URL')
|
||||
|
||||
entries.append(self.url_result(
|
||||
real_url, video_title=episode['single_title']))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
@@ -112,6 +115,20 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
|
||||
'info_dict': {
|
||||
'id': 'p02n76xf',
|
||||
'ext': 'flv',
|
||||
'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
|
||||
'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
|
||||
'duration': 3540,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
@@ -326,16 +343,27 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
|
||||
programme_id = self._search_regex(
|
||||
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
|
||||
programme_id = None
|
||||
|
||||
tviplayer = self._search_regex(
|
||||
r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
|
||||
webpage, 'player', default=None)
|
||||
|
||||
if tviplayer:
|
||||
player = self._parse_json(tviplayer, group_id).get('player', {})
|
||||
duration = int_or_none(player.get('duration'))
|
||||
programme_id = player.get('vpid')
|
||||
|
||||
if not programme_id:
|
||||
programme_id = self._search_regex(
|
||||
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
|
||||
|
||||
if programme_id:
|
||||
player = self._download_json(
|
||||
'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
|
||||
group_id)['jsConf']['player']
|
||||
title = player['title']
|
||||
description = player['subtitle']
|
||||
duration = player['duration']
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._search_regex(
|
||||
r'<p class="medium-description">([^<]+)</p>',
|
||||
webpage, 'description', fatal=False)
|
||||
else:
|
||||
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
||||
|
||||
@@ -345,6 +373,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
|
||||
@@ -16,11 +16,11 @@ class BetIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
|
||||
'info_dict': {
|
||||
'id': '740ab250-bb94-4a8a-8787-fe0de7c74471',
|
||||
'id': 'news/national/2014/a-conversation-with-president-obama',
|
||||
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
|
||||
'ext': 'flv',
|
||||
'title': 'BET News Presents: A Conversation With President Obama',
|
||||
'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
|
||||
'title': 'A Conversation With President Obama',
|
||||
'description': 'md5:699d0652a350cf3e491cd15cc745b5da',
|
||||
'duration': 1534,
|
||||
'timestamp': 1418075340,
|
||||
'upload_date': '20141208',
|
||||
@@ -35,7 +35,7 @@ class BetIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
|
||||
'info_dict': {
|
||||
'id': 'bcd1b1df-673a-42cf-8d01-b282db608f2d',
|
||||
'id': 'news/national/2014/justice-for-ferguson-a-community-reacts',
|
||||
'display_id': 'justice-for-ferguson-a-community-reacts',
|
||||
'ext': 'flv',
|
||||
'title': 'Justice for Ferguson: A Community Reacts',
|
||||
@@ -61,6 +61,9 @@ class BetIE(InfoExtractor):
|
||||
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
|
||||
webpage, 'media URL'))
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'/video/(.*)/_jcr_content/', media_url, 'video id')
|
||||
|
||||
mrss = self._download_xml(media_url, display_id)
|
||||
|
||||
item = mrss.find('./channel/item')
|
||||
@@ -75,8 +78,6 @@ class BetIE(InfoExtractor):
|
||||
description = xpath_text(
|
||||
item, './description', 'description', fatal=False)
|
||||
|
||||
video_id = xpath_text(item, './guid', 'video id', fatal=False)
|
||||
|
||||
timestamp = parse_iso8601(xpath_text(
|
||||
item, xpath_with_ns('./dc:date', NS_MAP),
|
||||
'upload date', fatal=False))
|
||||
|
||||
@@ -2,7 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
fix_xml_ampersands,
|
||||
)
|
||||
|
||||
|
||||
class BildIE(InfoExtractor):
|
||||
@@ -15,7 +18,7 @@ class BildIE(InfoExtractor):
|
||||
'id': '38184146',
|
||||
'ext': 'mp4',
|
||||
'title': 'BILD hat sie getestet',
|
||||
'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 196,
|
||||
'description': 'Mit dem iPad Air 2 und dem iPad Mini 3 hat Apple zwei neue Tablet-Modelle präsentiert. BILD-Reporter Sven Stein durfte die Geräte bereits testen. ',
|
||||
}
|
||||
@@ -25,7 +28,7 @@ class BildIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
xml_url = url.split(".bild.html")[0] + ",view=xml.bild.xml"
|
||||
doc = self._download_xml(xml_url, video_id)
|
||||
doc = self._download_xml(xml_url, video_id, transform_source=fix_xml_ampersands)
|
||||
|
||||
duration = int_or_none(doc.attrib.get('duration'), scale=1000)
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -14,18 +15,25 @@ from ..utils import (
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '2c301e4dab317596e837c3e7633e7d86',
|
||||
'info_dict': {
|
||||
'id': '1074402',
|
||||
'id': '1074402_part1',
|
||||
'ext': 'flv',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'duration': 308,
|
||||
'upload_date': '20140420',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
||||
'info_dict': {
|
||||
'id': '1041170',
|
||||
'title': '【BD1080P】刀语【诸神&异域】',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -57,19 +65,14 @@ class BiliBiliIE(InfoExtractor):
|
||||
|
||||
cid = self._search_regex(r'cid=(\d+)', webpage, 'cid')
|
||||
|
||||
entries = []
|
||||
|
||||
lq_doc = self._download_xml(
|
||||
'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
|
||||
video_id,
|
||||
note='Downloading LQ video info'
|
||||
)
|
||||
lq_durl = lq_doc.find('./durl')
|
||||
formats = [{
|
||||
'format_id': 'lq',
|
||||
'quality': 1,
|
||||
'url': lq_durl.find('./url').text,
|
||||
'filesize': int_or_none(
|
||||
lq_durl.find('./size'), get_attr='text'),
|
||||
}]
|
||||
lq_durls = lq_doc.findall('./durl')
|
||||
|
||||
hq_doc = self._download_xml(
|
||||
'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid,
|
||||
@@ -77,23 +80,44 @@ class BiliBiliIE(InfoExtractor):
|
||||
note='Downloading HQ video info',
|
||||
fatal=False,
|
||||
)
|
||||
if hq_doc is not False:
|
||||
hq_durl = hq_doc.find('./durl')
|
||||
formats.append({
|
||||
'format_id': 'hq',
|
||||
'quality': 2,
|
||||
'ext': 'flv',
|
||||
'url': hq_durl.find('./url').text,
|
||||
hq_durls = hq_doc.findall('./durl') if hq_doc is not False else itertools.repeat(None)
|
||||
|
||||
assert len(lq_durls) == len(hq_durls)
|
||||
|
||||
i = 1
|
||||
for lq_durl, hq_durl in zip(lq_durls, hq_durls):
|
||||
formats = [{
|
||||
'format_id': 'lq',
|
||||
'quality': 1,
|
||||
'url': lq_durl.find('./url').text,
|
||||
'filesize': int_or_none(
|
||||
hq_durl.find('./size'), get_attr='text'),
|
||||
lq_durl.find('./size'), get_attr='text'),
|
||||
}]
|
||||
if hq_durl:
|
||||
formats.append({
|
||||
'format_id': 'hq',
|
||||
'quality': 2,
|
||||
'ext': 'flv',
|
||||
'url': hq_durl.find('./url').text,
|
||||
'filesize': int_or_none(
|
||||
hq_durl.find('./size'), get_attr='text'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%d' % (video_id, i),
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
i += 1
|
||||
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'entries': entries,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
'title': title
|
||||
}
|
||||
|
||||
@@ -16,27 +16,38 @@ class BRIE(InfoExtractor):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.br.de/mediathek/video/sendungen/heimatsound/heimatsound-festival-2014-trailer-100.html',
|
||||
'md5': '93556dd2bcb2948d9259f8670c516d59',
|
||||
'url': 'http://www.br.de/mediathek/video/sendungen/abendschau/betriebliche-altersvorsorge-104.html',
|
||||
'md5': '83a0477cf0b8451027eb566d88b51106',
|
||||
'info_dict': {
|
||||
'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
|
||||
'id': '48f656ef-287e-486f-be86-459122db22cc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wenn das Traditions-Theater wackelt',
|
||||
'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
|
||||
'duration': 34,
|
||||
'uploader': 'BR',
|
||||
'upload_date': '20140802',
|
||||
'title': 'Die böse Überraschung',
|
||||
'description': 'Betriebliche Altersvorsorge: Die böse Überraschung',
|
||||
'duration': 180,
|
||||
'uploader': 'Reinhard Weber',
|
||||
'upload_date': '20150422',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html',
|
||||
'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820',
|
||||
'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
|
||||
'md5': 'a44396d73ab6a68a69a568fae10705bb',
|
||||
'info_dict': {
|
||||
'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab',
|
||||
'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
|
||||
'ext': 'mp4',
|
||||
'title': 'Manfred Schreiber ist tot',
|
||||
'description': 'Abendschau kompakt: Manfred Schreiber ist tot',
|
||||
'duration': 26,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.br.de/radio/br-klassik/sendungen/allegro/premiere-urauffuehrung-the-land-2015-dance-festival-muenchen-100.html',
|
||||
'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
|
||||
'info_dict': {
|
||||
'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
|
||||
'ext': 'aac',
|
||||
'title': '"Keine neuen Schulden im nächsten Jahr"',
|
||||
'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
|
||||
'duration': 64,
|
||||
'title': 'Kurzweilig und sehr bewegend',
|
||||
'description': '"The Land" von Peeping Tom: Kurzweilig und sehr bewegend',
|
||||
'duration': 296,
|
||||
}
|
||||
},
|
||||
{
|
||||
|
||||
@@ -16,7 +16,7 @@ class BYUtvIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:5438d33774b6bdc662f9485a340401cc',
|
||||
'title': 'Season 5 Episode 5',
|
||||
'thumbnail': 're:^https?://.*promo.*'
|
||||
'thumbnail': 're:^https?://.*\.jpg$'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
||||
@@ -25,14 +25,14 @@ class CanalplusIE(InfoExtractor):
|
||||
}
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
|
||||
'md5': '3db39fb48b9685438ecf33a1078023e4',
|
||||
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092',
|
||||
'md5': 'b3481d7ca972f61e37420798d0a9d934',
|
||||
'info_dict': {
|
||||
'id': '922470',
|
||||
'id': '1263092',
|
||||
'ext': 'flv',
|
||||
'title': 'Zapping - 26/08/13',
|
||||
'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
|
||||
'upload_date': '20130826',
|
||||
'title': 'Le Zapping - 13/05/15',
|
||||
'description': 'md5:09738c0d06be4b5d06a0940edb0da73f',
|
||||
'upload_date': '20150513',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
|
||||
@@ -56,7 +56,7 @@ class CanalplusIE(InfoExtractor):
|
||||
'skip': 'videos get deleted after a while',
|
||||
}, {
|
||||
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
|
||||
'md5': '65aa83ad62fe107ce29e564bb8712580',
|
||||
'md5': 'f3a46edcdf28006598ffaf5b30e6a2d4',
|
||||
'info_dict': {
|
||||
'id': '1213714',
|
||||
'ext': 'flv',
|
||||
|
||||
@@ -47,7 +47,7 @@ class InfoExtractor(object):
|
||||
information possibly downloading the video to the file system, among
|
||||
other possible outcomes.
|
||||
|
||||
The type field determines the the type of the result.
|
||||
The type field determines the type of the result.
|
||||
By far the most common value (and the default if _type is missing) is
|
||||
"video", which indicates a single video.
|
||||
|
||||
@@ -111,11 +111,8 @@ class InfoExtractor(object):
|
||||
(quality takes higher priority)
|
||||
-1 for default (order by other properties),
|
||||
-2 or smaller for less than default.
|
||||
* http_method HTTP method to use for the download.
|
||||
* http_headers A dictionary of additional HTTP headers
|
||||
to add to the request.
|
||||
* http_post_data Additional data to send with a POST
|
||||
request.
|
||||
* stretched_ratio If given and not 1, indicates that the
|
||||
video's pixels are not square.
|
||||
width : height ratio as float.
|
||||
@@ -572,7 +569,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _get_login_info(self):
|
||||
"""
|
||||
Get the the login info as (username, password)
|
||||
Get the login info as (username, password)
|
||||
It will look in the netrc file using the _NETRC_MACHINE value
|
||||
If there's no info available, return (None, None)
|
||||
"""
|
||||
@@ -767,7 +764,7 @@ class InfoExtractor(object):
|
||||
f.get('fps') if f.get('fps') is not None else -1,
|
||||
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
|
||||
f.get('source_preference') if f.get('source_preference') is not None else -1,
|
||||
f.get('format_id'),
|
||||
f.get('format_id') if f.get('format_id') is not None else '',
|
||||
)
|
||||
formats.sort(key=_formats_key)
|
||||
|
||||
@@ -899,7 +896,7 @@ class InfoExtractor(object):
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
last_media_name = last_media.get('NAME') if last_media else None
|
||||
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
|
||||
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
@@ -1075,9 +1072,6 @@ class InfoExtractor(object):
|
||||
def _get_automatic_captions(self, *args, **kwargs):
|
||||
raise NotImplementedError("This method must be implemented by subclasses")
|
||||
|
||||
def _subtitles_timecode(self, seconds):
|
||||
return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
||||
@@ -52,6 +52,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'uploader': 'IGN',
|
||||
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
|
||||
'upload_date': '20150306',
|
||||
}
|
||||
},
|
||||
# Vevo video
|
||||
@@ -85,7 +86,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url = 'http://www.dailymotion.com/video/%s' % video_id
|
||||
url = 'https://www.dailymotion.com/video/%s' % video_id
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
request = self._build_request(url)
|
||||
@@ -106,11 +107,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
video_upload_date = None
|
||||
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
|
||||
mobj = re.search(r'<meta property="video:release_date" content="([0-9]{4})-([0-9]{2})-([0-9]{2}).+?"/>', webpage)
|
||||
if mobj is not None:
|
||||
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
|
||||
video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
|
||||
|
||||
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
|
||||
embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id
|
||||
embed_request = self._build_request(embed_url)
|
||||
embed_page = self._download_webpage(
|
||||
embed_request, video_id, 'Downloading embed page')
|
||||
|
||||
@@ -11,19 +11,25 @@ from ..utils import (
|
||||
|
||||
class DreiSatIE(InfoExtractor):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_TEST = {
|
||||
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
|
||||
'md5': 'be37228896d30a88f315b638900a026e',
|
||||
'info_dict': {
|
||||
'id': '45918',
|
||||
'ext': 'mp4',
|
||||
'title': 'Waidmannsheil',
|
||||
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
||||
'uploader': '3sat',
|
||||
'upload_date': '20140913'
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
|
||||
'md5': 'be37228896d30a88f315b638900a026e',
|
||||
'info_dict': {
|
||||
'id': '45918',
|
||||
'ext': 'mp4',
|
||||
'title': 'Waidmannsheil',
|
||||
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
||||
'uploader': '3sat',
|
||||
'upload_date': '20140913'
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
@@ -26,7 +26,7 @@ class DumpertIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Cookie', 'nsfw=1')
|
||||
req.add_header('Cookie', 'nsfw=1; cpc=10')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
files_base64 = self._search_regex(
|
||||
|
||||
@@ -4,7 +4,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML
|
||||
)
|
||||
|
||||
|
||||
class EroProfileIE(InfoExtractor):
|
||||
@@ -75,8 +78,8 @@ class EroProfileIE(InfoExtractor):
|
||||
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
||||
webpage, 'video id', default=None)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'<source src="([^"]+)', webpage, 'video url')
|
||||
video_url = unescapeHTML(self._search_regex(
|
||||
r'<source src="([^"]+)', webpage, 'video url'))
|
||||
title = self._html_search_regex(
|
||||
r'Title:</th><td>([^<]+)</td>', webpage, 'title')
|
||||
thumbnail = self._search_regex(
|
||||
|
||||
@@ -8,7 +8,8 @@ from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
clean_html,
|
||||
qualities,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,10 +37,10 @@ def _decrypt_config(key, string):
|
||||
|
||||
|
||||
class EscapistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
|
||||
_VALID_URL = r'https?://?(?:www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||
'md5': 'c6793dbda81388f4264c1ba18684a74d',
|
||||
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
|
||||
'info_dict': {
|
||||
'id': '6618',
|
||||
'ext': 'mp4',
|
||||
@@ -47,10 +48,11 @@ class EscapistIE(InfoExtractor):
|
||||
'title': "Breaking Down Baldur's Gate",
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 264,
|
||||
'uploader': 'The Escapist',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer',
|
||||
'md5': 'cf8842a8a46444d241f9a9980d7874f2',
|
||||
'md5': '9e8c437b0dbb0387d3bd3255ca77f6bf',
|
||||
'info_dict': {
|
||||
'id': '10044',
|
||||
'ext': 'mp4',
|
||||
@@ -58,6 +60,7 @@ class EscapistIE(InfoExtractor):
|
||||
'title': 'Evolve - One vs Multiplayer',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 304,
|
||||
'uploader': 'The Escapist',
|
||||
}
|
||||
}]
|
||||
|
||||
@@ -65,35 +68,33 @@ class EscapistIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
imsVideo = self._parse_json(
|
||||
ims_video = self._parse_json(
|
||||
self._search_regex(
|
||||
r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'),
|
||||
video_id)
|
||||
video_id = imsVideo['videoID']
|
||||
key = imsVideo['hash']
|
||||
video_id = ims_video['videoID']
|
||||
key = ims_video['hash']
|
||||
|
||||
quality = qualities(['lq', 'hq', 'hd'])
|
||||
config_req = compat_urllib_request.Request(
|
||||
'http://www.escapistmagazine.com/videos/'
|
||||
'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
|
||||
config_req.add_header('Referer', url)
|
||||
config = self._download_webpage(config_req, video_id, 'Downloading video config')
|
||||
|
||||
formats = []
|
||||
for q in ['lq', 'hq', 'hd']:
|
||||
config_req = compat_urllib_request.Request('http://www.escapistmagazine.com/videos/'
|
||||
'vidconfig.php?videoID=%s&hash=%s&quality=%s' % (video_id, key, 'mp4_' + q))
|
||||
config_req.add_header('Referer', url)
|
||||
config = self._download_webpage(config_req, video_id, 'Downloading video config ' + q.upper())
|
||||
data = json.loads(_decrypt_config(key, config))
|
||||
|
||||
data = json.loads(_decrypt_config(key, config))
|
||||
video_data = data['videoData']
|
||||
|
||||
title = clean_html(data['videoData']['title'])
|
||||
duration = data['videoData']['duration'] / 1000
|
||||
|
||||
for i, v in enumerate(data['files']['videos']):
|
||||
|
||||
formats.append({
|
||||
'url': v,
|
||||
'format_id': determine_ext(v) + '_' + q + str(i),
|
||||
'quality': quality(q),
|
||||
})
|
||||
title = clean_html(video_data['title'])
|
||||
duration = float_or_none(video_data.get('duration'), 1000)
|
||||
uploader = video_data.get('publisher')
|
||||
|
||||
formats = [{
|
||||
'url': video['src'],
|
||||
'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']),
|
||||
'height': int_or_none(video.get('res')),
|
||||
} for video in data['files']['videos']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -102,4 +103,5 @@ class EscapistIE(InfoExtractor):
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
||||
32
youtube_dl/extractor/foxsports.py
Normal file
32
youtube_dl/extractor/foxsports.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class FoxSportsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.foxsports.com/video?vid=432609859715',
|
||||
'info_dict': {
|
||||
'id': 'gA0bHB3Ladz3',
|
||||
'ext': 'flv',
|
||||
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
|
||||
'description': 'Courtney Lee talks about Memphis being focused.',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r"data-player-config='([^']+)'", webpage, 'data player config'),
|
||||
video_id)
|
||||
|
||||
return self.url_result(smuggle_url(
|
||||
config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True}))
|
||||
@@ -37,6 +37,7 @@ from .condenast import CondeNastIE
|
||||
from .udn import UDNEmbedIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .bliptv import BlipTVIE
|
||||
from .svt import SVTIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -645,6 +646,17 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Facebook Creates "On This Day" | Crunch Report',
|
||||
},
|
||||
},
|
||||
# SVT embed
|
||||
{
|
||||
'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
|
||||
'info_dict': {
|
||||
'id': '2900353',
|
||||
'ext': 'flv',
|
||||
'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
|
||||
'duration': 27,
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
# RSS feed with enclosure
|
||||
{
|
||||
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||
@@ -1078,6 +1090,11 @@ class GenericIE(InfoExtractor):
|
||||
if bliptv_url:
|
||||
return self.url_result(bliptv_url, 'BlipTV')
|
||||
|
||||
# Look for SVT player
|
||||
svt_url = SVTIE._extract_url(webpage)
|
||||
if svt_url:
|
||||
return self.url_result(svt_url, 'SVT')
|
||||
|
||||
# Look for embedded condenast player
|
||||
matches = re.findall(
|
||||
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
|
||||
@@ -1289,6 +1306,10 @@ class GenericIE(InfoExtractor):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
|
||||
webpage)
|
||||
if not mobj:
|
||||
mobj = re.search(
|
||||
r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'MLB')
|
||||
|
||||
@@ -1436,7 +1457,7 @@ class GenericIE(InfoExtractor):
|
||||
if refresh_header:
|
||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||
if found:
|
||||
new_url = found.group(1)
|
||||
new_url = compat_urlparse.urljoin(url, found.group(1))
|
||||
self.report_following_redirect(new_url)
|
||||
return {
|
||||
'_type': 'url',
|
||||
|
||||
@@ -85,7 +85,8 @@ class GigaIE(InfoExtractor):
|
||||
r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False)
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False))
|
||||
r'<span class="views"><strong>([\d.,]+)</strong>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -35,13 +35,7 @@ class GorillaVidIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
|
||||
'md5': 'c9e293ca74d46cad638e199c3f3fe604',
|
||||
'info_dict': {
|
||||
'id': 'z08zf8le23c6',
|
||||
'ext': 'mp4',
|
||||
'title': 'Say something nice',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://daclips.in/3rso4kdn6f9m',
|
||||
'md5': '1ad8fd39bb976eeb66004d3a4895f106',
|
||||
|
||||
@@ -1,191 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import math
|
||||
import os.path
|
||||
import re
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_html_parser,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
    """HTML parser that collects ``<object>`` elements and their ``<param>`` tags.

    Each collected object is a dict of the form
    ``{'attrs': {...}, 'params': [{...}, ...]}`` where ``attrs`` holds the
    attributes of the ``<object>`` tag and every entry of ``params`` holds the
    attributes of one ``<param>`` tag seen while that object was open.

    Nesting is not tracked: a new ``<object>`` start tag replaces the object
    currently being built.
    """

    def __init__(self):
        # Object currently being built, or None when outside any <object>
        self._current_object = None
        # Completed objects, in document order
        self.objects = []
        compat_html_parser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Open a new object on ``<object>``; record ``<param>`` on the open one."""
        attrs = dict(attrs)
        if tag == 'object':
            self._current_object = {'attrs': attrs, 'params': []}
        elif tag == 'param' and self._current_object is not None:
            # A <param> outside of any <object> has nothing to attach to and
            # is ignored instead of crashing on a None lookup.
            self._current_object['params'].append(attrs)

    def handle_endtag(self, tag):
        """Finish the open object on ``</object>``."""
        # A stray </object> with no open object must not add a None entry.
        if tag == 'object' and self._current_object is not None:
            self.objects.append(self._current_object)
            self._current_object = None

    @classmethod
    def extract_object_tags(cls, html):
        """Parse *html* and return the list of collected object dicts."""
        p = cls()
        p.feed(html)
        p.close()
        return p.objects
|
||||
|
||||
|
||||
class GroovesharkIE(InfoExtractor):
    """Extractor for single-song pages on grooveshark.com (``#!/s/<name>/<token>``).

    Extraction runs in steps: optionally fetch the player page, optionally
    fetch bootstrap data from ``preload.php``, fetch the track metadata from
    ``preload.php``, then describe a POST request against the stream server
    named in that metadata.
    """

    _VALID_URL = r'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)'
    _TEST = {
        'url': 'http://grooveshark.com/#!/s/Jolene+Tenth+Key+Remix+Ft+Will+Sessions/6SS1DW?src=5',
        'md5': '7ecf8aefa59d6b2098517e1baa530023',
        'info_dict': {
            'id': '6SS1DW',
            'title': 'Jolene (Tenth Key Remix ft. Will Sessions)',
            'ext': 'mp3',
            'duration': 227,
        }
    }

    # Switches for the two optional preparatory requests in _real_extract
    do_playerpage_request = True
    do_bootstrap_request = True

    def _parse_target(self, target):
        """Split *target* into ``(parsed_uri, fragment_path, token)``.

        ``fragment_path`` is the URL fragment without its first character
        and without anything from the first ``?`` on; ``token`` is the last
        path component of ``fragment_path``.
        """
        uri = compat_urlparse.urlparse(target)
        fragment_path = uri.fragment[1:].split('?')[0]
        token = os.path.basename(fragment_path.rstrip('/'))
        return (uri, fragment_path, token)

    def _build_preload_url(self, target, query_prefix=''):
        """Return ``(preload_url, token)`` for *target*.

        The query is ``<query_prefix>hash=<quoted fragment>&<self.ts>``.
        """
        (uri, fragment_path, token) = self._parse_target(target)
        query = '%shash=%s&%d' % (
            query_prefix,
            compat_urllib_parse.quote(fragment_path, safe=''),
            self.ts)
        preload_url = compat_urlparse.urlunparse(
            (uri.scheme, uri.netloc, '/preload.php', None, query, None))
        return (preload_url, token)

    def _build_bootstrap_url(self, target):
        """Return ``(url, token)`` for the bootstrap (communication token) request."""
        return self._build_preload_url(target, 'getCommunicationToken=1&')

    def _build_meta_url(self, target):
        """Return ``(url, token)`` for the track metadata request."""
        return self._build_preload_url(target)

    def _build_stream_url(self, meta):
        """Return the ``stream.php`` URL on the host given by ``meta['streamKey']['ip']``."""
        return compat_urlparse.urlunparse(
            ('http', meta['streamKey']['ip'], '/stream.php', None, None, None))

    def _build_swf_referer(self, target, obj):
        """Return the URL of the player object's ``data`` attribute on the target host."""
        (uri, _, _) = self._parse_target(target)
        return compat_urlparse.urlunparse(
            (uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))

    def _transform_bootstrap(self, js):
        """Cut the assigned value out of the bootstrap JavaScript.

        Everything from the first line starting with ``try {`` is dropped,
        then the text after the first `` = `` is returned without
        surrounding whitespace and trailing semicolons.
        """
        head = re.split(r'(?m)^\s*try\s*\{', js)[0]
        return head.split(' = ', 1)[1].strip().rstrip(';')

    def _transform_meta(self, js):
        """Cut the assigned value out of the first line of the metadata JavaScript.

        Only the first ``=`` separates the variable name from the value, so a
        value that itself contains ``=`` is kept intact.
        """
        first_line = js.split('\n')[0]
        return first_line.split('=', 1)[1].rstrip(';')

    def _get_meta(self, target):
        """Download the track metadata and return its ``getStreamKeyWithSong`` entry.

        Raises ExtractorError when the entry is missing or None.
        """
        (meta_url, token) = self._build_meta_url(target)
        self.to_screen('Metadata URL: %s' % meta_url)

        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
        req = compat_urllib_request.Request(meta_url, headers=headers)
        res = self._download_json(req, token,
                                  transform_source=self._transform_meta)

        if 'getStreamKeyWithSong' not in res:
            raise ExtractorError(
                'Metadata not found. URL may be malformed, or Grooveshark API may have changed.')

        if res['getStreamKeyWithSong'] is None:
            raise ExtractorError(
                'Metadata download failed, probably due to Grooveshark anti-abuse throttling. Wait at least an hour before retrying from this IP.',
                expected=True)

        return res['getStreamKeyWithSong']

    def _get_bootstrap(self, target):
        """Download the player bootstrap data (non-fatal) and return the result."""
        (bootstrap_url, token) = self._build_bootstrap_url(target)

        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
        req = compat_urllib_request.Request(bootstrap_url, headers=headers)
        res = self._download_json(req, token, fatal=False,
                                  note='Downloading player bootstrap data',
                                  errnote='Unable to download player bootstrap data',
                                  transform_source=self._transform_bootstrap)
        return res

    def _get_playerpage(self, target):
        """Download the player page (non-fatal).

        Returns ``(webpage, player_objects)`` where ``player_objects`` is the
        list of parsed ``<object>`` tags whose id is ``jsPlayerEmbed``, or
        None when no page content was obtained. Raises ExtractorError when
        the page carries an error heading.
        """
        (_, _, token) = self._parse_target(target)

        webpage = self._download_webpage(
            target, token,
            note='Downloading player page',
            errnote='Unable to download player page',
            fatal=False)

        # The download is non-fatal, so there may be nothing to inspect.
        # NOTE(review): the helper may signal failure with a falsy value
        # other than None - confirm against InfoExtractor._download_webpage.
        if not webpage:
            return webpage, None

        # Search (for example German) error message
        error_msg = self._html_search_regex(
            r'<div id="content">\s*<h2>(.*?)</h2>', webpage,
            'error message', default=None)
        if error_msg is not None:
            error_msg = error_msg.replace('\n', ' ')
            raise ExtractorError('Grooveshark said: %s' % error_msg)

        objects = GroovesharkHtmlParser.extract_object_tags(webpage)
        # Tolerate malformed entries and <object> tags without an id
        player_objects = [
            obj for obj in objects
            if obj is not None and obj['attrs'].get('id') == 'jsPlayerEmbed']
        return webpage, player_objects

    def _real_initialize(self):
        self.ts = int(time.time() * 1000)  # timestamp in millis

    def _real_extract(self, url):
        """Return the info dict describing the POST stream request for *url*."""
        (_, _, token) = self._parse_target(url)

        # 1. Fill cookiejar by making a request to the player page
        swf_referer = None
        if self.do_playerpage_request:
            (_, player_objs) = self._get_playerpage(url)
            if player_objs:
                swf_referer = self._build_swf_referer(url, player_objs[0])
                self.to_screen('SWF Referer: %s' % swf_referer)

        # 2. Ask preload.php for swf bootstrap data to better mimic webapp
        if self.do_bootstrap_request:
            bootstrap = self._get_bootstrap(url)
            # The bootstrap request is non-fatal and its result is only
            # displayed, so a failed download must not abort extraction.
            if isinstance(bootstrap, dict):
                self.to_screen(
                    'CommunicationToken: %s' % bootstrap.get('getCommunicationToken'))

        # 3. Ask preload.php for track metadata.
        meta = self._get_meta(url)

        # 4. Construct stream request for track.
        stream_url = self._build_stream_url(meta)
        # uSecs is divided by one million and rounded up to whole seconds
        duration = int(math.ceil(float(meta['streamKey']['uSecs']) / 1000000))
        post_dict = {'streamKey': meta['streamKey']['streamKey']}
        post_data = compat_urllib_parse.urlencode(post_dict).encode('utf-8')
        headers = {
            'Content-Length': len(post_data),
            'Content-Type': 'application/x-www-form-urlencoded'
        }
        if swf_referer is not None:
            headers['Referer'] = swf_referer

        return {
            'id': token,
            'title': meta['song']['Name'],
            'http_method': 'POST',
            'url': stream_url,
            'ext': 'mp3',
            'format': 'mp3 audio',
            'duration': duration,
            'http_post_data': post_data,
            'http_headers': headers,
        }
|
||||
@@ -25,7 +25,8 @@ class HistoricFilmsIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
tape_id = self._search_regex(
|
||||
r'class="tapeId">([^<]+)<', webpage, 'tape id')
|
||||
[r'class="tapeId"[^>]*>([^<]+)<', r'tapeId\s*:\s*"([^"]+)"'],
|
||||
webpage, 'tape id')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
@@ -1,36 +1,75 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class IconosquareIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://statigr.am/p/522207370455279102_24101272',
|
||||
'md5': '6eb93b882a3ded7c378ee1d6884b1814',
|
||||
'info_dict': {
|
||||
'id': '522207370455279102_24101272',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'aguynamedpatrick',
|
||||
'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
|
||||
'title': 'Instagram media by @aguynamedpatrick (Patrick Janelle)',
|
||||
'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
|
||||
'timestamp': 1376471991,
|
||||
'upload_date': '20130814',
|
||||
'uploader': 'aguynamedpatrick',
|
||||
'uploader_id': '24101272',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
media = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.media\s*=\s*({.+?});\n', webpage, 'media'),
|
||||
video_id)
|
||||
|
||||
formats = [{
|
||||
'url': f['url'],
|
||||
'format_id': format_id,
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height'))
|
||||
} for format_id, f in media['videos'].items()]
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)</title>',
|
||||
webpage, 'title')
|
||||
uploader_id = self._html_search_regex(
|
||||
r'@([^ ]+)', title, 'uploader name', fatal=False)
|
||||
|
||||
timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time'))
|
||||
description = media.get('caption', {}).get('text')
|
||||
|
||||
uploader = media.get('user', {}).get('username')
|
||||
uploader_id = media.get('user', {}).get('id')
|
||||
|
||||
comment_count = int_or_none(media.get('comments', {}).get('count'))
|
||||
like_count = int_or_none(media.get('likes', {}).get('count'))
|
||||
|
||||
thumbnails = [{
|
||||
'url': t['url'],
|
||||
'id': thumbnail_id,
|
||||
'width': int_or_none(t.get('width')),
|
||||
'height': int_or_none(t.get('height'))
|
||||
} for thumbnail_id, t in media.get('images', {}).items()]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': self._og_search_video_url(webpage),
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader_id': uploader_id
|
||||
'description': description,
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'comment_count': comment_count,
|
||||
'like_count': like_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
srt_subtitles_timecode,
|
||||
)
|
||||
|
||||
|
||||
@@ -39,8 +40,8 @@ class KanalPlayIE(InfoExtractor):
|
||||
'%s\r\n%s --> %s\r\n%s'
|
||||
% (
|
||||
num,
|
||||
self._subtitles_timecode(item['startMillis'] / 1000.0),
|
||||
self._subtitles_timecode(item['endMillis'] / 1000.0),
|
||||
srt_subtitles_timecode(item['startMillis'] / 1000.0),
|
||||
srt_subtitles_timecode(item['endMillis'] / 1000.0),
|
||||
item['text'],
|
||||
) for num, item in enumerate(subs, 1))
|
||||
|
||||
|
||||
@@ -4,7 +4,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
@@ -14,9 +16,9 @@ from ..utils import (
|
||||
class LifeNewsIE(InfoExtractor):
|
||||
IE_NAME = 'lifenews'
|
||||
IE_DESC = 'LIFE | NEWS'
|
||||
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
|
||||
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://lifenews.ru/news/126342',
|
||||
'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
|
||||
'info_dict': {
|
||||
@@ -27,16 +29,47 @@ class LifeNewsIE(InfoExtractor):
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'upload_date': '20140130',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# video in <iframe>
|
||||
'url': 'http://lifenews.ru/news/152125',
|
||||
'md5': '77d19a6f0886cd76bdbf44b4d971a273',
|
||||
'info_dict': {
|
||||
'id': '152125',
|
||||
'ext': 'mp4',
|
||||
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
|
||||
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
|
||||
'upload_date': '20150402',
|
||||
'uploader': 'embed.life.ru',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://lifenews.ru/news/153461',
|
||||
'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
|
||||
'info_dict': {
|
||||
'id': '153461',
|
||||
'ext': 'mp4',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'upload_date': '20150505',
|
||||
'uploader': 'embed.life.ru',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://lifenews.ru/video/13035',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
section = mobj.group('section')
|
||||
|
||||
webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')
|
||||
webpage = self._download_webpage(
|
||||
'http://lifenews.ru/%s/%s' % (section, video_id),
|
||||
video_id, 'Downloading page')
|
||||
|
||||
videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
|
||||
if not videos:
|
||||
iframe_link = self._html_search_regex(
|
||||
'<iframe[^>]+src=["\']([^"\']+)["\']', webpage, 'iframe link', default=None)
|
||||
if not videos and not iframe_link:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
@@ -47,28 +80,90 @@ class LifeNewsIE(InfoExtractor):
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False)
|
||||
r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
|
||||
comment_count = self._html_search_regex(
|
||||
r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False)
|
||||
r'<div class=\'comments\'>\s*<span class=\'counter\'>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
|
||||
if upload_date is not None:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
common_info = {
|
||||
'description': description,
|
||||
'view_count': int_or_none(view_count),
|
||||
'comment_count': int_or_none(comment_count),
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
def make_entry(video_id, media, video_number=None):
|
||||
return {
|
||||
cur_info = dict(common_info)
|
||||
cur_info.update({
|
||||
'id': video_id,
|
||||
'url': media[1],
|
||||
'thumbnail': media[0],
|
||||
'title': title if video_number is None else '%s-video%s' % (title, video_number),
|
||||
'description': description,
|
||||
'view_count': int_or_none(view_count),
|
||||
'comment_count': int_or_none(comment_count),
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
})
|
||||
return cur_info
|
||||
|
||||
if iframe_link:
|
||||
iframe_link = self._proto_relative_url(iframe_link, 'http:')
|
||||
cur_info = dict(common_info)
|
||||
cur_info.update({
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': iframe_link,
|
||||
})
|
||||
return cur_info
|
||||
|
||||
if len(videos) == 1:
|
||||
return make_entry(video_id, videos[0])
|
||||
else:
|
||||
return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]
|
||||
|
||||
|
||||
class LifeEmbedIE(InfoExtractor):
    """Extractor for embedded players at ``embed.life.ru/embed/<32 hex chars>``.

    The embed page lists media in ``"file": "..."`` entries; each entry
    becomes either a set of m3u8 formats or a single direct format.
    """

    IE_NAME = 'life:embed'
    _VALID_URL = r'http://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'

    _TEST = {
        'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
        'md5': 'b889715c9e49cb1981281d0e5458fbbe',
        'info_dict': {
            'id': 'e50c2dec2867350528e2574c899b8291',
            'ext': 'mp4',
            'title': 'e50c2dec2867350528e2574c899b8291',
            # Raw string: the pattern contains a backslash, which is an
            # invalid escape sequence in a plain string literal.
            'thumbnail': r're:http://.*\.jpg',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the embed page at *url*.

        The page offers no title in the data read here, so the video id is
        used as the title.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        formats = []
        for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
            # Entries may be relative to the embed page
            video_url = compat_urlparse.urljoin(url, video_url)
            ext = determine_ext(video_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', m3u8_id='m3u8'))
            else:
                formats.append({
                    'url': video_url,
                    'format_id': ext,
                    'preference': 1,
                })
        self._sort_formats(formats)

        thumbnail = self._search_regex(
            r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)

        return {
            'id': video_id,
            'title': video_id,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
||||
|
||||
@@ -194,23 +194,19 @@ class LivestreamIE(InfoExtractor):
|
||||
# The original version of Livestream uses a different system
|
||||
class LivestreamOriginalIE(InfoExtractor):
|
||||
IE_NAME = 'livestream:original'
|
||||
_VALID_URL = r'''(?x)https?://www\.livestream\.com/
|
||||
_VALID_URL = r'''(?x)https?://original\.livestream\.com/
|
||||
(?P<user>[^/]+)/(?P<type>video|folder)
|
||||
(?:\?.*?Id=|/)(?P<id>.*?)(&|$)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||
'url': 'http://original.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||
'info_dict': {
|
||||
'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
|
||||
},
|
||||
'params': {
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
|
||||
'url': 'https://original.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
|
||||
'info_dict': {
|
||||
'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
|
||||
},
|
||||
@@ -221,19 +217,17 @@ class LivestreamOriginalIE(InfoExtractor):
|
||||
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
|
||||
|
||||
info = self._download_xml(api_url, video_id)
|
||||
# this url is used on mobile devices
|
||||
stream_url = 'http://x{0}x.api.channel.livestream.com/3.0/getstream.json?id={1}'.format(user, video_id)
|
||||
stream_info = self._download_json(stream_url, video_id)
|
||||
item = info.find('channel').find('item')
|
||||
ns = {'media': 'http://search.yahoo.com/mrss'}
|
||||
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
|
||||
# Remove the extension and number from the path (like 1.jpg)
|
||||
path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': item.find('title').text,
|
||||
'url': 'rtmp://extondemand.livestream.com/ondemand',
|
||||
'play_path': 'trans/dv15/mogulus-{0}'.format(path),
|
||||
'player_url': 'http://static.livestream.com/chromelessPlayer/v21/playerapi.swf?hash=5uetk&v=0803&classid=D27CDB6E-AE6D-11cf-96B8-444553540000&jsEnabled=false&wmode=opaque',
|
||||
'ext': 'flv',
|
||||
'url': stream_info['progressiveUrl'],
|
||||
'thumbnail': thumbnail_url,
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,21 @@ from ..utils import (
|
||||
|
||||
|
||||
class MLBIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:[\da-z_-]+\.)*mlb\.com/
|
||||
(?:
|
||||
(?:
|
||||
(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|
|
||||
(?:
|
||||
shared/video/embed/(?:embed|m-internal-embed)\.html|
|
||||
(?:[^/]+/)+(?:play|index)\.jsp|
|
||||
)\?.*?\bcontent_id=
|
||||
)
|
||||
(?P<id>n?\d+)|
|
||||
(?:[^/]+/)*(?P<path>[^/]+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
||||
@@ -68,6 +82,18 @@ class MLBIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
|
||||
'md5': 'b190e70141fb9a1552a85426b4da1b5d',
|
||||
'info_dict': {
|
||||
'id': '75609783',
|
||||
'ext': 'mp4',
|
||||
'title': 'Must C: Pillar climbs for catch',
|
||||
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
|
||||
'timestamp': 1429124820,
|
||||
'upload_date': '20150415',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
|
||||
'only_matching': True,
|
||||
@@ -83,6 +109,15 @@ class MLBIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer
|
||||
'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://washington.nationals.mlb.com/mlb/gameday/index.jsp?c_id=was&gid=2015_05_09_atlmlb_wasmlb_1&lang=en&content_id=108309983&mode=video#',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -90,6 +125,12 @@ class MLBIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
if not video_id:
|
||||
video_path = mobj.group('path')
|
||||
webpage = self._download_webpage(url, video_path)
|
||||
video_id = self._search_regex(
|
||||
[r'data-video-?id="(\d+)"', r'content_id=(\d+)'], webpage, 'video id')
|
||||
|
||||
detail = self._download_xml(
|
||||
'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
|
||||
% (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
|
||||
|
||||
@@ -10,6 +10,8 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
lowercase_escape,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
@@ -37,14 +39,32 @@ class NBCIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Only works from US',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
|
||||
'info_dict': {
|
||||
'id': '8iUuyzWDdYUZ',
|
||||
'ext': 'flv',
|
||||
'title': 'Star Wars Teaser',
|
||||
'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
|
||||
},
|
||||
'skip': 'Only works from US',
|
||||
},
|
||||
{
|
||||
# This video has expired but with an escaped embedURL
|
||||
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
|
||||
'skip': 'Expired'
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
theplatform_url = self._search_regex(
|
||||
'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
|
||||
webpage, 'theplatform url').replace('_no_endcard', '')
|
||||
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
|
||||
[
|
||||
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
|
||||
r'"embedURL"\s*:\s*"([^"]+)"'
|
||||
],
|
||||
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
||||
if theplatform_url.startswith('//'):
|
||||
theplatform_url = 'http:' + theplatform_url
|
||||
return self.url_result(theplatform_url)
|
||||
|
||||
@@ -8,41 +8,11 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class NDRIE(InfoExtractor):
|
||||
IE_NAME = 'ndr'
|
||||
IE_DESC = 'NDR.de - Mediathek'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
|
||||
'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
|
||||
'note': 'Video file',
|
||||
'info_dict': {
|
||||
'id': '25866',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kartoffeltage in der Lewitz',
|
||||
'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
|
||||
'duration': 166,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ndr.de/info/audio51535.html',
|
||||
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
||||
'note': 'Audio file',
|
||||
'info_dict': {
|
||||
'id': '51535',
|
||||
'ext': 'mp3',
|
||||
'title': 'La Valette entgeht der Hinrichtung',
|
||||
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
||||
'duration': 884,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
class NDRBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
@@ -54,7 +24,11 @@ class NDRIE(InfoExtractor):
|
||||
if description:
|
||||
description = description.strip()
|
||||
|
||||
duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', fatal=False))
|
||||
duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', default=None))
|
||||
if not duration:
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'(<span class="min">\d+</span>:<span class="sec">\d+</span>)',
|
||||
page, 'duration', default=None))
|
||||
|
||||
formats = []
|
||||
|
||||
@@ -92,3 +66,65 @@ class NDRIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class NDRIE(NDRBaseIE):
    """ndr.de pages; only the URL pattern and test data are defined here."""

    IE_NAME = 'ndr'
    IE_DESC = 'NDR.de - Mediathek'
    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'

    _TESTS = [
        # Video file (page no longer available)
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
            'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
            'note': 'Video file',
            'info_dict': {
                'id': '25866',
                'ext': 'mp4',
                'title': 'Kartoffeltage in der Lewitz',
                'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
                'duration': 166,
            },
            'skip': '404 Not found',
        },
        # Video file
        {
            'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
            'md5': 'dadc003c55ae12a5d2f6bd436cd73f59',
            'info_dict': {
                'id': '988',
                'ext': 'mp4',
                'title': 'Party, Pötte und Parade',
                'description': 'Hunderttausende feiern zwischen Speicherstadt und St. Pauli den 826. Hafengeburtstag. Die NDR Sondersendung zeigt die schönsten und spektakulärsten Bilder vom Auftakt.',
                'duration': 3498,
            },
        },
        # Audio file
        {
            'url': 'http://www.ndr.de/info/audio51535.html',
            'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
            'note': 'Audio file',
            'info_dict': {
                'id': '51535',
                'ext': 'mp3',
                'title': 'La Valette entgeht der Hinrichtung',
                'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
                'duration': 884,
            }
        }
    ]
|
||||
|
||||
|
||||
class NJoyIE(NDRBaseIE):
    """n-joy.de pages; only the URL pattern and test data are defined here."""

    IE_NAME = 'N-JOY'
    _VALID_URL = r'https?://www\.n-joy\.de/.+?(?P<id>\d+)\.html'

    _TEST = {
        'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
        'md5': 'cb63be60cd6f9dd75218803146d8dc67',
        'info_dict': {
            'id': '2480',
            'ext': 'mp4',
            'title': 'Benaissa beim NDR Comedy Contest',
            'description': 'Von seinem sehr "behaarten" Leben lässt sich Benaissa trotz aller Schwierigkeiten nicht unterkriegen.',
            'duration': 654,
        }
    }
|
||||
|
||||
@@ -49,7 +49,7 @@ class NetzkinoIE(InfoExtractor):
|
||||
'http://www.netzkino.de/beta/dist/production.min.js', video_id,
|
||||
note='Downloading player code')
|
||||
avo_js = self._search_regex(
|
||||
r'window\.avoCore\s*=.*?urlTemplate:\s*(\{.*?"\})',
|
||||
r'var urlTemplate=(\{.*?"\})',
|
||||
production_js, 'URL templates')
|
||||
templates = self._parse_json(
|
||||
avo_js, video_id, transform_source=js_to_json)
|
||||
|
||||
@@ -21,6 +21,9 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
return json_string.replace('\\\'', '\'')
|
||||
|
||||
def _real_extract_video(self, video_id):
|
||||
vid_parts = video_id.split(',')
|
||||
if len(vid_parts) == 3:
|
||||
video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0'))
|
||||
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
|
||||
data = self._download_json(
|
||||
json_url, video_id, transform_source=self._fix_json)
|
||||
@@ -47,7 +50,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
video_url = initial_video_url
|
||||
|
||||
join = compat_urlparse.urljoin
|
||||
return {
|
||||
ret = {
|
||||
'id': video_id,
|
||||
'title': info['name'],
|
||||
'url': video_url,
|
||||
@@ -56,11 +59,20 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
|
||||
'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
|
||||
}
|
||||
if video_url.startswith('rtmp:'):
|
||||
mobj = re.match(r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)', video_url)
|
||||
ret.update({
|
||||
'tc_url': mobj.group('tc_url'),
|
||||
'play_path': mobj.group('play_path'),
|
||||
'app': mobj.group('app'),
|
||||
'no_resume': True,
|
||||
})
|
||||
return ret
|
||||
|
||||
|
||||
class NHLIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)(?:id|hlg)=(?P<id>[-0-9a-zA-Z,]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
||||
@@ -101,6 +113,29 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://video.nhl.com/videocenter/?id=736722',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en',
|
||||
'md5': '076fcb88c255154aacbf0a7accc3f340',
|
||||
'info_dict': {
|
||||
'id': '2014020299-X-h',
|
||||
'ext': 'mp4',
|
||||
'title': 'Penguins at Islanders / Game Highlights',
|
||||
'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014',
|
||||
'duration': 268,
|
||||
'upload_date': '20141122',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4',
|
||||
'info_dict': {
|
||||
'id': '691469',
|
||||
'ext': 'mp4',
|
||||
'title': 'RAW | Craig MacTavish Full Press Conference',
|
||||
'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.',
|
||||
'upload_date': '20141205',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -14,7 +15,9 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
parse_iso8601,
|
||||
xpath_text,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -32,30 +35,50 @@ class NiconicoIE(InfoExtractor):
|
||||
'uploader': 'takuya0301',
|
||||
'uploader_id': '2698420',
|
||||
'upload_date': '20131123',
|
||||
'timestamp': 1385182762,
|
||||
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
|
||||
'duration': 33,
|
||||
},
|
||||
'params': {
|
||||
'username': 'ydl.niconico@gmail.com',
|
||||
'password': 'youtube-dl',
|
||||
},
|
||||
}, {
|
||||
# File downloaded with and without credentials are different, so omit
|
||||
# the md5 field
|
||||
'url': 'http://www.nicovideo.jp/watch/nm14296458',
|
||||
'md5': '8db08e0158457cf852a31519fceea5bc',
|
||||
'info_dict': {
|
||||
'id': 'nm14296458',
|
||||
'ext': 'swf',
|
||||
'title': '【鏡音リン】Dance on media【オリジナル】take2!',
|
||||
'description': 'md5:',
|
||||
'description': 'md5:689f066d74610b3b22e0f1739add0f58',
|
||||
'uploader': 'りょうた',
|
||||
'uploader_id': '18822557',
|
||||
'upload_date': '20110429',
|
||||
'timestamp': 1304065916,
|
||||
'duration': 209,
|
||||
},
|
||||
'params': {
|
||||
'username': 'ydl.niconico@gmail.com',
|
||||
'password': 'youtube-dl',
|
||||
}, {
|
||||
# 'video exists but is marked as "deleted"
|
||||
# md5 is unstable
|
||||
'url': 'http://www.nicovideo.jp/watch/sm10000',
|
||||
'info_dict': {
|
||||
'id': 'sm10000',
|
||||
'ext': 'unknown_video',
|
||||
'description': 'deleted',
|
||||
'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
|
||||
'upload_date': '20071224',
|
||||
'timestamp': 1198527840, # timestamp field has different value if logged in
|
||||
'duration': 304,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nicovideo.jp/watch/so22543406',
|
||||
'info_dict': {
|
||||
'id': '1388129933',
|
||||
'ext': 'mp4',
|
||||
'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
|
||||
'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
|
||||
'timestamp': 1388851200,
|
||||
'upload_date': '20140104',
|
||||
'uploader': 'アニメロチャンネル',
|
||||
'uploader_id': '312',
|
||||
}
|
||||
}]
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
@@ -95,9 +118,13 @@ class NiconicoIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Get video webpage. We are not actually interested in it, but need
|
||||
# the cookies in order to be able to download the info webpage
|
||||
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||
# Get video webpage. We are not actually interested in it for normal
|
||||
# cases, but need the cookies in order to be able to download the
|
||||
# info webpage
|
||||
webpage, handle = self._download_webpage_handle(
|
||||
'http://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||
if video_id.startswith('so'):
|
||||
video_id = self._match_id(handle.geturl())
|
||||
|
||||
video_info = self._download_xml(
|
||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
||||
@@ -127,22 +154,78 @@ class NiconicoIE(InfoExtractor):
|
||||
flv_info_request, video_id,
|
||||
note='Downloading flv info', errnote='Unable to download flv info')
|
||||
|
||||
if 'deleted=' in flv_info_webpage:
|
||||
raise ExtractorError('The video has been deleted.',
|
||||
expected=True)
|
||||
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
||||
flv_info = compat_urlparse.parse_qs(flv_info_webpage)
|
||||
if 'url' not in flv_info:
|
||||
if 'deleted' in flv_info:
|
||||
raise ExtractorError('The video has been deleted.',
|
||||
expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to find video URL')
|
||||
|
||||
video_real_url = flv_info['url'][0]
|
||||
|
||||
# Start extracting information
|
||||
title = video_info.find('.//title').text
|
||||
extension = video_info.find('.//movie_type').text
|
||||
title = xpath_text(video_info, './/title')
|
||||
if not title:
|
||||
title = self._og_search_title(webpage, default=None)
|
||||
if not title:
|
||||
title = self._html_search_regex(
|
||||
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
|
||||
webpage, 'video title')
|
||||
|
||||
watch_api_data_string = self._html_search_regex(
|
||||
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
|
||||
webpage, 'watch api data', default=None)
|
||||
watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
|
||||
video_detail = watch_api_data.get('videoDetail', {})
|
||||
|
||||
extension = xpath_text(video_info, './/movie_type')
|
||||
if not extension:
|
||||
extension = determine_ext(video_real_url)
|
||||
video_format = extension.upper()
|
||||
thumbnail = video_info.find('.//thumbnail_url').text
|
||||
description = video_info.find('.//description').text
|
||||
upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
|
||||
view_count = int_or_none(video_info.find('.//view_counter').text)
|
||||
comment_count = int_or_none(video_info.find('.//comment_num').text)
|
||||
duration = parse_duration(video_info.find('.//length').text)
|
||||
webpage_url = video_info.find('.//watch_url').text
|
||||
|
||||
thumbnail = (
|
||||
xpath_text(video_info, './/thumbnail_url') or
|
||||
self._html_search_meta('image', webpage, 'thumbnail', default=None) or
|
||||
video_detail.get('thumbnail'))
|
||||
|
||||
description = xpath_text(video_info, './/description')
|
||||
|
||||
timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve'))
|
||||
if not timestamp:
|
||||
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
|
||||
if match:
|
||||
timestamp = parse_iso8601(match.replace('+', ':00+'))
|
||||
if not timestamp and video_detail.get('postedAt'):
|
||||
timestamp = parse_iso8601(
|
||||
video_detail['postedAt'].replace('/', '-'),
|
||||
delimiter=' ', timezone=datetime.timedelta(hours=9))
|
||||
|
||||
view_count = int_or_none(xpath_text(video_info, './/view_counter'))
|
||||
if not view_count:
|
||||
match = self._html_search_regex(
|
||||
r'>Views: <strong[^>]*>([^<]+)</strong>',
|
||||
webpage, 'view count', default=None)
|
||||
if match:
|
||||
view_count = int_or_none(match.replace(',', ''))
|
||||
view_count = view_count or video_detail.get('viewCount')
|
||||
|
||||
comment_count = int_or_none(xpath_text(video_info, './/comment_num'))
|
||||
if not comment_count:
|
||||
match = self._html_search_regex(
|
||||
r'>Comments: <strong[^>]*>([^<]+)</strong>',
|
||||
webpage, 'comment count', default=None)
|
||||
if match:
|
||||
comment_count = int_or_none(match.replace(',', ''))
|
||||
comment_count = comment_count or video_detail.get('commentCount')
|
||||
|
||||
duration = (parse_duration(
|
||||
xpath_text(video_info, './/length') or
|
||||
self._html_search_meta(
|
||||
'video:duration', webpage, 'video duration', default=None)) or
|
||||
video_detail.get('length'))
|
||||
|
||||
webpage_url = xpath_text(video_info, './/watch_url') or url
|
||||
|
||||
if video_info.find('.//ch_id') is not None:
|
||||
uploader_id = video_info.find('.//ch_id').text
|
||||
@@ -162,7 +245,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'timestamp': timestamp,
|
||||
'uploader_id': uploader_id,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
|
||||
@@ -14,7 +14,9 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,21 +27,38 @@ class NocoIE(InfoExtractor):
|
||||
_SUB_LANG_TEMPLATE = '&sub_lang=%s'
|
||||
_NETRC_MACHINE = 'noco'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
|
||||
'md5': '0a993f0058ddbcd902630b2047ef710e',
|
||||
'info_dict': {
|
||||
'id': '11538',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ami Ami Idol - Hello! France',
|
||||
'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
|
||||
'upload_date': '20140412',
|
||||
'uploader': 'Nolife',
|
||||
'uploader_id': 'NOL',
|
||||
'duration': 2851.2,
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
|
||||
'md5': '0a993f0058ddbcd902630b2047ef710e',
|
||||
'info_dict': {
|
||||
'id': '11538',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ami Ami Idol - Hello! France',
|
||||
'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
|
||||
'upload_date': '20140412',
|
||||
'uploader': 'Nolife',
|
||||
'uploader_id': 'NOL',
|
||||
'duration': 2851.2,
|
||||
},
|
||||
'skip': 'Requires noco account',
|
||||
},
|
||||
'skip': 'Requires noco account',
|
||||
}
|
||||
{
|
||||
'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call',
|
||||
'md5': 'c190f1f48e313c55838f1f412225934d',
|
||||
'info_dict': {
|
||||
'id': '12610',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Guild #1 - Wake-Up Call',
|
||||
'timestamp': 1403863200,
|
||||
'upload_date': '20140627',
|
||||
'uploader': 'LBL42',
|
||||
'uploader_id': 'LBL',
|
||||
'duration': 233.023,
|
||||
},
|
||||
'skip': 'Requires noco account',
|
||||
}
|
||||
]
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@@ -90,51 +109,66 @@ class NocoIE(InfoExtractor):
|
||||
'shows/%s/medias' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
show = self._call_api(
|
||||
'shows/by_id/%s' % video_id,
|
||||
video_id, 'Downloading show JSON')[0]
|
||||
|
||||
options = self._call_api(
|
||||
'users/init', video_id,
|
||||
'Downloading user options JSON')['options']
|
||||
audio_lang_pref = options.get('audio_language') or options.get('language', 'fr')
|
||||
|
||||
if audio_lang_pref == 'original':
|
||||
audio_lang_pref = show['original_lang']
|
||||
if len(medias) == 1:
|
||||
audio_lang_pref = list(medias.keys())[0]
|
||||
elif audio_lang_pref not in medias:
|
||||
audio_lang_pref = 'fr'
|
||||
|
||||
qualities = self._call_api(
|
||||
'qualities',
|
||||
video_id, 'Downloading qualities JSON')
|
||||
|
||||
formats = []
|
||||
|
||||
for lang, lang_dict in medias['fr']['video_list'].items():
|
||||
for format_id, fmt in lang_dict['quality_list'].items():
|
||||
format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
|
||||
for audio_lang, audio_lang_dict in medias.items():
|
||||
preference = 1 if audio_lang == audio_lang_pref else 0
|
||||
for sub_lang, lang_dict in audio_lang_dict['video_list'].items():
|
||||
for format_id, fmt in lang_dict['quality_list'].items():
|
||||
format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id)
|
||||
|
||||
video = self._call_api(
|
||||
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
|
||||
video_id, 'Downloading %s video JSON' % format_id_extended,
|
||||
lang if lang != 'none' else None)
|
||||
video = self._call_api(
|
||||
'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang),
|
||||
video_id, 'Downloading %s video JSON' % format_id_extended,
|
||||
sub_lang if sub_lang != 'none' else None)
|
||||
|
||||
file_url = video['file']
|
||||
if not file_url:
|
||||
continue
|
||||
file_url = video['file']
|
||||
if not file_url:
|
||||
continue
|
||||
|
||||
if file_url in ['forbidden', 'not found']:
|
||||
popmessage = video['popmessage']
|
||||
self._raise_error(popmessage['title'], popmessage['message'])
|
||||
if file_url in ['forbidden', 'not found']:
|
||||
popmessage = video['popmessage']
|
||||
self._raise_error(popmessage['title'], popmessage['message'])
|
||||
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'format_id': format_id_extended,
|
||||
'width': fmt['res_width'],
|
||||
'height': fmt['res_lines'],
|
||||
'abr': fmt['audiobitrate'],
|
||||
'vbr': fmt['videobitrate'],
|
||||
'filesize': fmt['filesize'],
|
||||
'format_note': qualities[format_id]['quality_name'],
|
||||
'preference': qualities[format_id]['priority'],
|
||||
})
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'format_id': format_id_extended,
|
||||
'width': int_or_none(fmt.get('res_width')),
|
||||
'height': int_or_none(fmt.get('res_lines')),
|
||||
'abr': int_or_none(fmt.get('audiobitrate')),
|
||||
'vbr': int_or_none(fmt.get('videobitrate')),
|
||||
'filesize': int_or_none(fmt.get('filesize')),
|
||||
'format_note': qualities[format_id].get('quality_name'),
|
||||
'quality': qualities[format_id].get('priority'),
|
||||
'preference': preference,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
show = self._call_api(
|
||||
'shows/by_id/%s' % video_id,
|
||||
video_id, 'Downloading show JSON')[0]
|
||||
|
||||
upload_date = unified_strdate(show['online_date_start_utc'])
|
||||
uploader = show['partner_name']
|
||||
uploader_id = show['partner_key']
|
||||
duration = show['duration_ms'] / 1000.0
|
||||
timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ')
|
||||
uploader = show.get('partner_name')
|
||||
uploader_id = show.get('partner_key')
|
||||
duration = float_or_none(show.get('duration_ms'), 1000)
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_key, thumbnail_url in show.items():
|
||||
@@ -166,7 +200,7 @@ class NocoIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnails': thumbnails,
|
||||
'upload_date': upload_date,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
|
||||
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
@@ -200,20 +199,10 @@ class NRKTVIE(InfoExtractor):
|
||||
url = "%s%s" % (baseurl, subtitlesurl)
|
||||
self._debug_print('%s: Subtitle url: %s' % (video_id, url))
|
||||
captions = self._download_xml(
|
||||
url, video_id, 'Downloading subtitles',
|
||||
transform_source=lambda s: s.replace(r'<br />', '\r\n'))
|
||||
url, video_id, 'Downloading subtitles')
|
||||
lang = captions.get('lang', 'no')
|
||||
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
|
||||
srt = ''
|
||||
for pos, p in enumerate(ps):
|
||||
begin = parse_duration(p.get('begin'))
|
||||
duration = parse_duration(p.get('dur'))
|
||||
starttime = self._subtitles_timecode(begin)
|
||||
endtime = self._subtitles_timecode(begin + duration)
|
||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
|
||||
return {lang: [
|
||||
{'ext': 'ttml', 'url': url},
|
||||
{'ext': 'srt', 'data': srt},
|
||||
]}
|
||||
|
||||
def _extract_f4m(self, manifest_url, video_id):
|
||||
|
||||
@@ -8,30 +8,8 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class NYTimesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
||||
'md5': '18a525a510f942ada2720db5f31644c0',
|
||||
'info_dict': {
|
||||
'id': '100000002847155',
|
||||
'ext': 'mov',
|
||||
'title': 'Verbatim: What Is a Photocopier?',
|
||||
'description': 'md5:93603dada88ddbda9395632fdc5da260',
|
||||
'timestamp': 1398631707,
|
||||
'upload_date': '20140427',
|
||||
'uploader': 'Brett Weiner',
|
||||
'duration': 419,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
class NYTimesBaseIE(InfoExtractor):
|
||||
def _extract_video_from_id(self, video_id):
|
||||
video_data = self._download_json(
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
@@ -81,3 +59,59 @@ class NYTimesIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
|
||||
class NYTimesIE(NYTimesBaseIE):
    """Extractor entry for nytimes.com video pages and graphics8 iframe embeds.

    The numeric id is taken straight from the URL and handed to
    ``_extract_video_from_id`` on the base class.
    """
    # Two URL shapes share one pattern: /video/<section...>/<id> pages and
    # the bcvideo iframe embed carrying the id in the ``videoId`` parameter.
    _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
        'md5': '18a525a510f942ada2720db5f31644c0',
        'info_dict': {
            'id': '100000002847155',
            'ext': 'mov',
            'title': 'Verbatim: What Is a Photocopier?',
            'description': 'md5:93603dada88ddbda9395632fdc5da260',
            'timestamp': 1398631707,
            'upload_date': '20140427',
            'uploader': 'Brett Weiner',
            'duration': 419,
        },
    }, {
        'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video whose id is embedded in *url*."""
        video_id = self._match_id(url)
        return self._extract_video_from_id(video_id)
|
||||
|
||||
|
||||
class NYTimesArticleIE(NYTimesBaseIE):
    """Extractor entry for nytimes.com article pages that embed a video.

    The article page is downloaded, the numeric video id is read from its
    ``data-videoid`` attribute, and extraction is delegated to
    ``_extract_video_from_id`` on the base class.
    """
    # NOTE(review): the ``(?<!video)`` lookbehind looks intended to leave
    # /video/ URLs to NYTimesIE -- confirm against the extractor ordering.
    _VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
    _TESTS = [{
        'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
        'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
        'info_dict': {
            'id': '100000003628438',
            'ext': 'mov',
            'title': 'New Minimum Wage: $70,000 a Year',
            'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.',
            'timestamp': 1429033037,
            'upload_date': '20150414',
            'uploader': 'Matthew Williams',
        },
    }, {
        'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video embedded in the article at *url*."""
        # The id matched from the URL is the article slug; it is only used
        # to label the page download.
        page_id = self._match_id(url)

        webpage = self._download_webpage(url, page_id)

        # The real (numeric) video id lives in the page markup.
        video_id = self._html_search_regex(r'data-videoid="(\d+)"', webpage, 'video id')

        return self._extract_video_from_id(video_id)
|
||||
|
||||
@@ -6,6 +6,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
qualities,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,8 +37,8 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'),
|
||||
unescapeHTML(self._search_regex(
|
||||
r'data-attributes="([^"]+)"', webpage, 'player')),
|
||||
video_id)
|
||||
|
||||
metadata = self._parse_json(player['flashvars']['metadata'], video_id)
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import json
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -32,6 +35,17 @@ class OoyalaIE(InfoExtractor):
|
||||
'description': '',
|
||||
},
|
||||
},
|
||||
{
|
||||
# Information available only through SAS api
|
||||
# From http://community.plm.automation.siemens.com/t5/News-NX-Manufacturing/Tool-Path-Divide/ba-p/4187
|
||||
'url': 'http://player.ooyala.com/player.js?embedCode=FiOG81ZTrvckcchQxmalf4aQj590qTEx',
|
||||
'md5': 'a84001441b35ea492bc03736e59e7935',
|
||||
'info_dict': {
|
||||
'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ooyala video',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
@@ -44,11 +58,21 @@ class OoyalaIE(InfoExtractor):
|
||||
ie=cls.ie_key())
|
||||
|
||||
def _extract_result(self, info, more_info):
    """Build the result dict for one video.

    *info* supplies ``embedCode``, ``title`` and the stream URL
    (``ipad_url`` is preferred, falling back to ``url``); *more_info*
    supplies ``description`` and ``promo`` (used as the thumbnail).

    The stale pre-change entries (a second ``'id'`` key and top-level
    ``'ext'`` / ``'url'``) are dropped: the stream is described only
    through ``formats``.
    """
    embed_code = info['embedCode']
    video_url = info.get('ipad_url') or info['url']

    if determine_ext(video_url) == 'm3u8':
        # An HLS manifest is expanded into its individual variants.
        formats = self._extract_m3u8_formats(video_url, embed_code, ext='mp4')
    else:
        formats = [{
            'url': video_url,
            'ext': 'mp4',
        }]

    return {
        'id': embed_code,
        'title': unescapeHTML(info['title']),
        'formats': formats,
        'description': unescapeHTML(more_info['description']),
        'thumbnail': more_info['promo'],
    }
|
||||
@@ -77,6 +101,36 @@ class OoyalaIE(InfoExtractor):
|
||||
mobile_player, 'info', fatal=False, default=None)
|
||||
if videos_info:
|
||||
break
|
||||
|
||||
if not videos_info:
|
||||
formats = []
|
||||
auth_data = self._download_json(
|
||||
'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (embedCode, embedCode),
|
||||
embedCode)
|
||||
|
||||
cur_auth_data = auth_data['authorization_data'][embedCode]
|
||||
|
||||
for stream in cur_auth_data['streams']:
|
||||
formats.append({
|
||||
'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'),
|
||||
'ext': stream.get('delivery_type'),
|
||||
'format': stream.get('video_codec'),
|
||||
'format_id': stream.get('profile'),
|
||||
'width': int_or_none(stream.get('width')),
|
||||
'height': int_or_none(stream.get('height')),
|
||||
'abr': int_or_none(stream.get('audio_bitrate')),
|
||||
'vbr': int_or_none(stream.get('video_bitrate')),
|
||||
})
|
||||
if formats:
|
||||
return {
|
||||
'id': embedCode,
|
||||
'formats': formats,
|
||||
'title': 'Ooyala video',
|
||||
}
|
||||
|
||||
if not cur_auth_data['authorized']:
|
||||
raise ExtractorError(cur_auth_data['message'], expected=True)
|
||||
|
||||
if not videos_info:
|
||||
raise ExtractorError('Unable to extract info')
|
||||
videos_info = videos_info.replace('\\"', '"')
|
||||
|
||||
@@ -5,6 +5,8 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
US_RATINGS,
|
||||
)
|
||||
@@ -149,21 +151,45 @@ class PBSIE(InfoExtractor):
|
||||
for vid_id in video_id]
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||
info = self._download_json(info_url, display_id)
|
||||
info = self._download_json(
|
||||
'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
||||
display_id)
|
||||
|
||||
redirect_url = info['alternate_encoding']['url']
|
||||
redirect_info = self._download_json(
|
||||
redirect_url + '?format=json', display_id,
|
||||
'Downloading video url info')
|
||||
if redirect_info['status'] == 'error':
|
||||
if redirect_info['http_code'] == 403:
|
||||
message = (
|
||||
'The video is not available in your region due to '
|
||||
'right restrictions')
|
||||
formats = []
|
||||
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
|
||||
redirect = info.get(encoding_name)
|
||||
if not redirect:
|
||||
continue
|
||||
redirect_url = redirect.get('url')
|
||||
if not redirect_url:
|
||||
continue
|
||||
|
||||
redirect_info = self._download_json(
|
||||
redirect_url + '?format=json', display_id,
|
||||
'Downloading %s video url info' % encoding_name)
|
||||
|
||||
if redirect_info['status'] == 'error':
|
||||
if redirect_info['http_code'] == 403:
|
||||
message = (
|
||||
'The video is not available in your region due to '
|
||||
'right restrictions')
|
||||
else:
|
||||
message = redirect_info['message']
|
||||
raise ExtractorError(message, expected=True)
|
||||
|
||||
format_url = redirect_info.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
|
||||
if determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, 'mp4', preference=1, m3u8_id='hls'))
|
||||
else:
|
||||
message = redirect_info['message']
|
||||
raise ExtractorError(message, expected=True)
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': redirect.get('eeid'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
rating_str = info.get('rating')
|
||||
if rating_str is not None:
|
||||
@@ -174,11 +200,10 @@ class PBSIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': info['title'],
|
||||
'url': redirect_info['url'],
|
||||
'ext': 'mp4',
|
||||
'description': info['program'].get('description'),
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': info.get('duration'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'age_limit': age_limit,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -9,11 +9,13 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
js_to_json,
|
||||
)
|
||||
from ..compat import compat_urllib_request
|
||||
|
||||
|
||||
class QQMusicIE(InfoExtractor):
|
||||
IE_NAME = 'qqmusic'
|
||||
_VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
|
||||
@@ -96,6 +98,7 @@ class QQPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class QQMusicSingerIE(QQPlaylistBaseIE):
|
||||
IE_NAME = 'qqmusic:singer'
|
||||
_VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
_TEST = {
|
||||
'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
|
||||
@@ -139,6 +142,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
|
||||
|
||||
|
||||
class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||
IE_NAME = 'qqmusic:album'
|
||||
_VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
|
||||
_TEST = {
|
||||
@@ -168,3 +172,67 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||
album_page, 'album details', default=None)
|
||||
|
||||
return self.playlist_result(entries, mid, album_name, album_detail)
|
||||
|
||||
|
||||
class QQMusicToplistIE(QQPlaylistBaseIE):
    """Extractor entry for y.qq.com top-list pages (``top_<n>`` / ``global_<n>``).

    Each list is returned as a playlist whose entries are URL results
    pointing at the individual song pages (handled by ``QQMusic``).
    """
    IE_NAME = 'qqmusic:toplist'
    _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'

    _TESTS = [{
        'url': 'http://y.qq.com/#type=toplist&p=global_12',
        'info_dict': {
            'id': 'global_12',
            'title': 'itunes榜',
        },
        'playlist_count': 10,
    }, {
        'url': 'http://y.qq.com/#type=toplist&p=top_6',
        'info_dict': {
            'id': 'top_6',
            'title': 'QQ音乐巅峰榜·欧美',
        },
        'playlist_count': 100,
    }, {
        'url': 'http://y.qq.com/#type=toplist&p=global_5',
        'info_dict': {
            'id': 'global_5',
            'title': '韩国mnet排行榜',
        },
        'playlist_count': 50,
    }]

    @staticmethod
    def strip_qq_jsonp(code):
        """Unwrap ``MusicJsonCallback(...)/*...*/`` and pass the payload to js_to_json."""
        payload = re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code)
        return js_to_json(payload)

    def _real_extract(self, url):
        """Return a playlist result for the top list addressed by *url*."""
        list_id = self._match_id(url)

        # The id has the form "<type>_<number>", e.g. "top_6" or "global_12".
        list_type, num_id = list_id.split("_")

        # The HTML page is fetched only for the list's display name (below).
        list_page = self._download_webpage(
            "http://y.qq.com/y/static/toplist/index/%s.html" % list_id,
            list_id, 'Download toplist page')

        # "top" and "global" lists are served from different JSONP paths.
        if list_type == 'top':
            jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id
        else:
            jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id

        toplist_json = self._download_json(
            jsonp_url, list_id, note='Retrieve toplist json',
            errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp)

        # Each song's 's' value is a pipe-delimited record; field 20 is used
        # as the song mid.
        song_mids = [song['s'].split("|")[20] for song in toplist_json['l']]
        entries = [
            self.url_result(
                'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
                song_mid)
            for song_mid in song_mids]

        # NOTE(review): ``[^\']+`` excludes only apostrophes and is greedy, so
        # it can run past the first </h2>; ``[^<]+`` may have been intended --
        # confirm against a live page before changing the pattern.
        list_name = self._html_search_regex(
            r'<h2 id="top_name">([^\']+)</h2>', list_page, 'top list name',
            default=None)

        return self.playlist_result(entries, list_id, list_name)
|
||||
|
||||
@@ -84,11 +84,20 @@ class RUTVIE(InfoExtractor):
|
||||
'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
|
||||
'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
|
||||
},
|
||||
'skip': 'Translation has finished',
|
||||
},
|
||||
{
|
||||
'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/',
|
||||
'info_dict': {
|
||||
'id': '21',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^Россия 24. Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Translation has finished',
|
||||
},
|
||||
]
|
||||
|
||||
@@ -119,8 +128,10 @@ class RUTVIE(InfoExtractor):
|
||||
elif video_path.startswith('index/iframe/cast_id'):
|
||||
video_type = 'live'
|
||||
|
||||
is_live = video_type == 'live'
|
||||
|
||||
json_data = self._download_json(
|
||||
'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
|
||||
'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if is_live else '', video_id),
|
||||
video_id, 'Downloading JSON')
|
||||
|
||||
if json_data['errors']:
|
||||
@@ -147,6 +158,7 @@ class RUTVIE(InfoExtractor):
|
||||
|
||||
for transport, links in media['sources'].items():
|
||||
for quality, url in links.items():
|
||||
preference = -1 if priority_transport == transport else -2
|
||||
if transport == 'rtmp':
|
||||
mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
|
||||
if not mobj:
|
||||
@@ -160,9 +172,11 @@ class RUTVIE(InfoExtractor):
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
'vbr': int(quality),
|
||||
'preference': preference,
|
||||
}
|
||||
elif transport == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4'))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
url, video_id, 'mp4', preference=preference, m3u8_id='hls'))
|
||||
continue
|
||||
else:
|
||||
fmt = {
|
||||
@@ -172,21 +186,18 @@ class RUTVIE(InfoExtractor):
|
||||
'width': width,
|
||||
'height': height,
|
||||
'format_id': '%s-%s' % (transport, quality),
|
||||
'preference': -1 if priority_transport == transport else -2,
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ScreenwaveMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
|
||||
_VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
|
||||
@@ -20,7 +20,10 @@ class ScreenwaveMediaIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
|
||||
|
||||
playerdata = self._download_webpage(
|
||||
'http://player.screenwavemedia.com/play/player.php?id=%s' % video_id,
|
||||
video_id, 'Downloading player webpage')
|
||||
|
||||
vidtitle = self._search_regex(
|
||||
r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
|
||||
@@ -99,7 +102,7 @@ class TeamFourIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
playerdata_url = self._search_regex(
|
||||
r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
|
||||
r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
|
||||
webpage, 'player data URL')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request
|
||||
)
|
||||
from ..utils import sanitize_url_path_consecutive_slashes
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class SohuIE(InfoExtractor):
|
||||
@@ -28,7 +28,7 @@ class SohuIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
|
||||
'md5': '699060e75cf58858dd47fb9c03c42cfb',
|
||||
'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a',
|
||||
'info_dict': {
|
||||
'id': '409385080',
|
||||
'ext': 'mp4',
|
||||
@@ -36,7 +36,7 @@ class SohuIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
|
||||
'md5': '9bf34be48f2f4dadcb226c74127e203c',
|
||||
'md5': '49308ff6dafde5ece51137d04aec311e',
|
||||
'info_dict': {
|
||||
'id': '78693464',
|
||||
'ext': 'mp4',
|
||||
@@ -50,7 +50,7 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'bdbfb8f39924725e6589c146bc1883ad',
|
||||
'md5': '492923eac023ba2f13ff69617c32754a',
|
||||
'info_dict': {
|
||||
'id': '78910339_part1',
|
||||
'ext': 'mp4',
|
||||
@@ -58,7 +58,7 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
}
|
||||
}, {
|
||||
'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
|
||||
'md5': 'de604848c0e8e9c4a4dde7e1347c0637',
|
||||
'info_dict': {
|
||||
'id': '78910339_part2',
|
||||
'ext': 'mp4',
|
||||
@@ -66,7 +66,7 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
}
|
||||
}, {
|
||||
'md5': '8407e634175fdac706766481b9443450',
|
||||
'md5': '93584716ee0657c0b205b8aa3d27aa13',
|
||||
'info_dict': {
|
||||
'id': '78910339_part3',
|
||||
'ext': 'mp4',
|
||||
@@ -117,6 +117,15 @@ class SohuIE(InfoExtractor):
|
||||
r'var vid ?= ?["\'](\d+)["\']',
|
||||
webpage, 'video path')
|
||||
vid_data = _fetch_data(vid, mytv)
|
||||
if vid_data['play'] != 1:
|
||||
if vid_data.get('status') == 12:
|
||||
raise ExtractorError(
|
||||
'Sohu said: There\'s something wrong in the video.',
|
||||
expected=True)
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Sohu said: The video is only licensed to users in Mainland China.',
|
||||
expected=True)
|
||||
|
||||
formats_json = {}
|
||||
for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
|
||||
@@ -132,24 +141,21 @@ class SohuIE(InfoExtractor):
|
||||
for i in range(part_count):
|
||||
formats = []
|
||||
for format_id, format_data in formats_json.items():
|
||||
allot = format_data['allot']
|
||||
prot = format_data['prot']
|
||||
|
||||
data = format_data['data']
|
||||
clips_url = data['clipsURL']
|
||||
su = data['su']
|
||||
|
||||
part_str = self._download_webpage(
|
||||
'http://%s/?prot=%s&file=%s&new=%s' %
|
||||
(allot, prot, clips_url[i], su[i]),
|
||||
video_id,
|
||||
'Downloading %s video URL part %d of %d'
|
||||
% (format_id, i + 1, part_count))
|
||||
|
||||
part_info = part_str.split('|')
|
||||
|
||||
video_url = sanitize_url_path_consecutive_slashes(
|
||||
'%s%s?key=%s' % (part_info[0], su[i], part_info[3]))
|
||||
# URLs starting with http://newflv.sohu.ccgslb.net/ are not usable,
|
||||
# so retry until we get a working URL
|
||||
video_url = 'newflv.sohu.ccgslb.net'
|
||||
retries = 0
|
||||
while 'newflv.sohu.ccgslb.net' in video_url and retries < 5:
|
||||
download_note = 'Download information from CDN gateway for format ' + format_id
|
||||
if retries > 0:
|
||||
download_note += ' (retry #%d)' % retries
|
||||
retries += 1
|
||||
cdn_info = self._download_json(
|
||||
'http://data.vod.itc.cn/cdnList?new=' + data['su'][i],
|
||||
video_id, download_note)
|
||||
video_url = cdn_info['url']
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
||||
@@ -336,7 +336,7 @@ class SoundcloudUserIE(SoundcloudIE):
|
||||
if len(new_entries) == 0:
|
||||
self.to_screen('%s: End page received' % uploader)
|
||||
break
|
||||
entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries)
|
||||
entries.extend(self.url_result(e['permalink_url'], 'Soundcloud') for e in new_entries)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
||||
@@ -32,7 +32,7 @@ class SouthParkEsIE(SouthParkIE):
|
||||
}]
|
||||
|
||||
|
||||
class SouthparkDeIE(SouthParkIE):
|
||||
class SouthParkDeIE(SouthParkIE):
|
||||
IE_NAME = 'southpark.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
|
||||
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
|
||||
@@ -46,3 +46,25 @@ class SouthparkDeIE(SouthParkIE):
|
||||
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class SouthParkNlIE(SouthParkIE):
    """southpark.nl variant of the South Park extractor.

    Only the site-specific constants are overridden here; all extraction
    logic is inherited from ``SouthParkIE``.
    """

    IE_NAME = 'southpark.nl'
    _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/'
    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'

    _TESTS = [{
        'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free',
        'playlist_count': 4,
    }]
|
||||
|
||||
|
||||
class SouthParkDkIE(SouthParkIE):
    """southparkstudios.dk variant of the South Park extractor.

    Only the site-specific constants are overridden here; all extraction
    logic is inherited from ``SouthParkIE``.
    """

    IE_NAME = 'southparkstudios.dk'
    _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/'
    _VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.dk/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'

    _TESTS = [{
        'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop',
        'playlist_count': 4,
    }]
|
||||
|
||||
@@ -9,41 +9,9 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class SVTPlayIE(InfoExtractor):
|
||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
|
||||
'md5': 'ade3def0643fa1c40587a422f98edfd9',
|
||||
'info_dict': {
|
||||
'id': '2609989',
|
||||
'ext': 'flv',
|
||||
'title': 'SM veckan vinter, Örebro - Rally, final',
|
||||
'duration': 4500,
|
||||
'thumbnail': 're:^https?://.*[\.-]jpg$',
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
|
||||
'md5': 'c3101a17ce9634f4c1f9800f0746c187',
|
||||
'info_dict': {
|
||||
'id': '1058509',
|
||||
'ext': 'flv',
|
||||
'title': 'Farlig kryssning',
|
||||
'duration': 2566,
|
||||
'thumbnail': 're:^https?://.*[\.-]jpg$',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'skip': 'Only works from Sweden',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
host = mobj.group('host')
|
||||
|
||||
info = self._download_json(
|
||||
'http://www.%s.se/video/%s?output=json' % (host, video_id), video_id)
|
||||
class SVTBaseIE(InfoExtractor):
|
||||
def _extract_video(self, url, video_id):
|
||||
info = self._download_json(url, video_id)
|
||||
|
||||
title = info['context']['title']
|
||||
thumbnail = info['context'].get('thumbnailImage')
|
||||
@@ -80,3 +48,70 @@ class SVTPlayIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
|
||||
class SVTIE(SVTBaseIE):
    """Extractor for svt.se widget player URLs (``/wd?widgetId=...&articleId=...``)."""

    _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
    _TEST = {
        # The widgetId digits must be followed by a literal '&' for
        # _VALID_URL to match, hence the plain '&sectionId' parameter.
        'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
        'md5': '9648197555fc1b49e3dc22db4af51d46',
        'info_dict': {
            'id': '2900353',
            'ext': 'flv',
            'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
            'duration': 27,
            'age_limit': 0,
        },
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the first SVT widget URL found in *webpage*, or None.

        Looks for an ``<iframe src="...">`` or ``href="..."`` attribute
        whose value starts with a URL matching ``_VALID_URL``; the returned
        URL runs up to the closing double quote.
        """
        mobj = re.search(
            r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Rebuild the widget URL in its JSON form and extract the video from it."""
        mobj = re.match(self._VALID_URL, url)
        widget_id = mobj.group('widget_id')
        article_id = mobj.group('id')
        # Only widgetId and articleId are carried over from the input URL;
        # the remaining parameters request the JSON representation.
        return self._extract_video(
            'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
            article_id)
|
||||
|
||||
|
||||
class SVTPlayIE(SVTBaseIE):
    """Extractor for video pages on svtplay.se and oppetarkiv.se."""

    IE_DESC = 'SVT Play and Öppet arkiv'
    _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
        'md5': 'ade3def0643fa1c40587a422f98edfd9',
        'info_dict': {
            'id': '2609989',
            'ext': 'flv',
            'title': 'SM veckan vinter, Örebro - Rally, final',
            'duration': 4500,
            # Raw string: "\." is a regex escape, not a Python string escape.
            'thumbnail': r're:^https?://.*[\.-]jpg$',
            'age_limit': 0,
        },
    }, {
        'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
        'md5': 'c3101a17ce9634f4c1f9800f0746c187',
        'info_dict': {
            'id': '1058509',
            'ext': 'flv',
            'title': 'Farlig kryssning',
            'duration': 2566,
            'thumbnail': r're:^https?://.*[\.-]jpg$',
            'age_limit': 0,
        },
        'skip': 'Only works from Sweden',
    }]

    def _real_extract(self, url):
        """Request the page's JSON form on the matched host and extract the video."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # The matched host (svtplay or oppetarkiv) is reused for the JSON URL.
        host = mobj.group('host')
        return self._extract_video(
            'http://www.%s.se/video/%s?output=json' % (host, video_id),
            video_id)
|
||||
@@ -2,13 +2,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
qualities,
|
||||
)
|
||||
from ..compat import compat_ord
|
||||
|
||||
|
||||
class TeamcocoIE(InfoExtractor):
|
||||
@@ -59,33 +62,49 @@ class TeamcocoIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
display_id = mobj.group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
if 'src=expired' in urlh.geturl():
|
||||
raise ExtractorError('This video is expired.', expected=True)
|
||||
|
||||
video_id = mobj.group('video_id')
|
||||
if not video_id:
|
||||
video_id = self._html_search_regex(
|
||||
self._VIDEO_ID_REGEXES, webpage, 'video id')
|
||||
|
||||
preload = None
|
||||
preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage)
|
||||
if preloads:
|
||||
preload = max([(len(p), p) for p in preloads])[1]
|
||||
data = None
|
||||
|
||||
if not preload:
|
||||
preload = ''.join(re.findall(r'this\.push\("([^"]+)"\);', webpage))
|
||||
preload_codes = self._html_search_regex(
|
||||
r'(function.+)setTimeout\(function\(\)\{playlist',
|
||||
webpage, 'preload codes')
|
||||
base64_fragments = re.findall(r'"([a-zA-z0-9+/=]+)"', preload_codes)
|
||||
base64_fragments.remove('init')
|
||||
|
||||
if not preload:
|
||||
preload = self._html_search_regex([
|
||||
r'player,\[?"([^"]+)"\]?', r'player.init\(\[?"([^"]+)"\]?\)'
|
||||
], webpage.replace('","', ''), 'preload data', default=None)
|
||||
def _check_sequence(cur_fragments):
|
||||
if not cur_fragments:
|
||||
return
|
||||
for i in range(len(cur_fragments)):
|
||||
cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii')
|
||||
try:
|
||||
raw_data = base64.b64decode(cur_sequence)
|
||||
if compat_ord(raw_data[0]) == compat_ord('{'):
|
||||
return json.loads(raw_data.decode('utf-8'))
|
||||
except (TypeError, binascii.Error, UnicodeDecodeError, ValueError):
|
||||
continue
|
||||
|
||||
if not preload:
|
||||
def _check_data():
|
||||
for i in range(len(base64_fragments) + 1):
|
||||
for j in range(i, len(base64_fragments) + 1):
|
||||
data = _check_sequence(base64_fragments[:i] + base64_fragments[j:])
|
||||
if data:
|
||||
return data
|
||||
|
||||
self.to_screen('Try to compute possible data sequence. This may take some time.')
|
||||
data = _check_data()
|
||||
|
||||
if not data:
|
||||
raise ExtractorError(
|
||||
'Preload information could not be extracted', expected=True)
|
||||
|
||||
data = self._parse_json(
|
||||
base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id)
|
||||
|
||||
formats = []
|
||||
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
|
||||
for filed in data['files']:
|
||||
|
||||
@@ -15,19 +15,37 @@ class TestTubeIE(InfoExtractor):
|
||||
'id': '60163',
|
||||
'display_id': '5-weird-ways-plants-can-eat-animals',
|
||||
'duration': 275,
|
||||
'ext': 'mp4',
|
||||
'ext': 'webm',
|
||||
'title': '5 Weird Ways Plants Can Eat Animals',
|
||||
'description': 'Why have some plants evolved to eat meat?',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'DNews',
|
||||
'uploader_id': 'dnews',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://testtube.com/iflscience/insane-jet-ski-flipping',
|
||||
'info_dict': {
|
||||
'id': 'fAGfJ4YjVus',
|
||||
'ext': 'mp4',
|
||||
'title': 'Flipping Jet-Ski Skills | Outrageous Acts of Science',
|
||||
'uploader': 'Science Channel',
|
||||
'uploader_id': 'ScienceChannel',
|
||||
'upload_date': '20150203',
|
||||
'description': 'md5:e61374030015bae1d2e22f096d4769d6',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
youtube_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
|
||||
webpage, 'youtube iframe', default=None)
|
||||
if youtube_url:
|
||||
return self.url_result(youtube_url, 'Youtube', video_id=display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);",
|
||||
webpage, 'video ID')
|
||||
|
||||
@@ -129,7 +129,9 @@ class ThePlatformIE(InfoExtractor):
|
||||
head = meta.find(_x('smil:head'))
|
||||
body = meta.find(_x('smil:body'))
|
||||
|
||||
f4m_node = body.find(_x('smil:seq//smil:video')) or body.find(_x('smil:seq/smil:video'))
|
||||
f4m_node = body.find(_x('smil:seq//smil:video'))
|
||||
if f4m_node is None:
|
||||
f4m_node = body.find(_x('smil:seq/smil:video'))
|
||||
if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
|
||||
f4m_url = f4m_node.attrib['src']
|
||||
if 'manifest.f4m?' not in f4m_url:
|
||||
@@ -142,7 +144,9 @@ class ThePlatformIE(InfoExtractor):
|
||||
formats = []
|
||||
switch = body.find(_x('smil:switch'))
|
||||
if switch is None:
|
||||
switch = body.find(_x('smil:par//smil:switch')) or body.find(_x('smil:par/smil:switch'))
|
||||
switch = body.find(_x('smil:par//smil:switch'))
|
||||
if switch is None:
|
||||
switch = body.find(_x('smil:par/smil:switch'))
|
||||
if switch is None:
|
||||
switch = body.find(_x('smil:par'))
|
||||
if switch is not None:
|
||||
@@ -163,7 +167,9 @@ class ThePlatformIE(InfoExtractor):
|
||||
'vbr': vbr,
|
||||
})
|
||||
else:
|
||||
switch = body.find(_x('smil:seq//smil:switch')) or body.find(_x('smil:seq/smil:switch'))
|
||||
switch = body.find(_x('smil:seq//smil:switch'))
|
||||
if switch is None:
|
||||
switch = body.find(_x('smil:seq/smil:switch'))
|
||||
for f in switch.findall(_x('smil:video')):
|
||||
attr = f.attrib
|
||||
vbr = int_or_none(attr.get('system-bitrate'), 1000)
|
||||
|
||||
@@ -30,3 +30,31 @@ class TMZIE(InfoExtractor):
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._html_search_meta('ThumbURL', webpage),
|
||||
}
|
||||
|
||||
|
||||
class TMZArticleIE(InfoExtractor):
    """Extractor for dated tmz.com article pages that embed a TMZ video.

    The article page is searched for a ``tmzVideoEmbedV2("...")`` call; the
    video id found in its JSON argument is turned into a tmz.com video page
    URL, which is returned as a URL result for another extractor to handle.
    """

    _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
    _TEST = {
        'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
        'md5': 'e482a414a38db73087450e3a6ce69d00',
        'info_dict': {
            'id': '0_6snoelag',
            'ext': 'mp4',
            'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
            'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
        }
    }

    def _real_extract(self, url):
        article_id = self._match_id(url)
        page = self._download_webpage(url, article_id)

        raw_embed_info = self._html_search_regex(
            r'tmzVideoEmbedV2\("([^)]+)"\);', page, 'embedded video info')

        def _drop_backslashes(text):
            # All backslashes are removed from the captured argument before
            # it is parsed as JSON.
            return text.replace('\\', '')

        embed_info = self._parse_json(
            raw_embed_info, article_id, transform_source=_drop_backslashes)

        video_page_url = 'http://www.tmz.com/videos/%s/' % embed_info['id']
        return self.url_result(video_page_url)
|
||||
|
||||
@@ -17,7 +17,9 @@ from ..utils import (
|
||||
class VeeHDIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
# It seems VeeHD videos have multiple copies on several servers, all of
|
||||
# which have different MD5 checksums, so omit the md5 field in all tests
|
||||
_TESTS = [{
|
||||
'url': 'http://veehd.com/video/4639434_Solar-Sinter',
|
||||
'info_dict': {
|
||||
'id': '4639434',
|
||||
@@ -26,7 +28,26 @@ class VeeHDIE(InfoExtractor):
|
||||
'uploader_id': 'VideoEyes',
|
||||
'description': 'md5:46a840e8692ddbaffb5f81d9885cb457',
|
||||
},
|
||||
}
|
||||
'skip': 'Video deleted',
|
||||
}, {
|
||||
'url': 'http://veehd.com/video/4905758_Elysian-Fields-Channeling',
|
||||
'info_dict': {
|
||||
'id': '4905758',
|
||||
'ext': 'mp4',
|
||||
'title': 'Elysian Fields - Channeling',
|
||||
'description': 'md5:360e4e95fdab58aefbea0f2a19e5604b',
|
||||
'uploader_id': 'spotted',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://veehd.com/video/2046729_2012-2009-DivX-Trailer',
|
||||
'info_dict': {
|
||||
'id': '2046729',
|
||||
'ext': 'avi',
|
||||
'title': '2012 (2009) DivX Trailer',
|
||||
'description': 'md5:75435ee95255e6a9838ac6f6f3a2396b',
|
||||
'uploader_id': 'Movie_Trailers',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -48,13 +69,21 @@ class VeeHDIE(InfoExtractor):
|
||||
player_page = self._download_webpage(
|
||||
player_url, video_id, 'Downloading player page')
|
||||
|
||||
video_url = None
|
||||
|
||||
config_json = self._search_regex(
|
||||
r'value=\'config=({.+?})\'', player_page, 'config json', default=None)
|
||||
|
||||
if config_json:
|
||||
config = json.loads(config_json)
|
||||
video_url = compat_urlparse.unquote(config['clip']['url'])
|
||||
else:
|
||||
|
||||
if not video_url:
|
||||
video_url = self._html_search_regex(
|
||||
r'<embed[^>]+type="video/divx"[^>]+src="([^"]+)"',
|
||||
player_page, 'video url', default=None)
|
||||
|
||||
if not video_url:
|
||||
iframe_src = self._search_regex(
|
||||
r'<iframe[^>]+src="/?([^"]+)"', player_page, 'iframe url')
|
||||
iframe_url = 'http://veehd.com/%s' % iframe_src
|
||||
@@ -82,7 +111,6 @@ class VeeHDIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'uploader_id': uploader_id,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
|
||||
@@ -38,9 +38,13 @@ class VesselIE(InfoExtractor):
|
||||
return req
|
||||
|
||||
@staticmethod
|
||||
def find_assets(data, asset_type):
|
||||
def find_assets(data, asset_type, asset_id=None):
|
||||
for asset in data.get('assets', []):
|
||||
if asset.get('type') == asset_type:
|
||||
if not asset.get('type') == asset_type:
|
||||
continue
|
||||
elif asset_id is not None and not asset.get('id') == asset_id:
|
||||
continue
|
||||
else:
|
||||
yield asset
|
||||
|
||||
def _check_access_rights(self, data):
|
||||
@@ -82,11 +86,13 @@ class VesselIE(InfoExtractor):
|
||||
req = VesselIE.make_json_request(
|
||||
self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
|
||||
data = self._download_json(req, video_id)
|
||||
video_asset_id = data.get('main_video_asset')
|
||||
|
||||
self._check_access_rights(data)
|
||||
|
||||
try:
|
||||
video_asset = next(VesselIE.find_assets(data, 'video'))
|
||||
video_asset = next(
|
||||
VesselIE.find_assets(data, 'video', asset_id=video_asset_id))
|
||||
except StopIteration:
|
||||
raise ExtractorError('No video assets found')
|
||||
|
||||
|
||||
@@ -8,7 +8,19 @@ from ..utils import float_or_none
|
||||
|
||||
|
||||
class VGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/(?:.*)/(?P<id>[0-9]+)'
|
||||
IE_DESC = 'VGTV and BTTV'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
vgtv:|
|
||||
http://(?:www\.)?
|
||||
)
|
||||
(?P<host>vgtv|bt)
|
||||
(?:
|
||||
:|
|
||||
\.no/(?:tv/)?\#!/(?:video|live)/
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
# streamType: vod
|
||||
@@ -64,12 +76,25 @@ class VGTVIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
host = mobj.group('host')
|
||||
|
||||
HOST_WEBSITES = {
|
||||
'vgtv': 'vgtv',
|
||||
'bt': 'bttv',
|
||||
}
|
||||
|
||||
data = self._download_json(
|
||||
'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id,
|
||||
'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website'
|
||||
% (host, video_id, HOST_WEBSITES[host]),
|
||||
video_id, 'Downloading media JSON')
|
||||
|
||||
streams = data['streamUrls']
|
||||
@@ -78,11 +103,14 @@ class VGTVIE(InfoExtractor):
|
||||
|
||||
hls_url = streams.get('hls')
|
||||
if hls_url:
|
||||
formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4'))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
hls_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
|
||||
hds_url = streams.get('hds')
|
||||
if hds_url:
|
||||
formats.extend(self._extract_f4m_formats(hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
video_id, f4m_id='hds'))
|
||||
|
||||
mp4_url = streams.get('mp4')
|
||||
if mp4_url:
|
||||
@@ -115,3 +143,51 @@ class VGTVIE(InfoExtractor):
|
||||
'view_count': data['displays'],
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class BTArticleIE(InfoExtractor):
    """Extractor for bt.no article pages that embed an SVP player video.

    The article page is searched for the numeric id passed to
    ``SVP.Player.load(``; extraction is then delegated to the 'VGTV'
    extractor through a ``vgtv:bt:<id>`` URL.
    """

    IE_NAME = 'bt:article'
    IE_DESC = 'Bergens Tidende Articles'
    # Raw string: "\." and "\d" are regex escapes, not Python string escapes.
    _VALID_URL = r'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
    _TEST = {
        'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
        'md5': 'd055e8ee918ef2844745fcfd1a4175fb',
        'info_dict': {
            'id': '23199',
            'ext': 'mp4',
            'title': 'Alrekstad internat',
            'description': 'md5:dc81a9056c874fedb62fc48a300dac58',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 191,
            'timestamp': 1289991323,
            'upload_date': '20101117',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Find the embedded player's video id and hand off to VGTV."""
        # The id matched from the article URL is only used to label the download.
        webpage = self._download_webpage(url, self._match_id(url))
        video_id = self._search_regex(
            r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id')
        return self.url_result('vgtv:bt:%s' % video_id, 'VGTV')
|
||||
|
||||
|
||||
class BTVestlendingenIE(InfoExtractor):
    """Extractor for bt.no "Vestlendingen" special pages.

    The numeric id in the URL fragment is forwarded unchanged to the
    'Xstream' extractor as ``xstream:btno:<id>``; no page is downloaded here.
    """

    IE_NAME = 'bt:vestlendingen'
    IE_DESC = 'Bergens Tidende - Vestlendingen'
    # Raw string: "\." and "\d" are regex escapes, not Python string escapes.
    _VALID_URL = r'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
        'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
        'info_dict': {
            'id': '86588',
            'ext': 'mov',
            'title': 'Otto Wollertsen',
            'description': 'Vestlendingen Otto Fredrik Wollertsen',
            'timestamp': 1430473209,
            'upload_date': '20150501',
        },
    }

    def _real_extract(self, url):
        """Delegate to the Xstream extractor using the id from the URL."""
        return self.url_result('xstream:btno:%s' % self._match_id(url), 'Xstream')
|
||||
|
||||
@@ -31,7 +31,6 @@ class ViceIE(InfoExtractor):
|
||||
r'embedCode=([^&\'"]+)', webpage,
|
||||
'ooyala embed code')
|
||||
ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
|
||||
print(ooyala_url)
|
||||
except ExtractorError:
|
||||
raise ExtractorError('The page doesn\'t contain a video', expected=True)
|
||||
return self.url_result(ooyala_url, ie='Ooyala')
|
||||
|
||||
@@ -2,12 +2,17 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
US_RATINGS,
|
||||
determine_ext,
|
||||
mimetype2ext,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
@@ -15,8 +20,11 @@ from .common import InfoExtractor
|
||||
class VikiIE(InfoExtractor):
|
||||
IE_NAME = 'viki'
|
||||
|
||||
# iPad2
|
||||
_USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5'
|
||||
|
||||
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
|
||||
'info_dict': {
|
||||
'id': '1023585v',
|
||||
@@ -28,7 +36,30 @@ class VikiIE(InfoExtractor):
|
||||
'age_limit': 13,
|
||||
},
|
||||
'skip': 'Blocked in the US',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
|
||||
'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c',
|
||||
'info_dict': {
|
||||
'id': '1067139v',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
|
||||
'upload_date': '20150430',
|
||||
'title': '\'The Avengers: Age of Ultron\' Press Conference',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
|
||||
'info_dict': {
|
||||
'id': '1048879v',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20140820',
|
||||
'description': 'md5:54ff56d51bdfc7a30441ec967394e91c',
|
||||
'title': 'Ankhon Dekhi',
|
||||
},
|
||||
'params': {
|
||||
# requires ffmpeg
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -50,15 +81,34 @@ class VikiIE(InfoExtractor):
|
||||
'rating information', default='').strip()
|
||||
age_limit = US_RATINGS.get(rating_str)
|
||||
|
||||
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
|
||||
req = compat_urllib_request.Request(
|
||||
'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id)
|
||||
req.add_header('User-Agent', self._USER_AGENT)
|
||||
info_webpage = self._download_webpage(
|
||||
info_url, video_id, note='Downloading info page')
|
||||
if re.match(r'\s*<div\s+class="video-error', info_webpage):
|
||||
raise ExtractorError(
|
||||
'Video %s is blocked from your location.' % video_id,
|
||||
expected=True)
|
||||
video_url = self._html_search_regex(
|
||||
r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')
|
||||
req, video_id, note='Downloading info page')
|
||||
err_msg = self._html_search_regex(r'<div[^>]+class="video-error[^>]+>(.+)</div>', info_webpage, 'error message', default=None)
|
||||
if err_msg:
|
||||
if 'not available in your region' in err_msg:
|
||||
raise ExtractorError(
|
||||
'Video %s is blocked from your location.' % video_id,
|
||||
expected=True)
|
||||
else:
|
||||
raise ExtractorError('Viki said: ' + err_msg)
|
||||
mobj = re.search(
|
||||
r'<source[^>]+type="(?P<mime_type>[^"]+)"[^>]+src="(?P<url>[^"]+)"', info_webpage)
|
||||
if not mobj:
|
||||
raise ExtractorError('Unable to find video URL')
|
||||
video_url = unescapeHTML(mobj.group('url'))
|
||||
video_ext = mimetype2ext(mobj.group('mime_type'))
|
||||
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, ext=video_ext)
|
||||
else:
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'ext': video_ext,
|
||||
}]
|
||||
|
||||
upload_date_str = self._html_search_regex(
|
||||
r'"created_at":"([^"]+)"', info_webpage, 'upload date')
|
||||
@@ -74,7 +124,7 @@ class VikiIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': age_limit,
|
||||
|
||||
@@ -177,7 +177,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token')
|
||||
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
|
||||
data = urlencode_postdata({
|
||||
'password': password,
|
||||
'token': token,
|
||||
@@ -223,6 +223,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
orig_url = url
|
||||
if mobj.group('pro') or mobj.group('player'):
|
||||
url = 'https://player.vimeo.com/video/' + video_id
|
||||
else:
|
||||
url = 'https://vimeo.com/' + video_id
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
@@ -439,7 +441,7 @@ class VimeoChannelIE(InfoExtractor):
|
||||
name="([^"]+)"\s+
|
||||
value="([^"]*)"
|
||||
''', login_form))
|
||||
token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token')
|
||||
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
|
||||
fields['token'] = token
|
||||
fields['password'] = password
|
||||
post = urlencode_postdata(fields)
|
||||
|
||||
@@ -75,7 +75,7 @@ class VineIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'alt_title': self._og_search_description(webpage),
|
||||
'alt_title': self._og_search_description(webpage, default=None),
|
||||
'description': data['description'],
|
||||
'thumbnail': data['thumbnailUrl'],
|
||||
'upload_date': unified_strdate(data['created']),
|
||||
|
||||
99
youtube_dl/extractor/voicerepublic.py
Normal file
99
youtube_dl/extractor/voicerepublic.py
Normal file
@@ -0,0 +1,99 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class VoiceRepublicIE(InfoExtractor):
    """Extractor for voicerepublic.com talk and embed pages (audio only)."""

    _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
    _TESTS = [{
        'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
        'md5': '0554a24d1657915aa8e8f84e15dc9353',
        'info_dict': {
            'id': '2296',
            'display_id': 'watching-the-watchers-building-a-sousveillance-state',
            'ext': 'm4a',
            'title': 'Watching the Watchers: Building a Sousveillance State',
            'description': 'md5:715ba964958afa2398df615809cfecb1',
            'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
            'duration': 1800,
            'view_count': int,
        }
    }, {
        'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the talk identified by ``url``.

        Metadata comes from the JSON object embedded in the page when it can
        be found and parsed; otherwise it is scraped from the HTML markup.
        Raises ExtractorError (expected) while the audio is still queued.
        """
        display_id = self._match_id(url)

        # The /talks/ page is requested regardless of which URL form matched.
        talk_page_url = compat_urlparse.urljoin(url, '/talks/%s' % display_id)
        request = compat_urllib_request.Request(talk_page_url)
        # Older versions of Firefox get redirected to an "upgrade browser" page
        request.add_header('User-Agent', 'youtube-dl')
        webpage = self._download_webpage(request, display_id)

        if '>Queued for processing, please stand by...<' in webpage:
            raise ExtractorError(
                'Audio is still queued for processing', expected=True)

        config = self._search_regex(
            r'(?s)return ({.+?});\s*\n', webpage, 'data', default=None)
        data = None
        if config:
            data = self._parse_json(config, display_id, fatal=False)

        if data:
            # Preferred path: structured data embedded in the page.
            title = data['title']
            description = data.get('teaser')
            talk_id = data.get('talk_id') or display_id
            talk = data['talk']
            duration = int_or_none(talk.get('duration'))
            links = talk['links'].items()
        else:
            # Fallback path: scrape the same fields from the HTML.
            title = self._og_search_title(webpage)
            description = self._html_search_regex(
                r"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>",
                webpage, 'description', fatal=False)
            talk_id = self._search_regex(
                [r"id='jc-(\d+)'", r"data-shareable-id='(\d+)'"],
                webpage, 'talk id', default=None) or display_id
            duration = None
            player = self._search_regex(
                r"class='vr-player jp-jplayer'([^>]+)>", webpage, 'player')
            links = re.findall(r"data-([^=]+)='([^']+)'", player)

        # Both paths yield (format_id, talk_url) pairs; build formats once.
        formats = []
        for format_id, talk_url in links:
            formats.append({
                'url': compat_urlparse.urljoin(url, talk_url),
                'format_id': format_id,
                'ext': determine_ext(talk_url) or format_id,
                'vcodec': 'none',
            })
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)
        play_count = self._search_regex(
            r"class='play-count[^']*'>\s*(\d+) plays",
            webpage, 'play count', fatal=False)
        view_count = int_or_none(play_count)

        return {
            'id': talk_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
|
||||
@@ -27,9 +27,6 @@ class VpornIE(InfoExtractor):
|
||||
'duration': 393,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -47,9 +44,6 @@ class VpornIE(InfoExtractor):
|
||||
'duration': 588,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
]
|
||||
@@ -64,29 +58,29 @@ class VpornIE(InfoExtractor):
|
||||
title = self._html_search_regex(
|
||||
r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
|
||||
description = self._html_search_regex(
|
||||
r'<div class="description_txt">(.*?)</div>', webpage, 'description', fatal=False)
|
||||
r'class="(?:descr|description_txt)">(.*?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', fatal=False, default=None)
|
||||
if thumbnail:
|
||||
thumbnail = 'http://www.vporn.com' + thumbnail
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)UPLOADED BY.*?<a href="/user/[^"]+">([^<]+)</a>',
|
||||
r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
categories = re.findall(r'<a href="/cat/[^"]+">([^<]+)</a>', webpage)
|
||||
categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage)
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'duration (\d+ min \d+ sec)', webpage, 'duration', fatal=False))
|
||||
r'Runtime:\s*</span>\s*(\d+ min \d+ sec)',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'<span>([\d,\.]+) VIEWS</span>', webpage, 'view count', fatal=False))
|
||||
like_count = str_to_int(self._html_search_regex(
|
||||
r'<span id="like" class="n">([\d,\.]+)</span>', webpage, 'like count', fatal=False))
|
||||
dislike_count = str_to_int(self._html_search_regex(
|
||||
r'<span id="dislike" class="n">([\d,\.]+)</span>', webpage, 'dislike count', fatal=False))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'class="views">([\d,\.]+) [Vv]iews<',
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = str_to_int(self._html_search_regex(
|
||||
r'<h4>Comments \(<b>([\d,\.]+)</b>\)</h4>', webpage, 'comment count', fatal=False))
|
||||
r"'Comments \(([\d,\.]+)\)'",
|
||||
webpage, 'comment count', default=None))
|
||||
|
||||
formats = []
|
||||
|
||||
@@ -117,8 +111,6 @@ class VpornIE(InfoExtractor):
|
||||
'categories': categories,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
'age_limit': 18,
|
||||
'formats': formats,
|
||||
|
||||
@@ -6,8 +6,8 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class WorldStarHipHopIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P<id>.*)'
|
||||
_TESTS = [{
|
||||
"url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
|
||||
"md5": "9d04de741161603bf7071bbf4e883186",
|
||||
"info_dict": {
|
||||
@@ -15,7 +15,15 @@ class WorldStarHipHopIE(InfoExtractor):
|
||||
"ext": "mp4",
|
||||
"title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
|
||||
'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',
|
||||
'info_dict': {
|
||||
'id': 'wshh6a7q1ny0G34ZwuIO',
|
||||
'ext': 'mp4',
|
||||
"title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -26,19 +34,22 @@ class WorldStarHipHopIE(InfoExtractor):
|
||||
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'so\.addVariable\("file","(.*?)"\)', webpage, 'video URL')
|
||||
[r'so\.addVariable\("file","(.*?)"\)',
|
||||
r'<div class="artlist">\s*<a[^>]+href="([^"]+)">'],
|
||||
webpage, 'video URL')
|
||||
|
||||
if 'youtube' in video_url:
|
||||
return self.url_result(video_url, ie='Youtube')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
|
||||
[r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
|
||||
r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
|
||||
webpage, 'title')
|
||||
|
||||
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
|
||||
thumbnail = self._html_search_regex(
|
||||
r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',
|
||||
fatal=False)
|
||||
default=None)
|
||||
if not thumbnail:
|
||||
_title = r'candytitles.*>(.*)</span>'
|
||||
mobj = re.search(_title, webpage)
|
||||
|
||||
115
youtube_dl/extractor/xstream.py
Normal file
115
youtube_dl/extractor/xstream.py
Normal file
@@ -0,0 +1,115 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
xpath_with_ns,
|
||||
xpath_text,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class XstreamIE(InfoExtractor):
    """Extractor for Xstream video feeds.

    Accepts either the internal ``xstream:<partner_id>:<id>`` form or a
    ``frontend.xstream.dk`` / ``frontend.xstream.net`` feed URL.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        xstream:|
                        https?://frontend\.xstream\.(?:dk|net)/
                    )
                    (?P<partner_id>[^/]+)
                    (?:
                        :|
                        /feed/video/\?.*?\bid=
                    )
                    (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
        'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
        'info_dict': {
            'id': '86588',
            'ext': 'mov',
            'title': 'Otto Wollertsen',
            'description': 'Vestlendingen Otto Fredrik Wollertsen',
            'timestamp': 1430473209,
            'upload_date': '20150501',
        },
    }, {
        'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the XML feed for the video and build its info dict.

        Returns a dict with id, title, description, timestamp, formats and
        thumbnails taken from the feed's first ``atom:entry``.
        """
        mobj = re.match(self._VALID_URL, url)
        partner_id = mobj.group('partner_id')
        video_id = mobj.group('id')

        data = self._download_xml(
            'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
            % (partner_id, video_id),
            video_id)

        NS_MAP = {
            'atom': 'http://www.w3.org/2005/Atom',
            'xt': 'http://xstream.dk/',
            'media': 'http://search.yahoo.com/mrss/',
        }

        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))

        title = xpath_text(
            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
        description = xpath_text(
            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
        timestamp = parse_iso8601(xpath_text(
            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))

        formats = []
        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
        for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
            media_url = media_content.get('url')
            if not media_url:
                continue
            # int_or_none yields None when the bitrate attribute is missing.
            tbr = int_or_none(media_content.get('bitrate'))
            # Split an RTMP URL into base URL (scheme://host/app), app and
            # play path; any other URL is used as-is.
            rtmp_mobj = re.search(
                r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
            if rtmp_mobj:
                formats.append({
                    'url': rtmp_mobj.group('url'),
                    'play_path': 'mp4:%s' % rtmp_mobj.group('playpath'),
                    'app': rtmp_mobj.group('app'),
                    'ext': 'flv',
                    'tbr': tbr,
                    # '%d' % None raises TypeError, so fall back to a plain
                    # id when no bitrate is known.
                    'format_id': 'rtmp-%d' % tbr if tbr is not None else 'rtmp',
                })
            else:
                formats.append({
                    'url': media_url,
                    'tbr': tbr,
                })
        self._sort_formats(formats)

        # The "original" link is appended after sorting, so its position at
        # the end of the list is not changed by _sort_formats.
        link = find_xpath_attr(
            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
        if link is not None:
            formats.append({
                'url': link.get('href'),
                'format_id': link.get('rel'),
            })

        thumbnails = [{
            'url': splash.get('url'),
            'width': int_or_none(splash.get('width')),
            'height': int_or_none(splash.get('height')),
        } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }
|
||||
@@ -22,7 +22,7 @@ from .nbc import NBCSportsVPlayerIE
|
||||
|
||||
class YahooIE(InfoExtractor):
|
||||
IE_DESC = 'Yahoo screen and movies'
|
||||
_VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+?)-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
|
||||
_VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
||||
@@ -140,12 +140,15 @@ class YahooIE(InfoExtractor):
|
||||
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
|
||||
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://tw.news.yahoo.com/-100120367.html',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = mobj.group('display_id') or self._match_id(url)
|
||||
page_id = mobj.group('id')
|
||||
url = mobj.group('url')
|
||||
host = mobj.group('host')
|
||||
|
||||
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
month_by_abbreviation,
|
||||
ExtractorError,
|
||||
get_element_by_attribute,
|
||||
)
|
||||
|
||||
|
||||
@@ -23,6 +24,7 @@ class YamIE(InfoExtractor):
|
||||
'id': '2283921',
|
||||
'ext': 'mp3',
|
||||
'title': '發現 - 趙薇 京華煙雲主題曲',
|
||||
'description': '發現 - 趙薇 京華煙雲主題曲',
|
||||
'uploader_id': 'princekt',
|
||||
'upload_date': '20080807',
|
||||
'duration': 313.0,
|
||||
@@ -55,6 +57,17 @@ class YamIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'skip': 'invalid YouTube URL',
|
||||
}, {
|
||||
'url': 'http://mymedia.yam.com/m/2373534',
|
||||
'md5': '7ff74b91b7a817269d83796f8c5890b1',
|
||||
'info_dict': {
|
||||
'id': '2373534',
|
||||
'ext': 'mp3',
|
||||
'title': '林俊傑&蔡卓妍-小酒窩',
|
||||
'description': 'md5:904003395a0fcce6cfb25028ff468420',
|
||||
'upload_date': '20080928',
|
||||
'uploader_id': 'onliner2',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -75,15 +88,19 @@ class YamIE(InfoExtractor):
|
||||
if youtube_url:
|
||||
return self.url_result(youtube_url, 'Youtube')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1[^>]+class="heading"[^>]*>\s*(.+)\s*</h1>', page, 'title')
|
||||
|
||||
api_page = self._download_webpage(
|
||||
'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id,
|
||||
note='Downloading API page')
|
||||
api_result_obj = compat_urlparse.parse_qs(api_page)
|
||||
|
||||
info_table = get_element_by_attribute('class', 'info', page)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z]+)"',
|
||||
page, 'uploader id', fatal=False)
|
||||
mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2}) ' +
|
||||
r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z0-9]+)"',
|
||||
info_table, 'uploader id', fatal=False)
|
||||
mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2})\s+' +
|
||||
r'(?P<day>\d{1,2}), (?P<year>\d{4})', page)
|
||||
if mobj:
|
||||
upload_date = '%s%02d%02d' % (
|
||||
@@ -97,7 +114,8 @@ class YamIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': api_result_obj['mp3file'][0],
|
||||
'title': self._html_search_meta('description', page),
|
||||
'title': title,
|
||||
'description': self._html_search_meta('description', page),
|
||||
'duration': duration,
|
||||
'uploader_id': uploader_id,
|
||||
'upload_date': upload_date,
|
||||
|
||||
@@ -47,7 +47,7 @@ class YouPornIE(InfoExtractor):
|
||||
|
||||
# Get JSON parameters
|
||||
json_params = self._search_regex(
|
||||
[r'var\s+videoJa?son\s*=\s*({.+?});',
|
||||
[r'videoJa?son\s*=\s*({.+})',
|
||||
r'var\s+currentVideo\s*=\s*new\s+Video\((.+?)\)[,;]'],
|
||||
webpage, 'JSON parameters')
|
||||
try:
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -16,7 +14,7 @@ class YourUploadIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://yourupload.com/watch/14i14h',
|
||||
'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
|
||||
'md5': '5e2c63385454c557f97c4c4131a393cd',
|
||||
'info_dict': {
|
||||
'id': '14i14h',
|
||||
'ext': 'mp4',
|
||||
@@ -35,24 +33,21 @@ class YourUploadIE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
url = 'http://embed.yucache.net/{0:}'.format(video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
embed_url = 'http://embed.yucache.net/{0:}'.format(video_id)
|
||||
webpage = self._download_webpage(embed_url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
url = self._og_search_video_url(webpage)
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'url': url,
|
||||
}]
|
||||
video_url = self._og_search_video_url(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'http_headers': {
|
||||
'Referer': embed_url,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1291,12 +1291,22 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
page = self._download_webpage(url, playlist_id)
|
||||
more_widget_html = content_html = page
|
||||
|
||||
# Check if the playlist exists or is private
|
||||
if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
|
||||
raise ExtractorError(
|
||||
'The playlist doesn\'t exist or is private, use --username or '
|
||||
'--netrc to access it.',
|
||||
expected=True)
|
||||
for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
|
||||
match = match.strip()
|
||||
# Check if the playlist exists or is private
|
||||
if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
|
||||
raise ExtractorError(
|
||||
'The playlist doesn\'t exist or is private, use --username or '
|
||||
'--netrc to access it.',
|
||||
expected=True)
|
||||
elif re.match(r'[^<]*Invalid parameters[^<]*', match):
|
||||
raise ExtractorError(
|
||||
'Invalid parameters. Maybe URL is incorrect.',
|
||||
expected=True)
|
||||
elif re.match(r'[^<]*Choose your language[^<]*', match):
|
||||
continue
|
||||
else:
|
||||
self.report_warning('Youtube gives an alert message: ' + match)
|
||||
|
||||
# Extract the video ids from the playlist pages
|
||||
ids = []
|
||||
@@ -1657,13 +1667,42 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE):
|
||||
return self._extract_playlist('WL')
|
||||
|
||||
|
||||
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
|
||||
class YoutubeHistoryIE(YoutubePlaylistIE):
|
||||
IE_NAME = 'youtube:history'
|
||||
IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
|
||||
_VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
|
||||
_FEED_NAME = 'history'
|
||||
_PERSONAL_FEED = True
|
||||
_PLAYLIST_TITLE = 'Youtube Watch History'
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
    """Collect every video id from the watch-history feed into a playlist.

    Starts from the history page and keeps following the "load more"
    link until none is present in the latest widget HTML.
    """
    title = 'Youtube History'
    page = self._download_webpage('https://www.youtube.com/feed/history', title)

    # The extraction process is the same as for playlists, but the regex
    # for the video ids doesn't contain an index
    video_ids = []
    content_html = page
    more_widget_html = page

    page_num = 0
    while True:
        page_num += 1
        video_ids.extend(orderedSet(re.findall(
            r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)))

        load_more = re.search(
            r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
        if load_more is None:
            # No continuation link: this was the last page.
            break

        next_page = self._download_json(
            'https://youtube.com/%s' % load_more.group('more'), title,
            'Downloading page #%s' % page_num,
            transform_source=uppercase_escape)
        content_html = next_page['content_html']
        more_widget_html = next_page['load_more_widget_html']

    return {
        '_type': 'playlist',
        'title': title,
        'entries': self._ids_to_results(video_ids),
    }
|
||||
|
||||
|
||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
@@ -4,12 +4,18 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class ZingMp3BaseInfoExtractor(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_item(item):
|
||||
def _extract_item(self, item):
|
||||
error_message = item.find('./errormessage').text
|
||||
if error_message:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_message),
|
||||
expected=True)
|
||||
|
||||
title = item.find('./title').text.strip()
|
||||
source = item.find('./source').text
|
||||
extension = item.attrib['type']
|
||||
|
||||
@@ -637,7 +637,7 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option(
|
||||
'--write-annotations',
|
||||
action='store_true', dest='writeannotations', default=False,
|
||||
help='Write video annotations to a .annotation file')
|
||||
help='Write video annotations to a .annotations.xml file')
|
||||
filesystem.add_option(
|
||||
'--load-info',
|
||||
dest='load_info_filename', metavar='FILE',
|
||||
@@ -698,7 +698,7 @@ def parseOpts(overrideArguments=None):
|
||||
postproc.add_option(
|
||||
'--embed-subs',
|
||||
action='store_true', dest='embedsubtitles', default=False,
|
||||
help='Embed subtitles in the video (only for mp4 videos)')
|
||||
help='Embed subtitles in the video (only for mkv and mp4 videos)')
|
||||
postproc.add_option(
|
||||
'--embed-thumbnail',
|
||||
action='store_true', dest='embedthumbnail', default=False,
|
||||
|
||||
@@ -7,12 +7,9 @@ import subprocess
|
||||
|
||||
from .ffmpeg import FFmpegPostProcessor
|
||||
|
||||
from ..compat import (
|
||||
compat_urlretrieve,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
check_executable,
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
PostProcessingError,
|
||||
prepend_extension,
|
||||
@@ -25,26 +22,30 @@ class EmbedThumbnailPPError(PostProcessingError):
|
||||
|
||||
|
||||
class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||
def __init__(self, downloader=None, already_have_thumbnail=False):
|
||||
super(EmbedThumbnailPP, self).__init__(downloader)
|
||||
self._already_have_thumbnail = already_have_thumbnail
|
||||
|
||||
def run(self, info):
|
||||
filename = info['filepath']
|
||||
temp_filename = prepend_extension(filename, 'temp')
|
||||
temp_thumbnail = filename + '.' + determine_ext(info['thumbnail'])
|
||||
|
||||
if not info.get('thumbnail'):
|
||||
if not info.get('thumbnails'):
|
||||
raise EmbedThumbnailPPError('Thumbnail was not found. Nothing to do.')
|
||||
|
||||
compat_urlretrieve(info['thumbnail'], temp_thumbnail)
|
||||
thumbnail_filename = info['thumbnails'][-1]['filename']
|
||||
|
||||
if info['ext'] == 'mp3':
|
||||
options = [
|
||||
'-i', temp_thumbnail, '-c', 'copy', '-map', '0', '-map', '1',
|
||||
'-c', 'copy', '-map', '0', '-map', '1',
|
||||
'-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"']
|
||||
|
||||
self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)
|
||||
|
||||
self.run_ffmpeg(filename, temp_filename, options)
|
||||
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
|
||||
|
||||
os.remove(encodeFilename(temp_thumbnail))
|
||||
if not self._already_have_thumbnail:
|
||||
os.remove(encodeFilename(thumbnail_filename))
|
||||
os.remove(encodeFilename(filename))
|
||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||
|
||||
@@ -52,7 +53,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||
if not check_executable('AtomicParsley', ['-v']):
|
||||
raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
|
||||
|
||||
cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename]
|
||||
cmd = [encodeFilename('AtomicParsley', True),
|
||||
encodeFilename(filename, True),
|
||||
encodeArgument('--artwork'),
|
||||
encodeFilename(thumbnail_filename, True),
|
||||
encodeArgument('-o'),
|
||||
encodeFilename(temp_filename, True)]
|
||||
|
||||
self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
|
||||
|
||||
@@ -66,7 +72,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||
msg = stderr.decode('utf-8', 'replace').strip()
|
||||
raise EmbedThumbnailPPError(msg)
|
||||
|
||||
os.remove(encodeFilename(temp_thumbnail))
|
||||
if not self._already_have_thumbnail:
|
||||
os.remove(encodeFilename(thumbnail_filename))
|
||||
# for formats that don't support thumbnails (like 3gp) AtomicParsley
|
||||
# won't create to the temporary file
|
||||
if b'No changes' in stdout:
|
||||
|
||||
@@ -8,8 +8,8 @@ from ..utils import PostProcessingError
|
||||
|
||||
|
||||
class ExecAfterDownloadPP(PostProcessor):
|
||||
def __init__(self, downloader=None, verboseOutput=None, exec_cmd=None):
|
||||
self.verboseOutput = verboseOutput
|
||||
def __init__(self, downloader, exec_cmd):
|
||||
super(ExecAfterDownloadPP, self).__init__(downloader)
|
||||
self.exec_cmd = exec_cmd
|
||||
|
||||
def run(self, information):
|
||||
|
||||
@@ -501,8 +501,8 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
return cls._lang_map.get(code[:2])
|
||||
|
||||
def run(self, information):
|
||||
if information['ext'] != 'mp4':
|
||||
self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
|
||||
if information['ext'] not in ['mp4', 'mkv']:
|
||||
self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files')
|
||||
return [], information
|
||||
subtitles = information.get('requested_subtitles')
|
||||
if not subtitles:
|
||||
@@ -520,8 +520,9 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
# Don't copy the existing subtitles, we may be running the
|
||||
# postprocessor a second time
|
||||
'-map', '-0:s',
|
||||
'-c:s', 'mov_text',
|
||||
]
|
||||
if information['ext'] == 'mp4':
|
||||
opts += ['-c:s', 'mov_text']
|
||||
for (i, lang) in enumerate(sub_langs):
|
||||
opts.extend(['-map', '%d:0' % (i + 1)])
|
||||
lang_code = self._conver_lang_code(lang)
|
||||
@@ -590,6 +591,23 @@ class FFmpegMergerPP(FFmpegPostProcessor):
|
||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||
return info['__files_to_merge'], info
|
||||
|
||||
def can_merge(self):
    """Return whether the detected binary can mux separate video and audio.

    Any binary other than avconv is accepted as-is. For avconv the detected
    version is compared against '10-0'; if it is outdated, a warning is
    reported (when a downloader is attached) and False is returned.
    """
    # TODO: figure out merge-capable ffmpeg version
    if self.basename == 'avconv':
        required_version = '10-0'
        installed_version = self._versions[self.basename]
        if is_outdated_version(installed_version, required_version):
            template = (
                'Your copy of %s is outdated and unable to properly mux separate video and audio files, '
                'youtube-dl will download single file media. '
                'Update %s to version %s or newer to fix this.')
            warning = template % (
                self.basename, self.basename, required_version)
            if self._downloader:
                self._downloader.report_warning(warning)
            return False
    return True
|
||||
|
||||
|
||||
class FFmpegFixupStretchedPP(FFmpegPostProcessor):
|
||||
def run(self, info):
|
||||
|
||||
@@ -3,18 +3,34 @@ from __future__ import unicode_literals
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import errno
|
||||
|
||||
from .common import PostProcessor
|
||||
from ..compat import (
|
||||
subprocess_check_output
|
||||
)
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
hyphenate_date,
|
||||
version_tuple,
|
||||
PostProcessingError,
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
class XAttrMetadataError(PostProcessingError):
|
||||
def __init__(self, code=None, msg='Unknown error'):
|
||||
super(XAttrMetadataError, self).__init__(msg)
|
||||
self.code = code
|
||||
|
||||
# Parsing code and msg
|
||||
if (self.code in (errno.ENOSPC, errno.EDQUOT) or
|
||||
'No space left' in self.msg or 'Disk quota excedded' in self.msg):
|
||||
self.reason = 'NO_SPACE'
|
||||
elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
|
||||
self.reason = 'VALUE_TOO_LONG'
|
||||
else:
|
||||
self.reason = 'NOT_SUPPORTED'
|
||||
|
||||
|
||||
class XAttrMetadataPP(PostProcessor):
|
||||
|
||||
#
|
||||
@@ -51,7 +67,10 @@ class XAttrMetadataPP(PostProcessor):
|
||||
raise ImportError
|
||||
|
||||
def write_xattr(path, key, value):
|
||||
return xattr.setxattr(path, key, value)
|
||||
try:
|
||||
xattr.set(path, key, value)
|
||||
except EnvironmentError as e:
|
||||
raise XAttrMetadataError(e.errno, e.strerror)
|
||||
|
||||
except ImportError:
|
||||
if os.name == 'nt':
|
||||
@@ -62,8 +81,11 @@ class XAttrMetadataPP(PostProcessor):
|
||||
assert os.path.exists(path)
|
||||
|
||||
ads_fn = path + ":" + key
|
||||
with open(ads_fn, "wb") as f:
|
||||
f.write(value)
|
||||
try:
|
||||
with open(ads_fn, "wb") as f:
|
||||
f.write(value)
|
||||
except EnvironmentError as e:
|
||||
raise XAttrMetadataError(e.errno, e.strerror)
|
||||
else:
|
||||
user_has_setfattr = check_executable("setfattr", ['--version'])
|
||||
user_has_xattr = check_executable("xattr", ['-h'])
|
||||
@@ -71,12 +93,27 @@ class XAttrMetadataPP(PostProcessor):
|
||||
if user_has_setfattr or user_has_xattr:
|
||||
|
||||
def write_xattr(path, key, value):
|
||||
value = value.decode('utf-8')
|
||||
if user_has_setfattr:
|
||||
cmd = ['setfattr', '-n', key, '-v', value, path]
|
||||
executable = 'setfattr'
|
||||
opts = ['-n', key, '-v', value]
|
||||
elif user_has_xattr:
|
||||
cmd = ['xattr', '-w', key, value, path]
|
||||
executable = 'xattr'
|
||||
opts = ['-w', key, value]
|
||||
|
||||
subprocess_check_output(cmd)
|
||||
cmd = ([encodeFilename(executable, True)] +
|
||||
[encodeArgument(o) for o in opts] +
|
||||
[encodeFilename(path, True)])
|
||||
|
||||
try:
|
||||
p = subprocess.Popen(
|
||||
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
|
||||
except EnvironmentError as e:
|
||||
raise XAttrMetadataError(e.errno, e.strerror)
|
||||
stdout, stderr = p.communicate()
|
||||
stderr = stderr.decode('utf-8', 'replace')
|
||||
if p.returncode != 0:
|
||||
raise XAttrMetadataError(p.returncode, stderr)
|
||||
|
||||
else:
|
||||
# On Unix, and can't find pyxattr, setfattr, or xattr.
|
||||
@@ -121,6 +158,19 @@ class XAttrMetadataPP(PostProcessor):
|
||||
|
||||
return [], info
|
||||
|
||||
except (subprocess.CalledProcessError, OSError):
|
||||
self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")
|
||||
except XAttrMetadataError as e:
|
||||
if e.reason == 'NO_SPACE':
|
||||
self._downloader.report_warning(
|
||||
'There\'s no disk space left or disk quota exceeded. ' +
|
||||
'Extended attributes are not written.')
|
||||
elif e.reason == 'VALUE_TOO_LONG':
|
||||
self._downloader.report_warning(
|
||||
'Unable to write extended attributes due to too long values.')
|
||||
else:
|
||||
msg = 'This filesystem doesn\'t support extended attributes. '
|
||||
if os.name == 'nt':
|
||||
msg += 'You need to use NTFS.'
|
||||
else:
|
||||
msg += '(You may have to enable them in your /etc/fstab)'
|
||||
self._downloader.report_error(msg)
|
||||
return [], info
|
||||
|
||||
@@ -327,13 +327,6 @@ def sanitize_path(s):
|
||||
return os.path.join(*sanitized_path)
|
||||
|
||||
|
||||
def sanitize_url_path_consecutive_slashes(url):
|
||||
"""Collapses consecutive slashes in URLs' path"""
|
||||
parsed_url = list(compat_urlparse.urlparse(url))
|
||||
parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
|
||||
return compat_urlparse.urlunparse(parsed_url)
|
||||
|
||||
|
||||
def orderedSet(iterable):
|
||||
""" Remove all duplicates from the input iterable """
|
||||
res = []
|
||||
@@ -1349,9 +1342,19 @@ def parse_duration(s):
|
||||
return res
|
||||
|
||||
|
||||
def prepend_extension(filename, ext):
|
||||
def prepend_extension(filename, ext, expected_real_ext=None):
|
||||
name, real_ext = os.path.splitext(filename)
|
||||
return '{0}.{1}{2}'.format(name, ext, real_ext)
|
||||
return (
|
||||
'{0}.{1}{2}'.format(name, ext, real_ext)
|
||||
if not expected_real_ext or real_ext[1:] == expected_real_ext
|
||||
else '{0}.{1}'.format(filename, ext))
|
||||
|
||||
|
||||
def replace_extension(filename, ext, expected_real_ext=None):
|
||||
name, real_ext = os.path.splitext(filename)
|
||||
return '{0}.{1}'.format(
|
||||
name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
|
||||
ext)
|
||||
|
||||
|
||||
def check_executable(exe, args=[]):
|
||||
@@ -1370,7 +1373,7 @@ def get_exe_version(exe, args=['--version'],
|
||||
or False if the executable is not present """
|
||||
try:
|
||||
out, _ = subprocess.Popen(
|
||||
[exe] + args,
|
||||
[encodeArgument(exe)] + args,
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
|
||||
except OSError:
|
||||
return False
|
||||
@@ -1476,6 +1479,14 @@ def uppercase_escape(s):
|
||||
s)
|
||||
|
||||
|
||||
def lowercase_escape(s):
|
||||
unicode_escape = codecs.getdecoder('unicode_escape')
|
||||
return re.sub(
|
||||
r'\\u[0-9a-fA-F]{4}',
|
||||
lambda m: unicode_escape(m.group(0))[0],
|
||||
s)
|
||||
|
||||
|
||||
def escape_rfc3986(s):
|
||||
"""Escape non-ASCII characters as suggested by RFC 3986"""
|
||||
if sys.version_info < (3, 0) and isinstance(s, compat_str):
|
||||
@@ -1824,12 +1835,8 @@ def parse_dfxp_time_expr(time_expr):
|
||||
return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))
|
||||
|
||||
|
||||
def format_srt_time(seconds):
|
||||
(mins, secs) = divmod(seconds, 60)
|
||||
(hours, mins) = divmod(mins, 60)
|
||||
millisecs = (secs - int(secs)) * 1000
|
||||
secs = int(secs)
|
||||
return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs)
|
||||
def srt_subtitles_timecode(seconds):
|
||||
return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
|
||||
|
||||
|
||||
def dfxp2srt(dfxp_data):
|
||||
@@ -1855,10 +1862,14 @@ def dfxp2srt(dfxp_data):
|
||||
paras = dfxp.findall(_x('.//ttml:p'))
|
||||
|
||||
for para, index in zip(paras, itertools.count(1)):
|
||||
begin_time = parse_dfxp_time_expr(para.attrib['begin'])
|
||||
end_time = parse_dfxp_time_expr(para.attrib.get('end'))
|
||||
if not end_time:
|
||||
end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur'])
|
||||
out.append('%d\n%s --> %s\n%s\n\n' % (
|
||||
index,
|
||||
format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))),
|
||||
format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))),
|
||||
srt_subtitles_timecode(begin_time),
|
||||
srt_subtitles_timecode(end_time),
|
||||
parse_node(para)))
|
||||
|
||||
return ''.join(out)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.04.28'
|
||||
__version__ = '2015.05.15'
|
||||
|
||||
Reference in New Issue
Block a user