mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-12-25 09:20:29 +01:00
Compare commits
359 Commits
2015.07.07
...
2015.07.28
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
289bbb350e | ||
|
|
d247a2c8bf | ||
|
|
88ed52aec9 | ||
|
|
4c6bd5b5b6 | ||
|
|
aeb7b41d44 | ||
|
|
5bdec59de1 | ||
|
|
7a89681722 | ||
|
|
51da40e621 | ||
|
|
1af330f29f | ||
|
|
9afa1770d1 | ||
|
|
3ebbcce1c7 | ||
|
|
2c7c721933 | ||
|
|
7523647391 | ||
|
|
9700cd9097 | ||
|
|
eab7faa0c1 | ||
|
|
a56c1e38c7 | ||
|
|
40a2d17052 | ||
|
|
b14fa8e687 | ||
|
|
678e436f2e | ||
|
|
ff81c4c99c | ||
|
|
420658e6cb | ||
|
|
593ddd851b | ||
|
|
1243402657 | ||
|
|
2b2ee140c3 | ||
|
|
d97f5cd795 | ||
|
|
f3f0b8e403 | ||
|
|
660f9459da | ||
|
|
10952eb2cf | ||
|
|
cdad742700 | ||
|
|
a9e8f60ef6 | ||
|
|
a8b7b26068 | ||
|
|
ba911137fa | ||
|
|
d3f007af18 | ||
|
|
2929fa0e79 | ||
|
|
297a564bee | ||
|
|
53b8247cb5 | ||
|
|
59db9f8018 | ||
|
|
b73b14f72c | ||
|
|
41597d9bed | ||
|
|
b37317d8b0 | ||
|
|
87dc451108 | ||
|
|
ca4456eda8 | ||
|
|
993df6bc22 | ||
|
|
61be92e26a | ||
|
|
c59b61c0da | ||
|
|
3e214851a4 | ||
|
|
a47b602b08 | ||
|
|
a083b859e4 | ||
|
|
948199deac | ||
|
|
c356620ec1 | ||
|
|
f79ebf09a2 | ||
|
|
c7620992d2 | ||
|
|
ce1bafdce9 | ||
|
|
9872e588c8 | ||
|
|
d609edf4f1 | ||
|
|
3a99d321a8 | ||
|
|
4bb3d999ac | ||
|
|
40101dc311 | ||
|
|
e9c6deffee | ||
|
|
9c29bc69f7 | ||
|
|
1e12429564 | ||
|
|
795704f0f1 | ||
|
|
981b9cdc8c | ||
|
|
3f724339db | ||
|
|
70c857b728 | ||
|
|
c84683c88b | ||
|
|
b68a2613f8 | ||
|
|
28afa6e77a | ||
|
|
496ce6b349 | ||
|
|
ce9512b78b | ||
|
|
4eb59a6b1c | ||
|
|
80b1ee0a4c | ||
|
|
f993afb26d | ||
|
|
7c80519cbf | ||
|
|
8250c32f49 | ||
|
|
2fe1ff8582 | ||
|
|
17ee98e1a5 | ||
|
|
2ee8f5d80f | ||
|
|
3f302bca8c | ||
|
|
c909e5820e | ||
|
|
a1b85269a4 | ||
|
|
faa1f83ab4 | ||
|
|
308c505c3d | ||
|
|
0eacd2aaae | ||
|
|
18ae46ad4b | ||
|
|
65c2b21df1 | ||
|
|
772acaf31f | ||
|
|
f8d0745e27 | ||
|
|
d719c6a5ab | ||
|
|
769efa16af | ||
|
|
86b4e98ac6 | ||
|
|
3bf8c316a6 | ||
|
|
e37c92ec6d | ||
|
|
a5dd9a0c5d | ||
|
|
7a4a945f13 | ||
|
|
1d18e26eca | ||
|
|
ac4b8df5e4 | ||
|
|
3bc9fb5889 | ||
|
|
632cbb8efa | ||
|
|
789a12aaaf | ||
|
|
ecdbe09e10 | ||
|
|
1dc31c2786 | ||
|
|
32470bf619 | ||
|
|
8b61bfd638 | ||
|
|
8a7a208905 | ||
|
|
0215103e92 | ||
|
|
c2d1be8981 | ||
|
|
4951c9f821 | ||
|
|
726adc43ec | ||
|
|
3c6ae8b59e | ||
|
|
605be3f7f8 | ||
|
|
c51bc70e0f | ||
|
|
e89d7e3029 | ||
|
|
4b0f45f667 | ||
|
|
36068ae019 | ||
|
|
761ee0d827 | ||
|
|
fb8bc3f818 | ||
|
|
826a7da808 | ||
|
|
cbd55ade68 | ||
|
|
5705ee6ef8 | ||
|
|
3f5c6d0c1b | ||
|
|
e58066e244 | ||
|
|
ee48b6a88f | ||
|
|
9ac09ed4de | ||
|
|
22603348aa | ||
|
|
fec73daaa3 | ||
|
|
c6b68648f4 | ||
|
|
1ecb5d1d83 | ||
|
|
dc786d3db5 | ||
|
|
74fe23ec35 | ||
|
|
b0bff54b08 | ||
|
|
1b541d8d6e | ||
|
|
f29ac588dd | ||
|
|
0696667734 | ||
|
|
1793d71db6 | ||
|
|
4211e1941b | ||
|
|
4bdfef5a18 | ||
|
|
8a37f53685 | ||
|
|
4e1ad6e9a8 | ||
|
|
fb10e1aa57 | ||
|
|
3c283a381e | ||
|
|
dac4d5be12 | ||
|
|
530857182d | ||
|
|
9441f77faa | ||
|
|
3cc8b4c327 | ||
|
|
6b19647d57 | ||
|
|
7bd42d0d96 | ||
|
|
c60e8cfaf7 | ||
|
|
7fd002c006 | ||
|
|
db6c50f109 | ||
|
|
aa4789d632 | ||
|
|
ee8de13e14 | ||
|
|
7dde5f6a8d | ||
|
|
736f003f2e | ||
|
|
47af21e8f1 | ||
|
|
605cbef653 | ||
|
|
388ad0c05c | ||
|
|
2ebbb6f1f7 | ||
|
|
d54f1c7477 | ||
|
|
b78f5ec4c3 | ||
|
|
9fd3bf04b7 | ||
|
|
e97bb3de83 | ||
|
|
c2daf8dfa4 | ||
|
|
09b718c439 | ||
|
|
c177bb3a50 | ||
|
|
977a247a06 | ||
|
|
899a3e2f13 | ||
|
|
8ee4ecb48d | ||
|
|
f7e6f7fa23 | ||
|
|
1f80e360fc | ||
|
|
d7011316d0 | ||
|
|
d3671b344f | ||
|
|
a60cccbf9f | ||
|
|
3e72f5f10e | ||
|
|
b94b78971c | ||
|
|
4d08161ac2 | ||
|
|
8954e48140 | ||
|
|
aa99aa4e85 | ||
|
|
d79febcd06 | ||
|
|
13fc7f3a05 | ||
|
|
14309e1ddc | ||
|
|
5513967926 | ||
|
|
eacd875f3b | ||
|
|
c4fe07c7af | ||
|
|
1186e3f91a | ||
|
|
f354385bf5 | ||
|
|
cabe001590 | ||
|
|
89f691e141 | ||
|
|
4a63291144 | ||
|
|
593b77064c | ||
|
|
9fefc88656 | ||
|
|
a3bfddfa5e | ||
|
|
36da48798a | ||
|
|
a0f28f90fa | ||
|
|
851229a01f | ||
|
|
c9c854cea7 | ||
|
|
a38436e889 | ||
|
|
23fc384f2c | ||
|
|
1540119723 | ||
|
|
574f42d79a | ||
|
|
536b0700b0 | ||
|
|
5ba761eb85 | ||
|
|
611ac379bb | ||
|
|
03f32a7ead | ||
|
|
50ea2bb20d | ||
|
|
525daedd5a | ||
|
|
e118031ef8 | ||
|
|
45eedbe58c | ||
|
|
e37c932fca | ||
|
|
5eb778bf4d | ||
|
|
ab9b890b52 | ||
|
|
31c746e5dc | ||
|
|
f01f731107 | ||
|
|
70f0f5a8ca | ||
|
|
cc357c4db8 | ||
|
|
97f4aecfc1 | ||
|
|
2af0f87c8b | ||
|
|
b062d94eef | ||
|
|
6c1b0c0ed2 | ||
|
|
ddcdc684e2 | ||
|
|
eae89f92e6 | ||
|
|
01d115b06b | ||
|
|
79057965a8 | ||
|
|
dcd4d95c8e | ||
|
|
cf61d96df0 | ||
|
|
f8da79f828 | ||
|
|
9750e7d70e | ||
|
|
50aa2bb6b9 | ||
|
|
1d1dd597ed | ||
|
|
cfe5537ee5 | ||
|
|
7869eb3fc4 | ||
|
|
6dfa0602f0 | ||
|
|
75a40b2251 | ||
|
|
28fb109ed0 | ||
|
|
48607afac5 | ||
|
|
b6ea9ef21a | ||
|
|
b8dd44baa9 | ||
|
|
c4f1fde75b | ||
|
|
667170e2c7 | ||
|
|
53429e6551 | ||
|
|
ac8f97f2b3 | ||
|
|
41c0d2f8cb | ||
|
|
1f3a43dbe6 | ||
|
|
369e195a44 | ||
|
|
15006fedb9 | ||
|
|
e35b23f54d | ||
|
|
f72b0a6032 | ||
|
|
ac9ed061ec | ||
|
|
d919fa3344 | ||
|
|
79913fde35 | ||
|
|
da634d0a8b | ||
|
|
fac54cb426 | ||
|
|
3f19b9b7c1 | ||
|
|
86f2541695 | ||
|
|
181c4ccaaa | ||
|
|
ed848087d5 | ||
|
|
edd66be5be | ||
|
|
246995dbc8 | ||
|
|
b931fbe5ab | ||
|
|
e014ff015d | ||
|
|
4fa5f40232 | ||
|
|
9b15be97aa | ||
|
|
a7ada46bd9 | ||
|
|
9d16788ad9 | ||
|
|
6ce89aecc3 | ||
|
|
963d0ce7e3 | ||
|
|
0f08d7f851 | ||
|
|
44c514eb9c | ||
|
|
513cbdda93 | ||
|
|
e1ba152352 | ||
|
|
446e764500 | ||
|
|
901d00caa6 | ||
|
|
094790d2c9 | ||
|
|
1c0163a5cc | ||
|
|
8fa7e5817a | ||
|
|
01b89d5682 | ||
|
|
9f01c1a803 | ||
|
|
46f0f50016 | ||
|
|
b8070dbbd7 | ||
|
|
3b16d803c9 | ||
|
|
de195c23a6 | ||
|
|
d3b8908886 | ||
|
|
2688176c77 | ||
|
|
a5839317aa | ||
|
|
a0aab26a41 | ||
|
|
27713812a0 | ||
|
|
cf2c5fda4f | ||
|
|
a9684c0dbf | ||
|
|
c0bf5e1c4d | ||
|
|
a31e3e7dcb | ||
|
|
17b41a3337 | ||
|
|
89a683ae74 | ||
|
|
008661069b | ||
|
|
9296e92e1c | ||
|
|
a34af8d066 | ||
|
|
8726e04629 | ||
|
|
2a01c940ec | ||
|
|
4eab60cbd2 | ||
|
|
a0e060ac1e | ||
|
|
397a8ea96e | ||
|
|
15830339ef | ||
|
|
b29280285e | ||
|
|
1633491bff | ||
|
|
2b0fa1f7dd | ||
|
|
02b386f80a | ||
|
|
bf20b9c540 | ||
|
|
06a12933f3 | ||
|
|
6dd94d3a79 | ||
|
|
f2f89c762a | ||
|
|
e6c2d9ad29 | ||
|
|
83423254cc | ||
|
|
1c20ddc966 | ||
|
|
675e9f22ea | ||
|
|
77c6fb5b24 | ||
|
|
082a0140ef | ||
|
|
9e535ce055 | ||
|
|
d76dea001b | ||
|
|
af0f9b0e95 | ||
|
|
e2082ea942 | ||
|
|
68923e52a3 | ||
|
|
9281f6d253 | ||
|
|
4647845679 | ||
|
|
cf9cf7dd04 | ||
|
|
1316b54956 | ||
|
|
cbc1fadd6f | ||
|
|
37c1e4025c | ||
|
|
aa5d9a79d6 | ||
|
|
1866432db7 | ||
|
|
2028c6e03d | ||
|
|
aa5740fb61 | ||
|
|
da92eeae42 | ||
|
|
a9dcf4a860 | ||
|
|
2da0cad6ae | ||
|
|
af1fa6234e | ||
|
|
2a282a3b5f | ||
|
|
7bb23aeca4 | ||
|
|
de939d89eb | ||
|
|
77c975f536 | ||
|
|
75ab0ebcf5 | ||
|
|
10273d6e08 | ||
|
|
d5552a3477 | ||
|
|
a8b081a052 | ||
|
|
9e96dc8b35 | ||
|
|
8f73e89ca0 | ||
|
|
14835de9fb | ||
|
|
8a1a26ce4c | ||
|
|
5bf3276e8d | ||
|
|
93dfcb9357 | ||
|
|
0c8662d2b6 | ||
|
|
d84f1d14b5 | ||
|
|
4da31bd566 | ||
|
|
423d2be5f8 | ||
|
|
453a1617aa | ||
|
|
b9258c6178 | ||
|
|
6800d3372f | ||
|
|
a650110ba7 | ||
|
|
54b31d149e | ||
|
|
a745475808 | ||
|
|
f11554092b |
5
AUTHORS
5
AUTHORS
@@ -130,3 +130,8 @@ Peter Ding
|
||||
jackyzy823
|
||||
George Brighton
|
||||
Remita Amine
|
||||
Aurélio A. Heckert
|
||||
Bernhard Minks
|
||||
sceext
|
||||
Zach Bruggeman
|
||||
Tjark Saul
|
||||
|
||||
25
README.md
25
README.md
@@ -75,7 +75,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER Playlist video to start at (default is 1)
|
||||
--playlist-end NUMBER Playlist video to end at (default is last)
|
||||
--playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
|
||||
--playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
|
||||
if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
|
||||
download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
|
||||
--match-title REGEX Download only matching titles (regex or caseless sub-string)
|
||||
@@ -214,7 +214,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
|
||||
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
|
||||
5)
|
||||
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
|
||||
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)
|
||||
--postprocessor-args ARGS Give these arguments to the postprocessor
|
||||
-k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default
|
||||
--no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default
|
||||
--embed-subs Embed subtitles in the video (only for mkv and mp4 videos)
|
||||
@@ -237,6 +238,26 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
|
||||
|
||||
### Authentication with `.netrc` file ###
|
||||
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`), so that you do not have to pass credentials as command-line arguments on every youtube-dl execution and so that plain-text passwords do not end up in your shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
|
||||
```
|
||||
touch $HOME/.netrc
|
||||
chmod a-rwx,u+rw $HOME/.netrc
|
||||
```
|
||||
After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
|
||||
```
|
||||
machine <extractor> login <login> password <password>
|
||||
```
|
||||
For example:
|
||||
```
|
||||
machine youtube login myaccount@gmail.com password my_youtube_password
|
||||
machine twitch login my_twitch_account_name password my_twitch_password
|
||||
```
|
||||
To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
|
||||
|
||||
On Windows you may also need to set up the `%HOME%` environment variable manually.
|
||||
|
||||
# OUTPUT TEMPLATE
|
||||
|
||||
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
|
||||
|
||||
@@ -28,7 +28,8 @@
|
||||
- **anitube.se**
|
||||
- **AnySex**
|
||||
- **Aparat**
|
||||
- **AppleDaily**
|
||||
- **AppleConnect**
|
||||
- **AppleDaily**: 臺灣蘋果日報
|
||||
- **AppleTrailers**
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
@@ -45,11 +46,12 @@
|
||||
- **audiomack**
|
||||
- **audiomack:album**
|
||||
- **Azubu**
|
||||
- **BaiduVideo**
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **bbc**: BBC
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
- **BeatportPro**
|
||||
- **Beeg**
|
||||
@@ -106,7 +108,7 @@
|
||||
- **Crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**
|
||||
- **CtsNews**: 華視新聞
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **dailymotion**
|
||||
- **dailymotion:playlist**
|
||||
@@ -121,7 +123,7 @@
|
||||
- **Discovery**
|
||||
- **divxstage**: DivxStage
|
||||
- **Dotsub**
|
||||
- **DouyuTV**
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **dramafever**
|
||||
- **dramafever:series**
|
||||
- **DRBonanza**
|
||||
@@ -222,7 +224,8 @@
|
||||
- **instagram:user**: Instagram user profile
|
||||
- **InternetVideoArchive**
|
||||
- **IPrima**
|
||||
- **iqiyi**
|
||||
- **iqiyi**: 爱奇艺
|
||||
- **Ir90Tv**
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **Izlesene**
|
||||
@@ -243,9 +246,16 @@
|
||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||
- **KrasView**: Красвью
|
||||
- **Ku6**
|
||||
- **kuwo:album**: 酷我音乐 - 专辑
|
||||
- **kuwo:category**: 酷我音乐 - 分类
|
||||
- **kuwo:chart**: 酷我音乐 - 排行榜
|
||||
- **kuwo:mv**: 酷我音乐 - MV
|
||||
- **kuwo:singer**: 酷我音乐 - 歌手
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.tv**
|
||||
- **Laola1Tv**
|
||||
- **Letv**
|
||||
- **Lecture2Go**
|
||||
- **Letv**: 乐视网
|
||||
- **LetvPlaylist**
|
||||
- **LetvTv**
|
||||
- **Libsyn**
|
||||
@@ -297,6 +307,7 @@
|
||||
- **MySpace**
|
||||
- **MySpace:album**
|
||||
- **MySpass**
|
||||
- **Myvi**
|
||||
- **myvideo**
|
||||
- **MyVidster**
|
||||
- **N-JOY**
|
||||
@@ -312,11 +323,18 @@
|
||||
- **NDTV**
|
||||
- **NerdCubedFeed**
|
||||
- **Nerdist**
|
||||
- **netease:album**: 网易云音乐 - 专辑
|
||||
- **netease:djradio**: 网易云音乐 - 电台
|
||||
- **netease:mv**: 网易云音乐 - MV
|
||||
- **netease:playlist**: 网易云音乐 - 歌单
|
||||
- **netease:program**: 网易云音乐 - 电台节目
|
||||
- **netease:singer**: 网易云音乐 - 歌手
|
||||
- **netease:song**: 网易云音乐
|
||||
- **Netzkino**
|
||||
- **Newgrounds**
|
||||
- **Newstube**
|
||||
- **NextMedia**
|
||||
- **NextMediaActionNews**
|
||||
- **NextMedia**: 蘋果日報
|
||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **nhl.com**
|
||||
@@ -332,13 +350,14 @@
|
||||
- **Nowness**
|
||||
- **NowTV**
|
||||
- **nowvideo**: NowVideo
|
||||
- **npo.nl**
|
||||
- **npo**: npo.nl and ntr.nl
|
||||
- **npo**: npo.nl and ntr.nl
|
||||
- **npo.nl:live**
|
||||
- **npo.nl:radio**
|
||||
- **npo.nl:radio:fragment**
|
||||
- **NRK**
|
||||
- **NRKPlaylist**
|
||||
- **NRKTV**
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
- **NYTimes**
|
||||
@@ -382,11 +401,11 @@
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **Puls4**
|
||||
- **Pyvideo**
|
||||
- **qqmusic**
|
||||
- **qqmusic:album**
|
||||
- **qqmusic:playlist**
|
||||
- **qqmusic:singer**
|
||||
- **qqmusic:toplist**
|
||||
- **qqmusic**: QQ音乐
|
||||
- **qqmusic:album**: QQ音乐 - 专辑
|
||||
- **qqmusic:playlist**: QQ音乐 - 歌单
|
||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **QuickVid**
|
||||
- **R7**
|
||||
- **radio.de**
|
||||
@@ -395,6 +414,7 @@
|
||||
- **RadioJavan**
|
||||
- **Rai**
|
||||
- **RBMARadio**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedTube**
|
||||
- **Restudy**
|
||||
- **ReverbNation**
|
||||
@@ -470,6 +490,7 @@
|
||||
- **SportBox**
|
||||
- **SportBoxEmbed**
|
||||
- **SportDeutschland**
|
||||
- **Sportschau**
|
||||
- **Srf**
|
||||
- **SRMediathek**: Saarländischer Rundfunk
|
||||
- **SSA**
|
||||
@@ -495,7 +516,6 @@
|
||||
- **TechTalks**
|
||||
- **techtv.mit.edu**
|
||||
- **ted**
|
||||
- **tegenlicht.vpro.nl**
|
||||
- **TeleBruxelles**
|
||||
- **telecinco.es**
|
||||
- **TeleMB**
|
||||
@@ -551,7 +571,7 @@
|
||||
- **Ubu**
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
- **UDNEmbed**
|
||||
- **UDNEmbed**: 聯合影音
|
||||
- **Ultimedia**
|
||||
- **Unistra**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
@@ -596,8 +616,8 @@
|
||||
- **Vimple**: Vimple - one-click video hosting
|
||||
- **Vine**
|
||||
- **vine:user**
|
||||
- **vk.com**
|
||||
- **vk.com:user-videos**: vk.com:All of a user's videos
|
||||
- **vk**: VK
|
||||
- **vk:uservideos**: VK - User's Videos
|
||||
- **Vodlocker**
|
||||
- **VoiceRepublic**
|
||||
- **Vporn**
|
||||
@@ -613,9 +633,11 @@
|
||||
- **wdr:mobile**
|
||||
- **WDRMaus**: Sendung mit der Maus
|
||||
- **WebOfStories**
|
||||
- **WebOfStoriesPlaylist**
|
||||
- **Weibo**
|
||||
- **Wimp**
|
||||
- **Wistia**
|
||||
- **WNL**
|
||||
- **WorldStarHipHop**
|
||||
- **wrzuta.pl**
|
||||
- **WSJ**: Wall Street Journal
|
||||
@@ -628,18 +650,19 @@
|
||||
- **Xstream**
|
||||
- **XTube**
|
||||
- **XTubeUser**: XTube user profile
|
||||
- **Xuite**
|
||||
- **Xuite**: 隨意窩Xuite影音
|
||||
- **XVideos**
|
||||
- **XXXYMovies**
|
||||
- **Yahoo**: Yahoo screen and movies
|
||||
- **Yam**
|
||||
- **Yam**: 蕃薯藤yam天空部落
|
||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||
- **YesJapan**
|
||||
- **yinyuetai:video**: 音悦Tai
|
||||
- **Ynet**
|
||||
- **YouJizz**
|
||||
- **youku**
|
||||
- **youku**: 优酷
|
||||
- **YouPorn**
|
||||
- **YourUpload**
|
||||
- **youtube**: YouTube.com
|
||||
|
||||
@@ -14,6 +14,8 @@ from youtube_dl.utils import get_filesystem_encoding
|
||||
from youtube_dl.compat import (
|
||||
compat_getenv,
|
||||
compat_expanduser,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
|
||||
|
||||
@@ -42,5 +44,28 @@ class TestCompat(unittest.TestCase):
|
||||
dir(youtube_dl.compat))) - set(['unicode_literals'])
|
||||
self.assertEqual(all_names, sorted(present_names))
|
||||
|
||||
def test_compat_urllib_parse_unquote(self):
|
||||
self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
|
||||
self.assertEqual(compat_urllib_parse_unquote(''), '')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%'), '%')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%%'), '%%')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%2F'), '/')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波')
|
||||
self.assertEqual(
|
||||
compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
|
||||
%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
|
||||
'''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
|
||||
%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
|
||||
self.assertEqual(
|
||||
compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''),
|
||||
'''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')
|
||||
|
||||
def test_compat_urllib_parse_unquote_plus(self):
|
||||
self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
|
||||
self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -324,6 +324,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_duration('02:03:04'), 7384)
|
||||
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
||||
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
|
||||
self.assertEqual(parse_duration('87 Min.'), 5220)
|
||||
|
||||
def test_fix_xml_ampersands(self):
|
||||
self.assertEqual(
|
||||
|
||||
@@ -262,6 +262,8 @@ class YoutubeDL(object):
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
otherwise prefer avconv.
|
||||
postprocessor_args: A list of additional command-line arguments for the
|
||||
postprocessor.
|
||||
"""
|
||||
|
||||
params = None
|
||||
@@ -1102,7 +1104,8 @@ class YoutubeDL(object):
|
||||
if req_format is None:
|
||||
req_format_list = []
|
||||
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
||||
info_dict['extractor'] in ['youtube', 'ted']):
|
||||
info_dict['extractor'] in ['youtube', 'ted'] and
|
||||
not info_dict.get('is_live')):
|
||||
merger = FFmpegMergerPP(self)
|
||||
if merger.available and merger.can_merge():
|
||||
req_format_list.append('bestvideo+bestaudio')
|
||||
|
||||
@@ -169,7 +169,7 @@ def _real_main(argv=None):
|
||||
if not opts.audioquality.isdigit():
|
||||
parser.error('invalid audio quality specified')
|
||||
if opts.recodevideo is not None:
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
|
||||
parser.error('invalid video recode format specified')
|
||||
if opts.convertsubtitles is not None:
|
||||
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
|
||||
@@ -263,6 +263,9 @@ def _real_main(argv=None):
|
||||
external_downloader_args = None
|
||||
if opts.external_downloader_args:
|
||||
external_downloader_args = shlex.split(opts.external_downloader_args)
|
||||
postprocessor_args = None
|
||||
if opts.postprocessor_args:
|
||||
postprocessor_args = shlex.split(opts.postprocessor_args)
|
||||
match_filter = (
|
||||
None if opts.match_filter is None
|
||||
else match_filter_func(opts.match_filter))
|
||||
@@ -367,6 +370,7 @@ def _real_main(argv=None):
|
||||
'ffmpeg_location': opts.ffmpeg_location,
|
||||
'hls_prefer_native': opts.hls_prefer_native,
|
||||
'external_downloader_args': external_downloader_args,
|
||||
'postprocessor_args': postprocessor_args,
|
||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ import shutil
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import itertools
|
||||
|
||||
|
||||
try:
|
||||
@@ -74,42 +75,74 @@ except ImportError:
|
||||
import BaseHTTPServer as compat_http_server
|
||||
|
||||
try:
|
||||
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||
except ImportError:
|
||||
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
|
||||
if string == '':
|
||||
from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
|
||||
except ImportError: # Python 2
|
||||
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
|
||||
else re.compile('([\x00-\x7f]+)'))
|
||||
|
||||
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
|
||||
# implementations from cpython 3.4.3's stdlib. Python 2's version
|
||||
# is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
|
||||
|
||||
def compat_urllib_parse_unquote_to_bytes(string):
|
||||
"""unquote_to_bytes('abc%20def') -> b'abc def'."""
|
||||
# Note: strings are encoded as UTF-8. This is only an issue if it contains
|
||||
# unescaped non-ASCII characters, which URIs should not.
|
||||
if not string:
|
||||
# Is it a string-like object?
|
||||
string.split
|
||||
return b''
|
||||
if isinstance(string, unicode):
|
||||
string = string.encode('utf-8')
|
||||
bits = string.split(b'%')
|
||||
if len(bits) == 1:
|
||||
return string
|
||||
res = string.split('%')
|
||||
if len(res) == 1:
|
||||
res = [bits[0]]
|
||||
append = res.append
|
||||
for item in bits[1:]:
|
||||
try:
|
||||
append(compat_urllib_parse._hextochr[item[:2]])
|
||||
append(item[2:])
|
||||
except KeyError:
|
||||
append(b'%')
|
||||
append(item)
|
||||
return b''.join(res)
|
||||
|
||||
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
|
||||
"""Replace %xx escapes by their single-character equivalent. The optional
|
||||
encoding and errors parameters specify how to decode percent-encoded
|
||||
sequences into Unicode characters, as accepted by the bytes.decode()
|
||||
method.
|
||||
By default, percent-encoded sequences are decoded with UTF-8, and invalid
|
||||
sequences are replaced by a placeholder character.
|
||||
|
||||
unquote('abc%20def') -> 'abc def'.
|
||||
"""
|
||||
if '%' not in string:
|
||||
string.split
|
||||
return string
|
||||
if encoding is None:
|
||||
encoding = 'utf-8'
|
||||
if errors is None:
|
||||
errors = 'replace'
|
||||
# pct_sequence: contiguous sequence of percent-encoded bytes, decoded
|
||||
pct_sequence = b''
|
||||
string = res[0]
|
||||
for item in res[1:]:
|
||||
try:
|
||||
if not item:
|
||||
raise ValueError
|
||||
pct_sequence += item[:2].decode('hex')
|
||||
rest = item[2:]
|
||||
if not rest:
|
||||
# This segment was just a single percent-encoded character.
|
||||
# May be part of a sequence of code units, so delay decoding.
|
||||
# (Stored in pct_sequence).
|
||||
continue
|
||||
except ValueError:
|
||||
rest = '%' + item
|
||||
# Encountered non-percent-encoded characters. Flush the current
|
||||
# pct_sequence.
|
||||
string += pct_sequence.decode(encoding, errors) + rest
|
||||
pct_sequence = b''
|
||||
if pct_sequence:
|
||||
# Flush the final pct_sequence
|
||||
string += pct_sequence.decode(encoding, errors)
|
||||
return string
|
||||
bits = _asciire.split(string)
|
||||
res = [bits[0]]
|
||||
append = res.append
|
||||
for i in range(1, len(bits), 2):
|
||||
append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
|
||||
append(bits[i + 1])
|
||||
return ''.join(res)
|
||||
|
||||
def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
|
||||
"""Like unquote(), but also replace plus signs by spaces, as required for
|
||||
unquoting HTML form values.
|
||||
|
||||
unquote_plus('%7e/abc+def') -> '~/abc def'
|
||||
"""
|
||||
string = string.replace('+', ' ')
|
||||
return compat_urllib_parse_unquote(string, encoding, errors)
|
||||
|
||||
try:
|
||||
compat_str = unicode # Python 2
|
||||
@@ -388,6 +421,15 @@ else:
|
||||
pass
|
||||
return _terminal_size(columns, lines)
|
||||
|
||||
try:
|
||||
itertools.count(start=0, step=1)
|
||||
compat_itertools_count = itertools.count
|
||||
except TypeError: # Python 2.6
|
||||
def compat_itertools_count(start=0, step=1):
|
||||
n = start
|
||||
while True:
|
||||
yield n
|
||||
n += step
|
||||
|
||||
__all__ = [
|
||||
'compat_HTTPError',
|
||||
@@ -401,6 +443,7 @@ __all__ = [
|
||||
'compat_html_entities',
|
||||
'compat_http_client',
|
||||
'compat_http_server',
|
||||
'compat_itertools_count',
|
||||
'compat_kwargs',
|
||||
'compat_ord',
|
||||
'compat_parse_qs',
|
||||
@@ -411,6 +454,8 @@ __all__ = [
|
||||
'compat_urllib_error',
|
||||
'compat_urllib_parse',
|
||||
'compat_urllib_parse_unquote',
|
||||
'compat_urllib_parse_unquote_plus',
|
||||
'compat_urllib_parse_unquote_to_bytes',
|
||||
'compat_urllib_parse_urlparse',
|
||||
'compat_urllib_request',
|
||||
'compat_urlparse',
|
||||
|
||||
@@ -8,6 +8,7 @@ from .hls import NativeHlsFD
|
||||
from .http import HttpFD
|
||||
from .rtsp import RtspFD
|
||||
from .rtmp import RtmpFD
|
||||
from .dash import DashSegmentsFD
|
||||
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
@@ -20,6 +21,7 @@ PROTOCOL_MAP = {
|
||||
'mms': RtspFD,
|
||||
'rtsp': RtspFD,
|
||||
'f4m': F4mFD,
|
||||
'http_dash_segments': DashSegmentsFD,
|
||||
}
|
||||
|
||||
|
||||
|
||||
66
youtube_dl/downloader/dash.py
Normal file
66
youtube_dl/downloader/dash.py
Normal file
@@ -0,0 +1,66 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import compat_urllib_request
|
||||
|
||||
|
||||
class DashSegmentsFD(FileDownloader):
|
||||
"""
|
||||
Download segments in a DASH manifest
|
||||
"""
|
||||
def real_download(self, filename, info_dict):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
base_url = info_dict['url']
|
||||
segment_urls = info_dict['segment_urls']
|
||||
|
||||
is_test = self.params.get('test', False)
|
||||
remaining_bytes = self._TEST_FILE_SIZE if is_test else None
|
||||
byte_counter = 0
|
||||
|
||||
def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
|
||||
self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
|
||||
req = compat_urllib_request.Request(target_url)
|
||||
if remaining_bytes is not None:
|
||||
req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
|
||||
|
||||
data = self.ydl.urlopen(req).read()
|
||||
|
||||
if remaining_bytes is not None:
|
||||
data = data[:remaining_bytes]
|
||||
|
||||
outf.write(data)
|
||||
return len(data)
|
||||
|
||||
def combine_url(base_url, target_url):
|
||||
if re.match(r'^https?://', target_url):
|
||||
return target_url
|
||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||
|
||||
with open(tmpfilename, 'wb') as outf:
|
||||
append_url_to_file(
|
||||
outf, combine_url(base_url, info_dict['initialization_url']),
|
||||
'initialization segment')
|
||||
for i, segment_url in enumerate(segment_urls):
|
||||
segment_len = append_url_to_file(
|
||||
outf, combine_url(base_url, segment_url),
|
||||
'segment %d / %d' % (i + 1, len(segment_urls)),
|
||||
remaining_bytes)
|
||||
byte_counter += segment_len
|
||||
if remaining_bytes is not None:
|
||||
remaining_bytes -= segment_len
|
||||
if remaining_bytes <= 0:
|
||||
break
|
||||
|
||||
self.try_rename(tmpfilename, filename)
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': byte_counter,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
|
||||
return True
|
||||
@@ -19,9 +19,14 @@ from .anysex import AnySexIE
|
||||
from .aol import AolIE
|
||||
from .allocine import AllocineIE
|
||||
from .aparat import AparatIE
|
||||
from .appleconnect import AppleConnectIE
|
||||
from .appletrailers import AppleTrailersIE
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .ard import ARDIE, ARDMediathekIE
|
||||
from .ard import (
|
||||
ARDIE,
|
||||
ARDMediathekIE,
|
||||
SportschauIE,
|
||||
)
|
||||
from .arte import (
|
||||
ArteTvIE,
|
||||
ArteTVPlus7IE,
|
||||
@@ -38,7 +43,10 @@ from .azubu import AzubuIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .bbc import (
|
||||
BBCCoUkIE,
|
||||
BBCIE,
|
||||
)
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .beatportpro import BeatportProIE
|
||||
@@ -238,6 +246,7 @@ from .instagram import InstagramIE, InstagramUserIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
from .iqiyi import IqiyiIE
|
||||
from .ir90tv import Ir90TvIE
|
||||
from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
@@ -260,8 +269,17 @@ from .keek import KeekIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
from .kuwo import (
|
||||
KuwoIE,
|
||||
KuwoAlbumIE,
|
||||
KuwoChartIE,
|
||||
KuwoSingerIE,
|
||||
KuwoCategoryIE,
|
||||
KuwoMvIE,
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .letv import (
|
||||
LetvIE,
|
||||
LetvTvIE,
|
||||
@@ -323,6 +341,7 @@ from .musicvault import MusicVaultIE
|
||||
from .muzu import MuzuTVIE
|
||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvi import MyviIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import NationalGeographicIE
|
||||
@@ -342,6 +361,15 @@ from .ndtv import NDTVIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .nerdist import NerdistIE
|
||||
from .neteasemusic import (
|
||||
NetEaseMusicIE,
|
||||
NetEaseMusicAlbumIE,
|
||||
NetEaseMusicSingerIE,
|
||||
NetEaseMusicListIE,
|
||||
NetEaseMusicMvIE,
|
||||
NetEaseMusicProgramIE,
|
||||
NetEaseMusicDjRadioIE,
|
||||
)
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newstube import NewstubeIE
|
||||
from .nextmedia import (
|
||||
@@ -371,7 +399,8 @@ from .npo import (
|
||||
NPOLiveIE,
|
||||
NPORadioIE,
|
||||
NPORadioFragmentIE,
|
||||
TegenlichtVproIE,
|
||||
VPROIE,
|
||||
WNLIE
|
||||
)
|
||||
from .nrk import (
|
||||
NRKIE,
|
||||
@@ -442,6 +471,7 @@ from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
from .rai import RaiIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redtube import RedTubeIE
|
||||
from .restudy import RestudyIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
@@ -703,7 +733,10 @@ from .wdr import (
|
||||
WDRMobileIE,
|
||||
WDRMausIE,
|
||||
)
|
||||
from .webofstories import WebOfStoriesIE
|
||||
from .webofstories import (
|
||||
WebOfStoriesIE,
|
||||
WebOfStoriesPlaylistIE,
|
||||
)
|
||||
from .weibo import WeiboIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
@@ -734,6 +767,7 @@ from .yandexmusic import (
|
||||
YandexMusicPlaylistIE,
|
||||
)
|
||||
from .yesjapan import YesJapanIE
|
||||
from .yinyuetai import YinYueTaiIE
|
||||
from .ynet import YnetIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import YoukuIE
|
||||
|
||||
50
youtube_dl/extractor/appleconnect.py
Normal file
50
youtube_dl/extractor/appleconnect.py
Normal file
@@ -0,0 +1,50 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
str_to_int,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
class AppleConnectIE(InfoExtractor):
    """Extractor for videos attached to Apple Connect posts on itunes.apple.com.

    The post page embeds a JSON object (inside an element with class
    ``auc-video-data``) that carries the media URL and its metadata.
    """
    _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
    _TEST = {
        'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
        'md5': '10d0f2799111df4cb1c924520ca78f98',
        'info_dict': {
            'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
            'ext': 'm4v',
            'title': 'Energy',
            'uploader': 'Drake',
            'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg',
            'upload_date': '20150710',
            'timestamp': 1436545535,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video embedded in the post at *url*.

        Raises ExtractorError (expected) when the page carries no embedded
        video JSON. Only the media URL and the title are treated as
        mandatory; every other field is best-effort and may be None.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # default=None makes a failed match return None instead of raising,
        # so the "no video" case can be reported with a dedicated message.
        video_json = self._html_search_regex(
            r'class="auc-video-data">(\{.*?\})', webpage, 'json', default=None)
        if video_json is None:
            raise ExtractorError('This post doesn\'t contain a video', expected=True)

        video_data = self._parse_json(video_json, video_id)

        # Timestamp and like count are optional metadata: a page without
        # them must not abort an otherwise successful extraction.
        timestamp_str = self._html_search_regex(
            r'data-timestamp="(\d+)"', webpage, 'timestamp', fatal=False)
        like_count_str = self._html_search_regex(
            r'(\d+) Loves', webpage, 'like count', default=None)
        timestamp = str_to_int(timestamp_str) if timestamp_str is not None else None
        like_count = str_to_int(like_count_str) if like_count_str is not None else None

        return {
            'id': video_id,
            'url': video_data['sslSrc'],
            'title': video_data['title'],
            'description': video_data.get('description'),
            'uploader': video_data.get('artistName'),
            'thumbnail': video_data.get('artworkUrl'),
            'timestamp': timestamp,
            'like_count': like_count,
        }
|
||||
@@ -8,6 +8,7 @@ from .generic import GenericIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
get_element_by_attribute,
|
||||
qualities,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
@@ -22,19 +23,125 @@ class ARDMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
|
||||
'info_dict': {
|
||||
'id': '29582122',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ich liebe das Leben trotzdem',
|
||||
'description': 'md5:45e4c225c72b27993314b31a84a5261c',
|
||||
'duration': 4557,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
||||
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
||||
'info_dict': {
|
||||
'id': '29522730',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tatort: Scheinwelten - Hörfassung (Video tgl. ab 20 Uhr)',
|
||||
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
||||
'duration': 5252,
|
||||
},
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
'md5': '219d94d8980b4f538c7fcb0865eb7f2c',
|
||||
'info_dict': {
|
||||
'id': '28488308',
|
||||
'ext': 'mp3',
|
||||
'title': 'Tod eines Fußballers',
|
||||
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
||||
'duration': 3240,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
|
||||
'info_dict': {
|
||||
'id': '22490580',
|
||||
'ext': 'mp4',
|
||||
'title': 'Das Wunder von Wolbeck (Video tgl. ab 20 Uhr)',
|
||||
'description': 'Auf einem restaurierten Hof bei Wolbeck wird der Heilpraktiker Raffael Lembeck eines morgens von seiner Frau Stella tot aufgefunden. Das Opfer war offensichtlich in seiner Praxis zu Fall gekommen und ist dann verblutet, erklärt Prof. Boerne am Tatort.',
|
||||
},
|
||||
'skip': 'Blocked outside of Germany',
|
||||
}]
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
    """Download the media JSON and turn it into a partial info dict.

    The result holds 'id', 'duration', 'thumbnail', 'formats' and
    'subtitles'; callers add the remaining fields themselves.

    Raises ExtractorError (expected) when no formats were found and either
    the page contains the '"fsk"' marker or the JSON is flagged
    '_geoblocked'.
    """
    media_info = self._download_json(
        media_info_url, video_id, 'Downloading media JSON')
    formats = self._extract_formats(media_info, video_id)

    if not formats:
        # Explain the two known reasons for an empty format list.
        if '"fsk"' in webpage:
            raise ExtractorError(
                'This video is only available after 20:00', expected=True)
        if media_info.get('_geoblocked'):
            raise ExtractorError('This video is not available due to geo restriction', expected=True)

    self._sort_formats(formats)

    subtitle_url = media_info.get('_subtitleUrl')
    if subtitle_url:
        subtitles = {
            'de': [{
                'ext': 'srt',
                'url': subtitle_url,
            }],
        }
    else:
        subtitles = {}

    return {
        'id': video_id,
        'duration': int_or_none(media_info.get('_duration')),
        'thumbnail': media_info.get('_previewImage'),
        'formats': formats,
        'subtitles': subtitles,
    }
|
||||
|
||||
def _extract_formats(self, media_info, video_id):
    """Collect the format dicts described by the media JSON *media_info*.

    Walks every entry of '_mediaArray' and every stream of its
    '_mediaStreamArray'. f4m and m3u8 URLs are expanded through the
    manifest helpers; other URLs become a single RTMP or HTTP format.
    URLs that are neither are ignored. Returns an unsorted list.
    """
    is_audio = media_info.get('_type') == 'audio'
    collected = []
    for num, media in enumerate(media_info.get('_mediaArray', [])):
        for stream in media.get('_mediaStreamArray', []):
            stream_urls = stream.get('_stream')
            if not stream_urls:
                continue
            # '_stream' holds either one URL or a list of URLs.
            if not isinstance(stream_urls, list):
                stream_urls = [stream_urls]
            quality = stream.get('_quality')
            server = stream.get('_server')
            for stream_url in stream_urls:
                ext = determine_ext(stream_url)
                if ext == 'f4m':
                    collected.extend(self._extract_f4m_formats(
                        stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
                        video_id, preference=-1, f4m_id='hds'))
                    continue
                if ext == 'm3u8':
                    collected.extend(self._extract_m3u8_formats(
                        stream_url, video_id, 'mp4', preference=1, m3u8_id='hls'))
                    continue

                if server and server.startswith('rtmp'):
                    fmt = {
                        'url': server,
                        'play_path': stream_url,
                        'format_id': 'a%s-rtmp-%s' % (num, quality),
                    }
                elif stream_url.startswith('http'):
                    fmt = {
                        'url': stream_url,
                        'format_id': 'a%s-%s-%s' % (num, ext, quality)
                    }
                else:
                    continue

                # Frame size is taken from a "_<width>x<height>.mp4" suffix.
                size = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
                if size:
                    fmt['width'] = int(size.group('width'))
                    fmt['height'] = int(size.group('height'))
                if is_audio:
                    fmt['vcodec'] = 'none'
                collected.append(fmt)
    return collected
|
||||
|
||||
def _real_extract(self, url):
|
||||
# determine video id from url
|
||||
m = re.match(self._VALID_URL, url)
|
||||
@@ -92,46 +199,22 @@ class ARDMediathekIE(InfoExtractor):
|
||||
'format_id': fid,
|
||||
'url': furl,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info = {
|
||||
'formats': formats,
|
||||
}
|
||||
else: # request JSON file
|
||||
media_info = self._download_json(
|
||||
'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
|
||||
# The second element of the _mediaArray contains the standard http urls
|
||||
streams = media_info['_mediaArray'][1]['_mediaStreamArray']
|
||||
if not streams:
|
||||
if '"fsk"' in webpage:
|
||||
raise ExtractorError('This video is only available after 20:00')
|
||||
info = self._extract_media_info(
|
||||
'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id)
|
||||
|
||||
formats = []
|
||||
for s in streams:
|
||||
if type(s['_stream']) == list:
|
||||
for index, url in enumerate(s['_stream'][::-1]):
|
||||
quality = s['_quality'] + index
|
||||
formats.append({
|
||||
'quality': quality,
|
||||
'url': url,
|
||||
'format_id': '%s-%s' % (determine_ext(url), quality)
|
||||
})
|
||||
continue
|
||||
|
||||
format = {
|
||||
'quality': s['_quality'],
|
||||
'url': s['_stream'],
|
||||
}
|
||||
|
||||
format['format_id'] = '%s-%s' % (
|
||||
determine_ext(format['url']), format['quality'])
|
||||
|
||||
formats.append(format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
@@ -189,3 +272,41 @@ class ARDIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class SportschauIE(ARDMediathekIE):
    """Extractor for video pages on sportschau.de.

    Reuses the media JSON handling of ARDMediathekIE; the JSON URL is
    derived from the page URL without its '.html' suffix.
    """
    IE_NAME = 'Sportschau'
    _VALID_URL = r'(?P<baseurl>https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P<id>[^/#?]+))\.html'
    _TESTS = [{
        'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html',
        'info_dict': {
            'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100',
            'ext': 'mp4',
            'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"',
            'thumbnail': 're:^https?://.*\.jpg$',
            'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the Sportschau video page at *url*."""
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('id')
        media_json_url = url_match.group('baseurl') + '-mc_defaultQuality-h.json'

        webpage = self._download_webpage(url, video_id)
        # Title comes from the element with class "headline", the
        # description from the page's meta tag.
        title = get_element_by_attribute('class', 'headline', webpage)
        description = self._html_search_meta('description', webpage, 'description')

        info = self._extract_media_info(media_json_url, webpage, video_id)
        info['title'] = title
        info['description'] = description
        return info
|
||||
|
||||
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class BaiduVideoIE(InfoExtractor):
|
||||
IE_DESC = '百度视频'
|
||||
_VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
|
||||
_TESTS = [{
|
||||
'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
|
||||
|
||||
722
youtube_dl/extractor/bbc.py
Normal file
722
youtube_dl/extractor/bbc.py
Normal file
@@ -0,0 +1,722 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class BBCCoUkIE(InfoExtractor):
    """Extractor for bbc.co.uk programme, iPlayer episode/playlist and music clip pages.

    Formats and subtitles are taken from the media selector XML addressed by
    _MEDIASELECTOR_URL. The programme id (vpid) is read from the page when
    possible, otherwise from the JSON playlist, and finally from the legacy
    XML playlist.
    """
    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC iPlayer'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'

    _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s'

    _TESTS = [
        {
            'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
            'info_dict': {
                'id': 'b039d07m',
                'ext': 'flv',
                'title': 'Kaleidoscope, Leonard Cohen',
                'description': 'The Canadian poet and songwriter reflects on his musical career.',
                'duration': 1740,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
            'info_dict': {
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Man in Black: Series 3: The Printed Name',
                'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
                'duration': 1800,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Episode is no longer available on BBC iPlayer Radio',
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
            'info_dict': {
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Voice UK: Series 3: Blind Auditions 5',
                'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
                'duration': 5100,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
            'info_dict': {
                'id': 'b03k3pb7',
                'ext': 'flv',
                'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
                'description': '2. Invasion',
                'duration': 3600,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
        }, {
            'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
            'info_dict': {
                'id': 'b04v209v',
                'ext': 'flv',
                'title': 'Pete Tong, The Essential New Tune Special',
                'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
                'duration': 10800,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        }, {
            'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
            'note': 'Audio',
            'info_dict': {
                'id': 'p02frcch',
                'ext': 'flv',
                'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
                'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
                'duration': 3507,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        }, {
            'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
            'note': 'Video',
            'info_dict': {
                'id': 'p025c103',
                'ext': 'flv',
                'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
                'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
                'duration': 226,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
            'info_dict': {
                'id': 'p02n76xf',
                'ext': 'flv',
                'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
                'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
                'duration': 3540,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'geolocation',
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
            'info_dict': {
                'id': 'b05zmgw1',
                'ext': 'flv',
                'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
                'title': 'Royal Academy Summer Exhibition',
                'duration': 3540,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'geolocation',
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
            'only_matching': True,
        }, {
            'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
            'only_matching': True,
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
            'only_matching': True,
        }
    ]

    def _extract_asx_playlist(self, connection, programme_id):
        """Download the ASX playlist behind *connection* and return its ref URLs."""
        asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
        return [ref.get('href') for ref in asx.findall('./Entry/ref')]

    def _extract_connection(self, connection, programme_id):
        """Return the format dicts for one media selector <connection> element.

        HTTP connections yield one format per ASX playlist entry or a single
        direct link (DASH is skipped); RTMP connections yield one format.
        Any other protocol yields an empty list.
        """
        formats = []
        protocol = connection.get('protocol')
        supplier = connection.get('supplier')
        if protocol == 'http':
            href = connection.get('href')
            transfer_format = connection.get('transferFormat')
            # ASX playlist
            if supplier == 'asx':
                for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
                    formats.append({
                        'url': ref,
                        'format_id': 'ref%s_%s' % (i, supplier),
                    })
            # Skip DASH until supported
            elif transfer_format == 'dash':
                pass
            # Direct link
            else:
                formats.append({
                    'url': href,
                    'format_id': supplier,
                })
        elif protocol == 'rtmp':
            application = connection.get('application', 'ondemand')
            auth_string = connection.get('authString')
            identifier = connection.get('identifier')
            server = connection.get('server')
            formats.append({
                'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
                'play_path': identifier,
                'app': '%s?%s' % (application, auth_string),
                'page_url': 'http://www.bbc.co.uk',
                'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
                'rtmp_live': False,
                'ext': 'flv',
                'format_id': supplier,
            })
        return formats

    def _extract_items(self, playlist):
        """Return the <item> elements of a legacy playlist document."""
        return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')

    def _extract_medias(self, media_selection):
        """Return the <media> elements of a media selection document.

        Raises ExtractorError (expected) when the document carries an
        <error> element; its 'id' attribute is included in the message.
        """
        error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
        if error is not None:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
        return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')

    def _extract_connections(self, media):
        """Return the <connection> elements of a <media> element."""
        return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')

    def _extract_video(self, media, programme_id):
        """Return the formats of a video <media> element, tagged with its video metadata."""
        formats = []
        vbr = int_or_none(media.get('bitrate'))
        vcodec = media.get('encoding')
        service = media.get('service')
        width = int_or_none(media.get('width'))
        height = int_or_none(media.get('height'))
        file_size = int_or_none(media.get('media_file_size'))
        for connection in self._extract_connections(media):
            conn_formats = self._extract_connection(connection, programme_id)
            for fmt in conn_formats:
                fmt.update({
                    'format_id': '%s_%s' % (service, fmt['format_id']),
                    'width': width,
                    'height': height,
                    'vbr': vbr,
                    'vcodec': vcodec,
                    'filesize': file_size,
                })
            formats.extend(conn_formats)
        return formats

    def _extract_audio(self, media, programme_id):
        """Return the formats of an audio <media> element, tagged with its audio metadata."""
        formats = []
        abr = int_or_none(media.get('bitrate'))
        acodec = media.get('encoding')
        service = media.get('service')
        for connection in self._extract_connections(media):
            conn_formats = self._extract_connection(connection, programme_id)
            for fmt in conn_formats:
                fmt.update({
                    'format_id': '%s_%s' % (service, fmt['format_id']),
                    'abr': abr,
                    'acodec': acodec,
                })
            formats.extend(conn_formats)
        return formats

    def _get_subtitles(self, media, programme_id):
        """Return a language -> subtitle list mapping for a captions <media> element.

        Each captions document is downloaded only to read its xml:lang
        attribute ('en' when absent); the entry itself points at the
        connection URL.
        """
        subtitles = {}
        for connection in self._extract_connections(media):
            captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
            lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
            subtitles[lang] = [
                {
                    'url': connection.get('href'),
                    'ext': 'ttml',
                },
            ]
        return subtitles

    def _download_media_selector(self, programme_id):
        """Fetch and process the media selector for *programme_id*."""
        return self._download_media_selector_url(
            self._MEDIASELECTOR_URL % programme_id, programme_id)

    def _download_media_selector_url(self, url, programme_id=None):
        """Fetch the media selection XML at *url* and return (formats, subtitles).

        A HTTP 403 response is not treated as a failure: its body is parsed
        as the media selection document so that the error it describes can
        be reported by _extract_medias. Other errors are re-raised.
        """
        try:
            media_selection = self._download_xml(
                url, programme_id, 'Downloading media selection XML')
        except ExtractorError as ee:
            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
                media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
            else:
                raise
        return self._process_media_selector(media_selection, programme_id)

    def _process_media_selector(self, media_selection, programme_id):
        """Return (formats, subtitles) for a parsed media selection document.

        subtitles stays None when the document has no captions media.
        """
        formats = []
        subtitles = None

        for media in self._extract_medias(media_selection):
            kind = media.get('kind')
            if kind == 'audio':
                formats.extend(self._extract_audio(media, programme_id))
            elif kind == 'video':
                formats.extend(self._extract_video(media, programme_id))
            elif kind == 'captions':
                subtitles = self.extract_subtitles(media, programme_id)

        return formats, subtitles

    def _download_playlist(self, playlist_id):
        """Resolve *playlist_id* through the JSON playlist, else the legacy one.

        Returns a (programme_id, title, description, duration, formats,
        subtitles) tuple for the first programme item. The legacy XML
        playlist is used when the JSON request answers 404, when there is
        no default available version, or when it lists no programme item.
        """
        try:
            playlist = self._download_json(
                'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
                playlist_id, 'Downloading playlist JSON')

            version = playlist.get('defaultAvailableVersion')
            if version:
                smp_config = version['smpConfig']
                title = smp_config['title']
                description = smp_config['summary']
                for item in smp_config['items']:
                    if item['kind'] not in ('programme', 'radioProgramme'):
                        continue
                    programme_id = item.get('vpid')
                    duration = int_or_none(item.get('duration'))
                    formats, subtitles = self._download_media_selector(programme_id)
                    return programme_id, title, description, duration, formats, subtitles
        except ExtractorError as ee:
            if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
                raise

        # fallback to legacy playlist
        return self._process_legacy_playlist(playlist_id)

    def _process_legacy_playlist_url(self, url, display_id):
        """Download the legacy playlist at *url* and extract its programme."""
        playlist = self._download_legacy_playlist_url(url, display_id)
        return self._extract_from_legacy_playlist(playlist, display_id)

    def _process_legacy_playlist(self, playlist_id):
        """Download and extract the legacy iPlayer playlist for *playlist_id*."""
        return self._process_legacy_playlist_url(
            'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)

    def _download_legacy_playlist_url(self, url, playlist_id=None):
        """Return the parsed legacy playlist XML found at *url*."""
        return self._download_xml(
            url, playlist_id, 'Downloading legacy playlist XML')

    def _extract_from_legacy_playlist(self, playlist, playlist_id):
        """Extract the first programme of a parsed legacy playlist.

        Returns a (programme_id, title, description, duration, formats,
        subtitles) tuple.

        Raises ExtractorError (expected) when the playlist carries a
        <noItems> element, and ExtractorError when it lists no programme
        item at all.
        """
        no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
        if no_items is not None:
            reason = no_items.get('reason')
            if reason == 'preAvailability':
                msg = 'Episode %s is not yet available' % playlist_id
            elif reason == 'postAvailability':
                msg = 'Episode %s is no longer available' % playlist_id
            elif reason == 'noMedia':
                msg = 'Episode %s is not currently available' % playlist_id
            else:
                msg = 'Episode %s is not available: %s' % (playlist_id, reason)
            raise ExtractorError(msg, expected=True)

        def get_from_attributes(element):
            # The id may be stored in either attribute; accept only values
            # that look like a programme id.
            for p in ('identifier', 'group'):
                value = element.get(p)
                if value and re.match(r'^[pb][\da-z]{7}$', value):
                    return value

        def get_programme_id(item):
            # Prefer an id found on the item itself (this result used to be
            # discarded), then fall back to its <mediator> child.
            value = get_from_attributes(item)
            if value:
                return value
            mediator = item.find('./{http://bbc.co.uk/2008/emp/playlist}mediator')
            if mediator is not None:
                return get_from_attributes(mediator)

        for item in self._extract_items(playlist):
            kind = item.get('kind')
            if kind not in ('programme', 'radioProgramme'):
                continue
            title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
            description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text

            programme_id = get_programme_id(item)
            duration = int_or_none(item.get('duration'))
            # TODO: programme_id can be None and media items can be incorporated right inside
            # playlist's item (e.g. http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
            # as f4m and m3u8
            formats, subtitles = self._download_media_selector(programme_id)

            return programme_id, title, description, duration, formats, subtitles

        # Without this, callers would fail while unpacking the result.
        raise ExtractorError('No programme found in playlist %s' % playlist_id)

    def _real_extract(self, url):
        """Return the info dict for the bbc.co.uk page at *url*."""
        group_id = self._match_id(url)

        webpage = self._download_webpage(url, group_id, 'Downloading video page')

        programme_id = None
        # Stays None when the vpid is only found by the regex fallback
        # below, which yields no duration.
        duration = None

        tviplayer = self._search_regex(
            r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
            webpage, 'player', default=None)

        if tviplayer:
            player = self._parse_json(tviplayer, group_id).get('player', {})
            duration = int_or_none(player.get('duration'))
            programme_id = player.get('vpid')

        if not programme_id:
            programme_id = self._search_regex(
                r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)

        if programme_id:
            formats, subtitles = self._download_media_selector(programme_id)
            title = self._og_search_title(webpage)
            description = self._search_regex(
                r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
                webpage, 'description', fatal=False)
        else:
            programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)

        self._sort_formats(formats)

        return {
            'id': programme_id,
            'title': title,
            'description': description,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
|
||||
|
||||
class BBCIE(BBCCoUkIE):
    # Extractor for bbc.com / bbc.co.uk pages that are not claimed by
    # BBCCoUkIE (see suitable()).  Depending on the page markup it returns
    # either a single video or a playlist of the embedded videos.
    IE_NAME = 'bbc'
    IE_DESC = 'BBC'
    _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'

    # fails with notukerror for some videos
    # _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s'
    _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s'

    _TESTS = [{
        # article with multiple videos embedded with data-media-meta containing
        # playlist.sxml, externalId and no direct video links
        'url': 'http://www.bbc.com/news/world-europe-32668511',
        'info_dict': {
            'id': 'world-europe-32668511',
            'title': 'Russia stages massive WW2 parade despite Western boycott',
            'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
        },
        'playlist_count': 2,
    }, {
        # article with multiple videos embedded with data-media-meta (more videos)
        'url': 'http://www.bbc.com/news/business-28299555',
        'info_dict': {
            'id': 'business-28299555',
            'title': 'Farnborough Airshow: Video highlights',
            'description': 'BBC reports and video highlights at the Farnborough Airshow.',
        },
        'playlist_count': 9,
        'skip': 'Save time',
    }, {
        # article with multiple videos embedded with `new SMP()`
        'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
        'info_dict': {
            'id': '3662a707-0af9-3149-963f-47bea720b460',
            'title': 'BBC Blogs - Adam Curtis - BUGGER',
        },
        'playlist_count': 18,
    }, {
        # single video embedded with mediaAssetPage.init()
        'url': 'http://www.bbc.com/news/world-europe-32041533',
        'info_dict': {
            'id': 'p02mprgb',
            'ext': 'flv',
            'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
            'duration': 47,
            'timestamp': 1427219242,
            'upload_date': '20150324',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        # article with single video embedded with data-media-meta containing
        # direct video links (for now these are extracted) and playlist.xml (with
        # media items as f4m and m3u8 - currently unsupported)
        'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
        'info_dict': {
            'id': '150615_telabyad_kentin_cogu',
            'ext': 'mp4',
            'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
            'duration': 47,
            'timestamp': 1434397334,
            'upload_date': '20150615',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # single video embedded with mediaAssetPage.init() (regional section)
        'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
        'info_dict': {
            'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
            'ext': 'mp4',
            'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
            'duration': 87,
            'timestamp': 1434713142,
            'upload_date': '20150619',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # single video story with digitalData
        'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
        'info_dict': {
            'id': 'p02q6gc4',
            'ext': 'flv',
            'title': 'Sri Lanka’s spicy secret',
            'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
            'timestamp': 1437674293,
            'upload_date': '20150723',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        # single video story without digitalData
        'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
        'info_dict': {
            'id': 'p018zqqg',
            'ext': 'flv',
            'title': 'Hyundai Santa Fe Sport: Rock star',
            'description': 'md5:b042a26142c4154a6e472933cf20793d',
            'timestamp': 1368473503,
            'upload_date': '20130513',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        # single video with playlist.sxml URL
        'url': 'http://www.bbc.com/sport/0/football/33653409',
        'info_dict': {
            'id': 'p02xycnp',
            'ext': 'flv',
            'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
            'description': 'md5:398fca0e2e701c609d726e034fa1fc89',
            'duration': 140,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        # single video with playlist URL from weather section
        'url': 'http://www.bbc.com/weather/features/33601775',
        'only_matching': True,
    }, {
        # custom redirection to www.bbc.com
        'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs BBCCoUkIE accepts; otherwise defer to the default check."""
        return False if BBCCoUkIE.suitable(url) else super(BBCIE, cls).suitable(url)

    def _extract_from_media_meta(self, media_meta, video_id):
        """Return a ``(formats, subtitles)`` pair for one media metadata dict.

        Sources are tried in order: direct ``sourceFiles`` links, the media
        selector for ``externalId``, and finally the legacy playlist at
        ``href``.  ``([], [])`` is returned when none of them is present.
        """
        # Direct links to media in media metadata (e.g.
        # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
        # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
        source_files = media_meta.get('sourceFiles')
        if source_files:
            return [{
                'url': f['url'],
                'format_id': format_id,
                'ext': f.get('encoding'),
                'tbr': float_or_none(f.get('bitrate'), 1000),
                'filesize': int_or_none(f.get('filesize')),
            } for format_id, f in source_files.items() if f.get('url')], []

        programme_id = media_meta.get('externalId')
        if programme_id:
            return self._download_media_selector(programme_id)

        # Process playlist.sxml as legacy playlist
        href = media_meta.get('href')
        if href:
            playlist = self._download_legacy_playlist_url(href)
            _, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
            return formats, subtitles

        return [], []

    def _real_extract(self, url):
        """Extract a single video or a playlist of videos from a BBC page.

        The page is probed, in order, for: a ``playlist`` <param> (legacy
        playlist URL), a vpid of a single video story, ``new SMP(...)`` /
        ``setPlaylist(...)`` embeds pointing at bbc.co.uk, and finally
        ``data-media-meta`` attributes or a ``mediaAssetPage.init(...)``
        call.  The first two return a single info dict, the others a
        playlist result.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        timestamp = parse_iso8601(self._search_regex(
            [r'"datePublished":\s*"([^"]+)',
             r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
             r'itemprop="datePublished"[^>]+datetime="([^"]+)"'],
            webpage, 'date', default=None))

        # single video with playlist.sxml URL (e.g. http://www.bbc.com/sport/0/football/33653409)
        playlist = self._search_regex(
            r'<param[^>]+name="playlist"[^>]+value="([^"]+)"',
            webpage, 'playlist', default=None)
        if playlist:
            programme_id, title, description, duration, formats, subtitles = \
                self._process_legacy_playlist_url(playlist, playlist_id)
            self._sort_formats(formats)
            return {
                'id': programme_id,
                'title': title,
                'description': description,
                'duration': duration,
                'timestamp': timestamp,
                'formats': formats,
                'subtitles': subtitles,
            }

        # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
        programme_id = self._search_regex(
            [r'data-video-player-vpid="([\da-z]{8})"',
             r'<param[^>]+name="externalIdentifier"[^>]+value="([\da-z]{8})"'],
            webpage, 'vpid', default=None)
        if programme_id:
            formats, subtitles = self._download_media_selector(programme_id)
            self._sort_formats(formats)
            # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
            # The parse is non-fatal, so a malformed blob may yield a falsy
            # result (extract_all below filters such results too); fall back
            # to an empty dict so the .get() chain cannot hit None.
            digital_data = self._parse_json(
                self._search_regex(
                    r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
                programme_id, fatal=False) or {}
            page_info = digital_data.get('page', {}).get('pageInfo', {})
            title = page_info.get('pageName') or self._og_search_title(webpage)
            description = page_info.get('description') or self._og_search_description(webpage)
            timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
            return {
                'id': programme_id,
                'title': title,
                'description': description,
                'timestamp': timestamp,
                'formats': formats,
                'subtitles': subtitles,
            }

        playlist_title = self._html_search_regex(
            r'<title>(.*?)(?:\s*-\s*BBC [^ ]+)?</title>', webpage, 'playlist title')
        playlist_description = self._og_search_description(webpage, default=None)

        def extract_all(pattern):
            # Parse every match of pattern as JSON, dropping falsy results
            # (e.g. blobs that failed the non-fatal parse).
            parsed = (
                self._parse_json(s, playlist_id, fatal=False)
                for s in re.findall(pattern, webpage))
            return [p for p in parsed if p]

        # Multiple video article (e.g.
        # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
        EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+[\da-z]{8}(?:\b[^"]+)?'
        entries = []
        for match in extract_all(r'new\s+SMP\(({.+?})\)'):
            embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
            if embed_url and re.match(EMBED_URL, embed_url):
                entries.append(embed_url)
        entries.extend(re.findall(
            r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
        if entries:
            return self.playlist_result(
                [self.url_result(entry, 'BBCCoUk') for entry in entries],
                playlist_id, playlist_title, playlist_description)

        # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
        medias = extract_all(r"data-media-meta='({[^']+})'")

        if not medias:
            # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
            media_asset_page = self._parse_json(
                self._search_regex(
                    r'mediaAssetPage\.init\(\s*({.+?}), "/', webpage, 'media asset'),
                playlist_id)
            medias = []
            for video in media_asset_page.get('videos', {}).values():
                medias.extend(video.values())

        entries = []
        for num, media_meta in enumerate(medias, start=1):
            formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
            if not formats:
                continue
            self._sort_formats(formats)

            # Fall back to the page id (numbered when the page has several
            # medias) when the media carries no id of its own.
            video_id = media_meta.get('externalId')
            if not video_id:
                video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)

            title = media_meta.get('caption')
            if not title:
                title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)

            duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))

            images = []
            for image in media_meta.get('images', {}).values():
                images.extend(image.values())
            if 'image' in media_meta:
                images.append(media_meta['image'])

            thumbnails = [{
                'url': image.get('href'),
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
            } for image in images]

            entries.append({
                'id': video_id,
                'title': title,
                'thumbnails': thumbnails,
                'duration': duration,
                'timestamp': timestamp,
                'formats': formats,
                'subtitles': subtitles,
            })

        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
@@ -1,379 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class BBCCoUkIE(InfoExtractor):
    # Extractor for bbc.co.uk programme, iPlayer episode/playlist and
    # music clip pages identified by an 8-character id.
    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC iPlayer'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'

    _TESTS = [
        {
            'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
            'info_dict': {
                'id': 'b039d07m',
                'ext': 'flv',
                'title': 'Kaleidoscope, Leonard Cohen',
                'description': 'The Canadian poet and songwriter reflects on his musical career.',
                'duration': 1740,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
            'info_dict': {
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Man in Black: Series 3: The Printed Name',
                'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
                'duration': 1800,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Episode is no longer available on BBC iPlayer Radio',
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
            'info_dict': {
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Voice UK: Series 3: Blind Auditions 5',
                'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
                'duration': 5100,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
            'info_dict': {
                'id': 'b03k3pb7',
                'ext': 'flv',
                'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
                'description': '2. Invasion',
                'duration': 3600,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
        }, {
            'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
            'info_dict': {
                'id': 'b04v209v',
                'ext': 'flv',
                'title': 'Pete Tong, The Essential New Tune Special',
                'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
                'duration': 10800,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        }, {
            'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
            'note': 'Audio',
            'info_dict': {
                'id': 'p02frcch',
                'ext': 'flv',
                'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
                'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
                'duration': 3507,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        }, {
            'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
            'note': 'Video',
            'info_dict': {
                'id': 'p025c103',
                'ext': 'flv',
                'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
                'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
                'duration': 226,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
            'info_dict': {
                'id': 'p02n76xf',
                'ext': 'flv',
                'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
                'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
                'duration': 3540,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'geolocation',
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
            'info_dict': {
                'id': 'b05zmgw1',
                'ext': 'flv',
                'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
                'title': 'Royal Academy Summer Exhibition',
                'duration': 3540,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'geolocation',
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
            'only_matching': True,
        }, {
            'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
            'only_matching': True,
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
            'only_matching': True,
        }
    ]

    def _extract_asx_playlist(self, connection, programme_id):
        """Download the ASX playlist at the connection's href and return its ref hrefs."""
        asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
        return [ref.get('href') for ref in asx.findall('./Entry/ref')]

    def _extract_connection(self, connection, programme_id):
        """Build the list of format dicts described by one <connection> element.

        ``http`` connections yield either one format per ASX playlist entry
        (supplier ``asx``) or a single direct link; ``rtmp`` connections
        yield a single RTMP format.  Any other protocol yields an empty list.
        """
        formats = []
        protocol = connection.get('protocol')
        supplier = connection.get('supplier')
        if protocol == 'http':
            href = connection.get('href')
            # ASX playlist
            if supplier == 'asx':
                for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
                    formats.append({
                        'url': ref,
                        'format_id': 'ref%s_%s' % (i, supplier),
                    })
            # Direct link
            else:
                formats.append({
                    'url': href,
                    'format_id': supplier,
                })
        elif protocol == 'rtmp':
            application = connection.get('application', 'ondemand')
            auth_string = connection.get('authString')
            identifier = connection.get('identifier')
            server = connection.get('server')
            formats.append({
                'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
                'play_path': identifier,
                'app': '%s?%s' % (application, auth_string),
                'page_url': 'http://www.bbc.co.uk',
                'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
                'rtmp_live': False,
                'ext': 'flv',
                'format_id': supplier,
            })
        return formats

    def _extract_items(self, playlist):
        """Return the <item> children of a legacy playlist document."""
        return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')

    def _extract_medias(self, media_selection):
        """Return the <media> children of a media selection document.

        Raises ExtractorError (expected) when the document carries an
        <error> element, quoting that element's id.
        """
        error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
        if error is not None:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
        return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')

    def _extract_connections(self, media):
        """Return the <connection> children of a <media> element."""
        return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')

    def _extract_video(self, media, programme_id):
        """Return the formats of a video <media> element, one or more per connection."""
        formats = []
        # The numeric attributes may be absent; int_or_none keeps a missing
        # attribute as None where a bare int() would raise TypeError.
        vbr = int_or_none(media.get('bitrate'))
        vcodec = media.get('encoding')
        service = media.get('service')
        width = int_or_none(media.get('width'))
        height = int_or_none(media.get('height'))
        file_size = int_or_none(media.get('media_file_size'))
        for connection in self._extract_connections(media):
            conn_formats = self._extract_connection(connection, programme_id)
            for format in conn_formats:
                format.update({
                    'format_id': '%s_%s' % (service, format['format_id']),
                    'width': width,
                    'height': height,
                    'vbr': vbr,
                    'vcodec': vcodec,
                    'filesize': file_size,
                })
            formats.extend(conn_formats)
        return formats

    def _extract_audio(self, media, programme_id):
        """Return the formats of an audio <media> element, one or more per connection."""
        formats = []
        # bitrate may be absent; see _extract_video.
        abr = int_or_none(media.get('bitrate'))
        acodec = media.get('encoding')
        service = media.get('service')
        for connection in self._extract_connections(media):
            conn_formats = self._extract_connection(connection, programme_id)
            for format in conn_formats:
                format.update({
                    'format_id': '%s_%s' % (service, format['format_id']),
                    'abr': abr,
                    'acodec': acodec,
                })
            formats.extend(conn_formats)
        return formats

    def _get_subtitles(self, media, programme_id):
        """Return a ``{lang: [{'url', 'ext'}]}`` dict for a captions <media> element.

        Each connection's captions document is downloaded only to read its
        xml:lang attribute (defaulting to 'en'); the subtitle entry itself
        points at the connection's href.
        """
        subtitles = {}
        for connection in self._extract_connections(media):
            captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
            lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
            subtitles[lang] = [
                {
                    'url': connection.get('href'),
                    'ext': 'ttml',
                },
            ]
        return subtitles

    def _download_media_selector(self, programme_id):
        """Download the media selection XML for a vpid and return ``(formats, subtitles)``.

        A 403 response is not treated as fatal: its body is parsed as the
        media selection document so that _extract_medias can report the
        error it contains.  ``subtitles`` is None when the document has no
        captions media.
        """
        try:
            media_selection = self._download_xml(
                'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
                programme_id, 'Downloading media selection XML')
        except ExtractorError as ee:
            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
                media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
            else:
                raise

        formats = []
        subtitles = None

        for media in self._extract_medias(media_selection):
            kind = media.get('kind')
            if kind == 'audio':
                formats.extend(self._extract_audio(media, programme_id))
            elif kind == 'video':
                formats.extend(self._extract_video(media, programme_id))
            elif kind == 'captions':
                subtitles = self.extract_subtitles(media, programme_id)

        return formats, subtitles

    def _download_playlist(self, playlist_id):
        """Resolve a playlist id to its programme.

        The JSON playlist is tried first; on HTTP 404, or when it holds no
        programme item, the legacy XML playlist is used instead.  When
        several programme items are present the last one wins.

        Returns ``(programme_id, title, description, duration, formats,
        subtitles)``.  Raises ExtractorError when the legacy playlist
        reports that the episode is unavailable or contains no programme
        item.
        """
        try:
            playlist = self._download_json(
                'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
                playlist_id, 'Downloading playlist JSON')

            version = playlist.get('defaultAvailableVersion')
            if version:
                smp_config = version['smpConfig']
                title = smp_config['title']
                description = smp_config['summary']
                result = None
                for item in smp_config['items']:
                    kind = item['kind']
                    if kind != 'programme' and kind != 'radioProgramme':
                        continue
                    programme_id = item.get('vpid')
                    # duration may be missing from the item
                    duration = int_or_none(item.get('duration'))
                    formats, subtitles = self._download_media_selector(programme_id)
                    result = (programme_id, title, description, duration, formats, subtitles)
                # Without a programme item there is nothing to return here
                # (this used to surface as UnboundLocalError); fall through
                # to the legacy playlist instead.
                if result is not None:
                    return result
        except ExtractorError as ee:
            if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
                raise

        # fallback to legacy playlist
        playlist = self._download_xml(
            'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
            playlist_id, 'Downloading legacy playlist XML')

        no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
        if no_items is not None:
            reason = no_items.get('reason')
            if reason == 'preAvailability':
                msg = 'Episode %s is not yet available' % playlist_id
            elif reason == 'postAvailability':
                msg = 'Episode %s is no longer available' % playlist_id
            elif reason == 'noMedia':
                msg = 'Episode %s is not currently available' % playlist_id
            else:
                msg = 'Episode %s is not available: %s' % (playlist_id, reason)
            raise ExtractorError(msg, expected=True)

        result = None
        for item in self._extract_items(playlist):
            kind = item.get('kind')
            if kind != 'programme' and kind != 'radioProgramme':
                continue
            title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
            description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
            programme_id = item.get('identifier')
            # duration may be missing from the item
            duration = int_or_none(item.get('duration'))
            formats, subtitles = self._download_media_selector(programme_id)
            result = (programme_id, title, description, duration, formats, subtitles)

        if result is None:
            # Explicit error instead of an UnboundLocalError on return.
            raise ExtractorError(
                'No programme found in playlist %s' % playlist_id)

        return result

    def _real_extract(self, url):
        """Extract a single programme from the page at ``url``.

        The vpid is looked up in the ``mediator.bind(...)`` player JSON,
        then via a plain ``"vpid": "..."`` regex; if neither yields an id
        the playlist for the URL's id is downloaded instead.
        """
        group_id = self._match_id(url)

        webpage = self._download_webpage(url, group_id, 'Downloading video page')

        programme_id = None
        # Bound up front: when the player JSON is absent but the "vpid"
        # regex matches, no other branch assigns duration and the return
        # statement would otherwise raise UnboundLocalError.
        duration = None

        tviplayer = self._search_regex(
            r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
            webpage, 'player', default=None)

        if tviplayer:
            player = self._parse_json(tviplayer, group_id).get('player', {})
            duration = int_or_none(player.get('duration'))
            programme_id = player.get('vpid')

        if not programme_id:
            programme_id = self._search_regex(
                r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)

        if programme_id:
            formats, subtitles = self._download_media_selector(programme_id)
            title = self._og_search_title(webpage)
            description = self._search_regex(
                r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
                webpage, 'description', fatal=False)
        else:
            programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)

        self._sort_formats(formats)

        return {
            'id': programme_id,
            'title': title,
            'description': description,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
@@ -57,7 +57,7 @@ class BetIE(InfoExtractor):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
media_url = compat_urllib_parse.unquote(self._search_regex(
|
||||
media_url = compat_urllib_parse_unquote(self._search_regex(
|
||||
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
|
||||
webpage, 'media URL'))
|
||||
|
||||
|
||||
@@ -41,8 +41,15 @@ class BiliBiliIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if self._search_regex(r'(此视频不存在或被删除)', webpage, 'error message', default=None):
|
||||
raise ExtractorError('The video does not exist or was deleted', expected=True)
|
||||
if '(此视频不存在或被删除)' in webpage:
|
||||
raise ExtractorError(
|
||||
'The video does not exist or was deleted', expected=True)
|
||||
|
||||
if '>你没有权限浏览! 由于版权相关问题 我们不对您所在的地区提供服务<' in webpage:
|
||||
raise ExtractorError(
|
||||
'The video is not available in your region due to copyright reasons',
|
||||
expected=True)
|
||||
|
||||
video_code = self._search_regex(
|
||||
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -14,6 +13,8 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unescapeHTML,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
@@ -23,10 +24,10 @@ class BlipTVIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
||||
'md5': '80baf1ec5c3d2019037c1c707d676b9f',
|
||||
'info_dict': {
|
||||
'id': '5779306',
|
||||
'ext': 'mov',
|
||||
'ext': 'm4v',
|
||||
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
||||
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
||||
'timestamp': 1323138843,
|
||||
@@ -100,6 +101,20 @@ class BlipTVIE(InfoExtractor):
|
||||
'vcodec': 'none',
|
||||
}
|
||||
},
|
||||
{
|
||||
# missing duration
|
||||
'url': 'http://blip.tv/rss/flash/6700880',
|
||||
'info_dict': {
|
||||
'id': '6684191',
|
||||
'ext': 'm4v',
|
||||
'title': 'Cowboy Bebop: Gateway Shuffle Review',
|
||||
'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8',
|
||||
'timestamp': 1386639757,
|
||||
'upload_date': '20131210',
|
||||
'uploader': 'sfdebris',
|
||||
'uploader_id': '706520',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
@@ -128,35 +143,34 @@ class BlipTVIE(InfoExtractor):
|
||||
|
||||
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
|
||||
|
||||
def blip(s):
|
||||
return '{http://blip.tv/dtd/blip/1.0}%s' % s
|
||||
|
||||
def media(s):
|
||||
return '{http://search.yahoo.com/mrss/}%s' % s
|
||||
|
||||
def itunes(s):
|
||||
return '{http://www.itunes.com/dtds/podcast-1.0.dtd}%s' % s
|
||||
def _x(p):
|
||||
return xpath_with_ns(p, {
|
||||
'blip': 'http://blip.tv/dtd/blip/1.0',
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
|
||||
})
|
||||
|
||||
item = rss.find('channel/item')
|
||||
|
||||
video_id = item.find(blip('item_id')).text
|
||||
title = item.find('./title').text
|
||||
description = clean_html(compat_str(item.find(blip('puredescription')).text))
|
||||
timestamp = parse_iso8601(item.find(blip('datestamp')).text)
|
||||
uploader = item.find(blip('user')).text
|
||||
uploader_id = item.find(blip('userid')).text
|
||||
duration = int(item.find(blip('runtime')).text)
|
||||
media_thumbnail = item.find(media('thumbnail'))
|
||||
thumbnail = media_thumbnail.get('url') if media_thumbnail is not None else item.find(itunes('image')).text
|
||||
categories = [category.text for category in item.findall('category')]
|
||||
video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
|
||||
title = xpath_text(item, 'title', 'title', fatal=True)
|
||||
description = clean_html(xpath_text(item, _x('blip:puredescription'), 'description'))
|
||||
timestamp = parse_iso8601(xpath_text(item, _x('blip:datestamp'), 'timestamp'))
|
||||
uploader = xpath_text(item, _x('blip:user'), 'uploader')
|
||||
uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
|
||||
duration = int_or_none(xpath_text(item, _x('blip:runtime'), 'duration'))
|
||||
media_thumbnail = item.find(_x('media:thumbnail'))
|
||||
thumbnail = (media_thumbnail.get('url') if media_thumbnail is not None
|
||||
else xpath_text(item, 'image', 'thumbnail'))
|
||||
categories = [category.text for category in item.findall('category') if category is not None]
|
||||
|
||||
formats = []
|
||||
subtitles_urls = {}
|
||||
|
||||
media_group = item.find(media('group'))
|
||||
for media_content in media_group.findall(media('content')):
|
||||
media_group = item.find(_x('media:group'))
|
||||
for media_content in media_group.findall(_x('media:content')):
|
||||
url = media_content.get('url')
|
||||
role = media_content.get(blip('role'))
|
||||
role = media_content.get(_x('blip:role'))
|
||||
msg = self._download_webpage(
|
||||
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
|
||||
video_id, 'Resolving URL for %s' % role)
|
||||
@@ -175,8 +189,8 @@ class BlipTVIE(InfoExtractor):
|
||||
'url': real_url,
|
||||
'format_id': role,
|
||||
'format_note': media_type,
|
||||
'vcodec': media_content.get(blip('vcodec')) or 'none',
|
||||
'acodec': media_content.get(blip('acodec')),
|
||||
'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
|
||||
'acodec': media_content.get(_x('blip:acodec')),
|
||||
'filesize': media_content.get('filesize'),
|
||||
'width': int_or_none(media_content.get('width')),
|
||||
'height': int_or_none(media_content.get('height')),
|
||||
|
||||
@@ -106,15 +106,11 @@ class CanalplusIE(InfoExtractor):
|
||||
continue
|
||||
format_id = fmt.tag
|
||||
if format_id == 'HLS':
|
||||
hls_formats = self._extract_m3u8_formats(format_url, video_id, 'flv')
|
||||
for fmt in hls_formats:
|
||||
fmt['preference'] = preference(format_id)
|
||||
formats.extend(hls_formats)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', preference=preference(format_id)))
|
||||
elif format_id == 'HDS':
|
||||
hds_formats = self._extract_f4m_formats(format_url + '?hdcore=2.11.3', video_id)
|
||||
for fmt in hds_formats:
|
||||
fmt['preference'] = preference(format_id)
|
||||
formats.extend(hds_formats)
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url + '?hdcore=2.11.3', video_id, preference=preference(format_id)))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
||||
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -88,7 +89,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
if playlist_url == 'error_region':
|
||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||
|
||||
req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
|
||||
req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req.add_header('Referer', url)
|
||||
|
||||
playlist = self._download_json(req, video_id)
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
@@ -10,9 +8,9 @@ from ..utils import (
|
||||
|
||||
|
||||
class ClipsyndicateIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||
_VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
||||
'md5': '4d7d549451bad625e0ff3d7bd56d776c',
|
||||
'info_dict': {
|
||||
@@ -22,11 +20,13 @@ class ClipsyndicateIE(InfoExtractor):
|
||||
'duration': 612,
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
js_player = self._download_webpage(
|
||||
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
|
||||
video_id, 'Downlaoding player')
|
||||
|
||||
@@ -36,7 +36,7 @@ class ComCarCoffIE(InfoExtractor):
|
||||
webpage, 'full data json'))
|
||||
|
||||
video_id = full_data['activeVideo']['video']
|
||||
video_data = full_data['videos'][video_id]
|
||||
video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
|
||||
thumbnails = [{
|
||||
'url': video_data['images']['thumb'],
|
||||
}, {
|
||||
|
||||
@@ -27,7 +27,9 @@ from ..utils import (
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
@@ -63,7 +65,7 @@ class InfoExtractor(object):
|
||||
|
||||
Potential fields:
|
||||
* url Mandatory. The URL of the video file
|
||||
* ext Will be calculated from url if missing
|
||||
* ext Will be calculated from URL if missing
|
||||
* format A human-readable description of the format
|
||||
("mp4 container with h264/opus").
|
||||
Calculated from the format_id, width, height.
|
||||
@@ -153,7 +155,7 @@ class InfoExtractor(object):
|
||||
lower to higher preference, each element is a dictionary
|
||||
with the "ext" entry and one of:
|
||||
* "data": The subtitles file contents
|
||||
* "url": A url pointing to the subtitles file
|
||||
* "url": A URL pointing to the subtitles file
|
||||
automatic_captions: Like 'subtitles', used by the YoutubeIE for
|
||||
automatically generated captions
|
||||
duration: Length of the video in seconds, as an integer.
|
||||
@@ -174,13 +176,17 @@ class InfoExtractor(object):
|
||||
Set to "root" to indicate that this is a
|
||||
comment to the original video.
|
||||
age_limit: Age restriction for the video, as an integer (years)
|
||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||
webpage_url: The URL to the video webpage, if given to youtube-dl it
|
||||
should allow to get the same result again. (It will be set
|
||||
by YoutubeDL if it's missing)
|
||||
categories: A list of categories that the video falls in, for example
|
||||
["Sports", "Berlin"]
|
||||
is_live: True, False, or None (=unknown). Whether this video is a
|
||||
live stream that goes on instead of a fixed-length video.
|
||||
start_time: Time in seconds where the reproduction should start, as
|
||||
specified in the URL.
|
||||
end_time: Time in seconds where the reproduction should end, as
|
||||
specified in the URL.
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
@@ -499,7 +505,7 @@ class InfoExtractor(object):
|
||||
# Methods for following #608
|
||||
@staticmethod
|
||||
def url_result(url, ie=None, video_id=None, video_title=None):
|
||||
"""Returns a url that points to a page that should be processed"""
|
||||
"""Returns a URL that points to a page that should be processed"""
|
||||
# TODO: ie should be the class used for getting the info
|
||||
video_info = {'_type': 'url',
|
||||
'url': url,
|
||||
@@ -633,7 +639,7 @@ class InfoExtractor(object):
|
||||
return unescapeHTML(escaped)
|
||||
|
||||
def _og_search_thumbnail(self, html, **kargs):
|
||||
return self._og_search_property('image', html, 'thumbnail url', fatal=False, **kargs)
|
||||
return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)
|
||||
|
||||
def _og_search_description(self, html, **kargs):
|
||||
return self._og_search_property('description', html, fatal=False, **kargs)
|
||||
@@ -705,6 +711,25 @@ class InfoExtractor(object):
|
||||
return self._html_search_meta('twitter:player', html,
|
||||
'twitter card player')
|
||||
|
||||
@staticmethod
|
||||
def _hidden_inputs(html):
|
||||
return dict([
|
||||
(input.group('name'), input.group('value')) for input in re.finditer(
|
||||
r'''(?x)
|
||||
<input\s+
|
||||
type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
|
||||
name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
|
||||
(?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
|
||||
value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)
|
||||
''', html)
|
||||
])
|
||||
|
||||
def _form_hidden_inputs(self, form_id, html):
|
||||
form = self._search_regex(
|
||||
r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
|
||||
html, '%s form' % form_id, group='form')
|
||||
return self._hidden_inputs(form)
|
||||
|
||||
def _sort_formats(self, formats, field_preference=None):
|
||||
if not formats:
|
||||
raise ExtractorError('No video formats found')
|
||||
@@ -815,10 +840,14 @@ class InfoExtractor(object):
|
||||
self.to_screen(msg)
|
||||
time.sleep(timeout)
|
||||
|
||||
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
|
||||
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip()):
|
||||
manifest = self._download_xml(
|
||||
manifest_url, video_id, 'Downloading f4m manifest',
|
||||
'Unable to download f4m manifest')
|
||||
'Unable to download f4m manifest',
|
||||
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
||||
# (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
|
||||
transform_source=transform_source)
|
||||
|
||||
formats = []
|
||||
manifest_version = '1.0'
|
||||
@@ -828,8 +857,19 @@ class InfoExtractor(object):
|
||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
||||
for i, media_el in enumerate(media_nodes):
|
||||
if manifest_version == '2.0':
|
||||
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
|
||||
(media_el.attrib.get('href') or media_el.attrib.get('url')))
|
||||
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
manifest_url = (
|
||||
media_url if media_url.startswith('http://') or media_url.startswith('https://')
|
||||
else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
|
||||
# If media_url is itself a f4m manifest do the recursive extraction
|
||||
# since bitrates in parent manifest (this one) and media_url manifest
|
||||
# may differ leading to inability to resolve the format by requested
|
||||
# bitrate in f4m downloader
|
||||
if determine_ext(manifest_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(manifest_url, video_id, preference, f4m_id))
|
||||
continue
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
|
||||
@@ -960,7 +1000,7 @@ class InfoExtractor(object):
|
||||
def _parse_smil_video(self, video, video_id, base, rtmp_count):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
return ([], rtmp_count)
|
||||
return [], rtmp_count
|
||||
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
@@ -973,7 +1013,7 @@ class InfoExtractor(object):
|
||||
proto = 'http'
|
||||
ext = video.get('ext')
|
||||
if proto == 'm3u8':
|
||||
return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
|
||||
return self._extract_m3u8_formats(src, video_id, ext), rtmp_count
|
||||
elif proto == 'rtmp':
|
||||
rtmp_count += 1
|
||||
streamer = video.get('streamer') or base
|
||||
@@ -1080,7 +1120,7 @@ class InfoExtractor(object):
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
Base class for paged search queries extractors.
|
||||
They accept urls in the format _SEARCH_KEY(|all|[0-9]):{query}
|
||||
They accept URLs in the format _SEARCH_KEY(|all|[0-9]):{query}
|
||||
Instances should define _SEARCH_KEY and _MAX_RESULTS.
|
||||
"""
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -254,7 +255,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
video_upload_date = unified_strdate(video_upload_date)
|
||||
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
|
||||
|
||||
playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
|
||||
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
|
||||
playerdata_req = compat_urllib_request.Request(playerdata_url)
|
||||
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
|
||||
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
@@ -6,6 +6,7 @@ from ..utils import parse_iso8601, ExtractorError
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
IE_DESC = '華視新聞'
|
||||
# https connection failed (Connection reset)
|
||||
_VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
||||
_TESTS = [{
|
||||
|
||||
@@ -13,8 +13,10 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
)
|
||||
@@ -28,10 +30,16 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
request.add_header('Cookie', 'family_filter=off; ff=off')
|
||||
return request
|
||||
|
||||
def _download_webpage_handle_no_ff(self, url, *args, **kwargs):
|
||||
request = self._build_request(url)
|
||||
return self._download_webpage_handle(request, *args, **kwargs)
|
||||
|
||||
def _download_webpage_no_ff(self, url, *args, **kwargs):
|
||||
request = self._build_request(url)
|
||||
return self._download_webpage(request, *args, **kwargs)
|
||||
|
||||
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
"""Information Extractor for Dailymotion"""
|
||||
|
||||
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
||||
IE_NAME = 'dailymotion'
|
||||
|
||||
@@ -50,9 +58,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'x2iuewm',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'IGN',
|
||||
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
|
||||
'description': 'Several come bundled with the Steam Controller.',
|
||||
'thumbnail': 're:^https?:.*\.(?:jpg|png)$',
|
||||
'duration': 74,
|
||||
'timestamp': 1425657362,
|
||||
'upload_date': '20150306',
|
||||
'uploader': 'IGN',
|
||||
'uploader_id': 'xijv66',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
# Vevo video
|
||||
@@ -86,38 +102,106 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url = 'https://www.dailymotion.com/video/%s' % video_id
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
request = self._build_request(url)
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
|
||||
# Extract URL, uploader and title from webpage
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# It may just embed a vevo video:
|
||||
m_vevo = re.search(
|
||||
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
|
||||
webpage)
|
||||
if m_vevo is not None:
|
||||
vevo_id = m_vevo.group('id')
|
||||
self.to_screen('Vevo video detected: %s' % vevo_id)
|
||||
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
|
||||
webpage = self._download_webpage_no_ff(
|
||||
'https://www.dailymotion.com/video/%s' % video_id, video_id)
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
video_upload_date = None
|
||||
mobj = re.search(r'<meta property="video:release_date" content="([0-9]{4})-([0-9]{2})-([0-9]{2}).+?"/>', webpage)
|
||||
if mobj is not None:
|
||||
video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:(\d+)"',
|
||||
r'video_views_count[^>]+>\s+([\d\.,]+)'],
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
player_v5 = self._search_regex(
|
||||
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
|
||||
webpage, 'player v5', default=None)
|
||||
if player_v5:
|
||||
player = self._parse_json(player_v5, video_id)
|
||||
metadata = player['metadata']
|
||||
formats = []
|
||||
for quality, media_list in metadata['qualities'].items():
|
||||
for media in media_list:
|
||||
media_url = media.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
type_ = media.get('type')
|
||||
if type_ == 'application/vnd.lumberjack.manifest':
|
||||
continue
|
||||
if type_ == 'application/x-mpegURL' or determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
else:
|
||||
f = {
|
||||
'url': media_url,
|
||||
'format_id': quality,
|
||||
}
|
||||
m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
|
||||
if m:
|
||||
f.update({
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = metadata['title']
|
||||
duration = int_or_none(metadata.get('duration'))
|
||||
timestamp = int_or_none(metadata.get('created_time'))
|
||||
thumbnail = metadata.get('poster_url')
|
||||
uploader = metadata.get('owner', {}).get('screenname')
|
||||
uploader_id = metadata.get('owner', {}).get('id')
|
||||
|
||||
subtitles = {}
|
||||
for subtitle_lang, subtitle in metadata.get('subtitles', {}).get('data', {}).items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'ext': determine_ext(subtitle_url),
|
||||
'url': subtitle_url,
|
||||
} for subtitle_url in subtitle.get('urls', [])]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'age_limit': age_limit,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
# vevo embed
|
||||
vevo_id = self._search_regex(
|
||||
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
|
||||
webpage, 'vevo embed', default=None)
|
||||
if vevo_id:
|
||||
return self.url_result('vevo:%s' % vevo_id, 'Vevo')
|
||||
|
||||
# fallback old player
|
||||
embed_page = self._download_webpage_no_ff(
|
||||
'https://www.dailymotion.com/embed/video/%s' % video_id,
|
||||
video_id, 'Downloading embed page')
|
||||
|
||||
timestamp = parse_iso8601(self._html_search_meta(
|
||||
'video:release_date', webpage, 'upload date'))
|
||||
|
||||
info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var info = ({.*?}),$', embed_page,
|
||||
'video info', flags=re.MULTILINE),
|
||||
video_id)
|
||||
|
||||
embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id
|
||||
embed_request = self._build_request(embed_url)
|
||||
embed_page = self._download_webpage(
|
||||
embed_request, video_id, 'Downloading embed page')
|
||||
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
||||
'video info', flags=re.MULTILINE)
|
||||
info = json.loads(info)
|
||||
if info.get('error') is not None:
|
||||
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
||||
raise ExtractorError(msg, expected=True)
|
||||
@@ -138,16 +222,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
if not formats:
|
||||
raise ExtractorError('Unable to extract video URL')
|
||||
self._sort_formats(formats)
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'video_views_count[^>]+>\s+([\d\.,]+)',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
title = self._og_search_title(webpage, default=None)
|
||||
if title is None:
|
||||
title = self._html_search_regex(
|
||||
@@ -158,12 +237,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'uploader': info['owner.screenname'],
|
||||
'upload_date': video_upload_date,
|
||||
'timestamp': timestamp,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'subtitles': video_subtitles,
|
||||
'thumbnail': info['thumbnail_url'],
|
||||
'age_limit': age_limit,
|
||||
'view_count': view_count,
|
||||
'duration': info['duration']
|
||||
}
|
||||
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
@@ -198,10 +279,17 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
def _extract_entries(self, id):
|
||||
video_ids = []
|
||||
processed_urls = set()
|
||||
for pagenum in itertools.count(1):
|
||||
request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
|
||||
webpage = self._download_webpage(request,
|
||||
id, 'Downloading page %s' % pagenum)
|
||||
page_url = self._PAGE_TEMPLATE % (id, pagenum)
|
||||
webpage, urlh = self._download_webpage_handle_no_ff(
|
||||
page_url, id, 'Downloading page %s' % pagenum)
|
||||
if urlh.geturl() in processed_urls:
|
||||
self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
|
||||
page_url, urlh.geturl()), id)
|
||||
break
|
||||
|
||||
processed_urls.add(urlh.geturl())
|
||||
|
||||
video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
|
||||
|
||||
@@ -225,7 +313,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:(?:old/)?user/)?(?P<user>[^/]+)$'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
@@ -234,6 +322,17 @@ class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
'title': 'Rémi Gaillard',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||
'info_dict': {
|
||||
'id': 'UnderProject',
|
||||
'title': 'UnderProject',
|
||||
},
|
||||
'playlist_mincount': 1800,
|
||||
'expected_warnings': [
|
||||
'Stopped at duplicated page',
|
||||
],
|
||||
'skip': 'Takes too long time',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -284,8 +383,7 @@ class DailymotionCloudIE(DailymotionBaseInfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = self._build_request(url)
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
webpage = self._download_webpage_no_ff(url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')
|
||||
|
||||
|
||||
@@ -3,42 +3,47 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class DFBIE(InfoExtractor):
|
||||
IE_NAME = 'tv.dfb.de'
|
||||
_VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://tv\.dfb\.de/video/(?P<display_id>[^/]+)/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
|
||||
'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
|
||||
# The md5 is different each time
|
||||
'info_dict': {
|
||||
'id': '9070',
|
||||
'id': '11633',
|
||||
'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
|
||||
'ext': 'flv',
|
||||
'title': 'Highlights des Empfangs in Berlin',
|
||||
'upload_date': '20140716',
|
||||
'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
|
||||
'upload_date': '20150714',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_info = self._download_xml(
|
||||
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
|
||||
video_id)
|
||||
display_id)
|
||||
video_info = player_info.find('video')
|
||||
|
||||
f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
|
||||
f4m_info = self._download_xml(
|
||||
self._proto_relative_url(video_info.find('url').text.strip()), display_id)
|
||||
token_el = f4m_info.find('token')
|
||||
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
|
||||
formats = self._extract_f4m_formats(manifest_url, display_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video_info.find('title').text,
|
||||
'url': manifest_url,
|
||||
'ext': 'flv',
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'upload_date': ''.join(video_info.find('time_date').text.split('.')[::-1]),
|
||||
'upload_date': unified_strdate(video_info.find('time_date').text),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ from ..compat import (compat_str, compat_basestring)
|
||||
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
IE_DESC = '斗鱼'
|
||||
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.douyutv.com/iseven',
|
||||
|
||||
@@ -23,8 +23,23 @@ class DramaFeverBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
||||
_NETRC_MACHINE = 'dramafever'
|
||||
|
||||
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
|
||||
|
||||
_consumer_secret = None
|
||||
|
||||
def _get_consumer_secret(self):
|
||||
mainjs = self._download_webpage(
|
||||
'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
|
||||
None, 'Downloading main.js', fatal=False)
|
||||
if not mainjs:
|
||||
return self._CONSUMER_SECRET
|
||||
return self._search_regex(
|
||||
r"var\s+cs\s*=\s*'([^']+)'", mainjs,
|
||||
'consumer secret', default=self._CONSUMER_SECRET)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
self._consumer_secret = self._get_consumer_secret()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
@@ -119,6 +134,23 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
'url': href,
|
||||
}]
|
||||
|
||||
series_id, episode_number = video_id.split('.')
|
||||
episode_info = self._download_json(
|
||||
# We only need a single episode info, so restricting page size to one episode
|
||||
# and dealing with page number as with episode number
|
||||
r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
|
||||
% (self._consumer_secret, series_id, episode_number),
|
||||
video_id, 'Downloading episode info JSON', fatal=False)
|
||||
if episode_info:
|
||||
value = episode_info.get('value')
|
||||
if value:
|
||||
subfile = value[0].get('subfile') or value[0].get('new_subfile')
|
||||
if subfile and subfile != 'http://www.dramafever.com/st/':
|
||||
subtitles.setdefault('English', []).append({
|
||||
'ext': 'srt',
|
||||
'url': subfile,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -152,27 +184,14 @@ class DramaFeverSeriesIE(DramaFeverBaseIE):
|
||||
'playlist_count': 20,
|
||||
}]
|
||||
|
||||
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
|
||||
_PAGE_SIZE = 60 # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
|
||||
|
||||
def _get_consumer_secret(self, video_id):
|
||||
mainjs = self._download_webpage(
|
||||
'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
|
||||
video_id, 'Downloading main.js', fatal=False)
|
||||
if not mainjs:
|
||||
return self._CONSUMER_SECRET
|
||||
return self._search_regex(
|
||||
r"var\s+cs\s*=\s*'([^']+)'", mainjs,
|
||||
'consumer secret', default=self._CONSUMER_SECRET)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
|
||||
consumer_secret = self._get_consumer_secret(series_id)
|
||||
|
||||
series = self._download_json(
|
||||
'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
|
||||
% (consumer_secret, series_id),
|
||||
% (self._consumer_secret, series_id),
|
||||
series_id, 'Downloading series JSON')['series'][series_id]
|
||||
|
||||
title = clean_html(series['name'])
|
||||
@@ -182,7 +201,7 @@ class DramaFeverSeriesIE(DramaFeverBaseIE):
|
||||
for page_num in itertools.count(1):
|
||||
episodes = self._download_json(
|
||||
'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
|
||||
% (consumer_secret, series_id, self._PAGE_SIZE, page_num),
|
||||
% (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
|
||||
series_id, 'Downloading episodes JSON page #%d' % page_num)
|
||||
for episode in episodes.get('value', []):
|
||||
episode_url = episode.get('episode_url')
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class EHowIE(InfoExtractor):
|
||||
@@ -26,7 +24,7 @@ class EHowIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(
|
||||
r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
|
||||
final_url = compat_urllib_parse.unquote(video_url)
|
||||
final_url = compat_urllib_parse_unquote(video_url)
|
||||
uploader = self._html_search_meta('uploader', webpage)
|
||||
title = self._og_search_title(webpage).replace(' | eHow', '')
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ from ..compat import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -136,7 +136,7 @@ class FacebookIE(InfoExtractor):
|
||||
else:
|
||||
raise ExtractorError('Cannot parse data')
|
||||
data = dict(json.loads(m.group(1)))
|
||||
params_raw = compat_urllib_parse.unquote(data['params'])
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
params = json.loads(params_raw)
|
||||
video_data = params['video_data'][0]
|
||||
|
||||
|
||||
@@ -6,15 +6,11 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
)
|
||||
@@ -59,12 +55,12 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
# See https://github.com/rg3/youtube-dl/issues/3963
|
||||
# m3u8 urls work fine
|
||||
continue
|
||||
video_url_parsed = compat_urllib_parse_urlparse(video_url)
|
||||
f4m_url = self._download_webpage(
|
||||
'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url_parsed.path,
|
||||
'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
|
||||
video_id, 'Downloading f4m manifest token', fatal=False)
|
||||
if f4m_url:
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 1, format_id))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
|
||||
elif video_url.startswith('rtmp'):
|
||||
@@ -87,7 +83,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
'title': info['titre'],
|
||||
'description': clean_html(info['synopsis']),
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||
'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']),
|
||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -160,11 +156,21 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'francetv'
|
||||
IE_DESC = 'France 2, 3, 4, 5 and Ô'
|
||||
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
|
||||
(?:
|
||||
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
|
||||
| (emissions?|jt)/(?P<key>[^/?]+)
|
||||
)'''
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?france[2345o]\.fr/
|
||||
(?:
|
||||
emissions/[^/]+/(?:videos|diffusions)|
|
||||
emission/[^/]+|
|
||||
videos|
|
||||
jt
|
||||
)
|
||||
/|
|
||||
embed\.francetv\.fr/\?ue=
|
||||
)
|
||||
(?P<id>[^/?]+)
|
||||
'''
|
||||
|
||||
_TESTS = [
|
||||
# france2
|
||||
@@ -221,24 +227,46 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
},
|
||||
# franceo
|
||||
{
|
||||
'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
||||
'md5': '52f0bfe202848b15915a2f39aaa8981b',
|
||||
'url': 'http://www.franceo.fr/jt/info-soir/18-07-2015',
|
||||
'md5': '47d5816d3b24351cdce512ad7ab31da8',
|
||||
'info_dict': {
|
||||
'id': '108634970',
|
||||
'id': '125377621',
|
||||
'ext': 'flv',
|
||||
'title': 'Infô Afrique',
|
||||
'description': 'md5:ebf346da789428841bee0fd2a935ea55',
|
||||
'upload_date': '20140915',
|
||||
'timestamp': 1410822000,
|
||||
'title': 'Infô soir',
|
||||
'description': 'md5:01b8c6915a3d93d8bbbd692651714309',
|
||||
'upload_date': '20150718',
|
||||
'timestamp': 1437241200,
|
||||
'duration': 414,
|
||||
},
|
||||
},
|
||||
{
|
||||
# francetv embed
|
||||
'url': 'http://embed.francetv.fr/?ue=8d7d3da1e3047c42ade5a5d7dfd3fc87',
|
||||
'info_dict': {
|
||||
'id': 'EV_30231',
|
||||
'ext': 'flv',
|
||||
'title': 'Alcaline, le concert avec Calogero',
|
||||
'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
|
||||
'upload_date': '20150226',
|
||||
'timestamp': 1424989860,
|
||||
'duration': 5400,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.france4.fr/emission/highlander/diffusion-du-17-07-2015-04h05',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.franceo.fr/videos/125377617',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
webpage = self._download_webpage(url, mobj.group('key') or mobj.group('id'))
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_id, catalogue = self._html_search_regex(
|
||||
r'href="http://videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
r'href="http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
webpage, 'video ID').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -75,7 +75,7 @@ class GameSpotIE(InfoExtractor):
|
||||
return {
|
||||
'id': data_video['guid'],
|
||||
'display_id': page_id,
|
||||
'title': compat_urllib_parse.unquote(data_video['title']),
|
||||
'title': compat_urllib_parse_unquote(data_video['title']),
|
||||
'formats': formats,
|
||||
'description': self._html_search_meta('description', webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
|
||||
@@ -8,7 +8,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
@@ -37,6 +36,7 @@ from .rutv import RUTVIE
|
||||
from .tvc import TVCIE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .smotri import SmotriIE
|
||||
from .myvi import MyviIE
|
||||
from .condenast import CondeNastIE
|
||||
from .udn import UDNEmbedIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
@@ -276,14 +276,6 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
||||
},
|
||||
},
|
||||
# BBC iPlayer embeds
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
|
||||
'info_dict': {
|
||||
'title': 'BBC - Blogs - Adam Curtis - BUGGER',
|
||||
},
|
||||
'playlist_mincount': 18,
|
||||
},
|
||||
# RUTV embed
|
||||
{
|
||||
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
|
||||
@@ -338,6 +330,17 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Myvi.ru embed
|
||||
{
|
||||
'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
|
||||
'info_dict': {
|
||||
'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ужастики, русский трейлер (2015)',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 153,
|
||||
}
|
||||
},
|
||||
# XHamster embed
|
||||
{
|
||||
'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
|
||||
@@ -396,6 +399,26 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': 'Requires rtmpdump'
|
||||
}
|
||||
},
|
||||
# francetv embed
|
||||
{
|
||||
'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
|
||||
'info_dict': {
|
||||
'id': 'EV_30231',
|
||||
'ext': 'mp4',
|
||||
'title': 'Alcaline, le concert avec Calogero',
|
||||
'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
|
||||
'upload_date': '20150226',
|
||||
'timestamp': 1424989860,
|
||||
'duration': 5400,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'Forbidden'
|
||||
]
|
||||
},
|
||||
# Condé Nast embed
|
||||
{
|
||||
'url': 'http://www.wired.com/2014/04/honda-asimo/',
|
||||
@@ -1103,7 +1126,7 @@ class GenericIE(InfoExtractor):
|
||||
# Sometimes embedded video player is hidden behind percent encoding
|
||||
# (e.g. https://github.com/rg3/youtube-dl/issues/2448)
|
||||
# Unescaping the whole page allows to handle those cases in a generic way
|
||||
webpage = compat_urllib_parse.unquote(webpage)
|
||||
webpage = compat_urllib_parse_unquote(webpage)
|
||||
|
||||
# it's tempting to parse this further, but you would
|
||||
# have to take into account all the variations like
|
||||
@@ -1165,6 +1188,12 @@ class GenericIE(InfoExtractor):
|
||||
if vimeo_url is not None:
|
||||
return self.url_result(vimeo_url)
|
||||
|
||||
vid_me_embed_url = self._search_regex(
|
||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||
webpage, 'vid.me embed', default=None)
|
||||
if vid_me_embed_url is not None:
|
||||
return self.url_result(vid_me_embed_url, 'Vidme')
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(r'''(?x)
|
||||
(?:
|
||||
@@ -1357,7 +1386,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'))
|
||||
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
|
||||
return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
|
||||
|
||||
# Look for funnyordie embed
|
||||
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
||||
@@ -1420,11 +1449,23 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
||||
|
||||
# Look for embedded francetv player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for embedded smotri.com player
|
||||
smotri_url = SmotriIE._extract_url(webpage)
|
||||
if smotri_url:
|
||||
return self.url_result(smotri_url, 'Smotri')
|
||||
|
||||
# Look for embedded Myvi.ru player
|
||||
myvi_url = MyviIE._extract_url(webpage)
|
||||
if myvi_url:
|
||||
return self.url_result(myvi_url)
|
||||
|
||||
# Look for embeded soundcloud player
|
||||
mobj = re.search(
|
||||
r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
|
||||
@@ -1653,7 +1694,7 @@ class GenericIE(InfoExtractor):
|
||||
if refresh_header:
|
||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||
if found:
|
||||
new_url = compat_urlparse.urljoin(url, found.group(1))
|
||||
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
|
||||
self.report_following_redirect(new_url)
|
||||
return {
|
||||
'_type': 'url',
|
||||
@@ -1665,7 +1706,7 @@ class GenericIE(InfoExtractor):
|
||||
entries = []
|
||||
for video_url in found:
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
|
||||
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
|
||||
|
||||
# Sometimes, jwplayer extraction will result in a YouTube URL
|
||||
if YoutubeIE.suitable(video_url):
|
||||
|
||||
@@ -78,12 +78,7 @@ class GorillaVidIE(InfoExtractor):
|
||||
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
fields = dict(re.findall(r'''(?x)<input\s+
|
||||
type="hidden"\s+
|
||||
name="([^"]+)"\s+
|
||||
(?:id="[^"]+"\s+)?
|
||||
value="([^"]*)"
|
||||
''', webpage))
|
||||
fields = self._hidden_inputs(webpage)
|
||||
|
||||
if fields['op'] == 'download1':
|
||||
countdown = int_or_none(self._search_regex(
|
||||
|
||||
@@ -58,11 +58,7 @@ class HostingBulkIE(InfoExtractor):
|
||||
r'<img src="([^"]+)".+?class="pic"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
fields = dict(re.findall(r'''(?x)<input\s+
|
||||
type="hidden"\s+
|
||||
name="([^"]+)"\s+
|
||||
value="([^"]*)"
|
||||
''', webpage))
|
||||
fields = self._hidden_inputs(webpage)
|
||||
|
||||
request = compat_urllib_request.Request(url, urlencode_postdata(fields))
|
||||
request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class HowStuffWorksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm'
|
||||
_VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
|
||||
@@ -46,6 +46,10 @@ class HowStuffWorksIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -4,7 +4,7 @@ import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
@@ -39,7 +39,7 @@ class InfoQIE(InfoExtractor):
|
||||
# Extract video URL
|
||||
encoded_id = self._search_regex(
|
||||
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
|
||||
real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
|
||||
real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
|
||||
playpath = 'mp4:' + real_id
|
||||
|
||||
video_filename = playpath.split('/')[-1]
|
||||
|
||||
@@ -3,23 +3,18 @@ from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import math
|
||||
import os.path
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
import zlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class IqiyiIE(InfoExtractor):
|
||||
IE_NAME = 'iqiyi'
|
||||
IE_DESC = '爱奇艺'
|
||||
|
||||
_VALID_URL = r'http://(?:www\.)iqiyi.com/v_.+?\.html'
|
||||
|
||||
@@ -38,62 +33,57 @@ class IqiyiIE(InfoExtractor):
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '7e49376fecaffa115d951634917fe105',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': '41b75ba13bb7ac0e411131f92bc4f6ca',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': '0cee1dd0a3d46a83e71e2badeae2aab0',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': '4f8ad72373b0c491b582e7c196b0b1f9',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': 'd89ad028bcfad282918e8098e811711d',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': '9cb1e5c95da25dff0660c32ae50903b7',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': '155116e0ff1867bbc9b98df294faabc9',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': '53f5db77622ae14fa493ed2a278a082b',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_FORMATS_MAP = [
|
||||
@@ -211,20 +201,7 @@ class IqiyiIE(InfoExtractor):
|
||||
return raw_data
|
||||
|
||||
def get_enc_key(self, swf_url, video_id):
|
||||
filename, _ = os.path.splitext(url_basename(swf_url))
|
||||
enc_key_json = self._downloader.cache.load('iqiyi-enc-key', filename)
|
||||
if enc_key_json is not None:
|
||||
return enc_key_json[0]
|
||||
|
||||
req = self._request_webpage(
|
||||
swf_url, video_id, note='download swf content')
|
||||
cn = req.read()
|
||||
cn = zlib.decompress(cn[8:])
|
||||
pt = re.compile(b'MixerRemote\x08(?P<enc_key>.+?)\$&vv')
|
||||
enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8')
|
||||
|
||||
self._downloader.cache.store('iqiyi-enc-key', filename, [enc_key])
|
||||
|
||||
enc_key = '8e29ab5666d041c3a1ea76e06dabdffb'
|
||||
return enc_key
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
42
youtube_dl/extractor/ir90tv.py
Normal file
42
youtube_dl/extractor/ir90tv.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_start
|
||||
|
||||
|
||||
class Ir90TvIE(InfoExtractor):
    # Matches video pages on 90tv.ir (with or without the www. prefix);
    # the numeric path component is the video id.
    _VALID_URL = r'https?://(?:www\.)?90tv\.ir/video/(?P<id>[0-9]+)/.*'
    _TESTS = [{
        'url': 'http://90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
        'md5': '411dbd94891381960cb9e13daa47a869',
        'info_dict': {
            'id': '95719',
            'ext': 'mp4',
            'title': 'شایعات نقل و انتقالات مهم فوتبال اروپا 94/02/18',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'http://www.90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page and pull title, media URL and poster out of its HTML."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The <title> element carries a site prefix that is stripped off.
        page_title = self._html_search_regex(
            r'<title>([^<]+)</title>', page, 'title')
        title = remove_start(page_title, '90tv.ir :: ')

        media_url = self._search_regex(
            r'<source[^>]+src="([^"]+)"', page, 'video url')

        # The poster is optional: a missing one must not abort extraction.
        poster = self._search_regex(
            r'poster="([^"]+)"', page, 'thumbnail url', fatal=False)

        info = {
            'url': media_url,
            'id': video_id,
            'title': title,
            'video_url': media_url,
            'thumbnail': poster,
        }
        return info
|
||||
@@ -8,9 +8,9 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class JeuxVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
|
||||
'info_dict': {
|
||||
@@ -19,7 +19,10 @@ class JeuxVideoIE(InfoExtractor):
|
||||
'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..compat import compat_urllib_parse_unquote_plus
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
)
|
||||
@@ -24,7 +24,7 @@ class KaraoketvIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
page_video_url = self._og_search_video_url(webpage, video_id)
|
||||
config_json = compat_urllib_parse.unquote_plus(self._search_regex(
|
||||
config_json = compat_urllib_parse_unquote_plus(self._search_regex(
|
||||
r'config=(.*)', page_video_url, 'configuration'))
|
||||
|
||||
urls_info_json = self._download_json(
|
||||
|
||||
314
youtube_dl/extractor/kuwo.py
Normal file
314
youtube_dl/extractor/kuwo.py
Normal file
@@ -0,0 +1,314 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class KuwoBaseIE(InfoExtractor):
    # Formats probed for every song. 'br' is passed verbatim as the br=
    # query parameter (empty when absent); 'preference' and 'abr' are
    # copied into the resulting format dict.
    _FORMATS = [
        {'format': 'ape', 'ext': 'ape', 'preference': 100},
        {'format': 'mp3-320', 'ext': 'mp3', 'br': '320kmp3', 'abr': 320, 'preference': 80},
        {'format': 'mp3-192', 'ext': 'mp3', 'br': '192kmp3', 'abr': 192, 'preference': 70},
        {'format': 'mp3-128', 'ext': 'mp3', 'br': '128kmp3', 'abr': 128, 'preference': 60},
        {'format': 'wma', 'ext': 'wma', 'preference': 20},
        {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10}
    ]

    def _get_formats(self, song_id):
        """Query antiserver.kuwo.cn once per entry of _FORMATS.

        Responses that are not an http(s) URL are skipped. Returns the
        list of format dicts after passing it through _sort_formats.
        """
        available = []
        for spec in self._FORMATS:
            query = (spec['ext'], spec.get('br', ''), song_id)
            response = self._download_webpage(
                'http://antiserver.kuwo.cn/anti.s?format=%s&br=%s&rid=MUSIC_%s&type=convert_url&response=url' % query,
                song_id, note='Download %s url info' % spec['format'],
            )
            # Anything that is not a plain URL means the format is unavailable.
            if not response.startswith(('http://', 'https://')):
                continue
            available.append({
                'url': response,
                'format_id': spec['format'],
                'format': spec['format'],
                'preference': spec['preference'],
                'abr': spec.get('abr'),
            })
        self._sort_formats(available)
        return available
|
||||
|
||||
|
||||
class KuwoIE(KuwoBaseIE):
    IE_NAME = 'kuwo:song'
    IE_DESC = '酷我音乐'
    # Note that the trailing slash after the numeric id is required.
    _VALID_URL = r'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)/'
    _TESTS = [{
        'url': 'http://www.kuwo.cn/yinyue/635632/',
        'info_dict': {
            'id': '635632',
            'ext': 'ape',
            'title': '爱我别走',
            'creator': '张震岳',
            'upload_date': '20080122',
            'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c'
        },
    }, {
        'url': 'http://www.kuwo.cn/yinyue/6446136/',
        'info_dict': {
            'id': '6446136',
            'ext': 'mp3',
            'title': '心',
            'creator': 'IU',
            'upload_date': '20150518',
        },
        'params': {
            'format': 'mp3-320'
        },
    }]

    def _real_extract(self, url):
        """Extract one song: metadata from the song page, date from its album page."""
        song_id = self._match_id(url)
        page = self._download_webpage(
            url, song_id, note='Download song detail info',
            errnote='Unable to get song detail info')

        title = self._html_search_regex(
            r'<h1[^>]+title="([^"]+)">', page, 'song name')
        artist = self._html_search_regex(
            r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"',
            page, 'singer name', fatal=False)

        # The lyrics element doubles as the description.
        lyrics = clean_html(get_element_by_id('lrcContent', page))
        if lyrics == '暂无':  # indicates no lyrics
            lyrics = None

        formats = self._get_formats(song_id)

        album_id = self._html_search_regex(
            r'<p[^>]+class="album"[^<]+<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
            page, 'album id', fatal=False)

        # The release date is only available on the album page.
        upload_date = None
        if album_id is not None:
            album_page = self._download_webpage(
                'http://www.kuwo.cn/album/%s/' % album_id, song_id,
                note='Download album detail info',
                errnote='Unable to get album detail info')

            released = self._html_search_regex(
                r'发行时间:(\d{4}-\d{2}-\d{2})', album_page,
                'publish time', fatal=False)
            if released:
                # YYYY-MM-DD -> YYYYMMDD
                upload_date = released.replace('-', '')

        return {
            'id': song_id,
            'title': title,
            'creator': artist,
            'upload_date': upload_date,
            'description': lyrics,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class KuwoAlbumIE(InfoExtractor):
    IE_NAME = 'kuwo:album'
    IE_DESC = '酷我音乐 - 专辑'
    _VALID_URL = r'http://www\.kuwo\.cn/album/(?P<id>\d+?)/'
    _TEST = {
        'url': 'http://www.kuwo.cn/album/502294/',
        'info_dict': {
            'id': '502294',
            'title': 'M',
            'description': 'md5:6a7235a84cc6400ec3b38a7bdaf1d60c',
        },
        'playlist_count': 2,
    }

    def _real_extract(self, url):
        """Return a playlist with one 'Kuwo' url result per song linked on the album page."""
        album_id = self._match_id(url)

        page = self._download_webpage(
            url, album_id, note='Download album info',
            errnote='Unable to get album info')

        album_name = self._html_search_regex(
            r'<div[^>]+class="comm"[^<]+<h1[^>]+title="([^"]+)"', page,
            'album name')

        # The intro text is prefixed with "<album name>简介:"; drop that prefix.
        intro_text = clean_html(get_element_by_id('intro', page))
        album_intro = remove_start(intro_text, '%s简介:' % album_name)

        # Captured URLs keep their trailing slash.
        song_urls = re.findall(
            r'<p[^>]+class="listen"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+/)"',
            page)
        entries = []
        for song_url in song_urls:
            entries.append(self.url_result(song_url, 'Kuwo'))

        return self.playlist_result(entries, album_id, album_name, album_intro)
|
||||
|
||||
|
||||
class KuwoChartIE(InfoExtractor):
    IE_NAME = 'kuwo:chart'
    IE_DESC = '酷我音乐 - 排行榜'
    # The dot before "htm" is escaped so that it only matches a literal '.'
    # (unescaped, it matched any character).
    _VALID_URL = r'http://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+)\.htm'
    _TEST = {
        'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm',
        'info_dict': {
            'id': '香港中文龙虎榜',
            'title': '香港中文龙虎榜',
            'description': 're:\d{4}第\d{2}期',
        },
        'playlist_mincount': 10,
    }

    def _real_extract(self, url):
        """Return a playlist with one 'Kuwo' url result per song linked on the chart page.

        The chart name becomes the playlist title and the issue label
        (e.g. "2015第30期") becomes its description; both are mandatory.
        """
        chart_id = self._match_id(url)
        webpage = self._download_webpage(
            url, chart_id, note='Download chart info',
            errnote='Unable to get chart info')

        chart_name = self._html_search_regex(
            r'<h1[^>]+class="unDis">([^<]+)</h1>', webpage, 'chart name')

        chart_desc = self._html_search_regex(
            r'<p[^>]+class="tabDef">(\d{4}第\d{2}期)</p>', webpage, 'chart desc')

        # The trailing slash is captured as part of the URL: the results are
        # delegated to the 'Kuwo' extractor, whose _VALID_URL requires the
        # slash after the numeric id.
        entries = [
            self.url_result(song_url, 'Kuwo') for song_url in re.findall(
                r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+/)"', webpage)
        ]
        return self.playlist_result(entries, chart_id, chart_name, chart_desc)
|
||||
|
||||
|
||||
class KuwoSingerIE(InfoExtractor):
    IE_NAME = 'kuwo:singer'
    IE_DESC = '酷我音乐 - 歌手'
    _VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',
        'info_dict': {
            'id': 'bruno+mars',
            'title': 'Bruno Mars',
        },
        'playlist_count': 10,
    }, {
        'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm',
        'info_dict': {
            'id': 'Ali',
            'title': 'Ali',
        },
        'playlist_mincount': 95,
    }]

    def _real_extract(self, url):
        """Return a playlist of the singer's songs as 'Kuwo' url results.

        If the URL points at a song list page (.../music.htm or
        .../music_N.htm), every list page is walked until no "next page"
        link is found. Otherwise only the first ten songs of the first list
        page are returned.
        """
        singer_id = self._match_id(url)
        webpage = self._download_webpage(
            url, singer_id, note='Download singer info',
            errnote='Unable to get singer info')

        singer_name = self._html_search_regex(
            r'<div class="title clearfix">\s*<h1>([^<]+)<span', webpage, 'singer name'
        )

        entries = []
        first_page_only = not re.search(r'/music(?:_\d+)?\.htm', url)
        for page_num in itertools.count(1):
            webpage = self._download_webpage(
                'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (singer_id, page_num),
                singer_id, note='Download song list page #%d' % page_num,
                errnote='Unable to get song list page #%d' % page_num)

            # The trailing slash is captured as part of the URL: the results
            # are delegated to the 'Kuwo' extractor, whose _VALID_URL
            # requires the slash after the numeric id.
            song_urls = re.findall(
                r'<p[^>]+class="m_name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+/)',
                webpage)
            if first_page_only:
                song_urls = song_urls[:10]
            entries.extend(
                self.url_result(song_url, 'Kuwo') for song_url in song_urls)

            # Stop after the first page, or when there is no "next page" link.
            if first_page_only or not re.search(r'<a[^>]+href="[^"]+">下一页</a>', webpage):
                break

        return self.playlist_result(entries, singer_id, singer_name)
|
||||
|
||||
|
||||
class KuwoCategoryIE(InfoExtractor):
    IE_NAME = 'kuwo:category'
    IE_DESC = '酷我音乐 - 分类'
    # The dot before "htm" is escaped so that it only matches a literal '.'
    # (unescaped, it matched any character).
    _VALID_URL = r'http://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?)\.htm'
    _TEST = {
        'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm',
        'info_dict': {
            'id': '86375',
            'title': '八十年代精选',
            'description': '这些都是属于八十年代的回忆!',
        },
        'playlist_count': 30,
    }

    def _real_extract(self, url):
        """Return a playlist built from the `jsonm` song list embedded in the category page.

        Each entry is a 'Kuwo' url result for
        http://www.kuwo.cn/yinyue/<musicrid>/.
        """
        category_id = self._match_id(url)
        webpage = self._download_webpage(
            url, category_id, note='Download category info',
            errnote='Unable to get category info')

        category_name = self._html_search_regex(
            r'<h1[^>]+title="([^<>]+?)">[^<>]+?</h1>', webpage, 'category name')

        # NOTE(review): get_element_by_id presumably returns None when the
        # element is missing - confirm against utils. Calling .strip() on
        # that would abort extraction over an optional description, so a
        # missing intro yields no description instead.
        intro = get_element_by_id('intro', webpage)
        category_desc = None
        if intro is not None:
            # The intro text is prefixed with "<category name>简介:".
            category_desc = remove_start(
                intro.strip(), '%s简介:' % category_name)

        jsonm = self._parse_json(self._html_search_regex(
            r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id)

        entries = [
            self.url_result('http://www.kuwo.cn/yinyue/%s/' % song['musicrid'], 'Kuwo')
            for song in jsonm['musiclist']
        ]
        return self.playlist_result(entries, category_id, category_name, category_desc)
|
||||
|
||||
|
||||
class KuwoMvIE(KuwoBaseIE):
    IE_NAME = 'kuwo:mv'
    IE_DESC = '酷我音乐 - MV'
    _VALID_URL = r'http://www\.kuwo\.cn/mv/(?P<id>\d+?)/'
    _TEST = {
        'url': 'http://www.kuwo.cn/mv/6480076/',
        'info_dict': {
            'id': '6480076',
            'ext': 'mkv',
            'title': '我们家MV',
            'creator': '2PM',
        },
    }
    # The audio formats of the base class plus two video containers with
    # higher preference values.
    _FORMATS = KuwoBaseIE._FORMATS + [
        {'format': 'mkv', 'ext': 'mkv', 'preference': 250},
        {'format': 'mp4', 'ext': 'mp4', 'preference': 200},
    ]

    def _real_extract(self, url):
        """Extract one MV: names from the page heading, formats from _get_formats."""
        song_id = self._match_id(url)
        page = self._download_webpage(
            url, song_id, note='Download mv detail info: %s' % song_id,
            errnote='Unable to get mv detail info: %s' % song_id)

        # Song and singer names are read from the same heading; both are required.
        heading = re.search(
            r'<h1[^>]+title="(?P<song>[^"]+)">[^<]+<span[^>]+title="(?P<singer>[^"]+)"',
            page)
        if not heading:
            raise ExtractorError('Unable to find song or singer names')
        title = heading.group('song')
        artist = heading.group('singer')

        return {
            'id': song_id,
            'title': title,
            'creator': artist,
            'formats': self._get_formats(song_id),
        }
|
||||
62
youtube_dl/extractor/lecture2go.py
Normal file
62
youtube_dl/extractor/lecture2go.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class Lecture2GoIE(InfoExtractor):
    _VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)'
    _TEST = {
        'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473',
        'md5': 'ac02b570883020d208d405d5a3fd2f7f',
        'info_dict': {
            'id': '17473',
            'ext': 'flv',
            'title': '2 - Endliche Automaten und reguläre Sprachen',
            'creator': 'Frank Heitmann',
            'duration': 5220,
        }
    }

    def _real_extract(self, url):
        """Extract one lecture: every distinct "src" URL on the page becomes a format source."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')

        # set() drops duplicate URLs; manifests are expanded into their
        # formats, anything else is used as a direct media URL.
        formats = []
        for media_url in set(re.findall(r'"src","([^"]+)"', webpage)):
            kind = determine_ext(media_url)
            if kind == 'f4m':
                formats.extend(self._extract_f4m_formats(media_url, video_id))
                continue
            if kind == 'm3u8':
                formats.extend(self._extract_m3u8_formats(media_url, video_id))
                continue
            formats.append({
                'url': media_url,
            })

        self._sort_formats(formats)

        # The remaining fields are optional (fatal=False).
        creator = self._html_search_regex(
            r'<div[^>]+id="description">([^<]+)</div>', webpage, 'creator', fatal=False)

        duration_text = self._html_search_regex(
            r'Duration:\s*</em>\s*<em[^>]*>([^<]+)</em>', webpage, 'duration', fatal=False)
        duration = parse_duration(duration_text)

        views_text = self._html_search_regex(
            r'Views:\s*</em>\s*<em[^>]+>(\d+)</em>', webpage, 'view count', fatal=False)
        view_count = int_or_none(views_text)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'creator': creator,
            'duration': duration,
            'view_count': view_count,
        }
|
||||
@@ -15,10 +15,12 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class LetvIE(InfoExtractor):
|
||||
IE_DESC = '乐视网'
|
||||
_VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -133,7 +135,7 @@ class LetvIE(InfoExtractor):
|
||||
}
|
||||
|
||||
if format_id[-1:] == 'p':
|
||||
url_info_dict['height'] = format_id[:-1]
|
||||
url_info_dict['height'] = int_or_none(format_id[:-1])
|
||||
|
||||
urls.append(url_info_dict)
|
||||
|
||||
|
||||
@@ -2,9 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class MalemotionIE(InfoExtractor):
|
||||
@@ -24,7 +22,7 @@ class MalemotionIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = compat_urllib_parse.unquote(self._search_regex(
|
||||
video_url = compat_urllib_parse_unquote(self._search_regex(
|
||||
r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(.*?)</title', webpage, 'title')
|
||||
|
||||
@@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -155,7 +156,7 @@ class MetacafeIE(InfoExtractor):
|
||||
video_url = None
|
||||
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
|
||||
if mobj is not None:
|
||||
mediaURL = compat_urllib_parse.unquote(mobj.group(1))
|
||||
mediaURL = compat_urllib_parse_unquote(mobj.group(1))
|
||||
video_ext = mediaURL[-3:]
|
||||
|
||||
# Extract gdaKey if available
|
||||
|
||||
@@ -5,6 +5,7 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -48,7 +49,7 @@ class MiTeleIE(InfoExtractor):
|
||||
domain = 'http://' + domain
|
||||
info_url = compat_urlparse.urljoin(
|
||||
domain,
|
||||
compat_urllib_parse.unquote(embed_data['flashvars']['host'])
|
||||
compat_urllib_parse_unquote(embed_data['flashvars']['host'])
|
||||
)
|
||||
info_el = self._download_xml(info_url, episode).find('./video/info')
|
||||
|
||||
|
||||
@@ -3,9 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
@@ -60,7 +58,7 @@ class MixcloudIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader = mobj.group(1)
|
||||
cloudcast_name = mobj.group(2)
|
||||
track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name)))
|
||||
track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name)))
|
||||
|
||||
webpage = self._download_webpage(url, track_id)
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ class MofosexIE(InfoExtractor):
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
|
||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
|
||||
video_url = compat_urllib_parse_unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
format = path.split('/')[5].split('_')[:2]
|
||||
|
||||
@@ -35,7 +35,8 @@ class MySpassIE(InfoExtractor):
|
||||
|
||||
# get metadata
|
||||
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
||||
metadata = self._download_xml(metadata_url, video_id)
|
||||
metadata = self._download_xml(
|
||||
metadata_url, video_id, transform_source=lambda s: s.strip())
|
||||
|
||||
# extract values from metadata
|
||||
url_flv_el = metadata.find('url_flv')
|
||||
|
||||
60
youtube_dl/extractor/myvi.py
Normal file
60
youtube_dl/extractor/myvi.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .vimple import SprutoBaseIE
|
||||
|
||||
|
||||
class MyviIE(SprutoBaseIE):
    """Extractor for myvi.ru / myvi.tv player, embed and API URLs."""

    _VALID_URL = r'''(?x)
                        https?://
                            myvi\.(?:ru/player|tv)/
                            (?:
                                (?:
                                    embed/html|
                                    flash|
                                    api/Video/Get
                                )/|
                                content/preloader\.swf\?.*\bid=
                            )
                            (?P<id>[\da-zA-Z_-]+)
                    '''
    _TESTS = [{
        'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
        'md5': '571bbdfba9f9ed229dc6d34cc0f335bf',
        'info_dict': {
            'id': 'f16b2bbd-cde8-481c-a981-7cd48605df43',
            'ext': 'mp4',
            'title': 'хозяин жизни',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 25,
        },
    }, {
        'url': 'http://myvi.ru/player/content/preloader.swf?id=oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wOYf1WFpPfc_bWTKGVf_Zafr0',
        'only_matching': True,
    }, {
        'url': 'http://myvi.ru/player/api/Video/Get/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
        'only_matching': True,
    }, {
        'url': 'http://myvi.tv/embed/html/oTGTNWdyz4Zwy_u1nraolwZ1odenTd9WkTnRfIL9y8VOgHYqOHApE575x4_xxS9Vn0?ap=0',
        'only_matching': True,
    }, {
        'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30',
        'only_matching': True,
    }]

    @classmethod
    def _extract_url(cls, webpage):
        """Return the first myvi player URL embedded via an <iframe>, or None.

        The ``src`` attribute may be quoted with either ``"`` or ``'``.  The
        URL is consumed up to the first occurrence of whichever quote opened
        the attribute, so a single-quoted ``src`` no longer swallows the
        attributes that follow it.
        """
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//myvi\.(?:ru/player|tv)/(?:embed/html|flash)/(?:(?!\1).)+)\1', webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Fetch the player API JSON and hand its ``sprutoData`` to the base class."""
        video_id = self._match_id(url)

        spruto = self._download_json(
            'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData']

        return self._extract_spruto(spruto, video_id)
|
||||
@@ -10,6 +10,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_ord,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -107,7 +108,7 @@ class MyVideoIE(InfoExtractor):
|
||||
if not a == '_encxml':
|
||||
params[a] = b
|
||||
else:
|
||||
encxml = compat_urllib_parse.unquote(b)
|
||||
encxml = compat_urllib_parse_unquote(b)
|
||||
if not params.get('domain'):
|
||||
params['domain'] = 'www.myvideo.de'
|
||||
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
|
||||
@@ -135,7 +136,7 @@ class MyVideoIE(InfoExtractor):
|
||||
video_url = None
|
||||
mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
|
||||
if mobj:
|
||||
video_url = compat_urllib_parse.unquote(mobj.group(1))
|
||||
video_url = compat_urllib_parse_unquote(mobj.group(1))
|
||||
if 'myvideo2flash' in video_url:
|
||||
self.report_warning(
|
||||
'Rewriting URL to use unencrypted rtmp:// ...',
|
||||
@@ -147,10 +148,10 @@ class MyVideoIE(InfoExtractor):
|
||||
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
|
||||
if mobj is None:
|
||||
raise ExtractorError('unable to extract url')
|
||||
video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
|
||||
video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))
|
||||
|
||||
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
|
||||
video_file = compat_urllib_parse.unquote(video_file)
|
||||
video_file = compat_urllib_parse_unquote(video_file)
|
||||
|
||||
if not video_file.endswith('f4m'):
|
||||
ppath, prefix = video_file.split('.')
|
||||
@@ -159,7 +160,7 @@ class MyVideoIE(InfoExtractor):
|
||||
video_playpath = ''
|
||||
|
||||
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
|
||||
video_swfobj = compat_urllib_parse.unquote(video_swfobj)
|
||||
video_swfobj = compat_urllib_parse_unquote(video_swfobj)
|
||||
|
||||
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
|
||||
webpage, 'title')
|
||||
|
||||
@@ -8,25 +8,40 @@ from ..utils import (
|
||||
|
||||
|
||||
class NationalGeographicIE(InfoExtractor):
|
||||
_VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'
|
||||
_VALID_URL = r'http://video\.nationalgeographic\.com/.*?'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
|
||||
'info_dict': {
|
||||
'id': '4DmDACA6Qtk_',
|
||||
'ext': 'flv',
|
||||
'title': 'Mating Crabs Busted by Sharks',
|
||||
'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
|
||||
'info_dict': {
|
||||
'id': '4DmDACA6Qtk_',
|
||||
'ext': 'flv',
|
||||
'title': 'Mating Crabs Busted by Sharks',
|
||||
'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
{
|
||||
'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
|
||||
'info_dict': {
|
||||
'id': '_JeBD_D7PlS5',
|
||||
'ext': 'flv',
|
||||
'title': 'The Real Jaws',
|
||||
'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
name = url_basename(url)
|
||||
|
||||
webpage = self._download_webpage(url, name)
|
||||
feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
|
||||
guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')
|
||||
feed_url = self._search_regex(
|
||||
r'data-feed-url="([^"]+)"', webpage, 'feed url')
|
||||
guid = self._search_regex(
|
||||
r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"',
|
||||
webpage, 'guid')
|
||||
|
||||
feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
|
||||
content = feed.find('.//{http://search.yahoo.com/mrss/}content')
|
||||
@@ -34,5 +49,6 @@ class NationalGeographicIE(InfoExtractor):
|
||||
|
||||
return self.url_result(smuggle_url(
|
||||
'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
|
||||
# For some reason, the normal links don't work and we must force the use of f4m
|
||||
# For some reason, the normal links don't work and we must force
|
||||
# the use of f4m
|
||||
{'force_smil_url': True}))
|
||||
|
||||
459
youtube_dl/extractor/neteasemusic.py
Normal file
459
youtube_dl/extractor/neteasemusic.py
Normal file
@@ -0,0 +1,459 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from hashlib import md5
|
||||
from base64 import b64encode
|
||||
from datetime import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_str,
|
||||
compat_itertools_count,
|
||||
)
|
||||
|
||||
|
||||
class NetEaseMusicBaseIE(InfoExtractor):
    """Helpers shared by the music.163.com extractors."""

    # Keys of the per-quality detail dicts looked up in a song info dict.
    _FORMATS = ['bMusic', 'mMusic', 'hMusic']
    # Salt XOR-ed into the dfsId before hashing (see _encrypt).
    _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
    _API_BASE = 'http://music.163.com/api/'

    @classmethod
    def _encrypt(cls, dfsid):
        """Derive the URL path token for a ``dfsId``.

        The ASCII form of ``dfsid`` is XOR-ed byte by byte with the
        (repeating) salt, MD5-hashed, base64-encoded, and finally ``/`` and
        ``+`` are replaced by ``_`` and ``-``.
        """
        salt = bytearray(cls._NETEASE_SALT.encode('utf-8'))
        salt_len = len(salt)
        plain = bytearray(compat_str(dfsid).encode('ascii'))
        masked = bytearray(
            byte ^ salt[index % salt_len] for index, byte in enumerate(plain))
        m = md5()
        m.update(bytes(masked))
        result = b64encode(m.digest()).decode('ascii')
        return result.replace('/', '_').replace('+', '-')

    @classmethod
    def extract_formats(cls, info):
        """Build the formats list from the quality entries present in ``info``.

        Qualities listed in ``_FORMATS`` whose entry is missing or empty are
        skipped.
        """
        formats = []
        for song_format in cls._FORMATS:
            details = info.get(song_format)
            if not details:
                continue
            # A null bitrate is treated like a missing one instead of
            # raising TypeError in the division below.
            bitrate = details.get('bitrate') or 0
            formats.append({
                'url': 'http://m1.music.126.net/%s/%s.%s' %
                       (cls._encrypt(details['dfsId']), details['dfsId'],
                        details['extension']),
                'ext': details.get('extension'),
                'abr': bitrate / 1000,
                'format_id': song_format,
                'filesize': details.get('size'),
                'asr': details.get('sr')
            })
        return formats

    @classmethod
    def convert_milliseconds(cls, ms):
        """Convert milliseconds to whole seconds (rounded).

        Returns None when ``ms`` is None so that callers can pass optional
        API fields straight through.
        """
        if ms is None:
            return None
        return int(round(ms / 1000.0))

    def query_api(self, endpoint, video_id, note):
        """Request ``endpoint`` below ``_API_BASE`` and return the parsed JSON.

        The request carries a ``Referer`` header set to ``_API_BASE``.
        """
        req = compat_urllib_request.Request('%s%s' % (self._API_BASE, endpoint))
        req.add_header('Referer', self._API_BASE)
        return self._download_json(req, video_id, note)
|
||||
|
||||
|
||||
class NetEaseMusicIE(NetEaseMusicBaseIE):
    """Extractor for single songs on music.163.com."""

    IE_NAME = 'netease:song'
    IE_DESC = '网易云音乐'
    _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/song?id=32102397',
        'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
        'info_dict': {
            'id': '32102397',
            'ext': 'mp3',
            'title': 'Bad Blood (feat. Kendrick Lamar)',
            'creator': 'Taylor Swift / Kendrick Lamar',
            'upload_date': '20150517',
            'timestamp': 1431878400,
            'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
        },
    }, {
        'note': 'No lyrics translation.',
        'url': 'http://music.163.com/#/song?id=29822014',
        'info_dict': {
            'id': '29822014',
            'ext': 'mp3',
            'title': '听见下雨的声音',
            'creator': '周杰伦',
            'upload_date': '20141225',
            'timestamp': 1419523200,
            'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
        },
    }, {
        'note': 'No lyrics.',
        'url': 'http://music.163.com/song?id=17241424',
        'info_dict': {
            'id': '17241424',
            'ext': 'mp3',
            'title': 'Opus 28',
            'creator': 'Dustin O\'Halloran',
            'upload_date': '20080211',
            'timestamp': 1202745600,
        },
    }, {
        'note': 'Has translated name.',
        'url': 'http://music.163.com/#/song?id=22735043',
        'info_dict': {
            'id': '22735043',
            'ext': 'mp3',
            'title': '소원을 말해봐 (Genie)',
            'creator': '少女时代',
            'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
            'upload_date': '20100127',
            'timestamp': 1264608000,
            'alt_title': '说出愿望吧(Genie)',
        }
    }]

    def _process_lyrics(self, lyrics_info):
        """Return the lyrics text, merged with its translation when present.

        Without a translation (or without original lyrics to merge it into)
        the original lyric is returned unchanged, possibly None.  Otherwise
        every timestamped original line becomes
        ``<timestamp><text> / <translation>``, with an empty translation for
        timestamps the translated lyric does not contain.
        """
        original = lyrics_info.get('lrc', {}).get('lyric')
        translated = lyrics_info.get('tlyric', {}).get('lyric')

        # `not original` guards re.findall() against a None original lyric.
        if not translated or not original:
            return original

        lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
        original_ts_texts = re.findall(lyrics_expr, original)
        translation_ts_dict = dict(
            (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
        )
        lyrics = '\n'.join([
            '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
            for time_stamp, text in original_ts_texts
        ])
        return lyrics

    def _real_extract(self, url):
        """Query the song detail and lyric endpoints and build the info dict."""
        song_id = self._match_id(url)

        params = {
            'id': song_id,
            'ids': '[%s]' % song_id
        }
        info = self.query_api(
            'song/detail?' + compat_urllib_parse.urlencode(params),
            song_id, 'Downloading song info')['songs'][0]

        formats = self.extract_formats(info)
        self._sort_formats(formats)

        lyrics_info = self.query_api(
            'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
            song_id, 'Downloading lyrics data')
        lyrics = self._process_lyrics(lyrics_info)

        alt_title = None
        if info.get('transNames'):
            alt_title = '/'.join(info.get('transNames'))

        # publishTime is optional; only convert it when the API supplied one
        # so a missing value yields no timestamp instead of a TypeError.
        publish_time = info.get('album', {}).get('publishTime')
        timestamp = None
        if publish_time is not None:
            timestamp = self.convert_milliseconds(publish_time)

        return {
            'id': song_id,
            'title': info['name'],
            'alt_title': alt_title,
            'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
            'timestamp': timestamp,
            'thumbnail': info.get('album', {}).get('picUrl'),
            'duration': self.convert_milliseconds(info.get('duration', 0)),
            'description': lyrics,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
    """Extractor turning a music.163.com album into a playlist of its songs."""

    IE_NAME = 'netease:album'
    IE_DESC = '网易云音乐 - 专辑'
    _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/album?id=220780',
        'info_dict': {
            'id': '220780',
            'title': 'B\'day',
        },
        'playlist_count': 23,
    }

    def _real_extract(self, url):
        """Fetch the album record and return one url_result per listed song."""
        album_id = self._match_id(url)

        album = self.query_api(
            'album/%s?id=%s' % (album_id, album_id),
            album_id, 'Downloading album data')['album']

        title = album['name']
        description = album.get('description')

        entries = []
        for track in album['songs']:
            track_id = track['id']
            entries.append(self.url_result(
                'http://music.163.com/#/song?id=%s' % track_id,
                'NetEaseMusic', track_id))

        return self.playlist_result(entries, album_id, title, description)
|
||||
|
||||
|
||||
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
    """Extractor turning a music.163.com artist page into a playlist."""

    IE_NAME = 'netease:singer'
    IE_DESC = '网易云音乐 - 歌手'
    _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'note': 'Singer has aliases.',
        'url': 'http://music.163.com/#/artist?id=10559',
        'info_dict': {
            'id': '10559',
            'title': '张惠妹 - aMEI;阿密特',
        },
        'playlist_count': 50,
    }, {
        'note': 'Singer has translated name.',
        'url': 'http://music.163.com/#/artist?id=124098',
        'info_dict': {
            'id': '124098',
            'title': '李昇基 - 이승기',
        },
        'playlist_count': 50,
    }]

    def _real_extract(self, url):
        """Fetch the artist record and list its ``hotSongs`` as playlist entries.

        The playlist title is the artist name, followed by the translated
        name and the ``;``-joined aliases when those are non-empty.
        """
        singer_id = self._match_id(url)

        info = self.query_api(
            'artist/%s?id=%s' % (singer_id, singer_id),
            singer_id, 'Downloading singer data')

        artist = info['artist']
        title = artist['name']
        translated = artist['trans']
        if translated:
            title = '%s - %s' % (title, translated)
        aliases = artist['alias']
        if aliases:
            title = '%s - %s' % (title, ';'.join(aliases))

        entries = []
        for track in info['hotSongs']:
            track_id = track['id']
            entries.append(self.url_result(
                'http://music.163.com/#/song?id=%s' % track_id,
                'NetEaseMusic', track_id))

        return self.playlist_result(entries, singer_id, title)
|
||||
|
||||
|
||||
class NetEaseMusicListIE(NetEaseMusicBaseIE):
    """Extractor for music.163.com playlists and toplists."""

    IE_NAME = 'netease:playlist'
    IE_DESC = '网易云音乐 - 歌单'
    _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/playlist?id=79177352',
        'info_dict': {
            'id': '79177352',
            'title': 'Billboard 2007 Top 100',
            'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
        },
        'playlist_count': 99,
    }, {
        'note': 'Toplist/Charts sample',
        'url': 'http://music.163.com/#/discover/toplist?id=3733003',
        'info_dict': {
            'id': '3733003',
            'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
            'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
        },
        'playlist_count': 50,
    }]

    def _real_extract(self, url):
        """Fetch the playlist detail and return its tracks as playlist entries.

        When ``specialType`` is 10 (a chart/toplist) the ``updateTime`` date
        is appended to the title as ``YYYY-MM-DD``.
        """
        list_id = self._match_id(url)

        playlist = self.query_api(
            'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
            list_id, 'Downloading playlist data')['result']

        title = playlist['name']
        description = playlist.get('description')

        if playlist.get('specialType') == 10:  # is a chart/toplist
            updated = datetime.fromtimestamp(
                self.convert_milliseconds(playlist['updateTime']))
            title = '%s %s' % (title, updated.strftime('%Y-%m-%d'))

        entries = []
        for track in playlist['tracks']:
            track_id = track['id']
            entries.append(self.url_result(
                'http://music.163.com/#/song?id=%s' % track_id,
                'NetEaseMusic', track_id))

        return self.playlist_result(entries, list_id, title, description)
|
||||
|
||||
|
||||
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
    """Extractor for music videos on music.163.com."""

    IE_NAME = 'netease:mv'
    IE_DESC = '网易云音乐 - MV'
    _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/mv?id=415350',
        'info_dict': {
            'id': '415350',
            'ext': 'mp4',
            'title': '이럴거면 그러지말지',
            'description': '白雅言自作曲唱甜蜜爱情',
            'creator': '白雅言',
            'upload_date': '20150520',
        },
    }

    def _real_extract(self, url):
        """Fetch the MV detail record and build the info dict.

        Every item of the ``brs`` mapping becomes one mp4 format whose key is
        used both for the ``<key>p`` format id and, as an int, for the height.
        """
        mv_id = self._match_id(url)

        info = self.query_api(
            'mv/detail?id=%s&type=mp4' % mv_id,
            mv_id, 'Downloading mv info')['data']

        formats = []
        for resolution, stream_url in info['brs'].items():
            formats.append({
                'url': stream_url,
                'ext': 'mp4',
                'format_id': '%sp' % resolution,
                'height': int(resolution),
            })
        self._sort_formats(formats)

        return {
            'id': mv_id,
            'title': info['name'],
            'description': info.get('desc') or info.get('briefDesc'),
            'creator': info['artistName'],
            'upload_date': info['publishTime'].replace('-', ''),
            'formats': formats,
            'thumbnail': info.get('cover'),
            'duration': self.convert_milliseconds(info.get('duration', 0)),
        }
|
||||
|
||||
|
||||
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
    """Extractor for radio programs on music.163.com."""

    IE_NAME = 'netease:program'
    IE_DESC = '网易云音乐 - 电台节目'
    # The "#" fragment prefix is optional, as in the sibling extractors; the
    # slash after it stays optional so previously accepted URLs still match.
    _VALID_URL = r'https?://music\.163\.com/(#/?)?program\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/program?id=10109055',
        'info_dict': {
            'id': '10109055',
            'ext': 'mp3',
            'title': '不丹足球背后的故事',
            'description': '喜马拉雅人的足球梦 ...',
            'creator': '大话西藏',
            'timestamp': 1434179342,
            'upload_date': '20150613',
            'duration': 900,
        },
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
        },
        'playlist_count': 4,
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'ext': 'mp3',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
            'timestamp': 1434450841,
            'upload_date': '20150616',
        },
        'params': {
            'noplaylist': True
        }
    }]

    def _real_extract(self, url):
        """Return the program's main audio, or a playlist with its songs.

        A single info dict for ``mainSong`` is returned when the program has
        no accompanying songs or the ``noplaylist`` option is set; otherwise
        a playlist of the main song followed by the accompanying songs.
        """
        program_id = self._match_id(url)

        info = self.query_api(
            'dj/program/detail?id=%s' % program_id,
            program_id, 'Downloading program info')['program']

        name = info['name']
        description = info['description']

        if not info['songs'] or self._downloader.params.get('noplaylist'):
            if info['songs']:
                # Songs exist, so we only got here because of --no-playlist.
                self.to_screen(
                    'Downloading just the main audio %s because of --no-playlist'
                    % info['mainSong']['id'])

            formats = self.extract_formats(info['mainSong'])
            self._sort_formats(formats)

            return {
                'id': program_id,
                'title': name,
                'description': description,
                'creator': info['dj']['brand'],
                'timestamp': self.convert_milliseconds(info['createTime']),
                'thumbnail': info['coverUrl'],
                'duration': self.convert_milliseconds(info.get('duration', 0)),
                'formats': formats,
            }

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download the main audio %s'
            % (program_id, info['mainSong']['id']))

        song_ids = [info['mainSong']['id']]
        song_ids.extend([song['id'] for song in info['songs']])
        entries = [
            self.url_result('http://music.163.com/#/song?id=%s' % song_id,
                            'NetEaseMusic', song_id)
            for song_id in song_ids
        ]
        return self.playlist_result(entries, program_id, name, description)
|
||||
|
||||
|
||||
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
    """Extractor turning a music.163.com DJ radio into a playlist of programs."""

    IE_NAME = 'netease:djradio'
    IE_DESC = '网易云音乐 - 电台'
    _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/djradio?id=42',
        'info_dict': {
            'id': '42',
            'title': '声音蔓延',
            'description': 'md5:766220985cbd16fdd552f64c578a6b15'
        },
        'playlist_mincount': 40,
    }
    # Number of programs requested per API call.
    _PAGE_SIZE = 1000

    def _real_extract(self, url):
        """Page through the radio's programs and return them as a playlist.

        Pages of ``_PAGE_SIZE`` programs are requested until the API reports
        ``more`` as false.  Title and description come from the ``radio``
        record of the first program seen; they stay None when the radio has
        no programs at all.
        """
        dj_id = self._match_id(url)

        name = None
        desc = None
        entries = []
        for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
            info = self.query_api(
                'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
                % (self._PAGE_SIZE, dj_id, offset),
                dj_id, 'Downloading dj programs - %d' % offset)

            programs = info['programs']
            entries.extend([
                self.url_result(
                    'http://music.163.com/#/program?id=%s' % program['id'],
                    'NetEaseMusicProgram', program['id'])
                for program in programs
            ])

            # Guard against an empty page so an empty radio does not raise
            # IndexError when looking up the radio metadata.
            if name is None and programs:
                radio = programs[0]['radio']
                name = radio['name']
                desc = radio['desc']

            if not info['more']:
                break

        return self.playlist_result(entries, dj_id, name, desc)
|
||||
@@ -6,6 +6,7 @@ from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class NextMediaIE(InfoExtractor):
|
||||
IE_DESC = '蘋果日報'
|
||||
_VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
|
||||
@@ -66,6 +67,7 @@ class NextMediaIE(InfoExtractor):
|
||||
|
||||
|
||||
class NextMediaActionNewsIE(NextMediaIE):
|
||||
IE_DESC = '蘋果日報 - 動新聞'
|
||||
_VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
|
||||
_TESTS = [{
|
||||
'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
|
||||
@@ -90,6 +92,7 @@ class NextMediaActionNewsIE(NextMediaIE):
|
||||
|
||||
|
||||
class AppleDailyIE(NextMediaIE):
|
||||
IE_DESC = '臺灣蘋果日報'
|
||||
_VALID_URL = r'http://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
|
||||
|
||||
@@ -133,7 +133,7 @@ class NowTVIE(InfoExtractor):
|
||||
station = mobj.group('station')
|
||||
|
||||
info = self._download_json(
|
||||
'https://api.nowtv.de/v3/movies/%s?fields=*,format,files' % display_id,
|
||||
'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
|
||||
display_id)
|
||||
|
||||
video_id = compat_str(info['id'])
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
@@ -7,7 +9,6 @@ from ..utils import (
|
||||
qualities,
|
||||
strip_jsonp,
|
||||
unified_strdate,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
@@ -37,8 +38,21 @@ class NPOBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class NPOIE(NPOBaseIE):
|
||||
IE_NAME = 'npo.nl'
|
||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'
|
||||
IE_NAME = 'npo'
|
||||
IE_DESC = 'npo.nl and ntr.nl'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
npo:|
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
npo\.nl/(?!live|radio)(?:[^/]+/){2}|
|
||||
ntr\.nl/(?:[^/]+/){2,}|
|
||||
omroepwnl\.nl/video/fragment/[^/]+__
|
||||
)
|
||||
)
|
||||
(?P<id>[^/?#]+)
|
||||
'''
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -58,7 +72,7 @@ class NPOIE(NPOBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'VARA_101191800',
|
||||
'ext': 'm4v',
|
||||
'title': 'De Mega Mike & Mega Thomas show',
|
||||
'title': 'De Mega Mike & Mega Thomas show: The best of.',
|
||||
'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
|
||||
'upload_date': '20090227',
|
||||
'duration': 2400,
|
||||
@@ -70,8 +84,8 @@ class NPOIE(NPOBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'VPWON_1169289',
|
||||
'ext': 'm4v',
|
||||
'title': 'Tegenlicht',
|
||||
'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
|
||||
'title': 'Tegenlicht: De toekomst komt uit Afrika',
|
||||
'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
|
||||
'upload_date': '20130225',
|
||||
'duration': 3000,
|
||||
},
|
||||
@@ -100,6 +114,30 @@ class NPOIE(NPOBaseIE):
|
||||
'title': 'Hoe gaat Europa verder na Parijs?',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
|
||||
'md5': '01c6a2841675995da1f0cf776f03a9c3',
|
||||
'info_dict': {
|
||||
'id': 'VPWON_1233944',
|
||||
'ext': 'm4v',
|
||||
'title': 'Aap, poot, pies',
|
||||
'description': 'md5:c9c8005d1869ae65b858e82c01a91fde',
|
||||
'upload_date': '20150508',
|
||||
'duration': 599,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
|
||||
'md5': 'd30cd8417b8b9bca1fdff27428860d08',
|
||||
'info_dict': {
|
||||
'id': 'POW_00996502',
|
||||
'ext': 'm4v',
|
||||
'title': '''"Dit is wel een 'landslide'..."''',
|
||||
'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
|
||||
'upload_date': '20150508',
|
||||
'duration': 462,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -114,6 +152,18 @@ class NPOIE(NPOBaseIE):
|
||||
transform_source=strip_jsonp,
|
||||
)
|
||||
|
||||
# For some videos actual video id (prid) is different (e.g. for
|
||||
# http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698
|
||||
# video id is POMS_WNL_853698 but prid is POW_00996502)
|
||||
video_id = metadata.get('prid') or video_id
|
||||
|
||||
# titel is too generic in some cases so utilize aflevering_titel as well
|
||||
# when available (e.g. http://tegenlicht.vpro.nl/afleveringen/2014-2015/access-to-africa.html)
|
||||
title = metadata['titel']
|
||||
sub_title = metadata.get('aflevering_titel')
|
||||
if sub_title and sub_title != title:
|
||||
title += ': %s' % sub_title
|
||||
|
||||
token = self._get_token(video_id)
|
||||
|
||||
formats = []
|
||||
@@ -186,8 +236,8 @@ class NPOIE(NPOBaseIE):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': metadata['titel'],
|
||||
'description': metadata['info'],
|
||||
'title': title,
|
||||
'description': metadata.get('info'),
|
||||
'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
|
||||
'upload_date': unified_strdate(metadata.get('gidsdatum')),
|
||||
'duration': parse_duration(metadata.get('tijdsduur')),
|
||||
@@ -356,9 +406,8 @@ class NPORadioFragmentIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class TegenlichtVproIE(NPOIE):
|
||||
IE_NAME = 'tegenlicht.vpro.nl'
|
||||
_VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
|
||||
class VPROIE(NPOIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -367,17 +416,72 @@ class TegenlichtVproIE(NPOIE):
|
||||
'info_dict': {
|
||||
'id': 'VPWON_1169289',
|
||||
'ext': 'm4v',
|
||||
'title': 'Tegenlicht',
|
||||
'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
|
||||
'title': 'De toekomst komt uit Afrika',
|
||||
'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
|
||||
'upload_date': '20130225',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
|
||||
'info_dict': {
|
||||
'id': 'sergio-herman',
|
||||
'title': 'Sergio Herman: Fucking perfect',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
},
|
||||
{
|
||||
# playlist with youtube embed
|
||||
'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html',
|
||||
'info_dict': {
|
||||
'id': 'education-education',
|
||||
'title': '2Doc',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
name = url_basename(url)
|
||||
webpage = self._download_webpage(url, name)
|
||||
urn = self._html_search_meta('mediaurn', webpage)
|
||||
info_page = self._download_json(
|
||||
'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
|
||||
return self._get_info(info_page['mid'])
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
|
||||
for video_id in re.findall(r'data-media-id="([^"]+)"', webpage)
|
||||
]
|
||||
|
||||
playlist_title = self._search_regex(
|
||||
r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*</title>',
|
||||
webpage, 'playlist title', default=None) or self._og_search_title(webpage)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
|
||||
|
||||
class WNLIE(InfoExtractor):
    """Playlist extractor for omroepwnl.nl ``/video/detail/`` pages.

    Every "Deel N" link carrying the ``js-mid`` class on the page becomes
    one playlist entry, addressed as ``npo:<href>`` and tagged with the
    'NPO' extractor key.
    """

    _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'

    _TEST = {
        'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
        'info_dict': {
            'id': 'vandaag-de-dag-6-mei',
            'title': 'Vandaag de Dag 6 mei',
        },
        'playlist_count': 4,
    }

    def _real_extract(self, url):
        """Return a playlist built from the parts linked on the page at *url*."""
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)

        # Each match is an (href, "Deel N") pair; the label only serves to
        # select the right anchors, the href is what ends up in the entry.
        part_links = re.findall(
            r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', page)
        entries = []
        for mid, _label in part_links:
            entries.append(self.url_result('npo:%s' % mid, 'NPO'))

        title = self._html_search_regex(
            r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>',
            page, 'playlist title')

        return self.playlist_result(entries, playlist_id, title)
|
||||
|
||||
@@ -116,7 +116,8 @@ class NRKPlaylistIE(InfoExtractor):
|
||||
|
||||
|
||||
class NRKTVIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<baseurl>https?://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||
IE_DESC = 'NRK TV and NRK Radio'
|
||||
_VALID_URL = r'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -188,6 +189,10 @@ class NRKTVIE(InfoExtractor):
|
||||
'duration': 6947.5199999999995,
|
||||
},
|
||||
'skip': 'Only works from Norway',
|
||||
},
|
||||
{
|
||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -206,7 +211,8 @@ class NRKTVIE(InfoExtractor):
|
||||
]}
|
||||
|
||||
def _extract_f4m(self, manifest_url, video_id):
|
||||
return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
|
||||
return self._extract_f4m_formats(
|
||||
manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id, f4m_id='hds')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -268,7 +274,7 @@ class NRKTVIE(InfoExtractor):
|
||||
|
||||
m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4'))
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4', m3u8_id='hls'))
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles_url = self._html_search_regex(
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
@@ -62,7 +62,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
metadata = self._parse_json(metadata, video_id)
|
||||
else:
|
||||
metadata = self._download_json(
|
||||
compat_urllib_parse.unquote(flashvars['metadataUrl']),
|
||||
compat_urllib_parse_unquote(flashvars['metadataUrl']),
|
||||
video_id, 'Downloading metadata JSON')
|
||||
|
||||
movie = metadata['movie']
|
||||
|
||||
@@ -49,19 +49,21 @@ class OnionStudiosIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._search_regex(
|
||||
r'share_title\s*=\s*"([^"]+)"', webpage, 'title')
|
||||
r'share_title\s*=\s*(["\'])(?P<title>[^\1]+?)\1',
|
||||
webpage, 'title', group='title')
|
||||
description = self._search_regex(
|
||||
r'share_description\s*=\s*"([^"]+)"', webpage,
|
||||
'description', default=None)
|
||||
r'share_description\s*=\s*(["\'])(?P<description>[^\1]+?)\1',
|
||||
webpage, 'description', default=None, group='description')
|
||||
thumbnail = self._search_regex(
|
||||
r'poster="([^"]+)"', webpage, 'thumbnail', default=False)
|
||||
r'poster\s*=\s*(["\'])(?P<thumbnail>[^\1]+?)\1',
|
||||
webpage, 'thumbnail', default=False, group='thumbnail')
|
||||
|
||||
uploader_id = self._search_regex(
|
||||
r'twitter_handle\s*=\s*"([^"]+)"',
|
||||
webpage, 'uploader id', fatal=False)
|
||||
r'twitter_handle\s*=\s*(["\'])(?P<uploader_id>[^\1]+?)\1',
|
||||
webpage, 'uploader id', fatal=False, group='uploader_id')
|
||||
uploader = self._search_regex(
|
||||
r'window\.channelName\s*=\s*"Embedded:([^"]+)"',
|
||||
webpage, 'uploader', default=False)
|
||||
r'window\.channelName\s*=\s*(["\'])Embedded:(?P<uploader>[^\1]+?)\1',
|
||||
webpage, 'uploader', default=False, group='uploader')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -3,9 +3,9 @@ from __future__ import unicode_literals
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote_plus
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
compat_urllib_parse,
|
||||
parse_age_limit,
|
||||
int_or_none,
|
||||
)
|
||||
@@ -37,7 +37,7 @@ class OpenFilmIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
player = compat_urllib_parse.unquote_plus(
|
||||
player = compat_urllib_parse_unquote_plus(
|
||||
self._og_search_video_url(webpage))
|
||||
|
||||
video = json.loads(self._search_regex(
|
||||
|
||||
@@ -32,7 +32,7 @@ class PBSIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2365006249',
|
||||
'ext': 'mp4',
|
||||
'title': 'A More Perfect Union',
|
||||
'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
|
||||
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
'duration': 3190,
|
||||
},
|
||||
@@ -46,7 +46,7 @@ class PBSIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2365297690',
|
||||
'ext': 'mp4',
|
||||
'title': 'Losing Iraq',
|
||||
'title': 'FRONTLINE - Losing Iraq',
|
||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
||||
'duration': 5050,
|
||||
},
|
||||
@@ -60,7 +60,7 @@ class PBSIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2201174722',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist',
|
||||
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
|
||||
'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
|
||||
'duration': 801,
|
||||
},
|
||||
@@ -72,7 +72,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2365297708',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:68d87ef760660eb564455eb30ca464fe',
|
||||
'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
||||
'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
||||
'duration': 6559,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@@ -88,7 +88,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'killer-typhoon',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:c741d14e979fc53228c575894094f157',
|
||||
'title': 'Killer Typhoon',
|
||||
'title': 'NOVA - Killer Typhoon',
|
||||
'duration': 3172,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140122',
|
||||
@@ -110,7 +110,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2280706814',
|
||||
'display_id': 'player',
|
||||
'ext': 'mp4',
|
||||
'title': 'Death and the Civil War',
|
||||
'title': 'American Experience - Death and the Civil War',
|
||||
'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
|
||||
'duration': 6705,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
@@ -118,6 +118,21 @@ class PBSIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://video.pbs.org/video/2365367186/',
|
||||
'info_dict': {
|
||||
'id': '2365367186',
|
||||
'display_id': '2365367186',
|
||||
'ext': 'mp4',
|
||||
'title': 'To Catch A Comet - Full Episode',
|
||||
'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.',
|
||||
'duration': 3342,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@@ -224,6 +239,20 @@ class PBSIE(InfoExtractor):
|
||||
rating_str = rating_str.rpartition('-')[2]
|
||||
age_limit = US_RATINGS.get(rating_str)
|
||||
|
||||
subtitles = {}
|
||||
closed_captions_url = info.get('closed_captions_url')
|
||||
if closed_captions_url:
|
||||
subtitles['en'] = [{
|
||||
'ext': 'ttml',
|
||||
'url': closed_captions_url,
|
||||
}]
|
||||
|
||||
# info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc)
|
||||
# Try turning it to 'program - title' naming scheme if possible
|
||||
alt_title = info.get('program', {}).get('title')
|
||||
if alt_title:
|
||||
info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title'])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
@@ -234,4 +263,5 @@ class PBSIE(InfoExtractor):
|
||||
'age_limit': age_limit,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class PhotobucketIE(InfoExtractor):
|
||||
@@ -34,7 +34,7 @@ class PhotobucketIE(InfoExtractor):
|
||||
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
|
||||
webpage, 'info json')
|
||||
info = json.loads(info_json)
|
||||
url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
|
||||
url = compat_urllib_parse_unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': url,
|
||||
|
||||
@@ -38,9 +38,7 @@ class PlayedIE(InfoExtractor):
|
||||
if m_error:
|
||||
raise ExtractorError(m_error.group('msg'), expected=True)
|
||||
|
||||
fields = re.findall(
|
||||
r'type="hidden" name="([^"]+)"\s+value="([^"]+)">', orig_webpage)
|
||||
data = dict(fields)
|
||||
data = self._hidden_inputs(orig_webpage)
|
||||
|
||||
self._sleep(2, video_id)
|
||||
|
||||
|
||||
@@ -4,7 +4,8 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
@@ -44,7 +45,7 @@ class PlayvidIE(InfoExtractor):
|
||||
flashvars = self._html_search_regex(
|
||||
r'flashvars="(.+?)"', webpage, 'flashvars')
|
||||
|
||||
infos = compat_urllib_parse.unquote(flashvars).split(r'&')
|
||||
infos = compat_urllib_parse_unquote(flashvars).split(r'&')
|
||||
for info in infos:
|
||||
videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
|
||||
if videovars_match:
|
||||
@@ -52,7 +53,7 @@ class PlayvidIE(InfoExtractor):
|
||||
val = videovars_match.group(2)
|
||||
|
||||
if key == 'title':
|
||||
video_title = compat_urllib_parse.unquote_plus(val)
|
||||
video_title = compat_urllib_parse_unquote_plus(val)
|
||||
if key == 'duration':
|
||||
try:
|
||||
duration = int(val)
|
||||
|
||||
@@ -5,7 +5,8 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
@@ -69,7 +70,7 @@ class PornHubIE(InfoExtractor):
|
||||
webpage, 'uploader', fatal=False)
|
||||
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||
if thumbnail:
|
||||
thumbnail = compat_urllib_parse.unquote(thumbnail)
|
||||
thumbnail = compat_urllib_parse_unquote(thumbnail)
|
||||
|
||||
view_count = self._extract_count(
|
||||
r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
|
||||
@@ -80,9 +81,9 @@ class PornHubIE(InfoExtractor):
|
||||
comment_count = self._extract_count(
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
|
||||
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
||||
video_urls = list(map(compat_urllib_parse_unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
||||
if webpage.find('"encrypted":true') != -1:
|
||||
password = compat_urllib_parse.unquote_plus(
|
||||
password = compat_urllib_parse_unquote_plus(
|
||||
self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
||||
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
|
||||
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
@@ -31,12 +29,7 @@ class PrimeShareTVIE(InfoExtractor):
|
||||
if '>File not exist<' in webpage:
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
fields = dict(re.findall(r'''(?x)<input\s+
|
||||
type="hidden"\s+
|
||||
name="([^"]+)"\s+
|
||||
(?:id="[^"]+"\s+)?
|
||||
value="([^"]*)"
|
||||
''', webpage))
|
||||
fields = self._hidden_inputs(webpage)
|
||||
|
||||
headers = {
|
||||
'Referer': url,
|
||||
|
||||
@@ -35,10 +35,7 @@ class PromptFileIE(InfoExtractor):
|
||||
raise ExtractorError('Video %s does not exist' % video_id,
|
||||
expected=True)
|
||||
|
||||
fields = dict(re.findall(r'''(?x)type="hidden"\s+
|
||||
name="(.+?)"\s+
|
||||
value="(.*?)"
|
||||
''', webpage))
|
||||
fields = self._hidden_inputs(webpage)
|
||||
post = compat_urllib_parse.urlencode(fields)
|
||||
req = compat_urllib_request.Request(url, post)
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
||||
@@ -9,8 +9,11 @@ from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -21,6 +24,11 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
# Tests changes introduced in https://github.com/rg3/youtube-dl/pull/6242
|
||||
# in response to fixing https://github.com/rg3/youtube-dl/issues/6215:
|
||||
# - malformed f4m manifest support
|
||||
# - proper handling of URLs starting with `https?://` in 2.0 manifests
|
||||
# - recursive child f4m manifests extraction
|
||||
'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
|
||||
'info_dict': {
|
||||
'id': '2104602',
|
||||
@@ -208,7 +216,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
|
||||
|
||||
access_token = 'prosieben'
|
||||
client_name = 'kolibri-1.12.6'
|
||||
client_name = 'kolibri-2.0.19-splec4'
|
||||
client_location = url
|
||||
|
||||
videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({
|
||||
@@ -218,10 +226,13 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'ids': clip_id,
|
||||
})
|
||||
|
||||
videos = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')
|
||||
video = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')[0]
|
||||
|
||||
duration = float(videos[0]['duration'])
|
||||
source_ids = [source['id'] for source in videos[0]['sources']]
|
||||
if video.get('is_protected') is True:
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
duration = float_or_none(video.get('duration'))
|
||||
source_ids = [source['id'] for source in video['sources']]
|
||||
source_ids_str = ','.join(map(str, source_ids))
|
||||
|
||||
g = '01!8d8F_)r9]4s[qeuXfP%'
|
||||
@@ -275,8 +286,9 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
|
||||
for source in urls_sources:
|
||||
protocol = source['protocol']
|
||||
source_url = source['url']
|
||||
if protocol == 'rtmp' or protocol == 'rtmpe':
|
||||
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source['url'])
|
||||
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
|
||||
if not mobj:
|
||||
continue
|
||||
path = mobj.group('path')
|
||||
@@ -293,9 +305,11 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'format_id': '%s_%s' % (source['cdn'], source['bitrate']),
|
||||
})
|
||||
elif 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(source_url, clip_id))
|
||||
else:
|
||||
formats.append({
|
||||
'url': source['url'],
|
||||
'url': source_url,
|
||||
'vbr': fix_bitrate(source['bitrate']),
|
||||
})
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ from ..compat import compat_urllib_request
|
||||
|
||||
class QQMusicIE(InfoExtractor):
|
||||
IE_NAME = 'qqmusic'
|
||||
IE_DESC = 'QQ音乐'
|
||||
_VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
|
||||
@@ -142,6 +143,7 @@ class QQPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
class QQMusicSingerIE(QQPlaylistBaseIE):
|
||||
IE_NAME = 'qqmusic:singer'
|
||||
IE_DESC = 'QQ音乐 - 歌手'
|
||||
_VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
_TEST = {
|
||||
'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
|
||||
@@ -186,6 +188,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
|
||||
|
||||
class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||
IE_NAME = 'qqmusic:album'
|
||||
IE_DESC = 'QQ音乐 - 专辑'
|
||||
_VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -228,6 +231,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||
|
||||
class QQMusicToplistIE(QQPlaylistBaseIE):
|
||||
IE_NAME = 'qqmusic:toplist'
|
||||
IE_DESC = 'QQ音乐 - 排行榜'
|
||||
_VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -281,6 +285,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
|
||||
|
||||
class QQMusicPlaylistIE(QQPlaylistBaseIE):
|
||||
IE_NAME = 'qqmusic:playlist'
|
||||
IE_DESC = 'QQ音乐 - 歌单'
|
||||
_VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
|
||||
73
youtube_dl/extractor/rds.py
Normal file
73
youtube_dl/extractor/rds.py
Normal file
@@ -0,0 +1,73 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class RDSIE(InfoExtractor):
    """Extractor for single video pages on RDS.ca.

    The URL carries both a human-readable slug (``display_id``) and a
    dotted numeric identifier (``id``); see ``_VALID_URL``.
    """

    IE_DESC = 'RDS.ca'
    _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)'

    _TESTS = [{
        'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
        'info_dict': {
            'id': '3.1132799',
            'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
            'ext': 'mp4',
            'title': 'Fowler Jr. prend la direction de Jacksonville',
            'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
            'timestamp': 1430397346,
            'upload_date': '20150430',
            'duration': 154.354,
            'age_limit': 0,
        }
    }, {
        'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the video page at *url* and return its info dict.

        The media URL, upload date and duration are read from ``itemprop``
        markup on the page; title, description and thumbnail are taken from
        OpenGraph tags first, with a secondary lookup as fallback.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        # TODO: extract f4m from 9c9media.com
        video_url = self._search_regex(
            r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"',
            webpage, 'video url')

        # default=None makes a missing og:title yield None instead of
        # aborting extraction, so the (fatal) `title` meta fallback on the
        # right-hand side of `or` can actually be reached.
        title = self._og_search_title(webpage, default=None) or self._html_search_meta(
            'title', webpage, 'title', fatal=True)
        description = self._og_search_description(webpage) or self._html_search_meta(
            'description', webpage, 'description')
        thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(
            [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
             r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
            webpage, 'thumbnail', fatal=False)
        # Optional fields: the lookups are non-fatal and the raw strings are
        # handed to the parse_* helpers for conversion.
        timestamp = parse_iso8601(self._search_regex(
            r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"',
            webpage, 'upload date', fatal=False))
        duration = parse_duration(self._search_regex(
            r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"',
            webpage, 'duration', fatal=False))
        age_limit = self._family_friendly_search(webpage)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'age_limit': age_limit,
        }
|
||||
@@ -43,6 +43,21 @@ class RtlNlIE(InfoExtractor):
|
||||
'upload_date': '20150215',
|
||||
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
|
||||
}
|
||||
}, {
|
||||
# empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
|
||||
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
|
||||
'info_dict': {
|
||||
'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
|
||||
'ext': 'mp4',
|
||||
'title': 'RTL Nieuws - Meer beelden van overval juwelier',
|
||||
'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
|
||||
'timestamp': 1437233400,
|
||||
'upload_date': '20150718',
|
||||
'duration': 30.474,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# encrypted m3u8 streams, georestricted
|
||||
'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
|
||||
@@ -59,22 +74,29 @@ class RtlNlIE(InfoExtractor):
|
||||
uuid)
|
||||
|
||||
material = info['material'][0]
|
||||
progname = info['abstracts'][0]['name']
|
||||
subtitle = material['title'] or info['episodes'][0]['name']
|
||||
description = material.get('synopsis') or info['episodes'][0]['synopsis']
|
||||
title = info['abstracts'][0]['name']
|
||||
subtitle = material.get('title')
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
description = material.get('synopsis')
|
||||
|
||||
meta = info.get('meta', {})
|
||||
|
||||
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
|
||||
# NB: nowadays, recent ffmpeg and avconv can handle these encrypted streams, so
|
||||
# this adaptive -> flash workaround is not required in general, but it also
|
||||
# allows bypassing georestriction therefore is retained for now.
|
||||
videopath = material['videopath'].replace('/adaptive/', '/flash/')
|
||||
# m3u8 streams are encrypted and may not be handled properly by older ffmpeg/avconv.
|
||||
# To workaround this previously adaptive -> flash trick was used to obtain
|
||||
# unencrypted m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
|
||||
# and bypass georestrictions as well.
|
||||
# Currently, unencrypted m3u8 playlists are (intentionally?) invalid and therefore
|
||||
# unusable albeit can be fixed by simple string replacement (see
|
||||
# https://github.com/rg3/youtube-dl/pull/6337)
|
||||
# Since recent ffmpeg and avconv handle encrypted streams just fine encrypted
|
||||
# streams are used now.
|
||||
videopath = material['videopath']
|
||||
m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
|
||||
|
||||
video_urlpart = videopath.split('/flash/')[1][:-5]
|
||||
video_urlpart = videopath.split('/adaptive/')[1][:-5]
|
||||
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
|
||||
|
||||
formats.extend([
|
||||
@@ -107,7 +129,7 @@ class RtlNlIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': uuid,
|
||||
'title': '%s - %s' % (progname, subtitle),
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'timestamp': material['original_date'],
|
||||
'description': description,
|
||||
|
||||
@@ -19,7 +19,16 @@ from ..utils import (
|
||||
|
||||
class RTSIE(InfoExtractor):
|
||||
IE_DESC = 'RTS.ch'
|
||||
_VALID_URL = r'https?://(?:www\.)?rts\.ch/(?:(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html|play/tv/[^/]+/video/(?P<display_id_new>.+?)\?id=(?P<id_new>[0-9]+))'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
rts:(?P<rts_id>\d+)|
|
||||
https?://
|
||||
(?:www\.)?rts\.ch/
|
||||
(?:
|
||||
(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html|
|
||||
play/tv/[^/]+/video/(?P<display_id_new>.+?)\?id=(?P<id_new>[0-9]+)
|
||||
)
|
||||
)'''
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -122,6 +131,15 @@ class RTSIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
# article with videos on rhs
|
||||
'url': 'http://www.rts.ch/sport/hockey/6693917-hockey-davos-decroche-son-31e-titre-de-champion-de-suisse.html',
|
||||
'info_dict': {
|
||||
'id': '6693917',
|
||||
'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rts.ch/play/tv/le-19h30/video/le-chantier-du-nouveau-parlement-vaudois-a-permis-une-trouvaille-historique?id=6348280',
|
||||
'only_matching': True,
|
||||
@@ -130,7 +148,7 @@ class RTSIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id') or m.group('id_new')
|
||||
video_id = m.group('rts_id') or m.group('id') or m.group('id_new')
|
||||
display_id = m.group('display_id') or m.group('display_id_new')
|
||||
|
||||
def download_json(internal_id):
|
||||
@@ -143,6 +161,15 @@ class RTSIE(InfoExtractor):
|
||||
# video_id extracted out of URL is not always a real id
|
||||
if 'video' not in all_info and 'audio' not in all_info:
|
||||
page = self._download_webpage(url, display_id)
|
||||
|
||||
# article with videos on rhs
|
||||
videos = re.findall(
|
||||
r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:rts:video:(\d+)"',
|
||||
page)
|
||||
if videos:
|
||||
entries = [self.url_result('rts:%s' % video_urn, 'RTS') for video_urn in videos]
|
||||
return self.playlist_result(entries, video_id, self._og_search_title(page))
|
||||
|
||||
internal_id = self._html_search_regex(
|
||||
r'<(?:video|audio) data-id="([0-9]+)"', page,
|
||||
'internal video id')
|
||||
|
||||
@@ -1,17 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class SBSIE(InfoExtractor):
|
||||
IE_DESC = 'sbs.com.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/(?:single/)?(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand|news)/video/(?:single/)?(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Original URL is handled by the generic IE which finds the iframe:
|
||||
@@ -21,39 +16,36 @@ class SBSIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '320403011771',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dingo Conservation',
|
||||
'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
|
||||
'title': 'Dingo Conservation (The Feed)',
|
||||
'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'duration': 308,
|
||||
},
|
||||
'add_ies': ['generic'],
|
||||
}, {
|
||||
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
'http://www.sbs.com.au/ondemand/video/single/%s?context=web' % video_id, video_id)
|
||||
|
||||
player = self._search_regex(
|
||||
r'(?s)playerParams\.releaseUrls\s*=\s*(\{.*?\n\});\n',
|
||||
webpage, 'player')
|
||||
player = re.sub(r"'\s*\+\s*[\da-zA-Z_]+\s*\+\s*'", '', player)
|
||||
player_params = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)var\s+playerParams\s*=\s*({.+?});', webpage, 'playerParams'),
|
||||
video_id)
|
||||
|
||||
release_urls = self._parse_json(js_to_json(player), video_id)
|
||||
|
||||
theplatform_url = release_urls.get('progressive') or release_urls['standard']
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' (The Feed)')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
urls = player_params['releaseUrls']
|
||||
theplatform_url = (urls.get('progressive') or urls.get('standard') or
|
||||
urls.get('html') or player_params['relatedItemsURL'])
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': theplatform_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -35,8 +34,7 @@ class SharedIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
download_form = dict(re.findall(
|
||||
r'<input type="hidden" name="([^"]+)" value="([^"]*)"', webpage))
|
||||
download_form = self._hidden_inputs(webpage)
|
||||
request = compat_urllib_request.Request(
|
||||
url, compat_urllib_parse.urlencode(download_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
@@ -23,6 +23,15 @@ class SnagFilmsEmbedIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '#whilewewatch',
|
||||
}
|
||||
}, {
|
||||
# invalid labels, 360p is better that 480p
|
||||
'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036',
|
||||
'md5': '882fca19b9eb27ef865efeeaed376a48',
|
||||
'info_dict': {
|
||||
'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
|
||||
'ext': 'mp4',
|
||||
'title': 'Life in Limbo',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
|
||||
'only_matching': True,
|
||||
@@ -52,14 +61,15 @@ class SnagFilmsEmbedIE(InfoExtractor):
|
||||
if not file_:
|
||||
continue
|
||||
type_ = source.get('type')
|
||||
format_id = source.get('label')
|
||||
ext = determine_ext(file_)
|
||||
if any(_ == 'm3u8' for _ in (type_, ext)):
|
||||
format_id = source.get('label') or ext
|
||||
if all(v == 'm3u8' for v in (type_, ext)):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_, video_id, 'mp4', m3u8_id='hls'))
|
||||
else:
|
||||
bitrate = int_or_none(self._search_regex(
|
||||
r'(\d+)kbps', file_, 'bitrate', default=None))
|
||||
[r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
|
||||
file_, 'bitrate', default=None))
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
||||
formats.append({
|
||||
|
||||
@@ -282,9 +282,11 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
msgs = (compat_str(err['error_message']) for err in info['errors'])
|
||||
raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
|
||||
|
||||
entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in info['tracks']]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']],
|
||||
'entries': entries,
|
||||
'id': '%s' % info['id'],
|
||||
'title': info['title'],
|
||||
}
|
||||
@@ -379,9 +381,7 @@ class SoundcloudPlaylistIE(SoundcloudIE):
|
||||
data = self._download_json(
|
||||
base_url + data, playlist_id, 'Downloading playlist')
|
||||
|
||||
entries = [
|
||||
self._extract_info_dict(t, quiet=True, secret_token=token)
|
||||
for t in data['tracks']]
|
||||
entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in data['tracks']]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
||||
@@ -4,7 +4,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
@@ -68,7 +68,7 @@ class SpankwireIE(InfoExtractor):
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
video_urls = list(map(
|
||||
compat_urllib_parse.unquote,
|
||||
compat_urllib_parse_unquote,
|
||||
re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
|
||||
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
||||
password = self._search_regex(
|
||||
|
||||
@@ -9,7 +9,7 @@ from .spiegeltv import SpiegeltvIE
|
||||
|
||||
|
||||
class SpiegelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||
'md5': '2c2754212136f35fb4b19767d242f66e',
|
||||
@@ -39,6 +39,9 @@ class SpiegelIE(InfoExtractor):
|
||||
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
|
||||
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -8,17 +8,17 @@ from ..utils import parse_filesize
|
||||
|
||||
|
||||
class TagesschauIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/ts|video/video)(?P<id>-?[0-9]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_[^/#?]+?)?\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
|
||||
'md5': 'bcdeac2194fb296d599ce7929dfa4009',
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
|
||||
'md5': '917a228bc7df7850783bc47979673a09',
|
||||
'info_dict': {
|
||||
'id': '1399128',
|
||||
'id': '102143',
|
||||
'ext': 'mp4',
|
||||
'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen',
|
||||
'description': 'md5:69da3c61275b426426d711bde96463ab',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
|
||||
'description': 'md5:171feccd9d9b3dd54d05d501568f6359',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
|
||||
@@ -28,8 +28,39 @@ class TagesschauIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:695c01bfd98b7e313c501386327aea59',
|
||||
'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
}
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html',
|
||||
'md5': 'aef45de271c4bf0a5db834aa40bf774c',
|
||||
'info_dict': {
|
||||
'id': '18407',
|
||||
'ext': 'mp3',
|
||||
'title': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
|
||||
'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/tt-3827.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/nm-3475.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/weltspiegel-3167.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/tsvorzwanzig-959.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/bab/bab-3299~_bab-sendung-209.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
@@ -49,19 +80,26 @@ class TagesschauIE(InfoExtractor):
|
||||
playerpage = self._download_webpage(
|
||||
player_url, display_id, 'Downloading player page')
|
||||
|
||||
medias = re.findall(
|
||||
r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
|
||||
playerpage)
|
||||
formats = []
|
||||
for url, ext, res in medias:
|
||||
for media in re.finditer(
|
||||
r'''(?x)
|
||||
(?P<q_url>["\'])(?P<url>http://media.+?)(?P=q_url)
|
||||
,\s*type:(?P<q_type>["\'])(?P<type>video|audio)/(?P<ext>.+?)(?P=q_type)
|
||||
(?:,\s*quality:(?P<q_quality>["\'])(?P<quality>.+?)(?P=q_quality))?
|
||||
''', playerpage):
|
||||
url = media.group('url')
|
||||
type_ = media.group('type')
|
||||
ext = media.group('ext')
|
||||
res = media.group('quality')
|
||||
f = {
|
||||
'format_id': res + '_' + ext,
|
||||
'format_id': '%s_%s' % (res, ext) if res else ext,
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if type_ == 'audio' else None,
|
||||
}
|
||||
f.update(self._FORMATS.get(res, {}))
|
||||
formats.append(f)
|
||||
thumbnail_fn = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
|
||||
thumbnail = self._og_search_thumbnail(playerpage)
|
||||
title = self._og_search_title(webpage).strip()
|
||||
description = self._og_search_description(webpage).strip()
|
||||
else:
|
||||
@@ -99,17 +137,14 @@ class TagesschauIE(InfoExtractor):
|
||||
'filesize_approx': parse_filesize(m.group('filesize_approx')),
|
||||
})
|
||||
formats.append(format)
|
||||
thumbnail_fn = self._search_regex(
|
||||
r'(?s)<img alt="Sendungsbild".*?src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<p class="teasertext">(.*?)</p>',
|
||||
webpage, 'description', fatal=False)
|
||||
webpage, 'description', default=None)
|
||||
title = self._html_search_regex(
|
||||
r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
|
||||
|
||||
self._sort_formats(formats)
|
||||
thumbnail = 'http://www.tagesschau.de' + thumbnail_fn
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
|
||||
@@ -4,8 +4,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .pornhub import PornHubIE
|
||||
from .vimeo import VimeoIE
|
||||
|
||||
|
||||
class TumblrIE(InfoExtractor):
|
||||
@@ -60,26 +58,16 @@ class TumblrIE(InfoExtractor):
|
||||
blog = m_url.group('blog_name')
|
||||
|
||||
url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
vid_me_embed_url = self._search_regex(
|
||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||
webpage, 'vid.me embed', default=None)
|
||||
if vid_me_embed_url is not None:
|
||||
return self.url_result(vid_me_embed_url, 'Vidme')
|
||||
|
||||
pornhub_url = PornHubIE._extract_url(webpage)
|
||||
if pornhub_url:
|
||||
return self.url_result(pornhub_url, 'PornHub')
|
||||
|
||||
vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
|
||||
if vimeo_url:
|
||||
return self.url_result(vimeo_url, 'Vimeo')
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
|
||||
iframe_url = self._search_regex(
|
||||
r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
|
||||
webpage, 'iframe url')
|
||||
iframe = self._download_webpage(iframe_url, video_id)
|
||||
webpage, 'iframe url', default=None)
|
||||
if iframe_url is None:
|
||||
return self.url_result(urlh.geturl(), 'Generic')
|
||||
|
||||
iframe = self._download_webpage(iframe_url, video_id,
|
||||
'Downloading iframe page')
|
||||
video_url = self._search_regex(r'<source src="([^"]+)"',
|
||||
iframe, 'video url')
|
||||
|
||||
|
||||
@@ -22,8 +22,8 @@ class TwitchBaseIE(InfoExtractor):
|
||||
|
||||
_API_BASE = 'https://api.twitch.tv'
|
||||
_USHER_BASE = 'http://usher.twitch.tv'
|
||||
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
|
||||
_LOGIN_POST_URL = 'https://secure-login.twitch.tv/login'
|
||||
_LOGIN_URL = 'https://secure.twitch.tv/login'
|
||||
_LOGIN_POST_URL = 'https://passport.twitch.tv/authorize'
|
||||
_NETRC_MACHINE = 'twitch'
|
||||
|
||||
def _handle_error(self, response):
|
||||
@@ -59,20 +59,12 @@ class TwitchBaseIE(InfoExtractor):
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
authenticity_token = self._search_regex(
|
||||
r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
|
||||
login_page, 'authenticity token')
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form = {
|
||||
'utf8': '✓'.encode('utf-8'),
|
||||
'authenticity_token': authenticity_token,
|
||||
'redirect_on_login': '',
|
||||
'embed_form': 'false',
|
||||
'mp_source_action': 'login-button',
|
||||
'follow': '',
|
||||
'login': username,
|
||||
'password': password,
|
||||
}
|
||||
login_form.update({
|
||||
'login': username.encode('utf-8'),
|
||||
'password': password.encode('utf-8'),
|
||||
})
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
@@ -80,11 +72,15 @@ class TwitchBaseIE(InfoExtractor):
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
||||
m = re.search(
|
||||
r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
|
||||
if m:
|
||||
error_message = self._search_regex(
|
||||
r'<div[^>]+class="subwindow_notice"[^>]*>([^<]+)</div>',
|
||||
response, 'error message', default=None)
|
||||
if error_message:
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s' % m.group('msg').strip(), expected=True)
|
||||
'Unable to login. Twitch said: %s' % error_message, expected=True)
|
||||
|
||||
if '>Reset your password<' in response:
|
||||
self.report_warning('Twitch asks you to reset your password, go to https://secure.twitch.tv/reset/submit')
|
||||
|
||||
def _prefer_source(self, formats):
|
||||
try:
|
||||
@@ -314,9 +310,9 @@ class TwitchBookmarksIE(TwitchPlaylistBaseIE):
|
||||
|
||||
class TwitchStreamIE(TwitchBaseIE):
|
||||
IE_NAME = 'twitch:stream'
|
||||
_VALID_URL = r'%s/(?P<id>[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
|
||||
_VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.twitch.tv/shroomztv',
|
||||
'info_dict': {
|
||||
'id': '12772022048',
|
||||
@@ -335,7 +331,10 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.twitch.tv/miracle_doto#profile-0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
@@ -350,6 +349,12 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
'http://www.twitch.tv/%s/profile' % channel_id,
|
||||
'TwitchProfile', channel_id)
|
||||
|
||||
# Channel name may be typed if different case than the original channel name
|
||||
# (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing
|
||||
# an invalid m3u8 URL. Working around by use of original channel name from stream
|
||||
# JSON and fallback to lowercase if it's not available.
|
||||
channel_id = stream.get('channel', {}).get('name') or channel_id.lower()
|
||||
|
||||
access_token = self._download_json(
|
||||
'%s/api/channels/%s/access_token' % (self._API_BASE, channel_id), channel_id,
|
||||
'Downloading channel access token')
|
||||
|
||||
@@ -15,7 +15,8 @@ from ..utils import (
|
||||
class UdemyIE(InfoExtractor):
|
||||
IE_NAME = 'udemy'
|
||||
_VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)'
|
||||
_LOGIN_URL = 'https://www.udemy.com/join/login-submit/'
|
||||
_LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1'
|
||||
_ORIGIN_URL = 'https://www.udemy.com'
|
||||
_NETRC_MACHINE = 'udemy'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -74,29 +75,36 @@ class UdemyIE(InfoExtractor):
|
||||
expected=True)
|
||||
|
||||
login_popup = self._download_webpage(
|
||||
'https://www.udemy.com/join/login-popup?displayType=ajax&showSkipButton=1', None,
|
||||
'Downloading login popup')
|
||||
self._LOGIN_URL, None, 'Downloading login popup')
|
||||
|
||||
if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
|
||||
def is_logged(webpage):
|
||||
return any(p in webpage for p in ['href="https://www.udemy.com/user/logout/', '>Logout<'])
|
||||
|
||||
# already logged in
|
||||
if is_logged(login_popup):
|
||||
return
|
||||
|
||||
csrf = self._html_search_regex(
|
||||
r'<input type="hidden" name="csrf" value="(.+?)"',
|
||||
login_popup, 'csrf token')
|
||||
login_form = self._form_hidden_inputs('login-form', login_popup)
|
||||
|
||||
login_form.update({
|
||||
'email': username.encode('utf-8'),
|
||||
'password': password.encode('utf-8'),
|
||||
})
|
||||
|
||||
login_form = {
|
||||
'email': username,
|
||||
'password': password,
|
||||
'csrf': csrf,
|
||||
'displayType': 'json',
|
||||
'isSubmitted': '1',
|
||||
}
|
||||
request = compat_urllib_request.Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
response = self._download_json(
|
||||
request.add_header('Referer', self._ORIGIN_URL)
|
||||
request.add_header('Origin', self._ORIGIN_URL)
|
||||
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
||||
if 'returnUrl' not in response:
|
||||
if not is_logged(response):
|
||||
error = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>',
|
||||
response, 'error message', default=None)
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -11,6 +11,7 @@ from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class UDNEmbedIE(InfoExtractor):
|
||||
IE_DESC = '聯合影音'
|
||||
_VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://video.udn.com/embed/news/300040',
|
||||
|
||||
@@ -5,6 +5,7 @@ import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -76,7 +77,7 @@ class VeeHDIE(InfoExtractor):
|
||||
|
||||
if config_json:
|
||||
config = json.loads(config_json)
|
||||
video_url = compat_urlparse.unquote(config['clip']['url'])
|
||||
video_url = compat_urllib_parse_unquote(config['clip']['url'])
|
||||
|
||||
if not video_url:
|
||||
video_url = self._html_search_regex(
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
@@ -7,25 +6,29 @@ from ..utils import ExtractorError
|
||||
|
||||
|
||||
class ViceIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)+(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
|
||||
'info_dict': {
|
||||
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
|
||||
'ext': 'mp4',
|
||||
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
|
||||
},
|
||||
'params': {
|
||||
# Requires ffmpeg (m3u8 manifest)
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
|
||||
'info_dict': {
|
||||
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
|
||||
'ext': 'mp4',
|
||||
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
|
||||
},
|
||||
'params': {
|
||||
# Requires ffmpeg (m3u8 manifest)
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
try:
|
||||
embed_code = self._search_regex(
|
||||
r'embedCode=([^&\'"]+)', webpage,
|
||||
|
||||
@@ -8,20 +8,23 @@ from ..compat import compat_urllib_request
|
||||
|
||||
|
||||
class VideoMegaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:www\.)?videomega\.tv/
|
||||
(?:iframe\.php|cdn\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://videomega.tv/?ref=4GNA688SU99US886ANG4',
|
||||
'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
|
||||
_VALID_URL = r'(?:videomega:|https?://(?:www\.)?videomega\.tv/(?:(?:view|iframe|cdn)\.php)?\?ref=)(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA',
|
||||
'md5': 'cc1920a58add3f05c6a93285b84fb3aa',
|
||||
'info_dict': {
|
||||
'id': '4GNA688SU99US886ANG4',
|
||||
'id': 'AOSQBJYKIDDIKYJBQSOA',
|
||||
'ext': 'mp4',
|
||||
'title': 'BigBuckBunny_320x180',
|
||||
'title': '1254207',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA&width=1070&height=600',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videomega.tv/view.php?ref=090051111052065112106089103052052103089106112065052111051090',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -29,12 +32,13 @@ class VideoMegaIE(InfoExtractor):
|
||||
iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
|
||||
req = compat_urllib_request.Request(iframe_url)
|
||||
req.add_header('Referer', url)
|
||||
req.add_header('Cookie', 'noadvtday=0')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?)</title>', webpage, 'title')
|
||||
r'<title>(.+?)</title>', webpage, 'title')
|
||||
title = re.sub(
|
||||
r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s?|\s?-\svideomega\.tv$)', '', title)
|
||||
r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s*|\s*-\svideomega\.tv$)', '', title)
|
||||
thumbnail = self._search_regex(
|
||||
r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
video_url = self._search_regex(
|
||||
|
||||
@@ -1,129 +1,137 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class ViewsterIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?viewster\.com/movie/(?P<id>\d+-\d+-\d+)'
|
||||
_VALID_URL = r'http://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
|
||||
_TESTS = [{
|
||||
# movielink, paymethod=fre
|
||||
'url': 'http://www.viewster.com/movie/1293-19341-000/hout-wood/',
|
||||
'playlist': [{
|
||||
'md5': '8f9d94b282d80c42b378dffdbb11caf3',
|
||||
'info_dict': {
|
||||
'id': '1293-19341-000-movie',
|
||||
'ext': 'flv',
|
||||
'title': "'Hout' (Wood) - Movie",
|
||||
},
|
||||
}],
|
||||
'info_dict': {
|
||||
'id': '1293-19341-000',
|
||||
'title': "'Hout' (Wood)",
|
||||
'description': 'md5:925733185a9242ef96f436937683f33b',
|
||||
}
|
||||
}, {
|
||||
# movielink, paymethod=adv
|
||||
# movie, Type=Movie
|
||||
'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
|
||||
'playlist': [{
|
||||
'md5': '77a005453ca7396cbe3d35c9bea30aef',
|
||||
'info_dict': {
|
||||
'id': '1140-11855-000-movie',
|
||||
'ext': 'flv',
|
||||
'title': "THE LISTENING PROJECT - Movie",
|
||||
},
|
||||
}],
|
||||
'md5': '14d3cfffe66d57b41ae2d9c873416f01',
|
||||
'info_dict': {
|
||||
'id': '1140-11855-000',
|
||||
'title': "THE LISTENING PROJECT",
|
||||
'description': 'md5:714421ae9957e112e672551094bf3b08',
|
||||
}
|
||||
'ext': 'flv',
|
||||
'title': 'The listening Project',
|
||||
'description': 'md5:bac720244afd1a8ea279864e67baa071',
|
||||
'timestamp': 1214870400,
|
||||
'upload_date': '20080701',
|
||||
'duration': 4680,
|
||||
},
|
||||
}, {
|
||||
# direct links, no movielink
|
||||
'url': 'http://www.viewster.com/movie/1198-56411-000/sinister/',
|
||||
'playlist': [{
|
||||
'md5': '0307b7eac6bfb21ab0577a71f6eebd8f',
|
||||
'info_dict': {
|
||||
'id': '1198-56411-000-trailer',
|
||||
'ext': 'mp4',
|
||||
'title': "Sinister - Trailer",
|
||||
},
|
||||
}, {
|
||||
'md5': '80b9ee3ad69fb368f104cb5d9732ae95',
|
||||
'info_dict': {
|
||||
'id': '1198-56411-000-behind-scenes',
|
||||
'ext': 'mp4',
|
||||
'title': "Sinister - Behind Scenes",
|
||||
},
|
||||
}, {
|
||||
'md5': '3b3ea897ecaa91fca57a8a94ac1b15c5',
|
||||
'info_dict': {
|
||||
'id': '1198-56411-000-scene-from-movie',
|
||||
'ext': 'mp4',
|
||||
'title': "Sinister - Scene from movie",
|
||||
},
|
||||
}],
|
||||
# series episode, Type=Episode
|
||||
'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
|
||||
'md5': 'd5434c80fcfdb61651cc2199a88d6ba3',
|
||||
'info_dict': {
|
||||
'id': '1198-56411-000',
|
||||
'title': "Sinister",
|
||||
'description': 'md5:014c40b0488848de9683566a42e33372',
|
||||
}
|
||||
'id': '1284-19427-001',
|
||||
'ext': 'flv',
|
||||
'title': 'The World and a Wall',
|
||||
'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
|
||||
'timestamp': 1428192000,
|
||||
'upload_date': '20150405',
|
||||
'duration': 1500,
|
||||
},
|
||||
}, {
|
||||
# serie, Type=Serie
|
||||
'url': 'http://www.viewster.com/serie/1303-19426-000/',
|
||||
'info_dict': {
|
||||
'id': '1303-19426-000',
|
||||
'title': 'Is It Wrong to Try to Pick up Girls in a Dungeon?',
|
||||
'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
# unfinished serie, no Type
|
||||
'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/',
|
||||
'info_dict': {
|
||||
'id': '1284-19427-000',
|
||||
'title': 'Baby Steps—Season 2',
|
||||
'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}]
|
||||
|
||||
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
|
||||
_AUTH_TOKEN = '/YqhSYsx8EaU9Bsta3ojlA=='
|
||||
|
||||
def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True):
|
||||
request = compat_urllib_request.Request(url)
|
||||
request.add_header('Accept', self._ACCEPT_HEADER)
|
||||
request.add_header('Auth-token', self._AUTH_TOKEN)
|
||||
return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
'http://api.live.viewster.com/api/v1/movie/%s' % video_id)
|
||||
request.add_header('Accept', self._ACCEPT_HEADER)
|
||||
info = self._download_json(
|
||||
'https://public-api.viewster.com/search/%s' % video_id,
|
||||
video_id, 'Downloading entry JSON')
|
||||
|
||||
movie = self._download_json(
|
||||
request, video_id, 'Downloading movie metadata JSON')
|
||||
entry_id = info.get('Id') or info['id']
|
||||
|
||||
title = movie.get('title') or movie['original_title']
|
||||
description = movie.get('synopsis')
|
||||
thumbnail = movie.get('large_artwork') or movie.get('artwork')
|
||||
# unfinished serie has no Type
|
||||
if info.get('Type') in ['Serie', None]:
|
||||
episodes = self._download_json(
|
||||
'https://public-api.viewster.com/series/%s/episodes' % entry_id,
|
||||
video_id, 'Downloading series JSON')
|
||||
entries = [
|
||||
self.url_result(
|
||||
'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
|
||||
for episode in episodes]
|
||||
title = (info.get('Title') or info['Synopsis']['Title']).strip()
|
||||
description = info.get('Synopsis', {}).get('Detailed')
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
entries = []
|
||||
for clip in movie['play_list']:
|
||||
entry = None
|
||||
|
||||
# movielink api
|
||||
link_request = clip.get('link_request')
|
||||
if link_request:
|
||||
request = compat_urllib_request.Request(
|
||||
'http://api.live.viewster.com/api/v1/movielink?movieid=%(movieid)s&action=%(action)s&paymethod=%(paymethod)s&price=%(price)s¤cy=%(currency)s&language=%(language)s&subtitlelanguage=%(subtitlelanguage)s&ischromecast=%(ischromecast)s'
|
||||
% link_request)
|
||||
request.add_header('Accept', self._ACCEPT_HEADER)
|
||||
|
||||
movie_link = self._download_json(
|
||||
request, video_id, 'Downloading movie link JSON', fatal=False)
|
||||
|
||||
if movie_link:
|
||||
formats = self._extract_f4m_formats(
|
||||
movie_link['url'] + '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id)
|
||||
self._sort_formats(formats)
|
||||
entry = {
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
# direct link
|
||||
clip_url = clip.get('clip_data', {}).get('url')
|
||||
if clip_url:
|
||||
entry = {
|
||||
'url': clip_url,
|
||||
'ext': 'mp4',
|
||||
}
|
||||
|
||||
if entry:
|
||||
entry.update({
|
||||
'id': '%s-%s' % (video_id, clip['canonical_title']),
|
||||
'title': '%s - %s' % (title, clip['title']),
|
||||
formats = []
|
||||
for media_type in ('application/f4m+xml', 'application/x-mpegURL'):
|
||||
media = self._download_json(
|
||||
'https://public-api.viewster.com/movies/%s/video?mediaType=%s'
|
||||
% (entry_id, compat_urllib_parse.quote(media_type)),
|
||||
video_id, 'Downloading %s JSON' % media_type, fatal=False)
|
||||
if not media:
|
||||
continue
|
||||
video_url = media.get('Uri')
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
video_url += '&' if '?' in video_url else '?'
|
||||
video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds'))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls',
|
||||
fatal=False # m3u8 sometimes fail
|
||||
))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
})
|
||||
entries.append(entry)
|
||||
self._sort_formats(formats)
|
||||
|
||||
playlist = self.playlist_result(entries, video_id, title, description)
|
||||
playlist['thumbnail'] = thumbnail
|
||||
return playlist
|
||||
synopsis = info.get('Synopsis', {})
|
||||
# Prefer title outside synopsis since it's less messy
|
||||
title = (info.get('Title') or synopsis['Title']).strip()
|
||||
description = synopsis.get('Detailed') or info.get('Synopsis', {}).get('Short')
|
||||
duration = int_or_none(info.get('Duration'))
|
||||
timestamp = parse_iso8601(info.get('ReleaseDate'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -88,6 +88,14 @@ class VikiBaseIE(InfoExtractor):
|
||||
if not self._token:
|
||||
self.report_warning('Unable to get session token, login has probably failed')
|
||||
|
||||
@staticmethod
|
||||
def dict_selection(dict_obj, preferred_key):
|
||||
if preferred_key in dict_obj:
|
||||
return dict_obj.get(preferred_key)
|
||||
|
||||
filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()]))
|
||||
return filtered_dict[0] if filtered_dict else None
|
||||
|
||||
|
||||
class VikiIE(VikiBaseIE):
|
||||
IE_NAME = 'viki'
|
||||
@@ -173,6 +181,19 @@ class VikiIE(VikiBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.viki.com/player/44699v',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# non-English description
|
||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||
'md5': '1713ae35df5a521b31f6dc40730e7c9c',
|
||||
'info_dict': {
|
||||
'id': '158036v',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'I Planet Entertainment',
|
||||
'upload_date': '20111122',
|
||||
'timestamp': 1321985454,
|
||||
'description': 'md5:44b1e46619df3a072294645c770cef36',
|
||||
'title': 'Love In Magic',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -181,19 +202,14 @@ class VikiIE(VikiBaseIE):
|
||||
video = self._call_api(
|
||||
'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
title = None
|
||||
titles = video.get('titles')
|
||||
if titles:
|
||||
title = titles.get('en') or titles[titles.keys()[0]]
|
||||
title = self.dict_selection(video.get('titles', {}), 'en')
|
||||
if not title:
|
||||
title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
|
||||
container_titles = video.get('container', {}).get('titles')
|
||||
if container_titles:
|
||||
container_title = container_titles.get('en') or container_titles[container_titles.keys()[0]]
|
||||
title = '%s - %s' % (container_title, title)
|
||||
container_titles = video.get('container', {}).get('titles', {})
|
||||
container_title = self.dict_selection(container_titles, 'en')
|
||||
title = '%s - %s' % (container_title, title)
|
||||
|
||||
descriptions = video.get('descriptions')
|
||||
description = descriptions.get('en') or descriptions[titles.keys()[0]] if descriptions else None
|
||||
description = self.dict_selection(video.get('descriptions', {}), 'en')
|
||||
|
||||
duration = int_or_none(video.get('duration'))
|
||||
timestamp = parse_iso8601(video.get('created_at'))
|
||||
@@ -242,8 +258,8 @@ class VikiIE(VikiBaseIE):
|
||||
|
||||
formats = []
|
||||
for format_id, stream_dict in streams.items():
|
||||
height = self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
||||
for protocol, format_dict in stream_dict.items():
|
||||
if format_id == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
@@ -299,11 +315,9 @@ class VikiChannelIE(VikiBaseIE):
|
||||
'containers/%s.json' % channel_id, channel_id,
|
||||
'Downloading channel JSON')
|
||||
|
||||
titles = channel['titles']
|
||||
title = titles.get('en') or titles[titles.keys()[0]]
|
||||
title = self.dict_selection(channel['titles'], 'en')
|
||||
|
||||
descriptions = channel['descriptions']
|
||||
description = descriptions.get('en') or descriptions[descriptions.keys()[0]]
|
||||
description = self.dict_selection(channel['descriptions'], 'en')
|
||||
|
||||
entries = []
|
||||
for video_type in ('episodes', 'clips', 'movies'):
|
||||
|
||||
@@ -452,11 +452,7 @@ class VimeoChannelIE(InfoExtractor):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
|
||||
fields = dict(re.findall(r'''(?x)<input\s+
|
||||
type="hidden"\s+
|
||||
name="([^"]+)"\s+
|
||||
value="([^"]*)"
|
||||
''', login_form))
|
||||
fields = self._hidden_inputs(login_form)
|
||||
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
|
||||
fields['token'] = token
|
||||
fields['password'] = password
|
||||
|
||||
@@ -4,7 +4,29 @@ from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class VimpleIE(InfoExtractor):
|
||||
class SprutoBaseIE(InfoExtractor):
|
||||
def _extract_spruto(self, spruto, video_id):
|
||||
playlist = spruto['playlist'][0]
|
||||
title = playlist['title']
|
||||
video_id = playlist.get('videoId') or video_id
|
||||
thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
|
||||
duration = int_or_none(playlist.get('duration'))
|
||||
|
||||
formats = [{
|
||||
'url': f['url'],
|
||||
} for f in playlist['video']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class VimpleIE(SprutoBaseIE):
|
||||
IE_DESC = 'Vimple - one-click video hosting'
|
||||
_VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})'
|
||||
_TESTS = [
|
||||
@@ -30,25 +52,9 @@ class VimpleIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://player.vimple.ru/iframe/%s' % video_id, video_id)
|
||||
|
||||
playlist = self._parse_json(
|
||||
spruto = self._parse_json(
|
||||
self._search_regex(
|
||||
r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'),
|
||||
video_id)['playlist'][0]
|
||||
video_id)
|
||||
|
||||
title = playlist['title']
|
||||
video_id = playlist.get('videoId') or video_id
|
||||
thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
|
||||
duration = int_or_none(playlist.get('duration'))
|
||||
|
||||
formats = [{
|
||||
'url': f['url'],
|
||||
} for f in playlist['video']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
return self._extract_spruto(spruto, video_id)
|
||||
|
||||
@@ -20,8 +20,19 @@ from ..utils import (
|
||||
|
||||
|
||||
class VKIE(InfoExtractor):
|
||||
IE_NAME = 'vk.com'
|
||||
_VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>[^s].*?)(?:\?|%2F|$))'
|
||||
IE_NAME = 'vk'
|
||||
IE_DESC = 'VK'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:m\.)?vk\.com/video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|
|
||||
(?:
|
||||
(?:m\.)?vk\.com/(?:.+?\?.*?z=)?video|
|
||||
(?:www\.)?biqle\.ru/watch/
|
||||
)
|
||||
(?P<videoid>[^s].*?)(?:\?(?:.*\blist=(?P<list_id>[\da-f]+))?|%2F|$)
|
||||
)
|
||||
'''
|
||||
_NETRC_MACHINE = 'vk'
|
||||
|
||||
_TESTS = [
|
||||
@@ -109,11 +120,50 @@ class VKIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
},
|
||||
{
|
||||
# video (removed?) only available with list id
|
||||
'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
|
||||
'md5': '091287af5402239a1051c37ec7b92913',
|
||||
'info_dict': {
|
||||
'id': '171201961',
|
||||
'ext': 'mp4',
|
||||
'title': 'ТюменцевВВ_09.07.2015',
|
||||
'uploader': 'Anton Ivanov',
|
||||
'duration': 109,
|
||||
'upload_date': '20150709',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
# youtube embed
|
||||
'url': 'https://vk.com/video276849682_170681728',
|
||||
'info_dict': {
|
||||
'id': 'V3K4mi0SYkc',
|
||||
'ext': 'mp4',
|
||||
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
|
||||
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
|
||||
'duration': 179,
|
||||
'upload_date': '20130116',
|
||||
'uploader': "Children's Joy Foundation",
|
||||
'uploader_id': 'thecjf',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
# removed video, just testing that we match the pattern
|
||||
'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# age restricted video, requires vk account credentials
|
||||
'url': 'https://vk.com/video205387401_164765225',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# vk wrapper
|
||||
'url': 'http://www.biqle.ru/watch/847655_160197695',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _login(self):
|
||||
@@ -124,9 +174,7 @@ class VKIE(InfoExtractor):
|
||||
login_page = self._download_webpage(
|
||||
'https://vk.com', None, 'Downloading login page')
|
||||
|
||||
login_form = dict(re.findall(
|
||||
r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"',
|
||||
login_page))
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'email': username.encode('cp1251'),
|
||||
@@ -154,8 +202,20 @@ class VKIE(InfoExtractor):
|
||||
video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
|
||||
|
||||
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
|
||||
|
||||
# Some videos (removed?) can only be downloaded with list id specified
|
||||
list_id = mobj.group('list_id')
|
||||
if list_id:
|
||||
info_url += '&list=%s' % list_id
|
||||
|
||||
info_page = self._download_webpage(info_url, video_id)
|
||||
|
||||
error_message = self._html_search_regex(
|
||||
r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
|
||||
info_page, 'error message', default=None)
|
||||
if error_message:
|
||||
raise ExtractorError(error_message, expected=True)
|
||||
|
||||
if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
|
||||
raise ExtractorError(
|
||||
'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
|
||||
@@ -174,16 +234,20 @@ class VKIE(InfoExtractor):
|
||||
|
||||
r'<!>Видео временно недоступно':
|
||||
'Video %s is temporarily unavailable.',
|
||||
|
||||
r'<!>Access denied':
|
||||
'Access denied to video %s.',
|
||||
}
|
||||
|
||||
for error_re, error_msg in ERRORS.items():
|
||||
if re.search(error_re, info_page):
|
||||
raise ExtractorError(error_msg % video_id, expected=True)
|
||||
|
||||
m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page)
|
||||
if m_yt is not None:
|
||||
self.to_screen('Youtube video detected')
|
||||
return self.url_result(m_yt.group(1), 'Youtube')
|
||||
youtube_url = self._search_regex(
|
||||
r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
|
||||
info_page, 'youtube iframe', default=None)
|
||||
if youtube_url:
|
||||
return self.url_result(youtube_url, 'Youtube')
|
||||
|
||||
m_rutube = re.search(
|
||||
r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)
|
||||
@@ -237,25 +301,34 @@ class VKIE(InfoExtractor):
|
||||
|
||||
|
||||
class VKUserVideosIE(InfoExtractor):
|
||||
IE_NAME = 'vk.com:user-videos'
|
||||
IE_DESC = 'vk.com:All of a user\'s videos'
|
||||
_VALID_URL = r'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?'
|
||||
IE_NAME = 'vk:uservideos'
|
||||
IE_DESC = "VK - User's Videos"
|
||||
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)$'
|
||||
_TEMPLATE_URL = 'https://vk.com/videos'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://vk.com/videos205387401',
|
||||
'info_dict': {
|
||||
'id': '205387401',
|
||||
'title': "Tom Cruise's Videos",
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://vk.com/videos-77521',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
page = self._download_webpage(url, page_id)
|
||||
video_ids = orderedSet(
|
||||
m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page))
|
||||
url_entries = [
|
||||
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'http://vk.com/video' + video_id, 'VK', video_id=video_id)
|
||||
for video_id in video_ids]
|
||||
return self.playlist_result(url_entries, page_id)
|
||||
for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
|
||||
|
||||
title = unescapeHTML(self._search_regex(
|
||||
r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
|
||||
webpage, 'title', default=page_id))
|
||||
|
||||
return self.playlist_result(entries, page_id, title)
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
@@ -28,12 +26,7 @@ class VodlockerIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
fields = dict(re.findall(r'''(?x)<input\s+
|
||||
type="hidden"\s+
|
||||
name="([^"]+)"\s+
|
||||
(?:id="[^"]+"\s+)?
|
||||
value="([^"]*)"
|
||||
''', webpage))
|
||||
fields = self._hidden_inputs(webpage)
|
||||
|
||||
if fields['op'] == 'download1':
|
||||
self._sleep(3, video_id) # they do detect when requests happen too fast!
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
@@ -98,3 +100,42 @@ class WebOfStoriesIE(InfoExtractor):
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
}
|
||||
|
||||
|
||||
class WebOfStoriesPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?webofstories\.com/playAll/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.webofstories.com/playAll/donald.knuth',
|
||||
'info_dict': {
|
||||
'id': 'donald.knuth',
|
||||
'title': 'Donald Knuth (Scientist)',
|
||||
},
|
||||
'playlist_mincount': 97,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories')
|
||||
for video_number in set(re.findall('href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage))
|
||||
]
|
||||
|
||||
title = self._search_regex(
|
||||
r'<div id="speakerName">\s*<span>([^<]+)</span>',
|
||||
webpage, 'speaker', default=None)
|
||||
if title:
|
||||
field = self._search_regex(
|
||||
r'<span id="primaryField">([^<]+)</span>',
|
||||
webpage, 'field', default=None)
|
||||
if field:
|
||||
title += ' (%s)' % field
|
||||
|
||||
if not title:
|
||||
title = self._search_regex(
|
||||
r'<title>Play\s+all\s+stories\s*-\s*([^<]+)\s*-\s*Web\s+of\s+Stories</title>',
|
||||
webpage, 'title')
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title)
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class XBefIE(InfoExtractor):
|
||||
@@ -30,7 +28,7 @@ class XBefIE(InfoExtractor):
|
||||
config_url_enc = self._download_webpage(
|
||||
'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id,
|
||||
note='Retrieving config URL')
|
||||
config_url = compat_urllib_parse.unquote(config_url_enc)
|
||||
config_url = compat_urllib_parse_unquote(config_url_enc)
|
||||
config = self._download_xml(
|
||||
config_url, video_id, note='Retrieving config')
|
||||
|
||||
|
||||
@@ -2,9 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class XNXXIE(InfoExtractor):
|
||||
@@ -26,7 +24,7 @@ class XNXXIE(InfoExtractor):
|
||||
|
||||
video_url = self._search_regex(r'flv_url=(.*?)&',
|
||||
webpage, 'video URL')
|
||||
video_url = compat_urllib_parse.unquote(video_url)
|
||||
video_url = compat_urllib_parse_unquote(video_url)
|
||||
|
||||
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
|
||||
webpage, 'title')
|
||||
|
||||
@@ -5,7 +5,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
@@ -59,7 +59,7 @@ class XTubeIE(InfoExtractor):
|
||||
for format_id, video_url in re.findall(
|
||||
r'flashvars\.quality_(.+?)\s*=\s*"([^"]+)"', webpage):
|
||||
fmt = {
|
||||
'url': compat_urllib_parse.unquote(video_url),
|
||||
'url': compat_urllib_parse_unquote(video_url),
|
||||
'format_id': format_id,
|
||||
}
|
||||
m = re.search(r'^(?P<height>\d+)[pP]', format_id)
|
||||
@@ -68,7 +68,7 @@ class XTubeIE(InfoExtractor):
|
||||
formats.append(fmt)
|
||||
|
||||
if not formats:
|
||||
video_url = compat_urllib_parse.unquote(self._search_regex(
|
||||
video_url = compat_urllib_parse_unquote(self._search_regex(
|
||||
r'flashvars\.video_url\s*=\s*"([^"]+)"',
|
||||
webpage, 'video URL'))
|
||||
formats.append({'url': video_url})
|
||||
|
||||
@@ -13,6 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class XuiteIE(InfoExtractor):
|
||||
IE_DESC = '隨意窩Xuite影音'
|
||||
_REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?'
|
||||
_VALID_URL = r'https?://vlog\.xuite\.net/(?:play|embed)/(?P<id>%s)' % _REGEX_BASE64
|
||||
_TESTS = [{
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user