mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-12-08 15:12:43 +01:00
Compare commits
382 Commits
2017.02.16
...
2017.04.14
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
38d2f8325f | ||
|
|
6f4dd6667b | ||
|
|
95728fda70 | ||
|
|
3f7409f745 | ||
|
|
b2fff30817 | ||
|
|
f59746372a | ||
|
|
34d98cc411 | ||
|
|
40fcba5edb | ||
|
|
e4d74e2778 | ||
|
|
3ef1d0c733 | ||
|
|
3962260b7d | ||
|
|
0ee79a378a | ||
|
|
79a51069e5 | ||
|
|
a9a346535b | ||
|
|
89beedd31f | ||
|
|
e109f1ff43 | ||
|
|
d23028a8fb | ||
|
|
6214611a4a | ||
|
|
1730878167 | ||
|
|
689cd458a6 | ||
|
|
6b9466de2f | ||
|
|
61568e50cf | ||
|
|
364a69e8c6 | ||
|
|
6240925b40 | ||
|
|
964744af95 | ||
|
|
1af959ef9f | ||
|
|
a206ef62df | ||
|
|
3f2ce6896a | ||
|
|
a6f7263cf4 | ||
|
|
4372436504 | ||
|
|
eb8cc8ea3b | ||
|
|
41b263ac8a | ||
|
|
ca8fca9d9d | ||
|
|
e129fa0846 | ||
|
|
2bd875edfe | ||
|
|
95152630db | ||
|
|
04e431cf97 | ||
|
|
1591ba258a | ||
|
|
29c6726646 | ||
|
|
a66e25859a | ||
|
|
c93c0fc2fd | ||
|
|
90e3f18fc1 | ||
|
|
5f3e0b69ef | ||
|
|
28b674ca23 | ||
|
|
e18f1da97a | ||
|
|
78280352ca | ||
|
|
a01825a541 | ||
|
|
f8f2da25ab | ||
|
|
4c03973296 | ||
|
|
60e5016199 | ||
|
|
c4d6fc6d65 | ||
|
|
1b3feca0a7 | ||
|
|
80b2fdf9ac | ||
|
|
3bef10a50c | ||
|
|
a84da06f49 | ||
|
|
3461f5db06 | ||
|
|
0378b8b917 | ||
|
|
7f04386b89 | ||
|
|
fac39cccd4 | ||
|
|
b68e00b08a | ||
|
|
2ab0bfcd81 | ||
|
|
b022f4f600 | ||
|
|
e2435ba5f3 | ||
|
|
a9bb61a425 | ||
|
|
dbf70c489f | ||
|
|
61e2331ad8 | ||
|
|
fd47550885 | ||
|
|
4457823dda | ||
|
|
b3633fa0ce | ||
|
|
b56e41a701 | ||
|
|
a76c25146a | ||
|
|
361f293ab8 | ||
|
|
b8d8cced9b | ||
|
|
51342717cd | ||
|
|
48ab554feb | ||
|
|
a6f3a162f3 | ||
|
|
91399b2fcc | ||
|
|
eecea00d36 | ||
|
|
2cd668ee59 | ||
|
|
ca77b92f94 | ||
|
|
e97fc8d6b8 | ||
|
|
be61efdf17 | ||
|
|
77c8ebe631 | ||
|
|
7453999580 | ||
|
|
1640eb0961 | ||
|
|
3e943cfe09 | ||
|
|
82be732b17 | ||
|
|
639e5b2a84 | ||
|
|
128244657b | ||
|
|
12ee65ea0d | ||
|
|
aea1dccbd0 | ||
|
|
9e691da067 | ||
|
|
82eefd0be0 | ||
|
|
f7923a4c39 | ||
|
|
cc63259d18 | ||
|
|
2bfaf89b6c | ||
|
|
4f06c1c9fc | ||
|
|
942b44a052 | ||
|
|
a426ef6d78 | ||
|
|
41c5e60dd5 | ||
|
|
d212c93d16 | ||
|
|
15495cf3e5 | ||
|
|
5b7cc56b05 | ||
|
|
590bc6f6a1 | ||
|
|
51098426b8 | ||
|
|
c73e330e7a | ||
|
|
fb4fc44928 | ||
|
|
03486dbb01 | ||
|
|
51ef4919df | ||
|
|
d66d43c554 | ||
|
|
610a6d1053 | ||
|
|
c6c22e984d | ||
|
|
d97729c83a | ||
|
|
7aa0ee321b | ||
|
|
e8e4cc5a6a | ||
|
|
c7301e677b | ||
|
|
048086920b | ||
|
|
1088d76da6 | ||
|
|
31a1214076 | ||
|
|
d0ba55871e | ||
|
|
54b960f340 | ||
|
|
a3ccd6bd11 | ||
|
|
7963b6cba8 | ||
|
|
bea7af6947 | ||
|
|
a5d783f525 | ||
|
|
d0572557c2 | ||
|
|
52d5ecabd5 | ||
|
|
b0f7f21cb9 | ||
|
|
579c99a284 | ||
|
|
ca5ed022e9 | ||
|
|
391d076d7c | ||
|
|
c183e14f89 | ||
|
|
093dad9e25 | ||
|
|
e8686e51d7 | ||
|
|
8e5a7c5e67 | ||
|
|
e1e35d1ac6 | ||
|
|
21fbf0f955 | ||
|
|
97952bdb78 | ||
|
|
8a8cc339b6 | ||
|
|
957f453429 | ||
|
|
0e9a73e612 | ||
|
|
0ecdd3adbd | ||
|
|
9487ce03e9 | ||
|
|
45e6ad21b4 | ||
|
|
68220649fa | ||
|
|
46b18f2349 | ||
|
|
772b5ff57f | ||
|
|
f68ef1e2ab | ||
|
|
febfe1e262 | ||
|
|
5f0daab1ca | ||
|
|
2a721cdff2 | ||
|
|
e7a51a4c02 | ||
|
|
3e5856d860 | ||
|
|
ea883a687c | ||
|
|
7f3590c43b | ||
|
|
7d539ee10a | ||
|
|
6ad476079d | ||
|
|
0efbc6b56d | ||
|
|
21bfcd3d6e | ||
|
|
b51dc9db0e | ||
|
|
a309684285 | ||
|
|
ba448445b8 | ||
|
|
5db83d79bf | ||
|
|
2a751e137f | ||
|
|
398887b4c0 | ||
|
|
66bf351f80 | ||
|
|
9d08963022 | ||
|
|
e313d209c2 | ||
|
|
ff9d509d20 | ||
|
|
c1795ca6c8 | ||
|
|
8c99623259 | ||
|
|
57b0ddb35f | ||
|
|
a28f8d7396 | ||
|
|
7049799470 | ||
|
|
4605c94d1a | ||
|
|
a8e687a4da | ||
|
|
f9e5c92c94 | ||
|
|
c2ee861c6d | ||
|
|
bd34c32bd7 | ||
|
|
f802c48660 | ||
|
|
76bee08fe7 | ||
|
|
2913821723 | ||
|
|
0e7f9a9b48 | ||
|
|
0cf2352e85 | ||
|
|
0f6b87d067 | ||
|
|
d7344d33b1 | ||
|
|
b08cc749d6 | ||
|
|
b68a812ea8 | ||
|
|
2e76bdc850 | ||
|
|
fe646a2f10 | ||
|
|
9df53ea36e | ||
|
|
d7d7f84c95 | ||
|
|
dccd0ab35d | ||
|
|
80146dcc6c | ||
|
|
e30ccf7047 | ||
|
|
54a3a8827b | ||
|
|
92cb5763f4 | ||
|
|
da92da4b88 | ||
|
|
1664702626 | ||
|
|
3f116b189b | ||
|
|
4b5de77bdb | ||
|
|
96182695e4 | ||
|
|
fc11ad3833 | ||
|
|
d2b64e04b4 | ||
|
|
5dd376345b | ||
|
|
1a2192cb90 | ||
|
|
0236cd0dfd | ||
|
|
ed0cf9b383 | ||
|
|
a50862b735 | ||
|
|
6d0fe752bf | ||
|
|
afa4597618 | ||
|
|
75027364ba | ||
|
|
5316566edc | ||
|
|
c64c03be35 | ||
|
|
bcefc59279 | ||
|
|
6f211dc936 | ||
|
|
f24c1e5584 | ||
|
|
466274fe9a | ||
|
|
30f8f142d4 | ||
|
|
a3ba8a7acf | ||
|
|
054a587de8 | ||
|
|
64b7ccef3e | ||
|
|
6f4e4132d8 | ||
|
|
eb3079b6ce | ||
|
|
bc82f22879 | ||
|
|
4d058c9862 | ||
|
|
d16f27ca27 | ||
|
|
cbb127568a | ||
|
|
d02d4fa0a9 | ||
|
|
692fa200ca | ||
|
|
9bae185ba6 | ||
|
|
4d345bf17b | ||
|
|
250eea6821 | ||
|
|
28d15b73f8 | ||
|
|
11bb6ad1a5 | ||
|
|
c9612c0487 | ||
|
|
af5049f128 | ||
|
|
158af5242e | ||
|
|
40df485f55 | ||
|
|
4b8a984c67 | ||
|
|
83e8fce628 | ||
|
|
aa9cc2ecbf | ||
|
|
1dc24093f8 | ||
|
|
11bae9cdde | ||
|
|
43b38424a9 | ||
|
|
948519b35d | ||
|
|
87dadd456a | ||
|
|
7c4aa6fd6f | ||
|
|
9bd05b5a18 | ||
|
|
0a5445ddbe | ||
|
|
f48409c7ac | ||
|
|
c9619f0a17 | ||
|
|
f4c68ba372 | ||
|
|
ef48a1175d | ||
|
|
c6184bcf7b | ||
|
|
18abb74376 | ||
|
|
dbc01fdb6f | ||
|
|
f264c62334 | ||
|
|
0dc5a86a32 | ||
|
|
0e879f432a | ||
|
|
892b47ab6c | ||
|
|
fdeea72611 | ||
|
|
7fd4655256 | ||
|
|
fd5c4aab59 | ||
|
|
8878789f11 | ||
|
|
a5cf17989b | ||
|
|
b3aec47665 | ||
|
|
9d0c08a02c | ||
|
|
e498758b9c | ||
|
|
5fc8d89361 | ||
|
|
d374d943f3 | ||
|
|
103f8c8d36 | ||
|
|
922ab7840b | ||
|
|
831217291a | ||
|
|
db182c63fb | ||
|
|
eeb0a95684 | ||
|
|
231bcd0b6b | ||
|
|
204efc8509 | ||
|
|
5d3a51e1b9 | ||
|
|
ad3033037c | ||
|
|
f3bc281239 | ||
|
|
441d7a32e5 | ||
|
|
51ed496307 | ||
|
|
68f17a9c2d | ||
|
|
39e7277ed1 | ||
|
|
42dcdbe11c | ||
|
|
6b097cff27 | ||
|
|
f2f7961820 | ||
|
|
be5df5ee31 | ||
|
|
f2980fddeb | ||
|
|
0f57447de7 | ||
|
|
19f3821821 | ||
|
|
8e1409fd80 | ||
|
|
050f143c12 | ||
|
|
fafc2bf5a9 | ||
|
|
b3175982c3 | ||
|
|
89db639dfe | ||
|
|
d0d9ade486 | ||
|
|
28572a1a0b | ||
|
|
0f3d41b44d | ||
|
|
d5fd9a3be3 | ||
|
|
ada77fa544 | ||
|
|
9e03aa75c7 | ||
|
|
30eaa3a702 | ||
|
|
c59f703610 | ||
|
|
bc61c80c14 | ||
|
|
345b24538b | ||
|
|
63a29b6118 | ||
|
|
b5869560a4 | ||
|
|
527ef85fe9 | ||
|
|
58ad6995cd | ||
|
|
a86e416088 | ||
|
|
71e9577b94 | ||
|
|
0d427c8304 | ||
|
|
139d8ac106 | ||
|
|
abd29a2ced | ||
|
|
31615ac279 | ||
|
|
fc320a40d9 | ||
|
|
7345d6d465 | ||
|
|
86466a8b6f | ||
|
|
33dc173cdc | ||
|
|
3444844b04 | ||
|
|
8c6c88c7da | ||
|
|
159aaaa9d0 | ||
|
|
eea0716cae | ||
|
|
336a76551b | ||
|
|
dc0a869e5e | ||
|
|
e39b5d4ab8 | ||
|
|
e469ab2528 | ||
|
|
890d44b005 | ||
|
|
6926304472 | ||
|
|
3ccdde8cb7 | ||
|
|
da42ff0668 | ||
|
|
82f662182b | ||
|
|
2cc7fcd338 | ||
|
|
6d4c259765 | ||
|
|
c78dd35491 | ||
|
|
8ffb8e63fe | ||
|
|
983e9b7746 | ||
|
|
8936f68a0b | ||
|
|
c58b7ffef4 | ||
|
|
f1a78ee4ef | ||
|
|
de64e23c56 | ||
|
|
553f6dbac7 | ||
|
|
0aa10994f4 | ||
|
|
4248dad92b | ||
|
|
0a840f584c | ||
|
|
0016b84e16 | ||
|
|
18a0defab0 | ||
|
|
5d3fbf77d9 | ||
|
|
80b59020e0 | ||
|
|
71631862f4 | ||
|
|
89cc7fe770 | ||
|
|
04d906eae3 | ||
|
|
8ab8066cf0 | ||
|
|
01b1aa9ff4 | ||
|
|
ff4007891f | ||
|
|
28200e654b | ||
|
|
e633f21a96 | ||
|
|
d392005a79 | ||
|
|
773f291dcb | ||
|
|
bf5b9d859a | ||
|
|
049a0f4d6d | ||
|
|
ac33accd96 | ||
|
|
e84888b432 | ||
|
|
02d9b82a23 | ||
|
|
a2e3286676 | ||
|
|
f75caf059e | ||
|
|
bdabbc220c | ||
|
|
70bcc444a9 | ||
|
|
28e35f5070 | ||
|
|
cf3704c132 | ||
|
|
2c1f442c2b | ||
|
|
bad4ccdb5d | ||
|
|
db76c30c6e | ||
|
|
c2bde5d081 | ||
|
|
90fad0e74c | ||
|
|
d94badc755 | ||
|
|
fef51645d6 | ||
|
|
4cead6a614 | ||
|
|
a4a554a793 | ||
|
|
b898f0a173 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.16**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.14**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2017.02.16
|
[debug] youtube-dl version 2017.04.14
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
|||||||
9
AUTHORS
9
AUTHORS
@@ -202,3 +202,12 @@ Fabian Stahl
|
|||||||
Bagira
|
Bagira
|
||||||
Odd Stråbø
|
Odd Stråbø
|
||||||
Philip Herzog
|
Philip Herzog
|
||||||
|
Thomas Christlieb
|
||||||
|
Marek Rusinowski
|
||||||
|
Tobias Gruetzmacher
|
||||||
|
Olivier Bilodeau
|
||||||
|
Lars Vierbergen
|
||||||
|
Juanjo Benages
|
||||||
|
Xiao Di Guan
|
||||||
|
Thomas Winant
|
||||||
|
Daniel Twardowski
|
||||||
|
|||||||
386
ChangeLog
386
ChangeLog
@@ -1,3 +1,389 @@
|
|||||||
|
version 2017.04.14
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955)
|
||||||
|
+ [adobepass] Improve Comcast and Verizon login code (#10803)
|
||||||
|
+ [adobepass] Add support for Verizon (#10803)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [aenetworks] Add support for specials (#12723)
|
||||||
|
+ [hbo] Extract HLS formats
|
||||||
|
+ [go90] Add support for go90.com (#10127)
|
||||||
|
+ [tv2hu] Add support for tv2.hu (#10509)
|
||||||
|
+ [generic] Exclude URLs with xml ext from valid video URLs (#10768, #11654)
|
||||||
|
* [youtube] Improve HLS formats extraction
|
||||||
|
* [afreecatv] Fix extraction for videos with different key layout (#12718)
|
||||||
|
- [youtube] Remove explicit preference for audio-only and video-only formats in
|
||||||
|
order not to break sorting when new formats appear
|
||||||
|
* [canalplus] Bypass geo restriction
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.04.11
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [afreecatv] Fix extraction (#12706)
|
||||||
|
+ [generic] Add support for <object> YouTube embeds (#12637)
|
||||||
|
* [bbccouk] Treat bitrate as audio+video bitrate in media selector
|
||||||
|
+ [bbccouk] Skip unrecognized formats in media selector (#12701)
|
||||||
|
+ [bbccouk] Add support for https protocol in media selector (#12701)
|
||||||
|
* [curiositystream] Fix extraction (#12638)
|
||||||
|
* [adn] Update subtitle decryption key
|
||||||
|
* [chaturbate] Fix extraction (#12665, #12688, #12690)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.04.09
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [medici] Add support for medici.tv (#3406)
|
||||||
|
+ [rbmaradio] Add support for redbullradio.com URLs (#12687)
|
||||||
|
+ [npo:live] Add support for default URL (#12555)
|
||||||
|
* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
|
||||||
|
+ [thesun] Add support for thesun.co.uk (#11298, #12674)
|
||||||
|
+ [ceskatelevize:porady] Add support for porady (#7411, #12645)
|
||||||
|
* [ceskatelevize] Improve extraction and remove URL replacement hacks
|
||||||
|
+ [kaltura] Add support for iframe embeds (#12679)
|
||||||
|
* [airmozilla] Fix extraction (#12670)
|
||||||
|
* [wshh] Extract html5 entries and delegate to generic extractor (#12676)
|
||||||
|
+ [raiplay] Extract subtitles
|
||||||
|
+ [xfileshare] Add support for vidlo.us (#12660)
|
||||||
|
+ [xfileshare] Add support for vidbom.com (#12661)
|
||||||
|
+ [aenetworks] Add more video URL regular expressions (#12657)
|
||||||
|
+ [odnoklassniki] Fix format sorting for 1080p quality
|
||||||
|
+ [rtl2] Add support for you.rtl2.de (#10257)
|
||||||
|
+ [vshare] Add support for vshare.io (#12278)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.04.03
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/common] Add censorship check for TransTelekom ISP
|
||||||
|
* [extractor/common] Move censorship checks to a separate method
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [discoveryvr] Add support for discoveryvr.com (#12578)
|
||||||
|
+ [tv5mondeplus] Add support for tv5mondeplus.com (#11386)
|
||||||
|
+ [periscope] Add support for pscp.tv URLs (#12618, #12625)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.04.02
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Return early when extraction of url_transparent fails
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [rai] Fix and improve extraction (#11790)
|
||||||
|
+ [vrv] Add support for series pages
|
||||||
|
* [limelight] Improve extraction for audio only formats
|
||||||
|
* [funimation] Fix extraction (#10696, #11773)
|
||||||
|
+ [xfileshare] Add support for vidabc.com (#12589)
|
||||||
|
+ [xfileshare] Improve extraction and extract hls formats
|
||||||
|
+ [crunchyroll] Pass geo verification proxy
|
||||||
|
+ [cwtv] Extract ISM formats
|
||||||
|
+ [tvplay] Bypass geo restriction
|
||||||
|
+ [vrv] Add support for vrv.co
|
||||||
|
+ [packtpub] Add support for packtpub.com (#12610)
|
||||||
|
+ [generic] Pass base_url to _parse_jwplayer_data
|
||||||
|
+ [adn] Add support for animedigitalnetwork.fr (#4866)
|
||||||
|
+ [allocine] Extract more metadata
|
||||||
|
* [allocine] Fix extraction (#12592)
|
||||||
|
* [openload] Fix extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.03.26
|
||||||
|
|
||||||
|
Core
|
||||||
|
* Don't raise an error if JWPlayer config data is not a Javascript object
|
||||||
|
literal. _find_jwplayer_data now returns a dict rather than a str. (#12307)
|
||||||
|
* Expand environment variables for options representing paths (#12556)
|
||||||
|
+ [utils] Introduce expand_path
|
||||||
|
* [downloader/hls] Delegate downloading to ffmpeg immediately for live streams
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [afreecatv] Fix extraction (#12179)
|
||||||
|
+ [atvat] Add support for atv.at (#5325)
|
||||||
|
+ [fox] Add metadata extraction (#12391)
|
||||||
|
+ [atresplayer] Extract DASH formats
|
||||||
|
+ [atresplayer] Extract HD manifest (#12548)
|
||||||
|
* [atresplayer] Fix login error detection (#12548)
|
||||||
|
* [franceculture] Fix extraction (#12547)
|
||||||
|
* [youtube] Improve URL regular expression (#12538)
|
||||||
|
* [generic] Do not follow redirects to the same URL
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.03.24
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
- [9c9media] Remove mp4 URL extraction request
|
||||||
|
+ [bellmedia] Add support for etalk.ca and space.ca (#12447)
|
||||||
|
* [channel9] Fix extraction (#11323)
|
||||||
|
* [cloudy] Fix extraction (#12525)
|
||||||
|
+ [hbo] Add support for free episode URLs and new formats extraction (#12519)
|
||||||
|
* [condenast] Fix extraction and style (#12526)
|
||||||
|
* [viu] Relax URL regular expression (#12529)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.03.22
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
- [pluralsight] Omit module title from video title (#12506)
|
||||||
|
* [pornhub] Decode obfuscated video URL (#12470, #12515)
|
||||||
|
* [senateisvp] Allow https URL scheme for embeds (#12512)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.03.20
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [YoutubeDL] Allow multiple input URLs to be used with stdout (-) as
|
||||||
|
output template
|
||||||
|
+ [adobepass] Detect and output error on authz token extraction (#12472)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [bostonglobe] Add extractor for bostonglobe.com (#12099)
|
||||||
|
+ [toongoggles] Add support for toongoggles.com (#12171)
|
||||||
|
+ [medialaan] Add support for Medialaan sites (#9974, #11912)
|
||||||
|
+ [discoverynetworks] Add support for more domains and bypass geo restriction
|
||||||
|
* [openload] Fix extraction (#10408)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.03.16
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [postprocessor/ffmpeg] Add support for flac
|
||||||
|
+ [extractor/common] Extract SMIL formats from jwplayer
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [generic] Add forgotten return for jwplayer formats
|
||||||
|
* [redbulltv] Improve extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.03.15
|
||||||
|
|
||||||
|
Core
|
||||||
|
* Fix missing subtitles if --add-metadata is used (#12423)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [facebook] Make title optional (#12443)
|
||||||
|
+ [mitele] Add support for ooyala videos (#12430)
|
||||||
|
* [openload] Fix extraction (#12435, #12446)
|
||||||
|
* [streamable] Update API URL (#12433)
|
||||||
|
+ [crunchyroll] Extract season name (#12428)
|
||||||
|
* [discoverygo] Bypass geo restriction
|
||||||
|
+ [discoverygo:playlist] Add support for playlists (#12424)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.03.10
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [generic] Make title optional for jwplayer embeds (#12410)
|
||||||
|
* [wdr:maus] Fix extraction (#12373)
|
||||||
|
* [prosiebensat1] Improve title extraction (#12318, #12327)
|
||||||
|
* [dplayit] Separate and rewrite extractor and bypass geo restriction (#12393)
|
||||||
|
* [miomio] Fix extraction (#12291, #12388, #12402)
|
||||||
|
* [telequebec] Fix description extraction (#12399)
|
||||||
|
* [openload] Fix extraction (#12357)
|
||||||
|
* [brightcove:legacy] Relax videoPlayer validation check (#12381)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.03.07
|
||||||
|
|
||||||
|
Core
|
||||||
|
* Metadata are now added after conversion (#5594)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [soundcloud] Update client id (#12376)
|
||||||
|
* [openload] Fix extraction (#10408, #12357)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.03.06
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Process bytestrings in urljoin (#12369)
|
||||||
|
* [extractor/common] Improve height extraction and extract bitrate
|
||||||
|
* [extractor/common] Move jwplayer formats extraction in separate method
|
||||||
|
+ [external:ffmpeg] Limit test download size to 10KiB (#12362)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [drtv] Add geo countries to GeoRestrictedError
|
||||||
|
+ [drtv:live] Bypass geo restriction
|
||||||
|
+ [tunepk] Add extractor (#12197, #12243)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.03.05
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [twitch] Add basic support for two-factor authentication (#11974)
|
||||||
|
+ [vier] Add support for vijf.be (#12304)
|
||||||
|
+ [redbulltv] Add support for redbull.tv (#3919, #11948)
|
||||||
|
* [douyutv] Switch to the PC API to escape the 5-min limitation (#12316)
|
||||||
|
+ [generic] Add support for rutube embeds
|
||||||
|
+ [rutube] Relax URL regular expression
|
||||||
|
+ [vrak] Add support for vrak.tv (#11452)
|
||||||
|
+ [brightcove:new] Add ability to smuggle geo_countries into URL
|
||||||
|
+ [brightcove:new] Raise GeoRestrictedError
|
||||||
|
* [go] Relax URL regular expression (#12341)
|
||||||
|
* [24video] Use original host for requests (#12339)
|
||||||
|
* [ruutu] Disable DASH formats (#12322)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.03.02
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [adobepass] Add support for Charter Spectrum (#11465)
|
||||||
|
* [YoutubeDL] Don't sanitize identifiers in output template (#12317)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [facebook] Fix extraction (#12323, #12330)
|
||||||
|
* [youtube] Mark errors about rental videos as expected (#12324)
|
||||||
|
+ [npo] Add support for audio
|
||||||
|
* [npo] Adapt to app.php API (#12311, #12320)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.02.28
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Add bytes_to_long and long_to_bytes
|
||||||
|
+ [utils] Add pkcs1pad
|
||||||
|
+ [aes] Add aes_cbc_encrypt
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [azmedien:showplaylist] Add support for show playlists (#12160)
|
||||||
|
+ [youtube:playlist] Recognize another playlist pattern (#11928, #12286)
|
||||||
|
+ [daisuki] Add support for daisuki.net (#2486, #3186, #4738, #6175, #7776,
|
||||||
|
#10060)
|
||||||
|
* [douyu] Fix extraction (#12301)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.02.27
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/common] Limit displaying 2 digits after decimal point in sleep
|
||||||
|
interval message (#12183)
|
||||||
|
+ [extractor/common] Add preference to _parse_html5_media_entries
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [npo] Add support for zapp.nl
|
||||||
|
+ [npo] Add support for hetklokhuis.nl (#12293)
|
||||||
|
- [scivee] Remove extractor (#9315)
|
||||||
|
+ [cda] Decode download URL (#12255)
|
||||||
|
+ [crunchyroll] Improve uploader extraction (#12267)
|
||||||
|
+ [youtube] Raise GeoRestrictedError
|
||||||
|
+ [dailymotion] Raise GeoRestrictedError
|
||||||
|
+ [mdr] Recognize more URL patterns (#12169)
|
||||||
|
+ [tvigle] Raise GeoRestrictedError
|
||||||
|
* [vevo] Fix extraction for videos with the new streams/streamsV3 format
|
||||||
|
(#11719)
|
||||||
|
+ [freshlive] Add support for freshlive.tv (#12175)
|
||||||
|
+ [xhamster] Capture and output videoClosed error (#12263)
|
||||||
|
+ [etonline] Add support for etonline.com (#12236)
|
||||||
|
+ [njpwworld] Add support for njpwworld.com (#11561)
|
||||||
|
* [amcnetworks] Relax URL regular expression (#12127)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.02.24.1
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [noco] Modernize
|
||||||
|
* [noco] Switch login URL to https (#12246)
|
||||||
|
+ [thescene] Extract more metadata
|
||||||
|
* [thescene] Fix extraction (#12235)
|
||||||
|
+ [tubitv] Use geo bypass mechanism
|
||||||
|
* [openload] Fix extraction (#10408)
|
||||||
|
+ [ivi] Raise GeoRestrictedError
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.02.24
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [options] Hide deprecated options from --help
|
||||||
|
* [options] Deprecate --autonumber-size
|
||||||
|
+ [YoutubeDL] Add support for string formatting operations in output template
|
||||||
|
(#5185, #5748, #6841, #9929, #9966, #9978, #12189)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [lynda:course] Add webpage extraction fallback (#12238)
|
||||||
|
* [go] Sign all uplynk URLs and use geo bypass only for free videos
|
||||||
|
(#12087, #12210)
|
||||||
|
+ [skylinewebcams] Add support for skylinewebcams.com (#12221)
|
||||||
|
+ [instagram] Add support for multi video posts (#12226)
|
||||||
|
+ [crunchyroll] Extract playlist entries ids
|
||||||
|
* [mgtv] Fix extraction
|
||||||
|
+ [sohu] Raise GeoRestrictedError
|
||||||
|
+ [leeco] Raise GeoRestrictedError and use geo bypass mechanism
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.02.22
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [crunchyroll] Fix descriptions with double quotes (#12124)
|
||||||
|
* [dailymotion] Make comment count optional (#12209)
|
||||||
|
+ [vidzi] Add support for vidzi.cc (#12213)
|
||||||
|
+ [24video] Add support for 24video.tube (#12217)
|
||||||
|
+ [crackle] Use geo bypass mechanism
|
||||||
|
+ [viewster] Use geo verification headers
|
||||||
|
+ [tfo] Improve geo restriction detection and use geo bypass mechanism
|
||||||
|
+ [telequebec] Use geo bypass mechanism
|
||||||
|
+ [limelight] Extract PlaylistService errors and improve geo restriction
|
||||||
|
detection
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.02.21
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Allow calling _initialize_geo_bypass from extractors
|
||||||
|
(#11970)
|
||||||
|
+ [adobepass] Add support for Time Warner Cable (#12191)
|
||||||
|
+ [travis] Run tests in parallel
|
||||||
|
+ [downloader/ism] Honor HTTP headers when downloading fragments
|
||||||
|
+ [downloader/dash] Honor HTTP headers when downloading fragments
|
||||||
|
+ [utils] Add GeoUtils class for working with geo tools and GeoUtils.random_ipv4
|
||||||
|
+ Add option --geo-bypass-country for explicit geo bypass on behalf of
|
||||||
|
specified country
|
||||||
|
+ Add options to control geo bypass mechanism --geo-bypass and --no-geo-bypass
|
||||||
|
+ Add experimental geo restriction bypass mechanism based on faking
|
||||||
|
X-Forwarded-For HTTP header
|
||||||
|
+ [utils] Introduce GeoRestrictedError for geo restricted videos
|
||||||
|
+ [utils] Introduce YoutubeDLError base class for all youtube-dl exceptions
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [ninecninemedia] Use geo bypass mechanism
|
||||||
|
* [spankbang] Make uploader optional (#12193)
|
||||||
|
+ [iprima] Improve geo restriction detection and disable geo bypass
|
||||||
|
* [iprima] Modernize
|
||||||
|
* [commonmistakes] Disable UnicodeBOM extractor test for python 3.2
|
||||||
|
+ [prosiebensat1] Throw ExtractorError on unsupported page type (#12180)
|
||||||
|
* [nrk] Update _API_HOST and relax _VALID_URL
|
||||||
|
+ [tv4] Bypass geo restriction and improve detection
|
||||||
|
* [tv4] Switch to hls3 protocol (#12177)
|
||||||
|
+ [viki] Improve geo restriction detection
|
||||||
|
+ [vgtv] Improve geo restriction detection
|
||||||
|
+ [srgssr] Improve geo restriction detection
|
||||||
|
+ [vbox7] Improve geo restriction detection and use geo bypass mechanism
|
||||||
|
+ [svt] Improve geo restriction detection and use geo bypass mechanism
|
||||||
|
+ [pbs] Improve geo restriction detection and use geo bypass mechanism
|
||||||
|
+ [ondemandkorea] Improve geo restriction detection and use geo bypass mechanism
|
||||||
|
+ [nrk] Improve geo restriction detection and use geo bypass mechanism
|
||||||
|
+ [itv] Improve geo restriction detection and use geo bypass mechanism
|
||||||
|
+ [go] Improve geo restriction detection and use geo bypass mechanism
|
||||||
|
+ [dramafever] Improve geo restriction detection and use geo bypass mechanism
|
||||||
|
* [brightcove:legacy] Restrict videoPlayer value (#12040)
|
||||||
|
+ [tvn24] Add support for tvn24.pl and tvn24bis.pl (#11679)
|
||||||
|
+ [thisav] Add support for HTML5 media (#11771)
|
||||||
|
* [metacafe] Bypass family filter (#10371)
|
||||||
|
* [viceland] Improve info extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.02.17
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [heise] Improve extraction (#9725)
|
||||||
|
* [ellentv] Improve (#11653)
|
||||||
|
* [openload] Fix extraction (#10408, #12002)
|
||||||
|
+ [theplatform] Recognize URLs with whitespaces (#12044)
|
||||||
|
* [einthusan] Relax URL regular expression (#12141, #12159)
|
||||||
|
+ [generic] Support complex JWPlayer embedded videos (#12030)
|
||||||
|
* [elpais] Improve extraction (#12139)
|
||||||
|
|
||||||
|
|
||||||
version 2017.02.16
|
version 2017.02.16
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
|||||||
197
README.md
197
README.md
@@ -99,11 +99,21 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
--source-address IP Client-side IP address to bind to
|
--source-address IP Client-side IP address to bind to
|
||||||
-4, --force-ipv4 Make all connections via IPv4
|
-4, --force-ipv4 Make all connections via IPv4
|
||||||
-6, --force-ipv6 Make all connections via IPv6
|
-6, --force-ipv6 Make all connections via IPv6
|
||||||
|
|
||||||
|
## Geo Restriction:
|
||||||
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
||||||
some geo-restricted sites. The default
|
some geo-restricted sites. The default
|
||||||
proxy specified by --proxy (or none, if the
|
proxy specified by --proxy (or none, if the
|
||||||
options is not present) is used for the
|
options is not present) is used for the
|
||||||
actual downloading.
|
actual downloading.
|
||||||
|
--geo-bypass Bypass geographic restriction via faking
|
||||||
|
X-Forwarded-For HTTP header (experimental)
|
||||||
|
--no-geo-bypass Do not bypass geographic restriction via
|
||||||
|
faking X-Forwarded-For HTTP header
|
||||||
|
(experimental)
|
||||||
|
--geo-bypass-country CODE Force bypass geographic restriction with
|
||||||
|
explicitly provided two-letter ISO 3166-1 alpha-2
|
||||||
|
country code (experimental)
|
||||||
|
|
||||||
## Video Selection:
|
## Video Selection:
|
||||||
--playlist-start NUMBER Playlist video to start at (default is 1)
|
--playlist-start NUMBER Playlist video to start at (default is 1)
|
||||||
@@ -137,20 +147,22 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
--match-filter FILTER Generic video filter. Specify any key (see
|
--match-filter FILTER Generic video filter. Specify any key (see
|
||||||
help for -o for a list of available keys)
|
help for -o for a list of available keys)
|
||||||
to match if the key is present, !key to
|
to match if the key is present, !key to
|
||||||
check if the key is not present,key >
|
check if the key is not present, key >
|
||||||
NUMBER (like "comment_count > 12", also
|
NUMBER (like "comment_count > 12", also
|
||||||
works with >=, <, <=, !=, =) to compare
|
works with >=, <, <=, !=, =) to compare
|
||||||
against a number, and & to require multiple
|
against a number, key = 'LITERAL' (like
|
||||||
matches. Values which are not known are
|
"uploader = 'Mike Smith'", also works with
|
||||||
excluded unless you put a question mark (?)
|
!=) to match against a string literal and &
|
||||||
after the operator.For example, to only
|
to require multiple matches. Values which
|
||||||
match videos that have been liked more than
|
are not known are excluded unless you put a
|
||||||
100 times and disliked less than 50 times
|
question mark (?) after the operator. For
|
||||||
(or the dislike functionality is not
|
example, to only match videos that have
|
||||||
available at the given service), but who
|
been liked more than 100 times and disliked
|
||||||
also have a description, use --match-filter
|
less than 50 times (or the dislike
|
||||||
"like_count > 100 & dislike_count <? 50 &
|
functionality is not available at the given
|
||||||
description" .
|
service), but who also have a description,
|
||||||
|
use --match-filter "like_count > 100 &
|
||||||
|
dislike_count <? 50 & description" .
|
||||||
--no-playlist Download only the video, if the URL refers
|
--no-playlist Download only the video, if the URL refers
|
||||||
to a video and a playlist.
|
to a video and a playlist.
|
||||||
--yes-playlist Download the playlist, if the URL refers to
|
--yes-playlist Download the playlist, if the URL refers to
|
||||||
@@ -169,10 +181,10 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
-R, --retries RETRIES Number of retries (default is 10), or
|
-R, --retries RETRIES Number of retries (default is 10), or
|
||||||
"infinite".
|
"infinite".
|
||||||
--fragment-retries RETRIES Number of retries for a fragment (default
|
--fragment-retries RETRIES Number of retries for a fragment (default
|
||||||
is 10), or "infinite" (DASH and hlsnative
|
is 10), or "infinite" (DASH, hlsnative and
|
||||||
only)
|
ISM)
|
||||||
--skip-unavailable-fragments Skip unavailable fragments (DASH and
|
--skip-unavailable-fragments Skip unavailable fragments (DASH, hlsnative
|
||||||
hlsnative only)
|
and ISM)
|
||||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||||
available
|
available
|
||||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
||||||
@@ -205,21 +217,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
--id Use only video ID in file name
|
--id Use only video ID in file name
|
||||||
-o, --output TEMPLATE Output filename template, see the "OUTPUT
|
-o, --output TEMPLATE Output filename template, see the "OUTPUT
|
||||||
TEMPLATE" for all the info
|
TEMPLATE" for all the info
|
||||||
--autonumber-size NUMBER Specify the number of digits in
|
|
||||||
%(autonumber)s when it is present in output
|
|
||||||
filename template or --auto-number option
|
|
||||||
is given (default is 5)
|
|
||||||
--autonumber-start NUMBER Specify the start value for %(autonumber)s
|
--autonumber-start NUMBER Specify the start value for %(autonumber)s
|
||||||
(default is 1)
|
(default is 1)
|
||||||
--restrict-filenames Restrict filenames to only ASCII
|
--restrict-filenames Restrict filenames to only ASCII
|
||||||
characters, and avoid "&" and spaces in
|
characters, and avoid "&" and spaces in
|
||||||
filenames
|
filenames
|
||||||
-A, --auto-number [deprecated; use -o
|
|
||||||
"%(autonumber)s-%(title)s.%(ext)s" ] Number
|
|
||||||
downloaded files starting from 00000
|
|
||||||
-t, --title [deprecated] Use title in file name
|
|
||||||
(default)
|
|
||||||
-l, --literal [deprecated] Alias of --title
|
|
||||||
-w, --no-overwrites Do not overwrite files
|
-w, --no-overwrites Do not overwrite files
|
||||||
-c, --continue Force resume of partially downloaded files.
|
-c, --continue Force resume of partially downloaded files.
|
||||||
By default, youtube-dl will resume
|
By default, youtube-dl will resume
|
||||||
@@ -373,8 +375,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
(requires ffmpeg or avconv and ffprobe or
|
(requires ffmpeg or avconv and ffprobe or
|
||||||
avprobe)
|
avprobe)
|
||||||
--audio-format FORMAT Specify audio format: "best", "aac",
|
--audio-format FORMAT Specify audio format: "best", "aac",
|
||||||
"vorbis", "mp3", "m4a", "opus", or "wav";
|
"flac", "mp3", "m4a", "opus", "vorbis", or
|
||||||
"best" by default; No effect without -x
|
"wav"; "best" by default; No effect without
|
||||||
|
-x
|
||||||
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert
|
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert
|
||||||
a value between 0 (better) and 9 (worse)
|
a value between 0 (better) and 9 (worse)
|
||||||
for VBR or a specific bitrate like 128K
|
for VBR or a specific bitrate like 128K
|
||||||
@@ -474,87 +477,89 @@ The `-o` option allows users to indicate a template for the output file names.
|
|||||||
|
|
||||||
**tl;dr:** [navigate me to examples](#output-template-examples).
|
**tl;dr:** [navigate me to examples](#output-template-examples).
|
||||||
|
|
||||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
|
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operation. Allowed names along with sequence type are:
|
||||||
|
|
||||||
- `id`: Video identifier
|
- `id` (string): Video identifier
|
||||||
- `title`: Video title
|
- `title` (string): Video title
|
||||||
- `url`: Video URL
|
- `url` (string): Video URL
|
||||||
- `ext`: Video filename extension
|
- `ext` (string): Video filename extension
|
||||||
- `alt_title`: A secondary title of the video
|
- `alt_title` (string): A secondary title of the video
|
||||||
- `display_id`: An alternative identifier for the video
|
- `display_id` (string): An alternative identifier for the video
|
||||||
- `uploader`: Full name of the video uploader
|
- `uploader` (string): Full name of the video uploader
|
||||||
- `license`: License name the video is licensed under
|
- `license` (string): License name the video is licensed under
|
||||||
- `creator`: The creator of the video
|
- `creator` (string): The creator of the video
|
||||||
- `release_date`: The date (YYYYMMDD) when the video was released
|
- `release_date` (string): The date (YYYYMMDD) when the video was released
|
||||||
- `timestamp`: UNIX timestamp of the moment the video became available
|
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
|
||||||
- `upload_date`: Video upload date (YYYYMMDD)
|
- `upload_date` (string): Video upload date (YYYYMMDD)
|
||||||
- `uploader_id`: Nickname or id of the video uploader
|
- `uploader_id` (string): Nickname or id of the video uploader
|
||||||
- `location`: Physical location where the video was filmed
|
- `location` (string): Physical location where the video was filmed
|
||||||
- `duration`: Length of the video in seconds
|
- `duration` (numeric): Length of the video in seconds
|
||||||
- `view_count`: How many users have watched the video on the platform
|
- `view_count` (numeric): How many users have watched the video on the platform
|
||||||
- `like_count`: Number of positive ratings of the video
|
- `like_count` (numeric): Number of positive ratings of the video
|
||||||
- `dislike_count`: Number of negative ratings of the video
|
- `dislike_count` (numeric): Number of negative ratings of the video
|
||||||
- `repost_count`: Number of reposts of the video
|
- `repost_count` (numeric): Number of reposts of the video
|
||||||
- `average_rating`: Average rating given by users, the scale used depends on the webpage
|
- `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
|
||||||
- `comment_count`: Number of comments on the video
|
- `comment_count` (numeric): Number of comments on the video
|
||||||
- `age_limit`: Age restriction for the video (years)
|
- `age_limit` (numeric): Age restriction for the video (years)
|
||||||
- `format`: A human-readable description of the format
|
- `format` (string): A human-readable description of the format
|
||||||
- `format_id`: Format code specified by `--format`
|
- `format_id` (string): Format code specified by `--format`
|
||||||
- `format_note`: Additional info about the format
|
- `format_note` (string): Additional info about the format
|
||||||
- `width`: Width of the video
|
- `width` (numeric): Width of the video
|
||||||
- `height`: Height of the video
|
- `height` (numeric): Height of the video
|
||||||
- `resolution`: Textual description of width and height
|
- `resolution` (string): Textual description of width and height
|
||||||
- `tbr`: Average bitrate of audio and video in KBit/s
|
- `tbr` (numeric): Average bitrate of audio and video in KBit/s
|
||||||
- `abr`: Average audio bitrate in KBit/s
|
- `abr` (numeric): Average audio bitrate in KBit/s
|
||||||
- `acodec`: Name of the audio codec in use
|
- `acodec` (string): Name of the audio codec in use
|
||||||
- `asr`: Audio sampling rate in Hertz
|
- `asr` (numeric): Audio sampling rate in Hertz
|
||||||
- `vbr`: Average video bitrate in KBit/s
|
- `vbr` (numeric): Average video bitrate in KBit/s
|
||||||
- `fps`: Frame rate
|
- `fps` (numeric): Frame rate
|
||||||
- `vcodec`: Name of the video codec in use
|
- `vcodec` (string): Name of the video codec in use
|
||||||
- `container`: Name of the container format
|
- `container` (string): Name of the container format
|
||||||
- `filesize`: The number of bytes, if known in advance
|
- `filesize` (numeric): The number of bytes, if known in advance
|
||||||
- `filesize_approx`: An estimate for the number of bytes
|
- `filesize_approx` (numeric): An estimate for the number of bytes
|
||||||
- `protocol`: The protocol that will be used for the actual download
|
- `protocol` (string): The protocol that will be used for the actual download
|
||||||
- `extractor`: Name of the extractor
|
- `extractor` (string): Name of the extractor
|
||||||
- `extractor_key`: Key name of the extractor
|
- `extractor_key` (string): Key name of the extractor
|
||||||
- `epoch`: Unix epoch when creating the file
|
- `epoch` (numeric): Unix epoch when creating the file
|
||||||
- `autonumber`: Five-digit number that will be increased with each download, starting at zero
|
- `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero
|
||||||
- `playlist`: Name or id of the playlist that contains the video
|
- `playlist` (string): Name or id of the playlist that contains the video
|
||||||
- `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||||
- `playlist_id`: Playlist identifier
|
- `playlist_id` (string): Playlist identifier
|
||||||
- `playlist_title`: Playlist title
|
- `playlist_title` (string): Playlist title
|
||||||
|
|
||||||
|
|
||||||
Available for the video that belongs to some logical chapter or section:
|
Available for the video that belongs to some logical chapter or section:
|
||||||
- `chapter`: Name or title of the chapter the video belongs to
|
- `chapter` (string): Name or title of the chapter the video belongs to
|
||||||
- `chapter_number`: Number of the chapter the video belongs to
|
- `chapter_number` (numeric): Number of the chapter the video belongs to
|
||||||
- `chapter_id`: Id of the chapter the video belongs to
|
- `chapter_id` (string): Id of the chapter the video belongs to
|
||||||
|
|
||||||
Available for the video that is an episode of some series or programme:
|
Available for the video that is an episode of some series or programme:
|
||||||
- `series`: Title of the series or programme the video episode belongs to
|
- `series` (string): Title of the series or programme the video episode belongs to
|
||||||
- `season`: Title of the season the video episode belongs to
|
- `season` (string): Title of the season the video episode belongs to
|
||||||
- `season_number`: Number of the season the video episode belongs to
|
- `season_number` (numeric): Number of the season the video episode belongs to
|
||||||
- `season_id`: Id of the season the video episode belongs to
|
- `season_id` (string): Id of the season the video episode belongs to
|
||||||
- `episode`: Title of the video episode
|
- `episode` (string): Title of the video episode
|
||||||
- `episode_number`: Number of the video episode within a season
|
- `episode_number` (numeric): Number of the video episode within a season
|
||||||
- `episode_id`: Id of the video episode
|
- `episode_id` (string): Id of the video episode
|
||||||
|
|
||||||
Available for the media that is a track or a part of a music album:
|
Available for the media that is a track or a part of a music album:
|
||||||
- `track`: Title of the track
|
- `track` (string): Title of the track
|
||||||
- `track_number`: Number of the track within an album or a disc
|
- `track_number` (numeric): Number of the track within an album or a disc
|
||||||
- `track_id`: Id of the track
|
- `track_id` (string): Id of the track
|
||||||
- `artist`: Artist(s) of the track
|
- `artist` (string): Artist(s) of the track
|
||||||
- `genre`: Genre(s) of the track
|
- `genre` (string): Genre(s) of the track
|
||||||
- `album`: Title of the album the track belongs to
|
- `album` (string): Title of the album the track belongs to
|
||||||
- `album_type`: Type of the album
|
- `album_type` (string): Type of the album
|
||||||
- `album_artist`: List of all artists that appeared on the album
|
- `album_artist` (string): List of all artists that appeared on the album
|
||||||
- `disc_number`: Number of the disc or other physical medium the track belongs to
|
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
||||||
- `release_year`: Year (YYYY) when the album was released
|
- `release_year` (numeric): Year (YYYY) when the album was released
|
||||||
|
|
||||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
|
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
|
||||||
|
|
||||||
For example, for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
For example, for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||||
|
|
||||||
|
For numeric sequences you can use numeric-related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
|
||||||
|
|
||||||
Output templates can also contain an arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
|
Output templates can also contain an arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
|
||||||
|
|
||||||
To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
|
To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
from inspect import getsource
|
from inspect import getsource
|
||||||
|
import io
|
||||||
import os
|
import os
|
||||||
from os.path import dirname as dirn
|
from os.path import dirname as dirn
|
||||||
import sys
|
import sys
|
||||||
@@ -95,5 +96,5 @@ module_contents.append(
|
|||||||
|
|
||||||
module_src = '\n'.join(module_contents) + '\n'
|
module_src = '\n'.join(module_contents) + '\n'
|
||||||
|
|
||||||
with open(lazy_extractors_filename, 'wt') as f:
|
with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
|
||||||
f.write(module_src)
|
f.write(module_src)
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter"
|
DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter|youtube_lists"
|
||||||
|
|
||||||
test_set=""
|
test_set=""
|
||||||
|
multiprocess_args=""
|
||||||
|
|
||||||
case "$YTDL_TEST_SET" in
|
case "$YTDL_TEST_SET" in
|
||||||
core)
|
core)
|
||||||
@@ -10,10 +11,11 @@ case "$YTDL_TEST_SET" in
|
|||||||
;;
|
;;
|
||||||
download)
|
download)
|
||||||
test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py"
|
test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py"
|
||||||
|
multiprocess_args="--processes=4 --process-timeout=540"
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
break
|
break
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
nosetests test --verbose $test_set
|
nosetests test --verbose $test_set $multiprocess_args
|
||||||
|
|||||||
@@ -28,6 +28,7 @@
|
|||||||
- **acast**
|
- **acast**
|
||||||
- **acast:channel**
|
- **acast:channel**
|
||||||
- **AddAnime**
|
- **AddAnime**
|
||||||
|
- **ADN**: Anime Digital Network
|
||||||
- **AdobeTV**
|
- **AdobeTV**
|
||||||
- **AdobeTVChannel**
|
- **AdobeTVChannel**
|
||||||
- **AdobeTVShow**
|
- **AdobeTVShow**
|
||||||
@@ -67,6 +68,7 @@
|
|||||||
- **arte.tv:playlist**
|
- **arte.tv:playlist**
|
||||||
- **AtresPlayer**
|
- **AtresPlayer**
|
||||||
- **ATTTechChannel**
|
- **ATTTechChannel**
|
||||||
|
- **ATVAt**
|
||||||
- **AudiMedia**
|
- **AudiMedia**
|
||||||
- **AudioBoom**
|
- **AudioBoom**
|
||||||
- **audiomack**
|
- **audiomack**
|
||||||
@@ -78,6 +80,7 @@
|
|||||||
- **awaan:video**
|
- **awaan:video**
|
||||||
- **AZMedien**: AZ Medien videos
|
- **AZMedien**: AZ Medien videos
|
||||||
- **AZMedienPlaylist**: AZ Medien playlists
|
- **AZMedienPlaylist**: AZ Medien playlists
|
||||||
|
- **AZMedienShowPlaylist**: AZ Medien show playlists
|
||||||
- **Azubu**
|
- **Azubu**
|
||||||
- **AzubuLive**
|
- **AzubuLive**
|
||||||
- **BaiduVideo**: 百度视频
|
- **BaiduVideo**: 百度视频
|
||||||
@@ -107,6 +110,7 @@
|
|||||||
- **blinkx**
|
- **blinkx**
|
||||||
- **Bloomberg**
|
- **Bloomberg**
|
||||||
- **BokeCC**
|
- **BokeCC**
|
||||||
|
- **BostonGlobe**
|
||||||
- **Bpb**: Bundeszentrale für politische Bildung
|
- **Bpb**: Bundeszentrale für politische Bildung
|
||||||
- **BR**: Bayerischer Rundfunk Mediathek
|
- **BR**: Bayerischer Rundfunk Mediathek
|
||||||
- **BravoTV**
|
- **BravoTV**
|
||||||
@@ -123,7 +127,7 @@
|
|||||||
- **CamWithHer**
|
- **CamWithHer**
|
||||||
- **canalc2.tv**
|
- **canalc2.tv**
|
||||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||||
- **Canvas**
|
- **Canvas**: canvas.be and een.be
|
||||||
- **CarambaTV**
|
- **CarambaTV**
|
||||||
- **CarambaTVPage**
|
- **CarambaTVPage**
|
||||||
- **CartoonNetwork**
|
- **CartoonNetwork**
|
||||||
@@ -141,6 +145,7 @@
|
|||||||
- **CCTV**: 央视网
|
- **CCTV**: 央视网
|
||||||
- **CDA**
|
- **CDA**
|
||||||
- **CeskaTelevize**
|
- **CeskaTelevize**
|
||||||
|
- **CeskaTelevizePorady**
|
||||||
- **channel9**: Channel 9
|
- **channel9**: Channel 9
|
||||||
- **CharlieRose**
|
- **CharlieRose**
|
||||||
- **Chaturbate**
|
- **Chaturbate**
|
||||||
@@ -191,6 +196,8 @@
|
|||||||
- **dailymotion:playlist**
|
- **dailymotion:playlist**
|
||||||
- **dailymotion:user**
|
- **dailymotion:user**
|
||||||
- **DailymotionCloud**
|
- **DailymotionCloud**
|
||||||
|
- **Daisuki**
|
||||||
|
- **DaisukiPlaylist**
|
||||||
- **daum.net**
|
- **daum.net**
|
||||||
- **daum.net:clip**
|
- **daum.net:clip**
|
||||||
- **daum.net:playlist**
|
- **daum.net:playlist**
|
||||||
@@ -205,10 +212,14 @@
|
|||||||
- **Digiteka**
|
- **Digiteka**
|
||||||
- **Discovery**
|
- **Discovery**
|
||||||
- **DiscoveryGo**
|
- **DiscoveryGo**
|
||||||
|
- **DiscoveryGoPlaylist**
|
||||||
|
- **DiscoveryNetworksDe**
|
||||||
|
- **DiscoveryVR**
|
||||||
- **Disney**
|
- **Disney**
|
||||||
- **Dotsub**
|
- **Dotsub**
|
||||||
- **DouyuTV**: 斗鱼
|
- **DouyuTV**: 斗鱼
|
||||||
- **DPlay**
|
- **DPlay**
|
||||||
|
- **DPlayIt**
|
||||||
- **dramafever**
|
- **dramafever**
|
||||||
- **dramafever:series**
|
- **dramafever:series**
|
||||||
- **DRBonanza**
|
- **DRBonanza**
|
||||||
@@ -239,6 +250,7 @@
|
|||||||
- **ESPN**
|
- **ESPN**
|
||||||
- **ESPNArticle**
|
- **ESPNArticle**
|
||||||
- **EsriVideo**
|
- **EsriVideo**
|
||||||
|
- **ETOnline**
|
||||||
- **Europa**
|
- **Europa**
|
||||||
- **EveryonesMixtape**
|
- **EveryonesMixtape**
|
||||||
- **ExpoTV**
|
- **ExpoTV**
|
||||||
@@ -274,6 +286,7 @@
|
|||||||
- **francetvinfo.fr**
|
- **francetvinfo.fr**
|
||||||
- **Freesound**
|
- **Freesound**
|
||||||
- **freespeech.org**
|
- **freespeech.org**
|
||||||
|
- **FreshLive**
|
||||||
- **Funimation**
|
- **Funimation**
|
||||||
- **FunnyOrDie**
|
- **FunnyOrDie**
|
||||||
- **Fusion**
|
- **Fusion**
|
||||||
@@ -295,6 +308,7 @@
|
|||||||
- **Globo**
|
- **Globo**
|
||||||
- **GloboArticle**
|
- **GloboArticle**
|
||||||
- **Go**
|
- **Go**
|
||||||
|
- **Go90**
|
||||||
- **GodTube**
|
- **GodTube**
|
||||||
- **GodTV**
|
- **GodTV**
|
||||||
- **Golem**
|
- **Golem**
|
||||||
@@ -303,13 +317,14 @@
|
|||||||
- **GPUTechConf**
|
- **GPUTechConf**
|
||||||
- **Groupon**
|
- **Groupon**
|
||||||
- **Hark**
|
- **Hark**
|
||||||
- **HBO**
|
- **hbo**
|
||||||
- **HBOEpisode**
|
- **hbo:episode**
|
||||||
- **HearThisAt**
|
- **HearThisAt**
|
||||||
- **Heise**
|
- **Heise**
|
||||||
- **HellPorno**
|
- **HellPorno**
|
||||||
- **Helsinki**: helsinki.fi
|
- **Helsinki**: helsinki.fi
|
||||||
- **HentaiStigma**
|
- **HentaiStigma**
|
||||||
|
- **hetklokhuis**
|
||||||
- **hgtv.com:show**
|
- **hgtv.com:show**
|
||||||
- **HistoricFilms**
|
- **HistoricFilms**
|
||||||
- **history:topic**: History.com Topic
|
- **history:topic**: History.com Topic
|
||||||
@@ -417,6 +432,8 @@
|
|||||||
- **MatchTV**
|
- **MatchTV**
|
||||||
- **MDR**: MDR.DE and KiKA
|
- **MDR**: MDR.DE and KiKA
|
||||||
- **media.ccc.de**
|
- **media.ccc.de**
|
||||||
|
- **Medialaan**
|
||||||
|
- **Medici**
|
||||||
- **Meipai**: 美拍
|
- **Meipai**: 美拍
|
||||||
- **MelonVOD**
|
- **MelonVOD**
|
||||||
- **META**
|
- **META**
|
||||||
@@ -511,6 +528,7 @@
|
|||||||
- **Nintendo**
|
- **Nintendo**
|
||||||
- **njoy**: N-JOY
|
- **njoy**: N-JOY
|
||||||
- **njoy:embed**
|
- **njoy:embed**
|
||||||
|
- **NJPWWorld**: 新日本プロレスワールド
|
||||||
- **NobelPrize**
|
- **NobelPrize**
|
||||||
- **Noco**
|
- **Noco**
|
||||||
- **Normalboots**
|
- **Normalboots**
|
||||||
@@ -559,6 +577,8 @@
|
|||||||
- **orf:iptv**: iptv.ORF.at
|
- **orf:iptv**: iptv.ORF.at
|
||||||
- **orf:oe1**: Radio Österreich 1
|
- **orf:oe1**: Radio Österreich 1
|
||||||
- **orf:tvthek**: ORF TVthek
|
- **orf:tvthek**: ORF TVthek
|
||||||
|
- **PacktPub**
|
||||||
|
- **PacktPubCourse**
|
||||||
- **PandaTV**: 熊猫TV
|
- **PandaTV**: 熊猫TV
|
||||||
- **pandora.tv**: 판도라TV
|
- **pandora.tv**: 판도라TV
|
||||||
- **parliamentlive.tv**: UK parliament videos
|
- **parliamentlive.tv**: UK parliament videos
|
||||||
@@ -616,9 +636,10 @@
|
|||||||
- **radiofrance**
|
- **radiofrance**
|
||||||
- **RadioJavan**
|
- **RadioJavan**
|
||||||
- **Rai**
|
- **Rai**
|
||||||
- **RaiTV**
|
- **RaiPlay**
|
||||||
- **RBMARadio**
|
- **RBMARadio**
|
||||||
- **RDS**: RDS.ca
|
- **RDS**: RDS.ca
|
||||||
|
- **RedBullTV**
|
||||||
- **RedTube**
|
- **RedTube**
|
||||||
- **RegioTV**
|
- **RegioTV**
|
||||||
- **RENTV**
|
- **RENTV**
|
||||||
@@ -640,7 +661,9 @@
|
|||||||
- **rte**: Raidió Teilifís Éireann TV
|
- **rte**: Raidió Teilifís Éireann TV
|
||||||
- **rte:radio**: Raidió Teilifís Éireann radio
|
- **rte:radio**: Raidió Teilifís Éireann radio
|
||||||
- **rtl.nl**: rtl.nl and rtlxl.nl
|
- **rtl.nl**: rtl.nl and rtlxl.nl
|
||||||
- **RTL2**
|
- **rtl2**
|
||||||
|
- **rtl2:you**
|
||||||
|
- **rtl2:you:series**
|
||||||
- **RTP**
|
- **RTP**
|
||||||
- **RTS**: RTS.ch
|
- **RTS**: RTS.ch
|
||||||
- **rtve.es:alacarta**: RTVE a la carta
|
- **rtve.es:alacarta**: RTVE a la carta
|
||||||
@@ -666,7 +689,6 @@
|
|||||||
- **savefrom.net**
|
- **savefrom.net**
|
||||||
- **SBS**: sbs.com.au
|
- **SBS**: sbs.com.au
|
||||||
- **schooltv**
|
- **schooltv**
|
||||||
- **SciVee**
|
|
||||||
- **screen.yahoo:search**: Yahoo screen search
|
- **screen.yahoo:search**: Yahoo screen search
|
||||||
- **Screencast**
|
- **Screencast**
|
||||||
- **ScreencastOMatic**
|
- **ScreencastOMatic**
|
||||||
@@ -680,6 +702,7 @@
|
|||||||
- **Shared**: shared.sx
|
- **Shared**: shared.sx
|
||||||
- **ShowRoomLive**
|
- **ShowRoomLive**
|
||||||
- **Sina**
|
- **Sina**
|
||||||
|
- **SkylineWebcams**
|
||||||
- **skynewsarabia:article**
|
- **skynewsarabia:article**
|
||||||
- **skynewsarabia:video**
|
- **skynewsarabia:video**
|
||||||
- **SkySports**
|
- **SkySports**
|
||||||
@@ -762,17 +785,18 @@
|
|||||||
- **TheScene**
|
- **TheScene**
|
||||||
- **TheSixtyOne**
|
- **TheSixtyOne**
|
||||||
- **TheStar**
|
- **TheStar**
|
||||||
|
- **TheSun**
|
||||||
- **TheWeatherChannel**
|
- **TheWeatherChannel**
|
||||||
- **ThisAmericanLife**
|
- **ThisAmericanLife**
|
||||||
- **ThisAV**
|
- **ThisAV**
|
||||||
- **ThisOldHouse**
|
- **ThisOldHouse**
|
||||||
- **tinypic**: tinypic.com videos
|
- **tinypic**: tinypic.com videos
|
||||||
- **tlc.de**
|
|
||||||
- **TMZ**
|
- **TMZ**
|
||||||
- **TMZArticle**
|
- **TMZArticle**
|
||||||
- **TNAFlix**
|
- **TNAFlix**
|
||||||
- **TNAFlixNetworkEmbed**
|
- **TNAFlixNetworkEmbed**
|
||||||
- **toggle**
|
- **toggle**
|
||||||
|
- **ToonGoggles**
|
||||||
- **Tosh**: Tosh.0
|
- **Tosh**: Tosh.0
|
||||||
- **tou.tv**
|
- **tou.tv**
|
||||||
- **Toypics**: Toypics user profile
|
- **Toypics**: Toypics user profile
|
||||||
@@ -790,13 +814,16 @@
|
|||||||
- **tunein:program**
|
- **tunein:program**
|
||||||
- **tunein:station**
|
- **tunein:station**
|
||||||
- **tunein:topic**
|
- **tunein:topic**
|
||||||
|
- **TunePk**
|
||||||
- **Turbo**
|
- **Turbo**
|
||||||
- **Tutv**
|
- **Tutv**
|
||||||
- **tv.dfb.de**
|
- **tv.dfb.de**
|
||||||
- **TV2**
|
- **TV2**
|
||||||
|
- **tv2.hu**
|
||||||
- **TV2Article**
|
- **TV2Article**
|
||||||
- **TV3**
|
- **TV3**
|
||||||
- **TV4**: tv4.se and tv4play.se
|
- **TV4**: tv4.se and tv4play.se
|
||||||
|
- **TV5MondePlus**: TV5MONDE+
|
||||||
- **TVA**
|
- **TVA**
|
||||||
- **TVANouvelles**
|
- **TVANouvelles**
|
||||||
- **TVANouvellesArticle**
|
- **TVANouvellesArticle**
|
||||||
@@ -804,6 +831,7 @@
|
|||||||
- **TVCArticle**
|
- **TVCArticle**
|
||||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||||
- **tvland.com**
|
- **tvland.com**
|
||||||
|
- **TVN24**
|
||||||
- **TVNoe**
|
- **TVNoe**
|
||||||
- **tvp**: Telewizja Polska
|
- **tvp**: Telewizja Polska
|
||||||
- **tvp:embed**: Telewizja Polska
|
- **tvp:embed**: Telewizja Polska
|
||||||
@@ -872,7 +900,7 @@
|
|||||||
- **vidme:user**
|
- **vidme:user**
|
||||||
- **vidme:user:likes**
|
- **vidme:user:likes**
|
||||||
- **Vidzi**
|
- **Vidzi**
|
||||||
- **vier**
|
- **vier**: vier.be and vijf.be
|
||||||
- **vier:videos**
|
- **vier:videos**
|
||||||
- **ViewLift**
|
- **ViewLift**
|
||||||
- **ViewLiftEmbed**
|
- **ViewLiftEmbed**
|
||||||
@@ -908,7 +936,11 @@
|
|||||||
- **VoxMedia**
|
- **VoxMedia**
|
||||||
- **Vporn**
|
- **Vporn**
|
||||||
- **vpro**: npo.nl and ntr.nl
|
- **vpro**: npo.nl and ntr.nl
|
||||||
- **VRT**
|
- **Vrak**
|
||||||
|
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
||||||
|
- **vrv**
|
||||||
|
- **vrv:series**
|
||||||
|
- **VShare**
|
||||||
- **vube**: Vube.com
|
- **vube**: Vube.com
|
||||||
- **VuClip**
|
- **VuClip**
|
||||||
- **VVVVID**
|
- **VVVVID**
|
||||||
@@ -936,7 +968,7 @@
|
|||||||
- **WSJ**: Wall Street Journal
|
- **WSJ**: Wall Street Journal
|
||||||
- **XBef**
|
- **XBef**
|
||||||
- **XboxClips**
|
- **XboxClips**
|
||||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE
|
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
|
||||||
- **XHamster**
|
- **XHamster**
|
||||||
- **XHamsterEmbed**
|
- **XHamsterEmbed**
|
||||||
- **xiami:album**: 虾米音乐 - 专辑
|
- **xiami:album**: 虾米音乐 - 专辑
|
||||||
|
|||||||
5
setup.py
5
setup.py
@@ -107,8 +107,8 @@ setup(
|
|||||||
url='https://github.com/rg3/youtube-dl',
|
url='https://github.com/rg3/youtube-dl',
|
||||||
author='Ricardo Garcia',
|
author='Ricardo Garcia',
|
||||||
author_email='ytdl@yt-dl.org',
|
author_email='ytdl@yt-dl.org',
|
||||||
maintainer='Philipp Hagemeister',
|
maintainer='Sergey M.',
|
||||||
maintainer_email='phihag@phihag.de',
|
maintainer_email='dstftw@gmail.com',
|
||||||
packages=[
|
packages=[
|
||||||
'youtube_dl',
|
'youtube_dl',
|
||||||
'youtube_dl.extractor', 'youtube_dl.downloader',
|
'youtube_dl.extractor', 'youtube_dl.downloader',
|
||||||
@@ -130,6 +130,7 @@ setup(
|
|||||||
'Programming Language :: Python :: 3.3',
|
'Programming Language :: Python :: 3.3',
|
||||||
'Programming Language :: Python :: 3.4',
|
'Programming Language :: Python :: 3.4',
|
||||||
'Programming Language :: Python :: 3.5',
|
'Programming Language :: Python :: 3.5',
|
||||||
|
'Programming Language :: Python :: 3.6',
|
||||||
],
|
],
|
||||||
|
|
||||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL
|
from test.helper import FakeYDL, expect_dict
|
||||||
from youtube_dl.extractor.common import InfoExtractor
|
from youtube_dl.extractor.common import InfoExtractor
|
||||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
||||||
@@ -84,6 +84,97 @@ class TestInfoExtractor(unittest.TestCase):
|
|||||||
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
|
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
|
||||||
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
|
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
|
||||||
|
|
||||||
|
def test_extract_jwplayer_data_realworld(self):
|
||||||
|
# from http://www.suffolk.edu/sjc/
|
||||||
|
expect_dict(
|
||||||
|
self,
|
||||||
|
self.ie._extract_jwplayer_data(r'''
|
||||||
|
<script type='text/javascript'>
|
||||||
|
jwplayer('my-video').setup({
|
||||||
|
file: 'rtmp://192.138.214.154/live/sjclive',
|
||||||
|
fallback: 'true',
|
||||||
|
width: '95%',
|
||||||
|
aspectratio: '16:9',
|
||||||
|
primary: 'flash',
|
||||||
|
mediaid:'XEgvuql4'
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
''', None, require_title=False),
|
||||||
|
{
|
||||||
|
'id': 'XEgvuql4',
|
||||||
|
'formats': [{
|
||||||
|
'url': 'rtmp://192.138.214.154/live/sjclive',
|
||||||
|
'ext': 'flv'
|
||||||
|
}]
|
||||||
|
})
|
||||||
|
|
||||||
|
# from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/
|
||||||
|
expect_dict(
|
||||||
|
self,
|
||||||
|
self.ie._extract_jwplayer_data(r'''
|
||||||
|
<script type="text/javascript">
|
||||||
|
jwplayer("mediaplayer").setup({
|
||||||
|
'videoid': "7564",
|
||||||
|
'width': "100%",
|
||||||
|
'aspectratio': "16:9",
|
||||||
|
'stretching': "exactfit",
|
||||||
|
'autostart': 'false',
|
||||||
|
'flashplayer': "https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf",
|
||||||
|
'file': "https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv",
|
||||||
|
'image': "https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg",
|
||||||
|
'filefallback': "https://cdn.pornoxo.com/key=9ZPsTR5EvPLQrBaak2MUGA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/m_4b2157147afe5efa93ce1978e0265289c193874e02597.mp4",
|
||||||
|
'logo.hide': true,
|
||||||
|
'skin': "https://t04.vipstreamservice.com/jwplayer/skin/modieus-blk.zip",
|
||||||
|
'plugins': "https://t04.vipstreamservice.com/jwplayer/dock/dockableskinnableplugin.swf",
|
||||||
|
'dockableskinnableplugin.piclink': "/index.php?key=ajax-videothumbsn&vid=7564&data=2009-12--14--4b2157147afe5efa93ce1978e0265289c193874e02597.flv--17370",
|
||||||
|
'controlbar': 'bottom',
|
||||||
|
'modes': [
|
||||||
|
{type: 'flash', src: 'https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf'}
|
||||||
|
],
|
||||||
|
'provider': 'http'
|
||||||
|
});
|
||||||
|
//noinspection JSAnnotator
|
||||||
|
invideo.setup({
|
||||||
|
adsUrl: "/banner-iframe/?zoneId=32",
|
||||||
|
adsUrl2: "",
|
||||||
|
autostart: false
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
''', 'dummy', require_title=False),
|
||||||
|
{
|
||||||
|
'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg',
|
||||||
|
'formats': [{
|
||||||
|
'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv',
|
||||||
|
'ext': 'flv'
|
||||||
|
}]
|
||||||
|
})
|
||||||
|
|
||||||
|
# from http://www.indiedb.com/games/king-machine/videos
|
||||||
|
expect_dict(
|
||||||
|
self,
|
||||||
|
self.ie._extract_jwplayer_data(r'''
|
||||||
|
<script>
|
||||||
|
jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/\/www.indiedb.com\/","displaytitle":false,"autostart":false,"repeat":false,"title":"king machine trailer 1","sharing":{"link":"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1","code":"<iframe width=\"560\" height=\"315\" src=\"http:\/\/www.indiedb.com\/media\/iframe\/1522983\" frameborder=\"0\" allowfullscreen><\/iframe><br><a href=\"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1\">king machine trailer 1 - Indie DB<\/a>"},"related":{"file":"http:\/\/rss.indiedb.com\/media\/recommended\/1522983\/feed\/rss.xml","dimensions":"160x120","onclick":"link"},"sources":[{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode_mp4\/king-machine-trailer.mp4","label":"360p SD","default":"true"},{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode720p_mp4\/king-machine-trailer.mp4","label":"720p HD"}],"image":"http:\/\/media.indiedb.com\/cache\/images\/games\/1\/50\/49678\/thumb_620x2000\/king-machine-trailer.mp4.jpg","advertising":{"client":"vast","tag":"http:\/\/ads.intergi.com\/adrawdata\/3.0\/5205\/4251742\/0\/1013\/ADTECH;cors=yes;width=560;height=315;referring_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;content_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;media_id=1522983;title=king+machine+trailer+1;device=__DEVICE__;model=__MODEL__;os=Windows+OS;osversion=__OSVERSION__;ua=__UA__;ip=109.171.17.81;uniqueid=1522983;tags=__TAGS__;number=58cac25928151;time=1489683033"},"width":620,"height":349}).once("play", function(event) {
|
||||||
|
videoAnalytics("play");
|
||||||
|
}).once("complete", function(event) {
|
||||||
|
videoAnalytics("completed");
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
''', 'dummy'),
|
||||||
|
{
|
||||||
|
'title': 'king machine trailer 1',
|
||||||
|
'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg',
|
||||||
|
'formats': [{
|
||||||
|
'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4',
|
||||||
|
'height': 360,
|
||||||
|
'ext': 'mp4'
|
||||||
|
}, {
|
||||||
|
'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4',
|
||||||
|
'height': 720,
|
||||||
|
'ext': 'mp4'
|
||||||
|
}]
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -526,6 +526,7 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
'id': '1234',
|
'id': '1234',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'width': None,
|
'width': None,
|
||||||
|
'height': 1080,
|
||||||
}
|
}
|
||||||
|
|
||||||
def fname(templ):
|
def fname(templ):
|
||||||
@@ -535,6 +536,19 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
|
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
|
||||||
# Replace missing fields with 'NA'
|
# Replace missing fields with 'NA'
|
||||||
self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
|
self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
|
||||||
|
self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
|
||||||
|
self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4')
|
||||||
|
self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')
|
||||||
|
self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4')
|
||||||
|
self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
|
||||||
|
self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
|
||||||
|
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
|
||||||
|
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
|
||||||
|
self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4')
|
||||||
|
self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
|
||||||
|
self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
|
||||||
|
self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
|
||||||
|
self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
|
||||||
|
|
||||||
def test_format_note(self):
|
def test_format_note(self):
|
||||||
ydl = YoutubeDL()
|
ydl = YoutubeDL()
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
|
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
|
||||||
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
|
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
@@ -34,6 +34,13 @@ class TestAES(unittest.TestCase):
|
|||||||
decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
|
decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
|
||||||
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
||||||
|
|
||||||
|
def test_cbc_encrypt(self):
|
||||||
|
data = bytes_to_intlist(self.secret_msg)
|
||||||
|
encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv))
|
||||||
|
self.assertEqual(
|
||||||
|
encrypted,
|
||||||
|
b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd")
|
||||||
|
|
||||||
def test_decrypt_text(self):
|
def test_decrypt_text(self):
|
||||||
password = intlist_to_bytes(self.key).decode('utf-8')
|
password = intlist_to_bytes(self.key).decode('utf-8')
|
||||||
encrypted = base64.b64encode(
|
encrypted = base64.b64encode(
|
||||||
|
|||||||
@@ -27,11 +27,11 @@ from youtube_dl.compat import (
|
|||||||
class TestCompat(unittest.TestCase):
|
class TestCompat(unittest.TestCase):
|
||||||
def test_compat_getenv(self):
|
def test_compat_getenv(self):
|
||||||
test_str = 'тест'
|
test_str = 'тест'
|
||||||
compat_setenv('YOUTUBE-DL-TEST', test_str)
|
compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str)
|
||||||
self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
|
self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str)
|
||||||
|
|
||||||
def test_compat_setenv(self):
|
def test_compat_setenv(self):
|
||||||
test_var = 'YOUTUBE-DL-TEST'
|
test_var = 'YOUTUBE_DL_COMPAT_SETENV'
|
||||||
test_str = 'тест'
|
test_str = 'тест'
|
||||||
compat_setenv(test_var, test_str)
|
compat_setenv(test_var, test_str)
|
||||||
compat_getenv(test_var)
|
compat_getenv(test_var)
|
||||||
|
|||||||
@@ -65,15 +65,31 @@ defs = gettestcases()
|
|||||||
|
|
||||||
|
|
||||||
class TestDownload(unittest.TestCase):
|
class TestDownload(unittest.TestCase):
|
||||||
|
# Parallel testing in nosetests. See
|
||||||
|
# http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html
|
||||||
|
_multiprocess_shared_ = True
|
||||||
|
|
||||||
maxDiff = None
|
maxDiff = None
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
"""Identify each test with the `add_ie` attribute, if available."""
|
||||||
|
|
||||||
|
def strclass(cls):
|
||||||
|
"""From 2.7's unittest; 2.6 had _strclass so we can't import it."""
|
||||||
|
return '%s.%s' % (cls.__module__, cls.__name__)
|
||||||
|
|
||||||
|
add_ie = getattr(self, self._testMethodName).add_ie
|
||||||
|
return '%s (%s)%s:' % (self._testMethodName,
|
||||||
|
strclass(self.__class__),
|
||||||
|
' [%s]' % add_ie if add_ie else '')
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.defs = defs
|
self.defs = defs
|
||||||
|
|
||||||
# Dynamically generate tests
|
# Dynamically generate tests
|
||||||
|
|
||||||
|
|
||||||
def generator(test_case):
|
def generator(test_case, tname):
|
||||||
|
|
||||||
def test_template(self):
|
def test_template(self):
|
||||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
||||||
@@ -102,6 +118,7 @@ def generator(test_case):
|
|||||||
return
|
return
|
||||||
|
|
||||||
params = get_params(test_case.get('params', {}))
|
params = get_params(test_case.get('params', {}))
|
||||||
|
params['outtmpl'] = tname + '_' + params['outtmpl']
|
||||||
if is_playlist and 'playlist' not in test_case:
|
if is_playlist and 'playlist' not in test_case:
|
||||||
params.setdefault('extract_flat', 'in_playlist')
|
params.setdefault('extract_flat', 'in_playlist')
|
||||||
params.setdefault('skip_download', True)
|
params.setdefault('skip_download', True)
|
||||||
@@ -134,7 +151,7 @@ def generator(test_case):
|
|||||||
try_num = 1
|
try_num = 1
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
# We're not using .download here sine that is just a shim
|
# We're not using .download here since that is just a shim
|
||||||
# for outside error handling, and returns the exit code
|
# for outside error handling, and returns the exit code
|
||||||
# instead of the result dict.
|
# instead of the result dict.
|
||||||
res_dict = ydl.extract_info(
|
res_dict = ydl.extract_info(
|
||||||
@@ -146,7 +163,7 @@ def generator(test_case):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
if try_num == RETRIES:
|
if try_num == RETRIES:
|
||||||
report_warning('Failed due to network errors, skipping...')
|
report_warning('%s failed due to network errors, skipping...' % tname)
|
||||||
return
|
return
|
||||||
|
|
||||||
print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
|
print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
|
||||||
@@ -182,7 +199,16 @@ def generator(test_case):
|
|||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
test_case['playlist_duration_sum'], got_duration)
|
test_case['playlist_duration_sum'], got_duration)
|
||||||
|
|
||||||
for tc in test_cases:
|
# Generalize both playlists and single videos to unified format for
|
||||||
|
# simplicity
|
||||||
|
if 'entries' not in res_dict:
|
||||||
|
res_dict['entries'] = [res_dict]
|
||||||
|
|
||||||
|
for tc_num, tc in enumerate(test_cases):
|
||||||
|
tc_res_dict = res_dict['entries'][tc_num]
|
||||||
|
# First, check test cases' data against extracted data alone
|
||||||
|
expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
|
||||||
|
# Now, check downloaded file consistency
|
||||||
tc_filename = get_tc_filename(tc)
|
tc_filename = get_tc_filename(tc)
|
||||||
if not test_case.get('params', {}).get('skip_download', False):
|
if not test_case.get('params', {}).get('skip_download', False):
|
||||||
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
|
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
|
||||||
@@ -200,13 +226,14 @@ def generator(test_case):
|
|||||||
if 'md5' in tc:
|
if 'md5' in tc:
|
||||||
md5_for_file = _file_md5(tc_filename)
|
md5_for_file = _file_md5(tc_filename)
|
||||||
self.assertEqual(md5_for_file, tc['md5'])
|
self.assertEqual(md5_for_file, tc['md5'])
|
||||||
|
# Finally, check test cases' data again but this time against
|
||||||
|
# extracted data from info JSON file written during processing
|
||||||
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
|
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
os.path.exists(info_json_fn),
|
os.path.exists(info_json_fn),
|
||||||
'Missing info file %s' % info_json_fn)
|
'Missing info file %s' % info_json_fn)
|
||||||
with io.open(info_json_fn, encoding='utf-8') as infof:
|
with io.open(info_json_fn, encoding='utf-8') as infof:
|
||||||
info_dict = json.load(infof)
|
info_dict = json.load(infof)
|
||||||
|
|
||||||
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
|
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
|
||||||
finally:
|
finally:
|
||||||
try_rm_tcs_files()
|
try_rm_tcs_files()
|
||||||
@@ -221,13 +248,15 @@ def generator(test_case):
|
|||||||
|
|
||||||
# And add them to TestDownload
|
# And add them to TestDownload
|
||||||
for n, test_case in enumerate(defs):
|
for n, test_case in enumerate(defs):
|
||||||
test_method = generator(test_case)
|
|
||||||
tname = 'test_' + str(test_case['name'])
|
tname = 'test_' + str(test_case['name'])
|
||||||
i = 1
|
i = 1
|
||||||
while hasattr(TestDownload, tname):
|
while hasattr(TestDownload, tname):
|
||||||
tname = 'test_%s_%d' % (test_case['name'], i)
|
tname = 'test_%s_%d' % (test_case['name'], i)
|
||||||
i += 1
|
i += 1
|
||||||
|
test_method = generator(test_case, tname)
|
||||||
test_method.__name__ = str(tname)
|
test_method.__name__ = str(tname)
|
||||||
|
ie_list = test_case.get('add_ie')
|
||||||
|
test_method.add_ie = ie_list and ','.join(ie_list)
|
||||||
setattr(TestDownload, test_method.__name__, test_method)
|
setattr(TestDownload, test_method.__name__, test_method)
|
||||||
del test_method
|
del test_method
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ from youtube_dl.extractor import (
|
|||||||
NPOIE,
|
NPOIE,
|
||||||
ComedyCentralIE,
|
ComedyCentralIE,
|
||||||
NRKTVIE,
|
NRKTVIE,
|
||||||
RaiTVIE,
|
RaiPlayIE,
|
||||||
VikiIE,
|
VikiIE,
|
||||||
ThePlatformIE,
|
ThePlatformIE,
|
||||||
ThePlatformFeedIE,
|
ThePlatformFeedIE,
|
||||||
@@ -258,9 +258,9 @@ class TestNRKSubtitles(BaseTestSubtitles):
|
|||||||
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
|
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
|
||||||
|
|
||||||
|
|
||||||
class TestRaiSubtitles(BaseTestSubtitles):
|
class TestRaiPlaySubtitles(BaseTestSubtitles):
|
||||||
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||||
IE = RaiTVIE
|
IE = RaiPlayIE
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
|
|||||||
@@ -52,9 +52,11 @@ from youtube_dl.utils import (
|
|||||||
parse_filesize,
|
parse_filesize,
|
||||||
parse_count,
|
parse_count,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
pkcs1pad,
|
||||||
read_batch_urls,
|
read_batch_urls,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
sanitize_path,
|
sanitize_path,
|
||||||
|
expand_path,
|
||||||
prepend_extension,
|
prepend_extension,
|
||||||
replace_extension,
|
replace_extension,
|
||||||
remove_start,
|
remove_start,
|
||||||
@@ -94,6 +96,8 @@ from youtube_dl.utils import (
|
|||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_chr,
|
compat_chr,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
|
compat_getenv,
|
||||||
|
compat_setenv,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
)
|
)
|
||||||
@@ -213,6 +217,18 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(sanitize_path('./abc'), 'abc')
|
self.assertEqual(sanitize_path('./abc'), 'abc')
|
||||||
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
||||||
|
|
||||||
|
def test_expand_path(self):
|
||||||
|
def env(var):
|
||||||
|
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
|
||||||
|
|
||||||
|
compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded')
|
||||||
|
self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded')
|
||||||
|
self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
|
||||||
|
self.assertEqual(expand_path('~'), compat_getenv('HOME'))
|
||||||
|
self.assertEqual(
|
||||||
|
expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
|
||||||
|
'%s/expanded' % compat_getenv('HOME'))
|
||||||
|
|
||||||
def test_prepend_extension(self):
|
def test_prepend_extension(self):
|
||||||
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
|
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
|
||||||
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
|
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
|
||||||
@@ -454,6 +470,9 @@ class TestUtil(unittest.TestCase):
|
|||||||
|
|
||||||
def test_urljoin(self):
|
def test_urljoin(self):
|
||||||
self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||||
|
self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||||
|
self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||||
|
self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||||
self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt')
|
self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt')
|
||||||
self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||||
self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||||
@@ -1104,6 +1123,14 @@ The first line
|
|||||||
ohdave_rsa_encrypt(b'aa111222', e, N),
|
ohdave_rsa_encrypt(b'aa111222', e, N),
|
||||||
'726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')
|
'726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')
|
||||||
|
|
||||||
|
def test_pkcs1pad(self):
|
||||||
|
data = [1, 2, 3]
|
||||||
|
padded_data = pkcs1pad(data, 32)
|
||||||
|
self.assertEqual(padded_data[:2], [0, 2])
|
||||||
|
self.assertEqual(padded_data[28:], [0, 1, 2, 3])
|
||||||
|
|
||||||
|
self.assertRaises(ValueError, pkcs1pad, data, 8)
|
||||||
|
|
||||||
def test_encode_base_n(self):
|
def test_encode_base_n(self):
|
||||||
self.assertEqual(encode_base_n(0, 30), '0')
|
self.assertEqual(encode_base_n(0, 30), '0')
|
||||||
self.assertEqual(encode_base_n(80, 30), '2k')
|
self.assertEqual(encode_base_n(80, 30), '2k')
|
||||||
|
|||||||
@@ -29,10 +29,10 @@ import random
|
|||||||
from .compat import (
|
from .compat import (
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
compat_cookiejar,
|
compat_cookiejar,
|
||||||
compat_expanduser,
|
|
||||||
compat_get_terminal_size,
|
compat_get_terminal_size,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
|
compat_numeric_types,
|
||||||
compat_os_name,
|
compat_os_name,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_tokenize_tokenize,
|
compat_tokenize_tokenize,
|
||||||
@@ -53,9 +53,12 @@ from .utils import (
|
|||||||
encode_compat_str,
|
encode_compat_str,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
|
expand_path,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
|
GeoRestrictedError,
|
||||||
|
ISO3166Utils,
|
||||||
locked_file,
|
locked_file,
|
||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
@@ -272,6 +275,12 @@ class YoutubeDL(object):
|
|||||||
If it returns None, the video is downloaded.
|
If it returns None, the video is downloaded.
|
||||||
match_filter_func in utils.py is one example for this.
|
match_filter_func in utils.py is one example for this.
|
||||||
no_color: Do not emit color codes in output.
|
no_color: Do not emit color codes in output.
|
||||||
|
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
|
||||||
|
HTTP header (experimental)
|
||||||
|
geo_bypass_country:
|
||||||
|
Two-letter ISO 3166-2 country code that will be used for
|
||||||
|
explicit geographic restriction bypassing via faking
|
||||||
|
X-Forwarded-For HTTP header (experimental)
|
||||||
|
|
||||||
The following options determine which downloader is picked:
|
The following options determine which downloader is picked:
|
||||||
external_downloader: Executable of the external downloader to call.
|
external_downloader: Executable of the external downloader to call.
|
||||||
@@ -319,11 +328,21 @@ class YoutubeDL(object):
|
|||||||
self.params.update(params)
|
self.params.update(params)
|
||||||
self.cache = Cache(self)
|
self.cache = Cache(self)
|
||||||
|
|
||||||
if self.params.get('cn_verification_proxy') is not None:
|
def check_deprecated(param, option, suggestion):
|
||||||
self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
|
if self.params.get(param) is not None:
|
||||||
|
self.report_warning(
|
||||||
|
'%s is deprecated. Use %s instead.' % (option, suggestion))
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
|
||||||
if self.params.get('geo_verification_proxy') is None:
|
if self.params.get('geo_verification_proxy') is None:
|
||||||
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
|
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
|
||||||
|
|
||||||
|
check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
|
||||||
|
check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
|
||||||
|
check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
|
||||||
|
|
||||||
if params.get('bidi_workaround', False):
|
if params.get('bidi_workaround', False):
|
||||||
try:
|
try:
|
||||||
import pty
|
import pty
|
||||||
@@ -585,10 +604,7 @@ class YoutubeDL(object):
|
|||||||
autonumber_size = self.params.get('autonumber_size')
|
autonumber_size = self.params.get('autonumber_size')
|
||||||
if autonumber_size is None:
|
if autonumber_size is None:
|
||||||
autonumber_size = 5
|
autonumber_size = 5
|
||||||
autonumber_templ = '%0' + str(autonumber_size) + 'd'
|
template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
|
||||||
template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
|
|
||||||
if template_dict.get('playlist_index') is not None:
|
|
||||||
template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
|
|
||||||
if template_dict.get('resolution') is None:
|
if template_dict.get('resolution') is None:
|
||||||
if template_dict.get('width') and template_dict.get('height'):
|
if template_dict.get('width') and template_dict.get('height'):
|
||||||
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
|
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
|
||||||
@@ -600,14 +616,63 @@ class YoutubeDL(object):
|
|||||||
sanitize = lambda k, v: sanitize_filename(
|
sanitize = lambda k, v: sanitize_filename(
|
||||||
compat_str(v),
|
compat_str(v),
|
||||||
restricted=self.params.get('restrictfilenames'),
|
restricted=self.params.get('restrictfilenames'),
|
||||||
is_id=(k == 'id'))
|
is_id=(k == 'id' or k.endswith('_id')))
|
||||||
template_dict = dict((k, sanitize(k, v))
|
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
||||||
for k, v in template_dict.items()
|
for k, v in template_dict.items()
|
||||||
if v is not None and not isinstance(v, (list, tuple, dict)))
|
if v is not None and not isinstance(v, (list, tuple, dict)))
|
||||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
||||||
|
|
||||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||||
tmpl = compat_expanduser(outtmpl)
|
|
||||||
|
# For fields playlist_index and autonumber convert all occurrences
|
||||||
|
# of %(field)s to %(field)0Nd for backward compatibility
|
||||||
|
field_size_compat_map = {
|
||||||
|
'playlist_index': len(str(template_dict['n_entries'])),
|
||||||
|
'autonumber': autonumber_size,
|
||||||
|
}
|
||||||
|
FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
|
||||||
|
mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
|
||||||
|
if mobj:
|
||||||
|
outtmpl = re.sub(
|
||||||
|
FIELD_SIZE_COMPAT_RE,
|
||||||
|
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
||||||
|
outtmpl)
|
||||||
|
|
||||||
|
NUMERIC_FIELDS = set((
|
||||||
|
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
||||||
|
'upload_year', 'upload_month', 'upload_day',
|
||||||
|
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
||||||
|
'average_rating', 'comment_count', 'age_limit',
|
||||||
|
'start_time', 'end_time',
|
||||||
|
'chapter_number', 'season_number', 'episode_number',
|
||||||
|
'track_number', 'disc_number', 'release_year',
|
||||||
|
'playlist_index',
|
||||||
|
))
|
||||||
|
|
||||||
|
# Missing numeric fields used together with integer presentation types
|
||||||
|
# in format specification will break the argument substitution since
|
||||||
|
# string 'NA' is returned for missing fields. We will patch output
|
||||||
|
# template for missing fields to meet string presentation type.
|
||||||
|
for numeric_field in NUMERIC_FIELDS:
|
||||||
|
if numeric_field not in template_dict:
|
||||||
|
# As of [1] format syntax is:
|
||||||
|
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
||||||
|
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
|
||||||
|
FORMAT_RE = r'''(?x)
|
||||||
|
(?<!%)
|
||||||
|
%
|
||||||
|
\({0}\) # mapping key
|
||||||
|
(?:[#0\-+ ]+)? # conversion flags (optional)
|
||||||
|
(?:\d+)? # minimum field width (optional)
|
||||||
|
(?:\.\d+)? # precision (optional)
|
||||||
|
[hlL]? # length modifier (optional)
|
||||||
|
[diouxXeEfFgGcrs%] # conversion type
|
||||||
|
'''
|
||||||
|
outtmpl = re.sub(
|
||||||
|
FORMAT_RE.format(numeric_field),
|
||||||
|
r'%({0})s'.format(numeric_field), outtmpl)
|
||||||
|
|
||||||
|
tmpl = expand_path(outtmpl)
|
||||||
filename = tmpl % template_dict
|
filename = tmpl % template_dict
|
||||||
# Temporary fix for #4787
|
# Temporary fix for #4787
|
||||||
# 'Treat' all problem characters by passing filename through preferredencoding
|
# 'Treat' all problem characters by passing filename through preferredencoding
|
||||||
@@ -707,6 +772,14 @@ class YoutubeDL(object):
|
|||||||
return self.process_ie_result(ie_result, download, extra_info)
|
return self.process_ie_result(ie_result, download, extra_info)
|
||||||
else:
|
else:
|
||||||
return ie_result
|
return ie_result
|
||||||
|
except GeoRestrictedError as e:
|
||||||
|
msg = e.msg
|
||||||
|
if e.countries:
|
||||||
|
msg += '\nThis video is available in %s.' % ', '.join(
|
||||||
|
map(ISO3166Utils.short2full, e.countries))
|
||||||
|
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
||||||
|
self.report_error(msg)
|
||||||
|
break
|
||||||
except ExtractorError as e: # An error we somewhat expected
|
except ExtractorError as e: # An error we somewhat expected
|
||||||
self.report_error(compat_str(e), e.format_traceback())
|
self.report_error(compat_str(e), e.format_traceback())
|
||||||
break
|
break
|
||||||
@@ -764,6 +837,12 @@ class YoutubeDL(object):
|
|||||||
ie_result['url'], ie_key=ie_result.get('ie_key'),
|
ie_result['url'], ie_key=ie_result.get('ie_key'),
|
||||||
extra_info=extra_info, download=False, process=False)
|
extra_info=extra_info, download=False, process=False)
|
||||||
|
|
||||||
|
# extract_info may return None when ignoreerrors is enabled and
|
||||||
|
# extraction failed with an error, don't crash and return early
|
||||||
|
# in this case
|
||||||
|
if not info:
|
||||||
|
return info
|
||||||
|
|
||||||
force_properties = dict(
|
force_properties = dict(
|
||||||
(k, v) for k, v in ie_result.items() if v is not None)
|
(k, v) for k, v in ie_result.items() if v is not None)
|
||||||
for f in ('_type', 'url', 'ie_key'):
|
for f in ('_type', 'url', 'ie_key'):
|
||||||
@@ -776,7 +855,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
return self.process_ie_result(
|
return self.process_ie_result(
|
||||||
new_result, download=download, extra_info=extra_info)
|
new_result, download=download, extra_info=extra_info)
|
||||||
elif result_type == 'playlist' or result_type == 'multi_video':
|
elif result_type in ('playlist', 'multi_video'):
|
||||||
# We process each entry in the playlist
|
# We process each entry in the playlist
|
||||||
playlist = ie_result.get('title') or ie_result.get('id')
|
playlist = ie_result.get('title') or ie_result.get('id')
|
||||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||||
@@ -847,8 +926,14 @@ class YoutubeDL(object):
|
|||||||
if self.params.get('playlistrandom', False):
|
if self.params.get('playlistrandom', False):
|
||||||
random.shuffle(entries)
|
random.shuffle(entries)
|
||||||
|
|
||||||
|
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
||||||
|
|
||||||
for i, entry in enumerate(entries, 1):
|
for i, entry in enumerate(entries, 1):
|
||||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||||
|
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
||||||
|
# minimal changes
|
||||||
|
if x_forwarded_for:
|
||||||
|
entry['__x_forwarded_for_ip'] = x_forwarded_for
|
||||||
extra = {
|
extra = {
|
||||||
'n_entries': n_entries,
|
'n_entries': n_entries,
|
||||||
'playlist': playlist,
|
'playlist': playlist,
|
||||||
@@ -1233,6 +1318,11 @@ class YoutubeDL(object):
|
|||||||
if cookies:
|
if cookies:
|
||||||
res['Cookie'] = cookies
|
res['Cookie'] = cookies
|
||||||
|
|
||||||
|
if 'X-Forwarded-For' not in res:
|
||||||
|
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
|
||||||
|
if x_forwarded_for_ip:
|
||||||
|
res['X-Forwarded-For'] = x_forwarded_for_ip
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def _calc_cookies(self, info_dict):
|
def _calc_cookies(self, info_dict):
|
||||||
@@ -1375,6 +1465,9 @@ class YoutubeDL(object):
|
|||||||
full_format_info = info_dict.copy()
|
full_format_info = info_dict.copy()
|
||||||
full_format_info.update(format)
|
full_format_info.update(format)
|
||||||
format['http_headers'] = self._calc_headers(full_format_info)
|
format['http_headers'] = self._calc_headers(full_format_info)
|
||||||
|
# Remove private housekeeping stuff
|
||||||
|
if '__x_forwarded_for_ip' in info_dict:
|
||||||
|
del info_dict['__x_forwarded_for_ip']
|
||||||
|
|
||||||
# TODO Central sorting goes here
|
# TODO Central sorting goes here
|
||||||
|
|
||||||
@@ -1785,6 +1878,7 @@ class YoutubeDL(object):
|
|||||||
"""Download a given list of URLs."""
|
"""Download a given list of URLs."""
|
||||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||||
if (len(url_list) > 1 and
|
if (len(url_list) > 1 and
|
||||||
|
outtmpl != '-' and
|
||||||
'%' not in outtmpl and
|
'%' not in outtmpl and
|
||||||
self.params.get('max_downloads') != 1):
|
self.params.get('max_downloads') != 1):
|
||||||
raise SameFileError(outtmpl)
|
raise SameFileError(outtmpl)
|
||||||
@@ -2082,7 +2176,7 @@ class YoutubeDL(object):
|
|||||||
if opts_cookiefile is None:
|
if opts_cookiefile is None:
|
||||||
self.cookiejar = compat_cookiejar.CookieJar()
|
self.cookiejar = compat_cookiejar.CookieJar()
|
||||||
else:
|
else:
|
||||||
opts_cookiefile = compat_expanduser(opts_cookiefile)
|
opts_cookiefile = expand_path(opts_cookiefile)
|
||||||
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
||||||
opts_cookiefile)
|
opts_cookiefile)
|
||||||
if os.access(opts_cookiefile, os.R_OK):
|
if os.access(opts_cookiefile, os.R_OK):
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ from .options import (
|
|||||||
parseOpts,
|
parseOpts,
|
||||||
)
|
)
|
||||||
from .compat import (
|
from .compat import (
|
||||||
compat_expanduser,
|
|
||||||
compat_getpass,
|
compat_getpass,
|
||||||
compat_shlex_split,
|
compat_shlex_split,
|
||||||
workaround_optparse_bug9161,
|
workaround_optparse_bug9161,
|
||||||
@@ -26,6 +25,7 @@ from .utils import (
|
|||||||
decodeOption,
|
decodeOption,
|
||||||
DEFAULT_OUTTMPL,
|
DEFAULT_OUTTMPL,
|
||||||
DownloadError,
|
DownloadError,
|
||||||
|
expand_path,
|
||||||
match_filter_func,
|
match_filter_func,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
@@ -88,7 +88,7 @@ def _real_main(argv=None):
|
|||||||
batchfd = sys.stdin
|
batchfd = sys.stdin
|
||||||
else:
|
else:
|
||||||
batchfd = io.open(
|
batchfd = io.open(
|
||||||
compat_expanduser(opts.batchfile),
|
expand_path(opts.batchfile),
|
||||||
'r', encoding='utf-8', errors='ignore')
|
'r', encoding='utf-8', errors='ignore')
|
||||||
batch_urls = read_batch_urls(batchfd)
|
batch_urls = read_batch_urls(batchfd)
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
@@ -196,7 +196,7 @@ def _real_main(argv=None):
|
|||||||
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
|
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
|
||||||
raise ValueError('Playlist end must be greater than playlist start')
|
raise ValueError('Playlist end must be greater than playlist start')
|
||||||
if opts.extractaudio:
|
if opts.extractaudio:
|
||||||
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
||||||
parser.error('invalid audio format specified')
|
parser.error('invalid audio format specified')
|
||||||
if opts.audioquality:
|
if opts.audioquality:
|
||||||
opts.audioquality = opts.audioquality.strip('k').strip('K')
|
opts.audioquality = opts.audioquality.strip('k').strip('K')
|
||||||
@@ -238,18 +238,15 @@ def _real_main(argv=None):
|
|||||||
|
|
||||||
any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
||||||
any_printing = opts.print_json
|
any_printing = opts.print_json
|
||||||
download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
|
download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
|
||||||
|
|
||||||
# PostProcessors
|
# PostProcessors
|
||||||
postprocessors = []
|
postprocessors = []
|
||||||
# Add the metadata pp first, the other pps will copy it
|
|
||||||
if opts.metafromtitle:
|
if opts.metafromtitle:
|
||||||
postprocessors.append({
|
postprocessors.append({
|
||||||
'key': 'MetadataFromTitle',
|
'key': 'MetadataFromTitle',
|
||||||
'titleformat': opts.metafromtitle
|
'titleformat': opts.metafromtitle
|
||||||
})
|
})
|
||||||
if opts.addmetadata:
|
|
||||||
postprocessors.append({'key': 'FFmpegMetadata'})
|
|
||||||
if opts.extractaudio:
|
if opts.extractaudio:
|
||||||
postprocessors.append({
|
postprocessors.append({
|
||||||
'key': 'FFmpegExtractAudio',
|
'key': 'FFmpegExtractAudio',
|
||||||
@@ -262,6 +259,16 @@ def _real_main(argv=None):
|
|||||||
'key': 'FFmpegVideoConvertor',
|
'key': 'FFmpegVideoConvertor',
|
||||||
'preferedformat': opts.recodevideo,
|
'preferedformat': opts.recodevideo,
|
||||||
})
|
})
|
||||||
|
# FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
|
||||||
|
# FFmpegExtractAudioPP as containers before conversion may not support
|
||||||
|
# metadata (3gp, webm, etc.)
|
||||||
|
# And this post-processor should be placed before other metadata
|
||||||
|
# manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of
|
||||||
|
# extra metadata. By default ffmpeg preserves metadata applicable for both
|
||||||
|
# source and target containers. From this point the container won't change,
|
||||||
|
# so metadata can be added here.
|
||||||
|
if opts.addmetadata:
|
||||||
|
postprocessors.append({'key': 'FFmpegMetadata'})
|
||||||
if opts.convertsubtitles:
|
if opts.convertsubtitles:
|
||||||
postprocessors.append({
|
postprocessors.append({
|
||||||
'key': 'FFmpegSubtitlesConvertor',
|
'key': 'FFmpegSubtitlesConvertor',
|
||||||
@@ -414,6 +421,11 @@ def _real_main(argv=None):
|
|||||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||||
'geo_verification_proxy': opts.geo_verification_proxy,
|
'geo_verification_proxy': opts.geo_verification_proxy,
|
||||||
'config_location': opts.config_location,
|
'config_location': opts.config_location,
|
||||||
|
'geo_bypass': opts.geo_bypass,
|
||||||
|
'geo_bypass_country': opts.geo_bypass_country,
|
||||||
|
# just for deprecation check
|
||||||
|
'autonumber': opts.autonumber if opts.autonumber is True else None,
|
||||||
|
'usetitle': opts.usetitle if opts.usetitle is True else None,
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
@@ -437,7 +449,7 @@ def _real_main(argv=None):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if opts.load_info_filename is not None:
|
if opts.load_info_filename is not None:
|
||||||
retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
|
retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
|
||||||
else:
|
else:
|
||||||
retcode = ydl.download(all_urls)
|
retcode = ydl.download(all_urls)
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
|
|||||||
@@ -60,6 +60,34 @@ def aes_cbc_decrypt(data, key, iv):
|
|||||||
return decrypted_data
|
return decrypted_data
|
||||||
|
|
||||||
|
|
||||||
|
def aes_cbc_encrypt(data, key, iv):
|
||||||
|
"""
|
||||||
|
Encrypt with aes in CBC mode. Using PKCS#7 padding
|
||||||
|
|
||||||
|
@param {int[]} data cleartext
|
||||||
|
@param {int[]} key 16/24/32-Byte cipher key
|
||||||
|
@param {int[]} iv 16-Byte IV
|
||||||
|
@returns {int[]} encrypted data
|
||||||
|
"""
|
||||||
|
expanded_key = key_expansion(key)
|
||||||
|
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||||
|
|
||||||
|
encrypted_data = []
|
||||||
|
previous_cipher_block = iv
|
||||||
|
for i in range(block_count):
|
||||||
|
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
|
||||||
|
remaining_length = BLOCK_SIZE_BYTES - len(block)
|
||||||
|
block += [remaining_length] * remaining_length
|
||||||
|
mixed_block = xor(block, previous_cipher_block)
|
||||||
|
|
||||||
|
encrypted_block = aes_encrypt(mixed_block, expanded_key)
|
||||||
|
encrypted_data += encrypted_block
|
||||||
|
|
||||||
|
previous_cipher_block = encrypted_block
|
||||||
|
|
||||||
|
return encrypted_data
|
||||||
|
|
||||||
|
|
||||||
def key_expansion(data):
|
def key_expansion(data):
|
||||||
"""
|
"""
|
||||||
Generate key schedule
|
Generate key schedule
|
||||||
|
|||||||
@@ -8,8 +8,11 @@ import re
|
|||||||
import shutil
|
import shutil
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from .compat import compat_expanduser, compat_getenv
|
from .compat import compat_getenv
|
||||||
from .utils import write_json_file
|
from .utils import (
|
||||||
|
expand_path,
|
||||||
|
write_json_file,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class Cache(object):
|
class Cache(object):
|
||||||
@@ -21,7 +24,7 @@ class Cache(object):
|
|||||||
if res is None:
|
if res is None:
|
||||||
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
|
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
|
||||||
res = os.path.join(cache_root, 'youtube-dl')
|
res = os.path.join(cache_root, 'youtube-dl')
|
||||||
return compat_expanduser(res)
|
return expand_path(res)
|
||||||
|
|
||||||
def _get_cache_fn(self, section, key, dtype):
|
def _get_cache_fn(self, section, key, dtype):
|
||||||
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
|
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
|
||||||
|
|||||||
@@ -2692,7 +2692,7 @@ else:
|
|||||||
userhome = pwent.pw_dir
|
userhome = pwent.pw_dir
|
||||||
userhome = userhome.rstrip('/')
|
userhome = userhome.rstrip('/')
|
||||||
return (userhome + path[i:]) or '/'
|
return (userhome + path[i:]) or '/'
|
||||||
elif compat_os_name == 'nt' or compat_os_name == 'ce':
|
elif compat_os_name in ('nt', 'ce'):
|
||||||
def compat_expanduser(path):
|
def compat_expanduser(path):
|
||||||
"""Expand ~ and ~user constructs.
|
"""Expand ~ and ~user constructs.
|
||||||
|
|
||||||
@@ -2760,6 +2760,12 @@ else:
|
|||||||
compat_kwargs = lambda kwargs: kwargs
|
compat_kwargs = lambda kwargs: kwargs
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
compat_numeric_types = (int, float, long, complex)
|
||||||
|
except NameError: # Python 3
|
||||||
|
compat_numeric_types = (int, float, complex)
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info < (2, 7):
|
if sys.version_info < (2, 7):
|
||||||
def compat_socket_create_connection(address, timeout, source_address=None):
|
def compat_socket_create_connection(address, timeout, source_address=None):
|
||||||
host, port = address
|
host, port = address
|
||||||
@@ -2895,6 +2901,7 @@ __all__ = [
|
|||||||
'compat_input',
|
'compat_input',
|
||||||
'compat_itertools_count',
|
'compat_itertools_count',
|
||||||
'compat_kwargs',
|
'compat_kwargs',
|
||||||
|
'compat_numeric_types',
|
||||||
'compat_ord',
|
'compat_ord',
|
||||||
'compat_os_name',
|
'compat_os_name',
|
||||||
'compat_parse_qs',
|
'compat_parse_qs',
|
||||||
|
|||||||
@@ -43,6 +43,9 @@ def get_suitable_downloader(info_dict, params={}):
|
|||||||
if ed.can_download(info_dict):
|
if ed.can_download(info_dict):
|
||||||
return ed
|
return ed
|
||||||
|
|
||||||
|
if protocol.startswith('m3u8') and info_dict.get('is_live'):
|
||||||
|
return FFmpegFD
|
||||||
|
|
||||||
if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
|
if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
|
||||||
return HlsFD
|
return HlsFD
|
||||||
|
|
||||||
|
|||||||
@@ -347,7 +347,10 @@ class FileDownloader(object):
|
|||||||
if min_sleep_interval:
|
if min_sleep_interval:
|
||||||
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
|
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
|
||||||
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
|
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
|
||||||
self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
|
self.to_screen(
|
||||||
|
'[download] Sleeping %s seconds...' % (
|
||||||
|
int(sleep_interval) if sleep_interval.is_integer()
|
||||||
|
else '%.2f' % sleep_interval))
|
||||||
time.sleep(sleep_interval)
|
time.sleep(sleep_interval)
|
||||||
|
|
||||||
return self.real_download(filename, info_dict)
|
return self.real_download(filename, info_dict)
|
||||||
|
|||||||
@@ -43,7 +43,10 @@ class DashSegmentsFD(FragmentFD):
|
|||||||
count = 0
|
count = 0
|
||||||
while count <= fragment_retries:
|
while count <= fragment_retries:
|
||||||
try:
|
try:
|
||||||
success = ctx['dl'].download(target_filename, {'url': segment_url})
|
success = ctx['dl'].download(target_filename, {
|
||||||
|
'url': segment_url,
|
||||||
|
'http_headers': info_dict.get('http_headers'),
|
||||||
|
})
|
||||||
if not success:
|
if not success:
|
||||||
return False
|
return False
|
||||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||||
|
|||||||
@@ -6,7 +6,10 @@ import sys
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from ..compat import compat_setenv
|
from ..compat import (
|
||||||
|
compat_setenv,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
|
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
cli_option,
|
cli_option,
|
||||||
@@ -270,6 +273,10 @@ class FFmpegFD(ExternalFD):
|
|||||||
args += ['-rtmp_live', 'live']
|
args += ['-rtmp_live', 'live']
|
||||||
|
|
||||||
args += ['-i', url, '-c', 'copy']
|
args += ['-i', url, '-c', 'copy']
|
||||||
|
|
||||||
|
if self.params.get('test', False):
|
||||||
|
args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
|
||||||
|
|
||||||
if protocol in ('m3u8', 'm3u8_native'):
|
if protocol in ('m3u8', 'm3u8_native'):
|
||||||
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
|
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
|
||||||
args += ['-f', 'mpegts']
|
args += ['-f', 'mpegts']
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ class HlsFD(FragmentFD):
|
|||||||
def can_download(manifest, info_dict):
|
def can_download(manifest, info_dict):
|
||||||
UNSUPPORTED_FEATURES = (
|
UNSUPPORTED_FEATURES = (
|
||||||
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
|
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
|
||||||
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||||
|
|
||||||
# Live streams heuristic does not always work (e.g. geo restricted to Germany
|
# Live streams heuristic does not always work (e.g. geo restricted to Germany
|
||||||
# http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
|
# http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
|
||||||
@@ -52,7 +52,9 @@ class HlsFD(FragmentFD):
|
|||||||
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
|
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
|
||||||
)
|
)
|
||||||
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
||||||
check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
|
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
|
||||||
|
check_results.append(can_decrypt_frag or not is_aes128_enc)
|
||||||
|
check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
|
||||||
check_results.append(not info_dict.get('is_live'))
|
check_results.append(not info_dict.get('is_live'))
|
||||||
return all(check_results)
|
return all(check_results)
|
||||||
|
|
||||||
@@ -100,6 +102,7 @@ class HlsFD(FragmentFD):
|
|||||||
i = 0
|
i = 0
|
||||||
media_sequence = 0
|
media_sequence = 0
|
||||||
decrypt_info = {'METHOD': 'NONE'}
|
decrypt_info = {'METHOD': 'NONE'}
|
||||||
|
byte_range = {}
|
||||||
frags_filenames = []
|
frags_filenames = []
|
||||||
for line in s.splitlines():
|
for line in s.splitlines():
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
@@ -114,11 +117,14 @@ class HlsFD(FragmentFD):
|
|||||||
if extra_query:
|
if extra_query:
|
||||||
frag_url = update_url_query(frag_url, extra_query)
|
frag_url = update_url_query(frag_url, extra_query)
|
||||||
count = 0
|
count = 0
|
||||||
|
headers = info_dict.get('http_headers', {})
|
||||||
|
if byte_range:
|
||||||
|
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
|
||||||
while count <= fragment_retries:
|
while count <= fragment_retries:
|
||||||
try:
|
try:
|
||||||
success = ctx['dl'].download(frag_filename, {
|
success = ctx['dl'].download(frag_filename, {
|
||||||
'url': frag_url,
|
'url': frag_url,
|
||||||
'http_headers': info_dict.get('http_headers'),
|
'http_headers': headers,
|
||||||
})
|
})
|
||||||
if not success:
|
if not success:
|
||||||
return False
|
return False
|
||||||
@@ -167,6 +173,13 @@ class HlsFD(FragmentFD):
|
|||||||
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
||||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||||
media_sequence = int(line[22:])
|
media_sequence = int(line[22:])
|
||||||
|
elif line.startswith('#EXT-X-BYTERANGE'):
|
||||||
|
splitted_byte_range = line[17:].split('@')
|
||||||
|
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
|
||||||
|
byte_range = {
|
||||||
|
'start': sub_range_start,
|
||||||
|
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||||
|
}
|
||||||
|
|
||||||
self._finish_frag_download(ctx)
|
self._finish_frag_download(ctx)
|
||||||
|
|
||||||
|
|||||||
@@ -238,7 +238,10 @@ class IsmFD(FragmentFD):
|
|||||||
count = 0
|
count = 0
|
||||||
while count <= fragment_retries:
|
while count <= fragment_retries:
|
||||||
try:
|
try:
|
||||||
success = ctx['dl'].download(target_filename, {'url': segment_url})
|
success = ctx['dl'].download(target_filename, {
|
||||||
|
'url': segment_url,
|
||||||
|
'http_headers': info_dict.get('http_headers'),
|
||||||
|
})
|
||||||
if not success:
|
if not success:
|
||||||
return False
|
return False
|
||||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||||
|
|||||||
@@ -169,7 +169,7 @@ class RtmpFD(FileDownloader):
|
|||||||
self.report_error('[rtmpdump] Could not connect to RTMP server.')
|
self.report_error('[rtmpdump] Could not connect to RTMP server.')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
|
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
|
||||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen('[rtmpdump] %s bytes' % prevsize)
|
self.to_screen('[rtmpdump] %s bytes' % prevsize)
|
||||||
time.sleep(5.0) # This seems to be needed
|
time.sleep(5.0) # This seems to be needed
|
||||||
|
|||||||
@@ -25,7 +25,8 @@ class AddAnimeIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'One Piece 606',
|
'description': 'One Piece 606',
|
||||||
'title': 'One Piece 606',
|
'title': 'One Piece 606',
|
||||||
}
|
},
|
||||||
|
'skip': 'Video is gone',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
|
'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|||||||
136
youtube_dl/extractor/adn.py
Normal file
136
youtube_dl/extractor/adn.py
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..aes import aes_cbc_decrypt
|
||||||
|
from ..compat import compat_ord
|
||||||
|
from ..utils import (
|
||||||
|
bytes_to_intlist,
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
intlist_to_bytes,
|
||||||
|
srt_subtitles_timecode,
|
||||||
|
strip_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ADNIE(InfoExtractor):
|
||||||
|
IE_DESC = 'Anime Digital Network'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||||
|
'md5': 'e497370d847fd79d9d4c74be55575c7a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '7778',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
|
||||||
|
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _get_subtitles(self, sub_path, video_id):
|
||||||
|
if not sub_path:
|
||||||
|
return None
|
||||||
|
|
||||||
|
enc_subtitles = self._download_webpage(
|
||||||
|
'http://animedigitalnetwork.fr/' + sub_path,
|
||||||
|
video_id, fatal=False)
|
||||||
|
if not enc_subtitles:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||||
|
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||||
|
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
|
||||||
|
bytes_to_intlist(b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'),
|
||||||
|
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
|
||||||
|
))
|
||||||
|
subtitles_json = self._parse_json(
|
||||||
|
dec_subtitles[:-compat_ord(dec_subtitles[-1])],
|
||||||
|
None, fatal=False)
|
||||||
|
if not subtitles_json:
|
||||||
|
return None
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for sub_lang, sub in subtitles_json.items():
|
||||||
|
srt = ''
|
||||||
|
for num, current in enumerate(sub):
|
||||||
|
start, end, text = (
|
||||||
|
float_or_none(current.get('startTime')),
|
||||||
|
float_or_none(current.get('endTime')),
|
||||||
|
current.get('text'))
|
||||||
|
if start is None or end is None or text is None:
|
||||||
|
continue
|
||||||
|
srt += os.linesep.join(
|
||||||
|
(
|
||||||
|
'%d' % num,
|
||||||
|
'%s --> %s' % (
|
||||||
|
srt_subtitles_timecode(start),
|
||||||
|
srt_subtitles_timecode(end)),
|
||||||
|
text,
|
||||||
|
os.linesep,
|
||||||
|
))
|
||||||
|
|
||||||
|
if sub_lang == 'vostf':
|
||||||
|
sub_lang = 'fr'
|
||||||
|
subtitles.setdefault(sub_lang, []).extend([{
|
||||||
|
'ext': 'json',
|
||||||
|
'data': json.dumps(sub),
|
||||||
|
}, {
|
||||||
|
'ext': 'srt',
|
||||||
|
'data': srt,
|
||||||
|
}])
|
||||||
|
return subtitles
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
player_config = self._parse_json(self._search_regex(
|
||||||
|
r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id)
|
||||||
|
|
||||||
|
video_info = {}
|
||||||
|
video_info_str = self._search_regex(
|
||||||
|
r'videoInfo\s*=\s*({.+});', webpage,
|
||||||
|
'video info', fatal=False)
|
||||||
|
if video_info_str:
|
||||||
|
video_info = self._parse_json(
|
||||||
|
video_info_str, video_id, fatal=False) or {}
|
||||||
|
|
||||||
|
options = player_config.get('options') or {}
|
||||||
|
metas = options.get('metas') or {}
|
||||||
|
title = metas.get('title') or video_info['title']
|
||||||
|
links = player_config.get('links') or {}
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_id, qualities in links.items():
|
||||||
|
for load_balancer_url in qualities.values():
|
||||||
|
load_balancer_data = self._download_json(
|
||||||
|
load_balancer_url, video_id, fatal=False) or {}
|
||||||
|
m3u8_url = load_balancer_data.get('location')
|
||||||
|
if not m3u8_url:
|
||||||
|
continue
|
||||||
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False)
|
||||||
|
if format_id == 'vf':
|
||||||
|
for f in m3u8_formats:
|
||||||
|
f['language'] = 'fr'
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
|
error = options.get('error')
|
||||||
|
if not formats and error:
|
||||||
|
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
|
||||||
|
'thumbnail': video_info.get('image'),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': self.extract_subtitles(player_config.get('subtitles'), video_id),
|
||||||
|
'episode': metas.get('subtitle') or video_info.get('videoTitle'),
|
||||||
|
'series': video_info.get('playlistTitle'),
|
||||||
|
}
|
||||||
@@ -31,6 +31,21 @@ MSO_INFO = {
|
|||||||
'username_field': 'user',
|
'username_field': 'user',
|
||||||
'password_field': 'passwd',
|
'password_field': 'passwd',
|
||||||
},
|
},
|
||||||
|
'TWC': {
|
||||||
|
'name': 'Time Warner Cable | Spectrum',
|
||||||
|
'username_field': 'Ecom_User_ID',
|
||||||
|
'password_field': 'Ecom_Password',
|
||||||
|
},
|
||||||
|
'Charter_Direct': {
|
||||||
|
'name': 'Charter Spectrum',
|
||||||
|
'username_field': 'IDToken1',
|
||||||
|
'password_field': 'IDToken2',
|
||||||
|
},
|
||||||
|
'Verizon': {
|
||||||
|
'name': 'Verizon FiOS',
|
||||||
|
'username_field': 'IDToken1',
|
||||||
|
'password_field': 'IDToken2',
|
||||||
|
},
|
||||||
'thr030': {
|
'thr030': {
|
||||||
'name': '3 Rivers Communications'
|
'name': '3 Rivers Communications'
|
||||||
},
|
},
|
||||||
@@ -1374,40 +1389,72 @@ class AdobePassIE(InfoExtractor):
|
|||||||
# Comcast page flow varies by video site and whether you
|
# Comcast page flow varies by video site and whether you
|
||||||
# are on Comcast's network.
|
# are on Comcast's network.
|
||||||
provider_redirect_page, urlh = provider_redirect_page_res
|
provider_redirect_page, urlh = provider_redirect_page_res
|
||||||
# Check for Comcast auto login
|
|
||||||
if 'automatically signing you in' in provider_redirect_page:
|
if 'automatically signing you in' in provider_redirect_page:
|
||||||
oauth_redirect_url = self._html_search_regex(
|
oauth_redirect_url = self._html_search_regex(
|
||||||
r'window\.location\s*=\s*[\'"]([^\'"]+)',
|
r'window\.location\s*=\s*[\'"]([^\'"]+)',
|
||||||
provider_redirect_page, 'oauth redirect')
|
provider_redirect_page, 'oauth redirect')
|
||||||
# Just need to process the request. No useful data comes back
|
|
||||||
self._download_webpage(
|
self._download_webpage(
|
||||||
oauth_redirect_url, video_id, 'Confirming auto login')
|
oauth_redirect_url, video_id, 'Confirming auto login')
|
||||||
else:
|
else:
|
||||||
if '<form name="signin"' in provider_redirect_page:
|
if '<form name="signin"' in provider_redirect_page:
|
||||||
# already have the form, just fill it
|
|
||||||
provider_login_page_res = provider_redirect_page_res
|
provider_login_page_res = provider_redirect_page_res
|
||||||
elif 'http-equiv="refresh"' in provider_redirect_page:
|
elif 'http-equiv="refresh"' in provider_redirect_page:
|
||||||
# redirects to the login page
|
|
||||||
oauth_redirect_url = self._html_search_regex(
|
oauth_redirect_url = self._html_search_regex(
|
||||||
r'content="0;\s*url=([^\'"]+)',
|
r'content="0;\s*url=([^\'"]+)',
|
||||||
provider_redirect_page, 'meta refresh redirect')
|
provider_redirect_page, 'meta refresh redirect')
|
||||||
provider_login_page_res = self._download_webpage_handle(
|
provider_login_page_res = self._download_webpage_handle(
|
||||||
oauth_redirect_url,
|
oauth_redirect_url, video_id,
|
||||||
video_id, 'Downloading Provider Login Page')
|
'Downloading Provider Login Page')
|
||||||
else:
|
else:
|
||||||
provider_login_page_res = post_form(
|
provider_login_page_res = post_form(
|
||||||
provider_redirect_page_res, 'Downloading Provider Login Page')
|
provider_redirect_page_res,
|
||||||
|
'Downloading Provider Login Page')
|
||||||
|
|
||||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
mvpd_confirm_page_res = post_form(
|
||||||
mso_info.get('username_field', 'username'): username,
|
provider_login_page_res, 'Logging in', {
|
||||||
mso_info.get('password_field', 'password'): password,
|
mso_info['username_field']: username,
|
||||||
})
|
mso_info['password_field']: password,
|
||||||
|
})
|
||||||
mvpd_confirm_page, urlh = mvpd_confirm_page_res
|
mvpd_confirm_page, urlh = mvpd_confirm_page_res
|
||||||
if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
|
if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
|
||||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||||
|
elif mso_id == 'Verizon':
|
||||||
|
# In general, if you're connecting from a Verizon-assigned IP,
|
||||||
|
# you will not actually pass your credentials.
|
||||||
|
provider_redirect_page, urlh = provider_redirect_page_res
|
||||||
|
if 'Please wait ...' in provider_redirect_page:
|
||||||
|
saml_redirect_url = self._html_search_regex(
|
||||||
|
r'self\.parent\.location=(["\'])(?P<url>.+?)\1',
|
||||||
|
provider_redirect_page,
|
||||||
|
'SAML Redirect URL', group='url')
|
||||||
|
saml_login_page = self._download_webpage(
|
||||||
|
saml_redirect_url, video_id,
|
||||||
|
'Downloading SAML Login Page')
|
||||||
|
else:
|
||||||
|
saml_login_page_res = post_form(
|
||||||
|
provider_redirect_page_res, 'Logging in', {
|
||||||
|
mso_info['username_field']: username,
|
||||||
|
mso_info['password_field']: password,
|
||||||
|
})
|
||||||
|
saml_login_page, urlh = saml_login_page_res
|
||||||
|
if 'Please try again.' in saml_login_page:
|
||||||
|
raise ExtractorError(
|
||||||
|
'We\'re sorry, but either the User ID or Password entered is not correct.')
|
||||||
|
saml_login_url = self._search_regex(
|
||||||
|
r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P<url>.+?)\1',
|
||||||
|
saml_login_page, 'SAML Login URL', group='url')
|
||||||
|
saml_response_json = self._download_json(
|
||||||
|
saml_login_url, video_id, 'Downloading SAML Response',
|
||||||
|
headers={'Content-Type': 'text/xml'})
|
||||||
|
self._download_webpage(
|
||||||
|
saml_response_json['targetValue'], video_id,
|
||||||
|
'Confirming Login', data=urlencode_postdata({
|
||||||
|
'SAMLResponse': saml_response_json['SAMLResponse'],
|
||||||
|
'RelayState': saml_response_json['RelayState']
|
||||||
|
}), headers={
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded'
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
# Normal, non-Comcast flow
|
|
||||||
provider_login_page_res = post_form(
|
provider_login_page_res = post_form(
|
||||||
provider_redirect_page_res, 'Downloading Provider Login Page')
|
provider_redirect_page_res, 'Downloading Provider Login Page')
|
||||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
||||||
@@ -1448,6 +1495,8 @@ class AdobePassIE(InfoExtractor):
|
|||||||
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
|
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||||
count += 1
|
count += 1
|
||||||
continue
|
continue
|
||||||
|
if '<error' in authorize:
|
||||||
|
raise ExtractorError(xml_text(authorize, 'details'), expected=True)
|
||||||
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
|
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
|
||||||
requestor_info[guid] = authz_token
|
requestor_info[guid] = authz_token
|
||||||
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
|
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
|
||||||
|
|||||||
@@ -23,7 +23,19 @@ class AENetworksBaseIE(ThePlatformIE):
|
|||||||
class AENetworksIE(AENetworksBaseIE):
|
class AENetworksIE(AENetworksBaseIE):
|
||||||
IE_NAME = 'aenetworks'
|
IE_NAME = 'aenetworks'
|
||||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime|lifetimemovieclub)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:www\.)?
|
||||||
|
(?P<domain>
|
||||||
|
(?:history|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||||
|
fyi\.tv
|
||||||
|
)/
|
||||||
|
(?:
|
||||||
|
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
|
||||||
|
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
|
||||||
|
specials/(?P<special_display_id>[^/]+)/full-special
|
||||||
|
)
|
||||||
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||||
'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
|
'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
|
||||||
@@ -65,6 +77,9 @@ class AENetworksIE(AENetworksBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
|
'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||||
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
_DOMAIN_TO_REQUESTOR_ID = {
|
_DOMAIN_TO_REQUESTOR_ID = {
|
||||||
'history.com': 'HISTORY',
|
'history.com': 'HISTORY',
|
||||||
@@ -75,8 +90,8 @@ class AENetworksIE(AENetworksBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
domain, show_path, movie_display_id = re.match(self._VALID_URL, url).groups()
|
domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups()
|
||||||
display_id = show_path or movie_display_id
|
display_id = show_path or movie_display_id or special_display_id
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
if show_path:
|
if show_path:
|
||||||
url_parts = show_path.split('/')
|
url_parts = show_path.split('/')
|
||||||
@@ -107,7 +122,10 @@ class AENetworksIE(AENetworksBaseIE):
|
|||||||
}
|
}
|
||||||
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
||||||
media_url = self._search_regex(
|
media_url = self._search_regex(
|
||||||
r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
|
[r"media_url\s*=\s*'(?P<url>[^']+)'",
|
||||||
|
r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
|
||||||
|
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
|
||||||
|
webpage, 'video url', group='url')
|
||||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||||
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||||
|
|||||||
@@ -4,15 +4,11 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_xpath
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
update_url_query,
|
|
||||||
xpath_element,
|
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -43,7 +39,8 @@ class AfreecaTVIE(InfoExtractor):
|
|||||||
'uploader': 'dailyapril',
|
'uploader': 'dailyapril',
|
||||||
'uploader_id': 'dailyapril',
|
'uploader_id': 'dailyapril',
|
||||||
'upload_date': '20160503',
|
'upload_date': '20160503',
|
||||||
}
|
},
|
||||||
|
'skip': 'Video is gone',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
|
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -71,6 +68,76 @@ class AfreecaTVIE(InfoExtractor):
|
|||||||
'upload_date': '20160502',
|
'upload_date': '20160502',
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
|
'skip': 'Video is gone',
|
||||||
|
}, {
|
||||||
|
'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '18650793',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'uploader': '윈아디',
|
||||||
|
'uploader_id': 'badkids',
|
||||||
|
'duration': 107,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '10481652',
|
||||||
|
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||||
|
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||||
|
'uploader': 'dailyapril',
|
||||||
|
'uploader_id': 'dailyapril',
|
||||||
|
'duration': 6492,
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
'playlist': [{
|
||||||
|
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '20160502_c4c62b9d_174361386_1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
|
||||||
|
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||||
|
'uploader': 'dailyapril',
|
||||||
|
'uploader_id': 'dailyapril',
|
||||||
|
'upload_date': '20160502',
|
||||||
|
'duration': 3601,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '20160502_39e739bb_174361386_2',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
|
||||||
|
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||||
|
'uploader': 'dailyapril',
|
||||||
|
'uploader_id': 'dailyapril',
|
||||||
|
'upload_date': '20160502',
|
||||||
|
'duration': 2891,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# non standard key
|
||||||
|
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '20170411_BE689A0E_190960999_1_2_h',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '혼자사는여자집',
|
||||||
|
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||||
|
'uploader': '♥이슬이',
|
||||||
|
'uploader_id': 'dasl8121',
|
||||||
|
'upload_date': '20170411',
|
||||||
|
'duration': 213,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -85,42 +152,77 @@ class AfreecaTVIE(InfoExtractor):
|
|||||||
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
|
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
|
||||||
if m:
|
if m:
|
||||||
video_key['upload_date'] = m.group('upload_date')
|
video_key['upload_date'] = m.group('upload_date')
|
||||||
video_key['part'] = m.group('part')
|
video_key['part'] = int(m.group('part'))
|
||||||
return video_key
|
return video_key
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
parsed_url = compat_urllib_parse_urlparse(url)
|
|
||||||
info_url = compat_urlparse.urlunparse(parsed_url._replace(
|
|
||||||
netloc='afbbs.afreecatv.com:8080',
|
|
||||||
path='/api/video/get_video_info.php'))
|
|
||||||
|
|
||||||
video_xml = self._download_xml(
|
video_xml = self._download_xml(
|
||||||
update_url_query(info_url, {'nTitleNo': video_id}), video_id)
|
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
|
||||||
|
video_id, query={'nTitleNo': video_id})
|
||||||
|
|
||||||
if xpath_element(video_xml, './track/video/file') is None:
|
video_element = video_xml.findall(compat_xpath('./track/video'))[1]
|
||||||
|
if video_element is None or video_element.text is None:
|
||||||
raise ExtractorError('Specified AfreecaTV video does not exist',
|
raise ExtractorError('Specified AfreecaTV video does not exist',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
title = xpath_text(video_xml, './track/title', 'title')
|
video_url = video_element.text.strip()
|
||||||
|
|
||||||
|
title = xpath_text(video_xml, './track/title', 'title', fatal=True)
|
||||||
|
|
||||||
uploader = xpath_text(video_xml, './track/nickname', 'uploader')
|
uploader = xpath_text(video_xml, './track/nickname', 'uploader')
|
||||||
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
|
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
|
||||||
duration = int_or_none(xpath_text(video_xml, './track/duration',
|
duration = int_or_none(xpath_text(
|
||||||
'duration'))
|
video_xml, './track/duration', 'duration'))
|
||||||
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
|
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
|
||||||
|
|
||||||
entries = []
|
common_entry = {
|
||||||
for i, video_file in enumerate(video_xml.findall('./track/video/file')):
|
'uploader': uploader,
|
||||||
video_key = self.parse_video_key(video_file.get('key', ''))
|
'uploader_id': uploader_id,
|
||||||
if not video_key:
|
'thumbnail': thumbnail,
|
||||||
continue
|
}
|
||||||
entries.append({
|
|
||||||
'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
|
info = common_entry.copy()
|
||||||
'title': title,
|
info.update({
|
||||||
'upload_date': video_key.get('upload_date'),
|
'id': video_id,
|
||||||
'duration': int_or_none(video_file.get('duration')),
|
'title': title,
|
||||||
'url': video_file.text,
|
'duration': duration,
|
||||||
|
})
|
||||||
|
|
||||||
|
if not video_url:
|
||||||
|
entries = []
|
||||||
|
file_elements = video_element.findall(compat_xpath('./file'))
|
||||||
|
one = len(file_elements) == 1
|
||||||
|
for file_num, file_element in enumerate(file_elements, start=1):
|
||||||
|
file_url = file_element.text
|
||||||
|
if not file_url:
|
||||||
|
continue
|
||||||
|
key = file_element.get('key', '')
|
||||||
|
upload_date = self._search_regex(
|
||||||
|
r'^(\d{8})_', key, 'upload date', default=None)
|
||||||
|
file_duration = int_or_none(file_element.get('duration'))
|
||||||
|
format_id = key if key else '%s_%s' % (video_id, file_num)
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls',
|
||||||
|
note='Downloading part %d m3u8 information' % file_num)
|
||||||
|
title = title if one else '%s (part %d)' % (title, file_num)
|
||||||
|
file_info = common_entry.copy()
|
||||||
|
file_info.update({
|
||||||
|
'id': format_id,
|
||||||
|
'title': title,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'duration': file_duration,
|
||||||
|
'formats': formats,
|
||||||
|
})
|
||||||
|
entries.append(file_info)
|
||||||
|
entries_info = info.copy()
|
||||||
|
entries_info.update({
|
||||||
|
'_type': 'multi_video',
|
||||||
|
'entries': entries,
|
||||||
})
|
})
|
||||||
|
return entries_info
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@@ -131,17 +233,18 @@ class AfreecaTVIE(InfoExtractor):
|
|||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(entries) > 1:
|
if determine_ext(video_url) == 'm3u8':
|
||||||
info['_type'] = 'multi_video'
|
info['formats'] = self._extract_m3u8_formats(
|
||||||
info['entries'] = entries
|
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
elif len(entries) == 1:
|
m3u8_id='hls')
|
||||||
info['url'] = entries[0]['url']
|
|
||||||
info['upload_date'] = entries[0].get('upload_date')
|
|
||||||
else:
|
else:
|
||||||
raise ExtractorError(
|
app, playpath = video_url.split('mp4:')
|
||||||
'No files found for the specified AfreecaTV video, either'
|
info.update({
|
||||||
' the URL is incorrect or the video has been made private.',
|
'url': app,
|
||||||
expected=True)
|
'ext': 'flv',
|
||||||
|
'play_path': 'mp4:' + playpath,
|
||||||
|
'rtmp_live': True, # downloading won't end without this
|
||||||
|
})
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|||||||
@@ -15,12 +15,12 @@ class AirMozillaIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
|
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
|
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
|
||||||
'md5': '2e3e7486ba5d180e829d453875b9b8bf',
|
'md5': '8d02f53ee39cf006009180e21df1f3ba',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6x4q2w',
|
'id': '6x4q2w',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
|
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
|
||||||
'thumbnail': r're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
|
'thumbnail': r're:https?://.*/poster\.jpg',
|
||||||
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
|
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
|
||||||
'timestamp': 1422487800,
|
'timestamp': 1422487800,
|
||||||
'upload_date': '20150128',
|
'upload_date': '20150128',
|
||||||
@@ -34,21 +34,13 @@ class AirMozillaIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
|
video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
|
||||||
|
|
||||||
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
|
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
|
||||||
jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
|
jwconfig = self._parse_json(self._search_regex(
|
||||||
metadata = self._parse_json(jwconfig, video_id)
|
r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'url': source['file'],
|
|
||||||
'ext': source['type'],
|
|
||||||
'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
|
|
||||||
'format': source['label'],
|
|
||||||
'height': int(source['label'].rstrip('p')),
|
|
||||||
} for source in metadata['playlist'][0]['sources']]
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
|
info_dict = self._parse_jwplayer_data(jwconfig, video_id)
|
||||||
view_count = int_or_none(self._html_search_regex(
|
view_count = int_or_none(self._html_search_regex(
|
||||||
r'Views since archived: ([0-9]+)',
|
r'Views since archived: ([0-9]+)',
|
||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', fatal=False))
|
||||||
@@ -58,17 +50,17 @@ class AirMozillaIE(InfoExtractor):
|
|||||||
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
|
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
|
||||||
webpage, 'duration', fatal=False))
|
webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
return {
|
info_dict.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'formats': formats,
|
|
||||||
'url': self._og_search_url(webpage),
|
'url': self._og_search_url(webpage),
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'thumbnail': metadata['playlist'][0].get('image'),
|
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
|
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
|
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
|
||||||
}
|
})
|
||||||
|
|
||||||
|
return info_dict
|
||||||
|
|||||||
@@ -2,9 +2,13 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
remove_end,
|
int_or_none,
|
||||||
qualities,
|
qualities,
|
||||||
|
remove_end,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
url_basename,
|
url_basename,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -22,6 +26,10 @@ class AllocineIE(InfoExtractor):
|
|||||||
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
|
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
|
||||||
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
|
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:http://.*\.jpg',
|
||||||
|
'duration': 39,
|
||||||
|
'timestamp': 1404273600,
|
||||||
|
'upload_date': '20140702',
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
|
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
|
||||||
@@ -33,6 +41,10 @@ class AllocineIE(InfoExtractor):
|
|||||||
'title': 'Planes 2 Bande-annonce VF',
|
'title': 'Planes 2 Bande-annonce VF',
|
||||||
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
|
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:http://.*\.jpg',
|
||||||
|
'duration': 69,
|
||||||
|
'timestamp': 1385659800,
|
||||||
|
'upload_date': '20131128',
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
|
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
|
||||||
@@ -44,6 +56,10 @@ class AllocineIE(InfoExtractor):
|
|||||||
'title': 'Dragons 2 - Bande annonce finale VF',
|
'title': 'Dragons 2 - Bande annonce finale VF',
|
||||||
'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
|
'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:http://.*\.jpg',
|
||||||
|
'duration': 144,
|
||||||
|
'timestamp': 1397589900,
|
||||||
|
'upload_date': '20140415',
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.allocine.fr/video/video-19550147/',
|
'url': 'http://www.allocine.fr/video/video-19550147/',
|
||||||
@@ -69,34 +85,37 @@ class AllocineIE(InfoExtractor):
|
|||||||
r'data-model="([^"]+)"', webpage, 'data model', default=None)
|
r'data-model="([^"]+)"', webpage, 'data model', default=None)
|
||||||
if model:
|
if model:
|
||||||
model_data = self._parse_json(model, display_id)
|
model_data = self._parse_json(model, display_id)
|
||||||
|
video = model_data['videos'][0]
|
||||||
for video_url in model_data['sources'].values():
|
title = video['title']
|
||||||
|
for video_url in video['sources'].values():
|
||||||
video_id, format_id = url_basename(video_url).split('_')[:2]
|
video_id, format_id = url_basename(video_url).split('_')[:2]
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'quality': quality(format_id),
|
'quality': quality(format_id),
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
})
|
})
|
||||||
|
duration = int_or_none(video.get('duration'))
|
||||||
title = model_data['title']
|
view_count = int_or_none(video.get('view_count'))
|
||||||
|
timestamp = unified_timestamp(try_get(
|
||||||
|
video, lambda x: x['added_at']['date'], compat_str))
|
||||||
else:
|
else:
|
||||||
video_id = display_id
|
video_id = display_id
|
||||||
media_data = self._download_json(
|
media_data = self._download_json(
|
||||||
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
||||||
|
title = remove_end(
|
||||||
|
self._html_search_regex(
|
||||||
|
r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
|
||||||
|
' - AlloCiné')
|
||||||
for key, value in media_data['video'].items():
|
for key, value in media_data['video'].items():
|
||||||
if not key.endswith('Path'):
|
if not key.endswith('Path'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
format_id = key[:-len('Path')]
|
format_id = key[:-len('Path')]
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'quality': quality(format_id),
|
'quality': quality(format_id),
|
||||||
'url': value,
|
'url': value,
|
||||||
})
|
})
|
||||||
|
duration, view_count, timestamp = [None] * 3
|
||||||
title = remove_end(self._html_search_regex(
|
|
||||||
r'(?s)<title>(.+?)</title>', webpage, 'title'
|
|
||||||
).strip(), ' - AlloCiné')
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
@@ -104,7 +123,10 @@ class AllocineIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
'formats': formats,
|
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'view_count': view_count,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class AMCNetworksIE(ThePlatformIE):
|
class AMCNetworksIE(ThePlatformIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
||||||
'md5': '',
|
'md5': '',
|
||||||
@@ -44,6 +44,12 @@ class AMCNetworksIE(ThePlatformIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
|
'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.wetv.com/shows/mama-june-from-not-to-hot/full-episode/season-01/thin-tervention',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .jwplatform import JWPlatformBaseIE
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
clean_html,
|
clean_html,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ArchiveOrgIE(JWPlatformBaseIE):
|
class ArchiveOrgIE(InfoExtractor):
|
||||||
IE_NAME = 'archive.org'
|
IE_NAME = 'archive.org'
|
||||||
IE_DESC = 'archive.org videos'
|
IE_DESC = 'archive.org videos'
|
||||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
|
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
|
||||||
|
|||||||
@@ -93,8 +93,7 @@ class ArkenaIE(InfoExtractor):
|
|||||||
exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
|
exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
|
||||||
if kind == 'm3u8' or 'm3u8' in exts:
|
if kind == 'm3u8' or 'm3u8' in exts:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
f_url, video_id, 'mp4',
|
f_url, video_id, 'mp4', 'm3u8_native',
|
||||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
|
||||||
m3u8_id=kind, fatal=False, live=is_live))
|
m3u8_id=kind, fatal=False, live=is_live))
|
||||||
elif kind == 'flash' or 'f4m' in exts:
|
elif kind == 'flash' or 'f4m' in exts:
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
|||||||
@@ -90,7 +90,8 @@ class AtresPlayerIE(InfoExtractor):
|
|||||||
request, None, 'Logging in as %s' % username)
|
request, None, 'Logging in as %s' % username)
|
||||||
|
|
||||||
error = self._html_search_regex(
|
error = self._html_search_regex(
|
||||||
r'(?s)<ul class="list_error">(.+?)</ul>', response, 'error', default=None)
|
r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
|
||||||
|
response, 'error', default=None)
|
||||||
if error:
|
if error:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Unable to login: %s' % error, expected=True)
|
'Unable to login: %s' % error, expected=True)
|
||||||
@@ -155,13 +156,17 @@ class AtresPlayerIE(InfoExtractor):
|
|||||||
if format_id == 'token' or not video_url.startswith('http'):
|
if format_id == 'token' or not video_url.startswith('http'):
|
||||||
continue
|
continue
|
||||||
if 'geodeswowsmpra3player' in video_url:
|
if 'geodeswowsmpra3player' in video_url:
|
||||||
f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
|
# f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
|
||||||
f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
|
# f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
|
||||||
# this videos are protected by DRM, the f4m downloader doesn't support them
|
# this videos are protected by DRM, the f4m downloader doesn't support them
|
||||||
continue
|
continue
|
||||||
else:
|
video_url_hd = video_url.replace('free_es', 'es')
|
||||||
f4m_url = video_url[:-9] + '/manifest.f4m'
|
formats.extend(self._extract_f4m_formats(
|
||||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
|
video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
|
||||||
|
fatal=False))
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
|
||||||
|
fatal=False))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
path_data = player.get('pathData')
|
path_data = player.get('pathData')
|
||||||
|
|||||||
73
youtube_dl/extractor/atvat.py
Normal file
73
youtube_dl/extractor/atvat.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
unescapeHTML,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ATVAtIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
|
||||||
|
'md5': 'c3b6b975fb3150fc628572939df205f2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1698447',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||||
|
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
|
||||||
|
webpage, 'player data')), display_id)['config']['initial_video']
|
||||||
|
|
||||||
|
video_id = video_data['id']
|
||||||
|
video_title = video_data['title']
|
||||||
|
|
||||||
|
parts = []
|
||||||
|
for part in video_data.get('parts', []):
|
||||||
|
part_id = part['id']
|
||||||
|
part_title = part['title']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for source in part.get('sources', []):
|
||||||
|
source_url = source.get('src')
|
||||||
|
if not source_url:
|
||||||
|
continue
|
||||||
|
ext = determine_ext(source_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
source_url, part_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'format_id': source.get('delivery'),
|
||||||
|
'url': source_url,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
parts.append({
|
||||||
|
'id': part_id,
|
||||||
|
'title': part_title,
|
||||||
|
'thumbnail': part.get('preview_image_url'),
|
||||||
|
'duration': int_or_none(part.get('duration')),
|
||||||
|
'is_live': part.get('is_livestream'),
|
||||||
|
'formats': formats,
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'multi_video',
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_title,
|
||||||
|
'entries': parts,
|
||||||
|
}
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@@ -5,6 +6,7 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
get_element_by_class,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
urljoin,
|
urljoin,
|
||||||
@@ -170,3 +172,42 @@ class AZMedienPlaylistIE(AZMedienBaseIE):
|
|||||||
'video-title', webpage)), group='title')
|
'video-title', webpage)), group='title')
|
||||||
|
|
||||||
return self.playlist_result(entries, show_id, title)
|
return self.playlist_result(entries, show_id, title)
|
||||||
|
|
||||||
|
|
||||||
|
class AZMedienShowPlaylistIE(AZMedienBaseIE):
|
||||||
|
IE_DESC = 'AZ Medien show playlists'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:www\.)?
|
||||||
|
(?:
|
||||||
|
telezueri\.ch|
|
||||||
|
telebaern\.tv|
|
||||||
|
telem1\.ch
|
||||||
|
)/
|
||||||
|
(?:
|
||||||
|
all-episodes|
|
||||||
|
alle-episoden
|
||||||
|
)/
|
||||||
|
(?P<id>[^/?#&]+)
|
||||||
|
'''
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'astrotalk',
|
||||||
|
'title': 'TeleZüri: AstroTalk - alle episoden',
|
||||||
|
'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 13,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
episodes = get_element_by_class('search-mobile-box', webpage)
|
||||||
|
entries = [self.url_result(
|
||||||
|
urljoin(url, m.group('url'))) for m in re.finditer(
|
||||||
|
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
|
||||||
|
title = self._og_search_title(webpage, fatal=False)
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
|||||||
@@ -361,7 +361,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
fmt.update({
|
fmt.update({
|
||||||
'width': width,
|
'width': width,
|
||||||
'height': height,
|
'height': height,
|
||||||
'vbr': bitrate,
|
'tbr': bitrate,
|
||||||
'vcodec': encoding,
|
'vcodec': encoding,
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
@@ -370,7 +370,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
'acodec': encoding,
|
'acodec': encoding,
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
})
|
})
|
||||||
if protocol == 'http':
|
if protocol in ('http', 'https'):
|
||||||
# Direct link
|
# Direct link
|
||||||
fmt.update({
|
fmt.update({
|
||||||
'url': href,
|
'url': href,
|
||||||
@@ -389,6 +389,8 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
'rtmp_live': False,
|
'rtmp_live': False,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
})
|
})
|
||||||
|
else:
|
||||||
|
continue
|
||||||
formats.append(fmt)
|
formats.append(fmt)
|
||||||
elif kind == 'captions':
|
elif kind == 'captions':
|
||||||
subtitles = self.extract_subtitles(media, programme_id)
|
subtitles = self.extract_subtitles(media, programme_id)
|
||||||
@@ -407,7 +409,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
description = smp_config['summary']
|
description = smp_config['summary']
|
||||||
for item in smp_config['items']:
|
for item in smp_config['items']:
|
||||||
kind = item['kind']
|
kind = item['kind']
|
||||||
if kind != 'programme' and kind != 'radioProgramme':
|
if kind not in ('programme', 'radioProgramme'):
|
||||||
continue
|
continue
|
||||||
programme_id = item.get('vpid')
|
programme_id = item.get('vpid')
|
||||||
duration = int_or_none(item.get('duration'))
|
duration = int_or_none(item.get('duration'))
|
||||||
@@ -448,7 +450,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
|
|
||||||
for item in self._extract_items(playlist):
|
for item in self._extract_items(playlist):
|
||||||
kind = item.get('kind')
|
kind = item.get('kind')
|
||||||
if kind != 'programme' and kind != 'radioProgramme':
|
if kind not in ('programme', 'radioProgramme'):
|
||||||
continue
|
continue
|
||||||
title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
|
title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
|
||||||
description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
|
description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
|
||||||
|
|||||||
@@ -21,10 +21,11 @@ class BellMediaIE(InfoExtractor):
|
|||||||
animalplanet|
|
animalplanet|
|
||||||
bravo|
|
bravo|
|
||||||
mtv|
|
mtv|
|
||||||
space
|
space|
|
||||||
|
etalk
|
||||||
)\.ca|
|
)\.ca|
|
||||||
much\.com
|
much\.com
|
||||||
)/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||||
@@ -58,6 +59,9 @@ class BellMediaIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
|
'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.etalk.ca/video?videoid=663455',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_DOMAINS = {
|
_DOMAINS = {
|
||||||
'thecomedynetwork': 'comedy',
|
'thecomedynetwork': 'comedy',
|
||||||
@@ -65,6 +69,7 @@ class BellMediaIE(InfoExtractor):
|
|||||||
'sciencechannel': 'discsci',
|
'sciencechannel': 'discsci',
|
||||||
'investigationdiscovery': 'invdisc',
|
'investigationdiscovery': 'invdisc',
|
||||||
'animalplanet': 'aniplan',
|
'animalplanet': 'aniplan',
|
||||||
|
'etalk': 'ctv',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
72
youtube_dl/extractor/bostonglobe.py
Normal file
72
youtube_dl/extractor/bostonglobe.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BostonGlobeIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?'
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html',
|
||||||
|
'md5': '0a62181079c85c2d2b618c9a738aedaf',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood',
|
||||||
|
'id': '5320421710001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.',
|
||||||
|
'timestamp': 1486877593,
|
||||||
|
'upload_date': '20170212',
|
||||||
|
'uploader_id': '245991542',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Embedded youtube video; we hand it off to the Generic extractor.
|
||||||
|
'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html',
|
||||||
|
'md5': '582b40327089d5c0c949b3c54b13c24b',
|
||||||
|
'info_dict': {
|
||||||
|
'title': "Who Is Matt Damon's Favorite Batman?",
|
||||||
|
'id': 'ZW1QCnlA6Qc',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20170217',
|
||||||
|
'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb',
|
||||||
|
'uploader': 'The Late Late Show with James Corden',
|
||||||
|
'uploader_id': 'TheLateLateShow',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['404'],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
page_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, page_id)
|
||||||
|
|
||||||
|
page_title = self._og_search_title(webpage, default=None)
|
||||||
|
|
||||||
|
# <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">
|
||||||
|
entries = []
|
||||||
|
for video in re.findall(r'(?i)(<video[^>]+>)', webpage):
|
||||||
|
attrs = extract_attributes(video)
|
||||||
|
|
||||||
|
video_id = attrs.get('data-brightcove-video-id')
|
||||||
|
account_id = attrs.get('data-account')
|
||||||
|
player_id = attrs.get('data-player')
|
||||||
|
embed = attrs.get('data-embed')
|
||||||
|
|
||||||
|
if video_id and account_id and player_id and embed:
|
||||||
|
entries.append(
|
||||||
|
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
||||||
|
% (account_id, player_id, embed, video_id))
|
||||||
|
|
||||||
|
if len(entries) == 0:
|
||||||
|
return self.url_result(url, 'Generic')
|
||||||
|
elif len(entries) == 1:
|
||||||
|
return self.url_result(entries[0], 'BrightcoveNew')
|
||||||
|
else:
|
||||||
|
return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew')
|
||||||
@@ -191,6 +191,16 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
# These fields hold the id of the video
|
# These fields hold the id of the video
|
||||||
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
|
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
|
||||||
if videoPlayer is not None:
|
if videoPlayer is not None:
|
||||||
|
if isinstance(videoPlayer, list):
|
||||||
|
videoPlayer = videoPlayer[0]
|
||||||
|
videoPlayer = videoPlayer.strip()
|
||||||
|
# UUID is also possible for videoPlayer (e.g.
|
||||||
|
# http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
|
||||||
|
# or http://www8.hp.com/cn/zh/home.html)
|
||||||
|
if not (re.match(
|
||||||
|
r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
|
||||||
|
videoPlayer) or videoPlayer.startswith('ref:')):
|
||||||
|
return None
|
||||||
params['@videoPlayer'] = videoPlayer
|
params['@videoPlayer'] = videoPlayer
|
||||||
linkBase = find_param('linkBaseURL')
|
linkBase = find_param('linkBaseURL')
|
||||||
if linkBase is not None:
|
if linkBase is not None:
|
||||||
@@ -511,6 +521,9 @@ class BrightcoveNewIE(InfoExtractor):
|
|||||||
return entries
|
return entries
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
||||||
|
|
||||||
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
|
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
@@ -540,8 +553,10 @@ class BrightcoveNewIE(InfoExtractor):
|
|||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
||||||
raise ExtractorError(
|
message = json_data.get('message') or json_data['error_code']
|
||||||
json_data.get('message') or json_data['error_code'], expected=True)
|
if json_data.get('error_subcode') == 'CLIENT_GEO':
|
||||||
|
self.raise_geo_restricted(msg=message)
|
||||||
|
raise ExtractorError(message, expected=True)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
title = json_data['name'].strip()
|
title = json_data['name'].strip()
|
||||||
|
|||||||
@@ -45,6 +45,9 @@ class CanalplusIE(InfoExtractor):
|
|||||||
'itele': 'itele',
|
'itele': 'itele',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Only works for direct mp4 URLs
|
||||||
|
_GEO_COUNTRIES = ['FR']
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
|
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -56,6 +59,7 @@ class CanalplusIE(InfoExtractor):
|
|||||||
'upload_date': '20160702',
|
'upload_date': '20160702',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
# geo restricted, bypassed
|
||||||
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
|
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1108190',
|
'id': '1108190',
|
||||||
@@ -65,19 +69,20 @@ class CanalplusIE(InfoExtractor):
|
|||||||
'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
|
'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
|
||||||
'upload_date': '20140724',
|
'upload_date': '20140724',
|
||||||
},
|
},
|
||||||
'skip': 'Only works from France',
|
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html',
|
# geo restricted, bypassed
|
||||||
'md5': '4b47b12b4ee43002626b97fad8fb1de5',
|
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html?vid=1443684',
|
||||||
|
'md5': 'bb6f9f343296ab7ebd88c97b660ecf8d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1420213',
|
'id': '1443684',
|
||||||
'display_id': 'pid6318-videos-integrales',
|
'display_id': 'pid6318-videos-integrales',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016',
|
'title': 'Guess my iep ! - TPMP - 07/04/2017',
|
||||||
'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799',
|
'description': 'md5:6f005933f6e06760a9236d9b3b5f17fa',
|
||||||
'upload_date': '20161014',
|
'upload_date': '20170407',
|
||||||
},
|
},
|
||||||
'skip': 'Only works from France',
|
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
|
'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -134,15 +139,15 @@ class CanalplusIE(InfoExtractor):
|
|||||||
|
|
||||||
preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD'])
|
preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD'])
|
||||||
|
|
||||||
fmt_url = next(iter(media.get('VIDEOS')))
|
# _, fmt_url = next(iter(media['VIDEOS'].items()))
|
||||||
if '/geo' in fmt_url.lower():
|
# if '/geo' in fmt_url.lower():
|
||||||
response = self._request_webpage(
|
# response = self._request_webpage(
|
||||||
HEADRequest(fmt_url), video_id,
|
# HEADRequest(fmt_url), video_id,
|
||||||
'Checking if the video is georestricted')
|
# 'Checking if the video is georestricted')
|
||||||
if '/blocage' in response.geturl():
|
# if '/blocage' in response.geturl():
|
||||||
raise ExtractorError(
|
# raise ExtractorError(
|
||||||
'The video is not available in your country',
|
# 'The video is not available in your country',
|
||||||
expected=True)
|
# expected=True)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_url in media['VIDEOS'].items():
|
for format_id, format_url in media['VIDEOS'].items():
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from ..utils import float_or_none
|
|||||||
|
|
||||||
|
|
||||||
class CanvasIE(InfoExtractor):
|
class CanvasIE(InfoExtractor):
|
||||||
|
IE_DESC = 'canvas.be and een.be'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import codecs
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -96,6 +97,10 @@ class CDAIE(InfoExtractor):
|
|||||||
if not video or 'file' not in video:
|
if not video or 'file' not in video:
|
||||||
self.report_warning('Unable to extract %s version information' % version)
|
self.report_warning('Unable to extract %s version information' % version)
|
||||||
return
|
return
|
||||||
|
if video['file'].startswith('uggc'):
|
||||||
|
video['file'] = codecs.decode(video['file'], 'rot_13')
|
||||||
|
if video['file'].endswith('adc.mp4'):
|
||||||
|
video['file'] = video['file'].replace('adc.mp4', '.mp4')
|
||||||
f = {
|
f = {
|
||||||
'url': video['file'],
|
'url': video['file'],
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,13 +12,14 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
|
unescapeHTML,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
USER_AGENTS,
|
USER_AGENTS,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CeskaTelevizeIE(InfoExtractor):
|
class CeskaTelevizeIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
|
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -62,40 +63,12 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'skip': 'Georestricted to Czech Republic',
|
'skip': 'Georestricted to Czech Republic',
|
||||||
}, {
|
}, {
|
||||||
# video with 18+ caution trailer
|
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
|
||||||
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
'only_matching': True,
|
||||||
'info_dict': {
|
|
||||||
'id': '215562210900007-bogotart',
|
|
||||||
'title': 'Queer: Bogotart',
|
|
||||||
'description': 'Alternativní průvodce současným queer světem',
|
|
||||||
},
|
|
||||||
'playlist': [{
|
|
||||||
'info_dict': {
|
|
||||||
'id': '61924494876844842',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Queer: Bogotart (Varování 18+)',
|
|
||||||
'duration': 10.2,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'info_dict': {
|
|
||||||
'id': '61924494877068022',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Queer: Bogotart (Queer)',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
|
||||||
'duration': 1558.3,
|
|
||||||
},
|
|
||||||
}],
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
playlist_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
@@ -103,13 +76,28 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
||||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||||
|
|
||||||
typ = self._html_search_regex(
|
type_ = None
|
||||||
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
|
episode_id = None
|
||||||
episode_id = self._html_search_regex(
|
|
||||||
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
|
playlist = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
|
||||||
|
default='{}'), playlist_id)
|
||||||
|
if playlist:
|
||||||
|
type_ = playlist.get('type')
|
||||||
|
episode_id = playlist.get('id')
|
||||||
|
|
||||||
|
if not type_:
|
||||||
|
type_ = self._html_search_regex(
|
||||||
|
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
|
||||||
|
webpage, 'type')
|
||||||
|
if not episode_id:
|
||||||
|
episode_id = self._html_search_regex(
|
||||||
|
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
|
||||||
|
webpage, 'episode_id')
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
'playlist[0][type]': typ,
|
'playlist[0][type]': type_,
|
||||||
'playlist[0][id]': episode_id,
|
'playlist[0][id]': episode_id,
|
||||||
'requestUrl': compat_urllib_parse_urlparse(url).path,
|
'requestUrl': compat_urllib_parse_urlparse(url).path,
|
||||||
'requestSource': 'iVysilani',
|
'requestSource': 'iVysilani',
|
||||||
@@ -160,8 +148,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
for format_id, stream_url in item.get('streamUrls', {}).items():
|
for format_id, stream_url in item.get('streamUrls', {}).items():
|
||||||
if 'playerType=flash' in stream_url:
|
if 'playerType=flash' in stream_url:
|
||||||
stream_formats = self._extract_m3u8_formats(
|
stream_formats = self._extract_m3u8_formats(
|
||||||
stream_url, playlist_id, 'mp4',
|
stream_url, playlist_id, 'mp4', 'm3u8_native',
|
||||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
|
||||||
m3u8_id='hls-%s' % format_id, fatal=False)
|
m3u8_id='hls-%s' % format_id, fatal=False)
|
||||||
else:
|
else:
|
||||||
stream_formats = self._extract_mpd_formats(
|
stream_formats = self._extract_mpd_formats(
|
||||||
@@ -246,3 +233,47 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
yield line
|
yield line
|
||||||
|
|
||||||
return '\r\n'.join(_fix_subtitle(subtitles))
|
return '\r\n'.join(_fix_subtitle(subtitles))
|
||||||
|
|
||||||
|
|
||||||
|
class CeskaTelevizePoradyIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
# video with 18+ caution trailer
|
||||||
|
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '215562210900007-bogotart',
|
||||||
|
'title': 'Queer: Bogotart',
|
||||||
|
'description': 'Alternativní průvodce současným queer světem',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '61924494876844842',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Queer: Bogotart (Varování 18+)',
|
||||||
|
'duration': 10.2,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '61924494877068022',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Queer: Bogotart (Queer)',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'duration': 1558.3,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
data_url = unescapeHTML(self._search_regex(
|
||||||
|
r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'iframe player url', group='url'))
|
||||||
|
|
||||||
|
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
|
||||||
|
|||||||
@@ -4,62 +4,62 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
parse_filesize,
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
qualities,
|
qualities,
|
||||||
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class Channel9IE(InfoExtractor):
|
class Channel9IE(InfoExtractor):
|
||||||
'''
|
|
||||||
Common extractor for channel9.msdn.com.
|
|
||||||
|
|
||||||
The type of provided URL (video or playlist) is determined according to
|
|
||||||
meta Search.PageType from web page HTML rather than URL itself, as it is
|
|
||||||
not always possible to do.
|
|
||||||
'''
|
|
||||||
IE_DESC = 'Channel 9'
|
IE_DESC = 'Channel 9'
|
||||||
IE_NAME = 'channel9'
|
IE_NAME = 'channel9'
|
||||||
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
|
_VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
'md5': '32083d4eaf1946db6d454313f44510ca',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'Events/TechEd/Australia/2013/KOS002',
|
'id': '6c413323-383a-49dc-88f9-a22800cab024',
|
||||||
'ext': 'mp4',
|
'ext': 'wmv',
|
||||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731',
|
||||||
'duration': 4576,
|
'duration': 4576,
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
|
'timestamp': 1377717420,
|
||||||
|
'upload_date': '20130828',
|
||||||
'session_code': 'KOS002',
|
'session_code': 'KOS002',
|
||||||
'session_day': 'Day 1',
|
|
||||||
'session_room': 'Arena 1A',
|
'session_room': 'Arena 1A',
|
||||||
'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug',
|
'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'],
|
||||||
'Mads Kristensen'],
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
'md5': 'dcf983ee6acd2088e7188c3cf79b46bc',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024',
|
||||||
'ext': 'mp4',
|
'ext': 'wmv',
|
||||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54',
|
||||||
'duration': 1540,
|
'duration': 1540,
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
|
'timestamp': 1386381991,
|
||||||
|
'upload_date': '20131207',
|
||||||
'authors': ['Mike Wilmot'],
|
'authors': ['Mike Wilmot'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# low quality mp4 is best
|
# low quality mp4 is best
|
||||||
'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
'id': '33ad69d2-6a4e-4172-83a1-a523013dec76',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ranges for the Standard Library',
|
'title': 'Ranges for the Standard Library',
|
||||||
'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
|
'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372',
|
||||||
'duration': 5646,
|
'duration': 5646,
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
|
'upload_date': '20150930',
|
||||||
|
'timestamp': 1443640735,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@@ -70,7 +70,7 @@ class Channel9IE(InfoExtractor):
|
|||||||
'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
|
'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
|
||||||
'title': 'Channel 9',
|
'title': 'Channel 9',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_mincount': 100,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
|
'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -81,189 +81,6 @@ class Channel9IE(InfoExtractor):
|
|||||||
|
|
||||||
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
||||||
|
|
||||||
def _formats_from_html(self, html):
|
|
||||||
FORMAT_REGEX = r'''
|
|
||||||
(?x)
|
|
||||||
<a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s*
|
|
||||||
<span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s*
|
|
||||||
(?:<div\s+class="popup\s+rounded">\s*
|
|
||||||
<h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
|
|
||||||
</div>)? # File size part may be missing
|
|
||||||
'''
|
|
||||||
quality = qualities((
|
|
||||||
'MP3', 'MP4',
|
|
||||||
'Low Quality WMV', 'Low Quality MP4',
|
|
||||||
'Mid Quality WMV', 'Mid Quality MP4',
|
|
||||||
'High Quality WMV', 'High Quality MP4'))
|
|
||||||
formats = [{
|
|
||||||
'url': x.group('url'),
|
|
||||||
'format_id': x.group('quality'),
|
|
||||||
'format_note': x.group('note'),
|
|
||||||
'format': '%s (%s)' % (x.group('quality'), x.group('note')),
|
|
||||||
'filesize_approx': parse_filesize(x.group('filesize')),
|
|
||||||
'quality': quality(x.group('quality')),
|
|
||||||
'vcodec': 'none' if x.group('note') == 'Audio only' else None,
|
|
||||||
} for x in list(re.finditer(FORMAT_REGEX, html))]
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _extract_title(self, html):
|
|
||||||
title = self._html_search_meta('title', html, 'title')
|
|
||||||
if title is None:
|
|
||||||
title = self._og_search_title(html)
|
|
||||||
TITLE_SUFFIX = ' (Channel 9)'
|
|
||||||
if title is not None and title.endswith(TITLE_SUFFIX):
|
|
||||||
title = title[:-len(TITLE_SUFFIX)]
|
|
||||||
return title
|
|
||||||
|
|
||||||
def _extract_description(self, html):
|
|
||||||
DESCRIPTION_REGEX = r'''(?sx)
|
|
||||||
<div\s+class="entry-content">\s*
|
|
||||||
<div\s+id="entry-body">\s*
|
|
||||||
(?P<description>.+?)\s*
|
|
||||||
</div>\s*
|
|
||||||
</div>
|
|
||||||
'''
|
|
||||||
m = re.search(DESCRIPTION_REGEX, html)
|
|
||||||
if m is not None:
|
|
||||||
return m.group('description')
|
|
||||||
return self._html_search_meta('description', html, 'description')
|
|
||||||
|
|
||||||
def _extract_duration(self, html):
|
|
||||||
m = re.search(r'"length": *"(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
|
|
||||||
return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None
|
|
||||||
|
|
||||||
def _extract_slides(self, html):
|
|
||||||
m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html)
|
|
||||||
return m.group('slidesurl') if m is not None else None
|
|
||||||
|
|
||||||
def _extract_zip(self, html):
|
|
||||||
m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html)
|
|
||||||
return m.group('zipurl') if m is not None else None
|
|
||||||
|
|
||||||
def _extract_avg_rating(self, html):
|
|
||||||
m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html)
|
|
||||||
return float(m.group('avgrating')) if m is not None else 0
|
|
||||||
|
|
||||||
def _extract_rating_count(self, html):
|
|
||||||
m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html)
|
|
||||||
return int(self._fix_count(m.group('ratingcount'))) if m is not None else 0
|
|
||||||
|
|
||||||
def _extract_view_count(self, html):
|
|
||||||
m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html)
|
|
||||||
return int(self._fix_count(m.group('viewcount'))) if m is not None else 0
|
|
||||||
|
|
||||||
def _extract_comment_count(self, html):
|
|
||||||
m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html)
|
|
||||||
return int(self._fix_count(m.group('commentcount'))) if m is not None else 0
|
|
||||||
|
|
||||||
def _fix_count(self, count):
|
|
||||||
return int(str(count).replace(',', '')) if count is not None else None
|
|
||||||
|
|
||||||
def _extract_authors(self, html):
|
|
||||||
m = re.search(r'(?s)<li class="author">(.*?)</li>', html)
|
|
||||||
if m is None:
|
|
||||||
return None
|
|
||||||
return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1))
|
|
||||||
|
|
||||||
def _extract_session_code(self, html):
|
|
||||||
m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html)
|
|
||||||
return m.group('code') if m is not None else None
|
|
||||||
|
|
||||||
def _extract_session_day(self, html):
|
|
||||||
m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
|
|
||||||
return m.group('day').strip() if m is not None else None
|
|
||||||
|
|
||||||
def _extract_session_room(self, html):
|
|
||||||
m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
|
|
||||||
return m.group('room') if m is not None else None
|
|
||||||
|
|
||||||
def _extract_session_speakers(self, html):
|
|
||||||
return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)
|
|
||||||
|
|
||||||
def _extract_content(self, html, content_path):
|
|
||||||
# Look for downloadable content
|
|
||||||
formats = self._formats_from_html(html)
|
|
||||||
slides = self._extract_slides(html)
|
|
||||||
zip_ = self._extract_zip(html)
|
|
||||||
|
|
||||||
# Nothing to download
|
|
||||||
if len(formats) == 0 and slides is None and zip_ is None:
|
|
||||||
self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Extract meta
|
|
||||||
title = self._extract_title(html)
|
|
||||||
description = self._extract_description(html)
|
|
||||||
thumbnail = self._og_search_thumbnail(html)
|
|
||||||
duration = self._extract_duration(html)
|
|
||||||
avg_rating = self._extract_avg_rating(html)
|
|
||||||
rating_count = self._extract_rating_count(html)
|
|
||||||
view_count = self._extract_view_count(html)
|
|
||||||
comment_count = self._extract_comment_count(html)
|
|
||||||
|
|
||||||
common = {
|
|
||||||
'_type': 'video',
|
|
||||||
'id': content_path,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
'avg_rating': avg_rating,
|
|
||||||
'rating_count': rating_count,
|
|
||||||
'view_count': view_count,
|
|
||||||
'comment_count': comment_count,
|
|
||||||
}
|
|
||||||
|
|
||||||
result = []
|
|
||||||
|
|
||||||
if slides is not None:
|
|
||||||
d = common.copy()
|
|
||||||
d.update({'title': title + '-Slides', 'url': slides})
|
|
||||||
result.append(d)
|
|
||||||
|
|
||||||
if zip_ is not None:
|
|
||||||
d = common.copy()
|
|
||||||
d.update({'title': title + '-Zip', 'url': zip_})
|
|
||||||
result.append(d)
|
|
||||||
|
|
||||||
if len(formats) > 0:
|
|
||||||
d = common.copy()
|
|
||||||
d.update({'title': title, 'formats': formats})
|
|
||||||
result.append(d)
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def _extract_entry_item(self, html, content_path):
|
|
||||||
contents = self._extract_content(html, content_path)
|
|
||||||
if contents is None:
|
|
||||||
return contents
|
|
||||||
|
|
||||||
if len(contents) > 1:
|
|
||||||
raise ExtractorError('Got more than one entry')
|
|
||||||
result = contents[0]
|
|
||||||
result['authors'] = self._extract_authors(html)
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def _extract_session(self, html, content_path):
|
|
||||||
contents = self._extract_content(html, content_path)
|
|
||||||
if contents is None:
|
|
||||||
return contents
|
|
||||||
|
|
||||||
session_meta = {
|
|
||||||
'session_code': self._extract_session_code(html),
|
|
||||||
'session_day': self._extract_session_day(html),
|
|
||||||
'session_room': self._extract_session_room(html),
|
|
||||||
'session_speakers': self._extract_session_speakers(html),
|
|
||||||
}
|
|
||||||
|
|
||||||
for content in contents:
|
|
||||||
content.update(session_meta)
|
|
||||||
|
|
||||||
return self.playlist_result(contents)
|
|
||||||
|
|
||||||
def _extract_list(self, video_id, rss_url=None):
|
def _extract_list(self, video_id, rss_url=None):
|
||||||
if not rss_url:
|
if not rss_url:
|
||||||
rss_url = self._RSS_URL % video_id
|
rss_url = self._RSS_URL % video_id
|
||||||
@@ -274,9 +91,7 @@ class Channel9IE(InfoExtractor):
|
|||||||
return self.playlist_result(entries, video_id, title_text)
|
return self.playlist_result(entries, video_id, title_text)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
content_path, rss = re.match(self._VALID_URL, url).groups()
|
||||||
content_path = mobj.group('contentpath')
|
|
||||||
rss = mobj.group('rss')
|
|
||||||
|
|
||||||
if rss:
|
if rss:
|
||||||
return self._extract_list(content_path, url)
|
return self._extract_list(content_path, url)
|
||||||
@@ -284,17 +99,158 @@ class Channel9IE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
url, content_path, 'Downloading web page')
|
url, content_path, 'Downloading web page')
|
||||||
|
|
||||||
page_type = self._search_regex(
|
episode_data = self._search_regex(
|
||||||
r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2',
|
r"data-episode='([^']+)'", webpage, 'episode data', default=None)
|
||||||
webpage, 'page type', default=None, group='pagetype')
|
if episode_data:
|
||||||
if page_type:
|
episode_data = self._parse_json(unescapeHTML(
|
||||||
if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content
|
episode_data), content_path)
|
||||||
return self._extract_entry_item(webpage, content_path)
|
content_id = episode_data['contentId']
|
||||||
elif page_type == 'Session': # Event session page, may contain downloadable content
|
is_session = '/Sessions(' in episode_data['api']
|
||||||
return self._extract_session(webpage, content_path)
|
content_url = 'https://channel9.msdn.com/odata' + episode_data['api']
|
||||||
elif page_type == 'Event':
|
if is_session:
|
||||||
return self._extract_list(content_path)
|
content_url += '?$expand=Speakers'
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
|
content_url += '?$expand=Authors'
|
||||||
else: # Assuming list
|
content_data = self._download_json(content_url, content_id)
|
||||||
|
title = content_data['Title']
|
||||||
|
|
||||||
|
QUALITIES = (
|
||||||
|
'mp3',
|
||||||
|
'wmv', 'mp4',
|
||||||
|
'wmv-low', 'mp4-low',
|
||||||
|
'wmv-mid', 'mp4-mid',
|
||||||
|
'wmv-high', 'mp4-high',
|
||||||
|
)
|
||||||
|
|
||||||
|
quality_key = qualities(QUALITIES)
|
||||||
|
|
||||||
|
def quality(quality_id, format_url):
|
||||||
|
return (len(QUALITIES) if '_Source.' in format_url
|
||||||
|
else quality_key(quality_id))
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
urls = set()
|
||||||
|
|
||||||
|
SITE_QUALITIES = {
|
||||||
|
'MP3': 'mp3',
|
||||||
|
'MP4': 'mp4',
|
||||||
|
'Low Quality WMV': 'wmv-low',
|
||||||
|
'Low Quality MP4': 'mp4-low',
|
||||||
|
'Mid Quality WMV': 'wmv-mid',
|
||||||
|
'Mid Quality MP4': 'mp4-mid',
|
||||||
|
'High Quality WMV': 'wmv-high',
|
||||||
|
'High Quality MP4': 'mp4-high',
|
||||||
|
}
|
||||||
|
|
||||||
|
formats_select = self._search_regex(
|
||||||
|
r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
|
||||||
|
'formats select', default=None)
|
||||||
|
if formats_select:
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
|
||||||
|
formats_select):
|
||||||
|
format_url = mobj.group('url')
|
||||||
|
if format_url in urls:
|
||||||
|
continue
|
||||||
|
urls.add(format_url)
|
||||||
|
format_id = mobj.group('format')
|
||||||
|
quality_id = SITE_QUALITIES.get(format_id, format_id)
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': quality_id,
|
||||||
|
'quality': quality(quality_id, format_url),
|
||||||
|
'vcodec': 'none' if quality_id == 'mp3' else None,
|
||||||
|
})
|
||||||
|
|
||||||
|
API_QUALITIES = {
|
||||||
|
'VideoMP4Low': 'mp4-low',
|
||||||
|
'VideoWMV': 'wmv-mid',
|
||||||
|
'VideoMP4Medium': 'mp4-mid',
|
||||||
|
'VideoMP4High': 'mp4-high',
|
||||||
|
'VideoWMVHQ': 'wmv-hq',
|
||||||
|
}
|
||||||
|
|
||||||
|
for format_id, q in API_QUALITIES.items():
|
||||||
|
q_url = content_data.get(format_id)
|
||||||
|
if not q_url or q_url in urls:
|
||||||
|
continue
|
||||||
|
urls.add(q_url)
|
||||||
|
formats.append({
|
||||||
|
'url': q_url,
|
||||||
|
'format_id': q,
|
||||||
|
'quality': quality(q, q_url),
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
slides = content_data.get('Slides')
|
||||||
|
zip_file = content_data.get('ZipFile')
|
||||||
|
|
||||||
|
if not formats and not slides and not zip_file:
|
||||||
|
raise ExtractorError(
|
||||||
|
'None of recording, slides or zip are available for %s' % content_path)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for caption in content_data.get('Captions', []):
|
||||||
|
caption_url = caption.get('Url')
|
||||||
|
if not caption_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(caption.get('Language', 'en'), []).append({
|
||||||
|
'url': caption_url,
|
||||||
|
'ext': 'vtt',
|
||||||
|
})
|
||||||
|
|
||||||
|
common = {
|
||||||
|
'id': content_id,
|
||||||
|
'title': title,
|
||||||
|
'description': clean_html(content_data.get('Description') or content_data.get('Body')),
|
||||||
|
'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'),
|
||||||
|
'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
|
||||||
|
'timestamp': parse_iso8601(content_data.get('PublishedDate')),
|
||||||
|
'avg_rating': int_or_none(content_data.get('Rating')),
|
||||||
|
'rating_count': int_or_none(content_data.get('RatingCount')),
|
||||||
|
'view_count': int_or_none(content_data.get('Views')),
|
||||||
|
'comment_count': int_or_none(content_data.get('CommentCount')),
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
if is_session:
|
||||||
|
speakers = []
|
||||||
|
for s in content_data.get('Speakers', []):
|
||||||
|
speaker_name = s.get('FullName')
|
||||||
|
if not speaker_name:
|
||||||
|
continue
|
||||||
|
speakers.append(speaker_name)
|
||||||
|
|
||||||
|
common.update({
|
||||||
|
'session_code': content_data.get('Code'),
|
||||||
|
'session_room': content_data.get('Room'),
|
||||||
|
'session_speakers': speakers,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
authors = []
|
||||||
|
for a in content_data.get('Authors', []):
|
||||||
|
author_name = a.get('DisplayName')
|
||||||
|
if not author_name:
|
||||||
|
continue
|
||||||
|
authors.append(author_name)
|
||||||
|
common['authors'] = authors
|
||||||
|
|
||||||
|
contents = []
|
||||||
|
|
||||||
|
if slides:
|
||||||
|
d = common.copy()
|
||||||
|
d.update({'title': title + '-Slides', 'url': slides})
|
||||||
|
contents.append(d)
|
||||||
|
|
||||||
|
if zip_file:
|
||||||
|
d = common.copy()
|
||||||
|
d.update({'title': title + '-Zip', 'url': zip_file})
|
||||||
|
contents.append(d)
|
||||||
|
|
||||||
|
if formats:
|
||||||
|
d = common.copy()
|
||||||
|
d.update({'title': title, 'formats': formats})
|
||||||
|
contents.append(d)
|
||||||
|
return self.playlist_result(contents)
|
||||||
|
else:
|
||||||
return self._extract_list(content_path)
|
return self._extract_list(content_path)
|
||||||
|
|||||||
@@ -33,10 +33,17 @@ class ChaturbateIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer(
|
m3u8_urls = []
|
||||||
r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)]
|
|
||||||
|
|
||||||
if not m3u8_formats:
|
for m in re.finditer(
|
||||||
|
r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
|
||||||
|
m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group(
|
||||||
|
'url').replace('_fast', '')
|
||||||
|
for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
|
||||||
|
if m3u8_url not in m3u8_urls:
|
||||||
|
m3u8_urls.append(m3u8_url)
|
||||||
|
|
||||||
|
if not m3u8_urls:
|
||||||
error = self._search_regex(
|
error = self._search_regex(
|
||||||
[r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
|
[r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
|
||||||
r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
|
r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
|
||||||
@@ -50,7 +57,8 @@ class ChaturbateIE(InfoExtractor):
|
|||||||
raise ExtractorError('Unable to find stream URL')
|
raise ExtractorError('Unable to find stream URL')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for m3u8_id, m3u8_url in m3u8_formats:
|
for m3u8_url in m3u8_urls:
|
||||||
|
m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow'
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id, ext='mp4',
|
m3u8_url, video_id, ext='mp4',
|
||||||
# ffmpeg skips segments for fast m3u8
|
# ffmpeg skips segments for fast m3u8
|
||||||
|
|||||||
@@ -1,97 +1,56 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
|
||||||
compat_parse_qs,
|
|
||||||
compat_HTTPError,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
str_to_int,
|
||||||
HEADRequest,
|
unified_strdate,
|
||||||
remove_end,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CloudyIE(InfoExtractor):
|
class CloudyIE(InfoExtractor):
|
||||||
_IE_DESC = 'cloudy.ec'
|
_IE_DESC = 'cloudy.ec'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
|
||||||
https?://(?:www\.)?cloudy\.ec/
|
_TESTS = [{
|
||||||
(?:v/|embed\.php\?id=)
|
|
||||||
(?P<id>[A-Za-z0-9]+)
|
|
||||||
'''
|
|
||||||
_EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'
|
|
||||||
_API_URL = 'http://www.cloudy.ec/api/player.api.php'
|
|
||||||
_MAX_TRIES = 2
|
|
||||||
_TEST = {
|
|
||||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||||
'md5': '5cb253ace826a42f35b4740539bedf07',
|
'md5': '29832b05028ead1b58be86bf319397ca',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'af511e2527aac',
|
'id': 'af511e2527aac',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Funny Cats and Animals Compilation june 2013',
|
'title': 'Funny Cats and Animals Compilation june 2013',
|
||||||
|
'upload_date': '20130913',
|
||||||
|
'view_count': int,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac',
|
||||||
def _extract_video(self, video_id, file_key, error_url=None, try_num=0):
|
'only_matching': True,
|
||||||
|
}]
|
||||||
if try_num > self._MAX_TRIES - 1:
|
|
||||||
raise ExtractorError('Unable to extract video URL', expected=True)
|
|
||||||
|
|
||||||
form = {
|
|
||||||
'file': video_id,
|
|
||||||
'key': file_key,
|
|
||||||
}
|
|
||||||
|
|
||||||
if error_url:
|
|
||||||
form.update({
|
|
||||||
'numOfErrors': try_num,
|
|
||||||
'errorCode': '404',
|
|
||||||
'errorUrl': error_url,
|
|
||||||
})
|
|
||||||
|
|
||||||
player_data = self._download_webpage(
|
|
||||||
self._API_URL, video_id, 'Downloading player data', query=form)
|
|
||||||
data = compat_parse_qs(player_data)
|
|
||||||
|
|
||||||
try_num += 1
|
|
||||||
|
|
||||||
if 'error' in data:
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
title = data.get('title', [None])[0]
|
|
||||||
if title:
|
|
||||||
title = remove_end(title, '&asdasdas').strip()
|
|
||||||
|
|
||||||
video_url = data.get('url', [None])[0]
|
|
||||||
|
|
||||||
if video_url:
|
|
||||||
try:
|
|
||||||
self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
|
|
||||||
except ExtractorError as e:
|
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
|
|
||||||
self.report_warning('Invalid video URL, requesting another', video_id)
|
|
||||||
return self._extract_video(video_id, file_key, video_url, try_num)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
url = self._EMBED_URL % video_id
|
webpage = self._download_webpage(
|
||||||
webpage = self._download_webpage(url, video_id)
|
'http://www.cloudy.ec/embed.php?id=%s' % video_id, video_id)
|
||||||
|
|
||||||
file_key = self._search_regex(
|
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||||
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
|
|
||||||
webpage, 'file_key')
|
|
||||||
|
|
||||||
return self._extract_video(video_id, file_key)
|
webpage = self._download_webpage(
|
||||||
|
'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False)
|
||||||
|
|
||||||
|
if webpage:
|
||||||
|
info.update({
|
||||||
|
'title': self._search_regex(
|
||||||
|
r'<h\d[^>]*>([^<]+)<', webpage, 'title'),
|
||||||
|
'upload_date': unified_strdate(self._search_regex(
|
||||||
|
r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage,
|
||||||
|
'upload date', fatal=False)),
|
||||||
|
'view_count': str_to_int(self._search_regex(
|
||||||
|
r'([\d,.]+) views<', webpage, 'view count', fatal=False)),
|
||||||
|
})
|
||||||
|
|
||||||
|
if not info.get('title'):
|
||||||
|
info['title'] = video_id
|
||||||
|
|
||||||
|
info['id'] = video_id
|
||||||
|
|
||||||
|
return info
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
@@ -6,6 +7,7 @@ import hashlib
|
|||||||
import json
|
import json
|
||||||
import netrc
|
import netrc
|
||||||
import os
|
import os
|
||||||
|
import random
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
@@ -35,31 +37,35 @@ from ..utils import (
|
|||||||
clean_html,
|
clean_html,
|
||||||
compiled_regex_type,
|
compiled_regex_type,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
determine_protocol,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
extract_attributes,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
GeoRestrictedError,
|
||||||
|
GeoUtils,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
mimetype2ext,
|
||||||
|
orderedSet,
|
||||||
|
parse_codecs,
|
||||||
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
parse_m3u8_attributes,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitize_filename,
|
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
|
sanitize_filename,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
update_Request,
|
||||||
|
update_url_query,
|
||||||
|
urljoin,
|
||||||
url_basename,
|
url_basename,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
determine_protocol,
|
|
||||||
parse_duration,
|
|
||||||
mimetype2ext,
|
|
||||||
update_Request,
|
|
||||||
update_url_query,
|
|
||||||
parse_m3u8_attributes,
|
|
||||||
extract_attributes,
|
|
||||||
parse_codecs,
|
|
||||||
urljoin,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -319,17 +325,34 @@ class InfoExtractor(object):
|
|||||||
_real_extract() methods and define a _VALID_URL regexp.
|
_real_extract() methods and define a _VALID_URL regexp.
|
||||||
Probably, they should also be added to the list of extractors.
|
Probably, they should also be added to the list of extractors.
|
||||||
|
|
||||||
|
_GEO_BYPASS attribute may be set to False in order to disable
|
||||||
|
geo restriction bypass mechanisms for a particular extractor.
|
||||||
|
Though it won't disable explicit geo restriction bypass based on
|
||||||
|
country code provided with geo_bypass_country. (experimental)
|
||||||
|
|
||||||
|
_GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
|
||||||
|
countries for this extractor. One of these countries will be used by
|
||||||
|
geo restriction bypass mechanism right away in order to bypass
|
||||||
|
geo restriction, of course, if the mechanism is not disabled. (experimental)
|
||||||
|
|
||||||
|
NB: both these geo attributes are experimental and may change in future
|
||||||
|
or be completely removed.
|
||||||
|
|
||||||
Finally, the _WORKING attribute should be set to False for broken IEs
|
Finally, the _WORKING attribute should be set to False for broken IEs
|
||||||
in order to warn the users and skip the tests.
|
in order to warn the users and skip the tests.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_ready = False
|
_ready = False
|
||||||
_downloader = None
|
_downloader = None
|
||||||
|
_x_forwarded_for_ip = None
|
||||||
|
_GEO_BYPASS = True
|
||||||
|
_GEO_COUNTRIES = None
|
||||||
_WORKING = True
|
_WORKING = True
|
||||||
|
|
||||||
def __init__(self, downloader=None):
|
def __init__(self, downloader=None):
|
||||||
"""Constructor. Receives an optional downloader."""
|
"""Constructor. Receives an optional downloader."""
|
||||||
self._ready = False
|
self._ready = False
|
||||||
|
self._x_forwarded_for_ip = None
|
||||||
self.set_downloader(downloader)
|
self.set_downloader(downloader)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -358,15 +381,59 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def initialize(self):
|
def initialize(self):
|
||||||
"""Initializes an instance (authentication, etc)."""
|
"""Initializes an instance (authentication, etc)."""
|
||||||
|
self._initialize_geo_bypass(self._GEO_COUNTRIES)
|
||||||
if not self._ready:
|
if not self._ready:
|
||||||
self._real_initialize()
|
self._real_initialize()
|
||||||
self._ready = True
|
self._ready = True
|
||||||
|
|
||||||
|
def _initialize_geo_bypass(self, countries):
|
||||||
|
"""
|
||||||
|
Initialize geo restriction bypass mechanism.
|
||||||
|
|
||||||
|
This method is used to initialize geo bypass mechanism based on faking
|
||||||
|
X-Forwarded-For HTTP header. A random country from provided country list
|
||||||
|
is selected and a random IP belonging to this country is generated. This
|
||||||
|
IP will be passed as X-Forwarded-For HTTP header in all subsequent
|
||||||
|
HTTP requests.
|
||||||
|
|
||||||
|
This method will be used for initial geo bypass mechanism initialization
|
||||||
|
during the instance initialization with _GEO_COUNTRIES.
|
||||||
|
|
||||||
|
You may also manually call it from extractor's code if geo countries
|
||||||
|
information is not available beforehand (e.g. obtained during
|
||||||
|
extraction) or due to some another reason.
|
||||||
|
"""
|
||||||
|
if not self._x_forwarded_for_ip:
|
||||||
|
country_code = self._downloader.params.get('geo_bypass_country', None)
|
||||||
|
# If there is no explicit country for geo bypass specified and
|
||||||
|
# the extractor is known to be geo restricted let's fake IP
|
||||||
|
# as X-Forwarded-For right away.
|
||||||
|
if (not country_code and
|
||||||
|
self._GEO_BYPASS and
|
||||||
|
self._downloader.params.get('geo_bypass', True) and
|
||||||
|
countries):
|
||||||
|
country_code = random.choice(countries)
|
||||||
|
if country_code:
|
||||||
|
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
|
||||||
|
if self._downloader.params.get('verbose', False):
|
||||||
|
self._downloader.to_stdout(
|
||||||
|
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
||||||
|
% (self._x_forwarded_for_ip, country_code.upper()))
|
||||||
|
|
||||||
def extract(self, url):
|
def extract(self, url):
|
||||||
"""Extracts URL information and returns it in list of dicts."""
|
"""Extracts URL information and returns it in list of dicts."""
|
||||||
try:
|
try:
|
||||||
self.initialize()
|
for _ in range(2):
|
||||||
return self._real_extract(url)
|
try:
|
||||||
|
self.initialize()
|
||||||
|
ie_result = self._real_extract(url)
|
||||||
|
if self._x_forwarded_for_ip:
|
||||||
|
ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
|
||||||
|
return ie_result
|
||||||
|
except GeoRestrictedError as e:
|
||||||
|
if self.__maybe_fake_ip_and_retry(e.countries):
|
||||||
|
continue
|
||||||
|
raise
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
raise
|
raise
|
||||||
except compat_http_client.IncompleteRead as e:
|
except compat_http_client.IncompleteRead as e:
|
||||||
@@ -374,6 +441,21 @@ class InfoExtractor(object):
|
|||||||
except (KeyError, StopIteration) as e:
|
except (KeyError, StopIteration) as e:
|
||||||
raise ExtractorError('An extractor error has occurred.', cause=e)
|
raise ExtractorError('An extractor error has occurred.', cause=e)
|
||||||
|
|
||||||
|
def __maybe_fake_ip_and_retry(self, countries):
|
||||||
|
if (not self._downloader.params.get('geo_bypass_country', None) and
|
||||||
|
self._GEO_BYPASS and
|
||||||
|
self._downloader.params.get('geo_bypass', True) and
|
||||||
|
not self._x_forwarded_for_ip and
|
||||||
|
countries):
|
||||||
|
country_code = random.choice(countries)
|
||||||
|
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
|
||||||
|
if self._x_forwarded_for_ip:
|
||||||
|
self.report_warning(
|
||||||
|
'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
|
||||||
|
% (self._x_forwarded_for_ip, country_code.upper()))
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
def set_downloader(self, downloader):
|
def set_downloader(self, downloader):
|
||||||
"""Sets the downloader for this IE."""
|
"""Sets the downloader for this IE."""
|
||||||
self._downloader = downloader
|
self._downloader = downloader
|
||||||
@@ -433,6 +515,15 @@ class InfoExtractor(object):
|
|||||||
if isinstance(url_or_request, (compat_str, str)):
|
if isinstance(url_or_request, (compat_str, str)):
|
||||||
url_or_request = url_or_request.partition('#')[0]
|
url_or_request = url_or_request.partition('#')[0]
|
||||||
|
|
||||||
|
# Some sites check X-Forwarded-For HTTP header in order to figure out
|
||||||
|
# the origin of the client behind proxy. This allows bypassing geo
|
||||||
|
# restriction by faking this header's value to IP that belongs to some
|
||||||
|
# geo unrestricted country. We will do so once we encounter any
|
||||||
|
# geo restriction error.
|
||||||
|
if self._x_forwarded_for_ip:
|
||||||
|
if 'X-Forwarded-For' not in headers:
|
||||||
|
headers['X-Forwarded-For'] = self._x_forwarded_for_ip
|
||||||
|
|
||||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
|
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
|
||||||
if urlh is False:
|
if urlh is False:
|
||||||
assert not fatal
|
assert not fatal
|
||||||
@@ -457,6 +548,34 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
return encoding
|
return encoding
|
||||||
|
|
||||||
|
def __check_blocked(self, content):
|
||||||
|
first_block = content[:512]
|
||||||
|
if ('<title>Access to this site is blocked</title>' in content and
|
||||||
|
'Websense' in first_block):
|
||||||
|
msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
|
||||||
|
blocked_iframe = self._html_search_regex(
|
||||||
|
r'<iframe src="([^"]+)"', content,
|
||||||
|
'Websense information URL', default=None)
|
||||||
|
if blocked_iframe:
|
||||||
|
msg += ' Visit %s for more details' % blocked_iframe
|
||||||
|
raise ExtractorError(msg, expected=True)
|
||||||
|
if '<title>The URL you requested has been blocked</title>' in first_block:
|
||||||
|
msg = (
|
||||||
|
'Access to this webpage has been blocked by Indian censorship. '
|
||||||
|
'Use a VPN or proxy server (with --proxy) to route around it.')
|
||||||
|
block_msg = self._html_search_regex(
|
||||||
|
r'</h1><p>(.*?)</p>',
|
||||||
|
content, 'block message', default=None)
|
||||||
|
if block_msg:
|
||||||
|
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
|
||||||
|
raise ExtractorError(msg, expected=True)
|
||||||
|
if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and
|
||||||
|
'blocklist.rkn.gov.ru' in content):
|
||||||
|
raise ExtractorError(
|
||||||
|
'Access to this webpage has been blocked by decision of the Russian government. '
|
||||||
|
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
|
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
|
||||||
content_type = urlh.headers.get('Content-Type', '')
|
content_type = urlh.headers.get('Content-Type', '')
|
||||||
webpage_bytes = urlh.read()
|
webpage_bytes = urlh.read()
|
||||||
@@ -498,25 +617,7 @@ class InfoExtractor(object):
|
|||||||
except LookupError:
|
except LookupError:
|
||||||
content = webpage_bytes.decode('utf-8', 'replace')
|
content = webpage_bytes.decode('utf-8', 'replace')
|
||||||
|
|
||||||
if ('<title>Access to this site is blocked</title>' in content and
|
self.__check_blocked(content)
|
||||||
'Websense' in content[:512]):
|
|
||||||
msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
|
|
||||||
blocked_iframe = self._html_search_regex(
|
|
||||||
r'<iframe src="([^"]+)"', content,
|
|
||||||
'Websense information URL', default=None)
|
|
||||||
if blocked_iframe:
|
|
||||||
msg += ' Visit %s for more details' % blocked_iframe
|
|
||||||
raise ExtractorError(msg, expected=True)
|
|
||||||
if '<title>The URL you requested has been blocked</title>' in content[:512]:
|
|
||||||
msg = (
|
|
||||||
'Access to this webpage has been blocked by Indian censorship. '
|
|
||||||
'Use a VPN or proxy server (with --proxy) to route around it.')
|
|
||||||
block_msg = self._html_search_regex(
|
|
||||||
r'</h1><p>(.*?)</p>',
|
|
||||||
content, 'block message', default=None)
|
|
||||||
if block_msg:
|
|
||||||
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
|
|
||||||
raise ExtractorError(msg, expected=True)
|
|
||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
@@ -608,10 +709,8 @@ class InfoExtractor(object):
|
|||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
|
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
|
||||||
raise ExtractorError(
|
raise GeoRestrictedError(msg, countries=countries)
|
||||||
'%s. You might want to use --proxy to workaround.' % msg,
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
# Methods for following #608
|
# Methods for following #608
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -627,6 +726,13 @@ class InfoExtractor(object):
|
|||||||
video_info['title'] = video_title
|
video_info['title'] = video_title
|
||||||
return video_info
|
return video_info
|
||||||
|
|
||||||
|
def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None):
|
||||||
|
urlrs = orderedSet(
|
||||||
|
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
|
||||||
|
for m in matches)
|
||||||
|
return self.playlist_result(
|
||||||
|
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
|
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
|
||||||
"""Returns a playlist"""
|
"""Returns a playlist"""
|
||||||
@@ -1673,7 +1779,7 @@ class InfoExtractor(object):
|
|||||||
if content_type == 'text':
|
if content_type == 'text':
|
||||||
# TODO implement WebVTT downloading
|
# TODO implement WebVTT downloading
|
||||||
pass
|
pass
|
||||||
elif content_type == 'video' or content_type == 'audio':
|
elif content_type in ('video', 'audio'):
|
||||||
base_url = ''
|
base_url = ''
|
||||||
for element in (representation, adaptation_set, period, mpd_doc):
|
for element in (representation, adaptation_set, period, mpd_doc):
|
||||||
base_url_e = element.find(_add_ns('BaseURL'))
|
base_url_e = element.find(_add_ns('BaseURL'))
|
||||||
@@ -1923,7 +2029,7 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None):
|
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
|
||||||
def absolute_url(video_url):
|
def absolute_url(video_url):
|
||||||
return compat_urlparse.urljoin(base_url, video_url)
|
return compat_urlparse.urljoin(base_url, video_url)
|
||||||
|
|
||||||
@@ -1945,7 +2051,8 @@ class InfoExtractor(object):
|
|||||||
is_plain_url = False
|
is_plain_url = False
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
full_url, video_id, ext='mp4',
|
full_url, video_id, ext='mp4',
|
||||||
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
|
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
|
||||||
|
preference=preference)
|
||||||
elif ext == 'mpd':
|
elif ext == 'mpd':
|
||||||
is_plain_url = False
|
is_plain_url = False
|
||||||
formats = self._extract_mpd_formats(
|
formats = self._extract_mpd_formats(
|
||||||
@@ -2073,6 +2180,141 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
||||||
|
mobj = re.search(
|
||||||
|
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
|
||||||
|
webpage)
|
||||||
|
if mobj:
|
||||||
|
try:
|
||||||
|
jwplayer_data = self._parse_json(mobj.group('options'),
|
||||||
|
video_id=video_id,
|
||||||
|
transform_source=transform_source)
|
||||||
|
except ExtractorError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
if isinstance(jwplayer_data, dict):
|
||||||
|
return jwplayer_data
|
||||||
|
|
||||||
|
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||||
|
jwplayer_data = self._find_jwplayer_data(
|
||||||
|
webpage, video_id, transform_source=js_to_json)
|
||||||
|
return self._parse_jwplayer_data(
|
||||||
|
jwplayer_data, video_id, *args, **kwargs)
|
||||||
|
|
||||||
|
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
||||||
|
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||||
|
# JWPlayer backward compatibility: flattened playlists
|
||||||
|
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
||||||
|
if 'playlist' not in jwplayer_data:
|
||||||
|
jwplayer_data = {'playlist': [jwplayer_data]}
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
|
||||||
|
# JWPlayer backward compatibility: single playlist item
|
||||||
|
# https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
|
||||||
|
if not isinstance(jwplayer_data['playlist'], list):
|
||||||
|
jwplayer_data['playlist'] = [jwplayer_data['playlist']]
|
||||||
|
|
||||||
|
for video_data in jwplayer_data['playlist']:
|
||||||
|
# JWPlayer backward compatibility: flattened sources
|
||||||
|
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
|
||||||
|
if 'sources' not in video_data:
|
||||||
|
video_data['sources'] = [video_data]
|
||||||
|
|
||||||
|
this_video_id = video_id or video_data['mediaid']
|
||||||
|
|
||||||
|
formats = self._parse_jwplayer_formats(
|
||||||
|
video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
|
||||||
|
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
tracks = video_data.get('tracks')
|
||||||
|
if tracks and isinstance(tracks, list):
|
||||||
|
for track in tracks:
|
||||||
|
if track.get('kind') != 'captions':
|
||||||
|
continue
|
||||||
|
track_url = urljoin(base_url, track.get('file'))
|
||||||
|
if not track_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||||
|
'url': self._proto_relative_url(track_url)
|
||||||
|
})
|
||||||
|
|
||||||
|
entries.append({
|
||||||
|
'id': this_video_id,
|
||||||
|
'title': video_data['title'] if require_title else video_data.get('title'),
|
||||||
|
'description': video_data.get('description'),
|
||||||
|
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||||
|
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||||
|
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'formats': formats,
|
||||||
|
})
|
||||||
|
if len(entries) == 1:
|
||||||
|
return entries[0]
|
||||||
|
else:
|
||||||
|
return self.playlist_result(entries)
|
||||||
|
|
||||||
|
def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
|
||||||
|
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||||
|
formats = []
|
||||||
|
for source in jwplayer_sources_data:
|
||||||
|
source_url = self._proto_relative_url(source['file'])
|
||||||
|
if base_url:
|
||||||
|
source_url = compat_urlparse.urljoin(base_url, source_url)
|
||||||
|
source_type = source.get('type') or ''
|
||||||
|
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||||
|
if source_type == 'hls' or ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=m3u8_id, fatal=False))
|
||||||
|
elif ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
source_url, video_id, mpd_id=mpd_id, fatal=False))
|
||||||
|
elif ext == 'smil':
|
||||||
|
formats.extend(self._extract_smil_formats(
|
||||||
|
source_url, video_id, fatal=False))
|
||||||
|
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
|
||||||
|
elif source_type.startswith('audio') or ext in (
|
||||||
|
'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
|
||||||
|
formats.append({
|
||||||
|
'url': source_url,
|
||||||
|
'vcodec': 'none',
|
||||||
|
'ext': ext,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
height = int_or_none(source.get('height'))
|
||||||
|
if height is None:
|
||||||
|
# Often no height is provided but there is a label in
|
||||||
|
# format like "1080p", "720p SD", or 1080.
|
||||||
|
height = int_or_none(self._search_regex(
|
||||||
|
r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
|
||||||
|
'height', default=None))
|
||||||
|
a_format = {
|
||||||
|
'url': source_url,
|
||||||
|
'width': int_or_none(source.get('width')),
|
||||||
|
'height': height,
|
||||||
|
'tbr': int_or_none(source.get('bitrate')),
|
||||||
|
'ext': ext,
|
||||||
|
}
|
||||||
|
if source_url.startswith('rtmp'):
|
||||||
|
a_format['ext'] = 'flv'
|
||||||
|
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||||
|
# of jwplayer.flash.swf
|
||||||
|
rtmp_url_parts = re.split(
|
||||||
|
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||||
|
if len(rtmp_url_parts) == 3:
|
||||||
|
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||||
|
a_format.update({
|
||||||
|
'url': rtmp_url,
|
||||||
|
'play_path': prefix + play_path,
|
||||||
|
})
|
||||||
|
if rtmp_params:
|
||||||
|
a_format.update(rtmp_params)
|
||||||
|
formats.append(a_format)
|
||||||
|
return formats
|
||||||
|
|
||||||
def _live_title(self, name):
|
def _live_title(self, name):
|
||||||
""" Generate the title for a live video """
|
""" Generate the title for a live video """
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
@@ -33,7 +35,9 @@ class UnicodeBOMIE(InfoExtractor):
|
|||||||
IE_DESC = False
|
IE_DESC = False
|
||||||
_VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
|
_VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
|
||||||
|
|
||||||
_TESTS = [{
|
# Disable test for python 3.2 since BOM is broken in re in this version
|
||||||
|
# (see https://github.com/rg3/youtube-dl/issues/9751)
|
||||||
|
_TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
|
||||||
'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
|
'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|||||||
@@ -9,13 +9,14 @@ from ..compat import (
|
|||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
orderedSet,
|
|
||||||
remove_end,
|
|
||||||
extract_attributes,
|
|
||||||
mimetype2ext,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
extract_attributes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
mimetype2ext,
|
||||||
|
orderedSet,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
remove_end,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -66,6 +67,16 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'upload_date': '20130314',
|
'upload_date': '20130314',
|
||||||
'timestamp': 1363219200,
|
'timestamp': 1363219200,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://video.gq.com/watch/the-closer-with-keith-olbermann-the-only-true-surprise-trump-s-an-idiot?c=series',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '58d1865bfd2e6126e2000015',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The Only True Surprise? Trump’s an Idiot',
|
||||||
|
'uploader': 'gq',
|
||||||
|
'upload_date': '20170321',
|
||||||
|
'timestamp': 1490126427,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# JS embed
|
# JS embed
|
||||||
'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js',
|
'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js',
|
||||||
@@ -114,26 +125,33 @@ class CondeNastIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
video_id = query['videoId']
|
video_id = query['videoId']
|
||||||
video_info = None
|
video_info = None
|
||||||
info_page = self._download_webpage(
|
info_page = self._download_json(
|
||||||
'http://player.cnevids.com/player/video.js',
|
'http://player.cnevids.com/player/video.js',
|
||||||
video_id, 'Downloading video info', query=query, fatal=False)
|
video_id, 'Downloading video info', fatal=False, query=query)
|
||||||
if info_page:
|
if info_page:
|
||||||
video_info = self._parse_json(self._search_regex(
|
video_info = info_page.get('video')
|
||||||
r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video']
|
if not video_info:
|
||||||
else:
|
|
||||||
info_page = self._download_webpage(
|
info_page = self._download_webpage(
|
||||||
'http://player.cnevids.com/player/loader.js',
|
'http://player.cnevids.com/player/loader.js',
|
||||||
video_id, 'Downloading loader info', query=query)
|
video_id, 'Downloading loader info', query=query)
|
||||||
video_info = self._parse_json(self._search_regex(
|
video_info = self._parse_json(
|
||||||
r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id)
|
self._search_regex(
|
||||||
|
r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'),
|
||||||
|
video_id, transform_source=js_to_json)['video']
|
||||||
|
|
||||||
title = video_info['title']
|
title = video_info['title']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for fdata in video_info.get('sources', [{}])[0]:
|
for fdata in video_info['sources']:
|
||||||
src = fdata.get('src')
|
src = fdata.get('src')
|
||||||
if not src:
|
if not src:
|
||||||
continue
|
continue
|
||||||
ext = mimetype2ext(fdata.get('type')) or determine_ext(src)
|
ext = mimetype2ext(fdata.get('type')) or determine_ext(src)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
src, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
continue
|
||||||
quality = fdata.get('quality')
|
quality = fdata.get('quality')
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': ext + ('-%s' % quality if quality else ''),
|
'format_id': ext + ('-%s' % quality if quality else ''),
|
||||||
@@ -169,7 +187,6 @@ class CondeNastIE(InfoExtractor):
|
|||||||
path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/')))
|
path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/')))
|
||||||
url_type = 'embed'
|
url_type = 'embed'
|
||||||
|
|
||||||
self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
|
|
||||||
webpage = self._download_webpage(url, item_id)
|
webpage = self._download_webpage(url, item_id)
|
||||||
|
|
||||||
if url_type == 'series':
|
if url_type == 'series':
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from ..utils import int_or_none
|
|||||||
|
|
||||||
|
|
||||||
class CrackleIE(InfoExtractor):
|
class CrackleIE(InfoExtractor):
|
||||||
|
_GEO_COUNTRIES = ['US']
|
||||||
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
|
'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
|
||||||
|
|||||||
@@ -123,7 +123,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
|||||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '645513',
|
'id': '645513',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
|
'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
|
||||||
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
|
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
|
||||||
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
|
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
|
||||||
@@ -177,6 +177,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
|||||||
'uploader': 'Kadokawa Pictures Inc.',
|
'uploader': 'Kadokawa Pictures Inc.',
|
||||||
'upload_date': '20170118',
|
'upload_date': '20170118',
|
||||||
'series': "KONOSUBA -God's blessing on this wonderful world!",
|
'series': "KONOSUBA -God's blessing on this wonderful world!",
|
||||||
|
'season': "KONOSUBA -God's blessing on this wonderful world! 2",
|
||||||
'season_number': 2,
|
'season_number': 2,
|
||||||
'episode': 'Give Me Deliverance from this Judicial Injustice!',
|
'episode': 'Give Me Deliverance from this Judicial Injustice!',
|
||||||
'episode_number': 1,
|
'episode_number': 1,
|
||||||
@@ -192,6 +193,53 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
|||||||
# geo-restricted (US), 18+ maturity wall, non-premium available
|
# geo-restricted (US), 18+ maturity wall, non-premium available
|
||||||
'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
|
'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# A description with double quotes
|
||||||
|
'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '535080',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
|
||||||
|
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
|
||||||
|
'uploader': 'Marvelous AQL Inc.',
|
||||||
|
'upload_date': '20091021',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# Just test metadata extraction
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# make sure we can extract an uploader name that's not a link
|
||||||
|
'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '606899',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
|
||||||
|
'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
|
||||||
|
'uploader': 'Geneon Entertainment',
|
||||||
|
'upload_date': '20120717',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# just test metadata extraction
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# A video with a vastly different season name compared to the series name
|
||||||
|
'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '590532',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test',
|
||||||
|
'description': 'Mahiro and Nyaruko talk about official certification.',
|
||||||
|
'uploader': 'TV TOKYO',
|
||||||
|
'upload_date': '20120305',
|
||||||
|
'series': 'Nyarko-san: Another Crawling Chaos',
|
||||||
|
'season': 'Haiyoru! Nyaruani (ONA)',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# Just test metadata extraction
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_FORMAT_IDS = {
|
_FORMAT_IDS = {
|
||||||
@@ -342,7 +390,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
else:
|
else:
|
||||||
webpage_url = 'http://www.' + mobj.group('url')
|
webpage_url = 'http://www.' + mobj.group('url')
|
||||||
|
|
||||||
webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage')
|
webpage = self._download_webpage(
|
||||||
|
self._add_skip_wall(webpage_url), video_id,
|
||||||
|
headers=self.geo_verification_headers())
|
||||||
note_m = self._html_search_regex(
|
note_m = self._html_search_regex(
|
||||||
r'<div class="showmedia-trailer-notice">(.+?)</div>',
|
r'<div class="showmedia-trailer-notice">(.+?)</div>',
|
||||||
webpage, 'trailer-notice', default='')
|
webpage, 'trailer-notice', default='')
|
||||||
@@ -362,9 +412,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
||||||
webpage, 'video_title')
|
webpage, 'video_title')
|
||||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||||
video_description = self._html_search_regex(
|
video_description = self._parse_json(self._html_search_regex(
|
||||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?"description"\s*:\s*"([^"]+)' % video_id,
|
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
||||||
webpage, 'description', default=None)
|
webpage, 'description', default='{}'), video_id).get('description')
|
||||||
if video_description:
|
if video_description:
|
||||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||||
video_upload_date = self._html_search_regex(
|
video_upload_date = self._html_search_regex(
|
||||||
@@ -373,8 +423,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
if video_upload_date:
|
if video_upload_date:
|
||||||
video_upload_date = unified_strdate(video_upload_date)
|
video_upload_date = unified_strdate(video_upload_date)
|
||||||
video_uploader = self._html_search_regex(
|
video_uploader = self._html_search_regex(
|
||||||
r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
|
# try looking for both an uploader that's a link and one that's not
|
||||||
'video_uploader', fatal=False)
|
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
|
||||||
|
webpage, 'video_uploader', fatal=False)
|
||||||
|
|
||||||
available_fmts = []
|
available_fmts = []
|
||||||
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
||||||
@@ -460,7 +511,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
# webpage provide more accurate data than series_title from XML
|
# webpage provide more accurate data than series_title from XML
|
||||||
series = self._html_search_regex(
|
series = self._html_search_regex(
|
||||||
r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)',
|
r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)',
|
||||||
webpage, 'series', default=xpath_text(metadata, 'series_title'))
|
webpage, 'series', fatal=False)
|
||||||
|
season = xpath_text(metadata, 'series_title')
|
||||||
|
|
||||||
episode = xpath_text(metadata, 'episode_title')
|
episode = xpath_text(metadata, 'episode_title')
|
||||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||||
@@ -477,6 +529,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'series': series,
|
'series': series,
|
||||||
|
'season': season,
|
||||||
'season_number': season_number,
|
'season_number': season_number,
|
||||||
'episode': episode,
|
'episode': episode,
|
||||||
'episode_number': episode_number,
|
'episode_number': episode_number,
|
||||||
@@ -514,16 +567,18 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
show_id = self._match_id(url)
|
show_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(self._add_skip_wall(url), show_id)
|
webpage = self._download_webpage(
|
||||||
|
self._add_skip_wall(url), show_id,
|
||||||
|
headers=self.geo_verification_headers())
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
|
r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
episode_paths = re.findall(
|
episode_paths = re.findall(
|
||||||
r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"',
|
r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
|
||||||
webpage)
|
webpage)
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll')
|
self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id)
|
||||||
for ep in episode_paths
|
for ep_id, ep in episode_paths
|
||||||
]
|
]
|
||||||
entries.reverse()
|
entries.reverse()
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@@ -49,6 +51,48 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
|||||||
limelight_media_id = media['limelight_media_id']
|
limelight_media_id = media['limelight_media_id']
|
||||||
title = media['title']
|
title = media['title']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for encoding in media.get('encodings', []):
|
||||||
|
m3u8_url = encoding.get('master_playlist_url')
|
||||||
|
if m3u8_url:
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
encoding_url = encoding.get('url')
|
||||||
|
file_url = encoding.get('file_url')
|
||||||
|
if not encoding_url and not file_url:
|
||||||
|
continue
|
||||||
|
f = {
|
||||||
|
'width': int_or_none(encoding.get('width')),
|
||||||
|
'height': int_or_none(encoding.get('height')),
|
||||||
|
'vbr': int_or_none(encoding.get('video_bitrate')),
|
||||||
|
'abr': int_or_none(encoding.get('audio_bitrate')),
|
||||||
|
'filesize': int_or_none(encoding.get('size_in_bytes')),
|
||||||
|
'vcodec': encoding.get('video_codec'),
|
||||||
|
'acodec': encoding.get('audio_codec'),
|
||||||
|
'container': encoding.get('container_type'),
|
||||||
|
}
|
||||||
|
for f_url in (encoding_url, file_url):
|
||||||
|
if not f_url:
|
||||||
|
continue
|
||||||
|
fmt = f.copy()
|
||||||
|
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
|
||||||
|
if rtmp:
|
||||||
|
fmt.update({
|
||||||
|
'url': rtmp.group('url'),
|
||||||
|
'play_path': rtmp.group('playpath'),
|
||||||
|
'app': rtmp.group('app'),
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': 'rtmp',
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
fmt.update({
|
||||||
|
'url': f_url,
|
||||||
|
'format_id': 'http',
|
||||||
|
})
|
||||||
|
formats.append(fmt)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for closed_caption in media.get('closed_captions', []):
|
for closed_caption in media.get('closed_captions', []):
|
||||||
sub_url = closed_caption.get('file')
|
sub_url = closed_caption.get('file')
|
||||||
@@ -60,16 +104,14 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': 'limelight:media:' + limelight_media_id,
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': media.get('description'),
|
'description': media.get('description'),
|
||||||
'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
|
'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
|
||||||
'duration': int_or_none(media.get('duration')),
|
'duration': int_or_none(media.get('duration')),
|
||||||
'tags': media.get('tags'),
|
'tags': media.get('tags'),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'ie_key': 'LimelightMedia',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -78,14 +120,12 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
|||||||
_VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
|
_VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://app.curiositystream.com/video/2',
|
'url': 'https://app.curiositystream.com/video/2',
|
||||||
'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
|
'md5': '262bb2f257ff301115f1973540de8983',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2',
|
'id': '2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'How Did You Develop The Internet?',
|
'title': 'How Did You Develop The Internet?',
|
||||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||||
'timestamp': 1448388615,
|
|
||||||
'upload_date': '20151124',
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -105,7 +145,7 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
|||||||
'title': 'Curious Minds: The Internet',
|
'title': 'Curious Minds: The Internet',
|
||||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 17,
|
'playlist_mincount': 12,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -82,6 +82,11 @@ class CWTVIE(InfoExtractor):
|
|||||||
'url': quality_url,
|
'url': quality_url,
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
})
|
})
|
||||||
|
video_metadata = video_data['assetFields']
|
||||||
|
ism_url = video_metadata.get('smoothStreamingUrl')
|
||||||
|
if ism_url:
|
||||||
|
formats.extend(self._extract_ism_formats(
|
||||||
|
ism_url, video_id, ism_id='mss', fatal=False))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
@@ -90,8 +95,6 @@ class CWTVIE(InfoExtractor):
|
|||||||
'height': image.get('height'),
|
'height': image.get('height'),
|
||||||
} for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
|
} for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
|
||||||
|
|
||||||
video_metadata = video_data['assetFields']
|
|
||||||
|
|
||||||
subtitles = {
|
subtitles = {
|
||||||
'en': [{
|
'en': [{
|
||||||
'url': video_metadata['UnicornCcUrl'],
|
'url': video_metadata['UnicornCcUrl'],
|
||||||
|
|||||||
@@ -66,7 +66,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
'uploader_id': 'xijv66',
|
'uploader_id': 'xijv66',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# Vevo video
|
# Vevo video
|
||||||
@@ -140,7 +139,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
view_count = str_to_int(view_count_str)
|
view_count = str_to_int(view_count_str)
|
||||||
comment_count = int_or_none(self._search_regex(
|
comment_count = int_or_none(self._search_regex(
|
||||||
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
|
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
|
||||||
webpage, 'comment count', fatal=False))
|
webpage, 'comment count', default=None))
|
||||||
|
|
||||||
player_v5 = self._search_regex(
|
player_v5 = self._search_regex(
|
||||||
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
|
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
|
||||||
@@ -283,9 +282,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _check_error(self, info):
|
def _check_error(self, info):
|
||||||
|
error = info.get('error')
|
||||||
if info.get('error') is not None:
|
if info.get('error') is not None:
|
||||||
|
title = error['title']
|
||||||
|
# See https://developer.dailymotion.com/api#access-error
|
||||||
|
if error.get('code') == 'DM007':
|
||||||
|
self.raise_geo_restricted(msg=title)
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'%s said: %s' % (self.IE_NAME, info['error']['title']), expected=True)
|
'%s said: %s' % (self.IE_NAME, title), expected=True)
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, webpage):
|
def _get_subtitles(self, video_id, webpage):
|
||||||
try:
|
try:
|
||||||
|
|||||||
159
youtube_dl/extractor/daisuki.py
Normal file
159
youtube_dl/extractor/daisuki.py
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..aes import (
|
||||||
|
aes_cbc_decrypt,
|
||||||
|
aes_cbc_encrypt,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
bytes_to_intlist,
|
||||||
|
bytes_to_long,
|
||||||
|
clean_html,
|
||||||
|
ExtractorError,
|
||||||
|
intlist_to_bytes,
|
||||||
|
get_element_by_id,
|
||||||
|
js_to_json,
|
||||||
|
int_or_none,
|
||||||
|
long_to_bytes,
|
||||||
|
pkcs1pad,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DaisukiIE(InfoExtractor):
    """Extractor for single-episode ``watch`` pages on daisuki.net.

    A subset of the page's ``flashvars`` is AES-CBC encrypted with a randomly
    generated key; that key is PKCS#1-padded and RSA-encrypted with
    ``_RSA_KEY``. Both are sent to the ``/bin/bgn/init`` endpoint. The ``rtn``
    field of the reply is decrypted with the same AES key and parsed as JSON
    carrying ``play_url`` (HLS playlist) and, optionally, ``caption_url``.
    """

    _VALID_URL = r'https?://(?:www\.)?daisuki\.net/[^/]+/[^/]+/[^/]+/watch\.[^.]+\.(?P<id>\d+)\.html'

    _TEST = {
        'url': 'http://www.daisuki.net/tw/en/anime/watch.TheIdolMasterCG.11213.html',
        'info_dict': {
            'id': '11213',
            'ext': 'mp4',
            'title': '#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS',
            'subtitles': {
                'mul': [{
                    'ext': 'ttml',
                }],
            },
            'creator': 'BANDAI NAMCO Entertainment',
        },
        'params': {
            'skip_download': True,  # AES-encrypted HLS stream
        },
    }

    # The public key in PEM format can be found in clientlibs_anime_watch.min.js
    # Stored as a (modulus, public exponent) pair.
    _RSA_KEY = (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537)

    def _real_extract(self, url):
        """Return the info dict for the episode at ``url``.

        Raises ExtractorError when no init request yields an ``rtn`` field.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        flashvars = self._parse_json(self._search_regex(
            r'(?s)var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
            video_id, transform_source=js_to_json)

        # All-zero IV, used for both the request and the response
        iv = [0] * 16

        data = {}
        for key in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'):
            data[key] = flashvars.get(key, '')

        # Neither the plaintext payload nor the RSA key changes between
        # attempts, so compute them once outside the retry loop
        payload = json.dumps(data)
        n, e = self._RSA_KEY

        max_attempts = 5
        encrypted_rtn = None

        # Some AES keys are rejected. Try it with different AES keys
        for idx in range(max_attempts):
            aes_key = [random.randint(0, 254) for _ in range(32)]
            padded_aeskey = intlist_to_bytes(pkcs1pad(aes_key, 128))

            # Textbook RSA: ciphertext = padded_key ** e mod n
            encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n))

            init_data = self._download_json('http://www.daisuki.net/bin/bgn/init', video_id, query={
                's': flashvars.get('s', ''),
                'c': flashvars.get('ss3_prm', ''),
                'e': url,
                'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt(
                    bytes_to_intlist(payload),
                    aes_key, iv))).decode('ascii'),
                'a': base64.b64encode(encrypted_aeskey).decode('ascii'),
            }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else ''))

            if 'rtn' in init_data:
                encrypted_rtn = init_data['rtn']
                break

            # Pause only when another attempt follows; sleeping after the
            # final failure would merely delay the error below
            if idx < max_attempts - 1:
                self._sleep(5, video_id)

        if encrypted_rtn is None:
            raise ExtractorError('Failed to fetch init data')

        # aes_key still holds the key of the attempt that succeeded; the
        # decrypted text is stripped of trailing NUL characters before parsing
        rtn = self._parse_json(
            intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(
                base64.b64decode(encrypted_rtn)),
                aes_key, iv)).decode('utf-8').rstrip('\0'),
            video_id)

        formats = self._extract_m3u8_formats(
            rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native')
        # Sort the collected formats before returning them, as the other
        # extractors in this code base do with their ``formats`` lists
        self._sort_formats(formats)

        title = remove_end(self._og_search_title(webpage), ' - DAISUKI')

        creator = self._html_search_regex(
            r'Creator\s*:\s*([^<]+)', webpage, 'creator', fatal=False)

        subtitles = {}
        caption_url = rtn.get('caption_url')
        if caption_url:
            # mul: multiple languages
            subtitles['mul'] = [{
                'url': caption_url,
                'ext': 'ttml',
            }]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'creator': creator,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class DaisukiPlaylistIE(InfoExtractor):
    """Extractor for series ``detail`` pages on daisuki.net.

    Each episode found on the page becomes a ``url_transparent`` entry that
    points at the matching ``watch`` page.
    """

    # "www." is optional here, matching DaisukiIE._VALID_URL; previously the
    # host prefix was mandatory and bare-host playlist URLs were rejected
    _VALID_URL = r'https?://(?:www\.)?daisuki\.net/[^/]+/[^/]+/[^/]+/detail\.(?P<id>[a-zA-Z0-9]+)\.html'

    _TEST = {
        'url': 'http://www.daisuki.net/tw/en/anime/detail.TheIdolMasterCG.html',
        'info_dict': {
            'id': 'TheIdolMasterCG',
            'title': 'THE IDOLM@STER CINDERELLA GIRLS',
            'description': 'md5:0f2c028a9339f7a2c7fbf839edc5c5d8',
        },
        'playlist_count': 26,
    }

    def _real_extract(self, url):
        """Return a playlist result with one entry per episode on the page."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # Group 1: numeric movie id taken from the thumbnail path;
        # group 2: text of the element carrying the episodeNumber class
        episode_pattern = r'''(?sx)
            <img[^>]+delay="[^"]+/(\d+)/movie\.jpg".+?
            <p[^>]+class=".*?\bepisodeNumber\b.*?">(?:<a[^>]+>)?([^<]+)'''

        # Rewrite only the "/detail.<id>.html" component of the URL. Plain
        # str.replace() on 'detail' and '.html' would also rewrite those
        # substrings anywhere else in the URL, e.g. inside the series id.
        detail_re = r'/detail\.%s\.html' % re.escape(playlist_id)

        def watch_url(movie_id):
            return re.sub(
                detail_re,
                '/watch.%s.%s.html' % (playlist_id, movie_id),
                url, count=1)

        entries = [{
            '_type': 'url_transparent',
            'url': watch_url(movie_id),
            'episode_id': episode_id,
            'episode_number': int_or_none(episode_id),
        } for movie_id, episode_id in re.findall(episode_pattern, webpage)]

        playlist_title = remove_end(
            self._og_search_title(webpage, fatal=False), ' - Anime - DAISUKI')
        playlist_description = clean_html(get_element_by_id('synopsisTxt', webpage))

        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
@@ -1,17 +1,21 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
ExtractorError,
|
remove_end,
|
||||||
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryGoIE(InfoExtractor):
|
class DiscoveryGoBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:
|
_VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?:
|
||||||
discovery|
|
discovery|
|
||||||
investigationdiscovery|
|
investigationdiscovery|
|
||||||
discoverylife|
|
discoverylife|
|
||||||
@@ -21,18 +25,23 @@ class DiscoveryGoIE(InfoExtractor):
|
|||||||
sciencechannel|
|
sciencechannel|
|
||||||
tlc|
|
tlc|
|
||||||
velocitychannel
|
velocitychannel
|
||||||
)go\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'''
|
)go\.com/%s(?P<id>[^/?#&]+)'''
|
||||||
|
|
||||||
|
|
||||||
|
class DiscoveryGoIE(DiscoveryGoBaseIE):
|
||||||
|
_VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
|
||||||
|
_GEO_COUNTRIES = ['US']
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/',
|
'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '57a33c536b66d1cd0345eeb1',
|
'id': '58c167d86b66d12f2addeb01',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Kiss First, Ask Questions Later!',
|
'title': 'Reaper Madness',
|
||||||
'description': 'md5:fe923ba34050eae468bffae10831cb22',
|
'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
|
||||||
'duration': 2579,
|
'duration': 2519,
|
||||||
'series': 'Love at First Kiss',
|
'series': 'Bering Sea Gold',
|
||||||
'season_number': 1,
|
'season_number': 8,
|
||||||
'episode_number': 1,
|
'episode_number': 6,
|
||||||
'age_limit': 14,
|
'age_limit': 14,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -113,3 +122,46 @@ class DiscoveryGoIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
    """Playlist extractor for show pages of the Discovery GO family of sites.

    Uses the shared URL template with an empty path prefix and defers to
    DiscoveryGoIE for any URL that extractor accepts.
    """

    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
    _TEST = {
        'url': 'https://www.discoverygo.com/bering-sea-gold/',
        'info_dict': {
            'id': 'bering-sea-gold',
            'title': 'Bering Sea Gold',
            'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e',
        },
        'playlist_mincount': 6,
    }

    @classmethod
    def suitable(cls, url):
        """Return False for URLs DiscoveryGoIE handles, else the default check."""
        if DiscoveryGoIE.suitable(url):
            return False
        return super(DiscoveryGoPlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect the page's ``data-json`` episode items into a playlist."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        entries = []
        for item_match in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage):
            # Attribute values are HTML-escaped JSON; unparsable ones are
            # tolerated (fatal=False) and dropped by the type check below
            item = self._parse_json(
                item_match.group('json'), display_id,
                transform_source=unescapeHTML, fatal=False)
            if isinstance(item, dict) and item.get('type') == 'episode':
                social_url = item.get('socialUrl')
                if social_url:
                    entries.append(self.url_result(
                        social_url, ie=DiscoveryGoIE.ie_key(),
                        video_id=item.get('id')))

        playlist_title = remove_end(
            self._og_search_title(webpage, fatal=False), ' | Discovery GO')
        playlist_description = self._og_search_description(webpage)

        return self.playlist_result(
            entries, display_id, playlist_title, playlist_description)
|
||||||
|
|||||||
@@ -9,13 +9,13 @@ from ..compat import (
|
|||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
|
|
||||||
class TlcDeIE(InfoExtractor):
|
class DiscoveryNetworksDeIE(InfoExtractor):
|
||||||
IE_NAME = 'tlc.de'
|
_VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))'
|
||||||
_VALID_URL = r'https?://(?:www\.)?tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'
|
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
|
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3235167922001',
|
'id': '3235167922001',
|
||||||
@@ -29,7 +29,13 @@ class TlcDeIE(InfoExtractor):
|
|||||||
'upload_date': '20140404',
|
'upload_date': '20140404',
|
||||||
'uploader_id': '1659832546',
|
'uploader_id': '1659832546',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.dmax.de/programme/storage-hunters-uk/videos/storage-hunters-uk-episode-6/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.discovery.de/#5332316765001',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -39,5 +45,8 @@ class TlcDeIE(InfoExtractor):
|
|||||||
title = mobj.group('title')
|
title = mobj.group('title')
|
||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
||||||
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0]
|
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
|
||||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
brightcove_legacy_url).query)['@videoPlayer'][0]
|
||||||
|
return self.url_result(smuggle_url(
|
||||||
|
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['DE']}),
|
||||||
|
'BrightcoveNew', brightcove_id)
|
||||||
59
youtube_dl/extractor/discoveryvr.py
Normal file
59
youtube_dl/extractor/discoveryvr.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import parse_duration
|
||||||
|
|
||||||
|
|
||||||
|
class DiscoveryVRIE(InfoExtractor):
    """Extractor for discoveryvr.com ``watch`` pages.

    Video metadata is read from the ``root.DVR.bootstrapData`` string embedded
    in the page; the entry whose ``slug`` equals the URL id is used.
    """

    _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction',
        'md5': '32b1929798c464a54356378b7912eca4',
        'info_dict': {
            'id': 'discovery-vr-an-introduction',
            'ext': 'mp4',
            'title': 'Discovery VR - An Introduction',
            'description': 'md5:80d418a10efb8899d9403e61d8790f06',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video whose slug is in ``url``."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        raw_bootstrap = self._search_regex(
            r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";',
            page, 'bootstrap data')
        # The captured text is the inside of a quoted string, so undo its
        # backslash escapes before parsing it as JSON
        unescaped_bootstrap = raw_bootstrap.encode('utf-8').decode('unicode_escape')
        bootstrap = self._parse_json(unescaped_bootstrap, slug)

        # 'videos' is itself a JSON document stored as a string
        all_videos = self._parse_json(bootstrap['videos'], slug)['allVideos']
        matching_videos = (
            candidate for candidate in all_videos
            if candidate.get('slug') == slug)
        video_data = next(matching_videos)

        series = video_data.get('showTitle')
        episode = video_data.get('title') or series
        # Prefix the show title unless it is missing or equals the episode title
        if series and series != episode:
            title = '%s - %s' % (series, episode)
        else:
            title = episode

        # Candidate fields are listed in the order their formats are emitted
        format_sources = (
            ('cdnUriM3U8', 'mobi'),
            ('webVideoUrlSd', 'sd'),
            ('webVideoUrlHd', 'hd'),
        )
        formats = [{
            'format_id': format_id,
            'url': video_data[field],
        } for field, format_id in format_sources if video_data.get(field)]

        return {
            'id': slug,
            'display_id': slug,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'duration': parse_duration(video_data.get('runTime')),
            'formats': formats,
            'episode': episode,
            'series': series,
        }
|
||||||
@@ -1,15 +1,10 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import hashlib
|
|
||||||
import time
|
import time
|
||||||
import uuid
|
import hashlib
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
|
||||||
compat_str,
|
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
@@ -25,7 +20,7 @@ class DouyuTVIE(InfoExtractor):
|
|||||||
'id': '17732',
|
'id': '17732',
|
||||||
'display_id': 'iseven',
|
'display_id': 'iseven',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'description': r're:.*m7show@163\.com.*',
|
'description': r're:.*m7show@163\.com.*',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': '7师傅',
|
'uploader': '7师傅',
|
||||||
@@ -56,7 +51,7 @@ class DouyuTVIE(InfoExtractor):
|
|||||||
'id': '17732',
|
'id': '17732',
|
||||||
'display_id': '17732',
|
'display_id': '17732',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'description': r're:.*m7show@163\.com.*',
|
'description': r're:.*m7show@163\.com.*',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': '7师傅',
|
'uploader': '7师傅',
|
||||||
@@ -74,10 +69,6 @@ class DouyuTVIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
# Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf
|
|
||||||
# is encrypted originally, but ffdec can dump memory to get the decrypted one.
|
|
||||||
_API_KEY = 'A12Svb&%1UUmf@hC'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
@@ -88,6 +79,7 @@ class DouyuTVIE(InfoExtractor):
|
|||||||
room_id = self._html_search_regex(
|
room_id = self._html_search_regex(
|
||||||
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
|
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
|
||||||
|
|
||||||
|
# Grab metadata from mobile API
|
||||||
room = self._download_json(
|
room = self._download_json(
|
||||||
'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
|
'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
|
||||||
note='Downloading room info')['data']
|
note='Downloading room info')['data']
|
||||||
@@ -96,38 +88,22 @@ class DouyuTVIE(InfoExtractor):
|
|||||||
if room.get('show_status') == '2':
|
if room.get('show_status') == '2':
|
||||||
raise ExtractorError('Live stream is offline', expected=True)
|
raise ExtractorError('Live stream is offline', expected=True)
|
||||||
|
|
||||||
tt = compat_str(int(time.time() / 60))
|
# Grab the URL from PC client API
|
||||||
did = uuid.uuid4().hex.upper()
|
# The m3u8 url from mobile API requires re-authentication every 5 minutes
|
||||||
|
tt = int(time.time())
|
||||||
sign_content = ''.join((room_id, did, self._API_KEY, tt))
|
signContent = 'lapi/live/thirdPart/getPlay/%s?aid=pcclient&rate=0&time=%d9TUk5fjjUjg9qIMH3sdnh' % (room_id, tt)
|
||||||
sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest()
|
sign = hashlib.md5(signContent.encode('ascii')).hexdigest()
|
||||||
|
video_url = self._download_json(
|
||||||
flv_data = compat_urllib_parse_urlencode({
|
'http://coapi.douyucdn.cn/lapi/live/thirdPart/getPlay/' + room_id,
|
||||||
'cdn': 'ws',
|
video_id, note='Downloading video URL info',
|
||||||
'rate': '0',
|
query={'rate': 0}, headers={
|
||||||
'tt': tt,
|
'auth': sign,
|
||||||
'did': did,
|
'time': str(tt),
|
||||||
'sign': sign,
|
'aid': 'pcclient'
|
||||||
})
|
})['data']['live_url']
|
||||||
|
|
||||||
video_info = self._download_json(
|
|
||||||
'http://www.douyu.com/lapi/live/getPlay/%s' % room_id, video_id,
|
|
||||||
data=flv_data, note='Downloading video info',
|
|
||||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
|
||||||
|
|
||||||
error_code = video_info.get('error', 0)
|
|
||||||
if error_code is not 0:
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s reported error %i' % (self.IE_NAME, error_code),
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
base_url = video_info['data']['rtmp_url']
|
|
||||||
live_path = video_info['data']['rtmp_live']
|
|
||||||
|
|
||||||
video_url = '%s/%s' % (base_url, live_path)
|
|
||||||
|
|
||||||
title = self._live_title(unescapeHTML(room['room_name']))
|
title = self._live_title(unescapeHTML(room['room_name']))
|
||||||
description = room.get('notice')
|
description = room.get('show_details')
|
||||||
thumbnail = room.get('room_src')
|
thumbnail = room.get('room_src')
|
||||||
uploader = room.get('nickname')
|
uploader = room.get('nickname')
|
||||||
|
|
||||||
|
|||||||
@@ -6,37 +6,24 @@ import re
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..compat import (
|
||||||
|
compat_urlparse,
|
||||||
|
compat_HTTPError,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
USER_AGENTS,
|
USER_AGENTS,
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
unified_strdate,
|
||||||
|
remove_end,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DPlayIE(InfoExtractor):
|
class DPlayIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?P<domain>www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# geo restricted, via direct unsigned hls URL
|
|
||||||
'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1255600',
|
|
||||||
'display_id': 'stagione-1-episodio-25',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Episodio 25',
|
|
||||||
'description': 'md5:cae5f40ad988811b197d2d27a53227eb',
|
|
||||||
'duration': 2761,
|
|
||||||
'timestamp': 1454701800,
|
|
||||||
'upload_date': '20160205',
|
|
||||||
'creator': 'RTIT',
|
|
||||||
'series': 'Take me out',
|
|
||||||
'season_number': 1,
|
|
||||||
'episode_number': 25,
|
|
||||||
'age_limit': 0,
|
|
||||||
},
|
|
||||||
'expected_warnings': ['Unable to download f4m manifest'],
|
|
||||||
}, {
|
|
||||||
# non geo restricted, via secure api, unsigned download hls URL
|
# non geo restricted, via secure api, unsigned download hls URL
|
||||||
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
|
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -168,3 +155,90 @@ class DPlayIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class DPlayItIE(InfoExtractor):
    """Extractor for episode pages hosted on it.dplay.com."""

    # The last path component of the page URL is used as the display id.
    _VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)'
    # Countries reported to the user when the API answers with a geo block.
    _GEO_COUNTRIES = ['IT']
    _TEST = {
        'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
        'md5': '2b808ffb00fc47b884a172ca5d13053c',
        'info_dict': {
            'id': '6918',
            'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij',
            'ext': 'mp4',
            'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij',
            'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
            'thumbnail': r're:^https?://.*\.jpe?g',
            'upload_date': '20160524',
            'series': 'Biografie imbarazzanti',
            'season_number': 1,
            'episode': 'Luigi Di Maio: la psicosi di Stanislawskij',
            'episode_number': 1,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single it.dplay.com episode page.

        The page is downloaded, the playback-info endpoint is located in its
        markup and queried with the bearer token stored in the
        ``dplayit_token`` cookie. The HLS manifest URL from the JSON response
        provides the formats; the remaining metadata is scraped from the page.

        Raises ExtractorError when the token cookie is missing or when the
        playback endpoint answers 400/403 with an error document (a geo
        restriction error is raised for ``access.denied.geoblocked``).
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # The numeric tail of this URL doubles as the video id (see 'id' below).
        info_url = self._search_regex(
            r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
            webpage, 'video id')

        title = remove_end(self._og_search_title(webpage), ' | Dplay')

        # The cookie lookup returns None when the cookie was never set; fail
        # with a readable extractor error instead of an AttributeError on
        # ``None.value``.
        token_cookie = self._get_cookies(url).get('dplayit_token')
        if token_cookie is None:
            raise ExtractorError('Unable to find dplayit_token cookie')

        try:
            info = self._download_json(
                info_url, display_id, headers={
                    'Authorization': 'Bearer %s' % token_cookie.value,
                    'Referer': url,
                })
        except ExtractorError as e:
            # For 400/403 the response body is parsed as JSON and its first
            # entry under 'errors' is turned into a user-facing message.
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
                info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
                error = info['errors'][0]
                if error.get('code') == 'access.denied.geoblocked':
                    self.raise_geo_restricted(
                        msg=error.get('detail'), countries=self._GEO_COUNTRIES)
                raise ExtractorError(error['detail'], expected=True)
            raise

        hls_url = info['data']['attributes']['streaming']['hls']['url']

        formats = self._extract_m3u8_formats(
            hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')

        # Series and episode names are optional page decorations.
        series = self._html_search_regex(
            r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
            webpage, 'series', fatal=False)
        episode = self._search_regex(
            r'<p[^>]+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*<br/>\s*<b>([^<]+)',
            webpage, 'episode', fatal=False)

        # The "dates" span is matched as "S.<season> E.<episode> - dd/mm/yyyy".
        mobj = re.search(
            r'(?s)<span[^>]+class=["\']dates["\'][^>]*>.+?\bS\.(?P<season_number>\d+)\s+E\.(?P<episode_number>\d+)\s*-\s*(?P<upload_date>\d{2}/\d{2}/\d{4})',
            webpage)
        if mobj:
            season_number = int(mobj.group('season_number'))
            episode_number = int(mobj.group('episode_number'))
            upload_date = unified_strdate(mobj.group('upload_date'))
        else:
            season_number = episode_number = upload_date = None

        return {
            'id': info_url.rpartition('/')[-1],
            'display_id': display_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'series': series,
            'season_number': season_number,
            'episode': episode,
            'episode_number': episode_number,
            'upload_date': upload_date,
            'formats': formats,
        }
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ from ..utils import (
|
|||||||
class DramaFeverBaseIE(AMPIE):
|
class DramaFeverBaseIE(AMPIE):
|
||||||
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
||||||
_NETRC_MACHINE = 'dramafever'
|
_NETRC_MACHINE = 'dramafever'
|
||||||
|
_GEO_COUNTRIES = ['US', 'CA']
|
||||||
|
|
||||||
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
|
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
|
||||||
|
|
||||||
@@ -116,8 +117,9 @@ class DramaFeverIE(DramaFeverBaseIE):
|
|||||||
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
|
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError):
|
if isinstance(e.cause, compat_HTTPError):
|
||||||
raise ExtractorError(
|
self.raise_geo_restricted(
|
||||||
'Currently unavailable in your country.', expected=True)
|
msg='Currently unavailable in your country',
|
||||||
|
countries=self._GEO_COUNTRIES)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
series_id, episode_number = video_id.split('.')
|
series_id, episode_number = video_id.split('.')
|
||||||
|
|||||||
@@ -15,6 +15,8 @@ from ..utils import (
|
|||||||
|
|
||||||
class DRTVIE(InfoExtractor):
|
class DRTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||||
|
_GEO_BYPASS = False
|
||||||
|
_GEO_COUNTRIES = ['DK']
|
||||||
IE_NAME = 'drtv'
|
IE_NAME = 'drtv'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
||||||
@@ -137,7 +139,7 @@ class DRTVIE(InfoExtractor):
|
|||||||
if not formats and restricted_to_denmark:
|
if not formats and restricted_to_denmark:
|
||||||
self.raise_geo_restricted(
|
self.raise_geo_restricted(
|
||||||
'Unfortunately, DR is not allowed to show this program outside Denmark.',
|
'Unfortunately, DR is not allowed to show this program outside Denmark.',
|
||||||
expected=True)
|
countries=self._GEO_COUNTRIES)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
@@ -156,6 +158,7 @@ class DRTVIE(InfoExtractor):
|
|||||||
class DRTVLiveIE(InfoExtractor):
|
class DRTVLiveIE(InfoExtractor):
|
||||||
IE_NAME = 'drtv:live'
|
IE_NAME = 'drtv:live'
|
||||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
|
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
|
||||||
|
_GEO_COUNTRIES = ['DK']
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://www.dr.dk/tv/live/dr1',
|
'url': 'https://www.dr.dk/tv/live/dr1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class EinthusanIE(InfoExtractor):
|
class EinthusanIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://einthusan.tv/movie/watch/9097/',
|
'url': 'https://einthusan.tv/movie/watch/9097/',
|
||||||
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -29,7 +29,10 @@ class EinthusanIE(InfoExtractor):
|
|||||||
'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
|
'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||||
def _decrypt(self, encrypted_data, video_id):
|
def _decrypt(self, encrypted_data, video_id):
|
||||||
|
|||||||
@@ -1,13 +1,9 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from .kaltura import KalturaIE
|
||||||
ExtractorError,
|
from ..utils import NO_DEFAULT
|
||||||
NO_DEFAULT,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class EllenTVIE(InfoExtractor):
|
class EllenTVIE(InfoExtractor):
|
||||||
@@ -65,7 +61,7 @@ class EllenTVIE(InfoExtractor):
|
|||||||
if partner_id and kaltura_id:
|
if partner_id and kaltura_id:
|
||||||
break
|
break
|
||||||
|
|
||||||
return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
|
return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key())
|
||||||
|
|
||||||
|
|
||||||
class EllenTVClipsIE(InfoExtractor):
|
class EllenTVClipsIE(InfoExtractor):
|
||||||
@@ -77,14 +73,14 @@ class EllenTVClipsIE(InfoExtractor):
|
|||||||
'id': 'meryl-streep-vanessa-hudgens',
|
'id': 'meryl-streep-vanessa-hudgens',
|
||||||
'title': 'Meryl Streep, Vanessa Hudgens',
|
'title': 'Meryl Streep, Vanessa Hudgens',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 7,
|
'playlist_mincount': 5,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
playlist = self._extract_playlist(webpage)
|
playlist = self._extract_playlist(webpage, playlist_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
@@ -93,16 +89,13 @@ class EllenTVClipsIE(InfoExtractor):
|
|||||||
'entries': self._extract_entries(playlist)
|
'entries': self._extract_entries(playlist)
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_playlist(self, webpage):
|
def _extract_playlist(self, webpage, playlist_id):
|
||||||
json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
|
json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
|
||||||
try:
|
return self._parse_json('[{' + json_string + '}]', playlist_id)
|
||||||
return json.loads('[{' + json_string + '}]')
|
|
||||||
except ValueError as ve:
|
|
||||||
raise ExtractorError('Failed to download JSON', cause=ve)
|
|
||||||
|
|
||||||
def _extract_entries(self, playlist):
|
def _extract_entries(self, playlist):
|
||||||
return [
|
return [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']),
|
'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']),
|
||||||
'Kaltura')
|
KalturaIE.ie_key(), video_id=item['kaltura_entry_id'])
|
||||||
for item in playlist]
|
for item in playlist]
|
||||||
|
|||||||
@@ -39,6 +39,18 @@ class ElPaisIE(InfoExtractor):
|
|||||||
'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas',
|
'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas',
|
||||||
'upload_date': '20170127',
|
'upload_date': '20170127',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://epv.elpais.com/epv/2017/02/14/programa_la_voz_de_inaki/1487062137_075943.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1487062137_075943',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Disyuntivas',
|
||||||
|
'description': 'md5:a0fb1485c4a6a8a917e6f93878e66218',
|
||||||
|
'upload_date': '20170214',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -59,14 +71,15 @@ class ElPaisIE(InfoExtractor):
|
|||||||
video_url = prefix + video_suffix
|
video_url = prefix + video_suffix
|
||||||
thumbnail_suffix = self._search_regex(
|
thumbnail_suffix = self._search_regex(
|
||||||
r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
|
r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
|
||||||
webpage, 'thumbnail URL', fatal=False)
|
webpage, 'thumbnail URL', default=None)
|
||||||
thumbnail = (
|
thumbnail = (
|
||||||
None if thumbnail_suffix is None
|
None if thumbnail_suffix is None
|
||||||
else prefix + thumbnail_suffix)
|
else prefix + thumbnail_suffix) or self._og_search_thumbnail(webpage)
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
(r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
|
(r"tituloVideo\s*=\s*'([^']+)'",
|
||||||
r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
|
r'<h2 class="entry-header entry-title.*?>(.*?)</h2>',
|
||||||
webpage, 'title')
|
r'<h1[^>]+class="titulo"[^>]*>([^<]+)'),
|
||||||
|
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||||
upload_date = unified_strdate(self._search_regex(
|
upload_date = unified_strdate(self._search_regex(
|
||||||
r'<p class="date-header date-int updated"\s+title="([^"]+)">',
|
r'<p class="date-header date-int updated"\s+title="([^"]+)">',
|
||||||
webpage, 'upload date', default=None) or self._html_search_meta(
|
webpage, 'upload date', default=None) or self._html_search_meta(
|
||||||
|
|||||||
39
youtube_dl/extractor/etonline.py
Normal file
39
youtube_dl/extractor/etonline.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class ETOnlineIE(InfoExtractor):
    """Playlist extractor for etonline.com pages embedding Brightcove videos."""

    # The last path component of the URL is used as the playlist id.
    _VALID_URL = r'https?://(?:www\.)?etonline\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.etonline.com/tv/211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale/',
        'info_dict': {
            'id': '211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale',
            'title': 'md5:a21ec7d3872ed98335cbd2a046f34ee6',
            'description': 'md5:8b94484063f463cca709617c79618ccd',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://www.etonline.com/media/video/here_are_the_stars_who_love_bringing_their_moms_as_dates_to_the_oscars-211359/',
        'only_matching': True,
    }]
    # Player URL template; %s receives a "title_<digits>" reference scraped
    # from the page.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911076001/default_default/index.html?videoId=ref:%s'

    def _real_extract(self, url):
        """Return a playlist with one Brightcove entry per reference found in the page."""
        page_id = self._match_id(url)

        html = self._download_webpage(url, page_id)

        # Each site.brightcove(...) call in the page contributes one entry.
        references = re.findall(
            r'site\.brightcove\s*\([^,]+,\s*["\'](title_\d+)', html)
        entries = []
        for reference in references:
            player_url = self.BRIGHTCOVE_URL_TEMPLATE % reference
            entries.append(
                self.url_result(player_url, 'BrightcoveNew', reference))

        playlist_title = self._og_search_title(html, fatal=False)
        playlist_description = self._og_search_description(html)

        return self.playlist_result(
            entries, page_id, playlist_title, playlist_description)
|
||||||
@@ -19,6 +19,7 @@ from .acast import (
|
|||||||
ACastChannelIE,
|
ACastChannelIE,
|
||||||
)
|
)
|
||||||
from .addanime import AddAnimeIE
|
from .addanime import AddAnimeIE
|
||||||
|
from .adn import ADNIE
|
||||||
from .adobetv import (
|
from .adobetv import (
|
||||||
AdobeTVIE,
|
AdobeTVIE,
|
||||||
AdobeTVShowIE,
|
AdobeTVShowIE,
|
||||||
@@ -71,6 +72,7 @@ from .arte import (
|
|||||||
)
|
)
|
||||||
from .atresplayer import AtresPlayerIE
|
from .atresplayer import AtresPlayerIE
|
||||||
from .atttechchannel import ATTTechChannelIE
|
from .atttechchannel import ATTTechChannelIE
|
||||||
|
from .atvat import ATVAtIE
|
||||||
from .audimedia import AudiMediaIE
|
from .audimedia import AudiMediaIE
|
||||||
from .audioboom import AudioBoomIE
|
from .audioboom import AudioBoomIE
|
||||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||||
@@ -83,6 +85,7 @@ from .awaan import (
|
|||||||
from .azmedien import (
|
from .azmedien import (
|
||||||
AZMedienIE,
|
AZMedienIE,
|
||||||
AZMedienPlaylistIE,
|
AZMedienPlaylistIE,
|
||||||
|
AZMedienShowPlaylistIE,
|
||||||
)
|
)
|
||||||
from .azubu import AzubuIE, AzubuLiveIE
|
from .azubu import AzubuIE, AzubuLiveIE
|
||||||
from .baidu import BaiduVideoIE
|
from .baidu import BaiduVideoIE
|
||||||
@@ -116,6 +119,7 @@ from .bleacherreport import (
|
|||||||
from .blinkx import BlinkxIE
|
from .blinkx import BlinkxIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .bokecc import BokeCCIE
|
from .bokecc import BokeCCIE
|
||||||
|
from .bostonglobe import BostonGlobeIE
|
||||||
from .bpb import BpbIE
|
from .bpb import BpbIE
|
||||||
from .br import BRIE
|
from .br import BRIE
|
||||||
from .bravotv import BravoTVIE
|
from .bravotv import BravoTVIE
|
||||||
@@ -161,7 +165,10 @@ from .ccc import CCCIE
|
|||||||
from .ccma import CCMAIE
|
from .ccma import CCMAIE
|
||||||
from .cctv import CCTVIE
|
from .cctv import CCTVIE
|
||||||
from .cda import CDAIE
|
from .cda import CDAIE
|
||||||
from .ceskatelevize import CeskaTelevizeIE
|
from .ceskatelevize import (
|
||||||
|
CeskaTelevizeIE,
|
||||||
|
CeskaTelevizePoradyIE,
|
||||||
|
)
|
||||||
from .channel9 import Channel9IE
|
from .channel9 import Channel9IE
|
||||||
from .charlierose import CharlieRoseIE
|
from .charlierose import CharlieRoseIE
|
||||||
from .chaturbate import ChaturbateIE
|
from .chaturbate import ChaturbateIE
|
||||||
@@ -227,6 +234,10 @@ from .dailymotion import (
|
|||||||
DailymotionUserIE,
|
DailymotionUserIE,
|
||||||
DailymotionCloudIE,
|
DailymotionCloudIE,
|
||||||
)
|
)
|
||||||
|
from .daisuki import (
|
||||||
|
DaisukiIE,
|
||||||
|
DaisukiPlaylistIE,
|
||||||
|
)
|
||||||
from .daum import (
|
from .daum import (
|
||||||
DaumIE,
|
DaumIE,
|
||||||
DaumClipIE,
|
DaumClipIE,
|
||||||
@@ -241,7 +252,10 @@ from .dfb import DFBIE
|
|||||||
from .dhm import DHMIE
|
from .dhm import DHMIE
|
||||||
from .dotsub import DotsubIE
|
from .dotsub import DotsubIE
|
||||||
from .douyutv import DouyuTVIE
|
from .douyutv import DouyuTVIE
|
||||||
from .dplay import DPlayIE
|
from .dplay import (
|
||||||
|
DPlayIE,
|
||||||
|
DPlayItIE,
|
||||||
|
)
|
||||||
from .dramafever import (
|
from .dramafever import (
|
||||||
DramaFeverIE,
|
DramaFeverIE,
|
||||||
DramaFeverSeriesIE,
|
DramaFeverSeriesIE,
|
||||||
@@ -257,7 +271,12 @@ from .dvtv import DVTVIE
|
|||||||
from .dumpert import DumpertIE
|
from .dumpert import DumpertIE
|
||||||
from .defense import DefenseGouvFrIE
|
from .defense import DefenseGouvFrIE
|
||||||
from .discovery import DiscoveryIE
|
from .discovery import DiscoveryIE
|
||||||
from .discoverygo import DiscoveryGoIE
|
from .discoverygo import (
|
||||||
|
DiscoveryGoIE,
|
||||||
|
DiscoveryGoPlaylistIE,
|
||||||
|
)
|
||||||
|
from .discoverynetworks import DiscoveryNetworksDeIE
|
||||||
|
from .discoveryvr import DiscoveryVRIE
|
||||||
from .disney import DisneyIE
|
from .disney import DisneyIE
|
||||||
from .dispeak import DigitallySpeakingIE
|
from .dispeak import DigitallySpeakingIE
|
||||||
from .dropbox import DropboxIE
|
from .dropbox import DropboxIE
|
||||||
@@ -288,6 +307,7 @@ from .espn import (
|
|||||||
ESPNArticleIE,
|
ESPNArticleIE,
|
||||||
)
|
)
|
||||||
from .esri import EsriVideoIE
|
from .esri import EsriVideoIE
|
||||||
|
from .etonline import ETOnlineIE
|
||||||
from .europa import EuropaIE
|
from .europa import EuropaIE
|
||||||
from .everyonesmixtape import EveryonesMixtapeIE
|
from .everyonesmixtape import EveryonesMixtapeIE
|
||||||
from .expotv import ExpoTVIE
|
from .expotv import ExpoTVIE
|
||||||
@@ -338,6 +358,7 @@ from .francetv import (
|
|||||||
)
|
)
|
||||||
from .freesound import FreesoundIE
|
from .freesound import FreesoundIE
|
||||||
from .freespeech import FreespeechIE
|
from .freespeech import FreespeechIE
|
||||||
|
from .freshlive import FreshLiveIE
|
||||||
from .funimation import FunimationIE
|
from .funimation import FunimationIE
|
||||||
from .funnyordie import FunnyOrDieIE
|
from .funnyordie import FunnyOrDieIE
|
||||||
from .fusion import FusionIE
|
from .fusion import FusionIE
|
||||||
@@ -363,6 +384,7 @@ from .globo import (
|
|||||||
GloboArticleIE,
|
GloboArticleIE,
|
||||||
)
|
)
|
||||||
from .go import GoIE
|
from .go import GoIE
|
||||||
|
from .go90 import Go90IE
|
||||||
from .godtube import GodTubeIE
|
from .godtube import GodTubeIE
|
||||||
from .godtv import GodTVIE
|
from .godtv import GodTVIE
|
||||||
from .golem import GolemIE
|
from .golem import GolemIE
|
||||||
@@ -520,6 +542,7 @@ from .mangomolo import (
|
|||||||
)
|
)
|
||||||
from .matchtv import MatchTVIE
|
from .matchtv import MatchTVIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
|
from .medici import MediciIE
|
||||||
from .meipai import MeipaiIE
|
from .meipai import MeipaiIE
|
||||||
from .melonvod import MelonVODIE
|
from .melonvod import MelonVODIE
|
||||||
from .meta import METAIE
|
from .meta import METAIE
|
||||||
@@ -637,6 +660,7 @@ from .ninecninemedia import (
|
|||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
from .ninenow import NineNowIE
|
from .ninenow import NineNowIE
|
||||||
from .nintendo import NintendoIE
|
from .nintendo import NintendoIE
|
||||||
|
from .njpwworld import NJPWWorldIE
|
||||||
from .nobelprize import NobelPrizeIE
|
from .nobelprize import NobelPrizeIE
|
||||||
from .noco import NocoIE
|
from .noco import NocoIE
|
||||||
from .normalboots import NormalbootsIE
|
from .normalboots import NormalbootsIE
|
||||||
@@ -666,6 +690,7 @@ from .npo import (
|
|||||||
NPORadioIE,
|
NPORadioIE,
|
||||||
NPORadioFragmentIE,
|
NPORadioFragmentIE,
|
||||||
SchoolTVIE,
|
SchoolTVIE,
|
||||||
|
HetKlokhuisIE,
|
||||||
VPROIE,
|
VPROIE,
|
||||||
WNLIE,
|
WNLIE,
|
||||||
)
|
)
|
||||||
@@ -710,6 +735,10 @@ from .orf import (
|
|||||||
ORFFM4IE,
|
ORFFM4IE,
|
||||||
ORFIPTVIE,
|
ORFIPTVIE,
|
||||||
)
|
)
|
||||||
|
from .packtpub import (
|
||||||
|
PacktPubIE,
|
||||||
|
PacktPubCourseIE,
|
||||||
|
)
|
||||||
from .pandatv import PandaTVIE
|
from .pandatv import PandaTVIE
|
||||||
from .pandoratv import PandoraTVIE
|
from .pandoratv import PandoraTVIE
|
||||||
from .parliamentliveuk import ParliamentLiveUKIE
|
from .parliamentliveuk import ParliamentLiveUKIE
|
||||||
@@ -779,11 +808,12 @@ from .radiojavan import RadioJavanIE
|
|||||||
from .radiobremen import RadioBremenIE
|
from .radiobremen import RadioBremenIE
|
||||||
from .radiofrance import RadioFranceIE
|
from .radiofrance import RadioFranceIE
|
||||||
from .rai import (
|
from .rai import (
|
||||||
RaiTVIE,
|
RaiPlayIE,
|
||||||
RaiIE,
|
RaiIE,
|
||||||
)
|
)
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
from .rds import RDSIE
|
from .rds import RDSIE
|
||||||
|
from .redbulltv import RedBullTVIE
|
||||||
from .redtube import RedTubeIE
|
from .redtube import RedTubeIE
|
||||||
from .regiotv import RegioTVIE
|
from .regiotv import RegioTVIE
|
||||||
from .rentv import (
|
from .rentv import (
|
||||||
@@ -809,7 +839,11 @@ from .rozhlas import RozhlasIE
|
|||||||
from .rtbf import RTBFIE
|
from .rtbf import RTBFIE
|
||||||
from .rte import RteIE, RteRadioIE
|
from .rte import RteIE, RteRadioIE
|
||||||
from .rtlnl import RtlNlIE
|
from .rtlnl import RtlNlIE
|
||||||
from .rtl2 import RTL2IE
|
from .rtl2 import (
|
||||||
|
RTL2IE,
|
||||||
|
RTL2YouIE,
|
||||||
|
RTL2YouSeriesIE,
|
||||||
|
)
|
||||||
from .rtp import RTPIE
|
from .rtp import RTPIE
|
||||||
from .rts import RTSIE
|
from .rts import RTSIE
|
||||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
||||||
@@ -835,7 +869,6 @@ from .safari import (
|
|||||||
from .sapo import SapoIE
|
from .sapo import SapoIE
|
||||||
from .savefrom import SaveFromIE
|
from .savefrom import SaveFromIE
|
||||||
from .sbs import SBSIE
|
from .sbs import SBSIE
|
||||||
from .scivee import SciVeeIE
|
|
||||||
from .screencast import ScreencastIE
|
from .screencast import ScreencastIE
|
||||||
from .screencastomatic import ScreencastOMaticIE
|
from .screencastomatic import ScreencastOMaticIE
|
||||||
from .scrippsnetworks import ScrippsNetworksWatchIE
|
from .scrippsnetworks import ScrippsNetworksWatchIE
|
||||||
@@ -852,6 +885,7 @@ from .shared import (
|
|||||||
from .showroomlive import ShowRoomLiveIE
|
from .showroomlive import ShowRoomLiveIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
from .sixplay import SixPlayIE
|
from .sixplay import SixPlayIE
|
||||||
|
from .skylinewebcams import SkylineWebcamsIE
|
||||||
from .skynewsarabia import (
|
from .skynewsarabia import (
|
||||||
SkyNewsArabiaIE,
|
SkyNewsArabiaIE,
|
||||||
SkyNewsArabiaArticleIE,
|
SkyNewsArabiaArticleIE,
|
||||||
@@ -951,13 +985,13 @@ from .theplatform import (
|
|||||||
from .thescene import TheSceneIE
|
from .thescene import TheSceneIE
|
||||||
from .thesixtyone import TheSixtyOneIE
|
from .thesixtyone import TheSixtyOneIE
|
||||||
from .thestar import TheStarIE
|
from .thestar import TheStarIE
|
||||||
|
from .thesun import TheSunIE
|
||||||
from .theweatherchannel import TheWeatherChannelIE
|
from .theweatherchannel import TheWeatherChannelIE
|
||||||
from .thisamericanlife import ThisAmericanLifeIE
|
from .thisamericanlife import ThisAmericanLifeIE
|
||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
from .thisoldhouse import ThisOldHouseIE
|
from .thisoldhouse import ThisOldHouseIE
|
||||||
from .threeqsdn import ThreeQSDNIE
|
from .threeqsdn import ThreeQSDNIE
|
||||||
from .tinypic import TinyPicIE
|
from .tinypic import TinyPicIE
|
||||||
from .tlc import TlcDeIE
|
|
||||||
from .tmz import (
|
from .tmz import (
|
||||||
TMZIE,
|
TMZIE,
|
||||||
TMZArticleIE,
|
TMZArticleIE,
|
||||||
@@ -970,6 +1004,7 @@ from .tnaflix import (
|
|||||||
)
|
)
|
||||||
from .toggle import ToggleIE
|
from .toggle import ToggleIE
|
||||||
from .tonline import TOnlineIE
|
from .tonline import TOnlineIE
|
||||||
|
from .toongoggles import ToonGogglesIE
|
||||||
from .toutv import TouTvIE
|
from .toutv import TouTvIE
|
||||||
from .toypics import ToypicsUserIE, ToypicsIE
|
from .toypics import ToypicsUserIE, ToypicsIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
@@ -990,14 +1025,17 @@ from .tunein import (
|
|||||||
TuneInTopicIE,
|
TuneInTopicIE,
|
||||||
TuneInShortenerIE,
|
TuneInShortenerIE,
|
||||||
)
|
)
|
||||||
|
from .tunepk import TunePkIE
|
||||||
from .turbo import TurboIE
|
from .turbo import TurboIE
|
||||||
from .tutv import TutvIE
|
from .tutv import TutvIE
|
||||||
from .tv2 import (
|
from .tv2 import (
|
||||||
TV2IE,
|
TV2IE,
|
||||||
TV2ArticleIE,
|
TV2ArticleIE,
|
||||||
)
|
)
|
||||||
|
from .tv2hu import TV2HuIE
|
||||||
from .tv3 import TV3IE
|
from .tv3 import TV3IE
|
||||||
from .tv4 import TV4IE
|
from .tv4 import TV4IE
|
||||||
|
from .tv5mondeplus import TV5MondePlusIE
|
||||||
from .tva import TVAIE
|
from .tva import TVAIE
|
||||||
from .tvanouvelles import (
|
from .tvanouvelles import (
|
||||||
TVANouvellesIE,
|
TVANouvellesIE,
|
||||||
@@ -1009,6 +1047,7 @@ from .tvc import (
|
|||||||
)
|
)
|
||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvland import TVLandIE
|
from .tvland import TVLandIE
|
||||||
|
from .tvn24 import TVN24IE
|
||||||
from .tvnoe import TVNoeIE
|
from .tvnoe import TVNoeIE
|
||||||
from .tvp import (
|
from .tvp import (
|
||||||
TVPEmbedIE,
|
TVPEmbedIE,
|
||||||
@@ -1155,6 +1194,13 @@ from .voicerepublic import VoiceRepublicIE
|
|||||||
from .voxmedia import VoxMediaIE
|
from .voxmedia import VoxMediaIE
|
||||||
from .vporn import VpornIE
|
from .vporn import VpornIE
|
||||||
from .vrt import VRTIE
|
from .vrt import VRTIE
|
||||||
|
from .vrak import VrakIE
|
||||||
|
from .vrv import (
|
||||||
|
VRVIE,
|
||||||
|
VRVSeriesIE,
|
||||||
|
)
|
||||||
|
from .vshare import VShareIE
|
||||||
|
from .medialaan import MedialaanIE
|
||||||
from .vube import VubeIE
|
from .vube import VubeIE
|
||||||
from .vuclip import VuClipIE
|
from .vuclip import VuClipIE
|
||||||
from .vvvvid import VVVVIDIE
|
from .vvvvid import VVVVIDIE
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ class EyedoTVIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': self._extract_m3u8_formats(
|
'formats': self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
|
m3u8_url, video_id, 'mp4', 'm3u8_native'),
|
||||||
'description': xpath_text(video_data, _add_ns('Description')),
|
'description': xpath_text(video_data, _add_ns('Description')),
|
||||||
'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
|
'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
|
||||||
'uploader': xpath_text(video_data, _add_ns('Createur')),
|
'uploader': xpath_text(video_data, _add_ns('Createur')),
|
||||||
|
|||||||
@@ -196,6 +196,10 @@ class FacebookIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
|
'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# no title
|
||||||
|
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -303,7 +307,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
if not video_data:
|
if not video_data:
|
||||||
server_js_data = self._parse_json(
|
server_js_data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)',
|
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
|
||||||
webpage, 'js data', default='{}'),
|
webpage, 'js data', default='{}'),
|
||||||
video_id, transform_source=js_to_json, fatal=False)
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
if server_js_data:
|
if server_js_data:
|
||||||
@@ -353,15 +357,15 @@ class FacebookIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
|
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
|
||||||
default=None)
|
'title', default=None)
|
||||||
if not video_title:
|
if not video_title:
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
||||||
webpage, 'alternative title', default=None)
|
webpage, 'alternative title', default=None)
|
||||||
if not video_title:
|
if not video_title:
|
||||||
video_title = self._html_search_meta(
|
video_title = self._html_search_meta(
|
||||||
'description', webpage, 'title')
|
'description', webpage, 'title', default=None)
|
||||||
if video_title:
|
if video_title:
|
||||||
video_title = limit_length(video_title, 80)
|
video_title = limit_length(video_title, 80)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -47,9 +47,12 @@ class FOXIE(AdobePassIE):
|
|||||||
resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
|
resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
|
||||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
|
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
|
||||||
|
|
||||||
return {
|
info = self._search_json_ld(webpage, video_id, fatal=False)
|
||||||
|
info.update({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'ThePlatform',
|
'ie_key': 'ThePlatform',
|
||||||
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
}
|
})
|
||||||
|
|
||||||
|
return info
|
||||||
|
|||||||
@@ -4,7 +4,8 @@ from __future__ import unicode_literals
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
unified_strdate,
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -19,6 +20,7 @@ class FranceCultureIE(InfoExtractor):
|
|||||||
'title': 'Rendez-vous au pays des geeks',
|
'title': 'Rendez-vous au pays des geeks',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'upload_date': '20140301',
|
'upload_date': '20140301',
|
||||||
|
'timestamp': 1393642916,
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -28,30 +30,34 @@ class FranceCultureIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
video_url = self._search_regex(
|
video_data = extract_attributes(self._search_regex(
|
||||||
r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"',
|
r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)',
|
||||||
webpage, 'video path')
|
webpage, 'video data'))
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
video_url = video_data['data-asset-source']
|
||||||
|
title = video_data.get('data-asset-title') or self._og_search_title(webpage)
|
||||||
|
|
||||||
upload_date = unified_strdate(self._search_regex(
|
description = self._html_search_regex(
|
||||||
'(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
|
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
|
||||||
webpage, 'upload date', fatal=False))
|
webpage, 'description', default=None)
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"',
|
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
|
||||||
webpage, 'thumbnail', fatal=False)
|
webpage, 'thumbnail', fatal=False)
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
|
r'(?s)<span class="author">(.*?)</span>',
|
||||||
webpage, 'uploader', default=None)
|
webpage, 'uploader', default=None)
|
||||||
vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None
|
ext = determine_ext(video_url.lower())
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': display_id,
|
'id': display_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'vcodec': vcodec,
|
'ext': ext,
|
||||||
|
'vcodec': 'none' if ext == 'mp3' else None,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'upload_date': upload_date,
|
'timestamp': int_or_none(video_data.get('data-asset-created-date')),
|
||||||
|
'duration': int_or_none(video_data.get('data-duration')),
|
||||||
}
|
}
|
||||||
|
|||||||
83
youtube_dl/extractor/freshlive.py
Normal file
83
youtube_dl/extractor/freshlive.py
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class FreshLiveIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://freshlive\.tv/[^/]+/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://freshlive.tv/satotv/74712',
|
||||||
|
'md5': '9f0cf5516979c4454ce982df3d97f352',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '74712',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'テスト',
|
||||||
|
'description': 'テスト',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 1511,
|
||||||
|
'timestamp': 1483619655,
|
||||||
|
'upload_date': '20170105',
|
||||||
|
'uploader': 'サトTV',
|
||||||
|
'uploader_id': 'satotv',
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'is_live': False,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
options = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'window\.__CONTEXT__\s*=\s*({.+?});\s*</script>',
|
||||||
|
webpage, 'initial context'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
info = options['context']['dispatcher']['stores']['ProgramStore']['programs'][video_id]
|
||||||
|
|
||||||
|
title = info['title']
|
||||||
|
|
||||||
|
if info.get('status') == 'upcoming':
|
||||||
|
raise ExtractorError('Stream %s is upcoming' % video_id, expected=True)
|
||||||
|
|
||||||
|
stream_url = info.get('liveStreamUrl') or info['archiveStreamUrl']
|
||||||
|
|
||||||
|
is_live = info.get('liveStreamUrl') is not None
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
stream_url, video_id, 'mp4',
|
||||||
|
'm3u8_native', m3u8_id='hls')
|
||||||
|
|
||||||
|
if is_live:
|
||||||
|
title = self._live_title(title)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'title': title,
|
||||||
|
'description': info.get('description'),
|
||||||
|
'thumbnail': info.get('thumbnailUrl'),
|
||||||
|
'duration': int_or_none(info.get('airTime')),
|
||||||
|
'timestamp': unified_timestamp(info.get('createdAt')),
|
||||||
|
'uploader': try_get(
|
||||||
|
info, lambda x: x['channel']['title'], compat_str),
|
||||||
|
'uploader_id': try_get(
|
||||||
|
info, lambda x: x['channel']['code'], compat_str),
|
||||||
|
'uploader_url': try_get(
|
||||||
|
info, lambda x: x['channel']['permalink'], compat_str),
|
||||||
|
'view_count': int_or_none(info.get('viewCount')),
|
||||||
|
'comment_count': int_or_none(info.get('commentCount')),
|
||||||
|
'tags': info.get('tags', []),
|
||||||
|
'is_live': is_live,
|
||||||
|
}
|
||||||
@@ -7,9 +7,9 @@ from ..compat import (
|
|||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
urlencode_postdata
|
urlencode_postdata
|
||||||
@@ -17,34 +17,26 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class FunimationIE(InfoExtractor):
|
class FunimationIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?funimation\.com/shows/[^/]+/videos/(?:official|promotional)/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_NETRC_MACHINE = 'funimation'
|
_NETRC_MACHINE = 'funimation'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.funimation.com/shows/air/videos/official/breeze',
|
'url': 'https://www.funimation.com/shows/hacksign/role-play/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '658',
|
'id': '91144',
|
||||||
'display_id': 'breeze',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Air - 1 - Breeze',
|
|
||||||
'description': 'md5:1769f43cd5fc130ace8fd87232207892',
|
|
||||||
'thumbnail': r're:https?://.*\.jpg',
|
|
||||||
},
|
|
||||||
'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '31128',
|
|
||||||
'display_id': 'role-play',
|
'display_id': 'role-play',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '.hack//SIGN - 1 - Role Play',
|
'title': '.hack//SIGN - Role Play',
|
||||||
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
|
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
|
||||||
'thumbnail': r're:https?://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
},
|
},
|
||||||
'skip': 'Access without user interaction is forbidden by CloudFlare',
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
|
'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9635',
|
'id': '9635',
|
||||||
'display_id': 'broadcast-dub-preview',
|
'display_id': 'broadcast-dub-preview',
|
||||||
@@ -54,25 +46,13 @@ class FunimationIE(InfoExtractor):
|
|||||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||||
},
|
},
|
||||||
'skip': 'Access without user interaction is forbidden by CloudFlare',
|
'skip': 'Access without user interaction is forbidden by CloudFlare',
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_LOGIN_URL = 'http://www.funimation.com/login'
|
_LOGIN_URL = 'http://www.funimation.com/login'
|
||||||
|
|
||||||
def _download_webpage(self, *args, **kwargs):
|
|
||||||
try:
|
|
||||||
return super(FunimationIE, self)._download_webpage(*args, **kwargs)
|
|
||||||
except ExtractorError as ee:
|
|
||||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
|
||||||
response = ee.cause.read()
|
|
||||||
if b'>Please complete the security check to access<' in response:
|
|
||||||
raise ExtractorError(
|
|
||||||
'Access to funimation.com is blocked by CloudFlare. '
|
|
||||||
'Please browse to http://www.funimation.com/, solve '
|
|
||||||
'the reCAPTCHA, export browser cookies to a text file,'
|
|
||||||
' and then try again with --cookies YOUR_COOKIE_FILE.',
|
|
||||||
expected=True)
|
|
||||||
raise
|
|
||||||
|
|
||||||
def _extract_cloudflare_session_ua(self, url):
|
def _extract_cloudflare_session_ua(self, url):
|
||||||
ci_session_cookie = self._get_cookies(url).get('ci_session')
|
ci_session_cookie = self._get_cookies(url).get('ci_session')
|
||||||
if ci_session_cookie:
|
if ci_session_cookie:
|
||||||
@@ -114,119 +94,74 @@ class FunimationIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
def _search_kane(name):
|
||||||
|
return self._search_regex(
|
||||||
|
r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name,
|
||||||
|
webpage, name, default=None)
|
||||||
|
|
||||||
|
title_data = self._parse_json(self._search_regex(
|
||||||
|
r'TITLE_DATA\s*=\s*({[^}]+})',
|
||||||
|
webpage, 'title data', default=''),
|
||||||
|
display_id, js_to_json, fatal=False) or {}
|
||||||
|
|
||||||
|
video_id = title_data.get('id') or self._search_regex([
|
||||||
|
r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
|
||||||
|
r'<iframe[^>]+src="/player/(\d+)"',
|
||||||
|
], webpage, 'video_id', default=None)
|
||||||
|
if not video_id:
|
||||||
|
player_url = self._html_search_meta([
|
||||||
|
'al:web:url',
|
||||||
|
'og:video:url',
|
||||||
|
'og:video:secure_url',
|
||||||
|
], webpage, fatal=True)
|
||||||
|
video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id')
|
||||||
|
|
||||||
|
title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage)
|
||||||
|
series = _search_kane('showName')
|
||||||
|
if series:
|
||||||
|
title = '%s - %s' % (series, title)
|
||||||
|
description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
|
||||||
|
|
||||||
|
try:
|
||||||
|
sources = self._download_json(
|
||||||
|
'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
|
||||||
|
video_id)['items']
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
|
error = self._parse_json(e.cause.read(), video_id)['errors'][0]
|
||||||
|
raise ExtractorError('%s said: %s' % (
|
||||||
|
self.IE_NAME, error.get('detail') or error.get('title')), expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
errors = []
|
|
||||||
formats = []
|
formats = []
|
||||||
|
for source in sources:
|
||||||
ERRORS_MAP = {
|
source_url = source.get('src')
|
||||||
'ERROR_MATURE_CONTENT_LOGGED_IN': 'matureContentLoggedIn',
|
if not source_url:
|
||||||
'ERROR_MATURE_CONTENT_LOGGED_OUT': 'matureContentLoggedOut',
|
continue
|
||||||
'ERROR_SUBSCRIPTION_LOGGED_OUT': 'subscriptionLoggedOut',
|
source_type = source.get('videoType') or determine_ext(source_url)
|
||||||
'ERROR_VIDEO_EXPIRED': 'videoExpired',
|
if source_type == 'm3u8':
|
||||||
'ERROR_TERRITORY_UNAVAILABLE': 'territoryUnavailable',
|
formats.extend(self._extract_m3u8_formats(
|
||||||
'SVODBASIC_SUBSCRIPTION_IN_PLAYER': 'basicSubscription',
|
source_url, video_id, 'mp4',
|
||||||
'SVODNON_SUBSCRIPTION_IN_PLAYER': 'nonSubscription',
|
m3u8_id='hls', fatal=False))
|
||||||
'ERROR_PLAYER_NOT_RESPONDING': 'playerNotResponding',
|
else:
|
||||||
'ERROR_UNABLE_TO_CONNECT_TO_CDN': 'unableToConnectToCDN',
|
formats.append({
|
||||||
'ERROR_STREAM_NOT_FOUND': 'streamNotFound',
|
'format_id': source_type,
|
||||||
}
|
'url': source_url,
|
||||||
|
})
|
||||||
USER_AGENTS = (
|
|
||||||
# PC UA is served with m3u8 that provides some bonus lower quality formats
|
|
||||||
('pc', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'),
|
|
||||||
# Mobile UA allows to extract direct links and also does not fail when
|
|
||||||
# PC UA fails with hulu error (e.g.
|
|
||||||
# http://www.funimation.com/shows/hacksign/videos/official/role-play)
|
|
||||||
('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
|
|
||||||
)
|
|
||||||
|
|
||||||
user_agent = self._extract_cloudflare_session_ua(url)
|
|
||||||
if user_agent:
|
|
||||||
USER_AGENTS = ((None, user_agent),)
|
|
||||||
|
|
||||||
for kind, user_agent in USER_AGENTS:
|
|
||||||
request = sanitized_Request(url)
|
|
||||||
request.add_header('User-Agent', user_agent)
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
request, display_id,
|
|
||||||
'Downloading %s webpage' % kind if kind else 'Downloading webpage')
|
|
||||||
|
|
||||||
playlist = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'var\s+playersData\s*=\s*(\[.+?\]);\n',
|
|
||||||
webpage, 'players data'),
|
|
||||||
display_id)[0]['playlist']
|
|
||||||
|
|
||||||
items = next(item['items'] for item in playlist if item.get('items'))
|
|
||||||
item = next(item for item in items if item.get('itemAK') == display_id)
|
|
||||||
|
|
||||||
error_messages = {}
|
|
||||||
video_error_messages = self._search_regex(
|
|
||||||
r'var\s+videoErrorMessages\s*=\s*({.+?});\n',
|
|
||||||
webpage, 'error messages', default=None)
|
|
||||||
if video_error_messages:
|
|
||||||
error_messages_json = self._parse_json(video_error_messages, display_id, fatal=False)
|
|
||||||
if error_messages_json:
|
|
||||||
for _, error in error_messages_json.items():
|
|
||||||
type_ = error.get('type')
|
|
||||||
description = error.get('description')
|
|
||||||
content = error.get('content')
|
|
||||||
if type_ == 'text' and description and content:
|
|
||||||
error_message = ERRORS_MAP.get(description)
|
|
||||||
if error_message:
|
|
||||||
error_messages[error_message] = content
|
|
||||||
|
|
||||||
for video in item.get('videoSet', []):
|
|
||||||
auth_token = video.get('authToken')
|
|
||||||
if not auth_token:
|
|
||||||
continue
|
|
||||||
funimation_id = video.get('FUNImationID') or video.get('videoId')
|
|
||||||
preference = 1 if video.get('languageMode') == 'dub' else 0
|
|
||||||
if not auth_token.startswith('?'):
|
|
||||||
auth_token = '?%s' % auth_token
|
|
||||||
for quality, height in (('sd', 480), ('hd', 720), ('hd1080', 1080)):
|
|
||||||
format_url = video.get('%sUrl' % quality)
|
|
||||||
if not format_url:
|
|
||||||
continue
|
|
||||||
if not format_url.startswith(('http', '//')):
|
|
||||||
errors.append(format_url)
|
|
||||||
continue
|
|
||||||
if determine_ext(format_url) == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
format_url + auth_token, display_id, 'mp4', entry_protocol='m3u8_native',
|
|
||||||
preference=preference, m3u8_id='%s-hls' % funimation_id, fatal=False))
|
|
||||||
else:
|
|
||||||
tbr = int_or_none(self._search_regex(
|
|
||||||
r'-(\d+)[Kk]', format_url, 'tbr', default=None))
|
|
||||||
formats.append({
|
|
||||||
'url': format_url + auth_token,
|
|
||||||
'format_id': '%s-http-%dp' % (funimation_id, height),
|
|
||||||
'height': height,
|
|
||||||
'tbr': tbr,
|
|
||||||
'preference': preference,
|
|
||||||
})
|
|
||||||
|
|
||||||
if not formats and errors:
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s returned error: %s'
|
|
||||||
% (self.IE_NAME, clean_html(error_messages.get(errors[0], errors[0]))),
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = item['title']
|
|
||||||
artist = item.get('artist')
|
|
||||||
if artist:
|
|
||||||
title = '%s - %s' % (artist, title)
|
|
||||||
description = self._og_search_description(webpage) or item.get('description')
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage) or item.get('posterUrl')
|
|
||||||
video_id = item.get('itemId') or display_id
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
'series': series,
|
||||||
|
'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
|
||||||
|
'episode_number': int_or_none(title_data.get('episodeNum')),
|
||||||
|
'episode': episode,
|
||||||
|
'season_id': title_data.get('seriesId'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ from ..utils import (
|
|||||||
float_or_none,
|
float_or_none,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
is_html,
|
is_html,
|
||||||
|
js_to_json,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
@@ -83,6 +84,7 @@ from .twentymin import TwentyMinutenIE
|
|||||||
from .ustream import UstreamIE
|
from .ustream import UstreamIE
|
||||||
from .openload import OpenloadIE
|
from .openload import OpenloadIE
|
||||||
from .videopress import VideoPressIE
|
from .videopress import VideoPressIE
|
||||||
|
from .rutube import RutubeIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@@ -447,6 +449,23 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Brightcove with UUID in videoPlayer
|
||||||
|
'url': 'http://www8.hp.com/cn/zh/home.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5255815316001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sprocket Video - China',
|
||||||
|
'description': 'Sprocket Video - China',
|
||||||
|
'uploader': 'HP-Video Gallery',
|
||||||
|
'timestamp': 1482263210,
|
||||||
|
'upload_date': '20161220',
|
||||||
|
'uploader_id': '1107601872001',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # m3u8 download
|
||||||
|
},
|
||||||
|
},
|
||||||
# ooyala video
|
# ooyala video
|
||||||
{
|
{
|
||||||
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
||||||
@@ -711,6 +730,21 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
# YouTube <object> embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
|
||||||
|
'md5': '516718101ec834f74318df76259fb3cc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'msN87y-iEx0',
|
||||||
|
'ext': 'webm',
|
||||||
|
'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
|
||||||
|
'upload_date': '20080526',
|
||||||
|
'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
|
||||||
|
'uploader': 'Christopher Sykes',
|
||||||
|
'uploader_id': 'ChristopherJSykes',
|
||||||
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
|
},
|
||||||
# Camtasia studio
|
# Camtasia studio
|
||||||
{
|
{
|
||||||
'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
|
'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
|
||||||
@@ -883,12 +917,13 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
# LazyYT
|
# LazyYT
|
||||||
{
|
{
|
||||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
'url': 'https://skiplagged.com/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1986',
|
'id': 'skiplagged',
|
||||||
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
|
'title': 'Skiplagged: The smart way to find cheap flights',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 1,
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
},
|
},
|
||||||
# Cinchcast embed
|
# Cinchcast embed
|
||||||
{
|
{
|
||||||
@@ -961,6 +996,30 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
# Complex jwplayer
|
||||||
|
{
|
||||||
|
'url': 'http://www.indiedb.com/games/king-machine/videos',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'videos',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'king machine trailer 1',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# JWPlayer config passed as variable
|
||||||
|
'url': 'http://www.txxx.com/videos/3326530/ariele/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3326530_hq',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'ARIELE | Tube Cup',
|
||||||
|
'uploader': 'www.txxx.com',
|
||||||
|
'age_limit': 18,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
# rtl.nl embed
|
# rtl.nl embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||||
@@ -1036,6 +1095,21 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Kaltura'],
|
'add_ie': ['Kaltura'],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Kaltura iframe embed
|
||||||
|
'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
|
||||||
|
'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0_f2cfbpwy',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'I. M. Pei: A Centennial Celebration',
|
||||||
|
'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
|
||||||
|
'upload_date': '20170403',
|
||||||
|
'uploader_id': 'batchUser',
|
||||||
|
'timestamp': 1491232186,
|
||||||
|
},
|
||||||
|
'add_ie': ['Kaltura'],
|
||||||
|
},
|
||||||
# Eagle.Platform embed (generic URL)
|
# Eagle.Platform embed (generic URL)
|
||||||
{
|
{
|
||||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||||
@@ -1490,7 +1564,40 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': [VideoPressIE.ie_key()],
|
'add_ie': [VideoPressIE.ie_key()],
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
# Rutube embed
|
||||||
|
'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Магаззино: Казань 2',
|
||||||
|
'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
|
||||||
|
'uploader': 'Магаззино',
|
||||||
|
'upload_date': '20170228',
|
||||||
|
'uploader_id': '996642',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': [RutubeIE.ie_key()],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# ThePlatform embedded with whitespaces in URLs
|
||||||
|
'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Senate ISVP iframe https
|
||||||
|
'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
|
||||||
|
'md5': 'fb8c70b0b515e5037981a2492099aab8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'govtaff020316',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Integrated Senate Video Player',
|
||||||
|
},
|
||||||
|
'add_ie': [SenateISVPIE.ie_key()],
|
||||||
|
},
|
||||||
# {
|
# {
|
||||||
# # TODO: find another test
|
# # TODO: find another test
|
||||||
# # http://schema.org/VideoObject
|
# # http://schema.org/VideoObject
|
||||||
@@ -1790,14 +1897,6 @@ class GenericIE(InfoExtractor):
|
|||||||
video_description = self._og_search_description(webpage, default=None)
|
video_description = self._og_search_description(webpage, default=None)
|
||||||
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
|
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||||
|
|
||||||
# Helper method
|
|
||||||
def _playlist_from_matches(matches, getter=None, ie=None):
|
|
||||||
urlrs = orderedSet(
|
|
||||||
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
|
|
||||||
for m in matches)
|
|
||||||
return self.playlist_result(
|
|
||||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
|
||||||
|
|
||||||
# Look for Brightcove Legacy Studio embeds
|
# Look for Brightcove Legacy Studio embeds
|
||||||
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
|
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
|
||||||
if bc_urls:
|
if bc_urls:
|
||||||
@@ -1818,28 +1917,28 @@ class GenericIE(InfoExtractor):
|
|||||||
# Look for Brightcove New Studio embeds
|
# Look for Brightcove New Studio embeds
|
||||||
bc_urls = BrightcoveNewIE._extract_urls(webpage)
|
bc_urls = BrightcoveNewIE._extract_urls(webpage)
|
||||||
if bc_urls:
|
if bc_urls:
|
||||||
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
|
return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
|
||||||
|
|
||||||
# Look for ThePlatform embeds
|
# Look for ThePlatform embeds
|
||||||
tp_urls = ThePlatformIE._extract_urls(webpage)
|
tp_urls = ThePlatformIE._extract_urls(webpage)
|
||||||
if tp_urls:
|
if tp_urls:
|
||||||
return _playlist_from_matches(tp_urls, ie='ThePlatform')
|
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
|
||||||
|
|
||||||
# Look for Vessel embeds
|
# Look for Vessel embeds
|
||||||
vessel_urls = VesselIE._extract_urls(webpage)
|
vessel_urls = VesselIE._extract_urls(webpage)
|
||||||
if vessel_urls:
|
if vessel_urls:
|
||||||
return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
|
return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded rtl.nl player
|
# Look for embedded rtl.nl player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
||||||
webpage)
|
webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return _playlist_from_matches(matches, ie='RtlNl')
|
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
|
||||||
|
|
||||||
vimeo_urls = VimeoIE._extract_urls(url, webpage)
|
vimeo_urls = VimeoIE._extract_urls(url, webpage)
|
||||||
if vimeo_urls:
|
if vimeo_urls:
|
||||||
return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
|
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
|
||||||
|
|
||||||
vid_me_embed_url = self._search_regex(
|
vid_me_embed_url = self._search_regex(
|
||||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||||
@@ -1854,6 +1953,7 @@ class GenericIE(InfoExtractor):
|
|||||||
data-video-url=|
|
data-video-url=|
|
||||||
<embed[^>]+?src=|
|
<embed[^>]+?src=|
|
||||||
embedSWF\(?:\s*|
|
embedSWF\(?:\s*|
|
||||||
|
<object[^>]+data=|
|
||||||
new\s+SWFObject\(
|
new\s+SWFObject\(
|
||||||
)
|
)
|
||||||
(["\'])
|
(["\'])
|
||||||
@@ -1861,25 +1961,25 @@ class GenericIE(InfoExtractor):
|
|||||||
(?:embed|v|p)/.+?)
|
(?:embed|v|p)/.+?)
|
||||||
\1''', webpage)
|
\1''', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return _playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
matches, lambda m: unescapeHTML(m[1]))
|
matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
|
||||||
|
|
||||||
# Look for lazyYT YouTube embed
|
# Look for lazyYT YouTube embed
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
|
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
|
return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
|
||||||
|
|
||||||
# Look for Wordpress "YouTube Video Importer" plugin
|
# Look for Wordpress "YouTube Video Importer" plugin
|
||||||
matches = re.findall(r'''(?x)<div[^>]+
|
matches = re.findall(r'''(?x)<div[^>]+
|
||||||
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
|
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
|
||||||
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
|
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return _playlist_from_matches(matches, lambda m: m[-1])
|
return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
|
||||||
|
|
||||||
matches = DailymotionIE._extract_urls(webpage)
|
matches = DailymotionIE._extract_urls(webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return _playlist_from_matches(matches)
|
return self.playlist_from_matches(matches, video_id, video_title)
|
||||||
|
|
||||||
# Look for embedded Dailymotion playlist player (#3822)
|
# Look for embedded Dailymotion playlist player (#3822)
|
||||||
m = re.search(
|
m = re.search(
|
||||||
@@ -1888,8 +1988,8 @@ class GenericIE(InfoExtractor):
|
|||||||
playlists = re.findall(
|
playlists = re.findall(
|
||||||
r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
|
r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
|
||||||
if playlists:
|
if playlists:
|
||||||
return _playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
|
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
|
||||||
|
|
||||||
# Look for embedded Wistia player
|
# Look for embedded Wistia player
|
||||||
match = re.search(
|
match = re.search(
|
||||||
@@ -1996,8 +2096,9 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
|
embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
|
||||||
if embeds:
|
if embeds:
|
||||||
return _playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
|
embeds, video_id, video_title,
|
||||||
|
getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
|
||||||
|
|
||||||
# Look for Aparat videos
|
# Look for Aparat videos
|
||||||
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
|
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
|
||||||
@@ -2059,13 +2160,13 @@ class GenericIE(InfoExtractor):
|
|||||||
# Look for funnyordie embed
|
# Look for funnyordie embed
|
||||||
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return _playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
matches, getter=unescapeHTML, ie='FunnyOrDie')
|
matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
|
||||||
|
|
||||||
# Look for BBC iPlayer embed
|
# Look for BBC iPlayer embed
|
||||||
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
|
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return _playlist_from_matches(matches, ie='BBCCoUk')
|
return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
|
||||||
|
|
||||||
# Look for embedded RUTV player
|
# Look for embedded RUTV player
|
||||||
rutv_url = RUTVIE._extract_url(webpage)
|
rutv_url = RUTVIE._extract_url(webpage)
|
||||||
@@ -2080,32 +2181,32 @@ class GenericIE(InfoExtractor):
|
|||||||
# Look for embedded SportBox player
|
# Look for embedded SportBox player
|
||||||
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
|
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
|
||||||
if sportbox_urls:
|
if sportbox_urls:
|
||||||
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
|
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
|
||||||
|
|
||||||
# Look for embedded XHamster player
|
# Look for embedded XHamster player
|
||||||
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
|
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
|
||||||
if xhamster_urls:
|
if xhamster_urls:
|
||||||
return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
|
return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
|
||||||
|
|
||||||
# Look for embedded TNAFlixNetwork player
|
# Look for embedded TNAFlixNetwork player
|
||||||
tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
|
tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
|
||||||
if tnaflix_urls:
|
if tnaflix_urls:
|
||||||
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
|
return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded PornHub player
|
# Look for embedded PornHub player
|
||||||
pornhub_urls = PornHubIE._extract_urls(webpage)
|
pornhub_urls = PornHubIE._extract_urls(webpage)
|
||||||
if pornhub_urls:
|
if pornhub_urls:
|
||||||
return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
|
return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded DrTuber player
|
# Look for embedded DrTuber player
|
||||||
drtuber_urls = DrTuberIE._extract_urls(webpage)
|
drtuber_urls = DrTuberIE._extract_urls(webpage)
|
||||||
if drtuber_urls:
|
if drtuber_urls:
|
||||||
return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
|
return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded RedTube player
|
# Look for embedded RedTube player
|
||||||
redtube_urls = RedTubeIE._extract_urls(webpage)
|
redtube_urls = RedTubeIE._extract_urls(webpage)
|
||||||
if redtube_urls:
|
if redtube_urls:
|
||||||
return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
|
return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded Tvigle player
|
# Look for embedded Tvigle player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
@@ -2151,12 +2252,12 @@ class GenericIE(InfoExtractor):
|
|||||||
# Look for embedded soundcloud player
|
# Look for embedded soundcloud player
|
||||||
soundcloud_urls = SoundcloudIE._extract_urls(webpage)
|
soundcloud_urls = SoundcloudIE._extract_urls(webpage)
|
||||||
if soundcloud_urls:
|
if soundcloud_urls:
|
||||||
return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
|
return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
|
||||||
|
|
||||||
# Look for tunein player
|
# Look for tunein player
|
||||||
tunein_urls = TuneInBaseIE._extract_urls(webpage)
|
tunein_urls = TuneInBaseIE._extract_urls(webpage)
|
||||||
if tunein_urls:
|
if tunein_urls:
|
||||||
return _playlist_from_matches(tunein_urls)
|
return self.playlist_from_matches(tunein_urls, video_id, video_title)
|
||||||
|
|
||||||
# Look for embedded mtvservices player
|
# Look for embedded mtvservices player
|
||||||
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
||||||
@@ -2439,30 +2540,36 @@ class GenericIE(InfoExtractor):
|
|||||||
# Look for DBTV embeds
|
# Look for DBTV embeds
|
||||||
dbtv_urls = DBTVIE._extract_urls(webpage)
|
dbtv_urls = DBTVIE._extract_urls(webpage)
|
||||||
if dbtv_urls:
|
if dbtv_urls:
|
||||||
return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
|
return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
|
||||||
|
|
||||||
# Look for Videa embeds
|
# Look for Videa embeds
|
||||||
videa_urls = VideaIE._extract_urls(webpage)
|
videa_urls = VideaIE._extract_urls(webpage)
|
||||||
if videa_urls:
|
if videa_urls:
|
||||||
return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
|
return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
|
||||||
|
|
||||||
# Look for 20 minuten embeds
|
# Look for 20 minuten embeds
|
||||||
twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
|
twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
|
||||||
if twentymin_urls:
|
if twentymin_urls:
|
||||||
return _playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
twentymin_urls, ie=TwentyMinutenIE.ie_key())
|
twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
|
||||||
|
|
||||||
# Look for Openload embeds
|
# Look for Openload embeds
|
||||||
openload_urls = OpenloadIE._extract_urls(webpage)
|
openload_urls = OpenloadIE._extract_urls(webpage)
|
||||||
if openload_urls:
|
if openload_urls:
|
||||||
return _playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
openload_urls, ie=OpenloadIE.ie_key())
|
openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
|
||||||
|
|
||||||
# Look for VideoPress embeds
|
# Look for VideoPress embeds
|
||||||
videopress_urls = VideoPressIE._extract_urls(webpage)
|
videopress_urls = VideoPressIE._extract_urls(webpage)
|
||||||
if videopress_urls:
|
if videopress_urls:
|
||||||
return _playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
videopress_urls, ie=VideoPressIE.ie_key())
|
videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
|
||||||
|
|
||||||
|
# Look for Rutube embeds
|
||||||
|
rutube_urls = RutubeIE._extract_urls(webpage)
|
||||||
|
if rutube_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
rutube_urls, ie=RutubeIE.ie_key())
|
||||||
|
|
||||||
# Looking for http://schema.org/VideoObject
|
# Looking for http://schema.org/VideoObject
|
||||||
json_ld = self._search_json_ld(
|
json_ld = self._search_json_ld(
|
||||||
@@ -2488,6 +2595,15 @@ class GenericIE(InfoExtractor):
|
|||||||
self._sort_formats(entry['formats'])
|
self._sort_formats(entry['formats'])
|
||||||
return self.playlist_result(entries)
|
return self.playlist_result(entries)
|
||||||
|
|
||||||
|
jwplayer_data = self._find_jwplayer_data(
|
||||||
|
webpage, video_id, transform_source=js_to_json)
|
||||||
|
if jwplayer_data:
|
||||||
|
info = self._parse_jwplayer_data(
|
||||||
|
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||||
|
if not info.get('title'):
|
||||||
|
info['title'] = video_title
|
||||||
|
return info
|
||||||
|
|
||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
if YoutubeIE.suitable(vurl):
|
if YoutubeIE.suitable(vurl):
|
||||||
return True
|
return True
|
||||||
@@ -2495,7 +2611,7 @@ class GenericIE(InfoExtractor):
|
|||||||
return True
|
return True
|
||||||
vpath = compat_urlparse.urlparse(vurl).path
|
vpath = compat_urlparse.urlparse(vurl).path
|
||||||
vext = determine_ext(vpath)
|
vext = determine_ext(vpath)
|
||||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
|
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
|
||||||
|
|
||||||
def filter_video(urls):
|
def filter_video(urls):
|
||||||
return list(filter(check_video, urls))
|
return list(filter(check_video, urls))
|
||||||
@@ -2561,11 +2677,14 @@ class GenericIE(InfoExtractor):
|
|||||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||||
if found:
|
if found:
|
||||||
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
|
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
|
||||||
self.report_following_redirect(new_url)
|
if new_url != url:
|
||||||
return {
|
self.report_following_redirect(new_url)
|
||||||
'_type': 'url',
|
return {
|
||||||
'url': new_url,
|
'_type': 'url',
|
||||||
}
|
'url': new_url,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
found = None
|
||||||
|
|
||||||
if not found:
|
if not found:
|
||||||
# twitter:player is a https URL to iframe player that may or may not
|
# twitter:player is a https URL to iframe player that may or may not
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ class GoIE(AdobePassIE):
|
|||||||
'requestor_id': 'DisneyXD',
|
'requestor_id': 'DisneyXD',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
|
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -52,6 +52,12 @@ class GoIE(AdobePassIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601',
|
'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -78,40 +84,60 @@ class GoIE(AdobePassIE):
|
|||||||
ext = determine_ext(asset_url)
|
ext = determine_ext(asset_url)
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
video_type = video_data.get('type')
|
video_type = video_data.get('type')
|
||||||
if video_type == 'lf':
|
data = {
|
||||||
data = {
|
'video_id': video_data['id'],
|
||||||
'video_id': video_data['id'],
|
'video_type': video_type,
|
||||||
'video_type': video_type,
|
'brand': brand,
|
||||||
'brand': brand,
|
'device': '001',
|
||||||
'device': '001',
|
}
|
||||||
}
|
if video_data.get('accesslevel') == '1':
|
||||||
if video_data.get('accesslevel') == '1':
|
requestor_id = site_info['requestor_id']
|
||||||
requestor_id = site_info['requestor_id']
|
resource = self._get_mvpd_resource(
|
||||||
resource = self._get_mvpd_resource(
|
requestor_id, title, video_id, None)
|
||||||
requestor_id, title, video_id, None)
|
auth = self._extract_mvpd_auth(
|
||||||
auth = self._extract_mvpd_auth(
|
url, video_id, requestor_id, resource)
|
||||||
url, video_id, requestor_id, resource)
|
data.update({
|
||||||
data.update({
|
'token': auth,
|
||||||
'token': auth,
|
'token_type': 'ap',
|
||||||
'token_type': 'ap',
|
'adobe_requestor_id': requestor_id,
|
||||||
'adobe_requestor_id': requestor_id,
|
})
|
||||||
})
|
else:
|
||||||
entitlement = self._download_json(
|
self._initialize_geo_bypass(['US'])
|
||||||
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
entitlement = self._download_json(
|
||||||
video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers())
|
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
||||||
errors = entitlement.get('errors', {}).get('errors', [])
|
video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers())
|
||||||
if errors:
|
errors = entitlement.get('errors', {}).get('errors', [])
|
||||||
error_message = ', '.join([error['message'] for error in errors])
|
if errors:
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
for error in errors:
|
||||||
asset_url += '?' + entitlement['uplynkData']['sessionKey']
|
if error.get('code') == 1002:
|
||||||
|
self.raise_geo_restricted(
|
||||||
|
error['message'], countries=['US'])
|
||||||
|
error_message = ', '.join([error['message'] for error in errors])
|
||||||
|
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
||||||
|
asset_url += '?' + entitlement['uplynkData']['sessionKey']
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
|
asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
f = {
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'url': asset_url,
|
'url': asset_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
})
|
}
|
||||||
|
if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url):
|
||||||
|
f.update({
|
||||||
|
'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE',
|
||||||
|
'preference': 1,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
mobj = re.search(r'/(\d+)x(\d+)/', asset_url)
|
||||||
|
if mobj:
|
||||||
|
height = int(mobj.group(2))
|
||||||
|
f.update({
|
||||||
|
'format_id': ('%s-' % format_id if format_id else '') + '%dP' % height,
|
||||||
|
'width': int(mobj.group(1)),
|
||||||
|
'height': height,
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
|||||||
92
youtube_dl/extractor/go90.py
Normal file
92
youtube_dl/extractor/go90.py
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Go90IE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?go90\.com/videos/(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.go90.com/videos/84BUqjLpf9D',
|
||||||
|
'md5': 'efa7670dbbbf21a7b07b360652b24a32',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '84BUqjLpf9D',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Inside The Utah Coalition Against Pornography Convention',
|
||||||
|
'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
|
||||||
|
'timestamp': 1491868800,
|
||||||
|
'upload_date': '20170411',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
video_data = self._download_json(
|
||||||
|
'https://www.go90.com/api/view/items/' + video_id,
|
||||||
|
video_id, headers={
|
||||||
|
'Content-Type': 'application/json; charset=utf-8',
|
||||||
|
}, data=b'{"client":"web","device_type":"pc"}')
|
||||||
|
title = video_data['title']
|
||||||
|
main_video_asset = video_data['main_video_asset']
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
formats = []
|
||||||
|
for asset in video_data.get('assets'):
|
||||||
|
if asset.get('id') == main_video_asset:
|
||||||
|
for source in asset.get('sources', []):
|
||||||
|
source_location = source.get('location')
|
||||||
|
if not source_location:
|
||||||
|
continue
|
||||||
|
source_type = source.get('type')
|
||||||
|
if source_type == 'hls':
|
||||||
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
source_location, video_id, 'mp4',
|
||||||
|
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||||
|
for f in m3u8_formats:
|
||||||
|
mobj = re.search(r'/hls-(\d+)-(\d+)K', f['url'])
|
||||||
|
if mobj:
|
||||||
|
height, tbr = mobj.groups()
|
||||||
|
height = int_or_none(height)
|
||||||
|
f.update({
|
||||||
|
'height': f.get('height') or height,
|
||||||
|
'width': f.get('width') or int_or_none(height / 9.0 * 16.0 if height else None),
|
||||||
|
'tbr': f.get('tbr') or int_or_none(tbr),
|
||||||
|
})
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
|
elif source_type == 'dash':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
source_location, video_id, mpd_id='dash', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'format_id': source.get('name'),
|
||||||
|
'url': source_location,
|
||||||
|
'width': int_or_none(source.get('width')),
|
||||||
|
'height': int_or_none(source.get('height')),
|
||||||
|
'tbr': int_or_none(source.get('bitrate')),
|
||||||
|
})
|
||||||
|
elif asset.get('type') == 'image':
|
||||||
|
asset_location = asset.get('location')
|
||||||
|
if not asset_location:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': asset_location,
|
||||||
|
'width': int_or_none(asset.get('width')),
|
||||||
|
'height': int_or_none(asset.get('height')),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'description': video_data.get('short_description'),
|
||||||
|
'like_count': int_or_none(video_data.get('like_count')),
|
||||||
|
'timestamp': parse_iso8601(video_data.get('released_at')),
|
||||||
|
}
|
||||||
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
@@ -14,14 +15,26 @@ from ..utils import (
|
|||||||
|
|
||||||
class HBOBaseIE(InfoExtractor):
|
class HBOBaseIE(InfoExtractor):
|
||||||
_FORMATS_INFO = {
|
_FORMATS_INFO = {
|
||||||
|
'pro7': {
|
||||||
|
'width': 1280,
|
||||||
|
'height': 720,
|
||||||
|
},
|
||||||
'1920': {
|
'1920': {
|
||||||
'width': 1280,
|
'width': 1280,
|
||||||
'height': 720,
|
'height': 720,
|
||||||
},
|
},
|
||||||
|
'pro6': {
|
||||||
|
'width': 768,
|
||||||
|
'height': 432,
|
||||||
|
},
|
||||||
'640': {
|
'640': {
|
||||||
'width': 768,
|
'width': 768,
|
||||||
'height': 432,
|
'height': 432,
|
||||||
},
|
},
|
||||||
|
'pro5': {
|
||||||
|
'width': 640,
|
||||||
|
'height': 360,
|
||||||
|
},
|
||||||
'highwifi': {
|
'highwifi': {
|
||||||
'width': 640,
|
'width': 640,
|
||||||
'height': 360,
|
'height': 360,
|
||||||
@@ -78,6 +91,19 @@ class HBOBaseIE(InfoExtractor):
|
|||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
video_url.replace('.tar', '/base_index_w8.m3u8'),
|
video_url.replace('.tar', '/base_index_w8.m3u8'),
|
||||||
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||||
|
elif source.tag == 'hls':
|
||||||
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
video_url.replace('.tar', '/base_index.m3u8'),
|
||||||
|
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||||
|
for f in m3u8_formats:
|
||||||
|
if f.get('vcodec') == 'none' and not f.get('tbr'):
|
||||||
|
f['tbr'] = int_or_none(self._search_regex(
|
||||||
|
r'-(\d+)k/', f['url'], 'tbr', default=None))
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
|
elif source.tag == 'dash':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
video_url.replace('.tar', '/manifest.mpd'),
|
||||||
|
video_id, mpd_id='dash', fatal=False))
|
||||||
else:
|
else:
|
||||||
format_info = self._FORMATS_INFO.get(source.tag, {})
|
format_info = self._FORMATS_INFO.get(source.tag, {})
|
||||||
formats.append({
|
formats.append({
|
||||||
@@ -86,7 +112,7 @@ class HBOBaseIE(InfoExtractor):
|
|||||||
'width': format_info.get('width'),
|
'width': format_info.get('width'),
|
||||||
'height': format_info.get('height'),
|
'height': format_info.get('height'),
|
||||||
})
|
})
|
||||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
self._sort_formats(formats)
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
card_sizes = xpath_element(video_data, 'titleCardSizes')
|
card_sizes = xpath_element(video_data, 'titleCardSizes')
|
||||||
@@ -112,10 +138,11 @@ class HBOBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class HBOIE(HBOBaseIE):
|
class HBOIE(HBOBaseIE):
|
||||||
|
IE_NAME = 'hbo'
|
||||||
_VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
|
'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
|
||||||
'md5': '1c33253f0c7782142c993c0ba62a8753',
|
'md5': '2c6a6bc1222c7e91cb3334dad1746e5a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1437839',
|
'id': '1437839',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -131,11 +158,12 @@ class HBOIE(HBOBaseIE):
|
|||||||
|
|
||||||
|
|
||||||
class HBOEpisodeIE(HBOBaseIE):
|
class HBOEpisodeIE(HBOBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html'
|
IE_NAME = 'hbo:episode'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P<path>(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P<id>[0-9a-z-]+))(?:\.html)?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
|
'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
|
||||||
'md5': '689132b253cc0ab7434237fc3a293210',
|
'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1439518',
|
'id': '1439518',
|
||||||
'display_id': 'ep-52-inside-the-episode',
|
'display_id': 'ep-52-inside-the-episode',
|
||||||
@@ -147,16 +175,19 @@ class HBOEpisodeIE(HBOBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
|
'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
content = self._download_json(
|
||||||
|
'http://www.hbo.com/api/content/' + path, display_id)['content']
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = compat_str((content.get('parsed', {}).get(
|
||||||
r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)',
|
'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId'])
|
||||||
webpage, 'video ID', group='video_id')
|
|
||||||
|
|
||||||
info_dict = self._extract_from_id(video_id)
|
info_dict = self._extract_from_id(video_id)
|
||||||
info_dict['display_id'] = display_id
|
info_dict['display_id'] = display_id
|
||||||
|
|||||||
@@ -6,59 +6,58 @@ from ..utils import (
|
|||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class HeiseIE(InfoExtractor):
|
class HeiseIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html'
|
||||||
https?://(?:www\.)?heise\.de/video/artikel/
|
_TESTS = [{
|
||||||
.+?(?P<id>[0-9]+)\.html(?:$|[?#])
|
'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html',
|
||||||
'''
|
|
||||||
_TEST = {
|
|
||||||
'url': (
|
|
||||||
'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
|
|
||||||
),
|
|
||||||
'md5': 'ffed432483e922e88545ad9f2f15d30e',
|
'md5': 'ffed432483e922e88545ad9f2f15d30e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2404147',
|
'id': '2404147',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': (
|
'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone",
|
||||||
"Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
|
|
||||||
),
|
|
||||||
'format_id': 'mp4_720p',
|
'format_id': 'mp4_720p',
|
||||||
'timestamp': 1411812600,
|
'timestamp': 1411812600,
|
||||||
'upload_date': '20140927',
|
'upload_date': '20140927',
|
||||||
'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
|
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
|
||||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
'thumbnail': r're:^https?://.*/gallery/$',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
container_id = self._search_regex(
|
container_id = self._search_regex(
|
||||||
r'<div class="videoplayerjw".*?data-container="([0-9]+)"',
|
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
|
||||||
webpage, 'container ID')
|
webpage, 'container ID')
|
||||||
sequenz_id = self._search_regex(
|
sequenz_id = self._search_regex(
|
||||||
r'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"',
|
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
|
||||||
webpage, 'sequenz ID')
|
webpage, 'sequenz ID')
|
||||||
data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id)
|
|
||||||
doc = self._download_xml(data_url, video_id)
|
|
||||||
|
|
||||||
info = {
|
title = self._html_search_meta('fulltitle', webpage, default=None)
|
||||||
'id': video_id,
|
if not title or title == "c't":
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
title = self._search_regex(
|
||||||
'timestamp': parse_iso8601(
|
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
|
||||||
self._html_search_meta('date', webpage)),
|
webpage, 'title')
|
||||||
'description': self._og_search_description(webpage),
|
|
||||||
}
|
|
||||||
|
|
||||||
title = self._html_search_meta('fulltitle', webpage)
|
doc = self._download_xml(
|
||||||
if title:
|
'http://www.heise.de/videout/feed', video_id, query={
|
||||||
info['title'] = title
|
'container': container_id,
|
||||||
else:
|
'sequenz': sequenz_id,
|
||||||
info['title'] = self._og_search_title(webpage)
|
})
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):
|
for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):
|
||||||
@@ -74,6 +73,18 @@ class HeiseIE(InfoExtractor):
|
|||||||
'height': height,
|
'height': height,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
info['formats'] = formats
|
|
||||||
|
|
||||||
return info
|
description = self._og_search_description(
|
||||||
|
webpage, default=None) or self._html_search_meta(
|
||||||
|
'description', webpage)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image') or
|
||||||
|
self._og_search_thumbnail(webpage)),
|
||||||
|
'timestamp': parse_iso8601(
|
||||||
|
self._html_search_meta('date', webpage)),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@@ -50,6 +51,33 @@ class InstagramIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# multi video post
|
||||||
|
'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/',
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BQ0dSaohpPW',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Video 1',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BQ0dTpOhuHT',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Video 2',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BQ0dT7RBFeF',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Video 3',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BQ0eAlwhDrw',
|
||||||
|
'title': 'Post by instagram',
|
||||||
|
'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -113,6 +141,32 @@ class InstagramIE(InfoExtractor):
|
|||||||
'timestamp': int_or_none(comment.get('created_at')),
|
'timestamp': int_or_none(comment.get('created_at')),
|
||||||
} for comment in media.get(
|
} for comment in media.get(
|
||||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||||
|
if not video_url:
|
||||||
|
edges = try_get(
|
||||||
|
media, lambda x: x['edge_sidecar_to_children']['edges'],
|
||||||
|
list) or []
|
||||||
|
if edges:
|
||||||
|
entries = []
|
||||||
|
for edge_num, edge in enumerate(edges, start=1):
|
||||||
|
node = try_get(edge, lambda x: x['node'], dict)
|
||||||
|
if not node:
|
||||||
|
continue
|
||||||
|
node_video_url = try_get(node, lambda x: x['video_url'], compat_str)
|
||||||
|
if not node_video_url:
|
||||||
|
continue
|
||||||
|
entries.append({
|
||||||
|
'id': node.get('shortcode') or node['id'],
|
||||||
|
'title': 'Video %d' % edge_num,
|
||||||
|
'url': node_video_url,
|
||||||
|
'thumbnail': node.get('display_url'),
|
||||||
|
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||||
|
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||||
|
'view_count': int_or_none(node.get('video_view_count')),
|
||||||
|
})
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, video_id,
|
||||||
|
'Post by %s' % uploader_id if uploader_id else None,
|
||||||
|
description)
|
||||||
|
|
||||||
if not video_url:
|
if not video_url:
|
||||||
video_url = self._og_search_video_url(webpage, secure=False)
|
video_url = self._og_search_video_url(webpage, secure=False)
|
||||||
|
|||||||
@@ -8,12 +8,12 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
sanitized_Request,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class IPrimaIE(InfoExtractor):
|
class IPrimaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
|
_VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
|
||||||
|
_GEO_BYPASS = False
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://play.iprima.cz/gondici-s-r-o-33',
|
'url': 'http://play.iprima.cz/gondici-s-r-o-33',
|
||||||
@@ -29,6 +29,10 @@ class IPrimaIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://play.iprima.cz/particka/particka-92',
|
'url': 'http://play.iprima.cz/particka/particka-92',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# geo restricted
|
||||||
|
'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -38,11 +42,13 @@ class IPrimaIE(InfoExtractor):
|
|||||||
|
|
||||||
video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
|
video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
|
||||||
|
|
||||||
req = sanitized_Request(
|
playerpage = self._download_webpage(
|
||||||
'http://play.iprima.cz/prehravac/init?_infuse=1'
|
'http://play.iprima.cz/prehravac/init',
|
||||||
'&_ts=%s&productId=%s' % (round(time.time()), video_id))
|
video_id, note='Downloading player', query={
|
||||||
req.add_header('Referer', url)
|
'_infuse': 1,
|
||||||
playerpage = self._download_webpage(req, video_id, note='Downloading player')
|
'_ts': round(time.time()),
|
||||||
|
'productId': video_id,
|
||||||
|
}, headers={'Referer': url})
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
@@ -82,7 +88,7 @@ class IPrimaIE(InfoExtractor):
|
|||||||
extract_formats(src)
|
extract_formats(src)
|
||||||
|
|
||||||
if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
|
if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
|
||||||
self.raise_geo_restricted()
|
self.raise_geo_restricted(countries=['CZ'])
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class ITVIE(InfoExtractor):
|
class ITVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
|
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
_GEO_COUNTRIES = ['GB']
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
|
'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -98,7 +99,11 @@ class ITVIE(InfoExtractor):
|
|||||||
headers=headers, data=etree.tostring(req_env))
|
headers=headers, data=etree.tostring(req_env))
|
||||||
playlist = xpath_element(resp_env, './/Playlist')
|
playlist = xpath_element(resp_env, './/Playlist')
|
||||||
if playlist is None:
|
if playlist is None:
|
||||||
|
fault_code = xpath_text(resp_env, './/faultcode')
|
||||||
fault_string = xpath_text(resp_env, './/faultstring')
|
fault_string = xpath_text(resp_env, './/faultstring')
|
||||||
|
if fault_code == 'InvalidGeoRegion':
|
||||||
|
self.raise_geo_restricted(
|
||||||
|
msg=fault_string, countries=self._GEO_COUNTRIES)
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
|
raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
|
||||||
title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
|
title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
|
||||||
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
||||||
|
|||||||
@@ -16,6 +16,8 @@ class IviIE(InfoExtractor):
|
|||||||
IE_DESC = 'ivi.ru'
|
IE_DESC = 'ivi.ru'
|
||||||
IE_NAME = 'ivi'
|
IE_NAME = 'ivi'
|
||||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
||||||
|
_GEO_BYPASS = False
|
||||||
|
_GEO_COUNTRIES = ['RU']
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# Single movie
|
# Single movie
|
||||||
@@ -91,7 +93,11 @@ class IviIE(InfoExtractor):
|
|||||||
|
|
||||||
if 'error' in video_json:
|
if 'error' in video_json:
|
||||||
error = video_json['error']
|
error = video_json['error']
|
||||||
if error['origin'] == 'NoRedisValidData':
|
origin = error['origin']
|
||||||
|
if origin == 'NotAllowedForLocation':
|
||||||
|
self.raise_geo_restricted(
|
||||||
|
msg=error['message'], countries=self._GEO_COUNTRIES)
|
||||||
|
elif origin == 'NoRedisValidData':
|
||||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Unable to download video %s: %s' % (video_id, error['message']),
|
'Unable to download video %s: %s' % (video_id, error['message']),
|
||||||
|
|||||||
@@ -4,139 +4,9 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
|
||||||
from ..utils import (
|
|
||||||
determine_ext,
|
|
||||||
float_or_none,
|
|
||||||
int_or_none,
|
|
||||||
js_to_json,
|
|
||||||
mimetype2ext,
|
|
||||||
urljoin,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class JWPlatformBaseIE(InfoExtractor):
|
class JWPlatformIE(InfoExtractor):
|
||||||
@staticmethod
|
|
||||||
def _find_jwplayer_data(webpage):
|
|
||||||
# TODO: Merge this with JWPlayer-related codes in generic.py
|
|
||||||
|
|
||||||
mobj = re.search(
|
|
||||||
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
|
|
||||||
webpage)
|
|
||||||
if mobj:
|
|
||||||
return mobj.group('options')
|
|
||||||
|
|
||||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
|
||||||
jwplayer_data = self._parse_json(
|
|
||||||
self._find_jwplayer_data(webpage), video_id,
|
|
||||||
transform_source=js_to_json)
|
|
||||||
return self._parse_jwplayer_data(
|
|
||||||
jwplayer_data, video_id, *args, **kwargs)
|
|
||||||
|
|
||||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
|
||||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
|
||||||
# JWPlayer backward compatibility: flattened playlists
|
|
||||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
|
||||||
if 'playlist' not in jwplayer_data:
|
|
||||||
jwplayer_data = {'playlist': [jwplayer_data]}
|
|
||||||
|
|
||||||
entries = []
|
|
||||||
|
|
||||||
# JWPlayer backward compatibility: single playlist item
|
|
||||||
# https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
|
|
||||||
if not isinstance(jwplayer_data['playlist'], list):
|
|
||||||
jwplayer_data['playlist'] = [jwplayer_data['playlist']]
|
|
||||||
|
|
||||||
for video_data in jwplayer_data['playlist']:
|
|
||||||
# JWPlayer backward compatibility: flattened sources
|
|
||||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
|
|
||||||
if 'sources' not in video_data:
|
|
||||||
video_data['sources'] = [video_data]
|
|
||||||
|
|
||||||
this_video_id = video_id or video_data['mediaid']
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for source in video_data['sources']:
|
|
||||||
source_url = self._proto_relative_url(source['file'])
|
|
||||||
if base_url:
|
|
||||||
source_url = compat_urlparse.urljoin(base_url, source_url)
|
|
||||||
source_type = source.get('type') or ''
|
|
||||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
|
||||||
if source_type == 'hls' or ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
|
||||||
elif ext == 'mpd':
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
|
||||||
source_url, this_video_id, mpd_id=mpd_id, fatal=False))
|
|
||||||
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
|
|
||||||
elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
|
|
||||||
formats.append({
|
|
||||||
'url': source_url,
|
|
||||||
'vcodec': 'none',
|
|
||||||
'ext': ext,
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
height = int_or_none(source.get('height'))
|
|
||||||
if height is None:
|
|
||||||
# Often no height is provided but there is a label in
|
|
||||||
# format like 1080p.
|
|
||||||
height = int_or_none(self._search_regex(
|
|
||||||
r'^(\d{3,})[pP]$', source.get('label') or '',
|
|
||||||
'height', default=None))
|
|
||||||
a_format = {
|
|
||||||
'url': source_url,
|
|
||||||
'width': int_or_none(source.get('width')),
|
|
||||||
'height': height,
|
|
||||||
'ext': ext,
|
|
||||||
}
|
|
||||||
if source_url.startswith('rtmp'):
|
|
||||||
a_format['ext'] = 'flv'
|
|
||||||
|
|
||||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
|
||||||
# of jwplayer.flash.swf
|
|
||||||
rtmp_url_parts = re.split(
|
|
||||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
|
||||||
if len(rtmp_url_parts) == 3:
|
|
||||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
|
||||||
a_format.update({
|
|
||||||
'url': rtmp_url,
|
|
||||||
'play_path': prefix + play_path,
|
|
||||||
})
|
|
||||||
if rtmp_params:
|
|
||||||
a_format.update(rtmp_params)
|
|
||||||
formats.append(a_format)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
tracks = video_data.get('tracks')
|
|
||||||
if tracks and isinstance(tracks, list):
|
|
||||||
for track in tracks:
|
|
||||||
if track.get('kind') != 'captions':
|
|
||||||
continue
|
|
||||||
track_url = urljoin(base_url, track.get('file'))
|
|
||||||
if not track_url:
|
|
||||||
continue
|
|
||||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
|
||||||
'url': self._proto_relative_url(track_url)
|
|
||||||
})
|
|
||||||
|
|
||||||
entries.append({
|
|
||||||
'id': this_video_id,
|
|
||||||
'title': video_data['title'] if require_title else video_data.get('title'),
|
|
||||||
'description': video_data.get('description'),
|
|
||||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
|
||||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
|
||||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
|
||||||
'subtitles': subtitles,
|
|
||||||
'formats': formats,
|
|
||||||
})
|
|
||||||
if len(entries) == 1:
|
|
||||||
return entries[0]
|
|
||||||
else:
|
|
||||||
return self.playlist_result(entries)
|
|
||||||
|
|
||||||
|
|
||||||
class JWPlatformIE(JWPlatformBaseIE):
|
|
||||||
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
||||||
|
|||||||
@@ -91,6 +91,7 @@ class KalturaIE(InfoExtractor):
|
|||||||
}],
|
}],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
@@ -107,27 +108,37 @@ class KalturaIE(InfoExtractor):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_url(webpage):
|
||||||
|
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
||||||
mobj = (
|
mobj = (
|
||||||
re.search(
|
re.search(
|
||||||
r"""(?xs)
|
r"""(?xs)
|
||||||
kWidget\.(?:thumb)?[Ee]mbed\(
|
kWidget\.(?:thumb)?[Ee]mbed\(
|
||||||
\{.*?
|
\{.*?
|
||||||
(?P<q1>['\"])wid(?P=q1)\s*:\s*
|
(?P<q1>['"])wid(?P=q1)\s*:\s*
|
||||||
(?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
||||||
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
|
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
|
||||||
(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
||||||
""", webpage) or
|
""", webpage) or
|
||||||
re.search(
|
re.search(
|
||||||
r'''(?xs)
|
r'''(?xs)
|
||||||
(?P<q1>["\'])
|
(?P<q1>["'])
|
||||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||||
(?P=q1).*?
|
(?P=q1).*?
|
||||||
(?:
|
(?:
|
||||||
entry_?[Ii]d|
|
entry_?[Ii]d|
|
||||||
(?P<q2>["\'])entry_?[Ii]d(?P=q2)
|
(?P<q2>["'])entry_?[Ii]d(?P=q2)
|
||||||
)\s*:\s*
|
)\s*:\s*
|
||||||
(?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
||||||
''', webpage))
|
''', webpage) or
|
||||||
|
re.search(
|
||||||
|
r'''(?xs)
|
||||||
|
<iframe[^>]+src=(?P<q1>["'])
|
||||||
|
(?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
|
||||||
|
(?:(?!(?P=q1)).)*
|
||||||
|
[?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
|
||||||
|
(?P=q1)
|
||||||
|
''', webpage)
|
||||||
|
)
|
||||||
if mobj:
|
if mobj:
|
||||||
embed_info = mobj.groupdict()
|
embed_info = mobj.groupdict()
|
||||||
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
|
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ from ..utils import (
|
|||||||
class LeIE(InfoExtractor):
|
class LeIE(InfoExtractor):
|
||||||
IE_DESC = '乐视网'
|
IE_DESC = '乐视网'
|
||||||
_VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html'
|
_VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html'
|
||||||
|
_GEO_COUNTRIES = ['CN']
|
||||||
_URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
|
_URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@@ -126,10 +126,9 @@ class LeIE(InfoExtractor):
|
|||||||
if playstatus['status'] == 0:
|
if playstatus['status'] == 0:
|
||||||
flag = playstatus['flag']
|
flag = playstatus['flag']
|
||||||
if flag == 1:
|
if flag == 1:
|
||||||
msg = 'Country %s auth error' % playstatus['country']
|
self.raise_geo_restricted()
|
||||||
else:
|
else:
|
||||||
msg = 'Generic error. flag = %d' % flag
|
raise ExtractorError('Generic error. flag = %d' % flag, expected=True)
|
||||||
raise ExtractorError(msg, expected=True)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
media_id = self._match_id(url)
|
media_id = self._match_id(url)
|
||||||
|
|||||||
@@ -4,11 +4,13 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -20,9 +22,17 @@ class LimelightBaseIE(InfoExtractor):
|
|||||||
headers = {}
|
headers = {}
|
||||||
if referer:
|
if referer:
|
||||||
headers['Referer'] = referer
|
headers['Referer'] = referer
|
||||||
return self._download_json(
|
try:
|
||||||
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
|
return self._download_json(
|
||||||
item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
|
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
|
||||||
|
item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
|
error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
|
||||||
|
if error == 'CountryDisabled':
|
||||||
|
self.raise_geo_restricted()
|
||||||
|
raise ExtractorError(error, expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
def _call_api(self, organization_id, item_id, method):
|
def _call_api(self, organization_id, item_id, method):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
@@ -52,13 +62,21 @@ class LimelightBaseIE(InfoExtractor):
|
|||||||
fmt = {
|
fmt = {
|
||||||
'url': stream_url,
|
'url': stream_url,
|
||||||
'abr': float_or_none(stream.get('audioBitRate')),
|
'abr': float_or_none(stream.get('audioBitRate')),
|
||||||
'vbr': float_or_none(stream.get('videoBitRate')),
|
|
||||||
'fps': float_or_none(stream.get('videoFrameRate')),
|
'fps': float_or_none(stream.get('videoFrameRate')),
|
||||||
'width': int_or_none(stream.get('videoWidthInPixels')),
|
|
||||||
'height': int_or_none(stream.get('videoHeightInPixels')),
|
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
}
|
}
|
||||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
|
width = int_or_none(stream.get('videoWidthInPixels'))
|
||||||
|
height = int_or_none(stream.get('videoHeightInPixels'))
|
||||||
|
vbr = float_or_none(stream.get('videoBitRate'))
|
||||||
|
if width or height or vbr:
|
||||||
|
fmt.update({
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'vbr': vbr,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
fmt['vcodec'] = 'none'
|
||||||
|
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
|
||||||
if rtmp:
|
if rtmp:
|
||||||
format_id = 'rtmp'
|
format_id = 'rtmp'
|
||||||
if stream.get('videoBitRate'):
|
if stream.get('videoBitRate'):
|
||||||
@@ -213,6 +231,7 @@ class LimelightMediaIE(LimelightBaseIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
||||||
|
|
||||||
pc, mobile, metadata = self._extract(
|
pc, mobile, metadata = self._extract(
|
||||||
video_id, 'getPlaylistByMediaId',
|
video_id, 'getPlaylistByMediaId',
|
||||||
|
|||||||
@@ -119,7 +119,8 @@ class LivestreamIE(InfoExtractor):
|
|||||||
m3u8_url = video_data.get('m3u8_url')
|
m3u8_url = video_data.get('m3u8_url')
|
||||||
if m3u8_url:
|
if m3u8_url:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
f4m_url = video_data.get('f4m_url')
|
f4m_url = video_data.get('f4m_url')
|
||||||
if f4m_url:
|
if f4m_url:
|
||||||
@@ -158,11 +159,11 @@ class LivestreamIE(InfoExtractor):
|
|||||||
if smil_url:
|
if smil_url:
|
||||||
formats.extend(self._extract_smil_formats(smil_url, broadcast_id))
|
formats.extend(self._extract_smil_formats(smil_url, broadcast_id))
|
||||||
|
|
||||||
entry_protocol = 'm3u8' if is_live else 'm3u8_native'
|
|
||||||
m3u8_url = stream_info.get('m3u8_url')
|
m3u8_url = stream_info.get('m3u8_url')
|
||||||
if m3u8_url:
|
if m3u8_url:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
m3u8_url, broadcast_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False))
|
m3u8_url, broadcast_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
rtsp_url = stream_info.get('rtsp_url')
|
rtsp_url = stream_info.get('rtsp_url')
|
||||||
if rtsp_url:
|
if rtsp_url:
|
||||||
@@ -276,7 +277,7 @@ class LivestreamOriginalIE(InfoExtractor):
|
|||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_video_formats(self, video_data, video_id, entry_protocol):
|
def _extract_video_formats(self, video_data, video_id):
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
progressive_url = video_data.get('progressiveUrl')
|
progressive_url = video_data.get('progressiveUrl')
|
||||||
@@ -289,7 +290,8 @@ class LivestreamOriginalIE(InfoExtractor):
|
|||||||
m3u8_url = video_data.get('httpUrl')
|
m3u8_url = video_data.get('httpUrl')
|
||||||
if m3u8_url:
|
if m3u8_url:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False))
|
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
rtsp_url = video_data.get('rtspUrl')
|
rtsp_url = video_data.get('rtspUrl')
|
||||||
if rtsp_url:
|
if rtsp_url:
|
||||||
@@ -340,11 +342,10 @@ class LivestreamOriginalIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
video_data = self._download_json(stream_url, content_id)
|
video_data = self._download_json(stream_url, content_id)
|
||||||
is_live = video_data.get('isLive')
|
is_live = video_data.get('isLive')
|
||||||
entry_protocol = 'm3u8' if is_live else 'm3u8_native'
|
|
||||||
info.update({
|
info.update({
|
||||||
'id': content_id,
|
'id': content_id,
|
||||||
'title': self._live_title(info['title']) if is_live else info['title'],
|
'title': self._live_title(info['title']) if is_live else info['title'],
|
||||||
'formats': self._extract_video_formats(video_data, content_id, entry_protocol),
|
'formats': self._extract_video_formats(video_data, content_id),
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
})
|
})
|
||||||
return info
|
return info
|
||||||
|
|||||||
@@ -260,9 +260,24 @@ class LyndaCourseIE(LyndaBaseIE):
|
|||||||
course_path = mobj.group('coursepath')
|
course_path = mobj.group('coursepath')
|
||||||
course_id = mobj.group('courseid')
|
course_id = mobj.group('courseid')
|
||||||
|
|
||||||
|
item_template = 'https://www.lynda.com/%s/%%s-4.html' % course_path
|
||||||
|
|
||||||
course = self._download_json(
|
course = self._download_json(
|
||||||
'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
|
'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
|
||||||
course_id, 'Downloading course JSON')
|
course_id, 'Downloading course JSON', fatal=False)
|
||||||
|
|
||||||
|
if not course:
|
||||||
|
webpage = self._download_webpage(url, course_id)
|
||||||
|
entries = [
|
||||||
|
self.url_result(
|
||||||
|
item_template % video_id, ie=LyndaIE.ie_key(),
|
||||||
|
video_id=video_id)
|
||||||
|
for video_id in re.findall(
|
||||||
|
r'data-video-id=["\'](\d+)', webpage)]
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, course_id,
|
||||||
|
self._og_search_title(webpage, fatal=False),
|
||||||
|
self._og_search_description(webpage))
|
||||||
|
|
||||||
if course.get('Status') == 'NotFound':
|
if course.get('Status') == 'NotFound':
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
@@ -283,7 +298,7 @@ class LyndaCourseIE(LyndaBaseIE):
|
|||||||
if video_id:
|
if video_id:
|
||||||
entries.append({
|
entries.append({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'https://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
|
'url': item_template % video_id,
|
||||||
'ie_key': LyndaIE.ie_key(),
|
'ie_key': LyndaIE.ie_key(),
|
||||||
'chapter': chapter.get('Title'),
|
'chapter': chapter.get('Title'),
|
||||||
'chapter_number': int_or_none(chapter.get('ChapterIndex')),
|
'chapter_number': int_or_none(chapter.get('ChapterIndex')),
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class MDRIE(InfoExtractor):
|
class MDRIE(InfoExtractor):
|
||||||
IE_DESC = 'MDR.DE and KiKA'
|
IE_DESC = 'MDR.DE and KiKA'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# MDR regularly deletes its videos
|
# MDR regularly deletes its videos
|
||||||
@@ -31,6 +31,7 @@ class MDRIE(InfoExtractor):
|
|||||||
'duration': 250,
|
'duration': 250,
|
||||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||||
},
|
},
|
||||||
|
'skip': '404 not found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
|
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
|
||||||
'md5': '4930515e36b06c111213e80d1e4aad0e',
|
'md5': '4930515e36b06c111213e80d1e4aad0e',
|
||||||
@@ -41,6 +42,7 @@ class MDRIE(InfoExtractor):
|
|||||||
'duration': 134,
|
'duration': 134,
|
||||||
'uploader': 'KIKA',
|
'uploader': 'KIKA',
|
||||||
},
|
},
|
||||||
|
'skip': '404 not found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
|
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
|
||||||
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
|
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
|
||||||
@@ -49,11 +51,21 @@ class MDRIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
|
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
|
||||||
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
|
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
|
||||||
'timestamp': 1450950000,
|
'timestamp': 1482541200,
|
||||||
'upload_date': '20151224',
|
'upload_date': '20161224',
|
||||||
'duration': 4628,
|
'duration': 4628,
|
||||||
'uploader': 'KIKA',
|
'uploader': 'KIKA',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# audio with alternative playerURL pattern
|
||||||
|
'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '100',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Feature: Operation Mindfuck - Robert Anton Wilson',
|
||||||
|
'duration': 3239,
|
||||||
|
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
|
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -71,7 +83,7 @@ class MDRIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
data_url = self._search_regex(
|
data_url = self._search_regex(
|
||||||
r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
|
r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+?-avCustom\.xml)\1',
|
||||||
webpage, 'data url', group='url').replace(r'\/', '/')
|
webpage, 'data url', group='url').replace(r'\/', '/')
|
||||||
|
|
||||||
doc = self._download_xml(
|
doc = self._download_xml(
|
||||||
|
|||||||
259
youtube_dl/extractor/medialaan.py
Normal file
259
youtube_dl/extractor/medialaan.py
Normal file
@@ -0,0 +1,259 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MedialaanIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:www\.)?
|
||||||
|
(?:
|
||||||
|
(?P<site_id>vtm|q2|vtmkzoom)\.be/
|
||||||
|
(?:
|
||||||
|
video(?:/[^/]+/id/|/?\?.*?\baid=)|
|
||||||
|
(?:[^/]+/)*
|
||||||
|
)
|
||||||
|
)
|
||||||
|
(?P<id>[^/?#&]+)
|
||||||
|
'''
|
||||||
|
_NETRC_MACHINE = 'medialaan'
|
||||||
|
_APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
|
||||||
|
_SITE_TO_APP_ID = {
|
||||||
|
'vtm': 'vtm_watch',
|
||||||
|
'q2': 'q2',
|
||||||
|
'vtmkzoom': 'vtmkzoom',
|
||||||
|
}
|
||||||
|
_TESTS = [{
|
||||||
|
# vod
|
||||||
|
'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'vtm_20170219_VM0678361_vtmwatch',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Allemaal Chris afl. 6',
|
||||||
|
'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
|
||||||
|
'timestamp': 1487533280,
|
||||||
|
'upload_date': '20170219',
|
||||||
|
'duration': 2562,
|
||||||
|
'series': 'Allemaal Chris',
|
||||||
|
'season': 'Allemaal Chris',
|
||||||
|
'season_number': 1,
|
||||||
|
'season_id': '256936078124527',
|
||||||
|
'episode': 'Allemaal Chris afl. 6',
|
||||||
|
'episode_number': 6,
|
||||||
|
'episode_id': '256936078591527',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Requires account credentials',
|
||||||
|
}, {
|
||||||
|
# clip
|
||||||
|
'url': 'http://vtm.be/video?aid=168332',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '168332',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '"Veronique liegt!"',
|
||||||
|
'description': 'md5:1385e2b743923afe54ba4adc38476155',
|
||||||
|
'timestamp': 1489002029,
|
||||||
|
'upload_date': '20170308',
|
||||||
|
'duration': 96,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# vod
|
||||||
|
'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# vod
|
||||||
|
'url': 'http://vtm.be/video?aid=163157',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# vod
|
||||||
|
'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# clip
|
||||||
|
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._logged_in = False
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
username, password = self._get_login_info()
|
||||||
|
if username is None:
|
||||||
|
self.raise_login_required()
|
||||||
|
|
||||||
|
auth_data = {
|
||||||
|
'APIKey': self._APIKEY,
|
||||||
|
'sdk': 'js_6.1',
|
||||||
|
'format': 'json',
|
||||||
|
'loginID': username,
|
||||||
|
'password': password,
|
||||||
|
}
|
||||||
|
|
||||||
|
auth_info = self._download_json(
|
||||||
|
'https://accounts.eu1.gigya.com/accounts.login', None,
|
||||||
|
note='Logging in', errnote='Unable to log in',
|
||||||
|
data=urlencode_postdata(auth_data))
|
||||||
|
|
||||||
|
error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
|
||||||
|
if error_message:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Unable to login: %s' % error_message, expected=True)
|
||||||
|
|
||||||
|
self._uid = auth_info['UID']
|
||||||
|
self._uid_signature = auth_info['UIDSignature']
|
||||||
|
self._signature_timestamp = auth_info['signatureTimestamp']
|
||||||
|
|
||||||
|
self._logged_in = True
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id, site_id = mobj.group('id', 'site_id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
config = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
|
||||||
|
webpage, 'config', default='{}'), video_id,
|
||||||
|
transform_source=lambda s: s.replace(
|
||||||
|
'\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
|
||||||
|
|
||||||
|
vod_id = config.get('vodId') or self._search_regex(
|
||||||
|
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
|
||||||
|
r'<[^>]+id=["\']vod-(\d+)'),
|
||||||
|
webpage, 'video_id', default=None)
|
||||||
|
|
||||||
|
# clip, no authentication required
|
||||||
|
if not vod_id:
|
||||||
|
player = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
|
||||||
|
default=''),
|
||||||
|
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
|
||||||
|
if player:
|
||||||
|
video = player[-1]
|
||||||
|
info = {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video['videoUrl'],
|
||||||
|
'title': video['title'],
|
||||||
|
'thumbnail': video.get('imageUrl'),
|
||||||
|
'timestamp': int_or_none(video.get('createdDate')),
|
||||||
|
'duration': int_or_none(video.get('duration')),
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
info = self._parse_html5_media_entries(
|
||||||
|
url, webpage, video_id, m3u8_id='hls')[0]
|
||||||
|
info.update({
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._html_search_meta('description', webpage),
|
||||||
|
'duration': parse_duration(self._html_search_meta('duration', webpage)),
|
||||||
|
})
|
||||||
|
# vod, authentication required
|
||||||
|
else:
|
||||||
|
if not self._logged_in:
|
||||||
|
self._login()
|
||||||
|
|
||||||
|
settings = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||||
|
webpage, 'drupal settings', default='{}'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
def get(container, item):
|
||||||
|
return try_get(
|
||||||
|
settings, lambda x: x[container][item],
|
||||||
|
compat_str) or self._search_regex(
|
||||||
|
r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
|
||||||
|
default=None)
|
||||||
|
|
||||||
|
app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
|
||||||
|
sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
|
||||||
|
|
||||||
|
data = self._download_json(
|
||||||
|
'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
|
||||||
|
video_id, query={
|
||||||
|
'app_id': app_id,
|
||||||
|
'user_network': sso,
|
||||||
|
'UID': self._uid,
|
||||||
|
'UIDSignature': self._uid_signature,
|
||||||
|
'signatureTimestamp': self._signature_timestamp,
|
||||||
|
})
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
data['response']['uri'], video_id, entry_protocol='m3u8_native',
|
||||||
|
ext='mp4', m3u8_id='hls')
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'id': vod_id,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
api_key = get('vod', 'apiKey')
|
||||||
|
channel = get('medialaanGigya', 'channel')
|
||||||
|
|
||||||
|
if api_key:
|
||||||
|
videos = self._download_json(
|
||||||
|
'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
|
||||||
|
query={
|
||||||
|
'channels': channel,
|
||||||
|
'ids': vod_id,
|
||||||
|
'limit': 1,
|
||||||
|
'apikey': api_key,
|
||||||
|
})
|
||||||
|
if videos:
|
||||||
|
video = try_get(
|
||||||
|
videos, lambda x: x['response']['videos'][0], dict)
|
||||||
|
if video:
|
||||||
|
def get(container, item, expected_type=None):
|
||||||
|
return try_get(
|
||||||
|
video, lambda x: x[container][item], expected_type)
|
||||||
|
|
||||||
|
def get_string(container, item):
|
||||||
|
return get(container, item, compat_str)
|
||||||
|
|
||||||
|
info.update({
|
||||||
|
'series': get_string('program', 'title'),
|
||||||
|
'season': get_string('season', 'title'),
|
||||||
|
'season_number': int_or_none(get('season', 'number')),
|
||||||
|
'season_id': get_string('season', 'id'),
|
||||||
|
'episode': get_string('episode', 'title'),
|
||||||
|
'episode_number': int_or_none(get('episode', 'number')),
|
||||||
|
'episode_id': get_string('episode', 'id'),
|
||||||
|
'duration': int_or_none(
|
||||||
|
video.get('duration')) or int_or_none(
|
||||||
|
video.get('durationMillis'), scale=1000),
|
||||||
|
'title': get_string('episode', 'title'),
|
||||||
|
'description': get_string('episode', 'text'),
|
||||||
|
'timestamp': unified_timestamp(get_string(
|
||||||
|
'publication', 'begin')),
|
||||||
|
})
|
||||||
|
|
||||||
|
if not info.get('title'):
|
||||||
|
info['title'] = try_get(
|
||||||
|
config, lambda x: x['videoConfig']['title'],
|
||||||
|
compat_str) or self._html_search_regex(
|
||||||
|
r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
|
||||||
|
default=None) or self._og_search_title(webpage)
|
||||||
|
|
||||||
|
if not info.get('description'):
|
||||||
|
info['description'] = self._html_search_regex(
|
||||||
|
r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
|
||||||
|
webpage, 'description', default=None)
|
||||||
|
|
||||||
|
return info
|
||||||
70
youtube_dl/extractor/medici.py
Normal file
70
youtube_dl/extractor/medici.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
unified_strdate,
|
||||||
|
update_url_query,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MediciIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?medici\.tv/#!/(?P<id>[^?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.medici.tv/#!/daniel-harding-frans-helmerson-verbier-festival-music-camp',
|
||||||
|
'md5': '004c21bb0a57248085b6ff3fec72719d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3059',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Daniel Harding conducts the Verbier Festival Music Camp \u2013 With Frans Helmerson',
|
||||||
|
'description': 'md5:322a1e952bafb725174fd8c1a8212f58',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'upload_date': '20170408',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
# Sets csrftoken cookie
|
||||||
|
self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
MEDICI_URL = 'http://www.medici.tv/'
|
||||||
|
|
||||||
|
data = self._download_json(
|
||||||
|
MEDICI_URL, video_id,
|
||||||
|
data=urlencode_postdata({
|
||||||
|
'json': 'true',
|
||||||
|
'page': '/%s' % video_id,
|
||||||
|
'timezone_offset': -420,
|
||||||
|
}), headers={
|
||||||
|
'X-CSRFToken': self._get_cookies(url)['csrftoken'].value,
|
||||||
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
|
'Referer': MEDICI_URL,
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
|
})
|
||||||
|
|
||||||
|
video = data['video']['videos']['video1']
|
||||||
|
|
||||||
|
title = video.get('nom') or data['title']
|
||||||
|
|
||||||
|
video_id = video.get('id') or video_id
|
||||||
|
formats = self._extract_f4m_formats(
|
||||||
|
update_url_query(video['url_akamai'], {
|
||||||
|
'hdcore': '3.1.0',
|
||||||
|
'plugin=aasp': '3.1.0.43.124',
|
||||||
|
}), video_id, f4m_id='hds')
|
||||||
|
|
||||||
|
description = data.get('meta_description')
|
||||||
|
thumbnail = video.get('url_thumbnail') or data.get('main_image')
|
||||||
|
upload_date = unified_strdate(data['video'].get('date'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
@@ -6,12 +6,12 @@ from .common import InfoExtractor
|
|||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urllib_parse_urlencode,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
urlencode_postdata,
|
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
)
|
)
|
||||||
@@ -50,6 +50,21 @@ class MetacafeIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'skip': 'Page is temporarily unavailable.',
|
'skip': 'Page is temporarily unavailable.',
|
||||||
},
|
},
|
||||||
|
# metacafe video with family filter
|
||||||
|
{
|
||||||
|
'url': 'http://www.metacafe.com/watch/2155630/adult_art_by_david_hart_156/',
|
||||||
|
'md5': 'b06082c5079bbdcde677a6291fbdf376',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2155630',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Adult Art By David Hart 156',
|
||||||
|
'uploader': '63346',
|
||||||
|
'description': 'md5:9afac8fc885252201ad14563694040fc',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
# AnyClip video
|
# AnyClip video
|
||||||
{
|
{
|
||||||
'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/',
|
'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/',
|
||||||
@@ -112,22 +127,6 @@ class MetacafeIE(InfoExtractor):
|
|||||||
def report_disclaimer(self):
|
def report_disclaimer(self):
|
||||||
self.to_screen('Retrieving disclaimer')
|
self.to_screen('Retrieving disclaimer')
|
||||||
|
|
||||||
def _confirm_age(self):
|
|
||||||
# Retrieve disclaimer
|
|
||||||
self.report_disclaimer()
|
|
||||||
self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer')
|
|
||||||
|
|
||||||
# Confirm age
|
|
||||||
self.report_age_confirmation()
|
|
||||||
self._download_webpage(
|
|
||||||
self._FILTER_POST, None, False, 'Unable to confirm age',
|
|
||||||
data=urlencode_postdata({
|
|
||||||
'filters': '0',
|
|
||||||
'submit': "Continue - I'm over 18",
|
|
||||||
}), headers={
|
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
|
||||||
})
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract id and simplified title from URL
|
# Extract id and simplified title from URL
|
||||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
@@ -143,13 +142,15 @@ class MetacafeIE(InfoExtractor):
|
|||||||
if prefix == 'cb':
|
if prefix == 'cb':
|
||||||
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
|
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
|
||||||
|
|
||||||
# self._confirm_age()
|
headers = {
|
||||||
|
# Disable family filter
|
||||||
|
'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False})
|
||||||
|
}
|
||||||
|
|
||||||
# AnyClip videos require the flashversion cookie so that we get the link
|
# AnyClip videos require the flashversion cookie so that we get the link
|
||||||
# to the mp4 file
|
# to the mp4 file
|
||||||
headers = {}
|
|
||||||
if video_id.startswith('an-'):
|
if video_id.startswith('an-'):
|
||||||
headers['Cookie'] = 'flashVersion=0;'
|
headers['Cookie'] += 'flashVersion=0; '
|
||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
# Retrieve video webpage to extract further information
|
||||||
webpage = self._download_webpage(url, video_id, headers=headers)
|
webpage = self._download_webpage(url, video_id, headers=headers)
|
||||||
|
|||||||
@@ -2,16 +2,17 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import int_or_none
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class MGTVIE(InfoExtractor):
|
class MGTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||||
IE_DESC = '芒果TV'
|
IE_DESC = '芒果TV'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||||
'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
|
'md5': 'b1ffc0fc163152acf6beaa81832c9ee7',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3116640',
|
'id': '3116640',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -21,48 +22,45 @@ class MGTVIE(InfoExtractor):
|
|||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# no tbr extracted from stream_url
|
'url': 'http://www.mgtv.com/b/301817/3826653.html',
|
||||||
'url': 'http://www.mgtv.com/v/1/1/f/3324755.html',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
api_data = self._download_json(
|
api_data = self._download_json(
|
||||||
'http://v.api.mgtv.com/player/video', video_id,
|
'http://pcweb.api.mgtv.com/player/video', video_id,
|
||||||
query={'video_id': video_id},
|
query={'video_id': video_id},
|
||||||
headers=self.geo_verification_headers())['data']
|
headers=self.geo_verification_headers())['data']
|
||||||
info = api_data['info']
|
info = api_data['info']
|
||||||
|
title = info['title'].strip()
|
||||||
|
stream_domain = api_data['stream_domain'][0]
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for idx, stream in enumerate(api_data['stream']):
|
for idx, stream in enumerate(api_data['stream']):
|
||||||
stream_url = stream.get('url')
|
stream_path = stream.get('url')
|
||||||
if not stream_url:
|
if not stream_path:
|
||||||
|
continue
|
||||||
|
format_data = self._download_json(
|
||||||
|
stream_domain + stream_path, video_id,
|
||||||
|
note='Download video info for format #%d' % idx)
|
||||||
|
format_url = format_data.get('info')
|
||||||
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
tbr = int_or_none(self._search_regex(
|
tbr = int_or_none(self._search_regex(
|
||||||
r'(\d+)\.mp4', stream_url, 'tbr', default=None))
|
r'_(\d+)_mp4/', format_url, 'tbr', default=None))
|
||||||
|
formats.append({
|
||||||
def extract_format(stream_url, format_id, idx, query={}):
|
'format_id': compat_str(tbr or idx),
|
||||||
format_info = self._download_json(
|
'url': format_url,
|
||||||
stream_url, video_id,
|
'ext': 'mp4',
|
||||||
note='Download video info for format %s' % (format_id or '#%d' % idx),
|
'tbr': tbr,
|
||||||
query=query)
|
'protocol': 'm3u8_native',
|
||||||
return {
|
})
|
||||||
'format_id': format_id,
|
|
||||||
'url': format_info['info'],
|
|
||||||
'ext': 'mp4',
|
|
||||||
'tbr': tbr,
|
|
||||||
}
|
|
||||||
|
|
||||||
formats.append(extract_format(
|
|
||||||
stream_url, 'hls-%d' % tbr if tbr else None, idx * 2))
|
|
||||||
formats.append(extract_format(stream_url.replace(
|
|
||||||
'/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031}))
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': info['title'].strip(),
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': info.get('desc'),
|
'description': info.get('desc'),
|
||||||
'duration': int_or_none(info.get('duration')),
|
'duration': int_or_none(info.get('duration')),
|
||||||
|
|||||||
@@ -51,6 +51,7 @@ class MioMioIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31',
|
'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31',
|
||||||
},
|
},
|
||||||
|
'skip': 'Unable to load videos',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_mioplayer(self, webpage, video_id, title, http_headers):
|
def _extract_mioplayer(self, webpage, video_id, title, http_headers):
|
||||||
@@ -94,9 +95,18 @@ class MioMioIE(InfoExtractor):
|
|||||||
|
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
def _download_chinese_webpage(self, *args, **kwargs):
|
||||||
|
# Requests with English locales return garbage
|
||||||
|
headers = {
|
||||||
|
'Accept-Language': 'zh-TW,en-US;q=0.7,en;q=0.3',
|
||||||
|
}
|
||||||
|
kwargs.setdefault('headers', {}).update(headers)
|
||||||
|
return self._download_webpage(*args, **kwargs)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_chinese_webpage(
|
||||||
|
url, video_id)
|
||||||
|
|
||||||
title = self._html_search_meta(
|
title = self._html_search_meta(
|
||||||
'description', webpage, 'title', fatal=True)
|
'description', webpage, 'title', fatal=True)
|
||||||
@@ -106,7 +116,7 @@ class MioMioIE(InfoExtractor):
|
|||||||
|
|
||||||
if '_h5' in mioplayer_path:
|
if '_h5' in mioplayer_path:
|
||||||
player_url = compat_urlparse.urljoin(url, mioplayer_path)
|
player_url = compat_urlparse.urljoin(url, mioplayer_path)
|
||||||
player_webpage = self._download_webpage(
|
player_webpage = self._download_chinese_webpage(
|
||||||
player_url, video_id,
|
player_url, video_id,
|
||||||
note='Downloading player webpage', headers={'Referer': url})
|
note='Downloading player webpage', headers={'Referer': url})
|
||||||
entries = self._parse_html5_media_entries(player_url, player_webpage, video_id)
|
entries = self._parse_html5_media_entries(player_url, player_webpage, video_id)
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .ooyala import OoyalaIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
@@ -24,6 +25,9 @@ class MiTeleBaseIE(InfoExtractor):
|
|||||||
r'(?s)(<ms-video-player.+?</ms-video-player>)',
|
r'(?s)(<ms-video-player.+?</ms-video-player>)',
|
||||||
webpage, 'ms video player'))
|
webpage, 'ms video player'))
|
||||||
video_id = player_data['data-media-id']
|
video_id = player_data['data-media-id']
|
||||||
|
if player_data.get('data-cms-id') == 'ooyala':
|
||||||
|
return self.url_result(
|
||||||
|
'ooyala:%s' % video_id, ie=OoyalaIE.ie_key(), video_id=video_id)
|
||||||
config_url = compat_urlparse.urljoin(url, player_data['data-config'])
|
config_url = compat_urlparse.urljoin(url, player_data['data-config'])
|
||||||
config = self._download_json(
|
config = self._download_json(
|
||||||
config_url, video_id, 'Downloading config JSON')
|
config_url, video_id, 'Downloading config JSON')
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user