mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-12-24 08:50:29 +01:00
Compare commits
208 Commits
2018.01.18
...
2018.03.20
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a66d1d079a | ||
|
|
c651de39d5 | ||
|
|
d9e2240f7c | ||
|
|
832f9d5258 | ||
|
|
21dedcb580 | ||
|
|
6780154e6b | ||
|
|
38f59e2793 | ||
|
|
9a054fcbba | ||
|
|
6e3f23d912 | ||
|
|
47a5cb7734 | ||
|
|
e0d198c18d | ||
|
|
96b8b9abae | ||
|
|
178ee88319 | ||
|
|
d123960857 | ||
|
|
3526c3043b | ||
|
|
8e70c1bfac | ||
|
|
27b1c73f14 | ||
|
|
46c6742d4f | ||
|
|
c95dfb0509 | ||
|
|
b8c6badc96 | ||
|
|
b848a4ca1a | ||
|
|
e6e68069f6 | ||
|
|
f3672ac522 | ||
|
|
f226880c6d | ||
|
|
08250b69c2 | ||
|
|
d116918993 | ||
|
|
7399ca1f80 | ||
|
|
b4a190fe2a | ||
|
|
cc7f6c720e | ||
|
|
3a0ceb32e2 | ||
|
|
7dee417127 | ||
|
|
5b1d158834 | ||
|
|
a7298f3e99 | ||
|
|
5d49d879cc | ||
|
|
b5434b5c31 | ||
|
|
690404a6f8 | ||
|
|
d91dd0ce19 | ||
|
|
6202f08e1b | ||
|
|
574e9db2b0 | ||
|
|
2e25f80d5d | ||
|
|
64f34528df | ||
|
|
26ad6bcdfc | ||
|
|
81dc74966a | ||
|
|
d53b6764d0 | ||
|
|
62f49dd3b9 | ||
|
|
f9f10268c1 | ||
|
|
f241a97312 | ||
|
|
86c8cfc555 | ||
|
|
c01db237b5 | ||
|
|
0093c77032 | ||
|
|
5616caf852 | ||
|
|
05a7ffb126 | ||
|
|
28f21c9501 | ||
|
|
4c780fbd0a | ||
|
|
7773a92800 | ||
|
|
b871d7e954 | ||
|
|
44dc11db61 | ||
|
|
949faa15e8 | ||
|
|
0c3e5f4921 | ||
|
|
266fbd6b73 | ||
|
|
d1b6187012 | ||
|
|
6ab35f5e16 | ||
|
|
32ae31847f | ||
|
|
abe8766c35 | ||
|
|
eaa3172672 | ||
|
|
797c9284d6 | ||
|
|
8c73ef37b6 | ||
|
|
b5cbe3d652 | ||
|
|
ece12e6348 | ||
|
|
ff274e3c16 | ||
|
|
c106237d56 | ||
|
|
6e72ea4775 | ||
|
|
d6a0350253 | ||
|
|
ad29ef043e | ||
|
|
f01df14c4f | ||
|
|
9306b0c8d9 | ||
|
|
f4b7427279 | ||
|
|
300148b48a | ||
|
|
2d17c63140 | ||
|
|
f2908d072e | ||
|
|
5e7841932c | ||
|
|
870f3bfc63 | ||
|
|
3d977fe4d2 | ||
|
|
f075838728 | ||
|
|
2acc11d771 | ||
|
|
0704306e1d | ||
|
|
9dc7ea320d | ||
|
|
e231afb14f | ||
|
|
12acb9a6fb | ||
|
|
18ebd1a843 | ||
|
|
8315ee6c4c | ||
|
|
b9d1a79426 | ||
|
|
09f934b009 | ||
|
|
73af6e22fd | ||
|
|
77e499f95e | ||
|
|
befa4708fd | ||
|
|
90830004c8 | ||
|
|
18d7aa6efa | ||
|
|
b12cf31bb1 | ||
|
|
7d2b4aa047 | ||
|
|
38662dfec7 | ||
|
|
ee706f1009 | ||
|
|
c4e7496421 | ||
|
|
b8adcec4ea | ||
|
|
073cca3df8 | ||
|
|
f66df20ccd | ||
|
|
ea69624992 | ||
|
|
49702e3669 | ||
|
|
59b5e7b280 | ||
|
|
b9683400cf | ||
|
|
760f81212f | ||
|
|
79080573b5 | ||
|
|
99892e9908 | ||
|
|
8faa338ff3 | ||
|
|
818df33fda | ||
|
|
a072a12e24 | ||
|
|
e67734dda9 | ||
|
|
9e36fedd00 | ||
|
|
d2b200eef9 | ||
|
|
a03a3c80fe | ||
|
|
9d5871fdff | ||
|
|
ff873b5777 | ||
|
|
042968ff31 | ||
|
|
430f2ca544 | ||
|
|
cbfbf07cdc | ||
|
|
9e167e1ee3 | ||
|
|
5828489072 | ||
|
|
8c5fafe29f | ||
|
|
6f4ad0db34 | ||
|
|
c2b3bd0451 | ||
|
|
728cee5385 | ||
|
|
246a75b4ff | ||
|
|
4fac463d70 | ||
|
|
382b8182ce | ||
|
|
ce53320b11 | ||
|
|
51b0557d1e | ||
|
|
5a5860825d | ||
|
|
237d07f114 | ||
|
|
9f4ec3de25 | ||
|
|
96a0bbdd0d | ||
|
|
c8064d4fab | ||
|
|
fde677fed4 | ||
|
|
0e0508c8a2 | ||
|
|
bcf150e435 | ||
|
|
240f26229d | ||
|
|
b9b150def7 | ||
|
|
d20225f33b | ||
|
|
5399ab3f0c | ||
|
|
b91a7a4e5e | ||
|
|
e4a60912b8 | ||
|
|
00c97e3e7a | ||
|
|
cf7259bc93 | ||
|
|
b54d4a5ce8 | ||
|
|
db157d2a2a | ||
|
|
6fcc053947 | ||
|
|
a3e8146ea8 | ||
|
|
f19eae429a | ||
|
|
ba515388b8 | ||
|
|
e2e18694db | ||
|
|
4989d351b4 | ||
|
|
1367c798e3 | ||
|
|
9a340af37e | ||
|
|
3c3bceb41d | ||
|
|
64a12edb48 | ||
|
|
4bf18702e6 | ||
|
|
ecc218ab14 | ||
|
|
d6b152915c | ||
|
|
69a934e9ad | ||
|
|
5fa2a6a561 | ||
|
|
d2a422f548 | ||
|
|
b9d52fb2ca | ||
|
|
466000fc6b | ||
|
|
65220c3bd6 | ||
|
|
c989bdbef8 | ||
|
|
eee1692ff3 | ||
|
|
07e56e6df7 | ||
|
|
3c3a07ee0b | ||
|
|
27940ca09c | ||
|
|
3931b84597 | ||
|
|
a0ee342b50 | ||
|
|
864a4576b7 | ||
|
|
bbb7c3f7e9 | ||
|
|
9d6458a206 | ||
|
|
837b061710 | ||
|
|
967ebbdb6c | ||
|
|
dc400ed6a2 | ||
|
|
cf2820710d | ||
|
|
5d7d805ca9 | ||
|
|
f206126df0 | ||
|
|
021bd012bb | ||
|
|
6e5eacb770 | ||
|
|
d7da6db4e1 | ||
|
|
721a0c3c7b | ||
|
|
e0ab56571e | ||
|
|
99d6e696fc | ||
|
|
6289e07883 | ||
|
|
655c410063 | ||
|
|
b2a027fc6f | ||
|
|
0d9c48de4f | ||
|
|
df58ecbeba | ||
|
|
ac458e90a3 | ||
|
|
7df18fcc65 | ||
|
|
c707b1d828 | ||
|
|
c384d537f8 | ||
|
|
e7f3529f68 | ||
|
|
7d5406216a | ||
|
|
2a3683c378 | ||
|
|
154e4fdace |
7
.github/ISSUE_TEMPLATE.md
vendored
7
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,12 +6,13 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.01.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.01.18**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.20*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.20**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
|
||||
- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser
|
||||
|
||||
### What is the purpose of your *issue*?
|
||||
- [ ] Bug report (encountered problems with youtube-dl)
|
||||
@@ -35,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2018.01.18
|
||||
[debug] youtube-dl version 2018.03.20
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
1
.github/ISSUE_TEMPLATE_tmpl.md
vendored
1
.github/ISSUE_TEMPLATE_tmpl.md
vendored
@@ -12,6 +12,7 @@
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
|
||||
- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser
|
||||
|
||||
### What is the purpose of your *issue*?
|
||||
- [ ] Bug report (encountered problems with youtube-dl)
|
||||
|
||||
3
AUTHORS
3
AUTHORS
@@ -233,3 +233,6 @@ Daniel Weber
|
||||
Kay Bouché
|
||||
Yang Hongbo
|
||||
Lei Wang
|
||||
Petr Novák
|
||||
Leonardo Taccari
|
||||
Martin Weinelt
|
||||
|
||||
226
ChangeLog
226
ChangeLog
@@ -1,3 +1,229 @@
|
||||
version 2018.03.20
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve thumbnail extraction for HTML5 entries
|
||||
* Generalize XML manifest processing code and improve XSPF parsing
|
||||
+ [extractor/common] Add _download_xml_handle
|
||||
+ [extractor/common] Add support for relative URIs in _parse_xspf (#15794)
|
||||
|
||||
Extractors
|
||||
+ [7plus] Extract series metadata (#15862, #15906)
|
||||
* [9now] Bypass geo restriction (#15920)
|
||||
* [cbs] Skip unavailable assets (#13490, #13506, #15776)
|
||||
+ [canalc2] Add support for HTML5 videos (#15916, #15919)
|
||||
+ [ceskatelevize] Add support for iframe embeds (#15918)
|
||||
+ [prosiebensat1] Add support for galileo.tv (#15894)
|
||||
+ [generic] Add support for xfileshare embeds (#15879)
|
||||
* [bilibili] Switch to v2 playurl API
|
||||
* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863)
|
||||
* [heise] Improve extraction (#15496, #15784, #15026)
|
||||
* [instagram] Fix user videos extraction (#15858)
|
||||
|
||||
|
||||
version 2018.03.14
|
||||
|
||||
Extractors
|
||||
* [soundcloud] Update client id (#15866)
|
||||
+ [tennistv] Add support for tennistv.com
|
||||
+ [line] Add support for tv.line.me (#9427)
|
||||
* [xnxx] Fix extraction (#15817)
|
||||
* [njpwworld] Fix authentication (#15815)
|
||||
|
||||
|
||||
version 2018.03.10
|
||||
|
||||
Core
|
||||
* [downloader/hls] Skip uplynk ad fragments (#15748)
|
||||
|
||||
Extractors
|
||||
* [pornhub] Don't override session cookies (#15697)
|
||||
+ [raywenderlich] Add support for videos.raywenderlich.com (#15251)
|
||||
* [funk] Fix extraction and rework extractors (#15792)
|
||||
* [nexx] Restore reverse engineered approach
|
||||
+ [heise] Add support for kaltura embeds (#14961, #15728)
|
||||
+ [tvnow] Extract series metadata (#15774)
|
||||
* [ruutu] Continue formats extraction on NOT-USED URLs (#15775)
|
||||
* [vrtnu] Use redirect URL for building video JSON URL (#15767, #15769)
|
||||
* [vimeo] Modernize login code and improve error messaging
|
||||
* [archiveorg] Fix extraction (#15770, #15772)
|
||||
+ [hidive] Add support for hidive.com (#15494)
|
||||
* [afreecatv] Detect deleted videos
|
||||
* [afreecatv] Fix extraction (#15755)
|
||||
* [vice] Fix extraction and rework extractors (#11101, #13019, #13622, #13778)
|
||||
+ [vidzi] Add support for vidzi.si (#15751)
|
||||
* [npo] Fix typo
|
||||
|
||||
|
||||
version 2018.03.03
|
||||
|
||||
Core
|
||||
+ [utils] Add parse_resolution
|
||||
* Revert respecting --prefer-insecure while updating
|
||||
|
||||
Extractors
|
||||
+ [yapfiles] Add support for yapfiles.ru (#15726, #11085)
|
||||
* [spankbang] Fix formats extraction (#15727)
|
||||
* [adn] Fix extraction (#15716)
|
||||
+ [toggle] Extract DASH and ISM formats (#15721)
|
||||
+ [nickelodeon] Add support for nickelodeon.com.tr (#15706)
|
||||
* [npo] Validate and filter format URLs (#15709)
|
||||
|
||||
|
||||
version 2018.02.26
|
||||
|
||||
Extractors
|
||||
* [udemy] Use custom User-Agent (#15571)
|
||||
|
||||
|
||||
version 2018.02.25
|
||||
|
||||
Core
|
||||
* [postprocessor/embedthumbnail] Skip embedding when there aren't any
|
||||
thumbnails (#12573)
|
||||
* [extractor/common] Improve jwplayer subtitles extraction (#15695)
|
||||
|
||||
Extractors
|
||||
+ [vidlii] Add support for vidlii.com (#14472, #14512, #14779)
|
||||
+ [streamango] Capture and output error messages
|
||||
* [streamango] Fix extraction (#14160, #14256)
|
||||
+ [telequebec] Add support for emissions (#14649, #14655)
|
||||
+ [telequebec:live] Add support for live streams (#15688)
|
||||
+ [mailru:music] Add support for mail.ru/music (#15618)
|
||||
* [aenetworks] Switch to akamai HLS formats (#15612)
|
||||
* [ytsearch] Fix flat title extraction (#11260, #15681)
|
||||
|
||||
|
||||
version 2018.02.22
|
||||
|
||||
Core
|
||||
+ [utils] Fixup some common URL typos in sanitize_url (#15649)
|
||||
* Respect --prefer-insecure while updating (#15497)
|
||||
|
||||
Extractors
|
||||
* [vidio] Fix HLS URL extraction (#15675)
|
||||
+ [nexx] Add support for arc.nexx.cloud URLs
|
||||
* [nexx] Switch to arc API (#15652)
|
||||
* [redtube] Fix duration extraction (#15659)
|
||||
+ [sonyliv] Respect referrer (#15648)
|
||||
+ [brightcove:new] Use referrer for formats' HTTP headers
|
||||
+ [cbc] Add support for olympics.cbc.ca (#15535)
|
||||
+ [fusion] Add support for fusion.tv (#15628)
|
||||
* [npo] Improve quality metadata extraction
|
||||
* [npo] Relax URL regular expression (#14987, #14994)
|
||||
+ [npo] Capture and output error message
|
||||
+ [pornhub] Add support for channels (#15613)
|
||||
* [youtube] Handle shared URLs with generic extractor (#14303)
|
||||
|
||||
|
||||
version 2018.02.11
|
||||
|
||||
Core
|
||||
+ [YoutubeDL] Add support for filesize_approx in format selector (#15550)
|
||||
|
||||
Extractors
|
||||
+ [francetv] Add support for live streams (#13689)
|
||||
+ [francetv] Add support for zouzous.fr and ludo.fr (#10454, #13087, #13103,
|
||||
#15012)
|
||||
* [francetv] Separate main extractor and rework others to delegate to it
|
||||
* [francetv] Improve manifest URL signing (#15536)
|
||||
+ [francetv] Sign m3u8 manifest URLs (#15565)
|
||||
+ [veoh] Add support for embed URLs (#15561)
|
||||
* [afreecatv] Fix extraction (#15556)
|
||||
* [periscope] Use accessVideoPublic endpoint (#15554)
|
||||
* [discovery] Fix auth request (#15542)
|
||||
+ [6play] Extract subtitles (#15541)
|
||||
* [newgrounds] Fix metadata extraction (#15531)
|
||||
+ [nbc] Add support for stream.nbcolympics.com (#10295)
|
||||
* [dvtv] Fix live streams extraction (#15442)
|
||||
|
||||
|
||||
version 2018.02.08
|
||||
|
||||
Extractors
|
||||
+ [myvi] Extend URL regular expression
|
||||
+ [myvi:embed] Add support for myvi.tv embeds (#15521)
|
||||
+ [prosiebensat1] Extend URL regular expression (#15520)
|
||||
* [pokemon] Relax URL regular expression and extend title extraction (#15518)
|
||||
+ [gameinformer] Use geo verification headers
|
||||
* [la7] Fix extraction (#15501, #15502)
|
||||
* [gameinformer] Fix brightcove id extraction (#15416)
|
||||
+ [afreecatv] Pass referrer to video info request (#15507)
|
||||
+ [telebruxelles] Add support for live streams
|
||||
* [telebruxelles] Relax URL regular expression
|
||||
* [telebruxelles] Fix extraction (#15504)
|
||||
* [extractor/common] Respect secure schemes in _extract_wowza_formats
|
||||
|
||||
|
||||
version 2018.02.04
|
||||
|
||||
Core
|
||||
* [downloader/http] Randomize HTTP chunk size
|
||||
+ [downloader/http] Add ability to pass downloader options via info dict
|
||||
* [downloader/http] Fix 302 infinite loops by not reusing requests
|
||||
+ Document http_chunk_size
|
||||
|
||||
Extractors
|
||||
+ [brightcove] Pass embed page URL as referrer (#15486)
|
||||
+ [youtube] Enforce using chunked HTTP downloading for DASH formats
|
||||
|
||||
|
||||
version 2018.02.03
|
||||
|
||||
Core
|
||||
+ Introduce --http-chunk-size for chunk-based HTTP downloading
|
||||
+ Add support for IronPython
|
||||
* [downloader/ism] Fix Python 3.2 support
|
||||
|
||||
Extractors
|
||||
* [redbulltv] Fix extraction (#15481)
|
||||
* [redtube] Fix metadata extraction (#15472)
|
||||
* [pladform] Respect platform id and extract HLS formats (#15468)
|
||||
- [rtlnl] Remove progressive formats (#15459)
|
||||
* [6play] Do not modify asset URLs with a token (#15248)
|
||||
* [nationalgeographic] Relax URL regular expression
|
||||
* [dplay] Relax URL regular expression (#15458)
|
||||
* [cbsinteractive] Fix data extraction (#15451)
|
||||
+ [amcnetworks] Add support for sundancetv.com (#9260)
|
||||
|
||||
|
||||
version 2018.01.27
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve _json_ld for articles
|
||||
* Switch codebase to use compat_b64decode
|
||||
+ [compat] Add compat_b64decode
|
||||
|
||||
Extractors
|
||||
+ [seznamzpravy] Add support for seznam.cz and seznamzpravy.cz (#14102, #14616)
|
||||
* [dplay] Bypass geo restriction
|
||||
+ [dplay] Add support for disco-api videos (#15396)
|
||||
* [youtube] Extract precise error messages (#15284)
|
||||
* [teachertube] Capture and output error message
|
||||
* [teachertube] Fix and relax thumbnail extraction (#15403)
|
||||
+ [prosiebensat1] Add another clip id regular expression (#15378)
|
||||
* [tbs] Update tokenizer url (#15395)
|
||||
* [mixcloud] Use compat_b64decode (#15394)
|
||||
- [thesixtyone] Remove extractor (#15341)
|
||||
|
||||
|
||||
version 2018.01.21
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve jwplayer DASH formats extraction (#9242, #15187)
|
||||
* [utils] Improve scientific notation handling in js_to_json (#14789)
|
||||
|
||||
Extractors
|
||||
+ [southparkdk] Add support for southparkstudios.nu
|
||||
+ [southpark] Add support for collections (#14803)
|
||||
* [franceinter] Fix upload date extraction (#14996)
|
||||
+ [rtvs] Add support for rtvs.sk (#9242, #15187)
|
||||
* [restudy] Fix extraction and extend URL regular expression (#15347)
|
||||
* [youtube:live] Improve live detection (#15365)
|
||||
+ [springboardplatform] Add support for springboardplatform.com
|
||||
* [prosiebensat1] Add another clip id regular expression (#15290)
|
||||
- [ringtv] Remove extractor (#15345)
|
||||
|
||||
|
||||
version 2018.01.18
|
||||
|
||||
Extractors
|
||||
|
||||
@@ -198,6 +198,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
size. By default, the buffer size is
|
||||
automatically resized from an initial value
|
||||
of SIZE.
|
||||
--http-chunk-size SIZE Size of a chunk for chunk-based HTTP
|
||||
downloading (e.g. 10485760 or 10M) (default
|
||||
is disabled). May be useful for bypassing
|
||||
bandwidth throttling imposed by a webserver
|
||||
(experimental)
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--playlist-random Download playlist videos in random order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||
|
||||
@@ -135,6 +135,7 @@
|
||||
- **CarambaTVPage**
|
||||
- **CartoonNetwork**
|
||||
- **cbc.ca**
|
||||
- **cbc.ca:olympics**
|
||||
- **cbc.ca:player**
|
||||
- **cbc.ca:watch**
|
||||
- **cbc.ca:watch:video**
|
||||
@@ -189,7 +190,7 @@
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**: 華視新聞
|
||||
- **CTVNews**
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **Culturebox**
|
||||
- **CultureUnplugged**
|
||||
- **curiositystream**
|
||||
- **curiositystream:collection**
|
||||
@@ -291,11 +292,14 @@
|
||||
- **FranceTV**
|
||||
- **FranceTVEmbed**
|
||||
- **francetvinfo.fr**
|
||||
- **FranceTVJeunesse**
|
||||
- **FranceTVSite**
|
||||
- **Freesound**
|
||||
- **freespeech.org**
|
||||
- **FreshLive**
|
||||
- **Funimation**
|
||||
- **Funk**
|
||||
- **FunkChannel**
|
||||
- **FunkMix**
|
||||
- **FunnyOrDie**
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
@@ -333,6 +337,7 @@
|
||||
- **HentaiStigma**
|
||||
- **hetklokhuis**
|
||||
- **hgtv.com:show**
|
||||
- **HiDive**
|
||||
- **HistoricFilms**
|
||||
- **history:topic**: History.com Topic
|
||||
- **hitbox**
|
||||
@@ -422,6 +427,7 @@
|
||||
- **limelight**
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
- **LineTV**
|
||||
- **LiTV**
|
||||
- **LiveLeak**
|
||||
- **LiveLeakEmbed**
|
||||
@@ -437,6 +443,8 @@
|
||||
- **m6**
|
||||
- **macgamestore**: MacGameStore trailers
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **mailru:music**: Музыка@Mail.Ru
|
||||
- **mailru:music:search**: Музыка@Mail.Ru
|
||||
- **MakersChannel**
|
||||
- **MakerTV**
|
||||
- **mangomolo:live**
|
||||
@@ -502,6 +510,7 @@
|
||||
- **MySpass**
|
||||
- **Myvi**
|
||||
- **MyVidster**
|
||||
- **MyviEmbed**
|
||||
- **n-tv.de**
|
||||
- **natgeo**
|
||||
- **natgeo:episodeguide**
|
||||
@@ -510,7 +519,8 @@
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
- **NBCNews**
|
||||
- **NBCOlympics**
|
||||
- **nbcolympics**
|
||||
- **nbcolympics:stream**
|
||||
- **NBCSports**
|
||||
- **NBCSportsVPlayer**
|
||||
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
||||
@@ -667,6 +677,7 @@
|
||||
- **RaiPlay**
|
||||
- **RaiPlayLive**
|
||||
- **RaiPlayPlaylist**
|
||||
- **RayWenderlich**
|
||||
- **RBMARadio**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedBullTV**
|
||||
@@ -682,7 +693,6 @@
|
||||
- **revision**
|
||||
- **revision3:embed**
|
||||
- **RICE**
|
||||
- **RingTV**
|
||||
- **RMCDecouverte**
|
||||
- **RockstarGames**
|
||||
- **RoosterTeeth**
|
||||
@@ -703,6 +713,7 @@
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **rtve.es:television**
|
||||
- **RTVNH**
|
||||
- **RTVS**
|
||||
- **Rudo**
|
||||
- **RUHD**
|
||||
- **RulePorn**
|
||||
@@ -732,6 +743,8 @@
|
||||
- **ServingSys**
|
||||
- **Servus**
|
||||
- **Sexu**
|
||||
- **SeznamZpravy**
|
||||
- **SeznamZpravyArticle**
|
||||
- **Shahid**
|
||||
- **ShahidShow**
|
||||
- **Shared**: shared.sx
|
||||
@@ -773,6 +786,7 @@
|
||||
- **Sport5**
|
||||
- **SportBoxEmbed**
|
||||
- **SportDeutschland**
|
||||
- **SpringboardPlatform**
|
||||
- **Sprout**
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
@@ -812,8 +826,11 @@
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleQuebec**
|
||||
- **TeleQuebecEmission**
|
||||
- **TeleQuebecLive**
|
||||
- **TeleTask**
|
||||
- **Telewebion**
|
||||
- **TennisTV**
|
||||
- **TF1**
|
||||
- **TFO**
|
||||
- **TheIntercept**
|
||||
@@ -821,7 +838,6 @@
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
- **TheScene**
|
||||
- **TheSixtyOne**
|
||||
- **TheStar**
|
||||
- **TheSun**
|
||||
- **TheWeatherChannel**
|
||||
@@ -923,7 +939,6 @@
|
||||
- **vice**
|
||||
- **vice:article**
|
||||
- **vice:show**
|
||||
- **Viceland**
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
@@ -939,6 +954,7 @@
|
||||
- **VideoPress**
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidio**
|
||||
- **VidLii**
|
||||
- **vidme**
|
||||
- **vidme:user**
|
||||
- **vidme:user:likes**
|
||||
@@ -1043,6 +1059,7 @@
|
||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||
- **YapFiles**
|
||||
- **YesJapan**
|
||||
- **yinyuetai:video**: 音悦Tai
|
||||
- **Ynet**
|
||||
|
||||
@@ -3,4 +3,4 @@ universal = True
|
||||
|
||||
[flake8]
|
||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
|
||||
ignore = E402,E501,E731
|
||||
ignore = E402,E501,E731,E741
|
||||
|
||||
@@ -694,6 +694,55 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
self.ie._sort_formats(formats)
|
||||
expect_value(self, formats, expected_formats, None)
|
||||
|
||||
def test_parse_xspf(self):
    """Parse each XSPF fixture and compare the result with the expected entries."""
    # Values shared by every expected entry of the foo_xspf fixture.
    manifest_url = 'https://example.org/src/foo_xspf.xspf'
    blurb = 'Visit http://bigbrother404.bandcamp.com'

    # (fixture name, playlist URL, expected entries)
    _TEST_CASES = [
        (
            'foo_xspf',
            manifest_url,
            [{
                'id': 'foo_xspf',
                'title': 'Pandemonium',
                'description': blurb,
                'duration': 202.416,
                'formats': [{
                    'manifest_url': manifest_url,
                    'url': 'https://example.org/src/cd1/track%201.mp3',
                }],
            }, {
                'id': 'foo_xspf',
                'title': 'Final Cartridge (Nichico Twelve Remix)',
                'description': blurb,
                'duration': 255.857,
                'formats': [{
                    'manifest_url': manifest_url,
                    'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3',
                }],
            }, {
                'id': 'foo_xspf',
                'title': 'Rebuilding Nightingale',
                'description': blurb,
                'duration': 287.915,
                'formats': [{
                    'manifest_url': manifest_url,
                    'url': 'https://example.org/src/track3.mp3',
                }, {
                    'manifest_url': manifest_url,
                    'url': 'https://example.com/track3.mp3',
                }]
            }]
        ),
    ]

    for fixture_name, playlist_url, wanted in _TEST_CASES:
        fixture_path = './test/testdata/xspf/%s.xspf' % fixture_name
        with io.open(fixture_path, mode='r', encoding='utf-8') as fixture:
            document = compat_etree_fromstring(fixture.read().encode('utf-8'))
            # The fixture name doubles as the playlist id; the playlist URL
            # is also used as the base URL.
            parsed = self.ie._parse_xspf(
                document, fixture_name,
                xspf_url=playlist_url, xspf_base_url=playlist_url)
            expect_value(self, parsed, wanted, None)
            for position, entry in enumerate(parsed):
                expect_dict(self, entry, wanted[position])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -92,8 +92,8 @@ class TestDownload(unittest.TestCase):
|
||||
def generator(test_case, tname):
|
||||
|
||||
def test_template(self):
|
||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
||||
other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
|
||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])()
|
||||
other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])]
|
||||
is_playlist = any(k.startswith('playlist') for k in test_case)
|
||||
test_cases = test_case.get(
|
||||
'playlist', [] if is_playlist else [test_case])
|
||||
|
||||
125
test/test_downloader_http.py
Normal file
125
test/test_downloader_http.py
Normal file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import try_rm
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server
|
||||
from youtube_dl.downloader.http import HttpFD
|
||||
from youtube_dl.utils import encodeFilename
|
||||
import ssl
|
||||
import threading
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
    """Return the port that the listening socket of *httpd* is bound to."""
    listener = httpd.socket
    if os.name == 'java' and isinstance(listener, ssl.SSLSocket):
        # In Jython SSLSocket is not a subclass of socket.socket, so the
        # wrapped socket object is used instead.
        listener = listener.sock
    # getsockname() yields an address tuple; index 1 is the port.
    return listener.getsockname()[1]
|
||||
|
||||
|
||||
TEST_SIZE = 10 * 1024
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
    """Request handler that serves TEST_SIZE bytes of dummy video data.

    The GET path selects whether the Content-Range and/or Content-Length
    response headers are sent, so a client can be exercised against servers
    with and without range / length support.
    """

    def log_message(self, format, *args):
        # Drop the per-request log lines so the test output stays clean.
        pass

    def send_content_range(self, total=None):
        """Send a Content-Range header mirroring the request's Range header.

        The header is only sent when the request carries a
        ``Range: bytes=START-END`` header with both bounds present.

        Returns the number of bytes in the requested range (both bounds are
        inclusive), or *total* when there is no usable Range header.
        """
        range_header = self.headers.get('Range')
        start = end = None
        if range_header:
            mobj = re.search(r'^bytes=(\d+)-(\d+)', range_header)
            if mobj:
                start = int(mobj.group(1))
                end = int(mobj.group(2))
        valid_range = start is not None and end is not None
        if valid_range:
            content_range = 'bytes %d-%d' % (start, end)
            if total:
                content_range += '/%d' % total
            self.send_header('Content-Range', content_range)
        return (end - start + 1) if valid_range else total

    def serve(self, range=True, content_length=True):
        """Answer the request with a body of ``#`` bytes.

        range -- when true, honour the request's Range header through
                 send_content_range() and size the body accordingly
        content_length -- when true, send a Content-Length header
        """
        # NOTE: the status is 200 even when a Content-Range header is sent.
        self.send_response(200)
        self.send_header('Content-Type', 'video/mp4')
        size = TEST_SIZE
        if range:
            size = self.send_content_range(TEST_SIZE)
        if content_length:
            self.send_header('Content-Length', size)
        self.end_headers()
        self.wfile.write(b'#' * size)

    def do_GET(self):
        """Dispatch on the request path to the matching header combination."""
        if self.path == '/regular':
            self.serve()
        elif self.path == '/no-content-length':
            self.serve(content_length=False)
        elif self.path == '/no-range':
            self.serve(range=False)
        elif self.path == '/no-range-no-content-length':
            self.serve(range=False, content_length=False)
        else:
            # An assert would be stripped under -O and otherwise only crash
            # the handler thread; answer unknown paths explicitly instead.
            self.send_error(404)
|
||||
|
||||
|
||||
class FakeLogger(object):
    """Logger stand-in whose debug/warning/error methods discard the message."""

    def _discard(self, msg):
        # Intentionally a no-op: nothing is recorded or printed.
        return None

    # All three levels share the same no-op implementation.
    debug = _discard
    warning = _discard
    error = _discard
|
||||
|
||||
|
||||
class TestHttpFD(unittest.TestCase):
    """Download from a local HTTP server through HttpFD and check the file size."""

    def setUp(self):
        # Port 0 lets the OS pick a free port; the real one is read back.
        self.httpd = compat_http_server.HTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        self.port = http_server_port(self.httpd)
        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
        self.server_thread.daemon = True
        self.server_thread.start()

    def tearDown(self):
        # Stop the serve_forever() loop and release the listening socket so
        # servers and threads do not pile up across tests.
        self.httpd.shutdown()
        self.httpd.server_close()
        self.server_thread.join()

    def download(self, params, ep):
        """Download endpoint *ep* with downloader options *params*.

        Asserts that the download succeeds and that the resulting file is
        exactly TEST_SIZE bytes long.
        """
        # Work on a copy so the caller's dict is not modified.
        params = dict(params)
        params['logger'] = FakeLogger()
        ydl = YoutubeDL(params)
        downloader = HttpFD(ydl, params)
        filename = 'testfile.mp4'
        try_rm(encodeFilename(filename))
        try:
            self.assertTrue(downloader.real_download(filename, {
                'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
            }))
            self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
        finally:
            # Remove the downloaded file even when an assertion fails.
            try_rm(encodeFilename(filename))

    def download_all(self, params):
        """Run download() against every endpoint the test server offers."""
        for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
            self.download(params, ep)

    def test_regular(self):
        self.download_all({})

    def test_chunked(self):
        self.download_all({
            'http_chunk_size': 1000,
        })
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -47,7 +47,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
self.end_headers()
|
||||
return
|
||||
|
||||
new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server)
|
||||
new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server)
|
||||
self.send_response(302)
|
||||
self.send_header(b'Location', new_url.encode('utf-8'))
|
||||
self.end_headers()
|
||||
@@ -74,7 +74,7 @@ class FakeLogger(object):
|
||||
class TestHTTP(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.httpd = compat_http_server.HTTPServer(
|
||||
('localhost', 0), HTTPTestRequestHandler)
|
||||
('127.0.0.1', 0), HTTPTestRequestHandler)
|
||||
self.port = http_server_port(self.httpd)
|
||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
||||
self.server_thread.daemon = True
|
||||
@@ -86,15 +86,15 @@ class TestHTTP(unittest.TestCase):
|
||||
return
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
||||
r = ydl.extract_info('http://localhost:%d/302' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port)
|
||||
r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
class TestHTTPS(unittest.TestCase):
|
||||
def setUp(self):
|
||||
certfn = os.path.join(TEST_DIR, 'testcert.pem')
|
||||
self.httpd = compat_http_server.HTTPServer(
|
||||
('localhost', 0), HTTPTestRequestHandler)
|
||||
('127.0.0.1', 0), HTTPTestRequestHandler)
|
||||
self.httpd.socket = ssl.wrap_socket(
|
||||
self.httpd.socket, certfile=certfn, server_side=True)
|
||||
self.port = http_server_port(self.httpd)
|
||||
@@ -107,11 +107,11 @@ class TestHTTPS(unittest.TestCase):
|
||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
||||
self.assertRaises(
|
||||
Exception,
|
||||
ydl.extract_info, 'https://localhost:%d/video.html' % self.port)
|
||||
ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
|
||||
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
def _build_proxy_handler(name):
|
||||
@@ -132,23 +132,23 @@ def _build_proxy_handler(name):
|
||||
class TestProxy(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('normal'))
|
||||
('127.0.0.1', 0), _build_proxy_handler('normal'))
|
||||
self.port = http_server_port(self.proxy)
|
||||
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
|
||||
self.proxy_thread.daemon = True
|
||||
self.proxy_thread.start()
|
||||
|
||||
self.geo_proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('geo'))
|
||||
('127.0.0.1', 0), _build_proxy_handler('geo'))
|
||||
self.geo_port = http_server_port(self.geo_proxy)
|
||||
self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
|
||||
self.geo_proxy_thread.daemon = True
|
||||
self.geo_proxy_thread.start()
|
||||
|
||||
def test_proxy(self):
|
||||
geo_proxy = 'localhost:{0}'.format(self.geo_port)
|
||||
geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
|
||||
ydl = YoutubeDL({
|
||||
'proxy': 'localhost:{0}'.format(self.port),
|
||||
'proxy': '127.0.0.1:{0}'.format(self.port),
|
||||
'geo_verification_proxy': geo_proxy,
|
||||
})
|
||||
url = 'http://foo.com/bar'
|
||||
@@ -162,7 +162,7 @@ class TestProxy(unittest.TestCase):
|
||||
|
||||
def test_proxy_with_idn(self):
|
||||
ydl = YoutubeDL({
|
||||
'proxy': 'localhost:{0}'.format(self.port),
|
||||
'proxy': '127.0.0.1:{0}'.format(self.port),
|
||||
})
|
||||
url = 'http://中文.tw/'
|
||||
response = ydl.urlopen(url).read().decode('utf-8')
|
||||
|
||||
@@ -53,10 +53,12 @@ from youtube_dl.utils import (
|
||||
parse_filesize,
|
||||
parse_count,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
pkcs1pad,
|
||||
read_batch_urls,
|
||||
sanitize_filename,
|
||||
sanitize_path,
|
||||
sanitize_url,
|
||||
expand_path,
|
||||
prepend_extension,
|
||||
replace_extension,
|
||||
@@ -219,6 +221,12 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(sanitize_path('./abc'), 'abc')
|
||||
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
||||
|
||||
def test_sanitize_url(self):
    """Check that sanitize_url supplies a missing scheme and repairs misspelled ones.

    Each pair is (input URL, expected sanitized URL); an already valid
    URL must come back unchanged.
    """
    expectations = (
        ('//foo.bar', 'http://foo.bar'),
        ('httpss://foo.bar', 'https://foo.bar'),
        ('rmtps://foo.bar', 'rtmps://foo.bar'),
        ('https://foo.bar', 'https://foo.bar'),
    )
    for given, wanted in expectations:
        self.assertEqual(sanitize_url(given), wanted)
|
||||
|
||||
def test_expand_path(self):
|
||||
def env(var):
|
||||
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
|
||||
@@ -344,6 +352,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
|
||||
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
||||
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
|
||||
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
@@ -814,6 +823,9 @@ class TestUtil(unittest.TestCase):
|
||||
inp = '''{"duration": "00:01:07"}'''
|
||||
self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''')
|
||||
|
||||
inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}'''
|
||||
self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''')
|
||||
|
||||
def test_js_to_json_edgecases(self):
|
||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||
@@ -885,6 +897,13 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'42': 42})
|
||||
|
||||
on = js_to_json('{42:4.2e1}')
|
||||
self.assertEqual(json.loads(on), {'42': 42.0})
|
||||
|
||||
def test_js_to_json_malformed(self):
    """Pin js_to_json output for inputs where letters are glued onto a number.

    The letter run is emitted as a quoted string while the surrounding
    numeric parts are passed through untouched.
    """
    samples = (
        ('42a1', '42"a1"'),
        ('42a-1', '42"a"-1'),
    )
    for broken, converted in samples:
        self.assertEqual(js_to_json(broken), converted)
|
||||
|
||||
def test_extract_attributes(self):
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||
@@ -965,6 +984,16 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_count('1.1kk '), 1100000)
|
||||
self.assertEqual(parse_count('1.1kk views'), 1100000)
|
||||
|
||||
def test_parse_resolution(self):
    """Check parse_resolution against empty, WIDTHxHEIGHT, NNNp and NK inputs.

    Missing or empty input yields an empty dict; explicit dimensions
    yield both width and height; "p" and "k" shorthands yield height only.
    """
    cases = (
        (None, {}),
        ('', {}),
        ('1920x1080', {'width': 1920, 'height': 1080}),
        ('1920×1080', {'width': 1920, 'height': 1080}),
        ('1920 x 1080', {'width': 1920, 'height': 1080}),
        ('720p', {'height': 720}),
        ('4k', {'height': 2160}),
        ('8K', {'height': 4320}),
    )
    for raw, parsed in cases:
        self.assertEqual(parse_resolution(raw), parsed)
|
||||
|
||||
def test_version_tuple(self):
|
||||
self.assertEqual(version_tuple('1'), (1,))
|
||||
self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
|
||||
|
||||
34
test/testdata/xspf/foo_xspf.xspf
vendored
Normal file
34
test/testdata/xspf/foo_xspf.xspf
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<playlist version="1" xmlns="http://xspf.org/ns/0/">
|
||||
<date>2018-03-09T18:01:43Z</date>
|
||||
<trackList>
|
||||
<track>
|
||||
<location>cd1/track%201.mp3</location>
|
||||
<title>Pandemonium</title>
|
||||
<creator>Foilverb</creator>
|
||||
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
|
||||
<album>Pandemonium EP</album>
|
||||
<trackNum>1</trackNum>
|
||||
<duration>202416</duration>
|
||||
</track>
|
||||
<track>
|
||||
<location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location>
|
||||
<title>Final Cartridge (Nichico Twelve Remix)</title>
|
||||
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
|
||||
<creator>Foilverb</creator>
|
||||
<album>Pandemonium EP</album>
|
||||
<trackNum>2</trackNum>
|
||||
<duration>255857</duration>
|
||||
</track>
|
||||
<track>
|
||||
<location>track3.mp3</location>
|
||||
<location>https://example.com/track3.mp3</location>
|
||||
<title>Rebuilding Nightingale</title>
|
||||
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
|
||||
<creator>Foilverb</creator>
|
||||
<album>Pandemonium EP</album>
|
||||
<trackNum>3</trackNum>
|
||||
<duration>287915</duration>
|
||||
</track>
|
||||
</trackList>
|
||||
</playlist>
|
||||
@@ -298,7 +298,8 @@ class YoutubeDL(object):
|
||||
the downloader (see youtube_dl/downloader/common.py):
|
||||
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
||||
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
||||
xattr_set_filesize, external_downloader_args, hls_use_mpegts.
|
||||
xattr_set_filesize, external_downloader_args, hls_use_mpegts,
|
||||
http_chunk_size.
|
||||
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
@@ -1032,7 +1033,7 @@ class YoutubeDL(object):
|
||||
'!=': operator.ne,
|
||||
}
|
||||
operator_rex = re.compile(r'''(?x)\s*
|
||||
(?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
|
||||
(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
|
||||
$
|
||||
|
||||
@@ -191,6 +191,11 @@ def _real_main(argv=None):
|
||||
if numeric_buffersize is None:
|
||||
parser.error('invalid buffer size specified')
|
||||
opts.buffersize = numeric_buffersize
|
||||
if opts.http_chunk_size is not None:
|
||||
numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
|
||||
if not numeric_chunksize:
|
||||
parser.error('invalid http chunk size specified')
|
||||
opts.http_chunk_size = numeric_chunksize
|
||||
if opts.playliststart <= 0:
|
||||
raise ValueError('Playlist start must be positive')
|
||||
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
|
||||
@@ -346,6 +351,7 @@ def _real_main(argv=None):
|
||||
'keep_fragments': opts.keep_fragments,
|
||||
'buffersize': opts.buffersize,
|
||||
'noresizebuffer': opts.noresizebuffer,
|
||||
'http_chunk_size': opts.http_chunk_size,
|
||||
'continuedl': opts.continue_dl,
|
||||
'noprogress': opts.noprogress,
|
||||
'progress_with_newline': opts.progress_with_newline,
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
from math import ceil
|
||||
|
||||
from .compat import compat_b64decode
|
||||
from .utils import bytes_to_intlist, intlist_to_bytes
|
||||
|
||||
BLOCK_SIZE_BYTES = 16
|
||||
@@ -180,7 +180,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
"""
|
||||
NONCE_LENGTH_BYTES = 8
|
||||
|
||||
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
|
||||
data = bytes_to_intlist(compat_b64decode(data))
|
||||
password = bytes_to_intlist(password.encode('utf-8'))
|
||||
|
||||
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import collections
|
||||
import ctypes
|
||||
@@ -2896,9 +2897,24 @@ except TypeError:
|
||||
if isinstance(spec, compat_str):
|
||||
spec = spec.encode('ascii')
|
||||
return struct.unpack(spec, *args)
|
||||
|
||||
class compat_Struct(struct.Struct):
|
||||
def __init__(self, fmt):
|
||||
if isinstance(fmt, compat_str):
|
||||
fmt = fmt.encode('ascii')
|
||||
super(compat_Struct, self).__init__(fmt)
|
||||
else:
|
||||
compat_struct_pack = struct.pack
|
||||
compat_struct_unpack = struct.unpack
|
||||
if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
|
||||
class compat_Struct(struct.Struct):
|
||||
def unpack(self, string):
|
||||
if not isinstance(string, buffer): # noqa: F821
|
||||
string = buffer(string) # noqa: F821
|
||||
return super(compat_Struct, self).unpack(string)
|
||||
else:
|
||||
compat_Struct = struct.Struct
|
||||
|
||||
|
||||
try:
|
||||
from future_builtins import zip as compat_zip
|
||||
@@ -2908,6 +2924,16 @@ except ImportError: # not 2.6+ or is 3.x
|
||||
except ImportError:
|
||||
compat_zip = zip
|
||||
|
||||
|
||||
# base64.b64decode is used directly from Python 3.3 on; older interpreters
# get a wrapper that turns text input into ASCII bytes before decoding.
if sys.version_info >= (3, 3):
    compat_b64decode = base64.b64decode
else:
    def compat_b64decode(s, *args, **kwargs):
        """Decode base64 data given either as bytes or as a text string.

        Extra positional and keyword arguments are forwarded unchanged
        to base64.b64decode.
        """
        payload = s.encode('ascii') if isinstance(s, compat_str) else s
        return base64.b64decode(payload, *args, **kwargs)
|
||||
|
||||
|
||||
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
|
||||
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
|
||||
# names, see the original PyPy issue [1] and the youtube-dl one [2].
|
||||
@@ -2930,6 +2956,8 @@ __all__ = [
|
||||
'compat_HTMLParseError',
|
||||
'compat_HTMLParser',
|
||||
'compat_HTTPError',
|
||||
'compat_Struct',
|
||||
'compat_b64decode',
|
||||
'compat_basestring',
|
||||
'compat_chr',
|
||||
'compat_cookiejar',
|
||||
|
||||
@@ -49,6 +49,9 @@ class FileDownloader(object):
|
||||
external_downloader_args: A list of additional command-line arguments for the
|
||||
external downloader.
|
||||
hls_use_mpegts: Use the mpegts container for HLS videos.
|
||||
http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
|
||||
useful for bypassing bandwidth throttling imposed by
|
||||
a webserver (experimental)
|
||||
|
||||
Subclasses of this one must re-define the real_download method.
|
||||
"""
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
from __future__ import division, unicode_literals
|
||||
|
||||
import base64
|
||||
import io
|
||||
import itertools
|
||||
import time
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_fromstring,
|
||||
compat_urlparse,
|
||||
compat_urllib_error,
|
||||
@@ -312,7 +312,7 @@ class F4mFD(FragmentFD):
|
||||
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||
else:
|
||||
bootstrap_url = None
|
||||
bootstrap = base64.b64decode(node.text.encode('ascii'))
|
||||
bootstrap = compat_b64decode(node.text)
|
||||
boot_info = read_bootstrap_info(bootstrap)
|
||||
return boot_info, bootstrap_url
|
||||
|
||||
@@ -349,7 +349,7 @@ class F4mFD(FragmentFD):
|
||||
live = boot_info['live']
|
||||
metadata_node = media.find(_add_ns('metadata'))
|
||||
if metadata_node is not None:
|
||||
metadata = base64.b64decode(metadata_node.text.encode('ascii'))
|
||||
metadata = compat_b64decode(metadata_node.text)
|
||||
else:
|
||||
metadata = None
|
||||
|
||||
|
||||
@@ -75,8 +75,9 @@ class HlsFD(FragmentFD):
|
||||
fd.add_progress_hook(ph)
|
||||
return fd.real_download(filename, info_dict)
|
||||
|
||||
def anvato_ad(s):
|
||||
return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
|
||||
def is_ad_fragment(s):
    """Return True when playlist line ``s`` is a tag that flags an ad segment.

    Two tag forms are recognised: an ``#ANVATO-SEGMENT-INFO`` line
    containing ``type=ad``, and an ``#UPLYNK-SEGMENT`` line ending in
    ``,ad``.
    """
    anvato_ad = s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
    if anvato_ad:
        return True
    return s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')
|
||||
|
||||
media_frags = 0
|
||||
ad_frags = 0
|
||||
@@ -86,7 +87,7 @@ class HlsFD(FragmentFD):
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith('#'):
|
||||
if anvato_ad(line):
|
||||
if is_ad_fragment(line):
|
||||
ad_frags += 1
|
||||
ad_frag_next = True
|
||||
continue
|
||||
@@ -195,7 +196,7 @@ class HlsFD(FragmentFD):
|
||||
'start': sub_range_start,
|
||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||
}
|
||||
elif anvato_ad(line):
|
||||
elif is_ad_fragment(line):
|
||||
ad_frag_next = True
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
@@ -4,13 +4,18 @@ import errno
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import compat_urllib_error
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
)
|
||||
from ..utils import (
|
||||
ContentTooShortError,
|
||||
encodeFilename,
|
||||
int_or_none,
|
||||
sanitize_open,
|
||||
sanitized_Request,
|
||||
write_xattr,
|
||||
@@ -38,21 +43,26 @@ class HttpFD(FileDownloader):
|
||||
add_headers = info_dict.get('http_headers')
|
||||
if add_headers:
|
||||
headers.update(add_headers)
|
||||
basic_request = sanitized_Request(url, None, headers)
|
||||
request = sanitized_Request(url, None, headers)
|
||||
|
||||
is_test = self.params.get('test', False)
|
||||
|
||||
if is_test:
|
||||
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
|
||||
chunk_size = self._TEST_FILE_SIZE if is_test else (
|
||||
info_dict.get('downloader_options', {}).get('http_chunk_size') or
|
||||
self.params.get('http_chunk_size') or 0)
|
||||
|
||||
ctx.open_mode = 'wb'
|
||||
ctx.resume_len = 0
|
||||
ctx.data_len = None
|
||||
ctx.block_size = self.params.get('buffersize', 1024)
|
||||
ctx.start_time = time.time()
|
||||
ctx.chunk_size = None
|
||||
|
||||
if self.params.get('continuedl', True):
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
|
||||
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||
ctx.resume_len = os.path.getsize(
|
||||
encodeFilename(ctx.tmpfilename))
|
||||
|
||||
ctx.is_resume = ctx.resume_len > 0
|
||||
|
||||
count = 0
|
||||
retries = self.params.get('retries', 0)
|
||||
@@ -64,11 +74,36 @@ class HttpFD(FileDownloader):
|
||||
def __init__(self, source_error):
|
||||
self.source_error = source_error
|
||||
|
||||
class NextFragment(Exception):
    """Control-flow signal used by chunked HTTP downloading.

    Raised after a chunk has been consumed while fewer bytes than the
    known total have been received; the surrounding retry loop catches
    it and continues, which issues the request for the following chunk.
    """
|
||||
|
||||
def set_range(req, start, end):
    """Set the HTTP ``Range`` header on ``req`` for the given byte span.

    ``start`` is the first byte offset to request. ``end`` is the last
    byte offset (inclusive), or ``None`` for an open-ended range that
    runs to the end of the resource.
    """
    range_header = 'bytes=%d-' % start
    # Test for None explicitly: an end offset of 0 is a valid inclusive
    # bound (the single byte 0-0) and must not be mistaken for "no end",
    # which would silently request the whole resource instead.
    if end is not None:
        range_header += compat_str(end)
    req.add_header('Range', range_header)
|
||||
|
||||
def establish_connection():
|
||||
if ctx.resume_len != 0:
|
||||
self.report_resuming_byte(ctx.resume_len)
|
||||
request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
|
||||
ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
|
||||
if not is_test and chunk_size else chunk_size)
|
||||
if ctx.resume_len > 0:
|
||||
range_start = ctx.resume_len
|
||||
if ctx.is_resume:
|
||||
self.report_resuming_byte(ctx.resume_len)
|
||||
ctx.open_mode = 'ab'
|
||||
elif ctx.chunk_size > 0:
|
||||
range_start = 0
|
||||
else:
|
||||
range_start = None
|
||||
ctx.is_resume = False
|
||||
range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
|
||||
if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
|
||||
range_end = ctx.data_len - 1
|
||||
has_range = range_start is not None
|
||||
ctx.has_range = has_range
|
||||
request = sanitized_Request(url, None, headers)
|
||||
if has_range:
|
||||
set_range(request, range_start, range_end)
|
||||
# Establish connection
|
||||
try:
|
||||
ctx.data = self.ydl.urlopen(request)
|
||||
@@ -77,29 +112,40 @@ class HttpFD(FileDownloader):
|
||||
# that don't support resuming and serve a whole file with no Content-Range
|
||||
# set in response despite of requested Range (see
|
||||
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
|
||||
if ctx.resume_len > 0:
|
||||
if has_range:
|
||||
content_range = ctx.data.headers.get('Content-Range')
|
||||
if content_range:
|
||||
content_range_m = re.search(r'bytes (\d+)-', content_range)
|
||||
content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
|
||||
# Content-Range is present and matches requested Range, resume is possible
|
||||
if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
|
||||
return
|
||||
if content_range_m:
|
||||
if range_start == int(content_range_m.group(1)):
|
||||
content_range_end = int_or_none(content_range_m.group(2))
|
||||
content_len = int_or_none(content_range_m.group(3))
|
||||
accept_content_len = (
|
||||
# Non-chunked download
|
||||
not ctx.chunk_size or
|
||||
# Chunked download and requested piece or
|
||||
# its part is promised to be served
|
||||
content_range_end == range_end or
|
||||
content_len < range_end)
|
||||
if accept_content_len:
|
||||
ctx.data_len = content_len
|
||||
return
|
||||
# Content-Range is either not present or invalid. Assuming remote webserver is
|
||||
# trying to send the whole file, resume is not possible, so wiping the local file
|
||||
# and performing entire redownload
|
||||
self.report_unable_to_resume()
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
|
||||
return
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||
# Unexpected HTTP error
|
||||
raise
|
||||
elif err.code == 416:
|
||||
if err.code == 416:
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
try:
|
||||
# Open the connection again without the range header
|
||||
ctx.data = self.ydl.urlopen(basic_request)
|
||||
ctx.data = self.ydl.urlopen(
|
||||
sanitized_Request(url, None, headers))
|
||||
content_length = ctx.data.info()['Content-Length']
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if err.code < 500 or err.code >= 600:
|
||||
@@ -130,6 +176,9 @@ class HttpFD(FileDownloader):
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
return
|
||||
elif err.code < 500 or err.code >= 600:
|
||||
# Unexpected HTTP error
|
||||
raise
|
||||
raise RetryDownload(err)
|
||||
except socket.error as err:
|
||||
if err.errno != errno.ECONNRESET:
|
||||
@@ -160,7 +209,7 @@ class HttpFD(FileDownloader):
|
||||
return False
|
||||
|
||||
byte_counter = 0 + ctx.resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
block_size = ctx.block_size
|
||||
start = time.time()
|
||||
|
||||
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
|
||||
@@ -233,25 +282,30 @@ class HttpFD(FileDownloader):
|
||||
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
|
||||
if data_len is None:
|
||||
if ctx.data_len is None:
|
||||
eta = None
|
||||
else:
|
||||
eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
|
||||
eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
|
||||
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'total_bytes': ctx.data_len,
|
||||
'tmpfilename': ctx.tmpfilename,
|
||||
'filename': ctx.filename,
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
'elapsed': now - start,
|
||||
'elapsed': now - ctx.start_time,
|
||||
})
|
||||
|
||||
if is_test and byte_counter == data_len:
|
||||
break
|
||||
|
||||
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
|
||||
ctx.resume_len = byte_counter
|
||||
# ctx.block_size = block_size
|
||||
raise NextFragment()
|
||||
|
||||
if ctx.stream is None:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('Did not get any data blocks')
|
||||
@@ -276,7 +330,7 @@ class HttpFD(FileDownloader):
|
||||
'total_bytes': byte_counter,
|
||||
'filename': ctx.filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - start,
|
||||
'elapsed': time.time() - ctx.start_time,
|
||||
})
|
||||
|
||||
return True
|
||||
@@ -290,6 +344,8 @@ class HttpFD(FileDownloader):
|
||||
if count <= retries:
|
||||
self.report_retry(e.source_error, count, retries)
|
||||
continue
|
||||
except NextFragment:
|
||||
continue
|
||||
except SucceedDownload:
|
||||
return True
|
||||
|
||||
|
||||
@@ -1,25 +1,27 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import struct
|
||||
import binascii
|
||||
import io
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..compat import (
|
||||
compat_Struct,
|
||||
compat_urllib_error,
|
||||
)
|
||||
|
||||
|
||||
u8 = struct.Struct(b'>B')
|
||||
u88 = struct.Struct(b'>Bx')
|
||||
u16 = struct.Struct(b'>H')
|
||||
u1616 = struct.Struct(b'>Hxx')
|
||||
u32 = struct.Struct(b'>I')
|
||||
u64 = struct.Struct(b'>Q')
|
||||
u8 = compat_Struct('>B')
|
||||
u88 = compat_Struct('>Bx')
|
||||
u16 = compat_Struct('>H')
|
||||
u1616 = compat_Struct('>Hxx')
|
||||
u32 = compat_Struct('>I')
|
||||
u64 = compat_Struct('>Q')
|
||||
|
||||
s88 = struct.Struct(b'>bx')
|
||||
s16 = struct.Struct(b'>h')
|
||||
s1616 = struct.Struct(b'>hxx')
|
||||
s32 = struct.Struct(b'>i')
|
||||
s88 = compat_Struct('>bx')
|
||||
s16 = compat_Struct('>h')
|
||||
s1616 = compat_Struct('>hxx')
|
||||
s32 = compat_Struct('>i')
|
||||
|
||||
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
|
||||
|
||||
@@ -139,7 +141,7 @@ def write_piff_header(stream, params):
|
||||
sample_entry_payload += u16.pack(0x18) # depth
|
||||
sample_entry_payload += s16.pack(-1) # pre defined
|
||||
|
||||
codec_private_data = binascii.unhexlify(params['codec_private_data'])
|
||||
codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
|
||||
if fourcc in ('H264', 'AVC1'):
|
||||
sps, pps = codec_private_data.split(u32.pack(1))[1:]
|
||||
avcc_payload = u8.pack(1) # configuration version
|
||||
|
||||
@@ -66,7 +66,7 @@ class AbcNewsIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
||||
'info_dict': {
|
||||
'id': '10498713',
|
||||
'id': '10505354',
|
||||
'ext': 'flv',
|
||||
'display_id': 'dramatic-video-rare-death-job-america',
|
||||
'title': 'Occupational Hazards',
|
||||
@@ -79,7 +79,7 @@ class AbcNewsIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
|
||||
'info_dict': {
|
||||
'id': '39125818',
|
||||
'id': '38897857',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
|
||||
'title': 'Justin Timberlake Drops Hints For Secret Single',
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import compat_ord
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
)
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
ExtractorError,
|
||||
@@ -48,9 +50,9 @@ class ADNIE(InfoExtractor):
|
||||
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
|
||||
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(b'\xc8\x6e\x06\xbc\xbe\xc6\x49\xf5\x88\x0d\xc8\x47\xc4\x27\x0c\x60'),
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||
))
|
||||
subtitles_json = self._parse_json(
|
||||
dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
|
||||
@@ -105,15 +107,18 @@ class ADNIE(InfoExtractor):
|
||||
|
||||
options = player_config.get('options') or {}
|
||||
metas = options.get('metas') or {}
|
||||
title = metas.get('title') or video_info['title']
|
||||
links = player_config.get('links') or {}
|
||||
sub_path = player_config.get('subtitles')
|
||||
error = None
|
||||
if not links:
|
||||
links_url = player_config['linksurl']
|
||||
links_url = player_config.get('linksurl') or options['videoUrl']
|
||||
links_data = self._download_json(urljoin(
|
||||
self._BASE_URL, links_url), video_id)
|
||||
links = links_data.get('links') or {}
|
||||
metas = metas or links_data.get('meta') or {}
|
||||
sub_path = sub_path or links_data.get('subtitles')
|
||||
error = links_data.get('error')
|
||||
title = metas.get('title') or video_info['title']
|
||||
|
||||
formats = []
|
||||
for format_id, qualities in links.items():
|
||||
@@ -144,7 +149,7 @@ class ADNIE(InfoExtractor):
|
||||
'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
|
||||
'thumbnail': video_info.get('image'),
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(player_config.get('subtitles'), video_id),
|
||||
'subtitles': self.extract_subtitles(sub_path, video_id),
|
||||
'episode': metas.get('subtitle') or video_info.get('videoTitle'),
|
||||
'series': video_info.get('playlistTitle'),
|
||||
}
|
||||
|
||||
@@ -122,7 +122,8 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'assetTypes': 'high_video_s3'
|
||||
'assetTypes': 'high_video_ak',
|
||||
'switch': 'hls_high_ak',
|
||||
}
|
||||
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
||||
media_url = self._search_regex(
|
||||
|
||||
@@ -175,10 +175,27 @@ class AfreecaTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if re.search(r'alert\(["\']This video has been deleted', webpage):
|
||||
raise ExtractorError(
|
||||
'Video %s has been deleted' % video_id, expected=True)
|
||||
|
||||
station_id = self._search_regex(
|
||||
r'nStationNo\s*=\s*(\d+)', webpage, 'station')
|
||||
bbs_id = self._search_regex(
|
||||
r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
|
||||
video_id = self._search_regex(
|
||||
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
|
||||
|
||||
video_xml = self._download_xml(
|
||||
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
|
||||
video_id, query={
|
||||
video_id, headers={
|
||||
'Referer': 'http://vod.afreecatv.com/embed.php',
|
||||
}, query={
|
||||
'nTitleNo': video_id,
|
||||
'nStationNo': station_id,
|
||||
'nBbsNo': bbs_id,
|
||||
'partialView': 'SKIP_ADULT',
|
||||
})
|
||||
|
||||
@@ -187,10 +204,10 @@ class AfreecaTVIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, flag), expected=True)
|
||||
|
||||
video_element = video_xml.findall(compat_xpath('./track/video'))[1]
|
||||
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
|
||||
if video_element is None or video_element.text is None:
|
||||
raise ExtractorError('Specified AfreecaTV video does not exist',
|
||||
expected=True)
|
||||
raise ExtractorError(
|
||||
'Video %s video does not exist' % video_id, expected=True)
|
||||
|
||||
video_url = video_element.text.strip()
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AMCNetworksIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
||||
'md5': '',
|
||||
@@ -51,6 +51,9 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
}, {
|
||||
'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -41,7 +41,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://archive.org/embed/' + video_id, video_id)
|
||||
jwplayer_playlist = self._parse_json(self._search_regex(
|
||||
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\);",
|
||||
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
|
||||
webpage, 'jwplayer playlist'), video_id)
|
||||
info = self._parse_jwplayer_data(
|
||||
{'playlist': jwplayer_playlist}, video_id, base_url=url)
|
||||
|
||||
@@ -24,57 +24,30 @@ class ARDMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
|
||||
# available till 26.07.2022
|
||||
'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
|
||||
'info_dict': {
|
||||
'id': '29582122',
|
||||
'id': '44726822',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ich liebe das Leben trotzdem',
|
||||
'description': 'md5:45e4c225c72b27993314b31a84a5261c',
|
||||
'duration': 4557,
|
||||
'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
|
||||
'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
|
||||
'duration': 1740,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
||||
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
||||
'info_dict': {
|
||||
'id': '29522730',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tatort: Scheinwelten - Hörfassung (Video tgl. ab 20 Uhr)',
|
||||
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
||||
'duration': 5252,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
'md5': '219d94d8980b4f538c7fcb0865eb7f2c',
|
||||
'info_dict': {
|
||||
'id': '28488308',
|
||||
'ext': 'mp3',
|
||||
'title': 'Tod eines Fußballers',
|
||||
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
||||
'duration': 3240,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||
'md5': '4e8f00631aac0395fee17368ac0e9867',
|
||||
'info_dict': {
|
||||
'id': '30796318',
|
||||
'ext': 'mp3',
|
||||
'title': 'Vor dem Fest',
|
||||
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
|
||||
'duration': 3287,
|
||||
},
|
||||
'skip': 'Video is no longer available',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
@@ -252,20 +225,23 @@ class ARDMediathekIE(InfoExtractor):
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||
'md5': 'd216c3a86493f9322545e045ddc3eb35',
|
||||
_TESTS = [{
|
||||
# available till 14.02.2019
|
||||
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
||||
'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
|
||||
'info_dict': {
|
||||
'display_id': 'die-story-im-ersten-mission-unter-falscher-flagge',
|
||||
'id': '100',
|
||||
'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
|
||||
'id': '102',
|
||||
'ext': 'mp4',
|
||||
'duration': 2600,
|
||||
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
||||
'upload_date': '20140804',
|
||||
'duration': 4435.0,
|
||||
'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
|
||||
'upload_date': '20180214',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
|
||||
|
||||
class BigflixIE(InfoExtractor):
|
||||
@@ -39,8 +41,8 @@ class BigflixIE(InfoExtractor):
|
||||
webpage, 'title')
|
||||
|
||||
def decode_url(quoted_b64_url):
|
||||
return base64.b64decode(compat_urllib_parse_unquote(
|
||||
quoted_b64_url).encode('ascii')).decode('utf-8')
|
||||
return compat_b64decode(compat_urllib_parse_unquote(
|
||||
quoted_b64_url)).decode('utf-8')
|
||||
|
||||
formats = []
|
||||
for height, encoded_url in re.findall(
|
||||
|
||||
@@ -27,14 +27,14 @@ class BiliBiliIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
|
||||
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
||||
'info_dict': {
|
||||
'id': '1074402',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'duration': 308.315,
|
||||
'timestamp': 1398012660,
|
||||
'duration': 308.067,
|
||||
'timestamp': 1398012678,
|
||||
'upload_date': '20140420',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'uploader': '菊子桑',
|
||||
@@ -59,17 +59,38 @@ class BiliBiliIE(InfoExtractor):
|
||||
'url': 'http://www.bilibili.com/video/av8903802/',
|
||||
'info_dict': {
|
||||
'id': '8903802',
|
||||
'ext': 'mp4',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
|
||||
'uploader': '阿滴英文',
|
||||
'uploader_id': '65880958',
|
||||
'timestamp': 1488382620,
|
||||
'upload_date': '20170301',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Test metadata only
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '8903802_part1',
|
||||
'ext': 'flv',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
|
||||
'uploader': '阿滴英文',
|
||||
'uploader_id': '65880958',
|
||||
'timestamp': 1488382634,
|
||||
'upload_date': '20170301',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Test metadata only
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '8903802_part2',
|
||||
'ext': 'flv',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
|
||||
'uploader': '阿滴英文',
|
||||
'uploader_id': '65880958',
|
||||
'timestamp': 1488382634,
|
||||
'upload_date': '20170301',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Test metadata only
|
||||
},
|
||||
}]
|
||||
}]
|
||||
|
||||
_APP_KEY = '84956560bc028eb7'
|
||||
@@ -92,9 +113,13 @@ class BiliBiliIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if 'anime/' not in url:
|
||||
cid = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
cid = self._search_regex(
|
||||
r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
|
||||
default=None
|
||||
) or compat_parse_qs(self._search_regex(
|
||||
[r'1EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'1EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
|
||||
r'1<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))['cid'][0]
|
||||
else:
|
||||
if 'no_bangumi_tip' not in smuggled_data:
|
||||
@@ -114,53 +139,66 @@ class BiliBiliIE(InfoExtractor):
|
||||
self._report_error(js)
|
||||
cid = js['result']['cid']
|
||||
|
||||
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
|
||||
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
||||
|
||||
headers = {
|
||||
'Referer': url
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page',
|
||||
headers=headers)
|
||||
|
||||
if 'durl' not in video_info:
|
||||
self._report_error(video_info)
|
||||
|
||||
entries = []
|
||||
|
||||
for idx, durl in enumerate(video_info['durl']):
|
||||
formats = [{
|
||||
'url': durl['url'],
|
||||
'filesize': int_or_none(durl['size']),
|
||||
}]
|
||||
for backup_url in durl.get('backup_url', []):
|
||||
formats.append({
|
||||
'url': backup_url,
|
||||
# backup URLs have lower priorities
|
||||
'preference': -2 if 'hd.mp4' in backup_url else -3,
|
||||
RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
|
||||
for num, rendition in enumerate(RENDITIONS, start=1):
|
||||
payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
|
||||
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page',
|
||||
headers=headers, fatal=num == len(RENDITIONS))
|
||||
|
||||
if not video_info:
|
||||
continue
|
||||
|
||||
if 'durl' not in video_info:
|
||||
if num < len(RENDITIONS):
|
||||
continue
|
||||
self._report_error(video_info)
|
||||
|
||||
for idx, durl in enumerate(video_info['durl']):
|
||||
formats = [{
|
||||
'url': durl['url'],
|
||||
'filesize': int_or_none(durl['size']),
|
||||
}]
|
||||
for backup_url in durl.get('backup_url', []):
|
||||
formats.append({
|
||||
'url': backup_url,
|
||||
# backup URLs have lower priorities
|
||||
'preference': -2 if 'hd.mp4' in backup_url else -3,
|
||||
})
|
||||
|
||||
for a_format in formats:
|
||||
a_format.setdefault('http_headers', {}).update({
|
||||
'Referer': url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (video_id, idx),
|
||||
'duration': float_or_none(durl.get('length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
break
|
||||
|
||||
for a_format in formats:
|
||||
a_format.setdefault('http_headers', {}).update({
|
||||
'Referer': url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (video_id, idx),
|
||||
'duration': float_or_none(durl.get('length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
title = self._html_search_regex('<h1[^>]*>([^<]+)</h1>', webpage, 'title')
|
||||
title = self._html_search_regex(
|
||||
('<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
|
||||
group='title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
|
||||
default=None) or self._html_search_meta(
|
||||
'uploadDate', webpage, 'timestamp', default=None))
|
||||
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
@@ -174,13 +212,16 @@ class BiliBiliIE(InfoExtractor):
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
|
||||
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
'uploader': uploader_mobj.group('name'),
|
||||
'uploader_id': uploader_mobj.group('id'),
|
||||
})
|
||||
if not info.get('uploader'):
|
||||
info['uploader'] = self._html_search_meta(
|
||||
'author', webpage, 'uploader', default=None)
|
||||
|
||||
for entry in entries:
|
||||
entry.update(info)
|
||||
|
||||
@@ -564,7 +564,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
|
||||
return entries
|
||||
|
||||
def _parse_brightcove_metadata(self, json_data, video_id):
|
||||
def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
|
||||
title = json_data['name'].strip()
|
||||
|
||||
formats = []
|
||||
@@ -638,6 +638,9 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
|
||||
subtitles = {}
|
||||
for text_track in json_data.get('text_tracks', []):
|
||||
if text_track.get('src'):
|
||||
@@ -690,10 +693,17 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
|
||||
try:
|
||||
json_data = self._download_json(api_url, video_id, headers={
|
||||
'Accept': 'application/json;pk=%s' % policy_key
|
||||
headers = {
|
||||
'Accept': 'application/json;pk=%s' % policy_key,
|
||||
}
|
||||
referrer = smuggled_data.get('referrer')
|
||||
if referrer:
|
||||
headers.update({
|
||||
'Referer': referrer,
|
||||
'Origin': re.search(r'https?://[^/]+', referrer).group(0),
|
||||
})
|
||||
try:
|
||||
json_data = self._download_json(api_url, video_id, headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
||||
@@ -717,4 +727,5 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'tveToken': tve_token,
|
||||
})
|
||||
|
||||
return self._parse_brightcove_metadata(json_data, video_id)
|
||||
return self._parse_brightcove_metadata(
|
||||
json_data, video_id, headers=headers)
|
||||
|
||||
@@ -31,6 +31,10 @@ class Canalc2IE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://www.canalc2.tv/video/%s' % video_id, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>',
|
||||
webpage, 'title')
|
||||
|
||||
formats = []
|
||||
for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
|
||||
if video_url.startswith('rtmp://'):
|
||||
@@ -49,17 +53,21 @@ class Canalc2IE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'id=["\']video_duree["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
if formats:
|
||||
info = {
|
||||
'formats': formats,
|
||||
}
|
||||
else:
|
||||
info = self._parse_html5_media_entries(url, webpage, url)[0]
|
||||
|
||||
return {
|
||||
self._sort_formats(info['formats'])
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
'duration': parse_duration(self._search_regex(
|
||||
r'id=["\']video_duree["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False)),
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -246,7 +246,7 @@ class VrtNUIE(GigyaBaseIE):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
|
||||
@@ -276,7 +276,7 @@ class VrtNUIE(GigyaBaseIE):
|
||||
webpage, 'release_date', default=None))
|
||||
|
||||
# If there's a ? or a # in the URL, remove them and everything after
|
||||
clean_url = url.split('?')[0].split('#')[0].strip('/')
|
||||
clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
|
||||
securevideo_url = clean_url + '.mssecurevideo.json'
|
||||
|
||||
try:
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -13,6 +14,7 @@ from ..utils import (
|
||||
xpath_element,
|
||||
xpath_with_ns,
|
||||
find_xpath_attr,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_age_limit,
|
||||
int_or_none,
|
||||
@@ -359,3 +361,63 @@ class CBCWatchIE(CBCWatchBaseIE):
|
||||
video_id = self._match_id(url)
|
||||
rss = self._call_api('web/browse/' + video_id, video_id)
|
||||
return self._parse_rss_feed(rss)
|
||||
|
||||
|
||||
class CBCOlympicsIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca:olympics'
|
||||
_VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._hidden_inputs(webpage)['videoId']
|
||||
video_doc = self._download_xml(
|
||||
'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id)
|
||||
title = xpath_text(video_doc, 'title', fatal=True)
|
||||
is_live = xpath_text(video_doc, 'kind') == 'Live'
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
|
||||
formats = []
|
||||
for video_source in video_doc.findall('videoSources/videoSource'):
|
||||
uri = xpath_text(video_source, 'uri')
|
||||
if not uri:
|
||||
continue
|
||||
tokenize = self._download_json(
|
||||
'https://olympics.cbc.ca/api/api-akamai/tokenize',
|
||||
video_id, data=json.dumps({
|
||||
'VideoSource': uri,
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': url,
|
||||
# d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
|
||||
'Cookie': '_dvp=TK:C0ObxjerU', # AKAMAI CDN cookie
|
||||
}, fatal=False)
|
||||
if not tokenize:
|
||||
continue
|
||||
content_url = tokenize['ContentUrl']
|
||||
video_source_format = video_source.get('format')
|
||||
if video_source_format == 'IIS':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
content_url, video_id, ism_id=video_source_format, fatal=False))
|
||||
else:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
content_url, video_id, 'mp4',
|
||||
'm3u8' if is_live else 'm3u8_native',
|
||||
m3u8_id=video_source_format, fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': xpath_text(video_doc, 'description'),
|
||||
'thumbnail': xpath_text(video_doc, 'thumbnailUrl'),
|
||||
'duration': parse_duration(xpath_text(video_doc, 'duration')),
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
xpath_element,
|
||||
@@ -61,6 +62,7 @@ class CBSIE(CBSBaseIE):
|
||||
asset_types = []
|
||||
subtitles = {}
|
||||
formats = []
|
||||
last_e = None
|
||||
for item in items_data.findall('.//item'):
|
||||
asset_type = xpath_text(item, 'assetType')
|
||||
if not asset_type or asset_type in asset_types:
|
||||
@@ -74,11 +76,17 @@ class CBSIE(CBSBaseIE):
|
||||
query['formats'] = 'MPEG4,M3U'
|
||||
elif asset_type in ('RTMP', 'WIFI', '3G'):
|
||||
query['formats'] = 'MPEG4,FLV'
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data' % asset_type)
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data' % asset_type)
|
||||
except ExtractorError as e:
|
||||
last_e = e
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
if last_e and not formats:
|
||||
raise last_e
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self._extract_theplatform_metadata(tp_path, content_id)
|
||||
|
||||
@@ -75,10 +75,10 @@ class CBSInteractiveIE(CBSIE):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
data_json = self._html_search_regex(
|
||||
r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'",
|
||||
r"data(?:-(?:cnet|zdnet))?-video(?:-(?:uvp(?:js)?|player))?-options='([^']+)'",
|
||||
webpage, 'data json')
|
||||
data = self._parse_json(data_json, display_id)
|
||||
vdata = data.get('video') or data['videos'][0]
|
||||
vdata = data.get('video') or (data.get('videos') or data.get('playlist'))[0]
|
||||
|
||||
video_id = vdata['mpxRefId']
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
USER_AGENTS,
|
||||
)
|
||||
@@ -265,6 +266,10 @@ class CeskaTelevizePoradyIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -272,8 +277,11 @@ class CeskaTelevizePoradyIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data_url = unescapeHTML(self._search_regex(
|
||||
r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'iframe player url', group='url'))
|
||||
data_url = update_url_query(unescapeHTML(self._search_regex(
|
||||
(r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
|
||||
webpage, 'iframe player url', group='url')), query={
|
||||
'autoStart': 'true',
|
||||
})
|
||||
|
||||
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError
|
||||
@@ -58,7 +58,7 @@ class ChilloutzoneIE(InfoExtractor):
|
||||
|
||||
base64_video_info = self._html_search_regex(
|
||||
r'var cozVidData = "(.+?)";', webpage, 'video data')
|
||||
decoded_video_info = base64.b64decode(base64_video_info.encode('utf-8')).decode('utf-8')
|
||||
decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8')
|
||||
video_info_dict = json.loads(decoded_video_info)
|
||||
|
||||
# get video information from dict
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
@@ -44,8 +44,7 @@ class ChirbitIE(InfoExtractor):
|
||||
|
||||
# Reverse engineered from https://chirb.it/js/chirbit.player.js (look
|
||||
# for soundURL)
|
||||
audio_url = base64.b64decode(
|
||||
data_fd[::-1].encode('ascii')).decode('utf-8')
|
||||
audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8')
|
||||
|
||||
title = self._search_regex(
|
||||
r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')
|
||||
|
||||
@@ -174,6 +174,8 @@ class InfoExtractor(object):
|
||||
width : height ratio as float.
|
||||
* no_resume The server does not support resuming the
|
||||
(HTTP or RTMP) download. Boolean.
|
||||
* downloader_options A dictionary of downloader options as
|
||||
described in FileDownloader
|
||||
|
||||
url: Final video URL.
|
||||
ext: Video filename extension.
|
||||
@@ -642,19 +644,31 @@ class InfoExtractor(object):
|
||||
content, _ = res
|
||||
return content
|
||||
|
||||
def _download_xml_handle(
|
||||
self, url_or_request, video_id, note='Downloading XML',
|
||||
errnote='Unable to download XML', transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
"""Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)"""
|
||||
res = self._download_webpage_handle(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
if res is False:
|
||||
return res
|
||||
xml_string, urlh = res
|
||||
return self._parse_xml(
|
||||
xml_string, video_id, transform_source=transform_source,
|
||||
fatal=fatal), urlh
|
||||
|
||||
def _download_xml(self, url_or_request, video_id,
|
||||
note='Downloading XML', errnote='Unable to download XML',
|
||||
transform_source=None, fatal=True, encoding=None,
|
||||
data=None, headers={}, query={}):
|
||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||
xml_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
if xml_string is False:
|
||||
return xml_string
|
||||
return self._parse_xml(
|
||||
xml_string, video_id, transform_source=transform_source,
|
||||
fatal=fatal)
|
||||
res = self._download_xml_handle(
|
||||
url_or_request, video_id, note=note, errnote=errnote,
|
||||
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
||||
data=data, headers=headers, query=query)
|
||||
return res if res is False else res[0]
|
||||
|
||||
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
|
||||
if transform_source:
|
||||
@@ -1027,7 +1041,7 @@ class InfoExtractor(object):
|
||||
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
|
||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||
elif item_type == 'Article':
|
||||
elif item_type in ('Article', 'NewsArticle'):
|
||||
info.update({
|
||||
'timestamp': parse_iso8601(e.get('datePublished')),
|
||||
'title': unescapeHTML(e.get('headline')),
|
||||
@@ -1692,22 +1706,24 @@ class InfoExtractor(object):
|
||||
})
|
||||
return subtitles
|
||||
|
||||
def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
|
||||
def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
|
||||
xspf = self._download_xml(
|
||||
playlist_url, playlist_id, 'Downloading xpsf playlist',
|
||||
xspf_url, playlist_id, 'Downloading xpsf playlist',
|
||||
'Unable to download xspf manifest', fatal=fatal)
|
||||
if xspf is False:
|
||||
return []
|
||||
return self._parse_xspf(xspf, playlist_id)
|
||||
return self._parse_xspf(
|
||||
xspf, playlist_id, xspf_url=xspf_url,
|
||||
xspf_base_url=base_url(xspf_url))
|
||||
|
||||
def _parse_xspf(self, playlist, playlist_id):
|
||||
def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
|
||||
NS_MAP = {
|
||||
'xspf': 'http://xspf.org/ns/0/',
|
||||
's1': 'http://static.streamone.nl/player/ns/0',
|
||||
}
|
||||
|
||||
entries = []
|
||||
for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
|
||||
for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
|
||||
title = xpath_text(
|
||||
track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
|
||||
description = xpath_text(
|
||||
@@ -1717,12 +1733,18 @@ class InfoExtractor(object):
|
||||
duration = float_or_none(
|
||||
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
|
||||
|
||||
formats = [{
|
||||
'url': location.text,
|
||||
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
||||
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
||||
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
||||
} for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
|
||||
formats = []
|
||||
for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
|
||||
format_url = urljoin(xspf_base_url, location.text)
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'manifest_url': xspf_url,
|
||||
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
||||
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
||||
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
@@ -1736,18 +1758,18 @@ class InfoExtractor(object):
|
||||
return entries
|
||||
|
||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
|
||||
res = self._download_webpage_handle(
|
||||
res = self._download_xml_handle(
|
||||
mpd_url, video_id,
|
||||
note=note or 'Downloading MPD manifest',
|
||||
errnote=errnote or 'Failed to download MPD manifest',
|
||||
fatal=fatal)
|
||||
if res is False:
|
||||
return []
|
||||
mpd, urlh = res
|
||||
mpd_doc, urlh = res
|
||||
mpd_base_url = base_url(urlh.geturl())
|
||||
|
||||
return self._parse_mpd_formats(
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
|
||||
mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
|
||||
formats_dict=formats_dict, mpd_url=mpd_url)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
||||
@@ -2021,17 +2043,16 @@ class InfoExtractor(object):
|
||||
return formats
|
||||
|
||||
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
|
||||
res = self._download_webpage_handle(
|
||||
res = self._download_xml_handle(
|
||||
ism_url, video_id,
|
||||
note=note or 'Downloading ISM manifest',
|
||||
errnote=errnote or 'Failed to download ISM manifest',
|
||||
fatal=fatal)
|
||||
if res is False:
|
||||
return []
|
||||
ism, urlh = res
|
||||
ism_doc, urlh = res
|
||||
|
||||
return self._parse_ism_formats(
|
||||
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
|
||||
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
|
||||
|
||||
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
|
||||
"""
|
||||
@@ -2129,8 +2150,8 @@ class InfoExtractor(object):
|
||||
return formats
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
|
||||
def absolute_url(video_url):
|
||||
return compat_urlparse.urljoin(base_url, video_url)
|
||||
def absolute_url(item_url):
|
||||
return urljoin(base_url, item_url)
|
||||
|
||||
def parse_content_type(content_type):
|
||||
if not content_type:
|
||||
@@ -2187,7 +2208,7 @@ class InfoExtractor(object):
|
||||
if src:
|
||||
_, formats = _media_formats(src, media_type)
|
||||
media_info['formats'].extend(formats)
|
||||
media_info['thumbnail'] = media_attributes.get('poster')
|
||||
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
|
||||
if media_content:
|
||||
for source_tag in re.findall(r'<source[^>]+>', media_content):
|
||||
source_attributes = extract_attributes(source_tag)
|
||||
@@ -2248,9 +2269,10 @@ class InfoExtractor(object):
|
||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||
query = compat_urlparse.urlparse(url).query
|
||||
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
|
||||
url_base = self._search_regex(
|
||||
r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
|
||||
http_base_url = '%s:%s' % ('http', url_base)
|
||||
mobj = re.search(
|
||||
r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
|
||||
url_base = mobj.group('url')
|
||||
http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
|
||||
formats = []
|
||||
|
||||
def manifest_url(manifest):
|
||||
@@ -2350,7 +2372,10 @@ class InfoExtractor(object):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
if track.get('kind') != 'captions':
|
||||
track_kind = track.get('kind')
|
||||
if not track_kind or not isinstance(track_kind, compat_str):
|
||||
continue
|
||||
if track_kind.lower() not in ('captions', 'subtitles'):
|
||||
continue
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
if not track_url:
|
||||
@@ -2404,7 +2429,7 @@ class InfoExtractor(object):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=m3u8_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
elif source_type == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source_url, video_id, mpd_id=mpd_id, fatal=False))
|
||||
elif ext == 'smil':
|
||||
|
||||
@@ -3,13 +3,13 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import base64
|
||||
import zlib
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_request,
|
||||
@@ -272,8 +272,8 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
}
|
||||
|
||||
def _decrypt_subtitles(self, data, iv, id):
|
||||
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
|
||||
iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
|
||||
data = bytes_to_intlist(compat_b64decode(data))
|
||||
iv = bytes_to_intlist(compat_b64decode(iv))
|
||||
id = int(id)
|
||||
|
||||
def obfuscate_key_aux(count, modulo, start):
|
||||
|
||||
@@ -10,6 +10,7 @@ from ..aes import (
|
||||
aes_cbc_decrypt,
|
||||
aes_cbc_encrypt,
|
||||
)
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
@@ -93,7 +94,7 @@ class DaisukiMottoIE(InfoExtractor):
|
||||
|
||||
rtn = self._parse_json(
|
||||
intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(
|
||||
base64.b64decode(encrypted_rtn)),
|
||||
compat_b64decode(encrypted_rtn)),
|
||||
aes_key, iv)).decode('utf-8').rstrip('\0'),
|
||||
video_id)
|
||||
|
||||
|
||||
@@ -5,15 +5,16 @@ import re
|
||||
import string
|
||||
|
||||
from .discoverygo import DiscoveryGoBaseIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
update_url_query,
|
||||
try_get,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
@@ -44,7 +45,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||
site, path, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
react_data = self._parse_json(self._search_regex(
|
||||
@@ -55,14 +56,13 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
video_id = video['id']
|
||||
|
||||
access_token = self._download_json(
|
||||
'https://www.discovery.com/anonymous', display_id, query={
|
||||
'authLink': update_url_query(
|
||||
'https://login.discovery.com/v1/oauth2/authorize', {
|
||||
'client_id': react_data['application']['apiClientId'],
|
||||
'redirect_uri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html',
|
||||
'response_type': 'anonymous',
|
||||
'state': 'nonce,' + ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
||||
})
|
||||
'https://www.%s.com/anonymous' % site, display_id, query={
|
||||
'authRel': 'authorization',
|
||||
'client_id': try_get(
|
||||
react_data, lambda x: x['application']['apiClientId'],
|
||||
compat_str) or '3020a40c2356a645b4b4',
|
||||
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
||||
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
|
||||
})['access_token']
|
||||
|
||||
try:
|
||||
|
||||
@@ -12,25 +12,28 @@ from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
|
||||
class DPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<domain>www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# non geo restricted, via secure api, unsigned download hls URL
|
||||
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
|
||||
'info_dict': {
|
||||
'id': '3172',
|
||||
'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
|
||||
'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet',
|
||||
'ext': 'mp4',
|
||||
'title': 'Svensken lär sig njuta av livet',
|
||||
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
|
||||
@@ -48,7 +51,7 @@ class DPlayIE(InfoExtractor):
|
||||
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
|
||||
'info_dict': {
|
||||
'id': '70816',
|
||||
'display_id': 'season-6-episode-12',
|
||||
'display_id': 'mig-og-min-mor/season-6-episode-12',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 12',
|
||||
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
|
||||
@@ -65,6 +68,33 @@ class DPlayIE(InfoExtractor):
|
||||
# geo restricted, via direct unsigned hls URL
|
||||
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# disco-api
|
||||
'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
|
||||
'info_dict': {
|
||||
'id': '40206',
|
||||
'display_id': 'i-kongens-klr/sesong-1-episode-7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 7',
|
||||
'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
|
||||
'duration': 2611.16,
|
||||
'timestamp': 1516726800,
|
||||
'upload_date': '20180123',
|
||||
'series': 'I kongens klær',
|
||||
'season_number': 1,
|
||||
'episode_number': 7,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
|
||||
'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -72,10 +102,81 @@ class DPlayIE(InfoExtractor):
|
||||
display_id = mobj.group('id')
|
||||
domain = mobj.group('domain')
|
||||
|
||||
self._initialize_geo_bypass([mobj.group('country').upper()])
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id=["\'](\d+)', webpage, 'video id')
|
||||
r'data-video-id=["\'](\d+)', webpage, 'video id', default=None)
|
||||
|
||||
if not video_id:
|
||||
host = mobj.group('host')
|
||||
disco_base = 'https://disco-api.%s' % host
|
||||
self._download_json(
|
||||
'%s/token' % disco_base, display_id, 'Downloading token',
|
||||
query={
|
||||
'realm': host.replace('.', ''),
|
||||
})
|
||||
video = self._download_json(
|
||||
'%s/content/videos/%s' % (disco_base, display_id), display_id,
|
||||
headers={
|
||||
'Referer': url,
|
||||
'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1',
|
||||
}, query={
|
||||
'include': 'show'
|
||||
})
|
||||
video_id = video['data']['id']
|
||||
info = video['data']['attributes']
|
||||
title = info['name']
|
||||
formats = []
|
||||
for format_id, format_dict in self._download_json(
|
||||
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
|
||||
display_id)['data']['attributes']['streaming'].items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if format_id == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, display_id, mpd_id='dash', fatal=False))
|
||||
elif format_id == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
series = None
|
||||
try:
|
||||
included = video.get('included')
|
||||
if isinstance(included, list):
|
||||
show = next(e for e in included if e.get('type') == 'show')
|
||||
series = try_get(
|
||||
show, lambda x: x['attributes']['name'], compat_str)
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'duration': float_or_none(
|
||||
info.get('videoDuration'), scale=1000),
|
||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
||||
'series': series,
|
||||
'season_number': int_or_none(info.get('seasonNumber')),
|
||||
'episode_number': int_or_none(info.get('episodeNumber')),
|
||||
'age_limit': int_or_none(info.get('minimum_age')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
info = self._download_json(
|
||||
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
qualities,
|
||||
sanitized_Request,
|
||||
@@ -42,7 +42,7 @@ class DumpertIE(InfoExtractor):
|
||||
r'data-files="([^"]+)"', webpage, 'data files')
|
||||
|
||||
files = self._parse_json(
|
||||
base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
|
||||
compat_b64decode(files_base64).decode('utf-8'),
|
||||
video_id)
|
||||
|
||||
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
|
||||
|
||||
@@ -32,7 +32,7 @@ class DVTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
|
||||
'info_dict': {
|
||||
'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
|
||||
'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci',
|
||||
'id': '973eb3bc854e11e498be002590604f2e',
|
||||
},
|
||||
'playlist': [{
|
||||
@@ -91,10 +91,24 @@ class DVTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/',
|
||||
'md5': '87defe16681b1429c91f7a74809823c6',
|
||||
'info_dict': {
|
||||
'id': 'f5ae72f6fad611e794dbac1f6b220ee8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _parse_video_metadata(self, js, video_id):
|
||||
def _parse_video_metadata(self, js, video_id, live_js=None):
|
||||
data = self._parse_json(js, video_id, transform_source=js_to_json)
|
||||
if live_js:
|
||||
data.update(self._parse_json(
|
||||
live_js, video_id, transform_source=js_to_json))
|
||||
|
||||
title = unescapeHTML(data['title'])
|
||||
|
||||
@@ -142,13 +156,18 @@ class DVTVIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# live content
|
||||
live_item = self._search_regex(
|
||||
r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});',
|
||||
webpage, 'video', default=None)
|
||||
|
||||
# single video
|
||||
item = self._search_regex(
|
||||
r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
|
||||
webpage, 'video', default=None, fatal=False)
|
||||
webpage, 'video', default=None)
|
||||
|
||||
if item:
|
||||
return self._parse_video_metadata(item, video_id)
|
||||
return self._parse_video_metadata(item, video_id, live_item)
|
||||
|
||||
# playlist
|
||||
items = re.findall(
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_b64decode,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
@@ -36,9 +36,9 @@ class EinthusanIE(InfoExtractor):
|
||||
|
||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||
def _decrypt(self, encrypted_data, video_id):
|
||||
return self._parse_json(base64.b64decode((
|
||||
return self._parse_json(compat_b64decode((
|
||||
encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
|
||||
).encode('ascii')).decode('utf-8'), video_id)
|
||||
)).decode('utf-8'), video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -162,6 +162,7 @@ from .cbc import (
|
||||
CBCPlayerIE,
|
||||
CBCWatchVideoIE,
|
||||
CBCWatchIE,
|
||||
CBCOlympicsIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
@@ -373,8 +374,10 @@ from .franceculture import FranceCultureIE
|
||||
from .franceinter import FranceInterIE
|
||||
from .francetv import (
|
||||
FranceTVIE,
|
||||
FranceTVSiteIE,
|
||||
FranceTVEmbedIE,
|
||||
FranceTVInfoIE,
|
||||
FranceTVJeunesseIE,
|
||||
GenerationWhatIE,
|
||||
CultureboxIE,
|
||||
)
|
||||
@@ -382,7 +385,10 @@ from .freesound import FreesoundIE
|
||||
from .freespeech import FreespeechIE
|
||||
from .freshlive import FreshLiveIE
|
||||
from .funimation import FunimationIE
|
||||
from .funk import FunkIE
|
||||
from .funk import (
|
||||
FunkMixIE,
|
||||
FunkChannelIE,
|
||||
)
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
@@ -426,6 +432,7 @@ from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import HGTVComShowIE
|
||||
from .hidive import HiDiveIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hitrecord import HitRecordIE
|
||||
@@ -543,6 +550,7 @@ from .limelight import (
|
||||
LimelightChannelIE,
|
||||
LimelightChannelListIE,
|
||||
)
|
||||
from .line import LineTVIE
|
||||
from .litv import LiTVIE
|
||||
from .liveleak import (
|
||||
LiveLeakIE,
|
||||
@@ -563,7 +571,11 @@ from .lynda import (
|
||||
)
|
||||
from .m6 import M6IE
|
||||
from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .mailru import (
|
||||
MailRuIE,
|
||||
MailRuMusicIE,
|
||||
MailRuMusicSearchIE,
|
||||
)
|
||||
from .makerschannel import MakersChannelIE
|
||||
from .makertv import MakerTVIE
|
||||
from .mangomolo import (
|
||||
@@ -630,7 +642,10 @@ from .musicplayon import MusicPlayOnIE
|
||||
from .mwave import MwaveIE, MwaveMeetGreetIE
|
||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvi import MyviIE
|
||||
from .myvi import (
|
||||
MyviIE,
|
||||
MyviEmbedIE,
|
||||
)
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import (
|
||||
NationalGeographicVideoIE,
|
||||
@@ -644,6 +659,7 @@ from .nbc import (
|
||||
NBCIE,
|
||||
NBCNewsIE,
|
||||
NBCOlympicsIE,
|
||||
NBCOlympicsStreamIE,
|
||||
NBCSportsIE,
|
||||
NBCSportsVPlayerIE,
|
||||
)
|
||||
@@ -860,6 +876,7 @@ from .rai import (
|
||||
RaiPlayPlaylistIE,
|
||||
RaiIE,
|
||||
)
|
||||
from .raywenderlich import RayWenderlichIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import RedBullTVIE
|
||||
@@ -881,7 +898,6 @@ from .revision3 import (
|
||||
Revision3IE,
|
||||
)
|
||||
from .rice import RICEIE
|
||||
from .ringtv import RingTVIE
|
||||
from .rmcdecouverte import RMCDecouverteIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
@@ -901,6 +917,7 @@ from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .rtvs import RTVSIE
|
||||
from .rudo import RudoIE
|
||||
from .ruhd import RUHDIE
|
||||
from .ruleporn import RulePornIE
|
||||
@@ -933,6 +950,10 @@ from .servingsys import ServingSysIE
|
||||
from .servus import ServusIE
|
||||
from .sevenplus import SevenPlusIE
|
||||
from .sexu import SexuIE
|
||||
from .seznamzpravy import (
|
||||
SeznamZpravyIE,
|
||||
SeznamZpravyArticleIE,
|
||||
)
|
||||
from .shahid import (
|
||||
ShahidIE,
|
||||
ShahidShowIE,
|
||||
@@ -990,6 +1011,7 @@ from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
from .srgssr import (
|
||||
SRGSSRIE,
|
||||
@@ -1033,9 +1055,14 @@ from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .telequebec import TeleQuebecIE
|
||||
from .telequebec import (
|
||||
TeleQuebecIE,
|
||||
TeleQuebecEmissionIE,
|
||||
TeleQuebecLiveIE,
|
||||
)
|
||||
from .teletask import TeleTaskIE
|
||||
from .telewebion import TelewebionIE
|
||||
from .tennistv import TennisTVIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .tfo import TFOIE
|
||||
@@ -1045,7 +1072,6 @@ from .theplatform import (
|
||||
ThePlatformFeedIE,
|
||||
)
|
||||
from .thescene import TheSceneIE
|
||||
from .thesixtyone import TheSixtyOneIE
|
||||
from .thestar import TheStarIE
|
||||
from .thesun import TheSunIE
|
||||
from .theweatherchannel import TheWeatherChannelIE
|
||||
@@ -1191,7 +1217,6 @@ from .vice import (
|
||||
ViceArticleIE,
|
||||
ViceShowIE,
|
||||
)
|
||||
from .viceland import VicelandIE
|
||||
from .vidbit import VidbitIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videa import VideaIE
|
||||
@@ -1206,6 +1231,7 @@ from .videomore import (
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videopress import VideoPressIE
|
||||
from .vidio import VidioIE
|
||||
from .vidlii import VidLiiIE
|
||||
from .vidme import (
|
||||
VidmeIE,
|
||||
VidmeUserIE,
|
||||
@@ -1349,6 +1375,7 @@ from .yandexmusic import (
|
||||
YandexMusicPlaylistIE,
|
||||
)
|
||||
from .yandexdisk import YandexDiskIE
|
||||
from .yapfiles import YapFilesIE
|
||||
from .yesjapan import YesJapanIE
|
||||
from .yinyuetai import YinYueTaiIE
|
||||
from .ynet import YnetIE
|
||||
|
||||
@@ -33,7 +33,7 @@ class FranceInterIE(InfoExtractor):
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
upload_date_str = self._search_regex(
|
||||
r'class=["\']cover-emission-period["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||
webpage, 'upload date', fatal=False)
|
||||
if upload_date_str:
|
||||
upload_date_list = upload_date_str.split()
|
||||
|
||||
@@ -5,19 +5,89 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
try_get,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
def _make_url_result(self, video_or_full_id, catalog=None):
|
||||
full_id = 'francetv:%s' % video_or_full_id
|
||||
if '@' not in video_or_full_id and catalog:
|
||||
full_id += '@%s' % catalog
|
||||
return self.url_result(
|
||||
full_id, ie=FranceTVIE.ie_key(),
|
||||
video_id=video_or_full_id.split('@')[0])
|
||||
|
||||
|
||||
class FranceTVIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://
|
||||
sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
|
||||
.*?\bidDiffusion=[^&]+|
|
||||
(?:
|
||||
https?://videos\.francetv\.fr/video/|
|
||||
francetv:
|
||||
)
|
||||
(?P<id>[^@]+)(?:@(?P<catalog>.+))?
|
||||
)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# without catalog
|
||||
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
|
||||
'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
|
||||
'info_dict': {
|
||||
'id': '162311093',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
|
||||
'timestamp': 1502623500,
|
||||
'upload_date': '20170813',
|
||||
},
|
||||
}, {
|
||||
# with catalog
|
||||
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'francetv:162311093',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'francetv:NI_1004933@Zouzous',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'francetv:NI_983319@Info-web',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'francetv:NI_983319',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'francetv:NI_657393@Regions',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# france-3 live
|
||||
'url': 'francetv:SIM_France3',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video(self, video_id, catalogue=None):
|
||||
# Videos are identified by idDiffusion so catalogue part is optional.
|
||||
# However when provided, some extra formats may be returned so we pass
|
||||
# it if available.
|
||||
info = self._download_json(
|
||||
'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
|
||||
video_id, 'Downloading video JSON', query={
|
||||
@@ -27,7 +97,8 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
if info.get('status') == 'NOK':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
|
||||
'%s returned error: %s' % (self.IE_NAME, info['message']),
|
||||
expected=True)
|
||||
allowed_countries = info['videos'][0].get('geoblocage')
|
||||
if allowed_countries:
|
||||
georestricted = True
|
||||
@@ -42,6 +113,21 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
else:
|
||||
georestricted = False
|
||||
|
||||
def sign(manifest_url, manifest_id):
|
||||
for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
|
||||
signed_url = self._download_webpage(
|
||||
'https://%s/esi/TA' % host, video_id,
|
||||
'Downloading signed %s manifest URL' % manifest_id,
|
||||
fatal=False, query={
|
||||
'url': manifest_url,
|
||||
})
|
||||
if (signed_url and isinstance(signed_url, compat_str) and
|
||||
re.search(r'^(?:https?:)?//', signed_url)):
|
||||
return signed_url
|
||||
return manifest_url
|
||||
|
||||
is_live = None
|
||||
|
||||
formats = []
|
||||
for video in info['videos']:
|
||||
if video['statut'] != 'ONLINE':
|
||||
@@ -49,6 +135,10 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
video_url = video['url']
|
||||
if not video_url:
|
||||
continue
|
||||
if is_live is None:
|
||||
is_live = (try_get(
|
||||
video, lambda x: x['plages_ouverture'][0]['direct'],
|
||||
bool) is True) or '/live.francetv.fr/' in video_url
|
||||
format_id = video['format']
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
@@ -56,17 +146,14 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
# See https://github.com/rg3/youtube-dl/issues/3963
|
||||
# m3u8 urls work fine
|
||||
continue
|
||||
f4m_url = self._download_webpage(
|
||||
'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
|
||||
video_id, 'Downloading f4m manifest token', fatal=False)
|
||||
if f4m_url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
|
||||
video_id, f4m_id=format_id, fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
|
||||
video_id, f4m_id=format_id, fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
sign(video_url, format_id), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||
fatal=False))
|
||||
elif video_url.startswith('rtmp'):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
@@ -97,33 +184,48 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': clean_html(info['synopsis']),
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||
'is_live': is_live,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
catalog = mobj.group('catalog')
|
||||
|
||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
if not video_id:
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
video_id = qs.get('idDiffusion', [None])[0]
|
||||
catalog = qs.get('catalogue', [None])[0]
|
||||
if not video_id:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
return self._extract_video(video_id, catalog)
|
||||
|
||||
|
||||
class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||
'info_dict': {
|
||||
'id': '157550144',
|
||||
'id': '162311093',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
|
||||
'timestamp': 1494156300,
|
||||
'upload_date': '20170507',
|
||||
'timestamp': 1502623500,
|
||||
'upload_date': '20170813',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FranceTVIE.ie_key()],
|
||||
}, {
|
||||
# france3
|
||||
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
|
||||
@@ -156,6 +258,10 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.france.tv/142749-rouge-sang.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# france-3 live
|
||||
'url': 'https://www.france.tv/france-3/direct.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -172,13 +278,14 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
video_id, catalogue = self._html_search_regex(
|
||||
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
webpage, 'video ID').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
return self._make_url_result(video_id, catalogue)
|
||||
|
||||
|
||||
class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
|
||||
'info_dict': {
|
||||
'id': 'NI_983319',
|
||||
@@ -188,7 +295,11 @@ class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
|
||||
'timestamp': 1493981780,
|
||||
'duration': 16,
|
||||
},
|
||||
}
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FranceTVIE.ie_key()],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -197,12 +308,12 @@ class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
|
||||
'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
|
||||
video_id)
|
||||
|
||||
return self._extract_video(video['video_id'], video.get('catalog'))
|
||||
return self._make_url_result(video['video_id'], video.get('catalog'))
|
||||
|
||||
|
||||
class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'francetvinfo.fr'
|
||||
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)'
|
||||
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||
@@ -217,51 +328,18 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FranceTVIE.ie_key()],
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
|
||||
'info_dict': {
|
||||
'id': 'EV_20019',
|
||||
'ext': 'mp4',
|
||||
'title': 'Débat des candidats à la Commission européenne',
|
||||
'description': 'Débat des candidats à la Commission européenne',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'HLS (reqires ffmpeg)'
|
||||
},
|
||||
'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
|
||||
'md5': 'f485bda6e185e7d15dbc69b72bae993e',
|
||||
'info_dict': {
|
||||
'id': 'NI_173343',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les entreprises familiales : le secret de la réussite',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'timestamp': 1433273139,
|
||||
'upload_date': '20150602',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
|
||||
'md5': 'f485bda6e185e7d15dbc69b72bae993e',
|
||||
'info_dict': {
|
||||
'id': 'NI_657393',
|
||||
'ext': 'mp4',
|
||||
'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"',
|
||||
'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'timestamp': 1458300695,
|
||||
'upload_date': '20160318',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Dailymotion embed
|
||||
'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
|
||||
@@ -283,9 +361,9 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
dailymotion_urls = DailymotionIE._extract_urls(webpage)
|
||||
if dailymotion_urls:
|
||||
@@ -297,12 +375,13 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
(r'id-video=([^@]+@[^"]+)',
|
||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
|
||||
webpage, 'video id').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
return self._make_url_result(video_id, catalogue)
|
||||
|
||||
|
||||
class GenerationWhatIE(InfoExtractor):
|
||||
IE_NAME = 'france2.fr:generation-what'
|
||||
_VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
|
||||
@@ -314,6 +393,10 @@ class GenerationWhatIE(InfoExtractor):
|
||||
'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
|
||||
'upload_date': '20160411',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
|
||||
'only_matching': True,
|
||||
@@ -321,42 +404,87 @@ class GenerationWhatIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
youtube_id = self._search_regex(
|
||||
r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
|
||||
webpage, 'youtube id')
|
||||
return self.url_result(youtube_id, 'Youtube', youtube_id)
|
||||
|
||||
return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id)
|
||||
|
||||
|
||||
class CultureboxIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'culturebox.francetvinfo.fr'
|
||||
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
||||
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
|
||||
'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
|
||||
_TESTS = [{
|
||||
'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689',
|
||||
'info_dict': {
|
||||
'id': 'EV_50111',
|
||||
'ext': 'flv',
|
||||
'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
|
||||
'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
|
||||
'upload_date': '20150320',
|
||||
'timestamp': 1426892400,
|
||||
'duration': 2760.9,
|
||||
'id': 'EV_134885',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7',
|
||||
'description': 'md5:19c44af004b88219f4daa50fa9a351d4',
|
||||
'upload_date': '20180206',
|
||||
'timestamp': 1517945220,
|
||||
'duration': 5981,
|
||||
},
|
||||
}
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FranceTVIE.ie_key()],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, name)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if ">Ce live n'est plus disponible en replay<" in webpage:
|
||||
raise ExtractorError('Video %s is not available' % name, expected=True)
|
||||
raise ExtractorError(
|
||||
'Video %s is not available' % display_id, expected=True)
|
||||
|
||||
video_id, catalogue = self._search_regex(
|
||||
r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]',
|
||||
webpage, 'video id').split('@')
|
||||
|
||||
return self._extract_video(video_id, catalogue)
|
||||
return self._make_url_result(video_id, catalogue)
|
||||
|
||||
|
||||
class FranceTVJeunesseIE(FranceTVBaseInfoExtractor):
    # Playlist extractor for "heros" pages on zouzous.fr and ludo.fr.
    # The page URL (without query string or fragment) is captured as the
    # ``url`` group so that the playlist JSON endpoint can be derived from it.
    _VALID_URL = r'(?P<url>https?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P<id>[^/?#&]+))'

    _TESTS = [{
        'url': 'https://www.zouzous.fr/heros/simon',
        'info_dict': {
            'id': 'simon',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://www.ludo.fr/heros/ninjago',
        'info_dict': {
            'id': 'ninjago',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.zouzous.fr/heros/simon?abc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result for the hero page at ``url``.

        Downloads ``<page url>/playlist`` as JSON and turns every item
        carrying a string ``identity`` into a URL result.

        Raises ExtractorError (expected) when the playlist JSON reports a
        missing or zero ``count``.
        """
        base_url, playlist_id = re.match(
            self._VALID_URL, url).group('url', 'id')

        playlist = self._download_json(
            '%s/playlist' % base_url, playlist_id)

        if not playlist.get('count'):
            raise ExtractorError(
                '%s is not available' % playlist_id, expected=True)

        # Items without a usable string identity are silently skipped.
        identities = (item.get('identity') for item in playlist['items'])
        entries = [
            self._make_url_result(identity)
            for identity in identities
            if identity and isinstance(identity, compat_str)]

        return self.playlist_result(entries, playlist_id)
|
||||
|
||||
@@ -1,43 +1,102 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
from ..utils import extract_attributes
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P<id>[^?/#]+)'
|
||||
class FunkBaseIE(InfoExtractor):
    # Common base for the funk.net extractors: converts a video metadata
    # dict into a result that is resolved by the Nexx extractor.

    def _make_url_result(self, video):
        """Build a ``url_transparent`` result for ``video``.

        ``video`` is a metadata dict; ``sourceId`` is mandatory (a KeyError
        is raised when it is absent), every other field is optional and
        ends up as None when missing.
        """
        source_id = video['sourceId']
        result = {
            '_type': 'url_transparent',
            # NOTE(review): 741 is hard-coded here, presumably the Nexx
            # domain id used by funk.net - confirm.
            'url': 'nexx:741:%s' % source_id,
            'ie_key': NexxIE.ie_key(),
            'id': source_id,
            'title': video.get('title'),
            'description': video.get('description'),
        }
        # Numeric fields: result key -> key in the metadata dict.
        numeric_fields = (
            ('duration', 'duration'),
            ('season_number', 'seasonNr'),
            ('episode_number', 'episodeNr'),
        )
        for result_key, video_key in numeric_fields:
            result[result_key] = int_or_none(video.get(video_key))
        return result
|
||||
|
||||
|
||||
class FunkMixIE(FunkBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/',
|
||||
'md5': '4d40974481fa3475f8bccfd20c5361f8',
|
||||
'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
|
||||
'md5': '8edf617c2f2b7c9847dfda313f199009',
|
||||
'info_dict': {
|
||||
'id': '716599',
|
||||
'id': '123748',
|
||||
'ext': 'mp4',
|
||||
'title': 'Neue Rechte Welle',
|
||||
'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69',
|
||||
'timestamp': 1501337639,
|
||||
'upload_date': '20170729',
|
||||
'title': '"Die realste Kifferdoku aller Zeiten"',
|
||||
'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
|
||||
'timestamp': 1490274721,
|
||||
'upload_date': '20170323',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
mix_id = mobj.group('id')
|
||||
alias = mobj.group('alias')
|
||||
|
||||
lists = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/curation/curatedLists/',
|
||||
mix_id, headers={
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
'size': 100,
|
||||
})['result']['lists']
|
||||
|
||||
metas = next(
|
||||
l for l in lists
|
||||
if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
|
||||
video = next(
|
||||
meta['videoDataDelegate']
|
||||
for meta in metas if meta.get('alias') == alias)
|
||||
|
||||
return self._make_url_result(video)
|
||||
|
||||
|
||||
class FunkChannelIE(FunkBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
|
||||
'info_dict': {
|
||||
'id': '1155821',
|
||||
'ext': 'mp4',
|
||||
'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
|
||||
'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
|
||||
'timestamp': 1514507395,
|
||||
'upload_date': '20171229',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/',
|
||||
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
alias = mobj.group('alias')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
results = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
|
||||
headers={
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
'channelId': channel_id,
|
||||
'size': 100,
|
||||
})['result']
|
||||
|
||||
domain_id = NexxIE._extract_domain_id(webpage) or '741'
|
||||
nexx_id = extract_attributes(self._search_regex(
|
||||
r'(<div[^>]id=["\']mediaplayer-funk[^>]+>)',
|
||||
webpage, 'media player'))['data-id']
|
||||
video = next(r for r in results if r.get('alias') == alias)
|
||||
|
||||
return self.url_result(
|
||||
'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
|
||||
video_id=nexx_id)
|
||||
return self._make_url_result(video)
|
||||
|
||||
@@ -5,9 +5,9 @@ from .ooyala import OoyalaIE
|
||||
|
||||
|
||||
class FusionIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
|
||||
'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
|
||||
'info_dict': {
|
||||
'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
|
||||
'ext': 'mp4',
|
||||
@@ -20,7 +20,7 @@ class FusionIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://fusion.net/video/201781',
|
||||
'url': 'http://fusion.tv/video/201781',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
@@ -23,6 +23,11 @@ class GameInformerIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
brightcove_id = self._search_regex(r"getVideo\('[^']+video_id=(\d+)", webpage, 'brightcove id')
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, headers=self.geo_verification_headers())
|
||||
brightcove_id = self._search_regex(
|
||||
[r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"],
|
||||
webpage, 'brightcove id')
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
|
||||
brightcove_id)
|
||||
|
||||
@@ -101,6 +101,10 @@ from .vzaar import VzaarIE
|
||||
from .channel9 import Channel9IE
|
||||
from .vshare import VShareIE
|
||||
from .mediasite import MediasiteIE
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .yapfiles import YapFilesIE
|
||||
from .vice import ViceIE
|
||||
from .xfileshare import XFileShareIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -1938,6 +1942,49 @@ class GenericIE(InfoExtractor):
|
||||
'timestamp': 1474354800,
|
||||
'upload_date': '20160920',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
|
||||
'info_dict': {
|
||||
'id': '1731611',
|
||||
'ext': 'mp4',
|
||||
'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
|
||||
'description': 'md5:eb5f23826a027ba95277d105f248b825',
|
||||
'timestamp': 1516100691,
|
||||
'upload_date': '20180116',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [SpringboardPlatformIE.ie_key()],
|
||||
},
|
||||
{
|
||||
'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
|
||||
'info_dict': {
|
||||
'id': 'uPDB5I9wfp8',
|
||||
'ext': 'webm',
|
||||
'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
|
||||
'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
|
||||
'upload_date': '20160219',
|
||||
'uploader': 'Pocoyo - Português (BR)',
|
||||
'uploader_id': 'PocoyoBrazil',
|
||||
},
|
||||
'add_ie': [YoutubeIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
|
||||
'info_dict': {
|
||||
'id': 'vMDE4NzI1Mjgt690b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Котята',
|
||||
},
|
||||
'add_ie': [YapFilesIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
# {
|
||||
# # TODO: find another test
|
||||
@@ -2185,7 +2232,11 @@ class GenericIE(InfoExtractor):
|
||||
self._sort_formats(smil['formats'])
|
||||
return smil
|
||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
return self.playlist_result(
|
||||
self._parse_xspf(
|
||||
doc, video_id, xspf_url=url,
|
||||
xspf_base_url=compat_str(full_response.geturl())),
|
||||
video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
info_dict['formats'] = self._parse_mpd_formats(
|
||||
doc,
|
||||
@@ -2264,7 +2315,10 @@ class GenericIE(InfoExtractor):
|
||||
# Look for Brightcove New Studio embeds
|
||||
bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
|
||||
if bc_urls:
|
||||
return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
|
||||
return self.playlist_from_matches(
|
||||
bc_urls, video_id, video_title,
|
||||
getter=lambda x: smuggle_url(x, {'referrer': url}),
|
||||
ie='BrightcoveNew')
|
||||
|
||||
# Look for Nexx embeds
|
||||
nexx_urls = NexxIE._extract_urls(webpage)
|
||||
@@ -2906,6 +2960,27 @@ class GenericIE(InfoExtractor):
|
||||
for mediasite_url in mediasite_urls]
|
||||
return self.playlist_result(entries, video_id, video_title)
|
||||
|
||||
springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
|
||||
if springboardplatform_urls:
|
||||
return self.playlist_from_matches(
|
||||
springboardplatform_urls, video_id, video_title,
|
||||
ie=SpringboardPlatformIE.ie_key())
|
||||
|
||||
yapfiles_urls = YapFilesIE._extract_urls(webpage)
|
||||
if yapfiles_urls:
|
||||
return self.playlist_from_matches(
|
||||
yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
|
||||
|
||||
vice_urls = ViceIE._extract_urls(webpage)
|
||||
if vice_urls:
|
||||
return self.playlist_from_matches(
|
||||
vice_urls, video_id, video_title, ie=ViceIE.ie_key())
|
||||
|
||||
xfileshare_urls = XFileShareIE._extract_urls(webpage)
|
||||
if xfileshare_urls:
|
||||
return self.playlist_from_matches(
|
||||
xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
|
||||
|
||||
def merge_dicts(dict1, dict2):
|
||||
merged = {}
|
||||
for k, v in dict1.items():
|
||||
|
||||
@@ -2,11 +2,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
@@ -14,18 +17,19 @@ from ..utils import (
|
||||
class HeiseIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html'
|
||||
_TESTS = [{
|
||||
# kaltura embed
|
||||
'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html',
|
||||
'md5': 'ffed432483e922e88545ad9f2f15d30e',
|
||||
'info_dict': {
|
||||
'id': '2404147',
|
||||
'id': '1_kkrq94sm',
|
||||
'ext': 'mp4',
|
||||
'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone",
|
||||
'format_id': 'mp4_720p',
|
||||
'timestamp': 1411812600,
|
||||
'upload_date': '20140927',
|
||||
'timestamp': 1512734959,
|
||||
'upload_date': '20171208',
|
||||
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
|
||||
'thumbnail': r're:^https?://.*/gallery/$',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# YouTube embed
|
||||
'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html',
|
||||
@@ -42,6 +46,32 @@ class HeiseIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.heise.de/video/artikel/nachgehakt-Wie-sichert-das-c-t-Tool-Restric-tor-Windows-10-ab-3700244.html',
|
||||
'info_dict': {
|
||||
'id': '1_ntrmio2s',
|
||||
'ext': 'mp4',
|
||||
'title': "nachgehakt: Wie sichert das c't-Tool Restric'tor Windows 10 ab?",
|
||||
'description': 'md5:47e8ffb6c46d85c92c310a512d6db271',
|
||||
'timestamp': 1512470717,
|
||||
'upload_date': '20171205',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.heise.de/ct/artikel/c-t-uplink-20-8-Staubsaugerroboter-Xiaomi-Vacuum-2-AR-Brille-Meta-2-und-Android-rooten-3959893.html',
|
||||
'info_dict': {
|
||||
'id': '1_59mk80sf',
|
||||
'ext': 'mp4',
|
||||
'title': "c't uplink 20.8: Staubsaugerroboter Xiaomi Vacuum 2, AR-Brille Meta 2 und Android rooten",
|
||||
'description': 'md5:f50fe044d3371ec73a8f79fcebd74afc',
|
||||
'timestamp': 1517567237,
|
||||
'upload_date': '20180202',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
|
||||
'only_matching': True,
|
||||
@@ -57,19 +87,45 @@ class HeiseIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta('fulltitle', webpage, default=None)
|
||||
if not title or title == "c't":
|
||||
title = self._search_regex(
|
||||
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
|
||||
webpage, 'title')
|
||||
def extract_title(default=NO_DEFAULT):
|
||||
title = self._html_search_meta(
|
||||
('fulltitle', 'title'), webpage, default=None)
|
||||
if not title or title == "c't":
|
||||
title = self._search_regex(
|
||||
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
|
||||
webpage, 'title', default=None)
|
||||
if not title:
|
||||
title = self._html_search_regex(
|
||||
r'<h1[^>]+\bclass=["\']article_page_title[^>]+>(.+?)<',
|
||||
webpage, 'title', default=default)
|
||||
return title
|
||||
|
||||
title = extract_title(default=None)
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage)
|
||||
|
||||
kaltura_url = KalturaIE._extract_url(webpage)
|
||||
if kaltura_url:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(kaltura_url, {'source_url': url}),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
yt_urls = YoutubeIE._extract_urls(webpage)
|
||||
if yt_urls:
|
||||
return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key())
|
||||
return self.playlist_from_matches(
|
||||
yt_urls, video_id, title, ie=YoutubeIE.ie_key())
|
||||
|
||||
title = extract_title()
|
||||
|
||||
container_id = self._search_regex(
|
||||
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
|
||||
webpage, 'container ID')
|
||||
|
||||
sequenz_id = self._search_regex(
|
||||
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
|
||||
webpage, 'sequenz ID')
|
||||
@@ -95,10 +151,6 @@ class HeiseIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
|
||||
96
youtube_dl/extractor/hidive.py
Normal file
96
youtube_dl/extractor/hidive.py
Normal file
@@ -0,0 +1,96 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class HiDiveIE(InfoExtractor):
    # Extractor for hidive.com stream pages. The URL carries the series
    # slug (``title``) and an episode key such as ``s01e001`` (``key``).
    _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<title>[^/]+)/(?P<key>[^/?#&]+)'
    # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
    # so disabling geo bypass completely
    _GEO_BYPASS = False

    _TESTS = [{
        'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
        'info_dict': {
            'id': 'the-comic-artist-and-his-assistants/s01e001',
            'ext': 'mp4',
            'title': 'the-comic-artist-and-his-assistants/s01e001',
            'series': 'the-comic-artist-and-his-assistants',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Extract HLS formats and subtitles for a HIDIVE episode.

        Posts the series slug and episode key to the ``play/settings``
        endpoint and collects one set of HLS formats per rendition, plus
        any caption files listed for it.

        Raises a geo restriction error when the service reports
        ``RegionRestricted`` and an expected ExtractorError for any other
        restriction reason than ``None``.
        """
        mobj = re.match(self._VALID_URL, url)
        title, key = mobj.group('title', 'key')
        video_id = '%s/%s' % (title, key)

        settings = self._download_json(
            'https://www.hidive.com/play/settings', video_id,
            data=urlencode_postdata({
                'Title': title,
                'Key': key,
            }))

        restriction = settings.get('restrictionReason')
        if restriction == 'RegionRestricted':
            self.raise_geo_restricted()

        # The service spells "no restriction" as the string 'None'.
        if restriction and restriction != 'None':
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, restriction), expected=True)

        formats = []
        subtitles = {}
        for rendition_id, rendition in settings['renditions'].items():
            bitrates = rendition.get('bitrates')
            if not isinstance(bitrates, dict):
                continue
            m3u8_url = bitrates.get('hls')
            if not isinstance(m3u8_url, compat_str):
                continue
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='%s-hls' % rendition_id, fatal=False))
            cc_files = rendition.get('ccFiles')
            if not isinstance(cc_files, list):
                continue
            for cc_file in cc_files:
                # Each caption entry is a list; index 0 is used as the
                # language and index 2 as the URL.
                if not isinstance(cc_file, list) or len(cc_file) < 3:
                    continue
                cc_lang = cc_file[0]
                cc_url = cc_file[2]
                if not isinstance(cc_lang, compat_str) or not isinstance(
                        cc_url, compat_str):
                    continue
                subtitles.setdefault(cc_lang, []).append({
                    'url': cc_url,
                })
        # Formats gathered from several renditions arrive in arbitrary
        # (dict iteration) order; sort them so that format selection is
        # deterministic and an empty list is reported as an error.
        self._sort_formats(formats)

        # The episode key looks like "s01e001"; either part may be absent.
        season_number = int_or_none(self._search_regex(
            r's(\d+)', key, 'season number', default=None))
        episode_number = int_or_none(self._search_regex(
            r'e(\d+)', key, 'episode number', default=None))

        return {
            'id': video_id,
            'title': video_id,
            'subtitles': subtitles,
            'formats': formats,
            'series': title,
            'season_number': season_number,
            'episode_number': episode_number,
        }
|
||||
@@ -1,8 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
@@ -48,7 +47,7 @@ class HotNewHipHopIE(InfoExtractor):
|
||||
if 'mediaKey' not in mkd:
|
||||
raise ExtractorError('Did not get a media key')
|
||||
|
||||
redirect_url = base64.b64decode(video_url_base64).decode('utf-8')
|
||||
redirect_url = compat_b64decode(video_url_base64).decode('utf-8')
|
||||
redirect_req = HEADRequest(redirect_url)
|
||||
req = self._request_webpage(
|
||||
redirect_req, video_id,
|
||||
|
||||
@@ -2,9 +2,8 @@
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -61,7 +60,7 @@ class InfoQIE(BokeCCBaseIE):
|
||||
encoded_id = self._search_regex(
|
||||
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None)
|
||||
|
||||
real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
|
||||
real_id = compat_urllib_parse_unquote(compat_b64decode(encoded_id).decode('utf-8'))
|
||||
playpath = 'mp4:' + real_id
|
||||
|
||||
return [{
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -238,70 +238,58 @@ class InstagramUserIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _entries(self, uploader_id):
|
||||
query = {
|
||||
'__a': 1,
|
||||
}
|
||||
|
||||
def get_count(kind):
|
||||
def get_count(suffix):
|
||||
return int_or_none(try_get(
|
||||
node, lambda x: x['%ss' % kind]['count']))
|
||||
node, lambda x: x['edge_media_' + suffix]['count']))
|
||||
|
||||
for page_num in itertools.count(1):
|
||||
page = self._download_json(
|
||||
'https://instagram.com/%s/' % uploader_id, uploader_id,
|
||||
note='Downloading page %d' % page_num,
|
||||
fatal=False, query=query)
|
||||
if not page:
|
||||
break
|
||||
|
||||
nodes = try_get(page, lambda x: x['user']['media']['nodes'], list)
|
||||
if not nodes:
|
||||
break
|
||||
|
||||
max_id = None
|
||||
|
||||
for node in nodes:
|
||||
node_id = node.get('id')
|
||||
if node_id:
|
||||
max_id = node_id
|
||||
|
||||
if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
|
||||
continue
|
||||
video_id = node.get('code')
|
||||
if not video_id:
|
||||
continue
|
||||
|
||||
info = self.url_result(
|
||||
'https://instagram.com/p/%s/' % video_id,
|
||||
ie=InstagramIE.ie_key(), video_id=video_id)
|
||||
|
||||
description = try_get(
|
||||
node, [lambda x: x['caption'], lambda x: x['text']['id']],
|
||||
compat_str)
|
||||
thumbnail = node.get('thumbnail_src') or node.get('display_src')
|
||||
timestamp = int_or_none(node.get('date'))
|
||||
|
||||
comment_count = get_count('comment')
|
||||
like_count = get_count('like')
|
||||
view_count = int_or_none(node.get('video_views'))
|
||||
|
||||
info.update({
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'comment_count': comment_count,
|
||||
'like_count': like_count,
|
||||
'view_count': view_count,
|
||||
edges = self._download_json(
|
||||
'https://www.instagram.com/graphql/query/', uploader_id, query={
|
||||
'query_hash': '472f257a40c653c64c666ce877d59d2b',
|
||||
'variables': json.dumps({
|
||||
'id': uploader_id,
|
||||
'first': 999999999,
|
||||
})
|
||||
})['data']['user']['edge_owner_to_timeline_media']['edges']
|
||||
|
||||
yield info
|
||||
for edge in edges:
|
||||
node = edge['node']
|
||||
|
||||
if not max_id:
|
||||
break
|
||||
if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
|
||||
continue
|
||||
video_id = node.get('shortcode')
|
||||
if not video_id:
|
||||
continue
|
||||
|
||||
query['max_id'] = max_id
|
||||
info = self.url_result(
|
||||
'https://instagram.com/p/%s/' % video_id,
|
||||
ie=InstagramIE.ie_key(), video_id=video_id)
|
||||
|
||||
description = try_get(
|
||||
node, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str)
|
||||
thumbnail = node.get('thumbnail_src') or node.get('display_src')
|
||||
timestamp = int_or_none(node.get('taken_at_timestamp'))
|
||||
|
||||
comment_count = get_count('to_comment')
|
||||
like_count = get_count('preview_like')
|
||||
view_count = int_or_none(node.get('video_view_count'))
|
||||
|
||||
info.update({
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'comment_count': comment_count,
|
||||
'like_count': like_count,
|
||||
'view_count': view_count,
|
||||
})
|
||||
|
||||
yield info
|
||||
|
||||
def _real_extract(self, url):
|
||||
uploader_id = self._match_id(url)
|
||||
username = self._match_id(url)
|
||||
uploader_id = self._download_json(
|
||||
'https://instagram.com/%s/' % username, username, query={
|
||||
'__a': 1,
|
||||
})['graphql']['user']['id']
|
||||
return self.playlist_result(
|
||||
self._entries(uploader_id), uploader_id, uploader_id)
|
||||
self._entries(uploader_id), username, username)
|
||||
|
||||
@@ -49,7 +49,9 @@ class LA7IE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_data = self._parse_json(
|
||||
self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'),
|
||||
self._search_regex(
|
||||
[r'(?s)videoParams\s*=\s*({.+?});', r'videoLa7\(({[^;]+})\);'],
|
||||
webpage, 'player data'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
return {
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import datetime
|
||||
import hashlib
|
||||
import re
|
||||
@@ -9,6 +8,7 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
@@ -329,7 +329,7 @@ class LetvCloudIE(InfoExtractor):
|
||||
raise ExtractorError('Letv cloud returned an unknwon error')
|
||||
|
||||
def b64decode(s):
|
||||
return base64.b64decode(s.encode('utf-8')).decode('utf-8')
|
||||
return compat_b64decode(s).decode('utf-8')
|
||||
|
||||
formats = []
|
||||
for media in play_json['data']['video_info']['media'].values():
|
||||
|
||||
90
youtube_dl/extractor/line.py
Normal file
90
youtube_dl/extractor/line.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class LineTVIE(InfoExtractor):
    # Extractor for tv.line.me video pages. The video id is built from the
    # numeric id and the "epN-M" segment captured from the URL.
    _VALID_URL = r'https?://tv\.line\.me/v/(?P<id>\d+)_[^/]+-(?P<segment>ep\d+-\d+)'

    _TESTS = [{
        'url': 'https://tv.line.me/v/793123_goodbye-mrblack-ep1-1/list/69246',
        'info_dict': {
            'id': '793123_ep1-1',
            'ext': 'mp4',
            'title': 'Goodbye Mr.Black | EP.1-1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 998.509,
            'view_count': int,
        },
    }, {
        'url': 'https://tv.line.me/v/2587507_%E6%B4%BE%E9%81%A3%E5%A5%B3%E9%86%ABx-ep1-02/list/185245',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract HLS and direct HTTP formats for a LINE TV video.

        The player parameters embedded in the page (``videoId`` and
        ``key``) are used to query the video info API, whose first stream
        provides the HLS manifest and whose ``videos`` list provides the
        progressive downloads.
        """
        video_id = '%s_%s' % re.match(self._VALID_URL, url).groups()

        webpage = self._download_webpage(url, video_id)

        params_js = self._search_regex(
            r'naver\.WebPlayer\(({[^}]+})\)', webpage, 'player parameters')
        player_params = self._parse_json(
            params_js, video_id, transform_source=js_to_json)

        video_info = self._download_json(
            'https://global-nvapis.line.me/linetv/rmcnmv/vod_play_videoInfo.json',
            video_id, query={
                'videoId': player_params['videoId'],
                'key': player_params['key'],
            })

        stream = video_info['streams'][0]
        # The stream key has to accompany the manifest URL as well as every
        # format URL taken from the manifest.
        extra_query = '?__gda__=' + stream['key']['value']
        formats = self._extract_m3u8_formats(
            stream['source'] + extra_query, video_id, ext='mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')

        for hls_format in formats:
            hls_format['url'] += extra_query

        duration = None
        for video in video_info.get('videos', {}).get('list', []):
            encoding_option = video.get('encodingOption', {})
            bitrate = video['bitrate']
            abr = bitrate['audio']
            vbr = bitrate['video']
            formats.append({
                'url': video['source'],
                'format_id': 'http-%d' % int(abr + vbr),
                'height': encoding_option.get('height'),
                'width': encoding_option.get('width'),
                'abr': abr,
                'vbr': vbr,
                'filesize': video.get('size'),
            })
            # Keep the first truthy duration that is reported.
            if duration is None and video.get('duration'):
                duration = video['duration']

        self._sort_formats(formats)

        # A lowest-ranked format without a width is marked as having no
        # video codec.
        worst_format = formats[0]
        if not worst_format.get('width'):
            worst_format['vcodec'] = 'none'

        title = self._og_search_title(webpage)

        # like_count requires an additional API request https://tv.line.me/api/likeit/getCount

        thumbnails = [
            {'url': thumbnail['source']}
            for thumbnail in video_info.get('thumbnails', {}).get('list', [])]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            # Same key/value pair as above, without the leading '?'.
            'extra_param_to_segment_url': extra_query[1:],
            'duration': duration,
            'thumbnails': thumbnails,
            'view_count': video_info.get('meta', {}).get('count'),
        }
|
||||
@@ -1,12 +1,17 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -157,3 +162,153 @@ class MailRuIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class MailRuMusicSearchBaseIE(InfoExtractor):
    # Helpers for Mail.Ru music: querying the music.search AJAX endpoint
    # and converting a single track record into an info dict.

    def _search(self, query, url, audio_id, limit=100, offset=0):
        """Run one music.search AJAX request and return its first dict element.

        query is the search string, url is sent as the Referer header and
        audio_id is the identifier handed through to _download_json.
        limit and offset select the requested slice of results.
        Raises StopIteration when the response holds no dict element.
        """
        page_note = 'Downloading songs JSON page %d' % (offset // limit + 1)
        request_headers = {
            'Referer': url,
            'X-Requested-With': 'XMLHttpRequest',
        }
        # limit/offset are sent twice: JSON-encoded inside the search
        # params and again as separate top-level arguments.
        search_params = json.dumps({
            'music': {
                'limit': limit,
                'offset': offset,
            },
        })
        request_query = {
            'xemail': '',
            'ajax_call': '1',
            'func_name': 'music.search',
            'mna': '',
            'mnb': '',
            'arg_query': query,
            'arg_extended': '1',
            'arg_search_params': search_params,
            'arg_limit': limit,
            'arg_offset': offset,
        }
        response = self._download_json(
            'https://my.mail.ru/cgi-bin/my/ajax', audio_id, page_note,
            headers=request_headers, query=request_query)
        # The response is iterated and only its first dict element is used.
        return next(item for item in response if isinstance(item, dict))

    @staticmethod
    def _extract_track(t, fatal=True):
        """Build an info dict from a single track record t.

        With fatal=True a missing 'URL' or 'File' key raises KeyError; with
        fatal=False a missing key is treated like an empty value.  Returns
        None when the URL or the file id is empty.
        """
        def required(key):
            return t[key] if fatal else t.get(key)

        def first_of(*keys):
            # Same result as `t.get(a) or t.get(b)`: the first truthy
            # value, otherwise whatever the last key yields.
            value = None
            for key in keys:
                value = t.get(key)
                if value:
                    break
            return value

        audio_url = required('URL')
        if not audio_url:
            return

        audio_id = required('File')
        if not audio_id:
            return

        thumbnail = first_of('AlbumCoverURL', 'FiledAlbumCover')
        uploader = first_of('OwnerName', 'OwnerName_Text_HTML')
        uploader_id = t.get('UploaderID')

        # Prefer the numeric duration; fall back to parsing a textual one.
        duration = int_or_none(t.get('DurationInSeconds'))
        if not duration:
            duration = parse_duration(first_of('Duration', 'DurationStr'))
        view_count = int_or_none(first_of('PlayCount', 'PlayCount_hr'))

        track = first_of('Name', 'Name_Text_HTML')
        artist = first_of('Author', 'Author_Text_HTML')

        # Title is "artist - track" when both are known, the bare track
        # name when only that is known, and the file id as a last resort.
        if not track:
            title = audio_id
        elif artist:
            title = '%s - %s' % (artist, track)
        else:
            title = track

        return {
            'extractor_key': MailRuMusicIE.ie_key(),
            'id': audio_id,
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'view_count': view_count,
            'vcodec': 'none',
            'abr': int_or_none(t.get('BitRate')),
            'track': track,
            'artist': artist,
            'album': t.get('Album'),
            'url': audio_url,
        }
|
||||
|
||||
|
||||
class MailRuMusicIE(MailRuMusicSearchBaseIE):
    # Extractor for a single song page on my.mail.ru/music/songs/.
    IE_NAME = 'mailru:music'
    IE_DESC = 'Музыка@Mail.Ru'
    _VALID_URL = r'https?://my\.mail\.ru/music/songs/[^/?#&]+-(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893',
        'md5': '0f8c22ef8c5d665b13ac709e63025610',
        'info_dict': {
            'id': '4e31f7125d0dfaef505d947642366893',
            'ext': 'mp3',
            'title': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017 - М8Л8ТХ',
            'uploader': 'Игорь Мудрый',
            'uploader_id': '1459196328',
            'duration': 280,
            'view_count': int,
            'vcodec': 'none',
            'abr': 320,
            'track': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017',
            'artist': 'М8Л8ТХ',
        },
    }]

    def _real_extract(self, url):
        """Resolve a song page to its track info dict.

        The page's og:title is used as the search query; the matching
        track is the search result whose 'File' equals the id from the URL.
        Raises StopIteration when no search result matches.
        """
        audio_id = self._match_id(url)
        page = self._download_webpage(url, audio_id)
        title = self._og_search_title(page)

        results = self._search(title, url, audio_id)['MusicData']
        matching = next(
            candidate for candidate in results
            if candidate.get('File') == audio_id)

        info = self._extract_track(matching)
        # The page title takes precedence over the title built from the
        # track record.
        info['title'] = title
        return info
|
||||
|
||||
|
||||
class MailRuMusicSearchIE(MailRuMusicSearchBaseIE):
    # Extractor turning a my.mail.ru/music/search/ URL into a playlist of
    # the tracks returned by the search.
    IE_NAME = 'mailru:music:search'
    IE_DESC = 'Музыка@Mail.Ru'
    _VALID_URL = r'https?://my\.mail\.ru/music/search/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://my.mail.ru/music/search/black%20shadow',
        'info_dict': {
            'id': 'black shadow',
        },
        'playlist_mincount': 532,
    }]

    def _real_extract(self, url):
        """Page through the search results and return them as a playlist.

        Paging stops at the first page without a non-empty 'MusicData'
        list, or once the offset of the page just processed exceeds the
        reported total.
        """
        query = compat_urllib_parse_unquote(self._match_id(url))

        LIMIT = 100
        tracks = []

        # Offsets advance by one page size per request: 0, LIMIT, 2 * LIMIT, ...
        for offset in itertools.count(0, LIMIT):
            page = self._search(query, url, query, LIMIT, offset)

            music_data = page.get('MusicData')
            if not (music_data and isinstance(music_data, list)):
                break

            # Records without a usable URL or file id yield None and are
            # dropped.
            extracted = (
                self._extract_track(record, fatal=False)
                for record in music_data)
            tracks.extend(track for track in extracted if track)

            total = try_get(
                page, lambda x: x['Results']['music']['Total'], int)
            if total is not None and offset > total:
                break

        return self.playlist_result(tracks, query)
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class MangomoloBaseIE(InfoExtractor):
|
||||
@@ -51,4 +50,4 @@ class MangomoloLiveIE(MangomoloBaseIE):
|
||||
_IS_LIVE = True
|
||||
|
||||
def _get_real_id(self, page_id):
|
||||
return base64.b64decode(compat_urllib_parse_unquote(page_id).encode()).decode()
|
||||
return compat_b64decode(compat_urllib_parse_unquote(page_id)).decode()
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import functools
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_chr,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
@@ -79,7 +79,7 @@ class MixcloudIE(InfoExtractor):
|
||||
|
||||
if encrypted_play_info is not None:
|
||||
# Decode
|
||||
encrypted_play_info = base64.b64decode(encrypted_play_info)
|
||||
encrypted_play_info = compat_b64decode(encrypted_play_info)
|
||||
else:
|
||||
# New path
|
||||
full_info_json = self._parse_json(self._html_search_regex(
|
||||
@@ -109,7 +109,7 @@ class MixcloudIE(InfoExtractor):
|
||||
kpa_target = encrypted_play_info
|
||||
else:
|
||||
kps = ['https://', 'http://']
|
||||
kpa_target = base64.b64decode(info_json['streamInfo']['url'])
|
||||
kpa_target = compat_b64decode(info_json['streamInfo']['url'])
|
||||
for kp in kps:
|
||||
partial_key = self._decrypt_xor_cipher(kpa_target, kp)
|
||||
for quote in ["'", '"']:
|
||||
@@ -165,7 +165,7 @@ class MixcloudIE(InfoExtractor):
|
||||
format_url = stream_info.get(url_key)
|
||||
if not format_url:
|
||||
continue
|
||||
decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url))
|
||||
decrypted = self._decrypt_xor_cipher(key, compat_b64decode(format_url))
|
||||
if not decrypted:
|
||||
continue
|
||||
if url_key == 'hlsUrl':
|
||||
|
||||
@@ -3,22 +3,31 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimple import SprutoBaseIE
|
||||
|
||||
|
||||
class MyviIE(SprutoBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
myvi\.(?:ru/player|tv)/
|
||||
(?:
|
||||
(?:
|
||||
https?://
|
||||
(?:www\.)?
|
||||
myvi\.
|
||||
(?:
|
||||
embed/html|
|
||||
flash|
|
||||
api/Video/Get
|
||||
)/|
|
||||
content/preloader\.swf\?.*\bid=
|
||||
)
|
||||
(?P<id>[\da-zA-Z_-]+)
|
||||
(?:ru/player|tv)/
|
||||
(?:
|
||||
(?:
|
||||
embed/html|
|
||||
flash|
|
||||
api/Video/Get
|
||||
)/|
|
||||
content/preloader\.swf\?.*\bid=
|
||||
)|
|
||||
ru/watch/
|
||||
)|
|
||||
myvi:
|
||||
)
|
||||
(?P<id>[\da-zA-Z_-]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
|
||||
@@ -42,6 +51,12 @@ class MyviIE(SprutoBaseIE):
|
||||
}, {
|
||||
'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.myvi.ru/watch/YwbqszQynUaHPn_s82sx0Q2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'myvi:YwbqszQynUaHPn_s82sx0Q2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -58,3 +73,39 @@ class MyviIE(SprutoBaseIE):
|
||||
'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData']
|
||||
|
||||
return self._extract_spruto(spruto, video_id)
|
||||
|
||||
|
||||
class MyviEmbedIE(InfoExtractor):
    # Extractor for myvi.tv embed/watch URLs; it only resolves the player
    # id and delegates the actual extraction to MyviIE.
    _VALID_URL = r'https?://(?:www\.)?myvi\.tv/(?:[^?]+\?.*?\bv=|embed/)(?P<id>[\da-z]+)'
    _TESTS = [{
        'url': 'https://www.myvi.tv/embed/ccdqic3wgkqwpb36x9sxg43t4r',
        'info_dict': {
            'id': 'b3ea0663-3234-469d-873e-7fecf36b31d1',
            'ext': 'mp4',
            'title': 'Твоя (original song).mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 277,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.myvi.tv/idmi6o?v=ccdqic3wgkqwpb36x9sxg43t4r#watch',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for any URL MyviIE already handles; otherwise defer
        to the inherited URL matching."""
        if MyviIE.suitable(url):
            return False
        return super(MyviEmbedIE, cls).suitable(url)

    def _real_extract(self, url):
        """Fetch the embed page and hand the player id found there to MyviIE."""
        video_id = self._match_id(url)

        # The embed page is always requested, whichever URL form matched.
        embed_url = 'https://www.myvi.tv/embed/%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id)

        # The id MyviIE needs is the `v=` value inside the CreatePlayer(...) call.
        myvi_id = self._search_regex(
            r'CreatePlayer\s*\(\s*["\'].*?\bv=([\da-zA-Z_]+)',
            embed_page, 'video id')

        return self.url_result('myvi:%s' % myvi_id, ie=MyviIE.ie_key())
|
||||
|
||||
@@ -68,7 +68,7 @@ class NationalGeographicVideoIE(InfoExtractor):
|
||||
|
||||
class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
||||
IE_NAME = 'natgeo'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P<id>[^/?]+)'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:wild/)?[^/]+/)?(?:videos|episodes)/(?P<id>[^/?]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -102,6 +102,10 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
@@ -358,6 +359,7 @@ class NBCNewsIE(ThePlatformIE):
|
||||
|
||||
|
||||
class NBCOlympicsIE(InfoExtractor):
|
||||
IE_NAME = 'nbcolympics'
|
||||
_VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
@@ -395,3 +397,54 @@ class NBCOlympicsIE(InfoExtractor):
|
||||
'ie_key': ThePlatformIE.ie_key(),
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
|
||||
class NBCOlympicsStreamIE(AdobePassIE):
    # Extractor for live streams on stream.nbcolympics.com; the stream URL
    # is signed with an Adobe Pass media token before the HLS manifest is
    # read.
    IE_NAME = 'nbcolympics:stream'
    _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
    _TEST = {
        'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8',
        'info_dict': {
            'id': '203493',
            'ext': 'mp4',
            'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }
    _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'

    def _real_extract(self, url):
        """Extract the live stream formats for a stream page.

        Reads pid and resource from the page, loads the event config and
        live sources JSON, obtains a media token via _extract_mvpd_auth
        and asks the Adobe sign endpoint for the manifest URL.
        """
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        pid = self._search_regex(r'pid\s*=\s*(\d+);', page, 'pid')
        raw_resource = self._search_regex(
            r"resource\s*=\s*'(.+)';", page, 'resource')
        # The matched text contains a literal `' + pid + '` (presumably left
        # over from string concatenation in the page script); substitute
        # the actual pid for it.
        resource = raw_resource.replace("' + pid + '", pid)

        def fetch_data(kind):
            # Both JSON documents share one URL template keyed by kind and pid.
            return self._download_json(
                self._DATA_URL_TEMPLATE % (kind, pid), pid)

        event_config = fetch_data('event_config')['eventConfig']
        title = self._live_title(event_config['eventTitle'])
        source_url = fetch_data('live_sources')['videoSources'][0]['sourceUrl']

        requestor_id = event_config.get('requestorId', 'NBCOlympics')
        media_token = self._extract_mvpd_auth(
            url, pid, requestor_id, resource)

        # The sign endpoint takes token and resource base64-encoded; the
        # body of its response is used as the m3u8 URL.
        sign_query = {
            'cdn': 'akamai',
            'mediaToken': base64.b64encode(media_token.encode()),
            'resource': base64.b64encode(resource.encode()),
            'url': source_url,
        }
        signed_url = self._download_webpage(
            'http://sp.auth.adobe.com/tvs/v1/sign', pid, query=sign_query)

        formats = self._extract_m3u8_formats(signed_url, pid, 'mp4')
        self._sort_formats(formats)

        return {
            'id': pid,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'is_live': True,
        }
|
||||
|
||||
@@ -87,19 +87,21 @@ class NewgroundsIE(InfoExtractor):
|
||||
self._check_formats(formats, media_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
uploader = self._search_regex(
|
||||
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
|
||||
uploader = self._html_search_regex(
|
||||
(r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
|
||||
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
|
||||
fatal=False)
|
||||
|
||||
timestamp = unified_timestamp(self._search_regex(
|
||||
r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
(r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
|
||||
r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
|
||||
default=None))
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
|
||||
default=None))
|
||||
r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
|
||||
'duration', default=None))
|
||||
|
||||
filesize_approx = parse_filesize(self._html_search_regex(
|
||||
r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
|
||||
r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
|
||||
default=None))
|
||||
if len(formats) == 1:
|
||||
formats[0]['filesize_approx'] = filesize_approx
|
||||
|
||||
@@ -21,7 +21,8 @@ class NexxIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
|
||||
nexx:(?P<domain_id_s>\d+):
|
||||
nexx:(?:(?P<domain_id_s>\d+):)?|
|
||||
https?://arc\.nexx\.cloud/api/video/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
@@ -61,12 +62,33 @@ class NexxIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# does not work via arc
|
||||
'url': 'nexx:741:1269984',
|
||||
'md5': 'c714b5b238b2958dc8d5642addba6886',
|
||||
'info_dict': {
|
||||
'id': '1269984',
|
||||
'ext': 'mp4',
|
||||
'title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
||||
'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
||||
'description': 'md5:4604539793c49eda9443ab5c5b1d612f',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 607,
|
||||
'timestamp': 1518614955,
|
||||
'upload_date': '20180214',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nexx:748:128907',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nexx:128907',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://arc.nexx.cloud/api/video/128907.json',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -124,65 +146,77 @@ class NexxIE(InfoExtractor):
|
||||
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
# Reverse engineered from JS code (see getDeviceID function)
|
||||
device_id = '%d:%d:%d%d' % (
|
||||
random.randint(1, 4), int(time.time()),
|
||||
random.randint(1e4, 99999), random.randint(1, 9))
|
||||
video = None
|
||||
|
||||
result = self._call_api(domain_id, 'session/init', video_id, data={
|
||||
'nxp_devh': device_id,
|
||||
'nxp_userh': '',
|
||||
'precid': '0',
|
||||
'playlicense': '0',
|
||||
'screenx': '1920',
|
||||
'screeny': '1080',
|
||||
'playerversion': '6.0.00',
|
||||
'gateway': 'html5',
|
||||
'adGateway': '',
|
||||
'explicitlanguage': 'en-US',
|
||||
'addTextTemplates': '1',
|
||||
'addDomainData': '1',
|
||||
'addAdModel': '1',
|
||||
}, headers={
|
||||
'X-Request-Enable-Auth-Fallback': '1',
|
||||
})
|
||||
response = self._download_json(
|
||||
'https://arc.nexx.cloud/api/video/%s.json' % video_id,
|
||||
video_id, fatal=False)
|
||||
if response and isinstance(response, dict):
|
||||
result = response.get('result')
|
||||
if result and isinstance(result, dict):
|
||||
video = result
|
||||
|
||||
cid = result['general']['cid']
|
||||
# not all videos work via arc, e.g. nexx:741:1269984
|
||||
if not video:
|
||||
# Reverse engineered from JS code (see getDeviceID function)
|
||||
device_id = '%d:%d:%d%d' % (
|
||||
random.randint(1, 4), int(time.time()),
|
||||
random.randint(1e4, 99999), random.randint(1, 9))
|
||||
|
||||
# As described in [1] X-Request-Token generation algorithm is
|
||||
# as follows:
|
||||
# md5( operation + domain_id + domain_secret )
|
||||
# where domain_secret is a static value that will be given by nexx.tv
|
||||
# as per [1]. Here is how this "secret" is generated (reversed
|
||||
# from _play.api.init function, search for clienttoken). So it's
|
||||
# actually not static and not that much of a secret.
|
||||
# 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
|
||||
secret = result['device']['clienttoken'][int(device_id[0]):]
|
||||
secret = secret[0:len(secret) - int(device_id[-1])]
|
||||
|
||||
op = 'byid'
|
||||
|
||||
# Reversed from JS code for _play.api.call function (search for
|
||||
# X-Request-Token)
|
||||
request_token = hashlib.md5(
|
||||
''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
|
||||
|
||||
video = self._call_api(
|
||||
domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
|
||||
'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
|
||||
'addInteractionOptions': '1',
|
||||
'addStatusDetails': '1',
|
||||
'addStreamDetails': '1',
|
||||
'addCaptions': '1',
|
||||
'addScenes': '1',
|
||||
'addHotSpots': '1',
|
||||
'addBumpers': '1',
|
||||
'captionFormat': 'data',
|
||||
result = self._call_api(domain_id, 'session/init', video_id, data={
|
||||
'nxp_devh': device_id,
|
||||
'nxp_userh': '',
|
||||
'precid': '0',
|
||||
'playlicense': '0',
|
||||
'screenx': '1920',
|
||||
'screeny': '1080',
|
||||
'playerversion': '6.0.00',
|
||||
'gateway': 'html5',
|
||||
'adGateway': '',
|
||||
'explicitlanguage': 'en-US',
|
||||
'addTextTemplates': '1',
|
||||
'addDomainData': '1',
|
||||
'addAdModel': '1',
|
||||
}, headers={
|
||||
'X-Request-CID': cid,
|
||||
'X-Request-Token': request_token,
|
||||
'X-Request-Enable-Auth-Fallback': '1',
|
||||
})
|
||||
|
||||
cid = result['general']['cid']
|
||||
|
||||
# As described in [1] X-Request-Token generation algorithm is
|
||||
# as follows:
|
||||
# md5( operation + domain_id + domain_secret )
|
||||
# where domain_secret is a static value that will be given by nexx.tv
|
||||
# as per [1]. Here is how this "secret" is generated (reversed
|
||||
# from _play.api.init function, search for clienttoken). So it's
|
||||
# actually not static and not that much of a secret.
|
||||
# 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
|
||||
secret = result['device']['clienttoken'][int(device_id[0]):]
|
||||
secret = secret[0:len(secret) - int(device_id[-1])]
|
||||
|
||||
op = 'byid'
|
||||
|
||||
# Reversed from JS code for _play.api.call function (search for
|
||||
# X-Request-Token)
|
||||
request_token = hashlib.md5(
|
||||
''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
|
||||
|
||||
video = self._call_api(
|
||||
domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
|
||||
'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
|
||||
'addInteractionOptions': '1',
|
||||
'addStatusDetails': '1',
|
||||
'addStreamDetails': '1',
|
||||
'addCaptions': '1',
|
||||
'addScenes': '1',
|
||||
'addHotSpots': '1',
|
||||
'addBumpers': '1',
|
||||
'captionFormat': 'data',
|
||||
}, headers={
|
||||
'X-Request-CID': cid,
|
||||
'X-Request-Token': request_token,
|
||||
})
|
||||
|
||||
general = video['general']
|
||||
title = general['title']
|
||||
|
||||
|
||||
@@ -198,7 +198,7 @@ class NickNightIE(NickDeIE):
|
||||
|
||||
class NickRuIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'nickelodeonru'
|
||||
_VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu|com\.tr)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
|
||||
'only_matching': True,
|
||||
@@ -220,6 +220,9 @@ class NickRuIE(MTVServicesInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.nickelodeon.hu/musorok/spongyabob-kockanadrag/videok/episodes/buborekfujas-az-elszakadt-nadrag/q57iob#playlist/k6te4y',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.nickelodeon.com.tr/programlar/sunger-bob/videolar/kayip-yatak/mgqbjy',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -13,7 +13,7 @@ class NineGagIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
|
||||
'info_dict': {
|
||||
'id': 'Kk2X5',
|
||||
'id': 'kXzwOKyGlSA',
|
||||
'ext': 'mp4',
|
||||
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
|
||||
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
|
||||
|
||||
@@ -4,15 +4,17 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class NineNowIE(InfoExtractor):
|
||||
IE_NAME = '9now.com.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
_TESTS = [{
|
||||
# clip
|
||||
'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
|
||||
@@ -75,7 +77,9 @@ class NineNowIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
'url': smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
{'geo_countries': self._GEO_COUNTRIES}),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': common_data.get('description'),
|
||||
|
||||
@@ -43,7 +43,8 @@ class NJPWWorldIE(InfoExtractor):
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
'https://njpwworld.com/auth/login', None,
|
||||
note='Logging in', errnote='Unable to login',
|
||||
data=urlencode_postdata({'login_id': username, 'pw': password}))
|
||||
data=urlencode_postdata({'login_id': username, 'pw': password}),
|
||||
headers={'Referer': 'https://njpwworld.com/auth'})
|
||||
# /auth/login will return 302 for successful logins
|
||||
if urlh.geturl() == 'https://njpwworld.com/auth/login':
|
||||
self.report_warning('unable to login')
|
||||
|
||||
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
fix_xml_ampersands,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
qualities,
|
||||
@@ -38,7 +39,7 @@ class NPOIE(NPOBaseIE):
|
||||
npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
|
||||
ntr\.nl/(?:[^/]+/){2,}|
|
||||
omroepwnl\.nl/video/fragment/[^/]+__|
|
||||
(?:zapp|npo3)\.nl/(?:[^/]+/){2}
|
||||
(?:zapp|npo3)\.nl/(?:[^/]+/){2,}
|
||||
)
|
||||
)
|
||||
(?P<id>[^/?#]+)
|
||||
@@ -156,6 +157,9 @@ class NPOIE(NPOBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -170,6 +174,10 @@ class NPOIE(NPOBaseIE):
|
||||
transform_source=strip_jsonp,
|
||||
)
|
||||
|
||||
error = metadata.get('error')
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
|
||||
# For some videos actual video id (prid) is different (e.g. for
|
||||
# http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698
|
||||
# video id is POMS_WNL_853698 but prid is POW_00996502)
|
||||
@@ -187,7 +195,15 @@ class NPOIE(NPOBaseIE):
|
||||
formats = []
|
||||
urls = set()
|
||||
|
||||
quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
|
||||
def is_legal_url(format_url):
|
||||
return format_url and format_url not in urls and re.match(
|
||||
r'^(?:https?:)?//', format_url)
|
||||
|
||||
QUALITY_LABELS = ('Laag', 'Normaal', 'Hoog')
|
||||
QUALITY_FORMATS = ('adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std')
|
||||
|
||||
quality_from_label = qualities(QUALITY_LABELS)
|
||||
quality_from_format_id = qualities(QUALITY_FORMATS)
|
||||
items = self._download_json(
|
||||
'http://ida.omroep.nl/app.php/%s' % video_id, video_id,
|
||||
'Downloading formats JSON', query={
|
||||
@@ -196,18 +212,34 @@ class NPOIE(NPOBaseIE):
|
||||
})['items'][0]
|
||||
for num, item in enumerate(items):
|
||||
item_url = item.get('url')
|
||||
if not item_url or item_url in urls:
|
||||
if not is_legal_url(item_url):
|
||||
continue
|
||||
urls.add(item_url)
|
||||
format_id = self._search_regex(
|
||||
r'video/ida/([^/]+)', item_url, 'format id',
|
||||
default=None)
|
||||
|
||||
item_label = item.get('label')
|
||||
|
||||
def add_format_url(format_url):
|
||||
width = int_or_none(self._search_regex(
|
||||
r'(\d+)[xX]\d+', format_url, 'width', default=None))
|
||||
height = int_or_none(self._search_regex(
|
||||
r'\d+[xX](\d+)', format_url, 'height', default=None))
|
||||
if item_label in QUALITY_LABELS:
|
||||
quality = quality_from_label(item_label)
|
||||
f_id = item_label
|
||||
elif item_label in QUALITY_FORMATS:
|
||||
quality = quality_from_format_id(format_id)
|
||||
f_id = format_id
|
||||
else:
|
||||
quality, f_id = [None] * 2
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'format_id': f_id,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'quality': quality,
|
||||
})
|
||||
|
||||
# Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
|
||||
@@ -219,7 +251,7 @@ class NPOIE(NPOBaseIE):
|
||||
stream_info = self._download_json(
|
||||
item_url + '&type=json', video_id,
|
||||
'Downloading %s stream JSON'
|
||||
% item.get('label') or item.get('format') or format_id or num)
|
||||
% item_label or item.get('format') or format_id or num)
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||
error = (self._parse_json(
|
||||
@@ -251,7 +283,7 @@ class NPOIE(NPOBaseIE):
|
||||
if not is_live:
|
||||
for num, stream in enumerate(metadata.get('streams', [])):
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url or stream_url in urls:
|
||||
if not is_legal_url(stream_url):
|
||||
continue
|
||||
urls.add(stream_url)
|
||||
# smooth streaming is not supported
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -12,7 +16,6 @@ from ..utils import (
|
||||
try_get,
|
||||
unsmuggle_url,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class OoyalaBaseIE(InfoExtractor):
|
||||
@@ -44,7 +47,7 @@ class OoyalaBaseIE(InfoExtractor):
|
||||
url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
|
||||
if not url_data:
|
||||
continue
|
||||
s_url = base64.b64decode(url_data.encode('ascii')).decode('utf-8')
|
||||
s_url = compat_b64decode(url_data).decode('utf-8')
|
||||
if not s_url or s_url in urls:
|
||||
continue
|
||||
urls.append(s_url)
|
||||
|
||||
@@ -56,18 +56,16 @@ class PeriscopeIE(PeriscopeBaseIE):
|
||||
def _real_extract(self, url):
|
||||
token = self._match_id(url)
|
||||
|
||||
broadcast_data = self._call_api(
|
||||
'getBroadcastPublic', {'broadcast_id': token}, token)
|
||||
broadcast = broadcast_data['broadcast']
|
||||
status = broadcast['status']
|
||||
stream = self._call_api(
|
||||
'accessVideoPublic', {'broadcast_id': token}, token)
|
||||
|
||||
user = broadcast_data.get('user', {})
|
||||
broadcast = stream['broadcast']
|
||||
title = broadcast['status']
|
||||
|
||||
uploader = broadcast.get('user_display_name') or user.get('display_name')
|
||||
uploader_id = (broadcast.get('username') or user.get('username') or
|
||||
broadcast.get('user_id') or user.get('id'))
|
||||
uploader = broadcast.get('user_display_name') or broadcast.get('username')
|
||||
uploader_id = (broadcast.get('user_id') or broadcast.get('username'))
|
||||
|
||||
title = '%s - %s' % (uploader, status) if uploader else status
|
||||
title = '%s - %s' % (uploader, title) if uploader else title
|
||||
state = broadcast.get('state').lower()
|
||||
if state == 'running':
|
||||
title = self._live_title(title)
|
||||
@@ -77,9 +75,6 @@ class PeriscopeIE(PeriscopeBaseIE):
|
||||
'url': broadcast[image],
|
||||
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
|
||||
|
||||
stream = self._call_api(
|
||||
'getAccessPublic', {'broadcast_id': token}, token)
|
||||
|
||||
video_urls = set()
|
||||
formats = []
|
||||
for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
|
||||
|
||||
@@ -4,7 +4,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
xpath_text,
|
||||
@@ -26,17 +28,15 @@ class PladformIE(InfoExtractor):
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# http://muz-tv.ru/kinozal/view/7400/
|
||||
'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
|
||||
'md5': '61f37b575dd27f1bb2e1854777fe31f4',
|
||||
'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0',
|
||||
'md5': '53362fac3a27352da20fa2803cc5cd6f',
|
||||
'info_dict': {
|
||||
'id': '100183293',
|
||||
'id': '3777899',
|
||||
'ext': 'mp4',
|
||||
'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
|
||||
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
|
||||
'title': 'СТУДИЯ СОЮЗ • Шоу Студия Союз, 24 выпуск (01.02.2018) Нурлан Сабуров и Слава Комиссаренко',
|
||||
'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 694,
|
||||
'age_limit': 0,
|
||||
'duration': 3190,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
|
||||
@@ -56,22 +56,48 @@ class PladformIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
pl = qs.get('pl', ['1'])[0]
|
||||
|
||||
video = self._download_xml(
|
||||
'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
|
||||
video_id)
|
||||
'http://out.pladform.ru/getVideo', video_id, query={
|
||||
'pl': pl,
|
||||
'videoid': video_id,
|
||||
})
|
||||
|
||||
def fail(text):
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, text),
|
||||
expected=True)
|
||||
|
||||
if video.tag == 'error':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, video.text),
|
||||
expected=True)
|
||||
fail(video.text)
|
||||
|
||||
quality = qualities(('ld', 'sd', 'hd'))
|
||||
|
||||
formats = [{
|
||||
'url': src.text,
|
||||
'format_id': src.get('quality'),
|
||||
'quality': quality(src.get('quality')),
|
||||
} for src in video.findall('./src')]
|
||||
formats = []
|
||||
for src in video.findall('./src'):
|
||||
if src is None:
|
||||
continue
|
||||
format_url = src.text
|
||||
if not format_url:
|
||||
continue
|
||||
if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': src.text,
|
||||
'format_id': src.get('quality'),
|
||||
'quality': quality(src.get('quality')),
|
||||
})
|
||||
|
||||
if not formats:
|
||||
error = xpath_text(video, './cap', 'error', default=None)
|
||||
if error:
|
||||
fail(error)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
|
||||
@@ -11,19 +11,34 @@ from ..utils import (
|
||||
|
||||
|
||||
class PokemonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/[^/]+/\d+_\d+-(?P<display_id>[^/?#]+))'
|
||||
_VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true',
|
||||
'md5': '9fb209ae3a569aac25de0f5afc4ee08f',
|
||||
'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
|
||||
'md5': '2fe8eaec69768b25ef898cda9c43062e',
|
||||
'info_dict': {
|
||||
'id': 'd0436c00c3ce4071ac6cee8130ac54a1',
|
||||
'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
|
||||
'ext': 'mp4',
|
||||
'title': 'From A to Z!',
|
||||
'description': 'Bonnie makes a new friend, Ash runs into an old friend, and a terrifying premonition begins to unfold!',
|
||||
'timestamp': 1460478136,
|
||||
'upload_date': '20160412',
|
||||
'title': 'The Ol’ Raise and Switch!',
|
||||
'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
|
||||
'timestamp': 1511824728,
|
||||
'upload_date': '20171127',
|
||||
},
|
||||
'add_id': ['LimelightMedia'],
|
||||
}, {
|
||||
# no data-video-title
|
||||
'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008',
|
||||
'info_dict': {
|
||||
'id': '99f3bae270bf4e5097274817239ce9c8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pokémon: The Rise of Darkrai',
|
||||
'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d',
|
||||
'timestamp': 1417778347,
|
||||
'upload_date': '20141205',
|
||||
},
|
||||
'add_id': ['LimelightMedia'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_id': ['LimelightMedia']
|
||||
}, {
|
||||
'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
|
||||
'only_matching': True,
|
||||
@@ -42,7 +57,9 @@ class PokemonIE(InfoExtractor):
|
||||
r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
|
||||
webpage, 'video data element'))
|
||||
video_id = video_data['data-video-id']
|
||||
title = video_data['data-video-title']
|
||||
title = video_data.get('data-video-title') or self._html_search_meta(
|
||||
'pkm-title', webpage, ' title', default=None) or self._search_regex(
|
||||
r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
|
||||
@@ -115,12 +115,13 @@ class PornHubIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
self._set_cookie('pornhub.com', 'age_verified', '1')
|
||||
|
||||
def dl_webpage(platform):
|
||||
self._set_cookie('pornhub.com', 'platform', platform)
|
||||
return self._download_webpage(
|
||||
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
|
||||
video_id, headers={
|
||||
'Cookie': 'age_verified=1; platform=%s' % platform,
|
||||
})
|
||||
video_id)
|
||||
|
||||
webpage = dl_webpage('pc')
|
||||
|
||||
@@ -275,7 +276,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||
|
||||
|
||||
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P<id>[^/]+)/videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||
'info_dict': {
|
||||
@@ -285,6 +286,25 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/users/rushandlia/videos',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# default sorting as Top Rated Videos
|
||||
'url': 'https://www.pornhub.com/channels/povd/videos',
|
||||
'info_dict': {
|
||||
'id': 'povd',
|
||||
},
|
||||
'playlist_mincount': 293,
|
||||
}, {
|
||||
# Top Rated Videos
|
||||
'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Most Recent Videos
|
||||
'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Most Viewed Videos
|
||||
'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -129,10 +129,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
(?:beta\.)?
|
||||
(?:
|
||||
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
|
||||
)\.(?:de|at|ch)|
|
||||
ran\.de|fem\.com|advopedia\.de
|
||||
ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
|
||||
)
|
||||
/(?P<id>.+)
|
||||
'''
|
||||
@@ -325,6 +326,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# geo restricted to Germany
|
||||
'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
|
||||
'only_matching': True,
|
||||
@@ -342,8 +348,10 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
r'"clip_id"\s*:\s+"(\d+)"',
|
||||
r'clipid: "(\d+)"',
|
||||
r'clip[iI]d=(\d+)',
|
||||
r'clip[iI]d\s*=\s*["\'](\d+)',
|
||||
r'clip[iI][dD]\s*=\s*["\'](\d+)',
|
||||
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
|
||||
r'proMamsId"\s*:\s*"(\d+)',
|
||||
r'proMamsId"\s*:\s*"(\d+)',
|
||||
]
|
||||
_TITLE_REGEXES = [
|
||||
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
|
||||
|
||||
102
youtube_dl/extractor/raywenderlich.py
Normal file
102
youtube_dl/extractor/raywenderlich.py
Normal file
@@ -0,0 +1,102 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class RayWenderlichIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P<course_id>[^/]+)/lessons/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
|
||||
'info_dict': {
|
||||
'id': '248377018',
|
||||
'ext': 'mp4',
|
||||
'title': 'Testing In iOS Episode 1: Introduction',
|
||||
'duration': 133,
|
||||
'uploader': 'Ray Wenderlich',
|
||||
'uploader_id': 'user3304672',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [VimeoIE.ie_key()],
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
}, {
|
||||
'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
|
||||
'info_dict': {
|
||||
'title': 'Testing in iOS',
|
||||
'id': '105-testing-in-ios',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': False,
|
||||
},
|
||||
'playlist_count': 29,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
course_id, lesson_id = mobj.group('course_id', 'id')
|
||||
video_id = '%s/%s' % (course_id, lesson_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
no_playlist = self._downloader.params.get('noplaylist')
|
||||
if no_playlist or smuggled_data.get('force_video', False):
|
||||
if no_playlist:
|
||||
self.to_screen(
|
||||
'Downloading just video %s because of --no-playlist'
|
||||
% video_id)
|
||||
if '>Subscribe to unlock' in webpage:
|
||||
raise ExtractorError(
|
||||
'This content is only available for subscribers',
|
||||
expected=True)
|
||||
vimeo_id = self._search_regex(
|
||||
r'data-vimeo-id=["\'](\d+)', webpage, 'video id')
|
||||
return self.url_result(
|
||||
VimeoIE._smuggle_referrer(
|
||||
'https://player.vimeo.com/video/%s' % vimeo_id, url),
|
||||
ie=VimeoIE.ie_key(), video_id=vimeo_id)
|
||||
|
||||
self.to_screen(
|
||||
'Downloading playlist %s - add --no-playlist to just download video'
|
||||
% course_id)
|
||||
|
||||
lesson_ids = set((lesson_id, ))
|
||||
for lesson in re.findall(
|
||||
r'(<a[^>]+\bclass=["\']lesson-link[^>]+>)', webpage):
|
||||
attrs = extract_attributes(lesson)
|
||||
if not attrs:
|
||||
continue
|
||||
lesson_url = attrs.get('href')
|
||||
if not lesson_url:
|
||||
continue
|
||||
lesson_id = self._search_regex(
|
||||
r'/lessons/(\d+)', lesson_url, 'lesson id', default=None)
|
||||
if not lesson_id:
|
||||
continue
|
||||
lesson_ids.add(lesson_id)
|
||||
|
||||
entries = []
|
||||
for lesson_id in sorted(lesson_ids):
|
||||
entries.append(self.url_result(
|
||||
smuggle_url(urljoin(url, lesson_id), {'force_video': True}),
|
||||
ie=RayWenderlichIE.ie_key()))
|
||||
|
||||
title = self._search_regex(
|
||||
r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title',
|
||||
default=None)
|
||||
|
||||
return self.playlist_result(entries, course_id, title)
|
||||
@@ -5,135 +5,93 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
# unified_timestamp,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class RedBullTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film|live)/(?:AP-\w+/segment/)?(?P<id>AP-\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?P<id>AP-\w+)'
|
||||
_TESTS = [{
|
||||
# film
|
||||
'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
|
||||
'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
|
||||
'md5': 'fb0445b98aa4394e504b413d98031d1f',
|
||||
'info_dict': {
|
||||
'id': 'AP-1Q756YYX51W11',
|
||||
'id': 'AP-1Q6XCDTAN1W11',
|
||||
'ext': 'mp4',
|
||||
'title': 'ABC of...WRC',
|
||||
'title': 'ABC of... WRC - ABC of... S1E6',
|
||||
'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31',
|
||||
'duration': 1582.04,
|
||||
# 'timestamp': 1488405786,
|
||||
# 'upload_date': '20170301',
|
||||
},
|
||||
}, {
|
||||
# episode
|
||||
'url': 'https://www.redbull.tv/video/AP-1PMT5JCWH1W11/grime?playlist=shows:shows-playall:web',
|
||||
'url': 'https://www.redbull.tv/video/AP-1PMHKJFCW1W11',
|
||||
'info_dict': {
|
||||
'id': 'AP-1PMT5JCWH1W11',
|
||||
'id': 'AP-1PMHKJFCW1W11',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grime - Hashtags S2 E4',
|
||||
'description': 'md5:334b741c8c1ce65be057eab6773c1cf5',
|
||||
'title': 'Grime - Hashtags S2E4',
|
||||
'description': 'md5:b5f522b89b72e1e23216e5018810bb25',
|
||||
'duration': 904.6,
|
||||
# 'timestamp': 1487290093,
|
||||
# 'upload_date': '20170217',
|
||||
'series': 'Hashtags',
|
||||
'season_number': 2,
|
||||
'episode_number': 4,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# segment
|
||||
'url': 'https://www.redbull.tv/live/AP-1R5DX49XS1W11/segment/AP-1QSAQJ6V52111/semi-finals',
|
||||
'info_dict': {
|
||||
'id': 'AP-1QSAQJ6V52111',
|
||||
'ext': 'mp4',
|
||||
'title': 'Semi Finals - Vans Park Series Pro Tour',
|
||||
'description': 'md5:306a2783cdafa9e65e39aa62f514fd97',
|
||||
'duration': 11791.991,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
session = self._download_json(
|
||||
'https://api-v2.redbull.tv/session', video_id,
|
||||
'https://api.redbull.tv/v3/session', video_id,
|
||||
note='Downloading access token', query={
|
||||
'build': '4.370.0',
|
||||
'category': 'personal_computer',
|
||||
'os_version': '1.0',
|
||||
'os_family': 'http',
|
||||
})
|
||||
if session.get('code') == 'error':
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, session['message']))
|
||||
auth = '%s %s' % (session.get('token_type', 'Bearer'), session['access_token'])
|
||||
token = session['token']
|
||||
|
||||
try:
|
||||
info = self._download_json(
|
||||
'https://api-v2.redbull.tv/content/%s' % video_id,
|
||||
video = self._download_json(
|
||||
'https://api.redbull.tv/v3/products/' + video_id,
|
||||
video_id, note='Downloading video information',
|
||||
headers={'Authorization': auth}
|
||||
headers={'Authorization': token}
|
||||
)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
error_message = self._parse_json(
|
||||
e.cause.read().decode(), video_id)['message']
|
||||
e.cause.read().decode(), video_id)['error']
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, error_message), expected=True)
|
||||
raise
|
||||
|
||||
video = info['video_product']
|
||||
|
||||
title = info['title'].strip()
|
||||
title = video['title'].strip()
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video['url'], video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token),
|
||||
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for _, captions in (try_get(
|
||||
video, lambda x: x['attachments']['captions'],
|
||||
dict) or {}).items():
|
||||
if not captions or not isinstance(captions, list):
|
||||
continue
|
||||
for caption in captions:
|
||||
caption_url = caption.get('url')
|
||||
if not caption_url:
|
||||
continue
|
||||
ext = caption.get('format')
|
||||
if ext == 'xml':
|
||||
ext = 'ttml'
|
||||
subtitles.setdefault(caption.get('lang') or 'en', []).append({
|
||||
'url': caption_url,
|
||||
'ext': ext,
|
||||
})
|
||||
for resource in video.get('resources', []):
|
||||
if resource.startswith('closed_caption_'):
|
||||
splitted_resource = resource.split('_')
|
||||
if splitted_resource[2]:
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': 'https://resources.redbull.tv/%s/%s' % (video_id, resource),
|
||||
'ext': splitted_resource[2],
|
||||
})
|
||||
|
||||
subheading = info.get('subheading')
|
||||
subheading = video.get('subheading')
|
||||
if subheading:
|
||||
title += ' - %s' % subheading
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': info.get('long_description') or info.get(
|
||||
'description': video.get('long_description') or video.get(
|
||||
'short_description'),
|
||||
'duration': float_or_none(video.get('duration'), scale=1000),
|
||||
# 'timestamp': unified_timestamp(info.get('published')),
|
||||
'series': info.get('show_title'),
|
||||
'season_number': int_or_none(info.get('season_number')),
|
||||
'episode_number': int_or_none(info.get('episode_number')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ class RedditIE(InfoExtractor):
|
||||
_TEST = {
|
||||
# from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
|
||||
'url': 'https://v.redd.it/zv89llsvexdz',
|
||||
'md5': '655d06ace653ea3b87bccfb1b27ec99d',
|
||||
'md5': '0a070c53eba7ec4534d95a5a1259e253',
|
||||
'info_dict': {
|
||||
'id': 'zv89llsvexdz',
|
||||
'ext': 'mp4',
|
||||
|
||||
@@ -16,12 +16,12 @@ class RedTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.redtube.com/66418',
|
||||
'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
|
||||
'md5': 'fc08071233725f26b8f014dba9590005',
|
||||
'info_dict': {
|
||||
'id': '66418',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sucked on a toilet',
|
||||
'upload_date': '20120831',
|
||||
'upload_date': '20110811',
|
||||
'duration': 596,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
@@ -46,9 +46,10 @@ class RedTubeIE(InfoExtractor):
|
||||
raise ExtractorError('Video %s has been removed' % video_id, expected=True)
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'<h1 class="videoTitle[^"]*">(?P<title>.+?)</h1>',
|
||||
r'videoTitle\s*:\s*(["\'])(?P<title>)\1'),
|
||||
webpage, 'title', group='title')
|
||||
(r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
|
||||
r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
|
||||
webpage, 'title', group='title',
|
||||
default=None) or self._og_search_title(webpage)
|
||||
|
||||
formats = []
|
||||
sources = self._parse_json(
|
||||
@@ -87,12 +88,14 @@ class RedTubeIE(InfoExtractor):
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
|
||||
r'<span[^>]+>ADDED ([^<]+)<',
|
||||
webpage, 'upload date', fatal=False))
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
|
||||
duration = int_or_none(self._og_search_property(
|
||||
'video:duration', webpage, default=None) or self._search_regex(
|
||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
|
||||
(r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
|
||||
r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)'),
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
# No self-labeling, but they describe themselves as
|
||||
|
||||
@@ -5,8 +5,8 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class RestudyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?restudy\.dk/video/play/id/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.restudy.dk/video/play/id/1637',
|
||||
'info_dict': {
|
||||
'id': '1637',
|
||||
@@ -18,7 +18,10 @@ class RestudyIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://portal.restudy.dk/video/leiden-frosteffekt/id/1637',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -29,7 +32,7 @@ class RestudyIE(InfoExtractor):
|
||||
description = self._og_search_description(webpage).strip()
|
||||
|
||||
formats = self._extract_smil_formats(
|
||||
'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id,
|
||||
'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id,
|
||||
video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class RingTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30',
|
||||
'md5': 'd25945f5df41cdca2d2587165ac28720',
|
||||
'info_dict': {
|
||||
'id': '857645',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
|
||||
'description': 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id').split('-')[0]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if mobj.group('type') == 'news':
|
||||
video_id = self._search_regex(
|
||||
r'''(?x)<iframe[^>]+src="http://cms\.springboardplatform\.com/
|
||||
embed_iframe/[0-9]+/video/([0-9]+)/''',
|
||||
webpage, 'real video ID')
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_regex(
|
||||
r'addthis:description="([^"]+)"',
|
||||
webpage, 'description', fatal=False)
|
||||
final_url = 'http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4' % video_id
|
||||
thumbnail_url = 'http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg' % video_id
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail_url,
|
||||
'description': description,
|
||||
}
|
||||
@@ -1,12 +1,12 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
)
|
||||
@@ -142,11 +142,11 @@ class RTL2YouIE(RTL2YouBaseIE):
|
||||
stream_data = self._download_json(
|
||||
self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)
|
||||
|
||||
data, iv = base64.b64decode(stream_data['streamUrl']).decode().split(':')
|
||||
data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':')
|
||||
stream_url = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(base64.b64decode(data)),
|
||||
bytes_to_intlist(compat_b64decode(data)),
|
||||
bytes_to_intlist(self._AES_KEY),
|
||||
bytes_to_intlist(base64.b64decode(iv))
|
||||
bytes_to_intlist(compat_b64decode(iv))
|
||||
))
|
||||
if b'rtl2_you_video_not_found' in stream_url:
|
||||
raise ExtractorError('video not found', expected=True)
|
||||
|
||||
@@ -93,58 +93,11 @@ class RtlNlIE(InfoExtractor):
|
||||
|
||||
meta = info.get('meta', {})
|
||||
|
||||
# m3u8 streams are encrypted and may not be handled properly by older ffmpeg/avconv.
|
||||
# To workaround this previously adaptive -> flash trick was used to obtain
|
||||
# unencrypted m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
|
||||
# and bypass georestrictions as well.
|
||||
# Currently, unencrypted m3u8 playlists are (intentionally?) invalid and therefore
|
||||
# unusable albeit can be fixed by simple string replacement (see
|
||||
# https://github.com/rg3/youtube-dl/pull/6337)
|
||||
# Since recent ffmpeg and avconv handle encrypted streams just fine encrypted
|
||||
# streams are used now.
|
||||
videopath = material['videopath']
|
||||
m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
|
||||
|
||||
video_urlpart = videopath.split('/adaptive/')[1][:-5]
|
||||
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
|
||||
|
||||
PG_FORMATS = (
|
||||
('a2t', 512, 288),
|
||||
('a3t', 704, 400),
|
||||
('nettv', 1280, 720),
|
||||
)
|
||||
|
||||
def pg_format(format_id, width, height):
|
||||
return {
|
||||
'url': PG_URL_TEMPLATE % (format_id, video_urlpart),
|
||||
'format_id': 'pg-%s' % format_id,
|
||||
'protocol': 'http',
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
|
||||
if not formats:
|
||||
formats = [pg_format(*pg_tuple) for pg_tuple in PG_FORMATS]
|
||||
else:
|
||||
pg_formats = []
|
||||
for format_id, width, height in PG_FORMATS:
|
||||
try:
|
||||
# Find hls format with the same width and height corresponding
|
||||
# to progressive format and copy metadata from it.
|
||||
f = next(f for f in formats if f.get('height') == height)
|
||||
# hls formats may have invalid width
|
||||
f['width'] = width
|
||||
f_copy = f.copy()
|
||||
f_copy.update(pg_format(format_id, width, height))
|
||||
pg_formats.append(f_copy)
|
||||
except StopIteration:
|
||||
# Missing hls format does mean that no progressive format with
|
||||
# such width and height exists either.
|
||||
pass
|
||||
formats.extend(pg_formats)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
|
||||
@@ -7,6 +7,7 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_struct_unpack,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -21,7 +22,7 @@ from ..utils import (
|
||||
|
||||
|
||||
def _decrypt_url(png):
|
||||
encrypted_data = base64.b64decode(png.encode('utf-8'))
|
||||
encrypted_data = compat_b64decode(png)
|
||||
text_index = encrypted_data.find(b'tEXt')
|
||||
text_chunk = encrypted_data[text_index - 4:]
|
||||
length = compat_struct_unpack('!I', text_chunk[:4])[0]
|
||||
|
||||
47
youtube_dl/extractor/rtvs.py
Normal file
47
youtube_dl/extractor/rtvs.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class RTVSIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv/\d+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# radio archive
|
||||
'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
|
||||
'md5': '134d5d6debdeddf8a5d761cbc9edacb8',
|
||||
'info_dict': {
|
||||
'id': '414872',
|
||||
'ext': 'mp3',
|
||||
'title': 'Ostrov pokladov 1 časť.mp3'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# tv archive
|
||||
'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118',
|
||||
'md5': '85e2c55cf988403b70cac24f5c086dc6',
|
||||
'info_dict': {
|
||||
'id': '63118',
|
||||
'ext': 'mp4',
|
||||
'title': 'Amaro Džives - Náš deň',
|
||||
'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
playlist_url = self._search_regex(
|
||||
r'playlist["\']?\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'playlist url', group='url')
|
||||
|
||||
data = self._download_json(
|
||||
playlist_url, video_id, 'Downloading playlist')[0]
|
||||
return self._parse_jwplayer_data(data, video_id=video_id)
|
||||
@@ -53,6 +53,12 @@ class RuutuIE(InfoExtractor):
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
# Episode where <SourceFile> is "NOT-USED", but has other
|
||||
# downloadable sources available.
|
||||
{
|
||||
'url': 'http://www.ruutu.fi/video/3193728',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -72,7 +78,7 @@ class RuutuIE(InfoExtractor):
|
||||
video_url = child.text
|
||||
if (not video_url or video_url in processed_urls or
|
||||
any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
|
||||
return
|
||||
continue
|
||||
processed_urls.append(video_url)
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
|
||||
@@ -4,22 +4,30 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..utils import update_url_query
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class SevenPlusIE(BrightcoveNewIE):
|
||||
IE_NAME = '7plus'
|
||||
_VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001',
|
||||
'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003',
|
||||
'info_dict': {
|
||||
'id': 'BEAT-001',
|
||||
'id': 'MTYS7-003',
|
||||
'ext': 'mp4',
|
||||
'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds',
|
||||
'description': 'md5:37718bea20a8eedaca7f7361af566131',
|
||||
'title': 'S7 E3 - Wind Surf',
|
||||
'description': 'md5:29c6a69f21accda7601278f81b46483d',
|
||||
'uploader_id': '5303576322001',
|
||||
'upload_date': '20171031',
|
||||
'timestamp': 1509440068,
|
||||
'upload_date': '20171201',
|
||||
'timestamp': 1512106377,
|
||||
'series': 'Mighty Ships',
|
||||
'season_number': 7,
|
||||
'episode_number': 3,
|
||||
'episode': 'Wind Surf',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
@@ -63,5 +71,14 @@ class SevenPlusIE(BrightcoveNewIE):
|
||||
value = item.get(src_key)
|
||||
if value:
|
||||
info[dst_key] = value
|
||||
info['series'] = try_get(
|
||||
item, lambda x: x['seriesLogo']['name'], compat_str)
|
||||
mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title'])
|
||||
if mobj:
|
||||
info.update({
|
||||
'season_number': int(mobj.group(1)),
|
||||
'episode_number': int(mobj.group(2)),
|
||||
'episode': mobj.group(3),
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
169
youtube_dl/extractor/seznamzpravy.py
Normal file
169
youtube_dl/extractor/seznamzpravy.py
Normal file
@@ -0,0 +1,169 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
urljoin,
|
||||
int_or_none,
|
||||
parse_codecs,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
def _raw_id(src_url):
|
||||
return compat_urllib_parse_urlparse(src_url).path.split('/')[-1]
|
||||
|
||||
|
||||
class SeznamZpravyIE(InfoExtractor):
    """Extractor for the seznamzpravy.cz iframe player.

    Title, series, poster, duration and content id are read from the query
    string of the player URL itself; the formats are built from the JSON
    document referenced by the ``src`` query parameter.
    """

    _VALID_URL = r'https?://(?:www\.)?seznamzpravy\.cz/iframe/player\?.*\bsrc='
    _TESTS = [{
        'url': 'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy',
        'info_dict': {
            'id': '170889',
            'ext': 'mp4',
            'title': 'Svět bez obalu: Čeští vojáci na misích (krátká verze)',
            'thumbnail': r're:^https?://.*\.jpe?g',
            'duration': 241,
            'series': 'Svět bez obalu',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # with Location key
        'url': 'https://www.seznamzpravy.cz/iframe/player?duration=null&serviceSlug=zpravy&src=https%3A%2F%2Flive-a.sdn.szn.cz%2Fv_39%2F59e468fe454f8472a96af9fa%3Ffl%3Dmdk%2C5c1e2840%7C&itemType=livevod&autoPlay=false&title=P%C5%99edseda%20KDU-%C4%8CSL%20Pavel%20B%C4%9Blobr%C3%A1dek%20ve%20volebn%C3%AD%20V%C3%BDzv%C4%9B%20Seznamu&series=V%C3%BDzva&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_G_J%2FjTBCs.jpeg%3Ffl%3Dcro%2C0%2C0%2C1280%2C720%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=16&height=9&cutFrom=0&cutTo=0&splVersion=VOD&contentId=185688&contextId=38489&showAdvert=true&collocation=&hideFullScreen=false&hideSubtitles=false&embed=&isVideoTooShortForPreroll=false&isVideoTooShortForPreroll2=false&isVideoTooLongForPostroll=false&fakePostrollZoneID=seznam.clanky.zpravy.preroll&fakePrerollZoneID=seznam.clanky.zpravy.preroll&videoCommentId=&trim=default_16x9&noPrerollVideoLength=30&noPreroll2VideoLength=undefined&noMidrollVideoLength=0&noPostrollVideoLength=999999&autoplayPossible=true&version=5.0.41&dotService=zpravy&gemiusPrismIdentifier=zD3g7byfW5ekpXmxTVLaq5Srjw5i4hsYo0HY1aBwIe..27&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy%2Fvyzva&zoneIdPostroll=seznam.pack.videospot&skipOffsetPostroll=5&sectionPrefixPostroll=%2Fzpravy%2Fvyzva&regression=false',
        'info_dict': {
            'id': '185688',
            'ext': 'mp4',
            'title': 'Předseda KDU-ČSL Pavel Bělobrádek ve volební Výzvě Seznamu',
            'thumbnail': r're:^https?://.*\.jpe?g',
            'series': 'Výzva',
        },
        'params': {
            'skip_download': True,
        },
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the player URLs of all matching <iframe> tags in *webpage*."""
        return [
            mobj.group('url') for mobj in re.finditer(
                r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1',
                webpage)]

    def _extract_sdn_formats(self, sdn_url, video_id):
        """Download the JSON at *sdn_url* and return its sorted formats.

        Progressive MP4 entries are read from ``data.mp4``; DASH and HLS
        manifests are read from ``pls`` and expanded non-fatally.
        """
        sdn_data = self._download_json(sdn_url, video_id)

        # When the response carries a 'Location' key, the real document lives
        # there; relative URLs below are then resolved against that address.
        location = sdn_data.get('Location')
        if location:
            sdn_url = location
            sdn_data = self._download_json(sdn_url, video_id)

        formats = []
        mp4_formats = try_get(sdn_data, lambda x: x['data']['mp4'], dict) or {}
        for format_id, format_data in mp4_formats.items():
            # Skip malformed entries instead of failing on .get() below.
            if not isinstance(format_data, dict):
                continue
            relative_url = format_data.get('url')
            if not relative_url:
                continue

            # 'resolution' is expected to unpack into (width, height); a
            # missing or differently shaped value leaves both unknown.
            try:
                width, height = format_data.get('resolution')
            except (TypeError, ValueError):
                width, height = None, None

            f = {
                'url': urljoin(sdn_url, relative_url),
                'format_id': 'http-%s' % format_id,
                'tbr': int_or_none(format_data.get('bandwidth'), scale=1000),
                'width': int_or_none(width),
                'height': int_or_none(height),
            }
            f.update(parse_codecs(format_data.get('codec')))
            formats.append(f)

        pls = sdn_data.get('pls', {})

        def get_url(format_id):
            # Relative manifest URL for the given playlist kind, or None.
            return try_get(pls, lambda x: x[format_id]['url'], compat_str)

        dash_rel_url = get_url('dash')
        if dash_rel_url:
            formats.extend(self._extract_mpd_formats(
                urljoin(sdn_url, dash_rel_url), video_id, mpd_id='dash',
                fatal=False))

        hls_rel_url = get_url('hls')
        if hls_rel_url:
            formats.extend(self._extract_m3u8_formats(
                urljoin(sdn_url, hls_rel_url), video_id, ext='mp4',
                m3u8_id='hls', fatal=False))

        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Build the info dict from the player URL's query parameters."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)

        def first(key):
            # First value of an optional query parameter, or None.
            return params.get(key, [None])[0]

        # 'src' and 'title' are mandatory: a KeyError is raised without them.
        src = params['src'][0]
        title = params['title'][0]
        # Fall back to the last path segment of 'src' when contentId is absent.
        video_id = params.get('contentId', [_raw_id(src)])[0]
        formats = self._extract_sdn_formats(src + 'spl2,2,VOD', video_id)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': first('poster'),
            'duration': int_or_none(first('duration')),
            'series': first('series'),
            'formats': formats,
        }
|
||||
|
||||
|
||||
class SeznamZpravyArticleIE(InfoExtractor):
    """Extractor for article pages; yields a playlist of embedded players."""

    _VALID_URL = r'https?://(?:www\.)?(?:seznam\.cz/zpravy|seznamzpravy\.cz)/clanek/(?:[^/?#&]+)-(?P<id>\d+)'
    _API_URL = 'https://apizpravy.seznam.cz/'

    _TESTS = [{
        # two videos on one page, with SDN URL
        'url': 'https://www.seznamzpravy.cz/clanek/jejich-svet-na-nas-utoci-je-lepsi-branit-se-na-jejich-pisecku-rika-reziser-a-major-v-zaloze-marhoul-35990',
        'info_dict': {
            'id': '35990',
            'title': 'md5:6011c877a36905f28f271fcd8dcdb0f2',
            'description': 'md5:933f7b06fa337a814ba199d3596d27ba',
        },
        'playlist_count': 2,
    }, {
        # video with live stream URL
        'url': 'https://www.seznam.cz/zpravy/clanek/znovu-do-vlady-s-ano-pavel-belobradek-ve-volebnim-specialu-seznamu-38489',
        'info_dict': {
            'id': '38489',
            'title': 'md5:8fa1afdc36fd378cf0eba2b74c5aca60',
            'description': 'md5:428e7926a1a81986ec7eb23078004fb4',
        },
        'playlist_count': 1,
    }]

    def _real_extract(self, url):
        """Return a playlist with one entry per player iframe on the page."""
        article_id = self._match_id(url)
        webpage = self._download_webpage(url, article_id)

        # Prefer JSON-LD metadata and fall back to the Open Graph tags.
        json_ld = self._search_json_ld(webpage, article_id, default={})
        title = json_ld.get('title') or self._og_search_title(webpage, fatal=False)
        description = json_ld.get('description') or self._og_search_description(webpage)

        player_ie_key = SeznamZpravyIE.ie_key()
        entries = [
            self.url_result(player_url, ie=player_ie_key)
            for player_url in SeznamZpravyIE._extract_urls(webpage)]

        return self.playlist_result(entries, article_id, title, description)
|
||||
@@ -1,8 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -22,8 +21,8 @@ class SharedBaseIE(InfoExtractor):
|
||||
|
||||
video_url = self._extract_video_url(webpage, video_id, url)
|
||||
|
||||
title = base64.b64decode(self._html_search_meta(
|
||||
'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
|
||||
title = compat_b64decode(self._html_search_meta(
|
||||
'full:title', webpage, 'title')).decode('utf-8')
|
||||
filesize = int_or_none(self._html_search_meta(
|
||||
'full:size', webpage, 'file size', fatal=False))
|
||||
|
||||
@@ -92,5 +91,4 @@ class VivoIE(SharedBaseIE):
|
||||
r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'stream', group='url'),
|
||||
video_id,
|
||||
transform_source=lambda x: base64.b64decode(
|
||||
x.encode('ascii')).decode('utf-8'))[0]
|
||||
transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0]
|
||||
|
||||
@@ -4,7 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
@@ -48,6 +52,7 @@ class SixPlayIE(InfoExtractor):
|
||||
urls = []
|
||||
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for asset in clip_data['assets']:
|
||||
asset_url = asset.get('full_physical_path')
|
||||
protocol = asset.get('protocol')
|
||||
@@ -56,8 +61,11 @@ class SixPlayIE(InfoExtractor):
|
||||
urls.append(asset_url)
|
||||
container = asset.get('video_container')
|
||||
ext = determine_ext(asset_url)
|
||||
if protocol == 'http_subtitle' or ext == 'vtt':
|
||||
subtitles.setdefault('fr', []).append({'url': asset_url})
|
||||
continue
|
||||
if container == 'm3u8' or ext == 'm3u8':
|
||||
if protocol == 'usp':
|
||||
if protocol == 'usp' and not compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
|
||||
asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||
@@ -98,4 +106,5 @@ class SixPlayIE(InfoExtractor):
|
||||
'duration': int_or_none(clip_data.get('duration')),
|
||||
'series': get(lambda x: x['program']['title']),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@@ -33,5 +33,8 @@ class SonyLIVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
brightcove_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['IN']}),
|
||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
|
||||
'geo_countries': ['IN'],
|
||||
'referrer': url,
|
||||
}),
|
||||
'BrightcoveNew', brightcove_id)
|
||||
|
||||
@@ -157,8 +157,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
_CLIENT_ID = 'DQskPX1pntALRzMp4HSxya3Mc0AO66Ro'
|
||||
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
|
||||
_CLIENT_ID = 'LvWovRaJZlWCHql0bISuum8Bd2KX79mb'
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
|
||||
@@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
class SouthParkIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'southpark.cc.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
|
||||
|
||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
||||
|
||||
@@ -20,6 +20,9 @@ class SouthParkIE(MTVServicesInfoExtractor):
|
||||
'timestamp': 1112760000,
|
||||
'upload_date': '20050406',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
@@ -41,7 +44,7 @@ class SouthParkEsIE(SouthParkIE):
|
||||
|
||||
class SouthParkDeIE(SouthParkIE):
|
||||
IE_NAME = 'southpark.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden|collections)/(?P<id>.+?)(\?|#|$))'
|
||||
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -70,12 +73,15 @@ class SouthParkDeIE(SouthParkIE):
|
||||
'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'http://www.southpark.de/collections/2476/superhero-showdown/1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class SouthParkNlIE(SouthParkIE):
|
||||
IE_NAME = 'southpark.nl'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
|
||||
_FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -90,7 +96,7 @@ class SouthParkNlIE(SouthParkIE):
|
||||
|
||||
class SouthParkDkIE(SouthParkIE):
|
||||
IE_NAME = 'southparkstudios.dk'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.dk/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.(?:dk|nu)/(?:clips|full-episodes|collections)/(?P<id>.+?)(\?|#|$))'
|
||||
_FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -100,4 +106,10 @@ class SouthParkDkIE(SouthParkIE):
|
||||
'description': 'Butters is convinced he\'s living in a virtual reality.',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
'url': 'http://www.southparkstudios.dk/collections/2476/superhero-showdown/1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.southparkstudios.nu/collections/2476/superhero-showdown/1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user