Compare commits

...

84 Commits

Author SHA1 Message Date
Sergey M․
0db492c02a release 2017.07.23 2017-07-23 01:09:09 +07:00
Sergey M․
425f41319a [ChangeLog] Actualize 2017-07-23 01:06:08 +07:00
Sergey M․
71dde5eecf [itv] Fix production id extraction (closes #13671) 2017-07-23 00:59:07 +07:00
Sergey M․
935d6c20c0 [vidio] Make duration non fatal and fix typo 2017-07-23 00:44:50 +07:00
Sergey M․
e0f1fb0a27 [mtv] Skip missing video parts (closes #13690) 2017-07-23 00:25:23 +07:00
Sergey M․
0017d9ad6d [YoutubeDL] Improve default format specification (closes #13704) 2017-07-23 00:12:01 +07:00
Sergey M․
327c8364f1 [sportbox:embed] Fix extraction 2017-07-22 21:35:14 +07:00
dubber0
359aa2fdd1 [npo] Add support for npo3.nl URLs 2017-07-22 19:15:55 +07:00
Sergey M․
f76c02c87b [dramafever] Fix tests 2017-07-22 11:41:40 +07:00
Sergey M․
7d9a1db111 [dramafever] Remove video id from title (closes #13699) 2017-07-22 11:40:46 +07:00
Sergey M․
0396806f67 [YoutubeDL] Do not override id, extractor and extractor_key in url_transparent
All these meta fields must be borrowed from final extractor that actually performs extraction.
This commit fixes extractor id in download archives for url_transparent downloads. Previously, 'transparent' extractor was erroneously
used for extractor archive id, e.g. 'eggheadlesson 4n8ugwwj5t' instead of 'wistia 4n8ugwwj5t'.
2017-07-21 00:13:32 +07:00
Sergey M․
dc6520aa3d [egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00
Sergey M․
c653326a14 [funnyordie] Extract more metadata (closes #13677) 2017-07-20 22:50:56 +07:00
Yen Chi Hsuan
3fcf346ac1 [youku:show] Refine playlist extraction
Handle playlists that the initial page is not the first page
2017-07-20 23:20:46 +08:00
Yen Chi Hsuan
fa63cf6c23 [youku:show] Fix playlist extraction (closes #13248) 2017-07-20 22:57:51 +08:00
Yen Chi Hsuan
85f5a74b6c [tbs] Mark as broken and skip invalid tests 2017-07-20 21:19:09 +08:00
Yen Chi Hsuan
d20b1c6725 [dispeak] Recognize sevt subdomain (closes #13276) 2017-07-20 18:14:14 +08:00
Sergey M․
bb176df3bb [spiegel:article] Move test 2017-07-17 22:19:40 +07:00
Sergey M․
83d00044c1 [adn] Improve error reporting (#13663) 2017-07-16 20:50:32 +07:00
Sergey M․
7abed4e06c [crunchyroll] Relax series and season regex (closes #13659) 2017-07-16 12:40:45 +07:00
Sergey M․
13eb526f11 [nexx:embed] PEP 8 2017-07-16 05:23:19 +07:00
Sergey M․
00d06e3cfc [spiegel:article] Add support for nexx iframe embeds (closes #13029) 2017-07-16 04:38:20 +07:00
Sergey M․
749ca5eced [extractor/common] Fix playlist_from_matches 2017-07-16 04:33:14 +07:00
Sergey M․
3f59b0154a [nexx:embed] Add extractor for iframe embeds 2017-07-16 04:32:37 +07:00
Sergey M․
089b97cfee [nexx] Improve JS embed extraction 2017-07-16 04:30:48 +07:00
Sergey M․
decf86044d [pearvideo] Improve (closes #13031) 2017-07-16 03:06:04 +07:00
troywith77
94b817edeb [pearvideo] Add extractor 2017-07-16 03:02:31 +07:00
Sergey M․
cea931a9e5 release 2017.07.15 2017-07-15 07:36:05 +07:00
Sergey M․
ef78563e9c [ChangeLog] Actualize 2017-07-15 07:33:26 +07:00
Sergey M․
961ea474b6 [YoutubeDL] PEP 8 2017-07-15 07:02:57 +07:00
Sergey M․
ea3f20494f [youtube] PEP 8 2017-07-15 07:02:57 +07:00
Sergey M․
c7604d79e9 [spiegeltv] Delegate extraction to nexx (closes #13159) 2017-07-15 07:02:57 +07:00
Sergey M․
4e826cd9ae [nexx] Add extractor (closes #10807, closes #13465) 2017-07-15 07:02:57 +07:00
Robin Neatherway
2583c0b54e Fix bugs caused by typos 2017-07-14 23:08:32 +07:00
Sergey M․
7d02dcfaa2 [youtube] Don't capture YouTube Red ad for creator meta field (closes #13621) 2017-07-14 22:37:04 +07:00
satunnainen
00dbdfc1f7 [slideshare] Fix extraction 2017-07-14 22:11:07 +07:00
rrooij
f354d84807 [5tv] Add another video URL pattern (closes #13354) 2017-07-14 22:10:17 +07:00
Sergey M․
15da37c7dc [YoutubeDL] Don't expand env variables in meta fields (closes #13637) 2017-07-14 00:42:12 +07:00
Sergey M․
9a0942ad55 [drtv] Make HLS and HDS extraction non fatal 2017-07-11 22:59:56 +07:00
Sergey M․
f2bb33a986 [ted] Fix subtitles extraction (closes #13628, closes #13629) 2017-07-11 21:36:45 +07:00
Yen Chi Hsuan
3615bfe1b4 [twitter] Fix remaining tests 2017-07-11 16:46:37 +08:00
Yen Chi Hsuan
e8f20ffa03 [vine] Make sure the title won't be empty
And fix a relevant TwitterCard test case
2017-07-11 16:05:15 +08:00
Yen Chi Hsuan
9be31e771c [twitter] Support HLS streams in vmap URLs 2017-07-11 15:48:48 +08:00
Yen Chi Hsuan
7f176ac477 [periscope] Support pscp.tv URLs in embedded frames
And fix a relevant twitter test
2017-07-11 15:35:19 +08:00
Yen Chi Hsuan
2edfd745df [twitter] Extract mp4 urls via mobile API (closes #12726) 2017-07-11 15:19:36 +08:00
Yen Chi Hsuan
708f6f511e [niconico] Fix authentication error handling (closes #12486) 2017-07-11 15:04:45 +08:00
Yen Chi Hsuan
bb13949197 [niconico] Check login errors (#12486) 2017-07-11 15:03:11 +08:00
Yen Chi Hsuan
c3c94ca4a4 [giantbomb] Extract m3u8 formats (closes #13626) 2017-07-10 21:34:27 +08:00
Sergey M․
e3cd1fcdd1 [vlive:playlist] Relax and simplify 2017-07-10 04:32:24 +07:00
coreynicholson
b71c18b434 [vlive:playlist] Add extractor 2017-07-10 04:24:04 +07:00
Sergey M․
7bf539edcc [eagleplatform] Fix test 2017-07-10 00:14:41 +07:00
Sergey M․
65c416dda8 release 2017.07.09 2017-07-09 20:16:38 +07:00
Sergey M․
207acd8465 [ChangeLog] Actualize 2017-07-09 20:15:15 +07:00
Sergey M․
71a1db8919 [dailymail] Add support for embeds 2017-07-09 20:06:24 +07:00
Sergey M․
6e925598d6 [csjw] Add coding cookie 2017-07-09 19:18:12 +07:00
Sergey M․
73cf76a93f [joj] Rewrite and add support for generic embeds (closes #13268) 2017-07-09 19:17:54 +07:00
luboss
256a746d21 [joj] Add extractor 2017-07-09 19:17:38 +07:00
Sergey M․
58179eb7d9 [abc.net.au:iview] Extract more formats (closes #13492, closes #13489) 2017-07-09 17:55:40 +07:00
Sergey M․
485cb37576 [egghead:course] Improve (closes #13370) 2017-07-09 17:30:49 +07:00
Santiago Calcagno
ed84454d35 [egghead:course] Fix extraction 2017-07-09 17:30:25 +07:00
Sergey M․
a02682fd13 Keep in sync with ffmpeg's current malformed AAC bitstream wording (closes #13587) 2017-07-09 17:09:44 +07:00
Sergey M․
0d2f0b0357 [csjw] Make description optional 2017-07-09 17:05:11 +07:00
Sergey M․
c319d1c483 [csjw] Fix issues and improve extraction (closes #13525) 2017-07-09 17:01:05 +07:00
Christopher Smith
d2b9f362fa [cjsw] Add extractor 2017-07-09 17:01:00 +07:00
Sergey M․
4328ddf82b [extractor/common] Add support for AMP tags in _parse_html5_media_entries 2017-07-09 16:29:52 +07:00
Sergey M․
250b042c7e [generic] Add tests for #13557 2017-07-09 16:02:38 +07:00
Sergey M․
665e945246 [eagleplatform] Add support for referrer protected videos (closes #13557) 2017-07-09 15:57:58 +07:00
Sergey M․
5af2fd7fa0 [eagleplatform] Add support for another embed pattern (#13557) 2017-07-09 15:55:04 +07:00
mlindner
15237fcd51 [veoh] Extend _VALID_URL 2017-07-09 14:54:52 +07:00
rrooij
7a57730907 [npo:live] Fix live stream id extraction (closes #13568) 2017-07-09 14:21:40 +07:00
Sergey M․
8b347a389e [googledrive] Fix height extraction (closes #13603) 2017-07-09 00:26:13 +07:00
Sergey M․
a49804816c [dailymotion] Add support for new layout (close #13580) 2017-07-08 18:12:15 +07:00
Yen Chi Hsuan
eadd313321 [yam] Remove extractor
mymedia.yam.com is dead. An wikipedia user also pointed out that Yam's
blog service is no longer available. [1]

[1] https://zh.wikipedia.org/zh-tw/%E5%A4%A9%E7%A9%BA%E9%83%A8%E8%90%BD
2017-07-08 15:48:05 +08:00
Sergey M․
d852c6bc59 [xhamster] Extract all formats and fix duration extraction (#13593) 2017-07-07 22:49:11 +07:00
Sergey M․
00e5c36315 [xhamster] Add support for new URL schema (closes #13593) 2017-07-07 22:27:34 +07:00
Sergey M․
8a04ade86b Credit @parmjitv for #13322, #13503, #13541, #13549 2017-07-06 23:15:23 +07:00
Sergey M․
ab328411d5 Credit @orng for ruv (#13396) 2017-07-06 23:15:16 +07:00
Sergey M․
ddeff4be3f Credit @gfabiano for #13382, #13385, #13415 2017-07-06 23:15:09 +07:00
Parmjit Virk
60d4401c5e [espn] Extend _VALID_URL (fixes #13244) 2017-07-06 22:55:59 +07:00
Sergey M․
dee2ff1d81 [test_utils] Fix tests under Windows 2017-07-06 00:25:37 +07:00
Sergey M․
6554708252 [kaltura] Fix typo in subtitles extraction (closes #13569) 2017-07-05 23:20:50 +07:00
Sergey M․
0a2e1b2e30 [vier] Adapt extraction to redesign (#13575) 2017-07-05 22:52:47 +07:00
Yen Chi Hsuan
babbc04d45 [xuite] Move to the new HTML5 API and reduce # of requests 2017-07-05 23:27:12 +08:00
Yen Chi Hsuan
609ff8ca19 [utils] Support attributes with no values in get_elements_by_attribute() 2017-07-05 23:27:12 +08:00
59 changed files with 1654 additions and 639 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.02**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.23**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.07.02
[debug] youtube-dl version 2017.07.23
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -220,3 +220,6 @@ gritstub
Adam Voss
Mike Fährmann
Jan Kundrát
Giuseppe Fabiano
Örn Guðjónsson
Parmjit Virk

View File

@@ -1,3 +1,80 @@
version 2017.07.23
Core
* [YoutubeDL] Improve default format specification (#13704)
* [YoutubeDL] Do not override id, extractor and extractor_key for
url_transparent entities
* [extractor/common] Fix playlist_from_matches
Extractors
* [itv] Fix production id extraction (#13671, #13703)
* [vidio] Make duration non fatal and fix typo
* [mtv] Skip missing video parts (#13690)
* [sportbox:embed] Fix extraction
+ [npo] Add support for npo3.nl URLs (#13695)
* [dramafever] Remove video id from title (#13699)
+ [egghead:lesson] Add support for lessons (#6635)
* [funnyordie] Extract more metadata (#13677)
* [youku:show] Fix playlist extraction (#13248)
+ [dispeak] Recognize sevt subdomain (#13276)
* [adn] Improve error reporting (#13663)
* [crunchyroll] Relax series and season regex (#13659)
+ [spiegel:article] Add support for nexx iframe embeds (#13029)
+ [nexx:embed] Add support for iframe embeds
* [nexx] Improve JS embed extraction
+ [pearvideo] Add support for pearvideo.com (#13031)
version 2017.07.15
Core
* [YoutubeDL] Don't expand environment variables in meta fields (#13637)
Extractors
* [spiegeltv] Delegate extraction to nexx extractor (#13159)
+ [nexx] Add support for nexx.cloud (#10807, #13465)
* [generic] Fix rutube embeds extraction (#13641)
* [karrierevideos] Fix title extraction (#13641)
* [youtube] Don't capture YouTube Red ad for creator meta field (#13621)
* [slideshare] Fix extraction (#13617)
+ [5tv] Add another video URL pattern (#13354, #13606)
* [drtv] Make HLS and HDS extraction non fatal
* [ted] Fix subtitles extraction (#13628, #13629)
* [vine] Make sure the title won't be empty
+ [twitter] Support HLS streams in vmap URLs
+ [periscope] Support pscp.tv URLs in embedded frames
* [twitter] Extract mp4 urls via mobile API (#12726)
* [niconico] Fix authentication error handling (#12486)
* [giantbomb] Extract m3u8 formats (#13626)
+ [vlive:playlist] Add support for playlists (#13613)
version 2017.07.09
Core
+ [extractor/common] Add support for AMP tags in _parse_html5_media_entries
+ [utils] Support attributes with no values in get_elements_by_attribute
Extractors
+ [dailymail] Add support for embeds
+ [joj] Add support for joj.sk (#13268)
* [abc.net.au:iview] Extract more formats (#13492, #13489)
* [egghead:course] Fix extraction (#6635, #13370)
+ [cjsw] Add support for cjsw.com (#13525)
+ [eagleplatform] Add support for referrer protected videos (#13557)
+ [eagleplatform] Add support for another embed pattern (#13557)
* [veoh] Extend URL regular expression (#13601)
* [npo:live] Fix live stream id extraction (#13568, #13605)
* [googledrive] Fix height extraction (#13603)
+ [dailymotion] Add support for new layout (#13580)
- [yam] Remove extractor
* [xhamster] Extract all formats and fix duration extraction (#13593)
+ [xhamster] Add support for new URL schema (#13593)
* [espn] Extend URL regular expression (#13244, #13549)
* [kaltura] Fix typo in subtitles extraction (#13569)
* [vier] Adapt extraction to redesign (#13575)
version 2017.07.02
Core

View File

@@ -42,7 +42,7 @@
- **Allocine**
- **AlphaPorno**
- **AMCNetworks**
- **anderetijden**: npo.nl and ntr.nl
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **AnimeOnDemand**
- **anitube.se**
- **Anvato**
@@ -154,6 +154,7 @@
- **chirbit**
- **chirbit:profile**
- **Cinchcast**
- **CJSW**
- **Clipfish**
- **cliphunter**
- **ClipRs**
@@ -237,6 +238,7 @@
- **EbaumsWorld**
- **EchoMsk**
- **egghead:course**: egghead.io course
- **egghead:lesson**: egghead.io lesson
- **eHow**
- **Einthusan**
- **eitb.tv**
@@ -369,6 +371,7 @@
- **Jamendo**
- **JamendoAlbum**
- **JeuxVideo**
- **Joj**
- **Jove**
- **jpopsuki.tv**
- **JWPlatform**
@@ -519,6 +522,8 @@
- **NextMedia**: 蘋果日報
- **NextMediaActionNews**: 蘋果日報 - 動新聞
- **NextTV**: 壹電視
- **Nexx**
- **NexxEmbed**
- **nfb**: National Film Board of Canada
- **nfl.com**
- **NhkVod**
@@ -549,7 +554,7 @@
- **NowTVList**
- **nowvideo**: NowVideo
- **Noz**
- **npo**: npo.nl and ntr.nl
- **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **npo.nl:live**
- **npo.nl:radio**
- **npo.nl:radio:fragment**
@@ -593,6 +598,7 @@
- **Patreon**
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
- **pcmag**
- **PearVideo**
- **People**
- **periscope**: Periscope
- **periscope:user**: Periscope user videos
@@ -769,7 +775,7 @@
- **tagesschau:player**
- **Tass**
- **TastyTrade**
- **TBS**
- **TBS** (Currently broken)
- **TDSLifeway**
- **teachertube**: teachertube.com videos
- **teachertube:user:collection**: teachertube.com user and collection videos
@@ -940,13 +946,14 @@
- **vk:wallpost**
- **vlive**
- **vlive:channel**
- **vlive:playlist**
- **Vodlocker**
- **VODPl**
- **VODPlatform**
- **VoiceRepublic**
- **VoxMedia**
- **Vporn**
- **vpro**: npo.nl and ntr.nl
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
- **vrv**
@@ -972,7 +979,7 @@
- **wholecloud**: WholeCloud
- **Wimp**
- **Wistia**
- **wnl**: npo.nl and ntr.nl
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **WorldStarHipHop**
- **wrzuta.pl**
- **wrzuta.pl:playlist**
@@ -996,7 +1003,6 @@
- **XVideos**
- **XXXYMovies**
- **Yahoo**: Yahoo screen and movies
- **Yam**: 蕃薯藤yam天空部落
- **yandexmusic:album**: Яндекс.Музыка - Альбом
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- **yandexmusic:track**: Яндекс.Музыка - Трек

View File

@@ -41,6 +41,7 @@ def _make_result(formats, **kwargs):
'id': 'testid',
'title': 'testttitle',
'extractor': 'testex',
'extractor_key': 'TestEx',
}
res.update(**kwargs)
return res
@@ -448,6 +449,17 @@ class TestFormatSelection(unittest.TestCase):
pass
self.assertEqual(ydl.downloaded_info_dicts, [])
def test_default_format_spec(self):
ydl = YDL({'simulate': True})
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
ydl = YDL({'outtmpl': '-'})
self.assertEqual(ydl._default_format_spec({}), 'best')
ydl = YDL({})
self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best')
class TestYoutubeDL(unittest.TestCase):
def test_subtitles(self):
@@ -527,6 +539,8 @@ class TestYoutubeDL(unittest.TestCase):
'ext': 'mp4',
'width': None,
'height': 1080,
'title1': '$PATH',
'title2': '%PATH%',
}
def fname(templ):
@@ -545,10 +559,14 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%%'), '%')
self.assertEqual(fname('%%%%'), '%%')
self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
def test_format_note(self):
ydl = YoutubeDL()
@@ -755,7 +773,8 @@ class TestYoutubeDL(unittest.TestCase):
'_type': 'url_transparent',
'url': 'foo2:',
'ie_key': 'Foo2',
'title': 'foo1 title'
'title': 'foo1 title',
'id': 'foo1_id',
}
class Foo2IE(InfoExtractor):
@@ -781,6 +800,9 @@ class TestYoutubeDL(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['url'], TEST_URL)
self.assertEqual(downloaded['title'], 'foo1 title')
self.assertEqual(downloaded['id'], 'testid')
self.assertEqual(downloaded['extractor'], 'testex')
self.assertEqual(downloaded['extractor_key'], 'TestEx')
if __name__ == '__main__':

View File

@@ -98,6 +98,7 @@ from youtube_dl.compat import (
compat_chr,
compat_etree_fromstring,
compat_getenv,
compat_os_name,
compat_setenv,
compat_urlparse,
compat_parse_qs,
@@ -448,7 +449,9 @@ class TestUtil(unittest.TestCase):
def test_shell_quote(self):
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
self.assertEqual(
shell_quote(args),
"""ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456)
@@ -932,7 +935,7 @@ class TestUtil(unittest.TestCase):
def test_args_to_str(self):
self.assertEqual(
args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
'foo ba/r -baz \'2 be\' \'\''
'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""'
)
def test_parse_filesize(self):
@@ -1228,6 +1231,12 @@ part 3</font></u>
self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
html = '''
<div itemprop="author" itemscope>foo</div>
'''
self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')
def test_get_elements_by_class(self):
html = '''
<span class="foo bar">nice</span><span class="foo bar">also nice</span>

View File

@@ -26,6 +26,8 @@ import tokenize
import traceback
import random
from string import ascii_letters
from .compat import (
compat_basestring,
compat_cookiejar,
@@ -674,7 +676,19 @@ class YoutubeDL(object):
FORMAT_RE.format(numeric_field),
r'%({0})s'.format(numeric_field), outtmpl)
filename = expand_path(outtmpl % template_dict)
# expand_path translates '%%' into '%' and '$$' into '$'
# correspondingly that is not what we want since we need to keep
# '%%' intact for template dict substitution step. Working around
# with boundary-alike separator hack.
sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
# outtmpl should be expand_path'ed before template dict substitution
# because meta fields may contain env variables we don't want to
# be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
# title "Hello $PATH", we don't want `$PATH` to be expanded.
filename = expand_path(outtmpl).replace(sep, '') % template_dict
# Temporary fix for #4787
# 'Treat' all problem characters by passing filename through preferredencoding
# to workaround encoding issues with subprocess on python2 @ Windows
@@ -846,7 +860,7 @@ class YoutubeDL(object):
force_properties = dict(
(k, v) for k, v in ie_result.items() if v is not None)
for f in ('_type', 'url', 'ie_key'):
for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
if f in force_properties:
del force_properties[f]
new_result = info.copy()
@@ -1050,6 +1064,25 @@ class YoutubeDL(object):
return op(actual_value, comparison_value)
return _filter
def _default_format_spec(self, info_dict, download=True):
req_format_list = []
def can_have_partial_formats():
if self.params.get('simulate', False):
return True
if not download:
return True
if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
return False
if info_dict.get('is_live'):
return False
merger = FFmpegMergerPP(self)
return merger.available and merger.can_merge()
if can_have_partial_formats():
req_format_list.append('bestvideo+bestaudio')
req_format_list.append('best')
return '/'.join(req_format_list)
def build_format_selector(self, format_spec):
def syntax_error(note, start):
message = (
@@ -1520,14 +1553,10 @@ class YoutubeDL(object):
req_format = self.params.get('format')
if req_format is None:
req_format_list = []
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
not info_dict.get('is_live')):
merger = FFmpegMergerPP(self)
if merger.available and merger.can_merge():
req_format_list.append('bestvideo+bestaudio')
req_format_list.append('best')
req_format = '/'.join(req_format_list)
req_format = self._default_format_spec(info_dict, download=download)
if self.params.get('verbose'):
self.to_stdout('[debug] Default format spec: %s' % req_format)
format_selector = self.build_format_selector(req_format)
# While in format selection we may need to have an access to the original
@@ -1890,7 +1919,7 @@ class YoutubeDL(object):
info_dict.get('protocol') == 'm3u8' and
self.params.get('hls_prefer_native')):
if fixup_policy == 'warn':
self.report_warning('%s: malformated aac bitstream.' % (
self.report_warning('%s: malformed AAC bitstream detected.' % (
info_dict['id']))
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM3u8PP(self)
@@ -1899,7 +1928,7 @@ class YoutubeDL(object):
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
'%s: malformated aac bitstream. %s'
'%s: malformed AAC bitstream detected. %s'
% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
else:
assert fixup_policy in ('ignore', 'never')

View File

@@ -98,7 +98,7 @@ def write_piff_header(stream, params):
if is_audio:
smhd_payload = s88.pack(0) # balance
smhd_payload = u16.pack(0) # reserved
smhd_payload += u16.pack(0) # reserved
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
else:
vmhd_payload = u16.pack(0) # graphics mode
@@ -126,7 +126,6 @@ def write_piff_header(stream, params):
if fourcc == 'AACL':
sample_entry_box = box(b'mp4a', sample_entry_payload)
else:
sample_entry_payload = sample_entry_payload
sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved
sample_entry_payload += u32.pack(0) * 3 # pre defined

View File

@@ -3,11 +3,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
js_to_json,
int_or_none,
parse_iso8601,
try_get,
)
@@ -124,7 +126,20 @@ class ABCIViewIE(InfoExtractor):
title = video_params.get('title') or video_params['seriesTitle']
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
format_urls = [
try_get(stream, lambda x: x['hds-unmetered'], compat_str)]
# May have higher quality video
sd_url = try_get(
stream, lambda x: x['streams']['hds']['sd'], compat_str)
if sd_url:
format_urls.append(sd_url.replace('metered', 'um'))
formats = []
for format_url in format_urls:
if format_url:
formats.extend(
self._extract_akamai_formats(format_url, video_id))
self._sort_formats(formats)
subtitles = {}

View File

@@ -107,11 +107,13 @@ class ADNIE(InfoExtractor):
metas = options.get('metas') or {}
title = metas.get('title') or video_info['title']
links = player_config.get('links') or {}
error = None
if not links:
links_url = player_config['linksurl']
links_data = self._download_json(urljoin(
self._BASE_URL, links_url), video_id)
links = links_data.get('links') or {}
error = links_data.get('error')
formats = []
for format_id, qualities in links.items():
@@ -130,7 +132,8 @@ class ADNIE(InfoExtractor):
for f in m3u8_formats:
f['language'] = 'fr'
formats.extend(m3u8_formats)
error = options.get('error')
if not error:
error = options.get('error')
if not formats and error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
self._sort_formats(formats)

View File

@@ -43,7 +43,7 @@ class AudioBoomIE(InfoExtractor):
def from_clip(field):
if clip:
clip.get(field)
return clip.get(field)
audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
'audio', webpage, 'audio url')

View File

@@ -0,0 +1,72 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
unescapeHTML,
)
class CJSWIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P<program>[^/]+)/episode/(?P<id>\d+)'
_TESTS = [{
'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620',
'md5': 'cee14d40f1e9433632c56e3d14977120',
'info_dict': {
'id': '91d9f016-a2e7-46c5-8dcb-7cbcd7437c41',
'ext': 'mp3',
'title': 'Freshly Squeezed Episode June 20, 2017',
'description': 'md5:c967d63366c3898a80d0c7b0ff337202',
'series': 'Freshly Squeezed',
'episode_id': '20170620',
},
}, {
# no description
'url': 'http://cjsw.com/program/road-pops/episode/20170707/',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
program, episode_id = mobj.group('program', 'id')
audio_id = '%s/%s' % (program, episode_id)
webpage = self._download_webpage(url, episode_id)
title = unescapeHTML(self._search_regex(
(r'<h1[^>]+class=["\']episode-header__title["\'][^>]*>(?P<title>[^<]+)',
r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'),
webpage, 'title', group='title'))
audio_url = self._search_regex(
r'<button[^>]+data-audio-src=(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'audio url', group='url')
audio_id = self._search_regex(
r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3',
audio_url, 'audio id', default=audio_id)
formats = [{
'url': audio_url,
'ext': determine_ext(audio_url, 'mp3'),
'vcodec': 'none',
}]
description = self._html_search_regex(
r'<p>(?P<description>.+?)</p>', webpage, 'description',
default=None)
series = self._search_regex(
r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage,
'series', default=program, group='name')
return {
'id': audio_id,
'title': title,
'description': description,
'formats': formats,
'series': series,
'episode_id': episode_id,
}

View File

@@ -730,12 +730,12 @@ class InfoExtractor(object):
video_info['title'] = video_title
return video_info
def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None):
urlrs = orderedSet(
def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
urls = orderedSet(
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
for m in matches)
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
urls, playlist_id=playlist_id, playlist_title=playlist_title)
@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
@@ -2132,15 +2132,18 @@ class InfoExtractor(object):
return is_plain_url, formats
entries = []
# amp-video and amp-audio are very similar to their HTML5 counterparts
# so we wll include them right here (see
# https://www.ampproject.org/docs/reference/components/amp-video)
media_tags = [(media_tag, media_type, '')
for media_tag, media_type
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
media_tags.extend(re.findall(
# We only allow video|audio followed by a whitespace or '>'.
# Allowing more characters may end up in significant slow down (see
# https://github.com/rg3/youtube-dl/issues/11979, example URL:
# http://www.porntrex.com/maps/videositemap.xml).
r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
for media_tag, media_type, media_content in media_tags:
media_info = {
'formats': [],

View File

@@ -510,7 +510,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
# webpage provide more accurate data than series_title from XML
series = self._html_search_regex(
r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)',
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
webpage, 'series', fatal=False)
season = xpath_text(metadata, 'series_title')
@@ -518,7 +518,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
season_number = int_or_none(self._search_regex(
r'(?s)<h4[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h4>\s*<h4>\s*Season (\d+)',
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
webpage, 'season number', default=None))
return {

View File

@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -12,8 +14,8 @@ from ..utils import (
class DailyMailIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
'md5': 'f6129624562251f628296c3a9ffde124',
'info_dict': {
@@ -22,7 +24,16 @@ class DailyMailIE(InfoExtractor):
'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
}
}
}, {
'url': 'http://www.dailymail.co.uk/embed/video/1295863.html',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)',
webpage)
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -147,7 +147,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
view_count_str = self._search_regex(
(r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
webpage, 'view count', fatal=False)
webpage, 'view count', default=None)
if view_count_str:
view_count_str = re.sub(r'\s', '', view_count_str)
view_count = str_to_int(view_count_str)
@@ -159,7 +159,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
r'buildPlayer\(({.+?})\);',
r'var\s+config\s*=\s*({.+?});'],
r'var\s+config\s*=\s*({.+?});',
# New layout regex (see https://github.com/rg3/youtube-dl/issues/13580)
r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
webpage, 'player v5', default=None)
if player_v5:
player = self._parse_json(player_v5, video_id)

View File

@@ -13,7 +13,7 @@ from ..utils import (
class DigitallySpeakingIE(InfoExtractor):
_VALID_URL = r'https?://(?:evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
_VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
_TESTS = [{
# From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
@@ -28,6 +28,10 @@ class DigitallySpeakingIE(InfoExtractor):
# From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
'only_matching': True,
}, {
# From http://www.gdcvault.com/play/1013700/Advanced-Material
'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
'only_matching': True,
}]
def _parse_mp4(self, metadata):

View File

@@ -12,6 +12,7 @@ from ..utils import (
ExtractorError,
clean_html,
int_or_none,
remove_end,
sanitized_Request,
urlencode_postdata
)
@@ -72,15 +73,15 @@ class DramaFeverIE(DramaFeverBaseIE):
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
'info_dict': {
'id': '4512.1',
'ext': 'mp4',
'title': 'Cooking with Shin 4512.1',
'ext': 'flv',
'title': 'Cooking with Shin',
'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
'episode': 'Episode 1',
'episode_number': 1,
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1404336058,
'upload_date': '20140702',
'duration': 343,
'duration': 344,
},
'params': {
# m3u8 download
@@ -90,15 +91,15 @@ class DramaFeverIE(DramaFeverBaseIE):
'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1',
'info_dict': {
'id': '4826.4',
'ext': 'mp4',
'title': 'Mnet Asian Music Awards 2015 4826.4',
'ext': 'flv',
'title': 'Mnet Asian Music Awards 2015',
'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91',
'episode': 'Mnet Asian Music Awards 2015 - Part 3',
'episode_number': 4,
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1450213200,
'upload_date': '20151215',
'duration': 5602,
'duration': 5359,
},
'params': {
# m3u8 download
@@ -122,6 +123,10 @@ class DramaFeverIE(DramaFeverBaseIE):
countries=self._GEO_COUNTRIES)
raise
# title is postfixed with video id for some reason, removing
if info.get('title'):
info['title'] = remove_end(info['title'], video_id).strip()
series_id, episode_number = video_id.split('.')
episode_info = self._download_json(
# We only need a single episode info, so restricting page size to one episode

View File

@@ -118,7 +118,7 @@ class DRTVIE(InfoExtractor):
if target == 'HDS':
f4m_formats = self._extract_f4m_formats(
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
video_id, preference, f4m_id=format_id)
video_id, preference, f4m_id=format_id, fatal=False)
if kind == 'AudioResource':
for f in f4m_formats:
f['vcodec'] = 'none'
@@ -126,7 +126,8 @@ class DRTVIE(InfoExtractor):
elif target == 'HLS':
formats.extend(self._extract_m3u8_formats(
uri, video_id, 'mp4', entry_protocol='m3u8_native',
preference=preference, m3u8_id=format_id))
preference=preference, m3u8_id=format_id,
fatal=False))
else:
bitrate = link.get('Bitrate')
if bitrate:

View File

@@ -11,6 +11,7 @@ from ..compat import (
from ..utils import (
ExtractorError,
int_or_none,
unsmuggle_url,
)
@@ -50,6 +51,10 @@ class EaglePlatformIE(InfoExtractor):
'view_count': int,
},
'skip': 'Georestricted',
}, {
# referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/)
'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306',
'only_matching': True,
}]
@staticmethod
@@ -60,16 +65,40 @@ class EaglePlatformIE(InfoExtractor):
webpage)
if mobj is not None:
return mobj.group('url')
# Basic usage embedding (see http://dultonmedia.github.io/eplayer/)
PLAYER_JS_RE = r'''
<script[^>]+
src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
.+?
'''
# "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/)
mobj = re.search(
r'''(?xs)
<script[^>]+
src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1)
.+?
%s
<div[^>]+
class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+
class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+
data-id=["\'](?P<id>\d+)
''', webpage)
''' % PLAYER_JS_RE, webpage)
if mobj is not None:
return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
# Generalization of "Javascript code usage", "Combined usage" and
# "Usage without attaching to DOM" embeddings (see
# http://dultonmedia.github.io/eplayer/)
mobj = re.search(
r'''(?xs)
%s
<script>
.+?
new\s+EaglePlayer\(
(?:[^,]+\s*,\s*)?
{
.+?
\bid\s*:\s*["\']?(?P<id>\d+)
.+?
}
\s*\)
.+?
</script>
''' % PLAYER_JS_RE, webpage)
if mobj is not None:
return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
@@ -79,9 +108,10 @@ class EaglePlatformIE(InfoExtractor):
if status != 200:
raise ExtractorError(' '.join(response['errors']), expected=True)
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
def _download_json(self, url_or_request, video_id, *args, **kwargs):
try:
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
response = super(EaglePlatformIE, self)._download_json(
url_or_request, video_id, *args, **kwargs)
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError):
response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
@@ -93,11 +123,24 @@ class EaglePlatformIE(InfoExtractor):
return self._download_json(url_or_request, video_id, note)['data'][0]
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url)
host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
headers = {}
query = {
'id': video_id,
}
referrer = smuggled_data.get('referrer')
if referrer:
headers['Referer'] = referrer
query['referrer'] = referrer
player_data = self._download_json(
'http://%s/api/player_data?id=%s' % (host, video_id), video_id)
'http://%s/api/player_data' % host, video_id,
headers=headers, query=query)
media = player_data['data']['playlist']['viewports'][0]['medialist'][0]

View File

@@ -1,15 +1,18 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
try_get,
unified_timestamp,
)
class EggheadCourseIE(InfoExtractor):
IE_DESC = 'egghead.io course'
IE_NAME = 'egghead:course'
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[a-zA-Z_0-9-]+)'
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29,
@@ -22,18 +25,60 @@ class EggheadCourseIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
title = self._html_search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'title')
ul = self._search_regex(r'(?s)<ul class="series-lessons-list">(.*?)</ul>', webpage, 'session list')
course = self._download_json(
'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
found = re.findall(r'(?s)<a class="[^"]*"\s*href="([^"]+)">\s*<li class="item', ul)
entries = [self.url_result(m) for m in found]
entries = [
self.url_result(
'wistia:%s' % lesson['wistia_id'], ie='Wistia',
video_id=lesson['wistia_id'], video_title=lesson.get('title'))
for lesson in course['lessons'] if lesson.get('wistia_id')]
return self.playlist_result(
entries, playlist_id, course.get('title'),
course.get('description'))
class EggheadLessonIE(InfoExtractor):
IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson'
_VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'info_dict': {
'id': 'fv5yotjxcg',
'ext': 'mp4',
'title': 'Create linear data flow with container style types (Box)',
'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
'thumbnail': r're:^https?:.*\.jpg$',
'timestamp': 1481296768,
'upload_date': '20161209',
'duration': 304,
'view_count': 0,
'tags': ['javascript', 'free'],
},
'params': {
'skip_download': True,
},
}
def _real_extract(self, url):
lesson_id = self._match_id(url)
lesson = self._download_json(
'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
return {
'_type': 'playlist',
'id': playlist_id,
'title': title,
'description': self._og_search_description(webpage),
'entries': entries,
'_type': 'url_transparent',
'ie_key': 'Wistia',
'url': 'wistia:%s' % lesson['wistia_id'],
'id': lesson['wistia_id'],
'title': lesson.get('title'),
'description': lesson.get('summary'),
'thumbnail': lesson.get('thumb_nail'),
'timestamp': unified_timestamp(lesson.get('published_at')),
'duration': int_or_none(lesson.get('duration')),
'view_count': int_or_none(lesson.get('plays_count')),
'tags': try_get(lesson, lambda x: x['tag_list'], list),
}

View File

@@ -10,7 +10,25 @@ from ..utils import (
class ESPNIE(InfoExtractor):
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
_VALID_URL = r'''(?x)
https?://
(?:
(?:(?:\w+\.)+)?espn\.go|
(?:www\.)?espn
)\.com/
(?:
(?:
video/clip|
watch/player
)
(?:
\?.*?\bid=|
/_/id/
)
)
(?P<id>\d+)
'''
_TESTS = [{
'url': 'http://espn.go.com/video/clip?id=10365079',
'info_dict': {
@@ -25,20 +43,34 @@ class ESPNIE(InfoExtractor):
'skip_download': True,
},
}, {
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
'url': 'http://espn.go.com/video/clip?id=2743663',
'url': 'https://broadband.espn.go.com/video/clip?id=18910086',
'info_dict': {
'id': '2743663',
'id': '18910086',
'ext': 'mp4',
'title': 'Must-See Moments: Best of the MLS season',
'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
'timestamp': 1449446454,
'upload_date': '20151207',
'title': 'Kyrie spins around defender for two',
'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b',
'timestamp': 1489539155,
'upload_date': '20170315',
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Unable to download f4m manifest'],
}, {
'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672',
'only_matching': True,
}, {
'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774',
'only_matching': True,
}, {
'url': 'http://www.espn.com/watch/player?id=19141491',
'only_matching': True,
}, {
'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
'only_matching': True,
}, {
'url': 'http://www.espn.com/watch/player/_/id/19141491',
'only_matching': True,
}, {
'url': 'http://www.espn.com/video/clip?id=10365079',
'only_matching': True,

View File

@@ -185,6 +185,7 @@ from .chirbit import (
ChirbitProfileIE,
)
from .cinchcast import CinchcastIE
from .cjsw import CJSWIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
from .cliprs import ClipRsIE
@@ -297,7 +298,10 @@ from .dw import (
from .eagleplatform import EaglePlatformIE
from .ebaumsworld import EbaumsWorldIE
from .echomsk import EchoMskIE
from .egghead import EggheadCourseIE
from .egghead import (
EggheadCourseIE,
EggheadLessonIE,
)
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .einthusan import EinthusanIE
@@ -469,6 +473,7 @@ from .jamendo import (
)
from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .joj import JojIE
from .jwplatform import JWPlatformIE
from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE
@@ -651,6 +656,10 @@ from .nextmedia import (
AppleDailyIE,
NextTVIE,
)
from .nexx import (
NexxIE,
NexxEmbedIE,
)
from .nfb import NFBIE
from .nfl import NFLIE
from .nhk import NhkVodIE
@@ -759,6 +768,7 @@ from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .pearvideo import PearVideoIE
from .people import PeopleIE
from .periscope import (
PeriscopeIE,
@@ -1204,7 +1214,8 @@ from .vk import (
)
from .vlive import (
VLiveIE,
VLiveChannelIE
VLiveChannelIE,
VLivePlaylistIE
)
from .vodlocker import VodlockerIE
from .vodpl import VODPlIE
@@ -1280,7 +1291,6 @@ from .yahoo import (
YahooIE,
YahooSearchIE,
)
from .yam import YamIE
from .yandexmusic import (
YandexMusicTrackIE,
YandexMusicAlbumIE,

View File

@@ -43,7 +43,7 @@ class FiveTVIE(InfoExtractor):
'info_dict': {
'id': 'glavnoe',
'ext': 'mp4',
'title': 'Итоги недели с 8 по 14 июня 2015 года',
'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$',
'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
@@ -70,7 +70,8 @@ class FiveTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"',
[r'<div[^>]+?class="flowplayer[^>]+?data-href="([^"]+)"',
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
webpage, 'video url')
title = self._og_search_title(webpage, default=None) or self._search_regex(

View File

@@ -1,10 +1,14 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
unified_timestamp,
)
class FunnyOrDieIE(InfoExtractor):
@@ -18,6 +22,10 @@ class FunnyOrDieIE(InfoExtractor):
'title': 'Heart-Shaped Box: Literal Video Version',
'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
'thumbnail': r're:^http:.*\.jpg$',
'uploader': 'DASjr',
'timestamp': 1317904928,
'upload_date': '20111006',
'duration': 318.3,
},
}, {
'url': 'http://www.funnyordie.com/embed/e402820827',
@@ -27,6 +35,8 @@ class FunnyOrDieIE(InfoExtractor):
'title': 'Please Use This Song (Jon Lajoie)',
'description': 'Please use this to sell something. www.jonlajoie.com',
'thumbnail': r're:^http:.*\.jpg$',
'timestamp': 1398988800,
'upload_date': '20140502',
},
'params': {
'skip_download': True,
@@ -100,15 +110,53 @@ class FunnyOrDieIE(InfoExtractor):
'url': 'http://www.funnyordie.com%s' % src,
}]
post_json = self._search_regex(
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
post = json.loads(post_json)
timestamp = unified_timestamp(self._html_search_meta(
'uploadDate', webpage, 'timestamp', default=None))
uploader = self._html_search_regex(
r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h',
webpage, 'uploader', default=None)
title, description, thumbnail, duration = [None] * 4
medium = self._parse_json(
self._search_regex(
r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium',
default='{}'),
video_id, fatal=False)
if medium:
title = medium.get('title')
duration = float_or_none(medium.get('duration'))
if not timestamp:
timestamp = unified_timestamp(medium.get('publishDate'))
post = self._parse_json(
self._search_regex(
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details',
default='{}'),
video_id, fatal=False)
if post:
if not title:
title = post.get('name')
description = post.get('description')
thumbnail = post.get('picture')
if not title:
title = self._og_search_title(webpage)
if not description:
description = self._og_search_description(webpage)
if not duration:
duration = int_or_none(self._html_search_meta(
('video:duration', 'duration'), webpage, 'duration', default=False))
return {
'id': video_id,
'title': post['name'],
'description': post.get('description'),
'thumbnail': post.get('picture'),
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
'subtitles': subtitles,
}

View File

@@ -36,6 +36,10 @@ from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .nexx import (
NexxIE,
NexxEmbedIE,
)
from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
@@ -57,6 +61,7 @@ from .dailymotion import (
DailymotionIE,
DailymotionCloudIE,
)
from .dailymail import DailyMailIE
from .onionstudios import OnionStudiosIE
from .viewlift import ViewLiftEmbedIE
from .mtv import MTVServicesEmbeddedIE
@@ -91,6 +96,7 @@ from .anvato import AnvatoIE
from .washingtonpost import WashingtonPostIE
from .wistia import WistiaIE
from .mediaset import MediasetIE
from .joj import JojIE
class GenericIE(InfoExtractor):
@@ -759,6 +765,20 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Dailymotion'],
},
# DailyMail embed
{
'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
'info_dict': {
'id': '1495629',
'ext': 'mp4',
'title': 'Care worker punches elderly dementia patient in head 11 times',
'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
},
'add_ie': ['DailyMail'],
'params': {
'skip_download': True,
},
},
# YouTube embed
{
'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
@@ -1185,7 +1205,7 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Kaltura'],
},
# Eagle.Platform embed (generic URL)
# EaglePlatform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
@@ -1199,8 +1219,26 @@ class GenericIE(InfoExtractor):
'view_count': int,
'age_limit': 0,
},
'params': {
'skip_download': True,
},
},
# ClipYou (Eagle.Platform) embed (custom URL)
# referrer protected EaglePlatform embed
{
'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
'info_dict': {
'id': '582306',
'ext': 'mp4',
'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 3382,
'view_count': int,
},
'params': {
'skip_download': True,
},
},
# ClipYou (EaglePlatform) embed (custom URL)
{
'url': 'http://muz-tv.ru/play/7129/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
@@ -1212,6 +1250,9 @@ class GenericIE(InfoExtractor):
'duration': 216,
'view_count': int,
},
'params': {
'skip_download': True,
},
},
# Pladform embed
{
@@ -1512,6 +1553,22 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['BrightcoveLegacy'],
},
# Nexx embed
{
'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503',
'info_dict': {
'id': '247746',
'ext': 'mp4',
'title': "Yesterday's Jam (OV)",
'description': 'md5:09bc0984723fed34e2581624a84e05f0',
'timestamp': 1492594816,
'upload_date': '20170419',
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
},
# Facebook <iframe> embed
{
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
@@ -1749,6 +1806,26 @@ class GenericIE(InfoExtractor):
},
'add_ie': [MediasetIE.ie_key()],
},
{
# JOJ.sk embeds
'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
'info_dict': {
'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
'title': 'Slovenskom sa prehnala vlna silných búrok',
},
'playlist_mincount': 5,
'add_ie': [JojIE.ie_key()],
},
{
# AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
'url': 'https://tvrain.ru/amp/418921/',
'md5': 'cc00413936695987e8de148b67d14f1d',
'info_dict': {
'id': '418921',
'ext': 'mp4',
'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
},
},
# {
# # TODO: find another test
# # http://schema.org/VideoObject
@@ -2076,6 +2153,16 @@ class GenericIE(InfoExtractor):
if bc_urls:
return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
# Look for Nexx embeds
nexx_urls = NexxIE._extract_urls(webpage)
if nexx_urls:
return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
# Look for Nexx iFrame embeds
nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
if nexx_embed_urls:
return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
# Look for ThePlatform embeds
tp_urls = ThePlatformIE._extract_urls(webpage)
if tp_urls:
@@ -2148,6 +2235,12 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
# Look for DailyMail embeds
dailymail_urls = DailyMailIE._extract_urls(webpage)
if dailymail_urls:
return self.playlist_from_matches(
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
# Look for embedded Wistia player
wistia_url = WistiaIE._extract_url(webpage)
if wistia_url:
@@ -2443,12 +2536,12 @@ class GenericIE(InfoExtractor):
if kaltura_url:
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
# Look for Eagle.Platform embeds
# Look for EaglePlatform embeds
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
if eagleplatform_url:
return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
# Look for ClipYou (uses Eagle.Platform) embeds
# Look for ClipYou (uses EaglePlatform) embeds
mobj = re.search(
r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
if mobj is not None:
@@ -2677,7 +2770,7 @@ class GenericIE(InfoExtractor):
rutube_urls = RutubeIE._extract_urls(webpage)
if rutube_urls:
return self.playlist_from_matches(
rutube_urls, ie=RutubeIE.ie_key())
rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
# Look for WashingtonPost embeds
wapo_urls = WashingtonPostIE._extract_urls(webpage)
@@ -2691,6 +2784,12 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
# Look for JOJ.sk embeds
joj_urls = JojIE._extract_urls(webpage)
if joj_urls:
return self.playlist_from_matches(
joj_urls, video_id, video_title, ie=JojIE.ie_key())
def merge_dicts(dict1, dict2):
merged = {}
for k, v in dict1.items():

View File

@@ -5,9 +5,10 @@ import json
from .common import InfoExtractor
from ..utils import (
unescapeHTML,
qualities,
determine_ext,
int_or_none,
qualities,
unescapeHTML,
)
@@ -15,7 +16,7 @@ class GiantBombIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
_TEST = {
'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
'md5': '57badeface303ecf6b98b812de1b9018',
'md5': 'c8ea694254a59246a42831155dec57ac',
'info_dict': {
'id': '2300-9782',
'display_id': 'quick-look-destiny-the-dark-below',
@@ -51,11 +52,16 @@ class GiantBombIE(InfoExtractor):
for format_id, video_url in video['videoStreams'].items():
if format_id == 'f4m_stream':
continue
if video_url.endswith('.f4m'):
ext = determine_ext(video_url)
if ext == 'f4m':
f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.3.1', display_id)
if f4m_formats:
f4m_formats[0]['quality'] = quality(format_id)
formats.extend(f4m_formats)
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, display_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
else:
formats.append({
'url': video_url,

View File

@@ -92,7 +92,7 @@ class GoogleDriveIE(InfoExtractor):
if resolution:
f.update({
'width': resolution[0],
'height': resolution[0],
'height': resolution[1],
})
formats.append(f)
self._sort_formats(formats)

View File

@@ -59,12 +59,18 @@ class ITVIE(InfoExtractor):
def _add_sub_element(element, name):
return etree.SubElement(element, _add_ns(name))
production_id = (
params.get('data-video-autoplay-id') or
'%s#001' % (
params.get('data-video-episode-id') or
video_id.replace('a', '/')))
req_env = etree.Element(_add_ns('soapenv:Envelope'))
_add_sub_element(req_env, 'soapenv:Header')
body = _add_sub_element(req_env, 'soapenv:Body')
get_playlist = _add_sub_element(body, ('tem:GetPlaylist'))
request = _add_sub_element(get_playlist, 'tem:request')
_add_sub_element(request, 'itv:ProductionId').text = params['data-video-id']
_add_sub_element(request, 'itv:ProductionId').text = production_id
_add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper()
vodcrid = _add_sub_element(request, 'itv:Vodcrid')
_add_sub_element(vodcrid, 'com:Id')

100
youtube_dl/extractor/joj.py Executable file
View File

@@ -0,0 +1,100 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
js_to_json,
try_get,
)
class JojIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
joj:|
https?://media\.joj\.sk/embed/
)
(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
'''
_TESTS = [{
'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
'info_dict': {
'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
'ext': 'mp4',
'title': 'NOVÉ BÝVANIE',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 3118,
}
}, {
'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//media\.joj\.sk/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
webpage)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://media.joj.sk/embed/%s' % video_id, video_id)
title = self._search_regex(
(r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
r'<title>(?P<title>[^<]+)'), webpage, 'title',
default=None, group='title') or self._og_search_title(webpage)
bitrates = self._parse_json(
self._search_regex(
r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates',
default='{}'),
video_id, transform_source=js_to_json, fatal=False)
formats = []
for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
if isinstance(format_url, compat_str):
height = self._search_regex(
r'(\d+)[pP]\.', format_url, 'height', default=None)
formats.append({
'url': format_url,
'format_id': '%sp' % height if height else None,
'height': int(height),
})
if not formats:
playlist = self._download_xml(
'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
video_id)
for file_el in playlist.findall('./files/file'):
path = file_el.get('path')
if not path:
continue
format_id = file_el.get('id') or file_el.get('label')
formats.append({
'url': 'http://n16.joj.sk/storage/%s' % path.replace(
'dat/', '', 1),
'format_id': format_id,
'height': int_or_none(self._search_regex(
r'(\d+)[pP]', format_id or path, 'height',
default=None)),
})
self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage)
duration = int_or_none(self._search_regex(
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}

View File

@@ -324,7 +324,7 @@ class KalturaIE(InfoExtractor):
if captions:
for caption in captions.get('objects', []):
# Continue if caption is not ready
if f.get('status') != 2:
if caption.get('status') != 2:
continue
if not caption.get('id'):
continue

View File

@@ -48,7 +48,7 @@ class KarriereVideosIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
title = (self._html_search_meta('title', webpage, default=None) or
self._search_regex(r'<h1 class="title">([^<]+)</h1>'))
self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))
video_id = self._search_regex(
r'/config/video/(.+?)\.xml', webpage, 'video id')

View File

@@ -83,7 +83,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
hls_url = rendition.find('./src').text
formats.extend(self._extract_m3u8_formats(
hls_url, video_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id='hls'))
m3u8_id='hls', fatal=False))
else:
# fms
try:
@@ -106,7 +106,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
}])
except (KeyError, TypeError):
raise ExtractorError('Invalid rendition field.')
self._sort_formats(formats)
if formats:
self._sort_formats(formats)
return formats
def _extract_subtitles(self, mdoc, mtvn_id):
@@ -133,8 +134,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
mediagen_url += 'acceptMethods='
mediagen_url += 'hls' if use_hls else 'fms'
mediagen_doc = self._download_xml(mediagen_url, video_id,
'Downloading video urls')
mediagen_doc = self._download_xml(
mediagen_url, video_id, 'Downloading video urls', fatal=False)
if mediagen_doc is False:
return None
item = mediagen_doc.find('./video/item')
if item is not None and item.get('type') == 'text':
@@ -174,6 +178,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id)
# Some parts of complete video may be missing (e.g. missing Act 3 in
# http://www.southpark.de/alle-episoden/s14e01-sexual-healing)
if not formats:
return None
self._sort_formats(formats)
return {
'title': title,
'formats': formats,
@@ -205,9 +216,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
title = xpath_text(idoc, './channel/title')
description = xpath_text(idoc, './channel/description')
entries = []
for item in idoc.findall('.//item'):
info = self._get_video_info(item, use_hls)
if info:
entries.append(info)
return self.playlist_result(
[self._get_video_info(item, use_hls) for item in idoc.findall('.//item')],
playlist_title=title, playlist_description=description)
entries, playlist_title=title, playlist_description=description)
def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
triforce_feed = self._parse_json(self._search_regex(

View File

@@ -0,0 +1,271 @@
# coding: utf-8
from __future__ import unicode_literals
import hashlib
import random
import re
import time
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
try_get,
urlencode_postdata,
)
class NexxIE(InfoExtractor):
_VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/(?P<id>\d+)'
_TESTS = [{
# movie
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
'md5': '16746bfc28c42049492385c989b26c4a',
'info_dict': {
'id': '128907',
'ext': 'mp4',
'title': 'Stiftung Warentest',
'alt_title': 'Wie ein Test abläuft',
'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
'release_year': 2013,
'creator': 'SPIEGEL TV',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2509,
'timestamp': 1384264416,
'upload_date': '20131112',
},
'params': {
'format': 'bestvideo',
},
}, {
# episode
'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858',
'info_dict': {
'id': '247858',
'ext': 'mp4',
'title': 'Return of the Golden Child (OV)',
'description': 'md5:5d969537509a92b733de21bae249dc63',
'release_year': 2017,
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1397,
'timestamp': 1495033267,
'upload_date': '20170517',
'episode_number': 2,
'season_number': 2,
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
}, {
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
# Reference:
# 1. https://nx-s.akamaized.net/files/201510/44.pdf
entries = []
# JavaScript Integration
mobj = re.search(
r'<script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
webpage)
if mobj:
domain_id = mobj.group('id')
for video_id in re.findall(
r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
webpage):
entries.append(
'https://api.nexx.cloud/v3/%s/videos/byid/%s'
% (domain_id, video_id))
# TODO: support more embed formats
return entries
@staticmethod
def _extract_url(webpage):
return NexxIE._extract_urls(webpage)[0]
def _handle_error(self, response):
status = int_or_none(try_get(
response, lambda x: x['metadata']['status']) or 200)
if 200 <= status < 300:
return
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']),
expected=True)
def _call_api(self, domain_id, path, video_id, data=None, headers={}):
headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
result = self._download_json(
'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id,
'Downloading %s JSON' % path, data=urlencode_postdata(data),
headers=headers)
self._handle_error(result)
return result['result']
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
domain_id, video_id = mobj.group('domain_id', 'id')
# Reverse engineered from JS code (see getDeviceID function)
device_id = '%d:%d:%d%d' % (
random.randint(1, 4), int(time.time()),
random.randint(1e4, 99999), random.randint(1, 9))
result = self._call_api(domain_id, 'session/init', video_id, data={
'nxp_devh': device_id,
'nxp_userh': '',
'precid': '0',
'playlicense': '0',
'screenx': '1920',
'screeny': '1080',
'playerversion': '6.0.00',
'gateway': 'html5',
'adGateway': '',
'explicitlanguage': 'en-US',
'addTextTemplates': '1',
'addDomainData': '1',
'addAdModel': '1',
}, headers={
'X-Request-Enable-Auth-Fallback': '1',
})
cid = result['general']['cid']
# As described in [1] X-Request-Token generation algorithm is
# as follows:
# md5( operation + domain_id + domain_secret )
# where domain_secret is a static value that will be given by nexx.tv
# as per [1]. Here is how this "secret" is generated (reversed
# from _play.api.init function, search for clienttoken). So it's
# actually not static and not that much of a secret.
# 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
secret = result['device']['clienttoken'][int(device_id[0]):]
secret = secret[0:len(secret) - int(device_id[-1])]
op = 'byid'
# Reversed from JS code for _play.api.call function (search for
# X-Request-Token)
request_token = hashlib.md5(
''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
video = self._call_api(
domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
'addInteractionOptions': '1',
'addStatusDetails': '1',
'addStreamDetails': '1',
'addCaptions': '1',
'addScenes': '1',
'addHotSpots': '1',
'addBumpers': '1',
'captionFormat': 'data',
}, headers={
'X-Request-CID': cid,
'X-Request-Token': request_token,
})
general = video['general']
title = general['title']
stream_data = video['streamdata']
language = general.get('language_raw') or ''
# TODO: reverse more cdns and formats
cdn = stream_data['cdnType']
assert cdn == 'azure'
azure_locator = stream_data['azureLocator']
AZURE_URL = 'http://nx-p%02d.akamaized.net/'
for secure in ('s', ''):
cdn_shield = stream_data.get('cdnShieldHTTP%s' % secure.upper())
if cdn_shield:
azure_base = 'http%s://%s' % (secure, cdn_shield)
break
else:
azure_base = AZURE_URL % int(stream_data['azureAccount'].replace('nexxplayplus', ''))
is_ml = ',' in language
azure_m3u8_url = '%s%s/%s_src%s.ism/Manifest(format=m3u8-aapl)' % (
azure_base, azure_locator, video_id, ('_manifest' if is_ml else ''))
protection_token = try_get(
video, lambda x: x['protectiondata']['token'], compat_str)
if protection_token:
azure_m3u8_url += '?hdnts=%s' % protection_token
formats = self._extract_m3u8_formats(
azure_m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='%s-hls' % cdn)
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'alt_title': general.get('subtitle'),
'description': general.get('description'),
'release_year': int_or_none(general.get('year')),
'creator': general.get('studio') or general.get('studio_adref'),
'thumbnail': try_get(
video, lambda x: x['imagedata']['thumb'], compat_str),
'duration': parse_duration(general.get('runtime')),
'timestamp': int_or_none(general.get('uploaded')),
'episode_number': int_or_none(try_get(
video, lambda x: x['episodedata']['episode'])),
'season_number': int_or_none(try_get(
video, lambda x: x['episodedata']['season'])),
'formats': formats,
}
class NexxEmbedIE(InfoExtractor):
_VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
'md5': '16746bfc28c42049492385c989b26c4a',
'info_dict': {
'id': '161464',
'ext': 'mp4',
'title': 'Nervenkitzel Achterbahn',
'alt_title': 'Karussellbauer in Deutschland',
'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
'release_year': 2005,
'creator': 'SPIEGEL TV',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2761,
'timestamp': 1394021479,
'upload_date': '20140305',
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
}
@staticmethod
def _extract_urls(webpage):
# Reference:
# 1. https://nx-s.akamaized.net/files/201510/44.pdf
# iFrame Embed Integration
return [mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1',
webpage)]
def _real_extract(self, url):
embed_id = self._match_id(url)
webpage = self._download_webpage(url, embed_id)
return self.url_result(NexxIE._extract_url(webpage), ie=NexxIE.ie_key())

View File

@@ -1,23 +1,22 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import json
import datetime
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urlparse,
)
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
parse_duration,
parse_iso8601,
sanitized_Request,
xpath_text,
determine_ext,
urlencode_postdata,
xpath_text,
)
@@ -101,19 +100,24 @@ class NiconicoIE(InfoExtractor):
return True
# Log in
login_ok = True
login_form_strs = {
'mail': username,
'mail_tel': username,
'password': password,
}
login_data = urlencode_postdata(login_form_strs)
request = sanitized_Request(
'https://secure.nicovideo.jp/secure/login', login_data)
login_results = self._download_webpage(
request, None, note='Logging in', errnote='Unable to log in')
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
urlh = self._request_webpage(
'https://account.nicovideo.jp/api/v1/login', None,
note='Logging in', errnote='Unable to log in',
data=urlencode_postdata(login_form_strs))
if urlh is False:
login_ok = False
else:
parts = compat_urlparse.urlparse(urlh.geturl())
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
login_ok = False
if not login_ok:
self._downloader.report_warning('unable to log in: bad username or password')
return False
return True
return login_ok
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -28,7 +28,7 @@ class NPOBaseIE(InfoExtractor):
class NPOIE(NPOBaseIE):
IE_NAME = 'npo'
IE_DESC = 'npo.nl and ntr.nl'
IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl'
_VALID_URL = r'''(?x)
(?:
npo:|
@@ -38,7 +38,7 @@ class NPOIE(NPOBaseIE):
npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
ntr\.nl/(?:[^/]+/){2,}|
omroepwnl\.nl/video/fragment/[^/]+__|
zapp\.nl/[^/]+/[^/]+/
(?:zapp|npo3)\.nl/(?:[^/]+/){2}
)
)
(?P<id>[^/?#]+)
@@ -146,6 +146,9 @@ class NPOIE(NPOBaseIE):
}, {
'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
'only_matching': True,
}, {
'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870',
'only_matching': True,
}, {
# live stream
'url': 'npo:LI_NL1_4188102',
@@ -341,7 +344,7 @@ class NPOLiveIE(NPOBaseIE):
webpage = self._download_webpage(url, display_id)
live_id = self._search_regex(
r'data-prid="([^"]+)"', webpage, 'live id')
[r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id')
return {
'_type': 'url_transparent',

View File

@@ -0,0 +1,63 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
qualities,
unified_timestamp,
)
class PearVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>\d+)'
_TEST = {
'url': 'http://www.pearvideo.com/video_1076290',
'info_dict': {
'id': '1076290',
'ext': 'mp4',
'title': '小浣熊在主人家玻璃上滚石头:没砸',
'description': 'md5:01d576b747de71be0ee85eb7cac25f9d',
'timestamp': 1494275280,
'upload_date': '20170508',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
quality = qualities(
('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src'))
formats = [{
'url': mobj.group('url'),
'format_id': mobj.group('id'),
'quality': quality(mobj.group('id')),
} for mobj in re.finditer(
r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2',
webpage)]
self._sort_formats(formats)
title = self._search_regex(
(r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)',
r'<[^>]+\bdata-title=(["\'])(?P<value>(?:(?!\1).)+)\1'),
webpage, 'title', group='value')
description = self._search_regex(
(r'<div[^>]+\bclass=(["\'])summary\1[^>]*>(?P<value>[^<]+)',
r'<[^>]+\bdata-summary=(["\'])(?P<value>(?:(?!\1).)+)\1'),
webpage, 'description', default=None,
group='value') or self._html_search_meta('Description', webpage)
timestamp = unified_timestamp(self._search_regex(
r'<div[^>]+\bclass=["\']date["\'][^>]*>([^<]+)',
webpage, 'timestamp', fatal=False))
return {
'id': video_id,
'title': title,
'description': description,
'timestamp': timestamp,
'formats': formats,
}

View File

@@ -49,7 +49,7 @@ class PeriscopeIE(PeriscopeBaseIE):
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?periscope\.tv/(?:(?!\1).)+)\1', webpage)
r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1', webpage)
if mobj:
return mobj.group('url')

View File

@@ -31,7 +31,7 @@ class SlideshareIE(InfoExtractor):
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
slideshare_obj = self._search_regex(
r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);',
r'\$\.extend\(.*?slideshare_object,\s*(\{.*?\})\);',
webpage, 'slideshare object')
info = json.loads(slideshare_obj)
if info['slideshow']['type'] != 'video':

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .nexx import NexxEmbedIE
from .spiegeltv import SpiegeltvIE
from ..compat import compat_urlparse
from ..utils import (
@@ -121,6 +122,26 @@ class SpiegelArticleIE(InfoExtractor):
},
'playlist_count': 6,
}, {
# Nexx iFrame embed
'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html',
'info_dict': {
'id': '161464',
'ext': 'mp4',
'title': 'Nervenkitzel Achterbahn',
'alt_title': 'Karussellbauer in Deutschland',
'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
'release_year': 2005,
'creator': 'SPIEGEL TV',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2761,
'timestamp': 1394021479,
'upload_date': '20140305',
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
}]
def _real_extract(self, url):
@@ -143,6 +164,9 @@ class SpiegelArticleIE(InfoExtractor):
entries = [
self.url_result(compat_urlparse.urljoin(
self.http_scheme() + '//spiegel.de/', embed_path))
for embed_path in embeds
]
return self.playlist_result(entries)
for embed_path in embeds]
if embeds:
return self.playlist_result(entries)
return self.playlist_from_matches(
NexxEmbedIE._extract_urls(webpage), ie=NexxEmbedIE.ie_key())

View File

@@ -1,114 +1,17 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
determine_ext,
float_or_none,
)
from .nexx import NexxIE
class SpiegeltvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.tv/(?:#/)?filme/(?P<id>[\-a-z0-9]+)'
_TESTS = [{
'url': 'http://www.spiegel.tv/filme/flug-mh370/',
'info_dict': {
'id': 'flug-mh370',
'ext': 'm4v',
'title': 'Flug MH370',
'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines',
'thumbnail': r're:http://.*\.jpg$',
},
'params': {
# m3u8 download
'skip_download': True,
}
}, {
'url': 'http://www.spiegel.tv/#/filme/alleskino-die-wahrheit-ueber-maenner/',
_VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P<id>\d+)'
_TEST = {
'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/',
'only_matching': True,
}]
}
def _real_extract(self, url):
if '/#/' in url:
url = url.replace('/#/', '/')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title')
apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'
version_json = self._download_json(
'%s/version.json' % apihost, video_id,
note='Downloading version information')
version_name = version_json['version_name']
slug_json = self._download_json(
'%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id),
video_id,
note='Downloading object information')
oid = slug_json['object_id']
media_json = self._download_json(
'%s/%s/restapi/media/%s.json' % (apihost, version_name, oid),
video_id, note='Downloading media information')
uuid = media_json['uuid']
is_wide = media_json['is_wide']
server_json = self._download_json(
'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json',
video_id, note='Downloading server information')
format = '16x9' if is_wide else '4x3'
formats = []
for streamingserver in server_json['streamingserver']:
endpoint = streamingserver.get('endpoint')
if not endpoint:
continue
play_path = 'mp4:%s_spiegeltv_0500_%s.m4v' % (uuid, format)
if endpoint.startswith('rtmp'):
formats.append({
'url': endpoint,
'format_id': 'rtmp',
'app': compat_urllib_parse_urlparse(endpoint).path[1:],
'play_path': play_path,
'player_path': 'http://prod-static.spiegel.tv/frontend-076.swf',
'ext': 'flv',
'rtmp_live': True,
})
elif determine_ext(endpoint) == 'm3u8':
formats.append({
'url': endpoint.replace('[video]', play_path),
'ext': 'm4v',
'format_id': 'hls', # Prefer hls since it allows to workaround georestriction
'protocol': 'm3u8',
'preference': 1,
'http_headers': {
'Accept-Encoding': 'deflate', # gzip causes trouble on the server side
},
})
else:
formats.append({
'url': endpoint,
})
self._check_formats(formats, video_id)
thumbnails = []
for image in media_json['images']:
thumbnails.append({
'url': image['url'],
'width': image['width'],
'height': image['height'],
})
description = media_json['subtitle']
duration = float_or_none(media_json.get('duration_in_ms'), scale=1000)
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'thumbnails': thumbnails,
'formats': formats,
}
return self.url_result(
'https://api.nexx.cloud/v3/748/videos/byid/%s'
% self._match_id(url), ie=NexxIE.ie_key())

View File

@@ -4,7 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import js_to_json
from ..utils import (
determine_ext,
int_or_none,
js_to_json,
)
class SportBoxEmbedIE(InfoExtractor):
@@ -14,8 +18,10 @@ class SportBoxEmbedIE(InfoExtractor):
'info_dict': {
'id': '211355',
'ext': 'mp4',
'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
'title': '211355',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 292,
'view_count': int,
},
'params': {
# m3u8 download
@@ -24,6 +30,9 @@ class SportBoxEmbedIE(InfoExtractor):
}, {
'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
'only_matching': True,
}, {
'url': 'https://news.sportbox.ru/vdl/player/media/193095',
'only_matching': True,
}]
@staticmethod
@@ -37,36 +46,34 @@ class SportBoxEmbedIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
wjplayer_data = self._parse_json(
self._search_regex(
r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'),
video_id, transform_source=js_to_json)
formats = []
def cleanup_js(code):
# desktop_advert_config contains complex Javascripts and we don't need it
return js_to_json(re.sub(r'desktop_advert_config.*', '', code))
jwplayer_data = self._parse_json(self._search_regex(
r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id,
transform_source=cleanup_js)
hls_url = jwplayer_data.get('hls_url')
if hls_url:
formats.extend(self._extract_m3u8_formats(
hls_url, video_id, ext='mp4', m3u8_id='hls'))
rtsp_url = jwplayer_data.get('rtsp_url')
if rtsp_url:
formats.append({
'url': rtsp_url,
'format_id': 'rtsp',
})
for source in wjplayer_data['sources']:
src = source.get('src')
if not src:
continue
if determine_ext(src) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
else:
formats.append({
'url': src,
})
self._sort_formats(formats)
title = jwplayer_data['node_title']
thumbnail = jwplayer_data.get('image_url')
view_count = int_or_none(self._search_regex(
r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'title': video_id,
'thumbnail': wjplayer_data.get('poster'),
'duration': int_or_none(wjplayer_data.get('duration')),
'view_count': view_count,
'formats': formats,
}

View File

@@ -8,6 +8,9 @@ from ..utils import extract_attributes
class TBSIE(TurnerBaseIE):
# https://github.com/rg3/youtube-dl/issues/13658
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html'
_TESTS = [{
'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html',
@@ -17,7 +20,8 @@ class TBSIE(TurnerBaseIE):
'ext': 'mp4',
'title': 'Theatrical Trailer',
'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.',
}
},
'skip': 'TBS videos are deleted after a while',
}, {
'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html',
'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56',
@@ -26,7 +30,8 @@ class TBSIE(TurnerBaseIE):
'ext': 'mp4',
'title': 'You Better Run',
'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.',
}
},
'skip': 'TBS videos are deleted after a while',
}]
def _real_extract(self, url):

View File

@@ -271,20 +271,22 @@ class TEDIE(InfoExtractor):
}
def _get_subtitles(self, video_id, talk_info):
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
if languages:
sub_lang_list = {}
for l in languages:
sub_lang_list[l] = [
{
'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
'ext': ext,
}
for ext in ['ted', 'srt']
]
return sub_lang_list
else:
return {}
sub_lang_list = {}
for language in try_get(
talk_info,
(lambda x: x['downloads']['languages'],
lambda x: x['languages']), list):
lang_code = language.get('languageCode') or language.get('ianaCode')
if not lang_code:
continue
sub_lang_list[lang_code] = [
{
'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, lang_code, ext),
'ext': ext,
}
for ext in ['ted', 'srt']
]
return sub_lang_list
def _watch_info(self, url, name):
webpage = self._download_webpage(url, name)

View File

@@ -7,20 +7,38 @@ from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
determine_ext,
float_or_none,
xpath_text,
remove_end,
int_or_none,
dict_get,
ExtractorError,
float_or_none,
int_or_none,
remove_end,
try_get,
xpath_text,
)
from .periscope import PeriscopeIE
class TwitterBaseIE(InfoExtractor):
def _get_vmap_video_url(self, vmap_url, video_id):
def _extract_formats_from_vmap_url(self, vmap_url, video_id):
vmap_data = self._download_xml(vmap_url, video_id)
return xpath_text(vmap_data, './/MediaFile').strip()
video_url = xpath_text(vmap_data, './/MediaFile').strip()
if determine_ext(video_url) == 'm3u8':
return self._extract_m3u8_formats(
video_url, video_id, ext='mp4', m3u8_id='hls',
entry_protocol='m3u8_native')
return [{
'url': video_url,
}]
@staticmethod
def _search_dimensions_in_video_url(a_format, video_url):
m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
if m:
a_format.update({
'width': int(m.group('width')),
'height': int(m.group('height')),
})
class TwitterCardIE(TwitterBaseIE):
@@ -36,7 +54,8 @@ class TwitterCardIE(TwitterBaseIE):
'title': 'Twitter Card',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 30.033,
}
},
'skip': 'Video gone',
},
{
'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
@@ -48,6 +67,7 @@ class TwitterCardIE(TwitterBaseIE):
'thumbnail': r're:^https?://.*\.jpg',
'duration': 80.155,
},
'skip': 'Video gone',
},
{
'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
@@ -65,7 +85,7 @@ class TwitterCardIE(TwitterBaseIE):
},
{
'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
'md5': 'ab2745d0b0ce53319a534fccaa986439',
'md5': '6dabeaca9e68cbb71c99c322a4b42a11',
'info_dict': {
'id': 'iBb2x00UVlv',
'ext': 'mp4',
@@ -73,16 +93,17 @@ class TwitterCardIE(TwitterBaseIE):
'uploader_id': '1189339351084113920',
'uploader': 'ArsenalTerje',
'title': 'Vine by ArsenalTerje',
'timestamp': 1447451307,
},
'add_ie': ['Vine'],
}, {
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
'md5': '3846d0a07109b5ab622425449b59049d',
'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
'info_dict': {
'id': '705235433198714880',
'ext': 'mp4',
'title': 'Twitter web player',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnail': r're:^https?://.*',
},
}, {
'url': 'https://twitter.com/i/videos/752274308186120192',
@@ -90,6 +111,59 @@ class TwitterCardIE(TwitterBaseIE):
},
]
def _parse_media_info(self, media_info, video_id):
formats = []
for media_variant in media_info.get('variants', []):
media_url = media_variant['url']
if media_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls'))
elif media_url.endswith('.mpd'):
formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash'))
else:
vbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000)
a_format = {
'url': media_url,
'format_id': 'http-%d' % vbr if vbr else 'http',
'vbr': vbr,
}
# Reported bitRate may be zero
if not a_format['vbr']:
del a_format['vbr']
self._search_dimensions_in_video_url(a_format, media_url)
formats.append(a_format)
return formats
def _extract_mobile_formats(self, username, video_id):
webpage = self._download_webpage(
'https://mobile.twitter.com/%s/status/%s' % (username, video_id),
video_id, 'Downloading mobile webpage',
headers={
# A recent mobile UA is necessary for `gt` cookie
'User-Agent': 'Mozilla/5.0 (Android 6.0.1; Mobile; rv:54.0) Gecko/54.0 Firefox/54.0',
})
main_script_url = self._html_search_regex(
r'<script[^>]+src="([^"]+main\.[^"]+)"', webpage, 'main script URL')
main_script = self._download_webpage(
main_script_url, video_id, 'Downloading main script')
bearer_token = self._search_regex(
r'BEARER_TOKEN\s*:\s*"([^"]+)"',
main_script, 'bearer token')
guest_token = self._search_regex(
r'document\.cookie\s*=\s*decodeURIComponent\("gt=(\d+)',
webpage, 'guest token')
api_data = self._download_json(
'https://api.twitter.com/2/timeline/conversation/%s.json' % video_id,
video_id, 'Downloading mobile API data',
headers={
'Authorization': 'Bearer ' + bearer_token,
'x-guest-token': guest_token,
})
media_info = try_get(api_data, lambda o: o['globalObjects']['tweets'][video_id]
['extended_entities']['media'][0]['video_info']) or {}
return self._parse_media_info(media_info, video_id)
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -117,14 +191,6 @@ class TwitterCardIE(TwitterBaseIE):
if periscope_url:
return self.url_result(periscope_url, PeriscopeIE.ie_key())
def _search_dimensions_in_video_url(a_format, video_url):
m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
if m:
a_format.update({
'width': int(m.group('width')),
'height': int(m.group('height')),
})
video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
if video_url:
@@ -135,15 +201,14 @@ class TwitterCardIE(TwitterBaseIE):
'url': video_url,
}
_search_dimensions_in_video_url(f, video_url)
self._search_dimensions_in_video_url(f, video_url)
formats.append(f)
vmap_url = config.get('vmapUrl') or config.get('vmap_url')
if vmap_url:
formats.append({
'url': self._get_vmap_video_url(vmap_url, video_id),
})
formats.extend(
self._extract_formats_from_vmap_url(vmap_url, video_id))
media_info = None
@@ -152,29 +217,14 @@ class TwitterCardIE(TwitterBaseIE):
media_info = entity['mediaInfo']
if media_info:
for media_variant in media_info['variants']:
media_url = media_variant['url']
if media_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls'))
elif media_url.endswith('.mpd'):
formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash'))
else:
vbr = int_or_none(media_variant.get('bitRate'), scale=1000)
a_format = {
'url': media_url,
'format_id': 'http-%d' % vbr if vbr else 'http',
'vbr': vbr,
}
# Reported bitRate may be zero
if not a_format['vbr']:
del a_format['vbr']
_search_dimensions_in_video_url(a_format, media_url)
formats.append(a_format)
formats.extend(self._parse_media_info(media_info, video_id))
duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
username = config.get('user', {}).get('screen_name')
if username:
formats.extend(self._extract_mobile_formats(username, video_id))
self._remove_duplicate_formats(formats)
self._sort_formats(formats)
title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
@@ -255,10 +305,10 @@ class TwitterIE(InfoExtractor):
'info_dict': {
'id': '700207533655363584',
'ext': 'mp4',
'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel',
'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
'title': 'Donte - BEAT PROD: @suhmeduh #Damndaniel',
'description': 'Donte on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'JG',
'uploader': 'Donte',
'uploader_id': 'jaydingeer',
},
'params': {
@@ -270,9 +320,11 @@ class TwitterIE(InfoExtractor):
'info_dict': {
'id': 'MIOxnrUteUd',
'ext': 'mp4',
'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
'uploader': 'TAKUMA',
'uploader_id': '1004126642786242560',
'title': 'FilmDrunk - Vine of the day',
'description': 'FilmDrunk on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"',
'uploader': 'FilmDrunk',
'uploader_id': 'Filmdrunk',
'timestamp': 1402826626,
'upload_date': '20140615',
},
'add_ie': ['Vine'],
@@ -294,13 +346,28 @@ class TwitterIE(InfoExtractor):
'info_dict': {
'id': '1zqKVVlkqLaKB',
'ext': 'mp4',
'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
'title': 'Sgt Kerry Schmidt - LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence',
'description': 'Sgt Kerry Schmidt on Twitter: "LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence https://t.co/EKrVgIXF3s"',
'upload_date': '20160923',
'uploader_id': 'OPP_HSD',
'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
'uploader': 'Sgt Kerry Schmidt',
'timestamp': 1474613214,
},
'add_ie': ['Periscope'],
}, {
# has mp4 formats via mobile API
'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
'info_dict': {
'id': '852138619213144067',
'ext': 'mp4',
'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"',
'uploader': 'عالم الأخبار',
'uploader_id': 'news_al3alm',
},
'params': {
'format': 'best[format_id^=http-]',
},
}]
def _real_extract(self, url):
@@ -393,7 +460,7 @@ class TwitterAmplifyIE(TwitterBaseIE):
vmap_url = self._html_search_meta(
'twitter:amplify:vmap', webpage, 'vmap url')
video_url = self._get_vmap_video_url(vmap_url, video_id)
formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
thumbnails = []
thumbnail = self._html_search_meta(
@@ -415,11 +482,10 @@ class TwitterAmplifyIE(TwitterBaseIE):
})
video_w, video_h = _find_dimension('player')
formats = [{
'url': video_url,
formats[0].update({
'width': video_w,
'height': video_h,
}]
})
return {
'id': video_id,

View File

@@ -12,47 +12,46 @@ from ..utils import (
class VeohIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)'
_VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
_TESTS = [
{
'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
'md5': '620e68e6a3cff80086df3348426c9ca3',
'info_dict': {
'id': '56314296',
'ext': 'mp4',
'title': 'Straight Backs Are Stronger',
'uploader': 'LUMOback',
'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
},
_TESTS = [{
'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
'md5': '620e68e6a3cff80086df3348426c9ca3',
'info_dict': {
'id': '56314296',
'ext': 'mp4',
'title': 'Straight Backs Are Stronger',
'uploader': 'LUMOback',
'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
},
{
'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
'info_dict': {
'id': '27701988',
'ext': 'mp4',
'title': 'Chile workers cover up to avoid skin damage',
'description': 'md5:2bd151625a60a32822873efc246ba20d',
'uploader': 'afp-news',
'duration': 123,
},
'skip': 'This video has been deleted.',
}, {
'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
'info_dict': {
'id': '27701988',
'ext': 'mp4',
'title': 'Chile workers cover up to avoid skin damage',
'description': 'md5:2bd151625a60a32822873efc246ba20d',
'uploader': 'afp-news',
'duration': 123,
},
{
'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
'md5': '4fde7b9e33577bab2f2f8f260e30e979',
'note': 'Embedded ooyala video',
'info_dict': {
'id': '69525809',
'ext': 'mp4',
'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
'uploader': 'newsy-videos',
},
'skip': 'This video has been deleted.',
'skip': 'This video has been deleted.',
}, {
'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
'md5': '4fde7b9e33577bab2f2f8f260e30e979',
'note': 'Embedded ooyala video',
'info_dict': {
'id': '69525809',
'ext': 'mp4',
'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
'uploader': 'newsy-videos',
},
]
'skip': 'This video has been deleted.',
}, {
'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
'only_matching': True,
}]
def _extract_formats(self, source):
formats = []

View File

@@ -121,7 +121,11 @@ class VH1IE(MTVIE):
idoc = self._download_xml(
doc_url, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
return self.playlist_result(
[self._get_video_info(item) for item in idoc.findall('.//item')],
playlist_id=video_id,
)
entries = []
for item in idoc.findall('.//item'):
info = self._get_video_info(item)
if info:
entries.append(info)
return self.playlist_result(entries, playlist_id=video_id)

View File

@@ -56,7 +56,8 @@ class VidioIE(InfoExtractor):
self._sort_formats(formats)
duration = int_or_none(duration or self._search_regex(
r'data-video-duration=(["\'])(?P<duartion>\d+)\1', webpage, 'duration'))
r'data-video-duration=(["\'])(?P<duration>\d+)\1', webpage,
'duration', fatal=False, group='duration'))
thumbnail = thumbnail or self._og_search_thumbnail(webpage)
like_count = int_or_none(self._search_regex(

View File

@@ -15,7 +15,21 @@ from ..utils import (
class VierIE(InfoExtractor):
IE_NAME = 'vier'
IE_DESC = 'vier.be and vijf.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?(?P<site>vier|vijf)\.be/
(?:
(?:
[^/]+/videos|
video(?:/[^/]+)*
)/
(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|
(?:
video/v3/embed|
embed/video/public
)/(?P<embed_id>\d+)
)
'''
_NETRC_MACHINE = 'vier'
_TESTS = [{
'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
@@ -83,6 +97,15 @@ class VierIE(InfoExtractor):
}, {
'url': 'http://www.vier.be/video/v3/embed/16129',
'only_matching': True,
}, {
'url': 'https://www.vijf.be/embed/video/public/4093',
'only_matching': True,
}, {
'url': 'https://www.vier.be/video/blockbusters/in-juli-en-augustus-summer-classics',
'only_matching': True,
}, {
'url': 'https://www.vier.be/video/achter-de-rug/2017/achter-de-rug-seizoen-1-aflevering-6',
'only_matching': True,
}]
def _real_initialize(self):
@@ -133,14 +156,20 @@ class VierIE(InfoExtractor):
video_id = self._search_regex(
[r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'],
webpage, 'video id', default=video_id or display_id)
application = self._search_regex(
[r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
webpage, 'application', default=site + '_vod')
filename = self._search_regex(
[r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
webpage, 'filename')
playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
playlist_url = self._search_regex(
r'data-file=(["\'])(?P<url>(?:https?:)?//[^/]+/.+?\.m3u8.*?)\1',
webpage, 'm3u8 url', default=None, group='url')
if not playlist_url:
application = self._search_regex(
[r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
webpage, 'application', default=site + '_vod')
filename = self._search_regex(
[r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
webpage, 'filename')
playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
formats = self._extract_wowza_formats(
playlist_url, display_id, skip_protocols=['dash'])
self._sort_formats(formats)

View File

@@ -92,10 +92,12 @@ class VineIE(InfoExtractor):
username = data.get('username')
alt_title = 'Vine by %s' % username if username else None
return {
'id': video_id,
'title': data.get('description'),
'alt_title': 'Vine by %s' % username if username else None,
'title': data.get('description') or alt_title or 'Vine video',
'alt_title': alt_title,
'thumbnail': data.get('thumbnailUrl'),
'timestamp': unified_timestamp(data.get('created')),
'uploader': username,

View File

@@ -49,6 +49,10 @@ class VLiveIE(InfoExtractor):
},
}]
@classmethod
def suitable(cls, url):
return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -261,3 +265,54 @@ class VLiveChannelIE(InfoExtractor):
return self.playlist_result(
entries, channel_code, channel_name)
class VLivePlaylistIE(InfoExtractor):
IE_NAME = 'vlive:playlist'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.vlive.tv/video/22867/playlist/22912',
'info_dict': {
'id': '22912',
'title': 'Valentine Day Message from TWICE'
},
'playlist_mincount': 9
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id, playlist_id = mobj.group('video_id', 'id')
VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
if self._downloader.params.get('noplaylist'):
self.to_screen(
'Downloading just video %s because of --no-playlist' % video_id)
return self.url_result(
VIDEO_URL_TEMPLATE % video_id,
ie=VLiveIE.ie_key(), video_id=video_id)
self.to_screen(
'Downloading playlist %s - add --no-playlist to just download video'
% playlist_id)
webpage = self._download_webpage(
'http://www.vlive.tv/video/%s/playlist/%s'
% (video_id, playlist_id), playlist_id)
item_ids = self._parse_json(
self._search_regex(
r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
'playlist video seqs'),
playlist_id)
entries = [
self.url_result(
VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
video_id=compat_str(item_id))
for item_id in item_ids]
playlist_name = self._html_search_regex(
r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
webpage, 'playlist title', fatal=False)
return self.playlist_result(entries, playlist_id, playlist_name)

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
clean_html,
dict_get,
@@ -14,12 +15,21 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
_VALID_URL = r'''(?x)
https?://
(?:.+?\.)?xhamster\.com/
(?:
movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html|
videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+)
)
'''
_TESTS = [{
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
'md5': '8281348b8d3c53d39fffb377d24eac4e',
'info_dict': {
'id': '1509445',
'display_id': 'femaleagent_shy_beauty_takes_the_bait',
'ext': 'mp4',
'title': 'FemaleAgent Shy beauty takes the bait',
'upload_date': '20121014',
@@ -32,6 +42,7 @@ class XHamsterIE(InfoExtractor):
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
'info_dict': {
'id': '2221348',
'display_id': 'britney_spears_sexy_booty',
'ext': 'mp4',
'title': 'Britney Spears Sexy Booty',
'upload_date': '20130914',
@@ -66,26 +77,18 @@ class XHamsterIE(InfoExtractor):
# This video is visible for marcoalfa123456's friends only
'url': 'https://it.xhamster.com/movies/7263980/la_mia_vicina.html',
'only_matching': True,
}, {
# new URL schema
'url': 'https://pt.xhamster.com/videos/euro-pedal-pumping-7937821',
'only_matching': True,
}]
def _real_extract(self, url):
def extract_video_url(webpage, name):
return self._search_regex(
[r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',
r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''',
r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''],
webpage, name, group='mp4')
def is_hd(webpage):
return '<div class=\'icon iconHD\'' in webpage
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') or mobj.group('id_2')
display_id = mobj.group('display_id') or mobj.group('display_id_2')
video_id = mobj.group('id')
seo = mobj.group('seo')
proto = mobj.group('proto')
mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
webpage = self._download_webpage(mrss_url, video_id)
webpage = self._download_webpage(url, video_id)
error = self._html_search_regex(
r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
@@ -99,6 +102,39 @@ class XHamsterIE(InfoExtractor):
r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'],
webpage, 'title')
formats = []
format_urls = set()
sources = self._parse_json(
self._search_regex(
r'sources\s*:\s*({.+?})\s*,?\s*\n', webpage, 'sources',
default='{}'),
video_id, fatal=False)
for format_id, format_url in sources.items():
if not isinstance(format_url, compat_str):
continue
if format_url in format_urls:
continue
format_urls.add(format_url)
formats.append({
'format_id': format_id,
'url': format_url,
'height': int_or_none(self._search_regex(
r'^(\d+)[pP]', format_id, 'height', default=None))
})
video_url = self._search_regex(
[r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',
r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''',
r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''],
webpage, 'video url', group='mp4', default=None)
if video_url and video_url not in format_urls:
formats.append({
'url': video_url,
})
self._sort_formats(formats)
# Only a few videos have an description
mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
description = mobj.group(1) if mobj else None
@@ -117,7 +153,8 @@ class XHamsterIE(InfoExtractor):
webpage, 'thumbnail', fatal=False, group='thumbnail')
duration = parse_duration(self._search_regex(
r'Runtime:\s*</span>\s*([\d:]+)', webpage,
[r'<[^<]+\bitemprop=["\']duration["\'][^<]+\bcontent=["\'](.+?)["\']',
r'Runtime:\s*</span>\s*([\d:]+)'], webpage,
'duration', fatal=False))
view_count = int_or_none(self._search_regex(
@@ -132,30 +169,6 @@ class XHamsterIE(InfoExtractor):
age_limit = self._rta_search(webpage)
hd = is_hd(webpage)
format_id = 'hd' if hd else 'sd'
video_url = extract_video_url(webpage, format_id)
formats = [{
'url': video_url,
'format_id': 'hd' if hd else 'sd',
'preference': 1,
}]
if not hd:
mrss_url = self._search_regex(r'<link rel="canonical" href="([^"]+)', webpage, 'mrss_url')
webpage = self._download_webpage(mrss_url + '?hd', video_id, note='Downloading HD webpage')
if is_hd(webpage):
video_url = extract_video_url(webpage, 'hd')
formats.append({
'url': video_url,
'format_id': 'hd',
'preference': 2,
})
self._sort_formats(formats)
categories_html = self._search_regex(
r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
'categories', default=None)
@@ -164,6 +177,7 @@ class XHamsterIE(InfoExtractor):
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'upload_date': upload_date,

View File

@@ -1,14 +1,13 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
float_or_none,
get_element_by_attribute,
parse_iso8601,
parse_duration,
remove_end,
)
@@ -24,6 +23,7 @@ class XuiteIE(InfoExtractor):
'id': '3860914',
'ext': 'mp3',
'title': '孤單南半球-歐德陽',
'description': '孤單南半球-歐德陽',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 247.246,
'timestamp': 1314932940,
@@ -44,7 +44,7 @@ class XuiteIE(InfoExtractor):
'duration': 596.458,
'timestamp': 1454242500,
'upload_date': '20160131',
'uploader': 'yan12125',
'uploader': '屁姥',
'uploader_id': '12158353',
'categories': ['個人短片'],
'description': 'http://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_320x180.mp4',
@@ -72,10 +72,10 @@ class XuiteIE(InfoExtractor):
# from http://forgetfulbc.blogspot.com/2016/06/date.html
'url': 'http://vlog.xuite.net/embed/cE1xbENoLTI3NDQ3MzM2LmZsdg==?ar=0&as=0',
'info_dict': {
'id': 'cE1xbENoLTI3NDQ3MzM2LmZsdg==',
'id': '27447336',
'ext': 'mp4',
'title': '男女平權只是口號?專家解釋約會時男生是否該幫女生付錢 (中字)',
'description': 'md5:f0abdcb69df300f522a5442ef3146f2a',
'description': 'md5:1223810fa123b179083a3aed53574706',
'timestamp': 1466160960,
'upload_date': '20160617',
'uploader': 'B.C. & Lowy',
@@ -86,29 +86,9 @@ class XuiteIE(InfoExtractor):
'only_matching': True,
}]
@staticmethod
def base64_decode_utf8(data):
return base64.b64decode(data.encode('utf-8')).decode('utf-8')
@staticmethod
def base64_encode_utf8(data):
return base64.b64encode(data.encode('utf-8')).decode('utf-8')
def _extract_flv_config(self, encoded_media_id):
flv_config = self._download_xml(
'http://vlog.xuite.net/flash/player?media=%s' % encoded_media_id,
'flv config')
prop_dict = {}
for prop in flv_config.findall('./property'):
prop_id = self.base64_decode_utf8(prop.attrib['id'])
# CDATA may be empty in flv config
if not prop.text:
continue
encoded_content = self.base64_decode_utf8(prop.text)
prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
return prop_dict
def _real_extract(self, url):
# /play/ URLs provide embedded video URL and more metadata
url = url.replace('/embed/', '/play/')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@@ -121,51 +101,53 @@ class XuiteIE(InfoExtractor):
'%s returned error: %s' % (self.IE_NAME, error_msg),
expected=True)
encoded_media_id = self._search_regex(
r'attributes\.name\s*=\s*"([^"]+)"', webpage,
'encoded media id', default=None)
if encoded_media_id is None:
video_id = self._html_search_regex(
r'data-mediaid="(\d+)"', webpage, 'media id')
encoded_media_id = self.base64_encode_utf8(video_id)
flv_config = self._extract_flv_config(encoded_media_id)
media_info = self._parse_json(self._search_regex(
r'var\s+mediaInfo\s*=\s*({.*});', webpage, 'media info'), video_id)
FORMATS = {
'audio': 'mp3',
'video': 'mp4',
}
video_id = media_info['MEDIA_ID']
formats = []
for format_tag in ('src', 'hq_src'):
video_url = flv_config.get(format_tag)
for key in ('html5Url', 'html5HQUrl'):
video_url = media_info.get(key)
if not video_url:
continue
format_id = self._search_regex(
r'\bq=(.+?)\b', video_url, 'format id', default=format_tag)
r'\bq=(.+?)\b', video_url, 'format id', default=None)
formats.append({
'url': video_url,
'ext': FORMATS.get(flv_config['type'], 'mp4'),
'ext': 'mp4' if format_id.isnumeric() else format_id,
'format_id': format_id,
'height': int(format_id) if format_id.isnumeric() else None,
})
self._sort_formats(formats)
timestamp = flv_config.get('publish_datetime')
timestamp = media_info.get('PUBLISH_DATETIME')
if timestamp:
timestamp = parse_iso8601(timestamp + ' +0800', ' ')
category = flv_config.get('category')
category = media_info.get('catName')
categories = [category] if category else []
uploader = media_info.get('NICKNAME')
uploader_url = None
author_div = get_element_by_attribute('itemprop', 'author', webpage)
if author_div:
uploader = uploader or self._html_search_meta('name', author_div)
uploader_url = self._html_search_regex(
r'<link[^>]+itemprop="url"[^>]+href="([^"]+)"', author_div,
'uploader URL', fatal=False)
return {
'id': video_id,
'title': flv_config['title'],
'description': flv_config.get('description'),
'thumbnail': flv_config.get('thumb'),
'title': media_info['TITLE'],
'description': remove_end(media_info.get('metaDesc'), ' (Xuite 影音)'),
'thumbnail': media_info.get('ogImageUrl'),
'timestamp': timestamp,
'uploader': flv_config.get('author_name'),
'uploader_id': flv_config.get('author_id'),
'duration': parse_duration(flv_config.get('duration')),
'uploader': uploader,
'uploader_id': media_info.get('MEMBER_ID'),
'uploader_url': uploader_url,
'duration': float_or_none(media_info.get('MEDIA_DURATION'), 1000000),
'categories': categories,
'formats': formats,
}

View File

@@ -1,123 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
float_or_none,
month_by_abbreviation,
ExtractorError,
get_element_by_attribute,
)
class YamIE(InfoExtractor):
IE_DESC = '蕃薯藤yam天空部落'
_VALID_URL = r'https?://mymedia\.yam\.com/m/(?P<id>\d+)'
_TESTS = [{
# An audio hosted on Yam
'url': 'http://mymedia.yam.com/m/2283921',
'md5': 'c011b8e262a52d5473d9c2e3c9963b9c',
'info_dict': {
'id': '2283921',
'ext': 'mp3',
'title': '發現 - 趙薇 京華煙雲主題曲',
'description': '發現 - 趙薇 京華煙雲主題曲',
'uploader_id': 'princekt',
'upload_date': '20080807',
'duration': 313.0,
}
}, {
# An external video hosted on YouTube
'url': 'http://mymedia.yam.com/m/3599430',
'md5': '03127cf10d8f35d120a9e8e52e3b17c6',
'info_dict': {
'id': 'CNpEoQlrIgA',
'ext': 'mp4',
'upload_date': '20150306',
'uploader': '新莊社大瑜伽社',
'description': 'md5:11e2e405311633ace874f2e6226c8b17',
'uploader_id': '2323agoy',
'title': '20090412陽明山二子坪-1',
},
'skip': 'Video does not exist',
}, {
'url': 'http://mymedia.yam.com/m/3598173',
'info_dict': {
'id': '3598173',
'ext': 'mp4',
},
'skip': 'cause Yam system error',
}, {
'url': 'http://mymedia.yam.com/m/3599437',
'info_dict': {
'id': '3599437',
'ext': 'mp4',
},
'skip': 'invalid YouTube URL',
}, {
'url': 'http://mymedia.yam.com/m/2373534',
'md5': '7ff74b91b7a817269d83796f8c5890b1',
'info_dict': {
'id': '2373534',
'ext': 'mp3',
'title': '林俊傑&蔡卓妍-小酒窩',
'description': 'md5:904003395a0fcce6cfb25028ff468420',
'upload_date': '20080928',
'uploader_id': 'onliner2',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
page = self._download_webpage(url, video_id)
# Check for errors
system_msg = self._html_search_regex(
r'系統訊息(?:<br>|\n|\r)*([^<>]+)<br>', page, 'system message',
default=None)
if system_msg:
raise ExtractorError(system_msg, expected=True)
# Is it hosted externally on YouTube?
youtube_url = self._html_search_regex(
r'<embed src="(http://www.youtube.com/[^"]+)"',
page, 'YouTube url', default=None)
if youtube_url:
return self.url_result(youtube_url, 'Youtube')
title = self._html_search_regex(
r'<h1[^>]+class="heading"[^>]*>\s*(.+)\s*</h1>', page, 'title')
api_page = self._download_webpage(
'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id,
note='Downloading API page')
api_result_obj = compat_urlparse.parse_qs(api_page)
info_table = get_element_by_attribute('class', 'info', page)
uploader_id = self._html_search_regex(
r'<!-- 發表作者 -->[\n ]+<a href="/([a-z0-9]+)"',
info_table, 'uploader id', fatal=False)
mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2})\s+' +
r'(?P<day>\d{1,2}), (?P<year>\d{4})', page)
if mobj:
upload_date = '%s%02d%02d' % (
mobj.group('year'),
month_by_abbreviation(mobj.group('mon')),
int(mobj.group('day')))
else:
upload_date = None
duration = float_or_none(api_result_obj['totaltime'][0], scale=1000)
return {
'id': video_id,
'url': api_result_obj['mp3file'][0],
'title': title,
'description': self._html_search_meta('description', page),
'duration': duration,
'uploader_id': uploader_id,
'upload_date': upload_date,
}

View File

@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
import random
import re
import string
@@ -14,7 +13,6 @@ from ..utils import (
js_to_json,
str_or_none,
strip_jsonp,
urljoin,
)
@@ -222,17 +220,42 @@ class YoukuShowIE(InfoExtractor):
_VALID_URL = r'https?://list\.youku\.com/show/id_(?P<id>[0-9a-z]+)\.html'
IE_NAME = 'youku:show'
_TEST = {
_TESTS = [{
'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html',
'info_dict': {
'id': 'zc7c670be07ff11e48b3f',
'title': '花千骨 未删减',
'title': '花千骨 DVD',
'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558',
},
'playlist_count': 50,
}
}, {
# Episode number not starting from 1
'url': 'http://list.youku.com/show/id_zefbfbd70efbfbd780bef.html',
'info_dict': {
'id': 'zefbfbd70efbfbd780bef',
'title': '超级飞侠3',
'description': 'md5:275715156abebe5ccc2a1992e9d56b98',
},
'playlist_count': 24,
}, {
# Ongoing playlist. The initial page is the last one
'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html',
'only_matchine': True,
}]
_PAGE_SIZE = 40
def _extract_entries(self, playlist_data_url, show_id, note, query):
query['callback'] = 'cb'
playlist_data = self._download_json(
playlist_data_url, show_id, query=query, note=note,
transform_source=lambda s: js_to_json(strip_jsonp(s)))['html']
drama_list = (get_element_by_class('p-drama-grid', playlist_data) or
get_element_by_class('p-drama-half-row', playlist_data))
if drama_list is None:
raise ExtractorError('No episodes found')
video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list)
return playlist_data, [
self.url_result(self._proto_relative_url(video_url, 'http:'), YoukuIE.ie_key())
for video_url in video_urls]
def _real_extract(self, url):
show_id = self._match_id(url)
@@ -242,30 +265,29 @@ class YoukuShowIE(InfoExtractor):
page_config = self._parse_json(self._search_regex(
r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'),
show_id, transform_source=js_to_json)
for idx in itertools.count(0):
if idx == 0:
playlist_data_url = 'http://list.youku.com/show/module'
query = {'id': page_config['showid'], 'tab': 'point'}
else:
playlist_data_url = 'http://list.youku.com/show/point'
query = {
'id': page_config['showid'],
'stage': 'reload_%d' % (self._PAGE_SIZE * idx + 1),
}
query['callback'] = 'cb'
playlist_data = self._download_json(
playlist_data_url, show_id, query=query,
first_page, initial_entries = self._extract_entries(
'http://list.youku.com/show/module', show_id,
note='Downloading initial playlist data page',
query={
'id': page_config['showid'],
'tab': 'showInfo',
})
first_page_reload_id = self._html_search_regex(
r'<div[^>]+id="(reload_\d+)', first_page, 'first page reload id')
# The first reload_id has the same items as first_page
reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page)
for idx, reload_id in enumerate(reload_ids):
if reload_id == first_page_reload_id:
entries.extend(initial_entries)
continue
_, new_entries = self._extract_entries(
'http://list.youku.com/show/episode', show_id,
note='Downloading playlist data page %d' % (idx + 1),
transform_source=lambda s: js_to_json(strip_jsonp(s)))['html']
video_urls = re.findall(
r'<div[^>]+class="p-thumb"[^<]+<a[^>]+href="([^"]+)"',
playlist_data)
new_entries = [
self.url_result(urljoin(url, video_url), YoukuIE.ie_key())
for video_url in video_urls]
query={
'id': page_config['showid'],
'stage': reload_id,
})
entries.extend(new_entries)
if len(new_entries) < self._PAGE_SIZE:
break
desc = self._html_search_meta('description', webpage, fatal=False)
playlist_title = desc.split(',')[0] if desc else None

View File

@@ -673,6 +673,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
},
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
# YouTube Red ad is not captured for creator
{
'url': '__2ABJjxzNo',
'info_dict': {
@@ -1649,7 +1650,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_webpage, 'license', default=None)
m_music = re.search(
r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
r'''(?x)
<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
<ul[^>]*>\s*
<li>(?P<title>.+?)
by (?P<creator>.+?)
(?:
\(.+?\)|
<a[^>]*
(?:
\bhref=["\']/red[^>]*>| # drop possible
>\s*Listen ad-free with YouTube Red # YouTube Red ad
)
.*?
)?</li
''',
video_webpage)
if m_music:
video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))

View File

@@ -542,7 +542,7 @@ class FFmpegFixupM3u8PP(FFmpegPostProcessor):
temp_filename = prepend_extension(filename, 'temp')
options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename)
self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename)
self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename))

View File

@@ -365,9 +365,9 @@ def get_elements_by_attribute(attribute, value, html, escape_value=True):
retlist = []
for m in re.finditer(r'''(?xs)
<([a-zA-Z0-9:._-]+)
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
\s+%s=['"]?%s['"]?
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
\s*>
(?P<content>.*?)
</\1>

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2017.07.02'
__version__ = '2017.07.23'