mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-12-11 08:32:44 +01:00
Compare commits
90 Commits
2017.05.29
...
2017.06.23
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
170719414d | ||
|
|
38dad4737f | ||
|
|
ddbb4c5c3e | ||
|
|
fa3ea7223a | ||
|
|
0f4a5a73e7 | ||
|
|
18166bb8e8 | ||
|
|
d4893e764b | ||
|
|
97b6e30113 | ||
|
|
9be9ec5980 | ||
|
|
048b55804d | ||
|
|
6ce79d7ac0 | ||
|
|
1641ca402d | ||
|
|
85cbcede5b | ||
|
|
a1de83e5f0 | ||
|
|
fee00b3884 | ||
|
|
2d2132ac6e | ||
|
|
cc2ffe5afe | ||
|
|
560050669b | ||
|
|
eaa006d1bd | ||
|
|
a6f29820c6 | ||
|
|
1433734c35 | ||
|
|
aefce8e6dc | ||
|
|
8b6ac49ecc | ||
|
|
b08e235f09 | ||
|
|
be80986ed9 | ||
|
|
473e87064b | ||
|
|
4f90d2aeac | ||
|
|
b230fefc3c | ||
|
|
96a2daa1ee | ||
|
|
0ea6efbb7a | ||
|
|
6a9cb29509 | ||
|
|
ca27037171 | ||
|
|
0bf4b71b75 | ||
|
|
5215f45327 | ||
|
|
0a268c6e11 | ||
|
|
7dd5415cd0 | ||
|
|
b5dc33daa9 | ||
|
|
97fa1f8dc4 | ||
|
|
b081f53b08 | ||
|
|
cb1e6d8985 | ||
|
|
9932ac5c58 | ||
|
|
bf87c36c93 | ||
|
|
b4a3d461e4 | ||
|
|
72b409559c | ||
|
|
534863e057 | ||
|
|
16bc958287 | ||
|
|
624bd0104c | ||
|
|
28a4d6cce8 | ||
|
|
2ae2ffda5e | ||
|
|
70e7967202 | ||
|
|
6e999fbc12 | ||
|
|
7409af9eb3 | ||
|
|
4e3637034c | ||
|
|
1afd0b0da7 | ||
|
|
7515830422 | ||
|
|
f5521ea209 | ||
|
|
34646967ba | ||
|
|
e4d2e76d8e | ||
|
|
87f5646937 | ||
|
|
cc69a3de1b | ||
|
|
15aeeb1188 | ||
|
|
1693bebe4d | ||
|
|
4244a13a1d | ||
|
|
931adf8cc1 | ||
|
|
c996943418 | ||
|
|
76e6378358 | ||
|
|
a355b57f58 | ||
|
|
1508da30c2 | ||
|
|
eb703e5380 | ||
|
|
0a3924e746 | ||
|
|
e1db730d86 | ||
|
|
537191826f | ||
|
|
130880ba48 | ||
|
|
f8ba3fda4d | ||
|
|
e1b90cc3db | ||
|
|
43e6579558 | ||
|
|
6d923aab35 | ||
|
|
62bafabc09 | ||
|
|
9edcdac90c | ||
|
|
cd138d8bd4 | ||
|
|
cd750b731c | ||
|
|
4bede0d8f5 | ||
|
|
f129c3f349 | ||
|
|
39d4c1be4d | ||
|
|
f7a747ce59 | ||
|
|
4489d41816 | ||
|
|
87b5184a0d | ||
|
|
c56ad5c975 | ||
|
|
6b7ce85cdc | ||
|
|
d10d0e3cf8 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.29**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.23**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.05.29
|
||||
[debug] youtube-dl version 2017.06.23
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
3
AUTHORS
3
AUTHORS
@@ -217,3 +217,6 @@ Marvin Ewald
|
||||
Frédéric Bournival
|
||||
Timendum
|
||||
gritstub
|
||||
Adam Voss
|
||||
Mike Fährmann
|
||||
Jan Kundrát
|
||||
|
||||
95
ChangeLog
95
ChangeLog
@@ -1,3 +1,98 @@
|
||||
version 2017.06.23
|
||||
|
||||
Core
|
||||
* [adobepass] Fix extraction on older python 2.6
|
||||
|
||||
Extractors
|
||||
* [youtube] Adapt to new automatic captions rendition (#13467)
|
||||
* [hgtv.com:show] Relax video config regular expression (#13279, #13461)
|
||||
* [drtuber] Fix formats extraction (#12058)
|
||||
* [youporn] Fix upload date extraction
|
||||
* [youporn] Improve formats extraction
|
||||
* [youporn] Fix title extraction (#13456)
|
||||
* [googledrive] Fix formats sorting (#13443)
|
||||
* [watchindianporn] Fix extraction (#13411, #13415)
|
||||
+ [vimeo] Add fallback mp4 extension for original format
|
||||
+ [ruv] Add support for ruv.is (#13396)
|
||||
* [viu] Fix extraction on older python 2.6
|
||||
* [pandora.tv] Fix upload_date extraction (#12846)
|
||||
+ [asiancrush] Add support for asiancrush.com (#13420)
|
||||
|
||||
|
||||
version 2017.06.18
|
||||
|
||||
Core
|
||||
* [downloader/common] Use utils.shell_quote for debug command line
|
||||
* [utils] Use compat_shlex_quote in shell_quote
|
||||
* [postprocessor/execafterdownload] Encode command line (#13407)
|
||||
* [compat] Fix compat_shlex_quote on Windows (#5889, #10254)
|
||||
* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing
|
||||
in --metadata-from-title (#13408)
|
||||
* [extractor/common] Fix json dumping with --geo-bypass
|
||||
+ [extractor/common] Improve jwplayer subtitles extraction
|
||||
+ [extractor/common] Improve jwplayer formats extraction (#13379)
|
||||
|
||||
Extractors
|
||||
* [polskieradio] Fix extraction (#13392)
|
||||
+ [xfileshare] Add support for fastvideo.me (#13385)
|
||||
* [bilibili] Fix extraction of videos with double quotes in titles (#13387)
|
||||
* [4tube] Fix extraction (#13381, #13382)
|
||||
+ [disney] Add support for disneychannel.de (#13383)
|
||||
* [npo] Improve URL regular expression (#13376)
|
||||
+ [corus] Add support for showcase.ca
|
||||
+ [corus] Add support for history.ca (#13359)
|
||||
|
||||
|
||||
version 2017.06.12
|
||||
|
||||
Core
|
||||
* [utils] Handle compat_HTMLParseError in extract_attributes (#13349)
|
||||
+ [compat] Introduce compat_HTMLParseError
|
||||
* [utils] Improve unified_timestamp
|
||||
* [extractor/generic] Ensure format id is unicode string
|
||||
* [extractor/common] Return unicode string from _match_id
|
||||
+ [YoutubeDL] Sanitize more fields (#13313)
|
||||
|
||||
Extractors
|
||||
+ [xfileshare] Add support for rapidvideo.tv (#13348)
|
||||
* [xfileshare] Modernize and pass Referer
|
||||
+ [rutv] Add support for testplayer.vgtrk.com (#13347)
|
||||
+ [newgrounds] Extract more metadata (#13232)
|
||||
+ [newgrounds:playlist] Add support for playlists (#10611)
|
||||
* [newgrounds] Improve formats and uploader extraction (#13346)
|
||||
* [msn] Fix formats extraction
|
||||
* [turbo] Ensure format id is string
|
||||
* [sexu] Ensure height is int
|
||||
* [jove] Ensure comment count is int
|
||||
* [golem] Ensure format id is string
|
||||
* [gfycat] Ensure filesize is int
|
||||
* [foxgay] Ensure height is int
|
||||
* [flickr] Ensure format id is string
|
||||
* [sohu] Fix numeric fields
|
||||
* [safari] Improve authentication detection (#13319)
|
||||
* [liveleak] Ensure height is int (#13313)
|
||||
* [streamango] Make title optional (#13292)
|
||||
* [rtlnl] Improve URL regular expression (#13295)
|
||||
* [tvplayer] Fix extraction (#13291)
|
||||
|
||||
|
||||
version 2017.06.05
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Don't emit ANSI escape codes on Windows (#13270)
|
||||
|
||||
Extractors
|
||||
+ [bandcamp:weekly] Add support for bandcamp weekly (#12758)
|
||||
* [pornhub:playlist] Fix extraction (#13281)
|
||||
- [godtv] Remove extractor (#13175)
|
||||
* [safari] Fix typo (#13252)
|
||||
* [youtube] Improve chapters extraction (#13247)
|
||||
* [1tv] Lower preference for HTTP formats (#13246)
|
||||
* [francetv] Relax URL regular expression
|
||||
* [drbonanza] Fix extraction (#13231)
|
||||
* [packtpub] Fix authentication (#13240)
|
||||
|
||||
|
||||
version 2017.05.29
|
||||
|
||||
Extractors
|
||||
|
||||
2
Makefile
2
Makefile
@@ -101,7 +101,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
||||
--exclude '*.pyc' \
|
||||
--exclude '*.pyo' \
|
||||
--exclude '*~' \
|
||||
--exclude '__pycache' \
|
||||
--exclude '__pycache__' \
|
||||
--exclude '.git' \
|
||||
--exclude 'testdata' \
|
||||
--exclude 'docs/_build' \
|
||||
|
||||
39
README.md
39
README.md
@@ -145,18 +145,18 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--max-views COUNT Do not download any videos with more than
|
||||
COUNT views
|
||||
--match-filter FILTER Generic video filter. Specify any key (see
|
||||
help for -o for a list of available keys)
|
||||
to match if the key is present, !key to
|
||||
check if the key is not present, key >
|
||||
NUMBER (like "comment_count > 12", also
|
||||
works with >=, <, <=, !=, =) to compare
|
||||
against a number, key = 'LITERAL' (like
|
||||
"uploader = 'Mike Smith'", also works with
|
||||
!=) to match against a string literal and &
|
||||
to require multiple matches. Values which
|
||||
are not known are excluded unless you put a
|
||||
question mark (?) after the operator. For
|
||||
example, to only match videos that have
|
||||
the "OUTPUT TEMPLATE" for a list of
|
||||
available keys) to match if the key is
|
||||
present, !key to check if the key is not
|
||||
present, key > NUMBER (like "comment_count
|
||||
> 12", also works with >=, <, <=, !=, =) to
|
||||
compare against a number, key = 'LITERAL'
|
||||
(like "uploader = 'Mike Smith'", also works
|
||||
with !=) to match against a string literal
|
||||
and & to require multiple matches. Values
|
||||
which are not known are excluded unless you
|
||||
put a question mark (?) after the operator.
|
||||
For example, to only match videos that have
|
||||
been liked more than 100 times and disliked
|
||||
less than 50 times (or the dislike
|
||||
functionality is not available at the given
|
||||
@@ -277,8 +277,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--get-filename Simulate, quiet but print output filename
|
||||
--get-format Simulate, quiet but print output format
|
||||
-j, --dump-json Simulate, quiet but print JSON information.
|
||||
See --output for a description of available
|
||||
keys.
|
||||
See the "OUTPUT TEMPLATE" for a description
|
||||
of available keys.
|
||||
-J, --dump-single-json Simulate, quiet but print JSON information
|
||||
for each command-line argument. If the URL
|
||||
refers to a playlist, dump the whole
|
||||
@@ -474,7 +474,10 @@ machine twitch login my_twitch_account_name password my_twitch_password
|
||||
```
|
||||
To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
|
||||
|
||||
On Windows you may also need to set up the `%HOME%` environment variable manually.
|
||||
On Windows you may also need to set up the `%HOME%` environment variable manually. For example:
|
||||
```
|
||||
set HOME=%USERPROFILE%
|
||||
```
|
||||
|
||||
# OUTPUT TEMPLATE
|
||||
|
||||
@@ -532,13 +535,14 @@ The basic usage is not to set any template arguments when downloading a single f
|
||||
- `playlist_id` (string): Playlist identifier
|
||||
- `playlist_title` (string): Playlist title
|
||||
|
||||
|
||||
Available for the video that belongs to some logical chapter or section:
|
||||
|
||||
- `chapter` (string): Name or title of the chapter the video belongs to
|
||||
- `chapter_number` (numeric): Number of the chapter the video belongs to
|
||||
- `chapter_id` (string): Id of the chapter the video belongs to
|
||||
|
||||
Available for the video that is an episode of some series or programme:
|
||||
|
||||
- `series` (string): Title of the series or programme the video episode belongs to
|
||||
- `season` (string): Title of the season the video episode belongs to
|
||||
- `season_number` (numeric): Number of the season the video episode belongs to
|
||||
@@ -548,6 +552,7 @@ Available for the video that is an episode of some series or programme:
|
||||
- `episode_id` (string): Id of the video episode
|
||||
|
||||
Available for the media that is a track or a part of a music album:
|
||||
|
||||
- `track` (string): Title of the track
|
||||
- `track_number` (numeric): Number of the track within an album or a disc
|
||||
- `track_id` (string): Id of the track
|
||||
@@ -649,7 +654,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
|
||||
- `acodec`: Name of the audio codec in use
|
||||
- `vcodec`: Name of the video codec in use
|
||||
- `container`: Name of the container format
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`)
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
||||
- `format_id`: A short description of the format
|
||||
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by a particular extractor, i.e. the metadata offered by the video hoster.
|
||||
|
||||
@@ -8,7 +8,7 @@ import re
|
||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||
|
||||
PREFIX = '''%YOUTUBE-DL(1)
|
||||
PREFIX = r'''%YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
|
||||
|
||||
@@ -67,6 +67,8 @@
|
||||
- **arte.tv:info**
|
||||
- **arte.tv:magazine**
|
||||
- **arte.tv:playlist**
|
||||
- **AsianCrush**
|
||||
- **AsianCrushPlaylist**
|
||||
- **AtresPlayer**
|
||||
- **ATTTechChannel**
|
||||
- **ATVAt**
|
||||
@@ -87,6 +89,7 @@
|
||||
- **bambuser:channel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **Bandcamp:weekly**
|
||||
- **bangumi.bilibili.com**: BiliBili番剧
|
||||
- **bbc**: BBC
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
@@ -310,7 +313,6 @@
|
||||
- **Go**
|
||||
- **Go90**
|
||||
- **GodTube**
|
||||
- **GodTV**
|
||||
- **Golem**
|
||||
- **GoogleDrive**
|
||||
- **Goshgay**
|
||||
@@ -512,6 +514,7 @@
|
||||
- **netease:song**: 网易云音乐
|
||||
- **Netzkino**
|
||||
- **Newgrounds**
|
||||
- **NewgroundsPlaylist**
|
||||
- **Newstube**
|
||||
- **NextMedia**: 蘋果日報
|
||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||
@@ -685,6 +688,7 @@
|
||||
- **rutube:person**: Rutube person videos
|
||||
- **RUTV**: RUTV.RU
|
||||
- **Ruutu**
|
||||
- **Ruv**
|
||||
- **safari**: safaribooksonline.com online video
|
||||
- **safari:api**
|
||||
- **safari:course**: safaribooksonline.com online courses
|
||||
@@ -974,7 +978,7 @@
|
||||
- **WSJArticle**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **xiami:album**: 虾米音乐 - 专辑
|
||||
|
||||
@@ -340,6 +340,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
||||
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
||||
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
|
||||
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
@@ -915,6 +916,8 @@ class TestUtil(unittest.TestCase):
|
||||
supports_outside_bmp = False
|
||||
if supports_outside_bmp:
|
||||
self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'})
|
||||
# Malformed HTML should not break attributes extraction on older Python
|
||||
self.assertEqual(extract_attributes('<mal"formed/>'), {})
|
||||
|
||||
def test_clean_html(self):
|
||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||
|
||||
@@ -254,6 +254,13 @@ class TestYoutubeChapters(unittest.TestCase):
|
||||
'title': '3 - Из серпов луны...[Iz serpov luny]',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=xZW70zEasOk
|
||||
# time point more than duration
|
||||
'''● LCS Spring finals: Saturday and Sunday from <a href="#" onclick="yt.www.watch.player.seekTo(13*60+30);return false;">13:30</a> outside the venue! <br />● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''',
|
||||
283,
|
||||
[]
|
||||
),
|
||||
]
|
||||
|
||||
def test_youtube_chapters(self):
|
||||
|
||||
@@ -58,6 +58,7 @@ from .utils import (
|
||||
format_bytes,
|
||||
formatSeconds,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
ISO3166Utils,
|
||||
locked_file,
|
||||
make_HTTPS_handler,
|
||||
@@ -302,6 +303,17 @@ class YoutubeDL(object):
|
||||
postprocessor.
|
||||
"""
|
||||
|
||||
_NUMERIC_FIELDS = set((
|
||||
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
||||
'timestamp', 'upload_year', 'upload_month', 'upload_day',
|
||||
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
||||
'average_rating', 'comment_count', 'age_limit',
|
||||
'start_time', 'end_time',
|
||||
'chapter_number', 'season_number', 'episode_number',
|
||||
'track_number', 'disc_number', 'release_year',
|
||||
'playlist_index',
|
||||
))
|
||||
|
||||
params = None
|
||||
_ies = []
|
||||
_pps = []
|
||||
@@ -498,24 +510,25 @@ class YoutubeDL(object):
|
||||
def to_console_title(self, message):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
|
||||
# c_wchar_p() might not be necessary if `message` is
|
||||
# already of type unicode()
|
||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||
if compat_os_name == 'nt':
|
||||
if ctypes.windll.kernel32.GetConsoleWindow():
|
||||
# c_wchar_p() might not be necessary if `message` is
|
||||
# already of type unicode()
|
||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||
elif 'TERM' in os.environ:
|
||||
self._write_string('\033]0;%s\007' % message, self._screen_file)
|
||||
|
||||
def save_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if 'TERM' in os.environ:
|
||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||
# Save the title on stack
|
||||
self._write_string('\033[22;0t', self._screen_file)
|
||||
|
||||
def restore_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if 'TERM' in os.environ:
|
||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||
# Restore the title from stack
|
||||
self._write_string('\033[23;0t', self._screen_file)
|
||||
|
||||
@@ -638,22 +651,11 @@ class YoutubeDL(object):
|
||||
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
||||
outtmpl)
|
||||
|
||||
NUMERIC_FIELDS = set((
|
||||
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
||||
'timestamp', 'upload_year', 'upload_month', 'upload_day',
|
||||
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
||||
'average_rating', 'comment_count', 'age_limit',
|
||||
'start_time', 'end_time',
|
||||
'chapter_number', 'season_number', 'episode_number',
|
||||
'track_number', 'disc_number', 'release_year',
|
||||
'playlist_index',
|
||||
))
|
||||
|
||||
# Missing numeric fields used together with integer presentation types
|
||||
# in format specification will break the argument substitution since
|
||||
# string 'NA' is returned for missing fields. We will patch output
|
||||
# template for missing fields to meet string presentation type.
|
||||
for numeric_field in NUMERIC_FIELDS:
|
||||
for numeric_field in self._NUMERIC_FIELDS:
|
||||
if numeric_field not in template_dict:
|
||||
# As of [1] format syntax is:
|
||||
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
||||
@@ -1344,9 +1346,28 @@ class YoutubeDL(object):
|
||||
if 'title' not in info_dict:
|
||||
raise ExtractorError('Missing "title" field in extractor result')
|
||||
|
||||
if not isinstance(info_dict['id'], compat_str):
|
||||
self.report_warning('"id" field is not a string - forcing string conversion')
|
||||
info_dict['id'] = compat_str(info_dict['id'])
|
||||
def report_force_conversion(field, field_not, conversion):
|
||||
self.report_warning(
|
||||
'"%s" field is not %s - forcing %s conversion, there is an error in extractor'
|
||||
% (field, field_not, conversion))
|
||||
|
||||
def sanitize_string_field(info, string_field):
|
||||
field = info.get(string_field)
|
||||
if field is None or isinstance(field, compat_str):
|
||||
return
|
||||
report_force_conversion(string_field, 'a string', 'string')
|
||||
info[string_field] = compat_str(field)
|
||||
|
||||
def sanitize_numeric_fields(info):
|
||||
for numeric_field in self._NUMERIC_FIELDS:
|
||||
field = info.get(numeric_field)
|
||||
if field is None or isinstance(field, compat_numeric_types):
|
||||
continue
|
||||
report_force_conversion(numeric_field, 'numeric', 'int')
|
||||
info[numeric_field] = int_or_none(field)
|
||||
|
||||
sanitize_string_field(info_dict, 'id')
|
||||
sanitize_numeric_fields(info_dict)
|
||||
|
||||
if 'playlist' not in info_dict:
|
||||
# It isn't part of a playlist
|
||||
@@ -1434,6 +1455,8 @@ class YoutubeDL(object):
|
||||
if 'url' not in format:
|
||||
raise ExtractorError('Missing "url" key in result (index %d)' % i)
|
||||
|
||||
sanitize_string_field(format, 'format_id')
|
||||
sanitize_numeric_fields(format)
|
||||
format['url'] = sanitize_url(format['url'])
|
||||
|
||||
if format.get('format_id') is None:
|
||||
|
||||
@@ -2322,6 +2322,19 @@ try:
|
||||
except ImportError: # Python 2
|
||||
from HTMLParser import HTMLParser as compat_HTMLParser
|
||||
|
||||
try: # Python 2
|
||||
from HTMLParser import HTMLParseError as compat_HTMLParseError
|
||||
except ImportError: # Python <3.4
|
||||
try:
|
||||
from html.parser import HTMLParseError as compat_HTMLParseError
|
||||
except ImportError: # Python >3.4
|
||||
|
||||
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
||||
# Python 3.5. Introducing a dummy exception for Python >=3.5 for compatible
|
||||
# and uniform cross-version exception handling
|
||||
class compat_HTMLParseError(Exception):
|
||||
pass
|
||||
|
||||
try:
|
||||
from subprocess import DEVNULL
|
||||
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
||||
@@ -2604,14 +2617,22 @@ except ImportError: # Python 2
|
||||
parsed_result[name] = [value]
|
||||
return parsed_result
|
||||
|
||||
try:
|
||||
from shlex import quote as compat_shlex_quote
|
||||
except ImportError: # Python < 3.3
|
||||
|
||||
compat_os_name = os._name if os.name == 'java' else os.name
|
||||
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
def compat_shlex_quote(s):
|
||||
if re.match(r'^[-_\w./]+$', s):
|
||||
return s
|
||||
else:
|
||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
|
||||
else:
|
||||
try:
|
||||
from shlex import quote as compat_shlex_quote
|
||||
except ImportError: # Python < 3.3
|
||||
def compat_shlex_quote(s):
|
||||
if re.match(r'^[-_\w./]+$', s):
|
||||
return s
|
||||
else:
|
||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||
|
||||
|
||||
try:
|
||||
@@ -2636,9 +2657,6 @@ def compat_ord(c):
|
||||
return ord(c)
|
||||
|
||||
|
||||
compat_os_name = os._name if os.name == 'java' else os.name
|
||||
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
compat_getenv = os.getenv
|
||||
compat_expanduser = os.path.expanduser
|
||||
@@ -2882,6 +2900,7 @@ else:
|
||||
|
||||
|
||||
__all__ = [
|
||||
'compat_HTMLParseError',
|
||||
'compat_HTMLParser',
|
||||
'compat_HTTPError',
|
||||
'compat_basestring',
|
||||
|
||||
@@ -8,10 +8,11 @@ import random
|
||||
|
||||
from ..compat import compat_os_name
|
||||
from ..utils import (
|
||||
decodeArgument,
|
||||
encodeFilename,
|
||||
error_to_compat_str,
|
||||
decodeArgument,
|
||||
format_bytes,
|
||||
shell_quote,
|
||||
timeconvert,
|
||||
)
|
||||
|
||||
@@ -381,10 +382,5 @@ class FileDownloader(object):
|
||||
if exe is None:
|
||||
exe = os.path.basename(str_args[0])
|
||||
|
||||
try:
|
||||
import pipes
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||
except ImportError:
|
||||
shell_quote = repr
|
||||
self.to_screen('[debug] %s command line: %s' % (
|
||||
exe, shell_quote(str_args)))
|
||||
|
||||
@@ -22,7 +22,7 @@ class ABCOTVSIE(InfoExtractor):
|
||||
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
|
||||
'ext': 'mp4',
|
||||
'title': 'East Bay museum celebrates vintage synthesizers',
|
||||
'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
|
||||
'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1421123075,
|
||||
'upload_date': '20150113',
|
||||
|
||||
@@ -6,7 +6,10 @@ import time
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_kwargs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
@@ -1317,7 +1320,8 @@ class AdobePassIE(InfoExtractor):
|
||||
headers = kwargs.get('headers', {})
|
||||
headers.update(self.geo_verification_headers())
|
||||
kwargs['headers'] = headers
|
||||
return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs)
|
||||
return super(AdobePassIE, self)._download_webpage_handle(
|
||||
*args, **compat_kwargs(kwargs))
|
||||
|
||||
@staticmethod
|
||||
def _get_mvpd_resource(provider_id, title, guid, rating):
|
||||
|
||||
93
youtube_dl/extractor/asiancrush.py
Normal file
93
youtube_dl/extractor/asiancrush.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
remove_end,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class AsianCrushIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
|
||||
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||
'info_dict': {
|
||||
'id': '1_y4tmjm5r',
|
||||
'ext': 'mp4',
|
||||
'title': 'Women Who Flirt',
|
||||
'description': 'md5:3db14e9186197857e7063522cb89a805',
|
||||
'timestamp': 1496936429,
|
||||
'upload_date': '20170608',
|
||||
'uploader_id': 'craig@crifkin.com',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
|
||||
data=urlencode_postdata({
|
||||
'postid': video_id,
|
||||
'action': 'get_channel_kaltura_vars',
|
||||
}))
|
||||
|
||||
entry_id = data['entry_id']
|
||||
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (data['partner_id'], entry_id),
|
||||
ie=KalturaIE.ie_key(), video_id=entry_id,
|
||||
video_title=data.get('vid_label'))
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
|
||||
_TEST = {
|
||||
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
|
||||
'info_dict': {
|
||||
'id': '12481',
|
||||
'title': 'Scholar Who Walks the Night',
|
||||
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
|
||||
},
|
||||
'playlist_count': 20,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = []
|
||||
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
|
||||
webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
if attrs.get('class') == 'clearfix':
|
||||
entries.append(self.url_result(
|
||||
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
||||
|
||||
title = remove_end(
|
||||
self._html_search_regex(
|
||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
|
||||
' | AsianCrush')
|
||||
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:description', webpage, 'description', fatal=False)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
@@ -14,14 +14,16 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
KNOWN_EXTENSIONS,
|
||||
parse_filesize,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
'md5': 'c557841d5e50261777a6585648adf439',
|
||||
@@ -155,7 +157,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
class BandcampAlbumIE(InfoExtractor):
|
||||
IE_NAME = 'Bandcamp:album'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
@@ -222,6 +224,12 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False
|
||||
if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
|
||||
else super(BandcampAlbumIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader_id = mobj.group('subdomain')
|
||||
@@ -250,3 +258,92 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class BandcampWeeklyIE(InfoExtractor):
    """Extractor for Bandcamp Weekly episodes addressed as ``bandcamp.com/?show=<id>``."""

    IE_NAME = 'Bandcamp:weekly'
    _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://bandcamp.com/?show=224',
        'md5': 'b00df799c733cf7e0c567ed187dea0fd',
        'info_dict': {
            'id': '224',
            'ext': 'opus',
            'title': 'BC Weekly April 4th 2017 - Magic Moments',
            'description': 'md5:5d48150916e8e02d030623a48512c874',
            'duration': 5829.77,
            'release_date': '20170404',
            'series': 'Bandcamp Weekly',
            'episode': 'Magic Moments',
            'episode_number': 208,
            'episode_id': '224',
        }
    }, {
        'url': 'https://bandcamp.com/?blah/blah@&show=228',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Build the info dict from the JSON blob embedded in the page's ``data-blob`` attribute."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The attribute value is HTML-escaped JSON, hence the unescapeHTML
        # transform before parsing.
        raw_blob = self._search_regex(
            r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
            'blob', group='blob')
        blob = self._parse_json(
            raw_blob, video_id, transform_source=unescapeHTML)

        show = blob['bcw_show']

        # This is desired because any invalid show id redirects to `bandcamp.com`
        # which happens to expose the latest Bandcamp Weekly episode.
        show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)

        formats = []
        for format_id, format_url in show['audio_stream'].items():
            if not isinstance(format_url, compat_str):
                continue
            # First known extension contained in the format id, if any.
            ext = next(
                (candidate for candidate in KNOWN_EXTENSIONS
                 if candidate in format_id),
                None)
            formats.append({
                'format_id': format_id,
                'url': format_url,
                'ext': ext,
                'vcodec': 'none',
            })
        self._sort_formats(formats)

        subtitle = show.get('subtitle')
        title = show.get('audio_title') or 'Bandcamp Weekly'
        if subtitle:
            title += ' - %s' % subtitle

        # Look the episode number up in the sequence list by matching show id;
        # only the first matching entry is used.
        episode_number = None
        seq = blob.get('bcw_seq')
        if seq and isinstance(seq, list):
            for entry in seq:
                if not isinstance(entry, dict):
                    continue
                if int_or_none(entry.get('id')) == show_id:
                    episode_number = int_or_none(entry.get('episode_number'))
                    break

        return {
            'id': video_id,
            'title': title,
            'description': show.get('desc') or show.get('short_desc'),
            'duration': float_or_none(show.get('audio_duration')),
            'is_live': False,
            'release_date': unified_strdate(show.get('published_date')),
            'series': 'Bandcamp Weekly',
            'episode': subtitle,
            'episode_number': episode_number,
            'episode_id': compat_str(video_id),
            'formats': formats
        }
|
||||
|
||||
@@ -54,6 +54,22 @@ class BiliBiliIE(InfoExtractor):
|
||||
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
|
||||
},
|
||||
'skip': 'Geo-restricted to China',
|
||||
}, {
|
||||
# Title with double quotes
|
||||
'url': 'http://www.bilibili.com/video/av8903802/',
|
||||
'info_dict': {
|
||||
'id': '8903802',
|
||||
'ext': 'mp4',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
|
||||
'uploader': '阿滴英文',
|
||||
'uploader_id': '65880958',
|
||||
'timestamp': 1488382620,
|
||||
'upload_date': '20170301',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Test metadata only
|
||||
},
|
||||
}]
|
||||
|
||||
_APP_KEY = '84956560bc028eb7'
|
||||
@@ -135,7 +151,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||
title = self._html_search_regex('<h1[^>]*>([^<]+)</h1>', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
|
||||
|
||||
@@ -376,7 +376,7 @@ class InfoExtractor(object):
|
||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||
m = cls._VALID_URL_RE.match(url)
|
||||
assert m
|
||||
return m.group('id')
|
||||
return compat_str(m.group('id'))
|
||||
|
||||
@classmethod
|
||||
def working(cls):
|
||||
@@ -420,7 +420,7 @@ class InfoExtractor(object):
|
||||
if country_code:
|
||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self._downloader.to_stdout(
|
||||
self._downloader.to_screen(
|
||||
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
||||
% (self._x_forwarded_for_ip, country_code.upper()))
|
||||
|
||||
@@ -2299,6 +2299,8 @@ class InfoExtractor(object):
|
||||
tracks = video_data.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
if track.get('kind') != 'captions':
|
||||
continue
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
@@ -2328,6 +2330,8 @@ class InfoExtractor(object):
|
||||
urls = []
|
||||
formats = []
|
||||
for source in jwplayer_sources_data:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = self._proto_relative_url(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
|
||||
@@ -8,7 +8,16 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class CorusIE(ThePlatformFeedIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:globaltv|etcanada)\.com|
|
||||
(?:hgtv|foodnetwork|slice|history|showcase)\.ca
|
||||
)
|
||||
/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
|
||||
'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
|
||||
@@ -27,6 +36,12 @@ class CorusIE(ThePlatformFeedIE):
|
||||
}, {
|
||||
'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TP_FEEDS = {
|
||||
@@ -50,6 +65,14 @@ class CorusIE(ThePlatformFeedIE):
|
||||
'feed_id': '5tUJLgV2YNJ5',
|
||||
'account_id': 2414427935,
|
||||
},
|
||||
'history': {
|
||||
'feed_id': 'tQFx_TyyEq4J',
|
||||
'account_id': 2369613659,
|
||||
},
|
||||
'showcase': {
|
||||
'feed_id': '9H6qyshBZU3E',
|
||||
'account_id': 2414426607,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
class DisneyIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
|
||||
https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr|channel\.de)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
|
||||
_TESTS = [{
|
||||
# Disney.EmbedVideo
|
||||
'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
|
||||
@@ -68,6 +68,9 @@ class DisneyIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://disneychannel.de/sehen/soy-luna-folge-118-5518518987ba27f3cc729268',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -1,135 +1,59 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DRBonanzaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/(?:[^/]+/)+(?:[^/])+?(?:assetId=(?P<id>\d+))?(?:[#&]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/[^/]+/\d+/[^/]+/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.dr.dk/bonanza/serie/154/matador/40312/matador---0824-komme-fremmede-',
|
||||
'info_dict': {
|
||||
'id': '65517',
|
||||
'id': '40312',
|
||||
'display_id': 'matador---0824-komme-fremmede-',
|
||||
'ext': 'mp4',
|
||||
'title': 'Talkshowet - Leonard Cohen',
|
||||
'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca',
|
||||
'title': 'MATADOR - 08:24. "Komme fremmede".',
|
||||
'description': 'md5:77b4c1ac4d4c1b9d610ab4395212ff84',
|
||||
'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
|
||||
'timestamp': 1295537932,
|
||||
'upload_date': '20110120',
|
||||
'duration': 3664,
|
||||
'duration': 4613,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmp
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
|
||||
'md5': '6dfe039417e76795fb783c52da3de11d',
|
||||
'info_dict': {
|
||||
'id': '59410',
|
||||
'ext': 'mp3',
|
||||
'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission',
|
||||
'description': 'md5:501e5a195749480552e214fbbed16c4e',
|
||||
'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
|
||||
'timestamp': 1223274900,
|
||||
'upload_date': '20081006',
|
||||
'duration': 7369,
|
||||
},
|
||||
}]
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, url_id)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, display_id = mobj.group('id', 'display_id')
|
||||
|
||||
if url_id:
|
||||
info = json.loads(self._html_search_regex(r'({.*?%s.*})' % url_id, webpage, 'json'))
|
||||
else:
|
||||
# Just fetch the first video on that page
|
||||
info = json.loads(self._html_search_regex(r'bonanzaFunctions.newPlaylist\(({.*})\)', webpage, 'json'))
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
asset_id = str(info['AssetId'])
|
||||
title = info['Title'].rstrip(' \'\"-,.:;!?')
|
||||
duration = int_or_none(info.get('Duration'), scale=1000)
|
||||
# First published online. "FirstPublished" contains the date for original airing.
|
||||
timestamp = parse_iso8601(
|
||||
re.sub(r'\.\d+$', '', info['Created']))
|
||||
info = self._parse_html5_media_entries(
|
||||
url, webpage, display_id, m3u8_id='hls',
|
||||
m3u8_entry_protocol='m3u8_native')[0]
|
||||
self._sort_formats(info['formats'])
|
||||
|
||||
def parse_filename_info(url):
|
||||
match = re.search(r'/\d+_(?P<width>\d+)x(?P<height>\d+)x(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
|
||||
if match:
|
||||
return {
|
||||
'width': int(match.group('width')),
|
||||
'height': int(match.group('height')),
|
||||
'vbr': int(match.group('bitrate')),
|
||||
'ext': match.group('ext')
|
||||
}
|
||||
match = re.search(r'/\d+_(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
|
||||
if match:
|
||||
return {
|
||||
'vbr': int(match.group('bitrate')),
|
||||
'ext': match.group(2)
|
||||
}
|
||||
return {}
|
||||
asset = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)currentAsset\s*=\s*({.+?})\s*</script', webpage, 'asset'),
|
||||
display_id, transform_source=js_to_json)
|
||||
|
||||
video_types = ['VideoHigh', 'VideoMid', 'VideoLow']
|
||||
preferencemap = {
|
||||
'VideoHigh': -1,
|
||||
'VideoMid': -2,
|
||||
'VideoLow': -3,
|
||||
'Audio': -4,
|
||||
}
|
||||
title = unescapeHTML(asset['AssetTitle']).strip()
|
||||
|
||||
formats = []
|
||||
for file in info['Files']:
|
||||
if info['Type'] == 'Video':
|
||||
if file['Type'] in video_types:
|
||||
format = parse_filename_info(file['Location'])
|
||||
format.update({
|
||||
'url': file['Location'],
|
||||
'format_id': file['Type'].replace('Video', ''),
|
||||
'preference': preferencemap.get(file['Type'], -10),
|
||||
})
|
||||
if format['url'].startswith('rtmp'):
|
||||
rtmp_url = format['url']
|
||||
format['rtmp_live'] = True # --resume does not work
|
||||
if '/bonanza/' in rtmp_url:
|
||||
format['play_path'] = rtmp_url.split('/bonanza/')[1]
|
||||
formats.append(format)
|
||||
elif file['Type'] == 'Thumb':
|
||||
thumbnail = file['Location']
|
||||
elif info['Type'] == 'Audio':
|
||||
if file['Type'] == 'Audio':
|
||||
format = parse_filename_info(file['Location'])
|
||||
format.update({
|
||||
'url': file['Location'],
|
||||
'format_id': file['Type'],
|
||||
'vcodec': 'none',
|
||||
})
|
||||
formats.append(format)
|
||||
elif file['Type'] == 'Thumb':
|
||||
thumbnail = file['Location']
|
||||
def extract(field):
|
||||
return self._search_regex(
|
||||
r'<div[^>]+>\s*<p>%s:<p>\s*</div>\s*<div[^>]+>\s*<p>([^<]+)</p>' % field,
|
||||
webpage, field, default=None)
|
||||
|
||||
description = '%s\n%s\n%s\n' % (
|
||||
info['Description'], info['Actors'], info['Colophon'])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
|
||||
display_id = re.sub(r'-+', '-', display_id)
|
||||
|
||||
return {
|
||||
'id': asset_id,
|
||||
info.update({
|
||||
'id': asset.get('AssetId') or video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
}
|
||||
'description': extract('Programinfo'),
|
||||
'duration': parse_duration(extract('Tid')),
|
||||
'thumbnail': asset.get('AssetImageUrl'),
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -44,8 +44,23 @@ class DrTuberIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://www.drtuber.com/video/%s' % video_id, display_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
video_data = self._download_json(
|
||||
'http://www.drtuber.com/player_config_json/', video_id, query={
|
||||
'vid': video_id,
|
||||
'embed': 0,
|
||||
'aid': 0,
|
||||
'domain_id': 0,
|
||||
})
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in video_data['files'].items():
|
||||
if video_url:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': 2 if format_id == 'hq' else 1,
|
||||
'url': video_url
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
|
||||
@@ -75,7 +90,7 @@ class DrTuberIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'like_count': like_count,
|
||||
|
||||
@@ -71,6 +71,10 @@ from .arte import (
|
||||
TheOperaPlatformIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
from .asiancrush import (
|
||||
AsianCrushIE,
|
||||
AsianCrushPlaylistIE,
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
from .atvat import ATVAtIE
|
||||
@@ -90,7 +94,7 @@ from .azmedien import (
|
||||
)
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||
from .bbc import (
|
||||
BBCCoUkIE,
|
||||
BBCCoUkArticleIE,
|
||||
@@ -392,7 +396,6 @@ from .globo import (
|
||||
from .go import GoIE
|
||||
from .go90 import Go90IE
|
||||
from .godtube import GodTubeIE
|
||||
from .godtv import GodTVIE
|
||||
from .golem import GolemIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .googleplus import GooglePlusIE
|
||||
@@ -637,7 +640,10 @@ from .neteasemusic import (
|
||||
NetEaseMusicProgramIE,
|
||||
NetEaseMusicDjRadioIE,
|
||||
)
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newgrounds import (
|
||||
NewgroundsIE,
|
||||
NewgroundsPlaylistIE,
|
||||
)
|
||||
from .newstube import NewstubeIE
|
||||
from .nextmedia import (
|
||||
NextMediaIE,
|
||||
@@ -869,6 +875,7 @@ from .rutube import (
|
||||
)
|
||||
from .rutv import RUTVIE
|
||||
from .ruutu import RuutuIE
|
||||
from .ruv import RuvIE
|
||||
from .sandia import SandiaIE
|
||||
from .safari import (
|
||||
SafariIE,
|
||||
|
||||
@@ -102,6 +102,8 @@ class FirstTVIE(InfoExtractor):
|
||||
'format_id': f.get('name'),
|
||||
'tbr': tbr,
|
||||
'source_preference': quality(f.get('name')),
|
||||
# quality metadata of http formats may be incorrect
|
||||
'preference': -1,
|
||||
})
|
||||
# m3u8 URL format is reverse engineered from [1] (search for
|
||||
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -81,7 +84,7 @@ class FlickrIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for stream in streams['stream']:
|
||||
stream_type = str(stream.get('type'))
|
||||
stream_type = compat_str(stream.get('type'))
|
||||
formats.append({
|
||||
'format_id': stream_type,
|
||||
'url': stream['_content'],
|
||||
|
||||
@@ -85,11 +85,11 @@ class FourTubeIE(InfoExtractor):
|
||||
media_id = params[0]
|
||||
sources = ['%s' % p for p in params[2]]
|
||||
|
||||
token_url = 'http://tkn.4tube.com/{0}/desktop/{1}'.format(
|
||||
token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format(
|
||||
media_id, '+'.join(sources))
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
b'Origin': b'http://www.4tube.com',
|
||||
b'Origin': b'https://www.4tube.com',
|
||||
}
|
||||
token_req = sanitized_Request(token_url, b'{}', headers)
|
||||
tokens = self._download_json(token_req, video_id)
|
||||
|
||||
@@ -5,6 +5,7 @@ import itertools
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
@@ -46,7 +47,7 @@ class FoxgayIE(InfoExtractor):
|
||||
|
||||
formats = [{
|
||||
'url': source,
|
||||
'height': resolution,
|
||||
'height': int_or_none(resolution),
|
||||
} for source, resolution in zip(
|
||||
video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
|
||||
|
||||
|
||||
@@ -112,7 +112,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
|
||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)+(?P<id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||
@@ -157,6 +157,9 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.france.tv/142749-rouge-sang.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -10,6 +10,7 @@ from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
@@ -1907,14 +1908,14 @@ class GenericIE(InfoExtractor):
|
||||
content_type = head_response.headers.get('Content-Type', '').lower()
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
if m:
|
||||
format_id = m.group('format_id')
|
||||
format_id = compat_str(m.group('format_id'))
|
||||
if format_id.endswith('mpegurl'):
|
||||
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||
elif format_id == 'f4m':
|
||||
formats = self._extract_f4m_formats(url, video_id)
|
||||
else:
|
||||
formats = [{
|
||||
'format_id': m.group('format_id'),
|
||||
'format_id': format_id,
|
||||
'url': url,
|
||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||
}]
|
||||
|
||||
@@ -82,7 +82,7 @@ class GfycatIE(InfoExtractor):
|
||||
video_url = gfy.get('%sUrl' % format_id)
|
||||
if not video_url:
|
||||
continue
|
||||
filesize = gfy.get('%sSize' % format_id)
|
||||
filesize = int_or_none(gfy.get('%sSize' % format_id))
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
|
||||
@@ -1,66 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class GodTVIE(InfoExtractor):
    """Extractor for god.tv pages; hands the embedded Ooyala content ids to OoyalaIE."""

    _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
        'info_dict': {
            'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
            'ext': 'mp4',
            'title': 'Randy Needham',
            'duration': 3615.08,
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'http://god.tv/playlist/bible-study',
        'info_dict': {
            'id': 'bible-study',
        },
        'playlist_mincount': 37,
    }, {
        'url': 'http://god.tv/node/15097',
        'only_matching': True,
    }, {
        'url': 'http://god.tv/live/africa',
        'only_matching': True,
    }, {
        'url': 'http://god.tv/liveevents',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist when the page settings list one, else a single Ooyala result."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Settings are optional: a missing match parses as '{}' and a
        # parse failure is non-fatal.
        settings_js = self._search_regex(
            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
            webpage, 'settings', default='{}')
        settings = self._parse_json(
            settings_js, display_id, transform_source=js_to_json,
            fatal=False)

        ooyala_id = None
        if settings:
            playlist = settings.get('playlist')
            if playlist and isinstance(playlist, list):
                entries = []
                for video in playlist:
                    if video.get('content_id'):
                        entries.append(
                            OoyalaIE._build_url_result(video['content_id']))
                if entries:
                    return self.playlist_result(entries, display_id)
            ooyala_id = settings.get('ooyala', {}).get('content_id')

        # Fall back to scraping the content id straight from the markup.
        if not ooyala_id:
            ooyala_id = self._search_regex(
                r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
                webpage, 'ooyala id', group='id')

        return OoyalaIE._build_url_result(ooyala_id)
|
||||
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -46,7 +47,7 @@ class GolemIE(InfoExtractor):
|
||||
continue
|
||||
|
||||
formats.append({
|
||||
'format_id': e.tag,
|
||||
'format_id': compat_str(e.tag),
|
||||
'url': compat_urlparse.urljoin(self._PREFIX, url),
|
||||
'height': self._int(e.get('height'), 'height'),
|
||||
'width': self._int(e.get('width'), 'width'),
|
||||
|
||||
@@ -69,19 +69,32 @@ class GoogleDriveIE(InfoExtractor):
|
||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
|
||||
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
|
||||
|
||||
resolutions = {}
|
||||
for fmt in fmt_list:
|
||||
mobj = re.search(
|
||||
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
|
||||
if mobj:
|
||||
resolutions[mobj.group('format_id')] = (
|
||||
int(mobj.group('width')), int(mobj.group('height')))
|
||||
|
||||
formats = []
|
||||
for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
|
||||
fmt_id, fmt_url = fmt_stream.split('|')
|
||||
resolution = fmt.split('/')[1]
|
||||
width, height = resolution.split('x')
|
||||
formats.append({
|
||||
'url': lowercase_escape(fmt_url),
|
||||
'format_id': fmt_id,
|
||||
'resolution': resolution,
|
||||
'width': int_or_none(width),
|
||||
'height': int_or_none(height),
|
||||
'ext': self._FORMATS_EXT[fmt_id],
|
||||
})
|
||||
for fmt_stream in fmt_stream_map:
    # Split each stream-map entry on '|' into format id and URL;
    # entries with fewer than two fields are skipped.
    fmt_stream_split = fmt_stream.split('|')
    if len(fmt_stream_split) < 2:
        continue
    format_id, format_url = fmt_stream_split[:2]
    f = {
        'url': lowercase_escape(format_url),
        'format_id': format_id,
        'ext': self._FORMATS_EXT[format_id],
    }
    # `resolutions` maps format_id -> (width, height), built from fmt_list.
    resolution = resolutions.get(format_id)
    if resolution:
        f.update({
            'width': resolution[0],
            # Fixed: height is the second tuple element; index 0 is the width.
            'height': resolution[1],
        })
    formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
||||
@@ -7,14 +7,19 @@ from .common import InfoExtractor
|
||||
class HGTVComShowIE(InfoExtractor):
|
||||
IE_NAME = 'hgtv.com:show'
|
||||
_VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-videos',
|
||||
_TESTS = [{
|
||||
# data-module="video"
|
||||
'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-season-4-videos',
|
||||
'info_dict': {
|
||||
'id': 'flip-or-flop-full-episodes-videos',
|
||||
'id': 'flip-or-flop-full-episodes-season-4-videos',
|
||||
'title': 'Flip or Flop Full Episodes',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}
|
||||
}, {
|
||||
# data-deferred-module="video"
|
||||
'url': 'http://www.hgtv.com/shows/good-bones/episodes/an-old-victorian-house-gets-a-new-facelift',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -23,7 +28,7 @@ class HGTVComShowIE(InfoExtractor):
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)data-module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
|
||||
# "deferred-" is optional as a unit so that both data-module="video" and
# data-deferred-module="video" are matched.
r'(?s)data-(?:deferred-)?module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
|
||||
webpage, 'video config'),
|
||||
display_id)['channels'][0]
|
||||
|
||||
|
||||
@@ -65,9 +65,9 @@ class JoveIE(InfoExtractor):
|
||||
webpage, 'description', fatal=False)
|
||||
publish_date = unified_strdate(self._html_search_meta(
|
||||
'citation_publication_date', webpage, 'publish date', fatal=False))
|
||||
comment_count = self._html_search_regex(
|
||||
comment_count = int(self._html_search_regex(
|
||||
r'<meta name="num_comments" content="(\d+) Comments?"',
|
||||
webpage, 'comment count', fatal=False)
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -115,8 +115,9 @@ class LiveLeakIE(InfoExtractor):
|
||||
|
||||
for a_format in info_dict['formats']:
|
||||
if not a_format.get('height'):
|
||||
a_format['height'] = self._search_regex(
|
||||
r'([0-9]+)p\.mp4', a_format['url'], 'height label', default=None)
|
||||
a_format['height'] = int_or_none(self._search_regex(
|
||||
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
|
||||
default=None))
|
||||
|
||||
self._sort_formats(info_dict['formats'])
|
||||
|
||||
|
||||
@@ -68,10 +68,6 @@ class MSNIE(InfoExtractor):
|
||||
format_url = file_.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'ism':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
format_url + '/Manifest', display_id, 'mss', fatal=False))
|
||||
if 'm3u8' in format_url:
|
||||
# m3u8_native should not be used here until
|
||||
# https://github.com/rg3/youtube-dl/issues/9913 is fixed
|
||||
@@ -79,6 +75,9 @@ class MSNIE(InfoExtractor):
|
||||
format_url, display_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(m3u8_formats)
|
||||
elif determine_ext(format_url) == 'ism':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
format_url + '/Manifest', display_id, 'mss', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
||||
@@ -1,6 +1,15 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class NewgroundsIE(InfoExtractor):
|
||||
@@ -13,7 +22,10 @@ class NewgroundsIE(InfoExtractor):
|
||||
'ext': 'mp3',
|
||||
'title': 'B7 - BusMode',
|
||||
'uploader': 'Burn7',
|
||||
}
|
||||
'timestamp': 1378878540,
|
||||
'upload_date': '20130911',
|
||||
'duration': 143,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.newgrounds.com/portal/view/673111',
|
||||
'md5': '3394735822aab2478c31b1004fe5e5bc',
|
||||
@@ -22,25 +34,133 @@ class NewgroundsIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Dancin',
|
||||
'uploader': 'Squirrelman82',
|
||||
'timestamp': 1460256780,
|
||||
'upload_date': '20160410',
|
||||
},
|
||||
}, {
|
||||
# source format unavailable, additional mp4 formats
|
||||
'url': 'http://www.newgrounds.com/portal/view/689400',
|
||||
'info_dict': {
|
||||
'id': '689400',
|
||||
'ext': 'mp4',
|
||||
'title': 'ZTV News Episode 8',
|
||||
'uploader': 'BennettTheSage',
|
||||
'timestamp': 1487965140,
|
||||
'upload_date': '20170224',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^>]+)</title>', webpage, 'title')
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'Author\s*<a[^>]+>([^<]+)', webpage, 'uploader', fatal=False)
|
||||
media_url = self._parse_json(self._search_regex(
|
||||
r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)
|
||||
|
||||
music_url = self._parse_json(self._search_regex(
|
||||
r'"url":("[^"]+"),', webpage, ''), media_id)
|
||||
formats = [{
|
||||
'url': media_url,
|
||||
'format_id': 'source',
|
||||
'quality': 1,
|
||||
}]
|
||||
|
||||
max_resolution = int_or_none(self._search_regex(
|
||||
r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
|
||||
default=None))
|
||||
if max_resolution:
|
||||
url_base = media_url.rpartition('.')[0]
|
||||
for resolution in (360, 720, 1080):
|
||||
if resolution > max_resolution:
|
||||
break
|
||||
formats.append({
|
||||
'url': '%s.%dp.mp4' % (url_base, resolution),
|
||||
'format_id': '%dp' % resolution,
|
||||
'height': resolution,
|
||||
})
|
||||
|
||||
self._check_formats(formats, media_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
uploader = self._search_regex(
|
||||
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
|
||||
fatal=False)
|
||||
|
||||
timestamp = unified_timestamp(self._search_regex(
|
||||
r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
|
||||
default=None))
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
|
||||
default=None))
|
||||
|
||||
filesize_approx = parse_filesize(self._html_search_regex(
|
||||
r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
|
||||
default=None))
|
||||
if len(formats) == 1:
|
||||
formats[0]['filesize_approx'] = filesize_approx
|
||||
|
||||
if '<dd>Song' in webpage:
|
||||
formats[0]['vcodec'] = 'none'
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
'url': music_url,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class NewgroundsPlaylistIE(InfoExtractor):
    """Playlist extractor for Newgrounds collection and search result pages.

    Every portal/audio submission link found in the main ("wide") column of
    the page is delegated to ``NewgroundsIE`` as a playlist entry.
    """

    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.newgrounds.com/collection/cats',
        'info_dict': {
            'id': 'cats',
            'title': 'Cats',
        },
        'playlist_mincount': 46,
    }, {
        'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA',
        'info_dict': {
            'id': 'ZONE-SAMA',
            'title': 'Portal Search: ZONE-SAMA',
        },
        'playlist_mincount': 47,
    }, {
        'url': 'http://www.newgrounds.com/audio/search/title/cats',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist result from the submissions linked on the page at *url*."""
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)

        # The title is optional: None is passed through when <title> is absent.
        title = self._search_regex(
            r'<title>([^>]+)</title>', page, 'title', default=None)

        # Drop everything before the wide column (i.e. the left menu); when
        # the marker is not found the whole page is scanned instead.
        page = self._search_regex(
            r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
            page, 'wide column', default=page)

        wanted_classes = ('item-portalsubmission', 'item-audiosubmission')
        anchors = re.findall(
            r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)',
            page)

        # Only anchors whose class attribute is exactly one of the submission
        # item classes are turned into entries.
        entries = [
            self.url_result(
                'https://www.newgrounds.com/%s' % rel_path,
                ie=NewgroundsIE.ie_key(), video_id=media_id)
            for anchor, rel_path, media_id in anchors
            if extract_attributes(anchor).get('class') in wanted_classes
        ]

        return self.playlist_result(entries, playlist_id, title)
|
||||
|
||||
@@ -35,7 +35,7 @@ class NPOIE(NPOBaseIE):
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
npo\.nl/(?!live|radio)(?:[^/]+/){2}|
|
||||
npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
|
||||
ntr\.nl/(?:[^/]+/){2,}|
|
||||
omroepwnl\.nl/video/fragment/[^/]+__|
|
||||
zapp\.nl/[^/]+/[^/]+/
|
||||
@@ -150,6 +150,9 @@ class NPOIE(NPOBaseIE):
|
||||
# live stream
|
||||
'url': 'npo:LI_NL1_4188102',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -14,7 +15,6 @@ from ..utils import (
|
||||
strip_or_none,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -45,22 +45,15 @@ class PacktPubIE(PacktPubBaseIE):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
webpage = self._download_webpage(self._PACKT_BASE, None)
|
||||
login_form = self._form_hidden_inputs(
|
||||
'packt-user-login-form', webpage)
|
||||
login_form.update({
|
||||
'email': username,
|
||||
'password': password,
|
||||
})
|
||||
self._download_webpage(
|
||||
self._PACKT_BASE, None, 'Logging in as %s' % username,
|
||||
data=urlencode_postdata(login_form))
|
||||
try:
|
||||
self._TOKEN = self._download_json(
|
||||
'%s/users/tokens/sessions' % self._MAPT_REST, None,
|
||||
'Downloading Authorization Token')['data']['token']
|
||||
self._MAPT_REST + '/users/tokens', None,
|
||||
'Downloading Authorization Token', data=json.dumps({
|
||||
'email': username,
|
||||
'password': password,
|
||||
}).encode())['data']['access']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 404):
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 404):
|
||||
message = self._parse_json(e.cause.read().decode(), None)['message']
|
||||
raise ExtractorError(message, expected=True)
|
||||
raise
|
||||
@@ -83,7 +76,7 @@ class PacktPubIE(PacktPubBaseIE):
|
||||
|
||||
headers = {}
|
||||
if self._TOKEN:
|
||||
headers['Authorization'] = self._TOKEN
|
||||
headers['Authorization'] = 'Bearer ' + self._TOKEN
|
||||
video = self._download_json(
|
||||
'%s/users/me/products/%s/chapters/%s/sections/%s'
|
||||
% (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
|
||||
|
||||
@@ -19,7 +19,7 @@ class PandoraTVIE(InfoExtractor):
|
||||
IE_NAME = 'pandora.tv'
|
||||
IE_DESC = '판도라TV'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
|
||||
'info_dict': {
|
||||
'id': '53294230',
|
||||
@@ -34,7 +34,26 @@ class PandoraTVIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://channel.pandora.tv/channel/video.ptv?ch_userid=gogoucc&prgid=54721744',
|
||||
'info_dict': {
|
||||
'id': '54721744',
|
||||
'ext': 'flv',
|
||||
'title': '[HD] JAPAN COUNTDOWN 170423',
|
||||
'description': '[HD] JAPAN COUNTDOWN 170423',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1704.9,
|
||||
'upload_date': '20170423',
|
||||
'uploader': 'GOGO_UCC',
|
||||
'uploader_id': 'gogoucc',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'params': {
|
||||
# Test metadata only
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
@@ -86,7 +105,7 @@ class PandoraTVIE(InfoExtractor):
|
||||
'description': info.get('body'),
|
||||
'thumbnail': info.get('thumbnail') or info.get('poster'),
|
||||
'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')),
|
||||
'upload_date': info['fid'][:8] if isinstance(info.get('fid'), compat_str) else None,
|
||||
'upload_date': info['fid'].split('/')[-1][:8] if isinstance(info.get('fid'), compat_str) else None,
|
||||
'uploader': info.get('nickname'),
|
||||
'uploader_id': info.get('upload_userid'),
|
||||
'view_count': str_to_int(info.get('hit')),
|
||||
|
||||
@@ -65,7 +65,7 @@ class PolskieRadioIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
content = self._search_regex(
|
||||
r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>',
|
||||
r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
|
||||
webpage, 'content')
|
||||
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
|
||||
@@ -252,11 +252,14 @@ class PornHubPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
|
||||
playlist_id)
|
||||
r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
|
||||
'playlist', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
title = playlist.get('title') or self._search_regex(
|
||||
r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist.get('title'), playlist.get('description'))
|
||||
entries, playlist_id, title, playlist.get('description'))
|
||||
|
||||
|
||||
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||
@@ -296,6 +299,7 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
break
|
||||
raise
|
||||
page_entries = self._extract_entries(webpage)
|
||||
if not page_entries:
|
||||
break
|
||||
|
||||
@@ -15,7 +15,7 @@ class RtlNlIE(InfoExtractor):
|
||||
https?://(?:www\.)?
|
||||
(?:
|
||||
rtlxl\.nl/[^\#]*\#!/[^/]+/|
|
||||
rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=
|
||||
rtl\.nl/(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=|video/)
|
||||
)
|
||||
(?P<id>[0-9a-f-]+)'''
|
||||
|
||||
@@ -70,6 +70,9 @@ class RtlNlIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -13,11 +13,15 @@ from ..utils import (
|
||||
class RUTVIE(InfoExtractor):
|
||||
IE_DESC = 'RUTV.RU'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://player\.(?:rutv\.ru|vgtrk\.com)/
|
||||
(?P<path>flash\d+v/container\.swf\?id=
|
||||
|iframe/(?P<type>swf|video|live)/id/
|
||||
|index/iframe/cast_id/)
|
||||
(?P<id>\d+)'''
|
||||
https?://
|
||||
(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/
|
||||
(?P<path>
|
||||
flash\d+v/container\.swf\?id=|
|
||||
iframe/(?P<type>swf|video|live)/id/|
|
||||
index/iframe/cast_id/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -99,17 +103,21 @@ class RUTVIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
mobj = re.search(
|
||||
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
|
||||
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
101
youtube_dl/extractor/ruv.py
Normal file
101
youtube_dl/extractor/ruv.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class RuvIE(InfoExtractor):
    """Information extractor for media pages on ruv.is.

    Matches ``/sarpurinn/<section>/<slug>[/<digits>]`` and ``/node/<id>``
    URLs (see ``_VALID_URL``) and reads the media URL from the inline
    ``video.src = '...'`` assignment found in the page.
    """

    _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'
    _TESTS = [{
        # m3u8
        'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
        'md5': '66347652f4e13e71936817102acc1724',
        'info_dict': {
            'id': '1144499',
            'display_id': 'fh-valur/20170516',
            'ext': 'mp4',
            'title': 'FH - Valur',
            'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
            'timestamp': 1494963600,
            'upload_date': '20170516',
        },
    }, {
        # mp3
        'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
        'md5': '395ea250c8a13e5fdb39d4670ef85378',
        'info_dict': {
            'id': '1153630',
            'display_id': 'morgunutvarpid/20170619',
            'ext': 'mp3',
            'title': 'Morgunútvarpið',
            'description': 'md5:a4cf1202c0a1645ca096b06525915418',
            'timestamp': 1497855000,
            'upload_date': '20170619',
        },
    }, {
        'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
        'only_matching': True,
    }, {
        'url': 'http://www.ruv.is/node/1151854',
        'only_matching': True,
    }, {
        'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
        'only_matching': True,
    }, {
        'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page at *url* and return its info dict.

        The returned dict carries ``id``, ``display_id``, ``title``,
        ``description``, ``thumbnail``, ``timestamp`` and ``formats``.
        Title and media URL are looked up with the helpers' default
        (fatal) settings; the remaining metadata is optional.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = self._og_search_title(webpage)

        # Matches ``video.<field> = '<value>'`` with either quote style.
        # Group 1 is the opening quote; the value itself is the named
        # group 'url', so callers must ask for that group explicitly.
        FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'

        media_url = self._html_search_regex(
            FIELD_RE % 'src', webpage, 'video URL', group='url')

        # Prefer the numeric node id from a <link> tag; fall back to the
        # id taken from the URL when it is not present.
        video_id = self._search_regex(
            r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
            webpage, 'video id', default=display_id)

        ext = determine_ext(media_url)

        if ext == 'm3u8':
            formats = self._extract_m3u8_formats(
                media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        elif ext == 'mp3':
            # Audio-only stream: flag the absence of a video track.
            formats = [{
                'format_id': 'mp3',
                'url': media_url,
                'vcodec': 'none',
            }]
        else:
            formats = [{
                'url': media_url,
            }]

        description = self._og_search_description(webpage, default=None)
        # Fix: request the named 'url' group. Without it the lookup hands
        # back the first capture group of FIELD_RE, which is the quote
        # character rather than the poster URL.
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._search_regex(
            FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False,
            group='url')
        timestamp = unified_timestamp(self._html_search_meta(
            'article:published_time', webpage, 'timestamp', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'formats': formats,
        }
|
||||
@@ -16,7 +16,6 @@ from ..utils import (
|
||||
|
||||
class SafariBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
|
||||
_SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
|
||||
_NETRC_MACHINE = 'safari'
|
||||
|
||||
_API_BASE = 'https://www.safaribooksonline.com/api/v1'
|
||||
@@ -28,10 +27,6 @@ class SafariBaseIE(InfoExtractor):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
# We only need to log in once for courses or individual videos
|
||||
if self.LOGGED_IN:
|
||||
return
|
||||
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
@@ -39,11 +34,17 @@ class SafariBaseIE(InfoExtractor):
|
||||
headers = std_headers.copy()
|
||||
if 'Referer' not in headers:
|
||||
headers['Referer'] = self._LOGIN_URL
|
||||
login_page_request = sanitized_Request(self._LOGIN_URL, headers=headers)
|
||||
|
||||
login_page = self._download_webpage(
|
||||
login_page_request, None,
|
||||
'Downloading login form')
|
||||
self._LOGIN_URL, None, 'Downloading login form', headers=headers)
|
||||
|
||||
def is_logged(webpage):
|
||||
return any(re.search(p, webpage) for p in (
|
||||
r'href=["\']/accounts/logout/', r'>Sign Out<'))
|
||||
|
||||
if is_logged(login_page):
|
||||
self.LOGGED_IN = True
|
||||
return
|
||||
|
||||
csrf = self._html_search_regex(
|
||||
r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
|
||||
@@ -62,14 +63,12 @@ class SafariBaseIE(InfoExtractor):
|
||||
login_page = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
||||
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
|
||||
if not is_logged(login_page):
|
||||
raise ExtractorError(
|
||||
'Login failed; make sure your credentials are correct and try again.',
|
||||
expected=True)
|
||||
|
||||
SafariBaseIE.LOGGED_IN = True
|
||||
|
||||
self.to_screen('Login successful')
|
||||
self.LOGGED_IN = True
|
||||
|
||||
|
||||
class SafariIE(SafariBaseIE):
|
||||
|
||||
@@ -32,8 +32,9 @@ class SexuIE(InfoExtractor):
|
||||
formats = [{
|
||||
'url': source['file'].replace('\\', ''),
|
||||
'format_id': source.get('label'),
|
||||
'height': self._search_regex(
|
||||
r'^(\d+)[pP]', source.get('label', ''), 'height', default=None),
|
||||
'height': int(self._search_regex(
|
||||
r'^(\d+)[pP]', source.get('label', ''), 'height',
|
||||
default=None)),
|
||||
} for source in sources if source.get('file')]
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -8,7 +8,11 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class SohuIE(InfoExtractor):
|
||||
@@ -169,10 +173,11 @@ class SohuIE(InfoExtractor):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'filesize': data['clipsBytes'][i],
|
||||
'width': data['width'],
|
||||
'height': data['height'],
|
||||
'fps': data['fps'],
|
||||
'filesize': int_or_none(
|
||||
try_get(data, lambda x: x['clipsBytes'][i])),
|
||||
'width': int_or_none(data.get('width')),
|
||||
'height': int_or_none(data.get('height')),
|
||||
'fps': int_or_none(data.get('fps')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -21,6 +21,17 @@ class StreamangoIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '20170315_150006.mp4',
|
||||
}
|
||||
}, {
|
||||
# no og:title
|
||||
'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
|
||||
'info_dict': {
|
||||
'id': 'foqebrpftarclpob',
|
||||
'ext': 'mp4',
|
||||
'title': 'foqebrpftarclpob',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
|
||||
'only_matching': True,
|
||||
@@ -31,7 +42,7 @@ class StreamangoIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
title = self._og_search_title(webpage, default=video_id)
|
||||
|
||||
formats = []
|
||||
for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
|
||||
|
||||
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -49,7 +50,7 @@ class TurboIE(InfoExtractor):
|
||||
for child in item:
|
||||
m = re.search(r'url_video_(?P<quality>.+)', child.tag)
|
||||
if m:
|
||||
quality = m.group('quality')
|
||||
quality = compat_str(m.group('quality'))
|
||||
formats.append({
|
||||
'format_id': quality,
|
||||
'url': child.text,
|
||||
|
||||
@@ -48,7 +48,7 @@ class TVPlayerIE(InfoExtractor):
|
||||
'https://tvplayer.com/watch/context', display_id,
|
||||
'Downloading JSON context', query={
|
||||
'resource': resource_id,
|
||||
'nonce': token,
|
||||
'gen': token,
|
||||
})
|
||||
|
||||
validate = context['validate']
|
||||
|
||||
@@ -615,7 +615,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
|
||||
source_name = source_file.get('public_name', 'Original')
|
||||
if self._is_valid_url(download_url, video_id, '%s video' % source_name):
|
||||
ext = source_file.get('extension', determine_ext(download_url)).lower()
|
||||
ext = (try_get(
|
||||
source_file, lambda x: x['extension'],
|
||||
compat_str) or determine_ext(
|
||||
download_url, None) or 'mp4').lower()
|
||||
formats.append({
|
||||
'url': download_url,
|
||||
'ext': ext,
|
||||
|
||||
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_kwargs,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -36,7 +39,8 @@ class ViuBaseIE(InfoExtractor):
|
||||
headers.update(kwargs.get('headers', {}))
|
||||
kwargs['headers'] = headers
|
||||
response = self._download_json(
|
||||
'https://www.viu.com/api/' + path, *args, **kwargs)['response']
|
||||
'https://www.viu.com/api/' + path, *args,
|
||||
**compat_kwargs(kwargs))['response']
|
||||
if response.get('status') != 'success':
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, response['message']), expected=True)
|
||||
|
||||
@@ -4,11 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class WatchIndianPornIE(InfoExtractor):
|
||||
@@ -23,11 +19,8 @@ class WatchIndianPornIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'LoveJay',
|
||||
'upload_date': '20160428',
|
||||
'duration': 226,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
@@ -40,51 +33,36 @@ class WatchIndianPornIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r"url: escape\('([^']+)'\)", webpage, 'url')
|
||||
info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="he2"><span>(.*?)</span>',
|
||||
webpage, 'title')
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<span id="container"><img\s+src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'class="aupa">\s*(.*?)</a>',
|
||||
webpage, 'uploader')
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
|
||||
title = self._html_search_regex((
|
||||
r'<title>(.+?)\s*-\s*Indian\s+Porn</title>',
|
||||
r'<h4>(.+?)</h4>'
|
||||
), webpage, 'title')
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>',
|
||||
r'Time:\s*<strong>\s*(.+?)\s*</strong>',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
|
||||
view_count = int(self._search_regex(
|
||||
r'(?s)Time:\s*<strong>.*?</strong>.*?<strong>\s*(\d+)\s*</strong>',
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
categories = re.findall(
|
||||
r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>',
|
||||
r'<a[^>]+class=[\'"]categories[\'"][^>]*>\s*([^<]+)\s*</a>',
|
||||
webpage)
|
||||
|
||||
return {
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'http_headers': {
|
||||
'Referer': url,
|
||||
},
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'categories': categories,
|
||||
'age_limit': 18,
|
||||
}
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
||||
@@ -10,7 +10,6 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -30,6 +29,8 @@ class XFileShareIE(InfoExtractor):
|
||||
(r'vidabc\.com', 'Vid ABC'),
|
||||
(r'vidbom\.com', 'VidBom'),
|
||||
(r'vidlo\.us', 'vidlo'),
|
||||
(r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'),
|
||||
(r'fastvideo\.me', 'FastVideo.me'),
|
||||
)
|
||||
|
||||
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
|
||||
@@ -109,6 +110,12 @@ class XFileShareIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.rapidvideo.cool/b667kprndr8w',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -130,12 +137,12 @@ class XFileShareIE(InfoExtractor):
|
||||
if countdown:
|
||||
self._sleep(countdown, video_id)
|
||||
|
||||
post = urlencode_postdata(fields)
|
||||
|
||||
req = sanitized_Request(url, post)
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
||||
webpage = self._download_webpage(req, video_id, 'Downloading video page')
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, 'Downloading video page',
|
||||
data=urlencode_postdata(fields), headers={
|
||||
'Referer': url,
|
||||
'Content-type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
|
||||
title = (self._search_regex(
|
||||
(r'style="z-index: [0-9]+;">([^<]+)</span>',
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
@@ -26,7 +27,7 @@ class YouPornIE(InfoExtractor):
|
||||
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Ask Dan And Jennifer',
|
||||
'upload_date': '20101221',
|
||||
'upload_date': '20101217',
|
||||
'average_rating': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
@@ -45,7 +46,7 @@ class YouPornIE(InfoExtractor):
|
||||
'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Unknown',
|
||||
'upload_date': '20111125',
|
||||
'upload_date': '20110418',
|
||||
'average_rating': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
@@ -68,28 +69,46 @@ class YouPornIE(InfoExtractor):
|
||||
webpage = self._download_webpage(request, display_id)
|
||||
|
||||
title = self._search_regex(
|
||||
[r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>.+?)\1',
|
||||
r'<h1[^>]+class=["\']heading\d?["\'][^>]*>([^<])<'],
|
||||
webpage, 'title', group='title')
|
||||
[r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'],
|
||||
webpage, 'title', group='title',
|
||||
default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'title', webpage, fatal=True)
|
||||
|
||||
links = []
|
||||
|
||||
# Main source
|
||||
definitions = self._parse_json(
|
||||
self._search_regex(
|
||||
r'mediaDefinition\s*=\s*(\[.+?\]);', webpage,
|
||||
'media definitions', default='[]'),
|
||||
video_id, fatal=False)
|
||||
if definitions:
|
||||
for definition in definitions:
|
||||
if not isinstance(definition, dict):
|
||||
continue
|
||||
video_url = definition.get('videoUrl')
|
||||
if isinstance(video_url, compat_str) and video_url:
|
||||
links.append(video_url)
|
||||
|
||||
# Fallback #1, this also contains extra low quality 180p format
|
||||
for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
||||
links.append(link)
|
||||
|
||||
# Fallback #2 (unavailable as at 22.06.2017)
|
||||
sources = self._search_regex(
|
||||
r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
|
||||
if sources:
|
||||
for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
|
||||
links.append(link)
|
||||
|
||||
# Fallback #1
|
||||
# Fallback #3 (unavailable as at 22.06.2017)
|
||||
for _, link in re.findall(
|
||||
r'(?:videoUrl|videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
|
||||
r'(?:videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
|
||||
links.append(link)
|
||||
|
||||
# Fallback #2, this also contains extra low quality 180p format
|
||||
for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
||||
links.append(link)
|
||||
|
||||
# Fallback #3, encrypted links
|
||||
# Fallback #4, encrypted links (unavailable as at 22.06.2017)
|
||||
for _, encrypted_link in re.findall(
|
||||
r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
|
||||
links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
|
||||
@@ -124,7 +143,8 @@ class YouPornIE(InfoExtractor):
|
||||
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>',
|
||||
[r'Date\s+[Aa]dded:\s*<span>([^<]+)',
|
||||
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
@@ -1269,37 +1269,57 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
sub_lang_list[sub_lang] = sub_formats
|
||||
return sub_lang_list
|
||||
|
||||
def make_captions(sub_url, sub_langs):
|
||||
parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
|
||||
caption_qs = compat_parse_qs(parsed_sub_url.query)
|
||||
captions = {}
|
||||
for sub_lang in sub_langs:
|
||||
sub_formats = []
|
||||
for ext in self._SUBTITLE_FORMATS:
|
||||
caption_qs.update({
|
||||
'tlang': [sub_lang],
|
||||
'fmt': [ext],
|
||||
})
|
||||
sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
|
||||
query=compat_urllib_parse_urlencode(caption_qs, True)))
|
||||
sub_formats.append({
|
||||
'url': sub_url,
|
||||
'ext': ext,
|
||||
})
|
||||
captions[sub_lang] = sub_formats
|
||||
return captions
|
||||
|
||||
# New captions format as of 22.06.2017
|
||||
player_response = args.get('player_response')
|
||||
if player_response and isinstance(player_response, compat_str):
|
||||
player_response = self._parse_json(
|
||||
player_response, video_id, fatal=False)
|
||||
if player_response:
|
||||
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
||||
base_url = renderer['captionTracks'][0]['baseUrl']
|
||||
sub_lang_list = []
|
||||
for lang in renderer['translationLanguages']:
|
||||
lang_code = lang.get('languageCode')
|
||||
if lang_code:
|
||||
sub_lang_list.append(lang_code)
|
||||
return make_captions(base_url, sub_lang_list)
|
||||
|
||||
# Some videos don't provide ttsurl but rather caption_tracks and
|
||||
# caption_translation_languages (e.g. 20LmZk1hakA)
|
||||
# Not used anymore as of 22.06.2017
|
||||
caption_tracks = args['caption_tracks']
|
||||
caption_translation_languages = args['caption_translation_languages']
|
||||
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
|
||||
parsed_caption_url = compat_urllib_parse_urlparse(caption_url)
|
||||
caption_qs = compat_parse_qs(parsed_caption_url.query)
|
||||
|
||||
sub_lang_list = {}
|
||||
sub_lang_list = []
|
||||
for lang in caption_translation_languages.split(','):
|
||||
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
|
||||
sub_lang = lang_qs.get('lc', [None])[0]
|
||||
if not sub_lang:
|
||||
continue
|
||||
sub_formats = []
|
||||
for ext in self._SUBTITLE_FORMATS:
|
||||
caption_qs.update({
|
||||
'tlang': [sub_lang],
|
||||
'fmt': [ext],
|
||||
})
|
||||
sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
|
||||
query=compat_urllib_parse_urlencode(caption_qs, True)))
|
||||
sub_formats.append({
|
||||
'url': sub_url,
|
||||
'ext': ext,
|
||||
})
|
||||
sub_lang_list[sub_lang] = sub_formats
|
||||
return sub_lang_list
|
||||
if sub_lang:
|
||||
sub_lang_list.append(sub_lang)
|
||||
return make_captions(caption_url, sub_lang_list)
|
||||
# An extractor error can be raised by the download process if there are
|
||||
# no automatic captions but there are subtitles
|
||||
except (KeyError, ExtractorError):
|
||||
except (KeyError, IndexError, ExtractorError):
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
|
||||
@@ -1353,10 +1373,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
start_time = parse_duration(time_point)
|
||||
if start_time is None:
|
||||
continue
|
||||
if start_time > duration:
|
||||
break
|
||||
end_time = (duration if next_num == len(chapter_lines)
|
||||
else parse_duration(chapter_lines[next_num][1]))
|
||||
if end_time is None:
|
||||
continue
|
||||
if end_time > duration:
|
||||
end_time = duration
|
||||
if start_time > end_time:
|
||||
break
|
||||
chapter_title = re.sub(
|
||||
r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
|
||||
chapter_title = re.sub(r'\s+', ' ', chapter_title)
|
||||
|
||||
@@ -310,7 +310,7 @@ def parseOpts(overrideArguments=None):
|
||||
metavar='FILTER', dest='match_filter', default=None,
|
||||
help=(
|
||||
'Generic video filter. '
|
||||
'Specify any key (see help for -o for a list of available keys) to '
|
||||
'Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to '
|
||||
'match if the key is present, '
|
||||
'!key to check if the key is not present, '
|
||||
'key > NUMBER (like "comment_count > 12", also works with '
|
||||
@@ -618,7 +618,7 @@ def parseOpts(overrideArguments=None):
|
||||
verbosity.add_option(
|
||||
'-j', '--dump-json',
|
||||
action='store_true', dest='dumpjson', default=False,
|
||||
help='Simulate, quiet but print JSON information. See --output for a description of available keys.')
|
||||
help='Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.')
|
||||
verbosity.add_option(
|
||||
'-J', '--dump-single-json',
|
||||
action='store_true', dest='dump_single_json', default=False,
|
||||
|
||||
@@ -4,7 +4,10 @@ import subprocess
|
||||
|
||||
from .common import PostProcessor
|
||||
from ..compat import compat_shlex_quote
|
||||
from ..utils import PostProcessingError
|
||||
from ..utils import (
|
||||
encodeArgument,
|
||||
PostProcessingError,
|
||||
)
|
||||
|
||||
|
||||
class ExecAfterDownloadPP(PostProcessor):
|
||||
@@ -20,7 +23,7 @@ class ExecAfterDownloadPP(PostProcessor):
|
||||
cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
|
||||
|
||||
self._downloader.to_screen('[exec] Executing command: %s' % cmd)
|
||||
retCode = subprocess.call(cmd, shell=True)
|
||||
retCode = subprocess.call(encodeArgument(cmd), shell=True)
|
||||
if retCode != 0:
|
||||
raise PostProcessingError(
|
||||
'Command returned error code %d' % retCode)
|
||||
|
||||
@@ -35,11 +35,14 @@ class MetadataFromTitlePP(PostProcessor):
|
||||
title = info['title']
|
||||
match = re.match(self._titleregex, title)
|
||||
if match is None:
|
||||
self._downloader.to_screen('[fromtitle] Could not interpret title of video as "%s"' % self._titleformat)
|
||||
self._downloader.to_screen(
|
||||
'[fromtitle] Could not interpret title of video as "%s"'
|
||||
% self._titleformat)
|
||||
return [], info
|
||||
for attribute, value in match.groupdict().items():
|
||||
value = match.group(attribute)
|
||||
info[attribute] = value
|
||||
self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value)
|
||||
self._downloader.to_screen(
|
||||
'[fromtitle] parsed %s: %s'
|
||||
% (attribute, value if value is not None else 'NA'))
|
||||
|
||||
return [], info
|
||||
|
||||
@@ -22,7 +22,6 @@ import locale
|
||||
import math
|
||||
import operator
|
||||
import os
|
||||
import pipes
|
||||
import platform
|
||||
import random
|
||||
import re
|
||||
@@ -36,6 +35,7 @@ import xml.etree.ElementTree
|
||||
import zlib
|
||||
|
||||
from .compat import (
|
||||
compat_HTMLParseError,
|
||||
compat_HTMLParser,
|
||||
compat_basestring,
|
||||
compat_chr,
|
||||
@@ -409,8 +409,12 @@ def extract_attributes(html_element):
|
||||
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
|
||||
"""
|
||||
parser = HTMLAttributeParser()
|
||||
parser.feed(html_element)
|
||||
parser.close()
|
||||
try:
|
||||
parser.feed(html_element)
|
||||
parser.close()
|
||||
# Older Python may throw HTMLParseError in case of malformed HTML
|
||||
except compat_HTMLParseError:
|
||||
pass
|
||||
return parser.attrs
|
||||
|
||||
|
||||
@@ -1179,7 +1183,7 @@ def unified_timestamp(date_str, day_first=True):
|
||||
if date_str is None:
|
||||
return None
|
||||
|
||||
date_str = date_str.replace(',', ' ')
|
||||
date_str = re.sub(r'[,|]', '', date_str)
|
||||
|
||||
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
|
||||
timezone, date_str = extract_timezone(date_str)
|
||||
@@ -1530,7 +1534,7 @@ def shell_quote(args):
|
||||
if isinstance(a, bytes):
|
||||
# We may get a filename encoded with 'encodeFilename'
|
||||
a = a.decode(encoding)
|
||||
quoted_args.append(pipes.quote(a))
|
||||
quoted_args.append(compat_shlex_quote(a))
|
||||
return ' '.join(quoted_args)
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2017.05.29'
|
||||
__version__ = '2017.06.23'
|
||||
|
||||
Reference in New Issue
Block a user