mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-18 03:42:23 +01:00
Compare commits
2 Commits
2022.02.04
...
2022.01.21
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b54e1255ce | ||
|
|
f20d607b0e |
8
.github/ISSUE_TEMPLATE/1_broken_site.yml
vendored
8
.github/ISSUE_TEMPLATE/1_broken_site.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Broken site
|
||||
name: Broken site support
|
||||
description: Report broken or misfunctioning site
|
||||
labels: [triage, site-bug]
|
||||
body:
|
||||
@@ -11,7 +11,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a broken site
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.02.04**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **2022.01.21**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are alive and playable in a browser
|
||||
required: true
|
||||
@@ -51,12 +51,12 @@ body:
|
||||
[debug] Portable config file: yt-dlp.conf
|
||||
[debug] Portable config: ['-i']
|
||||
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
|
||||
[debug] yt-dlp version 2022.02.04 (exe)
|
||||
[debug] yt-dlp version 2022.01.21 (exe)
|
||||
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
|
||||
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
|
||||
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
|
||||
[debug] Proxy map: {}
|
||||
yt-dlp is up to date (2022.02.04)
|
||||
yt-dlp is up to date (2022.01.21)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
||||
@@ -11,7 +11,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a new site support request
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.02.04**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **2022.01.21**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are alive and playable in a browser
|
||||
required: true
|
||||
@@ -62,12 +62,12 @@ body:
|
||||
[debug] Portable config file: yt-dlp.conf
|
||||
[debug] Portable config: ['-i']
|
||||
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
|
||||
[debug] yt-dlp version 2022.02.04 (exe)
|
||||
[debug] yt-dlp version 2022.01.21 (exe)
|
||||
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
|
||||
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
|
||||
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
|
||||
[debug] Proxy map: {}
|
||||
yt-dlp is up to date (2022.02.04)
|
||||
yt-dlp is up to date (2022.01.21)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
||||
@@ -11,7 +11,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a site feature request
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.02.04**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **2022.01.21**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are alive and playable in a browser
|
||||
required: true
|
||||
@@ -60,12 +60,12 @@ body:
|
||||
[debug] Portable config file: yt-dlp.conf
|
||||
[debug] Portable config: ['-i']
|
||||
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
|
||||
[debug] yt-dlp version 2022.02.04 (exe)
|
||||
[debug] yt-dlp version 2022.01.21 (exe)
|
||||
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
|
||||
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
|
||||
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
|
||||
[debug] Proxy map: {}
|
||||
yt-dlp is up to date (2022.02.04)
|
||||
yt-dlp is up to date (2022.01.21)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
||||
6
.github/ISSUE_TEMPLATE/4_bug_report.yml
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.yml
vendored
@@ -11,7 +11,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a bug unrelated to a specific site
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.02.04**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **2022.01.21**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are alive and playable in a browser
|
||||
required: true
|
||||
@@ -45,12 +45,12 @@ body:
|
||||
[debug] Portable config file: yt-dlp.conf
|
||||
[debug] Portable config: ['-i']
|
||||
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
|
||||
[debug] yt-dlp version 2022.02.04 (exe)
|
||||
[debug] yt-dlp version 2022.01.21 (exe)
|
||||
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
|
||||
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
|
||||
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
|
||||
[debug] Proxy map: {}
|
||||
yt-dlp is up to date (2022.02.04)
|
||||
yt-dlp is up to date (2022.01.21)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
||||
4
.github/ISSUE_TEMPLATE/5_feature_request.yml
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.yml
vendored
@@ -11,9 +11,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a feature request
|
||||
required: true
|
||||
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.02.04**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
- label: I've verified that I'm running yt-dlp version **2022.01.21**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
required: true
|
||||
- label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates
|
||||
required: true
|
||||
|
||||
3
.github/ISSUE_TEMPLATE/6_question.yml
vendored
3
.github/ISSUE_TEMPLATE/6_question.yml
vendored
@@ -25,8 +25,7 @@ body:
|
||||
Ask your question in an arbitrary form.
|
||||
Please make sure it's worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient).
|
||||
Provide any additional information and as much context and examples as possible.
|
||||
If your question contains "isn't working" or "can you add", this is most likely the wrong template.
|
||||
If you are in doubt if this is the right template, use another template!
|
||||
If your question contains "isn't working" or "can you add", this is most likely the wrong template
|
||||
placeholder: WRITE QUESTION HERE
|
||||
validations:
|
||||
required: true
|
||||
|
||||
3
.github/ISSUE_TEMPLATE/config.yml
vendored
3
.github/ISSUE_TEMPLATE/config.yml
vendored
@@ -3,6 +3,3 @@ contact_links:
|
||||
- name: Get help from the community on Discord
|
||||
url: https://discord.gg/H5MNcFW63r
|
||||
about: Join the yt-dlp Discord for community-powered support!
|
||||
- name: Matrix Bridge to the Discord server
|
||||
url: https://matrix.to/#/#yt-dlp:matrix.org
|
||||
about: For those who do not want to use Discord
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
name: Broken site
|
||||
name: Broken site support
|
||||
description: Report broken or misfunctioning site
|
||||
labels: [triage, site-bug]
|
||||
body:
|
||||
|
||||
@@ -11,8 +11,6 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a feature request
|
||||
required: true
|
||||
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
|
||||
required: true
|
||||
- label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates
|
||||
|
||||
3
.github/ISSUE_TEMPLATE_tmpl/6_question.yml
vendored
3
.github/ISSUE_TEMPLATE_tmpl/6_question.yml
vendored
@@ -25,8 +25,7 @@ body:
|
||||
Ask your question in an arbitrary form.
|
||||
Please make sure it's worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient).
|
||||
Provide any additional information and as much context and examples as possible.
|
||||
If your question contains "isn't working" or "can you add", this is most likely the wrong template.
|
||||
If you are in doubt if this is the right template, use another template!
|
||||
If your question contains "isn't working" or "can you add", this is most likely the wrong template
|
||||
placeholder: WRITE QUESTION HERE
|
||||
validations:
|
||||
required: true
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -92,7 +92,7 @@ README.txt
|
||||
*.tar.gz
|
||||
*.zsh
|
||||
*.spec
|
||||
test/testdata/sigs/player-*.js
|
||||
test/testdata/player-*.js
|
||||
|
||||
# Binary
|
||||
/youtube-dl
|
||||
|
||||
@@ -113,7 +113,7 @@ If the issue is with `youtube-dl` (the upstream fork of yt-dlp) and not with yt-
|
||||
|
||||
### Are you willing to share account details if needed?
|
||||
|
||||
The maintainers and potential contributors of the project often do not have an account for the website you are asking support for. So any developer interested in solving your issue may ask you for account details. It is your personal discretion whether you are willing to share the account in order for the developer to try and solve your issue. However, if you are unwilling or unable to provide details, they obviously cannot work on the issue and it cannot be solved unless some developer who both has an account and is willing/able to contribute decides to solve it.
|
||||
The maintainers and potential contributors of the project often do not have an account for the website you are asking support for. So any developer interested in solving your issue may ask you for account details. It is your personal discression whether you are willing to share the account in order for the developer to try and solve your issue. However, if you are unwilling or unable to provide details, they obviously cannot work on the issue and it cannot be solved unless some developer who both has an account and is willing/able to contribute decides to solve it.
|
||||
|
||||
By sharing an account with anyone, you agree to bear all risks associated with it. The maintainers and yt-dlp can't be held responsible for any misuse of the credentials.
|
||||
|
||||
@@ -252,11 +252,7 @@ For extraction to work yt-dlp relies on metadata your extractor extracts and pro
|
||||
- `title` (media title)
|
||||
- `url` (media download URL) or `formats`
|
||||
|
||||
The aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. While all extractors must return a `title`, they must also allow its extraction to be non-fatal.
|
||||
|
||||
For pornographic sites, appropriate `age_limit` must also be returned.
|
||||
|
||||
The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract useful information with `--ignore-no-formats-error` - Eg: when the video is a live stream that has not started yet.
|
||||
The aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. While, in fact, only `id` is technically mandatory, due to compatibility reasons, yt-dlp also treats `title` as mandatory. The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract useful information with `--ignore-no-formats-error` - Eg: when the video is a live stream that has not started yet.
|
||||
|
||||
[Any field](yt_dlp/extractor/common.py#219-L426) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
|
||||
@@ -190,7 +190,3 @@ CreaValix
|
||||
sian1468
|
||||
arkamar
|
||||
hyano
|
||||
KiberInfinity
|
||||
tejing1
|
||||
Bricio
|
||||
lazypete365
|
||||
|
||||
67
Changelog.md
67
Changelog.md
@@ -11,72 +11,7 @@
|
||||
-->
|
||||
|
||||
|
||||
### 2022.02.04
|
||||
|
||||
* [youtube:search] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube:search] Add tests
|
||||
* [twitcasting] Enforce UTF-8 for POST payload by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [mediaset] Fix extractor by [nixxo](https://github.com/nixxo)
|
||||
* [websocket] Make syntax error in `websockets` module non-fatal
|
||||
|
||||
### 2022.02.03
|
||||
|
||||
* Merge youtube-dl: Upto [commit/78ce962](https://github.com/ytdl-org/youtube-dl/commit/78ce962f4fe020994c216dd2671546fbe58a5c67)
|
||||
* Add option `--print-to-file`
|
||||
* Make nested --config-locations relative to parent file
|
||||
* Ensure `_type` is present in `info.json`
|
||||
* Fix `--compat-options list-formats`
|
||||
* Fix/improve `InAdvancePagedList`
|
||||
* [downloader/ffmpeg] Handle unknown formats better
|
||||
* [outtmpl] Handle `-o ""` better
|
||||
* [outtmpl] Handle hard-coded file extension better
|
||||
* [extractor] Add convenience function `_yes_playlist`
|
||||
* [extractor] Allow non-fatal `title` extraction
|
||||
* [extractor] Extract video inside `Article` json_ld
|
||||
* [generic] Allow further processing of json_ld URL
|
||||
* [cookies] Fix keyring selection for unsupported desktops
|
||||
* [utils] Strip double spaces in `clean_html` by [dirkf](https://github.com/dirkf)
|
||||
* [aes] Add `unpad_pkcs7`
|
||||
* [test] Fix `test_youtube_playlist_noplaylist`
|
||||
* [docs,cleanup] Misc cleanup
|
||||
* [dplay] Add extractors for site changes by [Sipherdrakon](https://github.com/Sipherdrakon)
|
||||
* [ertgr] Add extractors by [zmousm](https://github.com/zmousm), [dirkf](https://github.com/dirkf)
|
||||
* [Musicdex] Add extractors by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [YandexVideoPreview] Add extractor by [KiberInfinity](https://github.com/KiberInfinity)
|
||||
* [youtube] Add extractor `YoutubeMusicSearchURLIE`
|
||||
* [archive.org] Ignore unnecessary files
|
||||
* [Bilibili] Add 8k support by [u-spec-png](https://github.com/u-spec-png)
|
||||
* [bilibili] Fix extractor, make anthology title non-fatal
|
||||
* [CAM4] Add thumbnail extraction by [alerikaisattera](https://github.com/alerikaisattera)
|
||||
* [cctv] De-prioritize sample format
|
||||
* [crunchyroll:beta] Add cookies support by [tejing1](https://github.com/tejing1)
|
||||
* [crunchyroll] Fix login by [tejing1](https://github.com/tejing1)
|
||||
* [doodstream] Fix extractor
|
||||
* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [FFmpegConcat] Abort on --skip-download and download errors
|
||||
* [Fujitv] Extract metadata and support premium by [YuenSzeHong](https://github.com/YuenSzeHong)
|
||||
* [globo] Fix extractor by [Bricio](https://github.com/Bricio)
|
||||
* [glomex] Simplify embed detection
|
||||
* [GoogleSearch] Fix extractor
|
||||
* [Instagram] Fix extraction when logged in by [MinePlayersPE](https://github.com/MinePlayersPE)
|
||||
* [iq.com] Add VIP support by [MinePlayersPE](https://github.com/MinePlayersPE)
|
||||
* [mildom] Fix extractor by [lazypete365](https://github.com/lazypete365)
|
||||
* [MySpass] Fix video url processing by [trassshhub](https://github.com/trassshhub)
|
||||
* [Odnoklassniki] Improve embedded players extraction by [KiberInfinity](https://github.com/KiberInfinity)
|
||||
* [orf:tvthek] Lazy playlist extraction and obey --no-playlist
|
||||
* [Pladform] Fix redirection to external player by [KiberInfinity](https://github.com/KiberInfinity)
|
||||
* [ThisOldHouse] Improve Premium URL check by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [TikTok] Iterate through app versions by [MinePlayersPE](https://github.com/MinePlayersPE)
|
||||
* [tumblr] Fix 403 errors and handle vimeo embeds by [foghawk](https://github.com/foghawk)
|
||||
* [viki] Fix "Bad request" for manifest by [nyuszika7h](https://github.com/nyuszika7h)
|
||||
* [Vimm] add recording extractor by [alerikaisattera](https://github.com/alerikaisattera)
|
||||
* [web.archive:youtube] Add `ytarchive:` prefix and misc cleanup
|
||||
* [youtube:api] Do not use seek when reading HTTPError response by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Fix n-sig for player e06dea74
|
||||
* [youtube, cleanup] Misc fixes and cleanup
|
||||
|
||||
|
||||
### 2022.01.21
|
||||
### 2021.01.21
|
||||
|
||||
* Add option `--concat-playlist` to **concat videos in a playlist**
|
||||
* Allow **multiple and nested configuration files**
|
||||
|
||||
2
Makefile
2
Makefile
@@ -14,7 +14,7 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites com
|
||||
.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites
|
||||
|
||||
clean-test:
|
||||
rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
|
||||
rm -rf test/testdata/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
|
||||
*.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \
|
||||
*.3gp *.ape *.avi *.desktop *.flac *.flv *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 \
|
||||
*.mp4 *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
|
||||
|
||||
39
README.md
39
README.md
@@ -71,7 +71,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
|
||||
|
||||
# NEW FEATURES
|
||||
|
||||
* Based on **youtube-dl 2021.12.17 [commit/5add3f4](https://github.com/ytdl-org/youtube-dl/commit/5add3f4373287e6346ca3551239edab549284db3)** and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
|
||||
* Based on **youtube-dl 2021.12.17 [commit/5014bd6](https://github.com/ytdl-org/youtube-dl/commit/5014bd67c22b421207b2650d4dc874b95b36dda1)** and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
|
||||
|
||||
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
|
||||
|
||||
@@ -379,9 +379,8 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
||||
To enable SOCKS proxy, specify a proper
|
||||
scheme. For example
|
||||
socks5://user:pass@127.0.0.1:1080/. Pass in
|
||||
an empty string (--proxy "") for direct
|
||||
connection
|
||||
socks5://127.0.0.1:1080/. Pass in an empty
|
||||
string (--proxy "") for direct connection
|
||||
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
||||
--source-address IP Client-side IP address to bind to
|
||||
-4, --force-ipv4 Make all connections via IPv4
|
||||
@@ -394,7 +393,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
option is not present) is used for the
|
||||
actual downloading
|
||||
--geo-bypass Bypass geographic restriction via faking
|
||||
X-Forwarded-For HTTP header (default)
|
||||
X-Forwarded-For HTTP header
|
||||
--no-geo-bypass Do not bypass geographic restriction via
|
||||
faking X-Forwarded-For HTTP header
|
||||
--geo-bypass-country CODE Force bypass geographic restriction with
|
||||
@@ -687,12 +686,6 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
Implies --quiet and --simulate (unless
|
||||
--no-simulate is used). This option can be
|
||||
used multiple times
|
||||
--print-to-file [WHEN:]TEMPLATE FILE
|
||||
Append given template to the file. The
|
||||
values of WHEN and TEMPLATE are same as
|
||||
that of --print. FILE uses the same syntax
|
||||
as the output template. This option can be
|
||||
used multiple times
|
||||
-j, --dump-json Quiet, but print JSON information for each
|
||||
video. Simulate unless --no-simulate is
|
||||
used. See "OUTPUT TEMPLATE" for a
|
||||
@@ -1050,7 +1043,7 @@ You can configure yt-dlp by placing any supported command line option to a confi
|
||||
|
||||
1. **Main Configuration**: The file given by `--config-location`
|
||||
1. **Portable Configuration**: `yt-dlp.conf` in the same directory as the bundled binary. If you are running from source-code (`<root dir>/yt_dlp/__main__.py`), the root directory is used instead.
|
||||
1. **Home Configuration**: `yt-dlp.conf` in the home path given by `-P`, or in the current directory if no such path is given
|
||||
1. **Home Configuration**: `yt-dlp.conf` in the home path given by `-P "home:<path>"`, or in the current directory if no such path is given
|
||||
1. **User Configuration**:
|
||||
* `%XDG_CONFIG_HOME%/yt-dlp/config` (recommended on Linux/macOS)
|
||||
* `%XDG_CONFIG_HOME%/yt-dlp.conf`
|
||||
@@ -1137,13 +1130,12 @@ To summarize, the general syntax for a field is:
|
||||
%(name[.keys][addition][>strf][,alternate][&replacement][|default])[flags][width][.precision][length]type
|
||||
```
|
||||
|
||||
Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. For example, `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. Eg: `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video.
|
||||
Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. For example, `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates (except default) is empty, that type of file will not be written. Eg: `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video.
|
||||
|
||||
The available fields are:
|
||||
|
||||
- `id` (string): Video identifier
|
||||
- `title` (string): Video title
|
||||
- `fulltitle` (string): Video title ignoring live timestamp and generic title
|
||||
- `url` (string): Video URL
|
||||
- `ext` (string): Video filename extension
|
||||
- `alt_title` (string): A secondary title of the video
|
||||
@@ -1199,16 +1191,16 @@ The available fields are:
|
||||
- `protocol` (string): The protocol that will be used for the actual download
|
||||
- `extractor` (string): Name of the extractor
|
||||
- `extractor_key` (string): Key name of the extractor
|
||||
- `epoch` (numeric): Unix epoch of when the information extraction was completed
|
||||
- `epoch` (numeric): Unix epoch when creating the file
|
||||
- `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
|
||||
- `video_autonumber` (numeric): Number that will be increased with each video
|
||||
- `n_entries` (numeric): Total number of extracted items in the playlist
|
||||
- `playlist_id` (string): Identifier of the playlist that contains the video
|
||||
- `playlist_title` (string): Name of the playlist that contains the video
|
||||
- `playlist` (string): `playlist_id` or `playlist_title`
|
||||
- `playlist` (string): Name or id of the playlist that contains the video
|
||||
- `playlist_count` (numeric): Total number of items in the playlist. May not be known if entire playlist is not extracted
|
||||
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the final index
|
||||
- `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist
|
||||
- `playlist_id` (string): Playlist identifier
|
||||
- `playlist_title` (string): Playlist title
|
||||
- `playlist_uploader` (string): Full name of the playlist uploader
|
||||
- `playlist_uploader_id` (string): Nickname or id of the playlist uploader
|
||||
- `webpage_url` (string): A URL to the video webpage which if given to yt-dlp should allow to get the same result again
|
||||
@@ -1553,7 +1545,7 @@ $ yt-dlp -S "proto"
|
||||
|
||||
|
||||
# Download the best video with h264 codec, or the best video if there is no such video
|
||||
$ yt-dlp -f "(bv*[vcodec^=avc1]+ba) / (bv*+ba/b)"
|
||||
$ yt-dlp -f "(bv*+ba/b)[vcodec^=avc1] / (bv*+ba/b)"
|
||||
|
||||
# Download the best video with best codec no better than h264,
|
||||
# or the best video with worst codec if there is no such video
|
||||
@@ -1671,11 +1663,6 @@ The following extractors use this feature:
|
||||
* `language`: Languages to extract. Eg: `crunchyroll:language=jaJp`
|
||||
* `hardsub`: Which hard-sub versions to extract. Eg: `crunchyroll:hardsub=None,enUS`
|
||||
|
||||
#### crunchyroll:beta
|
||||
* `format`: Which stream type(s) to extract. Default is `adaptive_hls` Eg: `crunchyrollbeta:format=vo_adaptive_hls`
|
||||
* Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `trailer_hls`, `trailer_dash`
|
||||
* `hardsub`: Preference order for which hardsub versions to extract. Default is `None` (no hardsubs). Eg: `crunchyrollbeta:hardsub=en-US,None`
|
||||
|
||||
#### vikichannel
|
||||
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
|
||||
|
||||
@@ -1690,10 +1677,6 @@ The following extractors use this feature:
|
||||
* `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265`
|
||||
* `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`
|
||||
|
||||
#### tiktok
|
||||
* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`. (e.g. `20.2.1`)
|
||||
* `manifest_app_version`: Numeric app version to call mobile APIs with. (e.g. `221`)
|
||||
|
||||
NOTE: These options may be changed/removed in the future without concern for backward compatibility
|
||||
|
||||
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
|
||||
|
||||
@@ -54,7 +54,6 @@
|
||||
- **AMCNetworks**
|
||||
- **AmericasTestKitchen**
|
||||
- **AmericasTestKitchenSeason**
|
||||
- **AmHistoryChannel**
|
||||
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **AnimalPlanet**
|
||||
- **AnimeLab**
|
||||
@@ -228,7 +227,6 @@
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **CONtv**
|
||||
- **CookingChannel**
|
||||
- **Corus**
|
||||
- **Coub**
|
||||
- **CozyTV**
|
||||
@@ -270,20 +268,21 @@
|
||||
- **DeezerPlaylist**
|
||||
- **defense.gouv.fr**
|
||||
- **democracynow**
|
||||
- **DestinationAmerica**
|
||||
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||
- **Digg**
|
||||
- **DigitalConcertHall**: DigitalConcertHall extractor
|
||||
- **DigitallySpeaking**
|
||||
- **Digiteka**
|
||||
- **Discovery**
|
||||
- **DiscoveryLife**
|
||||
- **DiscoveryGo**
|
||||
- **DiscoveryGoPlaylist**
|
||||
- **DiscoveryNetworksDe**
|
||||
- **DiscoveryPlus**
|
||||
- **DiscoveryPlusIndia**
|
||||
- **DiscoveryPlusIndiaShow**
|
||||
- **DiscoveryPlusItaly**
|
||||
- **DiscoveryPlusItalyShow**
|
||||
- **DiscoveryVR**
|
||||
- **Disney**
|
||||
- **DIYNetwork**
|
||||
- **dlive:stream**
|
||||
@@ -332,9 +331,6 @@
|
||||
- **Eporner**
|
||||
- **EroProfile**
|
||||
- **EroProfile:album**
|
||||
- **ertflix**: ERTFLIX videos
|
||||
- **ertflix:codename**: ERTFLIX videos by codename
|
||||
- **ertwebtv:embed**: ert.gr webtv embedded videos
|
||||
- **Escapist**
|
||||
- **ESPN**
|
||||
- **ESPNArticle**
|
||||
@@ -364,7 +360,6 @@
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **FoodNetwork**
|
||||
- **FootyRoom**
|
||||
- **Formula1**
|
||||
- **FOX**
|
||||
@@ -423,7 +418,6 @@
|
||||
- **glomex**: Glomex videos
|
||||
- **glomex:embed**: Glomex embedded videos
|
||||
- **Go**
|
||||
- **GoDiscovery**
|
||||
- **GodTube**
|
||||
- **Gofile**
|
||||
- **Golem**
|
||||
@@ -445,7 +439,6 @@
|
||||
- **hetklokhuis**
|
||||
- **hgtv.com:show**
|
||||
- **HGTVDe**
|
||||
- **HGTVUsa**
|
||||
- **HiDive**
|
||||
- **HistoricFilms**
|
||||
- **history:player**
|
||||
@@ -493,7 +486,6 @@
|
||||
- **InstagramIOS**: IOS instagram:// URL
|
||||
- **Internazionale**
|
||||
- **InternetVideoArchive**
|
||||
- **InvestigationDiscovery**
|
||||
- **IPrima**
|
||||
- **IPrimaCNN**
|
||||
- **iq.com**: International version of iQiyi
|
||||
@@ -673,10 +665,6 @@
|
||||
- **MTVUutisetArticle**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **MuseScore**
|
||||
- **MusicdexAlbum**
|
||||
- **MusicdexArtist**
|
||||
- **MusicdexPlaylist**
|
||||
- **MusicdexSong**
|
||||
- **mva**: Microsoft Virtual Academy videos
|
||||
- **mva:course**: Microsoft Virtual Academy courses
|
||||
- **Mwave**
|
||||
@@ -1198,7 +1186,6 @@
|
||||
- **tiktok:tag**
|
||||
- **tiktok:user**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **TLC**
|
||||
- **TMZ**
|
||||
- **TNAFlix**
|
||||
- **TNAFlixNetworkEmbed**
|
||||
@@ -1211,7 +1198,6 @@
|
||||
- **Toypics**: Toypics video
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
- **TrailerAddict** (Currently broken)
|
||||
- **TravelChannel**
|
||||
- **Trilulilu**
|
||||
- **Trovo**
|
||||
- **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix
|
||||
@@ -1324,7 +1310,7 @@
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
- **video.arnes.si**: Arnes Video
|
||||
- **video.google:search**: Google Video search; "gvsearch:" prefix
|
||||
- **video.google:search**: Google Video search; "gvsearch:" prefix (Currently broken)
|
||||
- **video.sky.it**
|
||||
- **video.sky.it:live**
|
||||
- **VideoDetective**
|
||||
@@ -1353,8 +1339,7 @@
|
||||
- **vimeo:review**: Review pages on vimeo
|
||||
- **vimeo:user**
|
||||
- **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
|
||||
- **Vimm:recording**
|
||||
- **Vimm:stream**
|
||||
- **Vimm**
|
||||
- **Vimple**: Vimple - one-click video hosting
|
||||
- **Vine**
|
||||
- **vine:user**
|
||||
@@ -1407,7 +1392,7 @@
|
||||
- **wdr:mobile**
|
||||
- **WDRElefant**
|
||||
- **WDRPage**
|
||||
- **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix
|
||||
- **web.archive:youtube**: web.archive.org saved youtube videos
|
||||
- **Webcaster**
|
||||
- **WebcasterFeed**
|
||||
- **WebOfStories**
|
||||
@@ -1458,7 +1443,6 @@
|
||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||
- **YandexVideo**
|
||||
- **YandexVideoPreview**
|
||||
- **YapFiles**
|
||||
- **YesJapan**
|
||||
- **yinyuetai:video**: 音悦Tai
|
||||
@@ -1475,7 +1459,6 @@
|
||||
- **youtube**: YouTube
|
||||
- **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies)
|
||||
- **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies)
|
||||
- **youtube:music:search_url**: YouTube music search URLs with selectable sections (Eg: #songs)
|
||||
- **youtube:playlist**: YouTube playlists
|
||||
- **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword
|
||||
- **youtube:search**: YouTube search; "ytsearch:" prefix
|
||||
|
||||
@@ -220,7 +220,7 @@ def sanitize_got_info_dict(got_dict):
|
||||
IGNORED_PREFIXES = ('', 'playlist', 'requested', 'webpage')
|
||||
|
||||
def sanitize(key, value):
|
||||
if isinstance(value, str) and len(value) > 100 and key != 'thumbnail':
|
||||
if isinstance(value, str) and len(value) > 100:
|
||||
return f'md5:{md5(value)}'
|
||||
elif isinstance(value, list) and len(value) > 10:
|
||||
return f'count:{len(value)}'
|
||||
|
||||
@@ -1133,7 +1133,7 @@ class TestUtil(unittest.TestCase):
|
||||
|
||||
def test_clean_html(self):
|
||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||
self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
|
||||
|
||||
def test_intlist_to_bytes(self):
|
||||
|
||||
@@ -9,9 +9,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, is_download_test
|
||||
|
||||
|
||||
from yt_dlp.extractor import (
|
||||
YoutubeIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeTabIE,
|
||||
YoutubeIE,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,10 +27,21 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
dl = FakeYDL()
|
||||
dl.params['noplaylist'] = True
|
||||
ie = YoutubeTabIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/watch?v=OmJ-4B-mS-Y&list=PLydZ2Hrp_gPRJViZjLFKaBMgCQOYEEkyp&index=2')
|
||||
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||
self.assertEqual(result['_type'], 'url')
|
||||
self.assertEqual(result['ie_key'], YoutubeIE.ie_key())
|
||||
self.assertEqual(YoutubeIE.extract_id(result['url']), 'OmJ-4B-mS-Y')
|
||||
self.assertEqual(YoutubeIE.extract_id(result['url']), 'FXxLjLQi3Fg')
|
||||
|
||||
def test_youtube_course(self):
|
||||
print('Skipping: Course URLs no longer exists')
|
||||
return
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
# TODO find a > 100 (paginating?) videos course
|
||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
entries = list(result['entries'])
|
||||
self.assertEqual(YoutubeIE.extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||
self.assertEqual(len(entries), 25)
|
||||
self.assertEqual(YoutubeIE.extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||
|
||||
def test_youtube_mix(self):
|
||||
dl = FakeYDL()
|
||||
@@ -39,6 +52,15 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
original_video = entries[0]
|
||||
self.assertEqual(original_video['id'], 'tyITL_exICo')
|
||||
|
||||
def test_youtube_toptracks(self):
|
||||
print('Skipping: The playlist page gives error 500')
|
||||
return
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
|
||||
entries = result['entries']
|
||||
self.assertEqual(len(entries), 100)
|
||||
|
||||
def test_youtube_flat_playlist_extraction(self):
|
||||
dl = FakeYDL()
|
||||
dl.params['extract_flat'] = True
|
||||
|
||||
@@ -86,10 +86,6 @@ _NSIG_TESTS = [
|
||||
'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
|
||||
'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
|
||||
'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@@ -120,17 +116,10 @@ class TestPlayerInfo(unittest.TestCase):
|
||||
class TestSignature(unittest.TestCase):
|
||||
def setUp(self):
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
|
||||
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
|
||||
if not os.path.exists(self.TESTDATA_DIR):
|
||||
os.mkdir(self.TESTDATA_DIR)
|
||||
|
||||
def tearDown(self):
|
||||
try:
|
||||
for f in os.listdir(self.TESTDATA_DIR):
|
||||
os.remove(f)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def t_factory(name, sig_func, url_pattern):
|
||||
def make_tfunc(url, sig_input, expected_sig):
|
||||
|
||||
@@ -72,7 +72,6 @@ from .utils import (
|
||||
GeoRestrictedError,
|
||||
get_domain,
|
||||
HEADRequest,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
iri_to_uri,
|
||||
ISO3166Utils,
|
||||
@@ -201,12 +200,9 @@ class YoutubeDL(object):
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
no_warnings: Do not print out anything for warnings.
|
||||
forceprint: A dict with keys WHEN mapped to a list of templates to
|
||||
print to stdout. The allowed keys are video or any of the
|
||||
items in utils.POSTPROCESS_WHEN.
|
||||
forceprint: A dict with keys video/playlist mapped to
|
||||
a list of templates to force print to stdout
|
||||
For compatibility, a single list is also accepted
|
||||
print_to_file: A dict with keys WHEN (same as forceprint) mapped to
|
||||
a list of tuples with (template, filename)
|
||||
forceurl: Force printing final URL. (Deprecated)
|
||||
forcetitle: Force printing title. (Deprecated)
|
||||
forceid: Force printing ID. (Deprecated)
|
||||
@@ -352,8 +348,8 @@ class YoutubeDL(object):
|
||||
postprocessors: A list of dictionaries, each with an entry
|
||||
* key: The name of the postprocessor. See
|
||||
yt_dlp/postprocessor/__init__.py for a list.
|
||||
* when: When to run the postprocessor. Allowed values are
|
||||
the entries of utils.POSTPROCESS_WHEN
|
||||
* when: When to run the postprocessor. Can be one of
|
||||
pre_process|before_dl|post_process|after_move.
|
||||
Assumed to be 'post_process' if not given
|
||||
post_hooks: Deprecated - Register a custom postprocessor instead
|
||||
A list of functions that get called as the final step
|
||||
@@ -484,7 +480,6 @@ class YoutubeDL(object):
|
||||
extractor_args: A dictionary of arguments to be passed to the extractors.
|
||||
See "EXTRACTOR ARGUMENTS" for details.
|
||||
Eg: {'youtube': {'skip': ['dash', 'hls']}}
|
||||
mark_watched: Mark videos watched (even with --simulate). Only for YouTube
|
||||
youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
|
||||
If True (default), DASH manifests and related
|
||||
data will be downloaded and processed by extractor.
|
||||
@@ -596,14 +591,12 @@ class YoutubeDL(object):
|
||||
else:
|
||||
self.params['nooverwrites'] = not self.params['overwrites']
|
||||
|
||||
self.params.setdefault('forceprint', {})
|
||||
self.params.setdefault('print_to_file', {})
|
||||
|
||||
# Compatibility with older syntax
|
||||
params.setdefault('forceprint', {})
|
||||
if not isinstance(params['forceprint'], dict):
|
||||
self.params['forceprint'] = {'video': params['forceprint']}
|
||||
params['forceprint'] = {'video': params['forceprint']}
|
||||
|
||||
if self.params.get('bidi_workaround', False):
|
||||
if params.get('bidi_workaround', False):
|
||||
try:
|
||||
import pty
|
||||
master, slave = pty.openpty()
|
||||
@@ -631,7 +624,7 @@ class YoutubeDL(object):
|
||||
|
||||
if (sys.platform != 'win32'
|
||||
and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
||||
and not self.params.get('restrictfilenames', False)):
|
||||
and not params.get('restrictfilenames', False)):
|
||||
# Unicode filesystem API will throw errors (#1474, #13027)
|
||||
self.report_warning(
|
||||
'Assuming --restrict-filenames since file system encoding '
|
||||
@@ -1222,17 +1215,10 @@ class YoutubeDL(object):
|
||||
try:
|
||||
outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
|
||||
filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
|
||||
if not filename:
|
||||
return None
|
||||
|
||||
if tmpl_type in ('default', 'temp'):
|
||||
final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
|
||||
if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
|
||||
filename = replace_extension(filename, ext, final_ext)
|
||||
else:
|
||||
force_ext = OUTTMPL_TYPES[tmpl_type]
|
||||
if force_ext:
|
||||
filename = replace_extension(filename, force_ext, info_dict.get('ext'))
|
||||
force_ext = OUTTMPL_TYPES.get(tmpl_type)
|
||||
if filename and force_ext is not None:
|
||||
filename = replace_extension(filename, force_ext, info_dict.get('ext'))
|
||||
|
||||
# https://github.com/blackjack4494/youtube-dlc/issues/85
|
||||
trim_file_name = self.params.get('trim_file_name', False)
|
||||
@@ -1676,9 +1662,6 @@ class YoutubeDL(object):
|
||||
msg = 'Downloading %d videos'
|
||||
if not isinstance(ie_entries, (PagedList, LazyList)):
|
||||
ie_entries = LazyList(ie_entries)
|
||||
elif isinstance(ie_entries, InAdvancePagedList):
|
||||
if ie_entries._pagesize == 1:
|
||||
playlist_count = ie_entries._pagecount
|
||||
|
||||
def get_entry(i):
|
||||
return YoutubeDL.__handle_extraction_exceptions(
|
||||
@@ -2240,7 +2223,10 @@ class YoutubeDL(object):
|
||||
|
||||
def _calc_headers(self, info_dict):
|
||||
res = std_headers.copy()
|
||||
res.update(info_dict.get('http_headers') or {})
|
||||
|
||||
add_headers = info_dict.get('http_headers')
|
||||
if add_headers:
|
||||
res.update(add_headers)
|
||||
|
||||
cookies = self._calc_cookies(info_dict)
|
||||
if cookies:
|
||||
@@ -2303,17 +2289,10 @@ class YoutubeDL(object):
|
||||
self._num_videos += 1
|
||||
|
||||
if 'id' not in info_dict:
|
||||
raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
|
||||
elif not info_dict.get('id'):
|
||||
raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
|
||||
|
||||
info_dict['fulltitle'] = info_dict.get('title')
|
||||
raise ExtractorError('Missing "id" field in extractor result')
|
||||
if 'title' not in info_dict:
|
||||
raise ExtractorError('Missing "title" field in extractor result',
|
||||
video_id=info_dict['id'], ie=info_dict['extractor'])
|
||||
elif not info_dict.get('title'):
|
||||
self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
|
||||
info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
|
||||
|
||||
def report_force_conversion(field, field_not, conversion):
|
||||
self.report_warning(
|
||||
@@ -2421,6 +2400,9 @@ class YoutubeDL(object):
|
||||
if not self.params.get('allow_unplayable_formats'):
|
||||
formats = [f for f in formats if not f.get('has_drm')]
|
||||
|
||||
# backward compatibility
|
||||
info_dict['fulltitle'] = info_dict['title']
|
||||
|
||||
if info_dict.get('is_live'):
|
||||
get_from_start = bool(self.params.get('live_from_start'))
|
||||
formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
|
||||
@@ -2697,32 +2679,19 @@ class YoutubeDL(object):
|
||||
subs[lang] = f
|
||||
return subs
|
||||
|
||||
def _forceprint(self, key, info_dict):
|
||||
if info_dict is None:
|
||||
return
|
||||
info_copy = info_dict.copy()
|
||||
info_copy['formats_table'] = self.render_formats_table(info_dict)
|
||||
info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
|
||||
info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
|
||||
info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
|
||||
def _forceprint(self, tmpl, info_dict):
|
||||
mobj = re.match(r'\w+(=?)$', tmpl)
|
||||
if mobj and mobj.group(1):
|
||||
tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})r'
|
||||
elif mobj:
|
||||
tmpl = '%({})s'.format(tmpl)
|
||||
|
||||
def format_tmpl(tmpl):
|
||||
mobj = re.match(r'\w+(=?)$', tmpl)
|
||||
if mobj and mobj.group(1):
|
||||
return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
|
||||
elif mobj:
|
||||
return f'%({tmpl})s'
|
||||
return tmpl
|
||||
|
||||
for tmpl in self.params['forceprint'].get(key, []):
|
||||
self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
|
||||
|
||||
for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
|
||||
filename = self.evaluate_outtmpl(file_tmpl, info_dict)
|
||||
tmpl = format_tmpl(tmpl)
|
||||
self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
|
||||
with io.open(filename, 'a', encoding='utf-8') as f:
|
||||
f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
|
||||
info_dict = info_dict.copy()
|
||||
info_dict['formats_table'] = self.render_formats_table(info_dict)
|
||||
info_dict['thumbnails_table'] = self.render_thumbnails_table(info_dict)
|
||||
info_dict['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
|
||||
info_dict['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
|
||||
self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
|
||||
|
||||
def __forced_printings(self, info_dict, filename, incomplete):
|
||||
def print_mandatory(field, actual_field=None):
|
||||
@@ -2746,11 +2715,10 @@ class YoutubeDL(object):
|
||||
elif 'url' in info_dict:
|
||||
info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
|
||||
|
||||
if (self.params.get('forcejson')
|
||||
or self.params['forceprint'].get('video')
|
||||
or self.params['print_to_file'].get('video')):
|
||||
if self.params['forceprint'].get('video') or self.params.get('forcejson'):
|
||||
self.post_extract(info_dict)
|
||||
self._forceprint('video', info_dict)
|
||||
for tmpl in self.params['forceprint'].get('video', []):
|
||||
self._forceprint(tmpl, info_dict)
|
||||
|
||||
print_mandatory('title')
|
||||
print_mandatory('id')
|
||||
@@ -3239,7 +3207,6 @@ class YoutubeDL(object):
|
||||
if info_dict is None:
|
||||
return info_dict
|
||||
info_dict.setdefault('epoch', int(time.time()))
|
||||
info_dict.setdefault('_type', 'video')
|
||||
remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
|
||||
keep_keys = ['_type'] # Always keep this to facilitate load-info-json
|
||||
if remove_private_keys:
|
||||
@@ -3318,7 +3285,8 @@ class YoutubeDL(object):
|
||||
return infodict
|
||||
|
||||
def run_all_pps(self, key, info, *, additional_pps=None):
|
||||
self._forceprint(key, info)
|
||||
for tmpl in self.params['forceprint'].get(key, []):
|
||||
self._forceprint(tmpl, info)
|
||||
for pp in (additional_pps or []) + self._pps[key]:
|
||||
info = self.run_pp(pp, info)
|
||||
return info
|
||||
@@ -3513,7 +3481,7 @@ class YoutubeDL(object):
|
||||
delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
|
||||
|
||||
def render_thumbnails_table(self, info_dict):
|
||||
thumbnails = list(info_dict.get('thumbnails') or [])
|
||||
thumbnails = list(info_dict.get('thumbnails'))
|
||||
if not thumbnails:
|
||||
return None
|
||||
return render_table(
|
||||
|
||||
@@ -335,9 +335,6 @@ def _real_main(argv=None):
|
||||
if _video_multistreams_set is False and _audio_multistreams_set is False:
|
||||
_unused_compat_opt('multistreams')
|
||||
outtmpl_default = opts.outtmpl.get('default')
|
||||
if outtmpl_default == '':
|
||||
outtmpl_default, opts.skip_download = None, True
|
||||
del opts.outtmpl['default']
|
||||
if opts.useid:
|
||||
if outtmpl_default is None:
|
||||
outtmpl_default = opts.outtmpl['default'] = '%(id)s.%(ext)s'
|
||||
@@ -359,10 +356,6 @@ def _real_main(argv=None):
|
||||
for type_, tmpl_list in opts.forceprint.items():
|
||||
for tmpl in tmpl_list:
|
||||
validate_outtmpl(tmpl, f'{type_} print template')
|
||||
for type_, tmpl_list in opts.print_to_file.items():
|
||||
for tmpl, file in tmpl_list:
|
||||
validate_outtmpl(tmpl, f'{type_} print-to-file template')
|
||||
validate_outtmpl(file, f'{type_} print-to-file filename')
|
||||
validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title')
|
||||
for k, tmpl in opts.progress_template.items():
|
||||
k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress'
|
||||
@@ -670,7 +663,6 @@ def _real_main(argv=None):
|
||||
'forcefilename': opts.getfilename,
|
||||
'forceformat': opts.getformat,
|
||||
'forceprint': opts.forceprint,
|
||||
'print_to_file': opts.print_to_file,
|
||||
'forcejson': opts.dumpjson or opts.print_json,
|
||||
'dump_single_json': opts.dump_single_json,
|
||||
'force_write_download_archive': opts.force_write_download_archive,
|
||||
|
||||
@@ -2,15 +2,8 @@ from __future__ import unicode_literals
|
||||
|
||||
from math import ceil
|
||||
|
||||
from .compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
compat_pycrypto_AES,
|
||||
)
|
||||
from .utils import (
|
||||
bytes_to_intlist,
|
||||
intlist_to_bytes,
|
||||
)
|
||||
from .compat import compat_b64decode, compat_pycrypto_AES
|
||||
from .utils import bytes_to_intlist, intlist_to_bytes
|
||||
|
||||
|
||||
if compat_pycrypto_AES:
|
||||
@@ -32,10 +25,6 @@ else:
|
||||
return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce))))
|
||||
|
||||
|
||||
def unpad_pkcs7(data):
|
||||
return data[:-compat_ord(data[-1])]
|
||||
|
||||
|
||||
BLOCK_SIZE_BYTES = 16
|
||||
|
||||
|
||||
@@ -517,6 +506,5 @@ __all__ = [
|
||||
'aes_encrypt',
|
||||
'aes_gcm_decrypt_and_verify',
|
||||
'aes_gcm_decrypt_and_verify_bytes',
|
||||
'key_expansion',
|
||||
'unpad_pkcs7',
|
||||
'key_expansion'
|
||||
]
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import collections
|
||||
import ctypes
|
||||
import getpass
|
||||
import html
|
||||
@@ -181,17 +180,14 @@ def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.pytho
|
||||
|
||||
compat_basestring = str
|
||||
compat_chr = chr
|
||||
compat_filter = filter
|
||||
compat_input = input
|
||||
compat_integer_types = (int, )
|
||||
compat_kwargs = lambda kwargs: kwargs
|
||||
compat_map = map
|
||||
compat_numeric_types = (int, float, complex)
|
||||
compat_str = str
|
||||
compat_xpath = lambda xpath: xpath
|
||||
compat_zip = zip
|
||||
|
||||
compat_collections_abc = collections.abc
|
||||
compat_HTMLParser = html.parser.HTMLParser
|
||||
compat_HTTPError = urllib.error.HTTPError
|
||||
compat_Struct = struct.Struct
|
||||
@@ -249,7 +245,6 @@ __all__ = [
|
||||
'compat_b64decode',
|
||||
'compat_basestring',
|
||||
'compat_chr',
|
||||
'compat_collections_abc',
|
||||
'compat_cookiejar',
|
||||
'compat_cookiejar_Cookie',
|
||||
'compat_cookies',
|
||||
@@ -259,7 +254,6 @@ __all__ = [
|
||||
'compat_etree_fromstring',
|
||||
'compat_etree_register_namespace',
|
||||
'compat_expanduser',
|
||||
'compat_filter',
|
||||
'compat_get_terminal_size',
|
||||
'compat_getenv',
|
||||
'compat_getpass',
|
||||
@@ -271,7 +265,6 @@ __all__ = [
|
||||
'compat_integer_types',
|
||||
'compat_itertools_count',
|
||||
'compat_kwargs',
|
||||
'compat_map',
|
||||
'compat_numeric_types',
|
||||
'compat_ord',
|
||||
'compat_os_name',
|
||||
|
||||
@@ -11,11 +11,7 @@ from datetime import datetime, timedelta, timezone
|
||||
from enum import Enum, auto
|
||||
from hashlib import pbkdf2_hmac
|
||||
|
||||
from .aes import (
|
||||
aes_cbc_decrypt_bytes,
|
||||
aes_gcm_decrypt_and_verify_bytes,
|
||||
unpad_pkcs7,
|
||||
)
|
||||
from .aes import aes_cbc_decrypt_bytes, aes_gcm_decrypt_and_verify_bytes
|
||||
from .compat import (
|
||||
compat_b64decode,
|
||||
compat_cookiejar_Cookie,
|
||||
@@ -673,7 +669,8 @@ def _get_linux_desktop_environment(env):
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif 'KDE_FULL_SESSION' in env:
|
||||
return _LinuxDesktopEnvironment.KDE
|
||||
return _LinuxDesktopEnvironment.OTHER
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.OTHER
|
||||
|
||||
|
||||
def _choose_linux_keyring(logger):
|
||||
@@ -850,9 +847,10 @@ def pbkdf2_sha1(password, salt, iterations, key_length):
|
||||
|
||||
|
||||
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
|
||||
plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
|
||||
plaintext = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
|
||||
padding_length = plaintext[-1]
|
||||
try:
|
||||
return plaintext.decode('utf-8')
|
||||
return plaintext[:-padding_length].decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
|
||||
return None
|
||||
|
||||
@@ -17,13 +17,11 @@ from ..utils import (
|
||||
cli_valueless_option,
|
||||
cli_bool_option,
|
||||
_configuration_args,
|
||||
determine_ext,
|
||||
encodeFilename,
|
||||
encodeArgument,
|
||||
handle_youtubedl_headers,
|
||||
check_executable,
|
||||
Popen,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
@@ -465,15 +463,6 @@ class FFmpegFD(ExternalFD):
|
||||
args += ['-f', 'flv']
|
||||
elif ext == 'mp4' and tmpfilename == '-':
|
||||
args += ['-f', 'mpegts']
|
||||
elif ext == 'unknown_video':
|
||||
ext = determine_ext(remove_end(tmpfilename, '.part'))
|
||||
if ext == 'unknown_video':
|
||||
self.report_warning(
|
||||
'The video format is unknown and cannot be downloaded by ffmpeg. '
|
||||
'Explicitly set the extension in the filename to attempt download in that format')
|
||||
else:
|
||||
self.report_warning(f'The video format is unknown. Trying to download as {ext} according to the filename')
|
||||
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
|
||||
else:
|
||||
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ except ImportError:
|
||||
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..aes import aes_cbc_decrypt_bytes
|
||||
from ..compat import (
|
||||
compat_os_name,
|
||||
compat_urllib_error,
|
||||
@@ -366,7 +366,8 @@ class FragmentFD(FileDownloader):
|
||||
# not what it decrypts to.
|
||||
if self.params.get('test', False):
|
||||
return frag_content
|
||||
return unpad_pkcs7(aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv))
|
||||
decrypted_data = aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)
|
||||
return decrypted_data[:-decrypted_data[-1]]
|
||||
|
||||
return decrypt_fragment
|
||||
|
||||
|
||||
@@ -5,12 +5,9 @@ import threading
|
||||
|
||||
try:
|
||||
import websockets
|
||||
except (ImportError, SyntaxError):
|
||||
# websockets 3.10 on python 3.6 causes SyntaxError
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/2633
|
||||
has_websockets = False
|
||||
else:
|
||||
has_websockets = True
|
||||
except ImportError:
|
||||
has_websockets = False
|
||||
|
||||
from .common import FileDownloader
|
||||
from .external import FFmpegFD
|
||||
|
||||
@@ -300,10 +300,11 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id)
|
||||
video_data = video_data['route']['pageData']['_embedded']
|
||||
|
||||
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
|
||||
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
|
||||
return self.url_result(highlight, ie=ABCIViewIE.ie_key())
|
||||
if self.get_param('noplaylist') and 'highlightVideo' in video_data:
|
||||
self.to_screen('Downloading just the highlight video because of --no-playlist')
|
||||
return self.url_result(video_data['highlightVideo']['shareUrl'], ie=ABCIViewIE.ie_key())
|
||||
|
||||
self.to_screen(f'Downloading playlist {show_id} - add --no-playlist to just download the highlight video')
|
||||
series = video_data['selectedSeries']
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
||||
@@ -8,10 +8,11 @@ import os
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
)
|
||||
from ..utils import (
|
||||
ass_subtitles_timecode,
|
||||
@@ -83,11 +84,14 @@ class ADNIE(InfoExtractor):
|
||||
return None
|
||||
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
|
||||
compat_b64decode(enc_subtitles[24:]),
|
||||
binascii.unhexlify(self._K + 'ab9f52f5baae7c72'),
|
||||
compat_b64decode(enc_subtitles[:24])))
|
||||
subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
|
||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||
))
|
||||
subtitles_json = self._parse_json(
|
||||
dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
|
||||
None, fatal=False)
|
||||
if not subtitles_json:
|
||||
return None
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@ from ..utils import (
|
||||
get_element_by_id,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
KNOWN_EXTENSIONS,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
@@ -65,7 +64,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
|
||||
'uploader': 'yorkmba99@hotmail.com',
|
||||
'timestamp': 1387699629,
|
||||
'upload_date': '20131222',
|
||||
'upload_date': "20131222",
|
||||
},
|
||||
}, {
|
||||
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
@@ -151,7 +150,8 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
|
||||
# Archive.org metadata API doesn't clearly demarcate playlist entries
|
||||
# or subtitle tracks, so we get them from the embeddable player.
|
||||
embed_page = self._download_webpage(f'https://archive.org/embed/{identifier}', identifier)
|
||||
embed_page = self._download_webpage(
|
||||
'https://archive.org/embed/' + identifier, identifier)
|
||||
playlist = self._playlist_data(embed_page)
|
||||
|
||||
entries = {}
|
||||
@@ -166,17 +166,17 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'thumbnails': [],
|
||||
'artist': p.get('artist'),
|
||||
'track': p.get('title'),
|
||||
'subtitles': {},
|
||||
}
|
||||
'subtitles': {}}
|
||||
|
||||
for track in p.get('tracks', []):
|
||||
if track['kind'] != 'subtitles':
|
||||
continue
|
||||
entries[p['orig']][track['label']] = {
|
||||
'url': 'https://archive.org/' + track['file'].lstrip('/')
|
||||
}
|
||||
|
||||
metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
|
||||
entries[p['orig']][track['label']] = {
|
||||
'url': 'https://archive.org/' + track['file'].lstrip('/')}
|
||||
|
||||
metadata = self._download_json(
|
||||
'http://archive.org/metadata/' + identifier, identifier)
|
||||
m = metadata['metadata']
|
||||
identifier = m['identifier']
|
||||
|
||||
@@ -189,7 +189,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'license': m.get('licenseurl'),
|
||||
'release_date': unified_strdate(m.get('date')),
|
||||
'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
|
||||
'webpage_url': f'https://archive.org/details/{identifier}',
|
||||
'webpage_url': 'https://archive.org/details/' + identifier,
|
||||
'location': m.get('venue'),
|
||||
'release_year': int_or_none(m.get('year'))}
|
||||
|
||||
@@ -207,7 +207,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'discnumber': int_or_none(f.get('disc')),
|
||||
'release_year': int_or_none(f.get('year'))})
|
||||
entry = entries[f['name']]
|
||||
elif traverse_obj(f, 'original', expected_type=str) in entries:
|
||||
elif f.get('original') in entries:
|
||||
entry = entries[f['original']]
|
||||
else:
|
||||
continue
|
||||
@@ -230,12 +230,13 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'filesize': int_or_none(f.get('size')),
|
||||
'protocol': 'https'})
|
||||
|
||||
# Sort available formats by filesize
|
||||
for entry in entries.values():
|
||||
self._sort_formats(entry['formats'])
|
||||
entry['formats'] = list(sorted(entry['formats'], key=lambda x: x.get('filesize', -1)))
|
||||
|
||||
if len(entries) == 1:
|
||||
# If there's only one item, use it as the main info dict
|
||||
only_video = next(iter(entries.values()))
|
||||
only_video = entries[list(entries.keys())[0]]
|
||||
if entry_id:
|
||||
info = merge_dicts(only_video, info)
|
||||
else:
|
||||
@@ -260,19 +261,19 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
|
||||
class YoutubeWebArchiveIE(InfoExtractor):
|
||||
IE_NAME = 'web.archive:youtube'
|
||||
IE_DESC = 'web.archive.org saved youtube videos, "ytarchive:" prefix'
|
||||
_VALID_URL = r'''(?x)(?:(?P<prefix>ytarchive:)|
|
||||
(?:https?://)?web\.archive\.org/
|
||||
(?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)? # /web and the version index is optional
|
||||
(?:https?(?::|%3[Aa])//)?(?:
|
||||
(?:\w+\.)?youtube\.com(?::(?:80|443))?/watch(?:\.php)?(?:\?|%3[fF])(?:[^\#]+(?:&|%26))?v(?:=|%3[dD]) # Youtube URL
|
||||
|(?:wayback-fakeurl\.archive\.org/yt/) # Or the internal fake url
|
||||
)
|
||||
)(?P<id>[0-9A-Za-z_-]{11})
|
||||
(?(prefix)
|
||||
(?::(?P<date2>[0-9]{14}))?$|
|
||||
(?:%26|[#&]|$)
|
||||
)'''
|
||||
IE_DESC = 'web.archive.org saved youtube videos'
|
||||
_VALID_URL = r"""(?x)^
|
||||
(?:https?://)?web\.archive\.org/
|
||||
(?:web/)?
|
||||
(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)? # /web and the version index is optional
|
||||
|
||||
(?:https?(?::|%3[Aa])//)?
|
||||
(?:
|
||||
(?:\w+\.)?youtube\.com(?::(?:80|443))?/watch(?:\.php)?(?:\?|%3[fF])(?:[^\#]+(?:&|%26))?v(?:=|%3[dD]) # Youtube URL
|
||||
|(?:wayback-fakeurl\.archive\.org/yt/) # Or the internal fake url
|
||||
)
|
||||
(?P<id>[0-9A-Za-z_-]{11})(?:%26|\#|&|$)
|
||||
"""
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -437,13 +438,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'ytarchive:BaW_jenozKc:20050214000000',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'ytarchive:BaW_jenozKc',
|
||||
'only_matching': True
|
||||
},
|
||||
}
|
||||
]
|
||||
_YT_INITIAL_DATA_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
|
||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*({.+?})[)\s]*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE
|
||||
@@ -489,6 +484,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
page_title, 'title', default='')
|
||||
|
||||
def _extract_metadata(self, video_id, webpage):
|
||||
|
||||
search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None)) if webpage else (lambda x: None))
|
||||
player_response = self._extract_yt_initial_variable(
|
||||
webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, video_id, 'initial player response') or {}
|
||||
@@ -600,7 +596,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
|
||||
# Prefer the new polymer UI captures as we support extracting more metadata from them
|
||||
# WBM captures seem to all switch to this layout ~July 2020
|
||||
modern_captures = [x for x in all_captures if x >= 20200701000000]
|
||||
modern_captures = list(filter(lambda x: x >= 20200701000000, all_captures))
|
||||
if modern_captures:
|
||||
capture_dates.append(modern_captures[0])
|
||||
capture_dates.append(url_date)
|
||||
@@ -612,11 +608,11 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
|
||||
# Fallbacks if any of the above fail
|
||||
capture_dates.extend([self._OLDEST_CAPTURE_DATE, self._NEWEST_CAPTURE_DATE])
|
||||
return orderedSet(filter(None, capture_dates))
|
||||
return orderedSet(capture_dates)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, url_date, url_date_2 = self._match_valid_url(url).group('id', 'date', 'date2')
|
||||
url_date = url_date or url_date_2
|
||||
|
||||
url_date, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
urlh = None
|
||||
try:
|
||||
@@ -633,9 +629,11 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
raise
|
||||
|
||||
capture_dates = self._get_capture_dates(video_id, int_or_none(url_date))
|
||||
self.write_debug('Captures to try: ' + join_nonempty(*capture_dates, delim=', '))
|
||||
self.write_debug('Captures to try: ' + ', '.join(str(i) for i in capture_dates if i is not None))
|
||||
info = {'id': video_id}
|
||||
for capture in capture_dates:
|
||||
if not capture:
|
||||
continue
|
||||
webpage = self._download_webpage(
|
||||
(self._WAYBACK_BASE_URL + 'http://www.youtube.com/watch?v=%s') % (capture, video_id),
|
||||
video_id=video_id, fatal=False, errnote='unable to download capture webpage (it may not be archived)',
|
||||
@@ -650,7 +648,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
info['thumbnails'] = self._extract_thumbnails(video_id)
|
||||
|
||||
if urlh:
|
||||
url = compat_urllib_parse_unquote(urlh.geturl())
|
||||
url = compat_urllib_parse_unquote(urlh.url)
|
||||
video_file_url_qs = parse_qs(url)
|
||||
# Attempt to recover any ext & format info from playback url & response headers
|
||||
format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
||||
|
||||
@@ -17,9 +17,9 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
parse_count,
|
||||
smuggle_url,
|
||||
srt_subtitles_timecode,
|
||||
@@ -52,14 +52,16 @@ class BiliBiliIE(InfoExtractor):
|
||||
'url': 'http://www.bilibili.com/video/av1074402/',
|
||||
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
||||
'info_dict': {
|
||||
'id': '1074402_part1',
|
||||
'ext': 'mp4',
|
||||
'id': '1074402',
|
||||
'ext': 'flv',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'uploader_id': '156160',
|
||||
'uploader': '菊子桑',
|
||||
'upload_date': '20140420',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'duration': 308.067,
|
||||
'timestamp': 1398012678,
|
||||
'upload_date': '20140420',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
},
|
||||
}, {
|
||||
# Tested in BiliBiliBangumiIE
|
||||
@@ -73,27 +75,49 @@ class BiliBiliIE(InfoExtractor):
|
||||
'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
|
||||
'md5': '3f721ad1e75030cc06faf73587cfec57',
|
||||
'info_dict': {
|
||||
'id': '100643_part1',
|
||||
'id': '100643',
|
||||
'ext': 'mp4',
|
||||
'title': 'CHAOS;CHILD',
|
||||
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
|
||||
},
|
||||
'skip': 'Geo-restricted to China',
|
||||
}, {
|
||||
# Title with double quotes
|
||||
'url': 'http://www.bilibili.com/video/av8903802/',
|
||||
'info_dict': {
|
||||
'id': '8903802_part1',
|
||||
'ext': 'mp4',
|
||||
'id': '8903802',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'upload_date': '20170301',
|
||||
'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
|
||||
'timestamp': 1488382634,
|
||||
'uploader_id': '65880958',
|
||||
'uploader': '阿滴英文',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '8903802_part1',
|
||||
'ext': 'flv',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
|
||||
'uploader': '阿滴英文',
|
||||
'uploader_id': '65880958',
|
||||
'timestamp': 1488382634,
|
||||
'upload_date': '20170301',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '8903802_part2',
|
||||
'ext': 'flv',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
|
||||
'uploader': '阿滴英文',
|
||||
'uploader_id': '65880958',
|
||||
'timestamp': 1488382634,
|
||||
'upload_date': '20170301',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
# new BV video id format
|
||||
'url': 'https://www.bilibili.com/video/BV1JE411F741',
|
||||
@@ -128,7 +152,6 @@ class BiliBiliIE(InfoExtractor):
|
||||
av_id, bv_id = self._get_video_id_set(video_id, mobj.group('id_bv') is not None)
|
||||
video_id = av_id
|
||||
|
||||
info = {}
|
||||
anime_id = mobj.group('anime_id')
|
||||
page_id = mobj.group('page')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@@ -180,48 +203,35 @@ class BiliBiliIE(InfoExtractor):
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
|
||||
video_info = self._parse_json(
|
||||
self._search_regex(r'window.__playinfo__\s*=\s*({.+?})</script>', webpage, 'video info', default=None) or '{}',
|
||||
video_id, fatal=False)
|
||||
video_info = video_info.get('data') or {}
|
||||
|
||||
durl = traverse_obj(video_info, ('dash', 'video'))
|
||||
audios = traverse_obj(video_info, ('dash', 'audio')) or []
|
||||
entries = []
|
||||
|
||||
RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
|
||||
for num, rendition in enumerate(RENDITIONS, start=1):
|
||||
payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
|
||||
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
||||
if not video_info:
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page',
|
||||
headers=headers, fatal=num == len(RENDITIONS))
|
||||
if not video_info:
|
||||
continue
|
||||
|
||||
if not durl and 'durl' not in video_info:
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page',
|
||||
headers=headers, fatal=num == len(RENDITIONS))
|
||||
|
||||
if not video_info:
|
||||
continue
|
||||
|
||||
if 'durl' not in video_info:
|
||||
if num < len(RENDITIONS):
|
||||
continue
|
||||
self._report_error(video_info)
|
||||
|
||||
formats = []
|
||||
for idx, durl in enumerate(durl or video_info['durl']):
|
||||
formats.append({
|
||||
'url': durl.get('baseUrl') or durl.get('base_url') or durl.get('url'),
|
||||
'ext': mimetype2ext(durl.get('mimeType') or durl.get('mime_type')),
|
||||
'fps': int_or_none(durl.get('frameRate') or durl.get('frame_rate')),
|
||||
'width': int_or_none(durl.get('width')),
|
||||
'height': int_or_none(durl.get('height')),
|
||||
'vcodec': durl.get('codecs'),
|
||||
'acodec': 'none' if audios else None,
|
||||
'tbr': float_or_none(durl.get('bandwidth'), scale=1000),
|
||||
'filesize': int_or_none(durl.get('size')),
|
||||
})
|
||||
for backup_url in traverse_obj(durl, 'backup_url', expected_type=list) or []:
|
||||
for idx, durl in enumerate(video_info['durl']):
|
||||
formats = [{
|
||||
'url': durl['url'],
|
||||
'filesize': int_or_none(durl['size']),
|
||||
}]
|
||||
for backup_url in durl.get('backup_url', []):
|
||||
formats.append({
|
||||
'url': backup_url,
|
||||
# backup URLs have lower priorities
|
||||
'quality': -2 if 'hd.mp4' in backup_url else -3,
|
||||
})
|
||||
|
||||
@@ -229,47 +239,30 @@ class BiliBiliIE(InfoExtractor):
|
||||
a_format.setdefault('http_headers', {}).update({
|
||||
'Referer': url,
|
||||
})
|
||||
for audio in audios:
|
||||
formats.append({
|
||||
'url': audio.get('baseUrl') or audio.get('base_url') or audio.get('url'),
|
||||
'ext': mimetype2ext(audio.get('mimeType') or audio.get('mime_type')),
|
||||
'fps': int_or_none(audio.get('frameRate') or audio.get('frame_rate')),
|
||||
'width': int_or_none(audio.get('width')),
|
||||
'height': int_or_none(audio.get('height')),
|
||||
'acodec': audio.get('codecs'),
|
||||
'vcodec': 'none',
|
||||
'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
|
||||
'filesize': int_or_none(audio.get('size'))
|
||||
})
|
||||
for backup_url in traverse_obj(audio, 'backup_url', expected_type=list) or []:
|
||||
formats.append({
|
||||
'url': backup_url,
|
||||
# backup URLs have lower priorities
|
||||
'quality': -3,
|
||||
})
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'duration': float_or_none(durl.get('length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (video_id, idx),
|
||||
'duration': float_or_none(durl.get('length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
break
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex((
|
||||
r'<h1[^>]+title=(["\'])(?P<content>[^"\']+)',
|
||||
r'(?s)<h1[^>]*>(?P<content>.+?)</h1>',
|
||||
self._meta_regex('title')
|
||||
), webpage, 'title', group='content', fatal=False)
|
||||
title = self._html_search_regex(
|
||||
(r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
|
||||
group='title')
|
||||
|
||||
# Get part title for anthologies
|
||||
if page_id is not None:
|
||||
# TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video.
|
||||
part_info = traverse_obj(self._download_json(
|
||||
f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
|
||||
video_id, note='Extracting videos in anthology'), 'data', expected_type=list)
|
||||
title = title if len(part_info) == 1 else traverse_obj(part_info, (int(page_id) - 1, 'part')) or title
|
||||
# TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video
|
||||
part_title = try_get(
|
||||
self._download_json(
|
||||
f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
|
||||
video_id, note='Extracting videos in anthology'),
|
||||
lambda x: x['data'][int(page_id) - 1]['part'])
|
||||
title = part_title or title
|
||||
|
||||
description = self._html_search_meta('description', webpage)
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
@@ -279,15 +272,15 @@ class BiliBiliIE(InfoExtractor):
|
||||
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
info.update({
|
||||
'id': f'{video_id}_part{page_id or 1}',
|
||||
info = {
|
||||
'id': str(video_id) if page_id is None else '%s_part%s' % (video_id, page_id),
|
||||
'cid': cid,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': float_or_none(video_info.get('timelength'), scale=1000),
|
||||
})
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
|
||||
@@ -308,7 +301,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
video_id, fatal=False, note='Downloading tags'), ('data', ..., 'tag_name')),
|
||||
}
|
||||
|
||||
info['subtitles'] = {
|
||||
entries[0]['subtitles'] = {
|
||||
'danmaku': [{
|
||||
'ext': 'xml',
|
||||
'url': f'https://comment.bilibili.com/{cid}.xml',
|
||||
@@ -343,10 +336,12 @@ class BiliBiliIE(InfoExtractor):
|
||||
entry['id'] = '%s_part%d' % (video_id, (idx + 1))
|
||||
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'id': str(video_id),
|
||||
'bv_id': bv_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'entries': entries,
|
||||
**info, **top_level_info
|
||||
}
|
||||
|
||||
@@ -487,9 +482,9 @@ class BilibiliChannelIE(InfoExtractor):
|
||||
data = self._download_json(
|
||||
self._API_URL % (list_id, page_num), list_id, note=f'Downloading page {page_num}')['data']
|
||||
|
||||
max_count = max_count or traverse_obj(data, ('page', 'count'))
|
||||
max_count = max_count or try_get(data, lambda x: x['page']['count'])
|
||||
|
||||
entries = traverse_obj(data, ('list', 'vlist'))
|
||||
entries = try_get(data, lambda x: x['list']['vlist'])
|
||||
if not entries:
|
||||
return
|
||||
for entry in entries:
|
||||
@@ -527,7 +522,7 @@ class BilibiliCategoryIE(InfoExtractor):
|
||||
api_url, query, query={'Search_key': query, 'pn': page_num},
|
||||
note='Extracting results from page %s of %s' % (page_num, num_pages))
|
||||
|
||||
video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
|
||||
video_list = try_get(parsed_json, lambda x: x['data']['archives'], list)
|
||||
if not video_list:
|
||||
raise ExtractorError('Failed to retrieve video list for page %d' % page_num)
|
||||
|
||||
@@ -557,7 +552,7 @@ class BilibiliCategoryIE(InfoExtractor):
|
||||
|
||||
api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
|
||||
page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
|
||||
page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
|
||||
page_data = try_get(page_json, lambda x: x['data']['page'], dict)
|
||||
count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
|
||||
if count is None or not size:
|
||||
raise ExtractorError('Failed to calculate either page count or size')
|
||||
|
||||
@@ -13,8 +13,6 @@ class CAM4IE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 're:^foxynesss [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'age_limit': 18,
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': 'https://snapshots.xcdnpro.com/thumbnails/foxynesss',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,5 +29,4 @@ class CAM4IE(InfoExtractor):
|
||||
'is_live': True,
|
||||
'age_limit': 18,
|
||||
'formats': formats,
|
||||
'thumbnail': f'https://snapshots.xcdnpro.com/thumbnails/{channel_id}',
|
||||
}
|
||||
|
||||
@@ -162,8 +162,7 @@ class CCTVIE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'format_id': 'http',
|
||||
'quality': quality,
|
||||
# Sample clip
|
||||
'preference': -10
|
||||
'source_preference': -10
|
||||
})
|
||||
|
||||
hls_url = try_get(data, lambda x: x['hls_url'], compat_str)
|
||||
|
||||
@@ -1291,7 +1291,6 @@ class InfoExtractor(object):
|
||||
return self._og_search_property('description', html, fatal=False, **kargs)
|
||||
|
||||
def _og_search_title(self, html, **kargs):
|
||||
kargs.setdefault('fatal', False)
|
||||
return self._og_search_property('title', html, **kargs)
|
||||
|
||||
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
|
||||
@@ -1448,7 +1447,7 @@ class InfoExtractor(object):
|
||||
'title': part.get('name'),
|
||||
'start_time': part.get('startOffset'),
|
||||
'end_time': part.get('endOffset'),
|
||||
} for part in variadic(e.get('hasPart') or []) if part.get('@type') == 'Clip']
|
||||
} for part in e.get('hasPart', []) if part.get('@type') == 'Clip']
|
||||
for idx, (last_c, current_c, next_c) in enumerate(zip(
|
||||
[{'end_time': 0}] + chapters, chapters, chapters[1:])):
|
||||
current_c['end_time'] = current_c['end_time'] or next_c['start_time']
|
||||
@@ -1529,8 +1528,6 @@ class InfoExtractor(object):
|
||||
'title': unescapeHTML(e.get('headline')),
|
||||
'description': unescapeHTML(e.get('articleBody') or e.get('description')),
|
||||
})
|
||||
if traverse_obj(e, ('video', 0, '@type')) == 'VideoObject':
|
||||
extract_video_object(e['video'][0])
|
||||
elif item_type == 'VideoObject':
|
||||
extract_video_object(e)
|
||||
if expected_type is None:
|
||||
@@ -3713,22 +3710,6 @@ class InfoExtractor(object):
|
||||
return [] if default is NO_DEFAULT else default
|
||||
return list(val) if casesense else [x.lower() for x in val]
|
||||
|
||||
def _yes_playlist(self, playlist_id, video_id, smuggled_data=None, *, playlist_label='playlist', video_label='video'):
|
||||
if not playlist_id or not video_id:
|
||||
return not video_id
|
||||
|
||||
no_playlist = (smuggled_data or {}).get('force_noplaylist')
|
||||
if no_playlist is not None:
|
||||
return not no_playlist
|
||||
|
||||
video_id = '' if video_id is True else f' {video_id}'
|
||||
playlist_id = '' if playlist_id is True else f' {playlist_id}'
|
||||
if self.get_param('noplaylist'):
|
||||
self.to_screen(f'Downloading just the {video_label}{video_id} because of --no-playlist')
|
||||
return False
|
||||
self.to_screen(f'Downloading {playlist_label}{playlist_id} - add --no-playlist to download just the {video_label}{video_id}')
|
||||
return True
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
import json
|
||||
import zlib
|
||||
@@ -24,17 +23,15 @@ from ..utils import (
|
||||
bytes_to_intlist,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
format_field,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
lowercase_escape,
|
||||
merge_dicts,
|
||||
qualities,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
from ..aes import (
|
||||
@@ -43,8 +40,8 @@ from ..aes import (
|
||||
|
||||
|
||||
class CrunchyrollBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
|
||||
_API_BASE = 'https://api.crunchyroll.com'
|
||||
_LOGIN_URL = 'https://www.crunchyroll.com/login'
|
||||
_LOGIN_FORM = 'login_form'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
|
||||
def _call_rpc_api(self, method, video_id, note=None, data=None):
|
||||
@@ -61,33 +58,50 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
if self._get_cookies(self._LOGIN_URL).get('etp_rt'):
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
def is_logged(webpage):
|
||||
return 'href="/logout"' in webpage
|
||||
|
||||
# Already logged in
|
||||
if is_logged(login_page):
|
||||
return
|
||||
|
||||
upsell_response = self._download_json(
|
||||
f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
|
||||
query={
|
||||
'sess_id': 1,
|
||||
'device_id': 'whatvalueshouldbeforweb',
|
||||
'device_type': 'com.crunchyroll.static',
|
||||
'access_token': 'giKq5eY27ny3cqz',
|
||||
'referer': self._LOGIN_URL
|
||||
})
|
||||
if upsell_response['code'] != 'ok':
|
||||
raise ExtractorError('Could not get session id')
|
||||
session_id = upsell_response['data']['session_id']
|
||||
login_form_str = self._search_regex(
|
||||
r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
|
||||
login_page, 'login form', group='form')
|
||||
|
||||
login_response = self._download_json(
|
||||
f'{self._API_BASE}/login.1.json', None, 'Logging in',
|
||||
data=compat_urllib_parse_urlencode({
|
||||
'account': username,
|
||||
'password': password,
|
||||
'session_id': session_id
|
||||
}).encode('ascii'))
|
||||
if login_response['code'] != 'ok':
|
||||
raise ExtractorError('Login failed. Bad username or password?', expected=True)
|
||||
if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
|
||||
raise ExtractorError('Login succeeded but did not set etp_rt cookie')
|
||||
post_url = extract_attributes(login_form_str).get('action')
|
||||
if not post_url:
|
||||
post_url = self._LOGIN_URL
|
||||
elif not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)
|
||||
|
||||
login_form.update({
|
||||
'login_form[name]': username,
|
||||
'login_form[password]': password,
|
||||
})
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in', 'Wrong login info',
|
||||
data=urlencode_postdata(login_form),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
# Successful login
|
||||
if is_logged(response):
|
||||
return
|
||||
|
||||
error = self._html_search_regex(
|
||||
'(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
|
||||
response, 'error message', default=None)
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@@ -719,118 +733,13 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE):
|
||||
def _real_extract(self, url):
|
||||
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
initial_state = self._parse_json(
|
||||
self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'),
|
||||
display_id)
|
||||
episode_data = initial_state['content']['byId'][internal_id]
|
||||
if not self._get_cookies(url).get('etp_rt'):
|
||||
video_id = episode_data['external_id'].split('.')[1]
|
||||
series_id = episode_data['episode_metadata']['series_slug_title']
|
||||
return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
|
||||
CrunchyrollIE.ie_key(), video_id)
|
||||
|
||||
app_config = self._parse_json(
|
||||
self._search_regex(r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'),
|
||||
display_id)
|
||||
client_id = app_config['cxApiParams']['accountAuthClientId']
|
||||
api_domain = app_config['cxApiParams']['apiDomain']
|
||||
basic_token = str(base64.b64encode(('%s:' % client_id).encode('ascii')), 'ascii')
|
||||
auth_response = self._download_json(
|
||||
f'{api_domain}/auth/v1/token', display_id,
|
||||
note='Authenticating with cookie',
|
||||
headers={
|
||||
'Authorization': 'Basic ' + basic_token
|
||||
}, data='grant_type=etp_rt_cookie'.encode('ascii'))
|
||||
policy_response = self._download_json(
|
||||
f'{api_domain}/index/v2', display_id,
|
||||
note='Retrieving signed policy',
|
||||
headers={
|
||||
'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
|
||||
})
|
||||
bucket = policy_response['cms']['bucket']
|
||||
params = {
|
||||
'Policy': policy_response['cms']['policy'],
|
||||
'Signature': policy_response['cms']['signature'],
|
||||
'Key-Pair-Id': policy_response['cms']['key_pair_id']
|
||||
}
|
||||
locale = traverse_obj(initial_state, ('localization', 'locale'))
|
||||
if locale:
|
||||
params['locale'] = locale
|
||||
episode_response = self._download_json(
|
||||
f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
|
||||
note='Retrieving episode metadata',
|
||||
query=params)
|
||||
if episode_response.get('is_premium_only') and not episode_response.get('playback'):
|
||||
raise ExtractorError('This video is for premium members only.', expected=True)
|
||||
stream_response = self._download_json(
|
||||
episode_response['playback'], display_id,
|
||||
note='Retrieving stream info')
|
||||
|
||||
thumbnails = []
|
||||
for thumbnails_data in traverse_obj(episode_response, ('images', 'thumbnail')):
|
||||
for thumbnail_data in thumbnails_data:
|
||||
thumbnails.append({
|
||||
'url': thumbnail_data.get('source'),
|
||||
'width': thumbnail_data.get('width'),
|
||||
'height': thumbnail_data.get('height'),
|
||||
})
|
||||
subtitles = {}
|
||||
for lang, subtitle_data in stream_response.get('subtitles').items():
|
||||
subtitles[lang] = [{
|
||||
'url': subtitle_data.get('url'),
|
||||
'ext': subtitle_data.get('format')
|
||||
}]
|
||||
|
||||
requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
|
||||
hardsub_preference = qualities(requested_hardsubs[::-1])
|
||||
requested_formats = self._configuration_arg('format') or ['adaptive_hls']
|
||||
|
||||
formats = []
|
||||
for stream_type, streams in stream_response.get('streams', {}).items():
|
||||
if stream_type not in requested_formats:
|
||||
continue
|
||||
for stream in streams.values():
|
||||
hardsub_lang = stream.get('hardsub_locale') or ''
|
||||
if hardsub_lang.lower() not in requested_hardsubs:
|
||||
continue
|
||||
format_id = join_nonempty(
|
||||
stream_type,
|
||||
format_field(stream, 'hardsub_locale', 'hardsub-%s'))
|
||||
if not stream.get('url'):
|
||||
continue
|
||||
if stream_type.split('_')[-1] == 'hls':
|
||||
adaptive_formats = self._extract_m3u8_formats(
|
||||
stream['url'], display_id, 'mp4', m3u8_id=format_id,
|
||||
note='Downloading %s information' % format_id,
|
||||
fatal=False)
|
||||
elif stream_type.split('_')[-1] == 'dash':
|
||||
adaptive_formats = self._extract_mpd_formats(
|
||||
stream['url'], display_id, mpd_id=format_id,
|
||||
note='Downloading %s information' % format_id,
|
||||
fatal=False)
|
||||
for f in adaptive_formats:
|
||||
if f.get('acodec') != 'none':
|
||||
f['language'] = stream_response.get('audio_locale')
|
||||
f['quality'] = hardsub_preference(hardsub_lang.lower())
|
||||
formats.extend(adaptive_formats)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': internal_id,
|
||||
'title': '%s Episode %s – %s' % (episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
|
||||
'description': episode_response.get('description').replace(r'\r\n', '\n'),
|
||||
'duration': float_or_none(episode_response.get('duration_ms'), 1000),
|
||||
'thumbnails': thumbnails,
|
||||
'series': episode_response.get('series_title'),
|
||||
'series_id': episode_response.get('series_id'),
|
||||
'season': episode_response.get('season_title'),
|
||||
'season_id': episode_response.get('season_id'),
|
||||
'season_number': episode_response.get('season_number'),
|
||||
'episode': episode_response.get('title'),
|
||||
'episode_number': episode_response.get('sequence_number'),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats
|
||||
}
|
||||
episode_data = self._parse_json(
|
||||
self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'episode data'),
|
||||
display_id)['content']['byId'][internal_id]
|
||||
video_id = episode_data['external_id'].split('.')[1]
|
||||
series_id = episode_data['episode_metadata']['series_slug_title']
|
||||
return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
|
||||
CrunchyrollIE.ie_key(), video_id)
|
||||
|
||||
|
||||
class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
|
||||
|
||||
@@ -207,10 +207,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
video_id, playlist_id = self._match_valid_url(url).groups()
|
||||
|
||||
if playlist_id:
|
||||
if self._yes_playlist(playlist_id, video_id):
|
||||
if not self.get_param('noplaylist'):
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
|
||||
return self.url_result(
|
||||
'http://www.dailymotion.com/playlist/' + playlist_id,
|
||||
'DailymotionPlaylist', playlist_id)
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
media = self._call_api(
|
||||
|
||||
@@ -157,8 +157,11 @@ class DaumListIE(InfoExtractor):
|
||||
query_dict = parse_qs(url)
|
||||
if 'clipid' in query_dict:
|
||||
clip_id = query_dict['clipid'][0]
|
||||
if not self._yes_playlist(list_id, clip_id):
|
||||
if self.get_param('noplaylist'):
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % clip_id)
|
||||
return self.url_result(DaumClipIE._URL_TEMPLATE % clip_id, 'DaumClip')
|
||||
else:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id)
|
||||
|
||||
|
||||
class DaumPlaylistIE(DaumListIE):
|
||||
|
||||
@@ -20,16 +20,6 @@ class DoodStreamIE(InfoExtractor):
|
||||
'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
|
||||
'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://dood.watch/d/5s1wmbdacezb',
|
||||
'md5': '4568b83b31e13242b3f1ff96c55f0595',
|
||||
'info_dict': {
|
||||
'id': '5s1wmbdacezb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kat Wonders - Monthly May 2020',
|
||||
'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
|
||||
'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://dood.to/d/jzrxn12t2s7n',
|
||||
'md5': '3207e199426eca7c2aa23c2872e6728a',
|
||||
@@ -44,26 +34,31 @@ class DoodStreamIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url = f'https://dood.to/e/{video_id}'
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None)
|
||||
thumb = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None)
|
||||
if '/d/' in url:
|
||||
url = "https://dood.to" + self._html_search_regex(
|
||||
r'<iframe src="(/e/[a-z0-9]+)"', webpage, 'embed')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta(['og:title', 'twitter:title'],
|
||||
webpage, default=None)
|
||||
thumb = self._html_search_meta(['og:image', 'twitter:image'],
|
||||
webpage, default=None)
|
||||
token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
|
||||
description = self._html_search_meta(
|
||||
['og:description', 'description', 'twitter:description'], webpage, default=None)
|
||||
|
||||
['og:description', 'description', 'twitter:description'],
|
||||
webpage, default=None)
|
||||
auth_url = 'https://dood.to' + self._html_search_regex(
|
||||
r'(/pass_md5.*?)\'', webpage, 'pass_md5')
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
|
||||
'referer': url
|
||||
}
|
||||
|
||||
pass_md5 = self._html_search_regex(r'(/pass_md5.*?)\'', webpage, 'pass_md5')
|
||||
final_url = ''.join((
|
||||
self._download_webpage(f'https://dood.to{pass_md5}', video_id, headers=headers),
|
||||
*(random.choice(string.ascii_letters + string.digits) for _ in range(10)),
|
||||
f'?token={token}&expiry={int(time.time() * 1000)}',
|
||||
))
|
||||
webpage = self._download_webpage(auth_url, video_id, headers=headers)
|
||||
final_url = webpage + ''.join([random.choice(string.ascii_letters + string.digits) for _ in range(10)]) + "?token=" + token + "&expiry=" + str(int(time.time() * 1000))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -369,222 +369,6 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)
|
||||
|
||||
|
||||
class GoDiscoveryIE(DiscoveryPlusBaseIE):
    # Site-specific configuration for discovery.com video pages.
    # The "go." subdomain is optional in the URL pattern; the path part of the
    # pattern is supplied by DPlayBaseIE._PATH_REGEX.
    _VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://go.discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
        'info_dict': {
            'id': '4164906',
            'display_id': 'dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
            'ext': 'mp4',
            'title': 'Rodbuster / Galvanizer',
            'description': 'Mike installs rebar with a team of rodbusters, then he galvanizes steel.',
            'season_number': 9,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        # URL-pattern-only check for the host without the optional subdomain
        'url': 'https://discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
        'only_matching': True,
    }]

    # NOTE(review): _PRODUCT is presumably read by the base class; its use is
    # not visible in this part of the file - confirm before changing.
    _PRODUCT = 'dsc'
    # Forwarded by DiscoveryPlusBaseIE as keyword arguments to _get_disco_api_info()
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.go.discovery.com',
        'realm': 'go',
        'country': 'us',
    }
|
||||
|
||||
|
||||
class TravelChannelIE(DiscoveryPlusBaseIE):
    # Site-specific configuration for travelchannel.com video pages.
    # The "watch." subdomain is optional in the URL pattern; the path part of
    # the pattern is supplied by DPlayBaseIE._PATH_REGEX.
    _VALID_URL = r'https?://(?:watch\.)?travelchannel\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely',
        'info_dict': {
            'id': '2220256',
            'display_id': 'ghost-adventures-travel-channel/ghost-train-of-ely',
            'ext': 'mp4',
            'title': 'Ghost Train of Ely',
            'description': 'The crew investigates the dark history of the Nevada Northern Railway.',
            'season_number': 24,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        # URL-pattern-only check for the host without the optional subdomain,
        # so that both branches of _VALID_URL are covered
        'url': 'https://travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely',
        'only_matching': True,
    }]

    # NOTE(review): _PRODUCT is presumably read by the base class; its use is
    # not visible in this part of the file - confirm before changing.
    _PRODUCT = 'trav'
    # Forwarded by DiscoveryPlusBaseIE as keyword arguments to _get_disco_api_info()
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.travelchannel.com',
        'realm': 'go',
        'country': 'us',
    }
|
||||
|
||||
|
||||
class CookingChannelIE(DiscoveryPlusBaseIE):
    # Site-specific configuration for cookingchanneltv.com video pages.
    # The "watch." subdomain is optional in the URL pattern; the path part of
    # the pattern is supplied by DPlayBaseIE._PATH_REGEX.
    _VALID_URL = r'https?://(?:watch\.)?cookingchanneltv\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
        'info_dict': {
            'id': '2348634',
            'display_id': 'carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
            'ext': 'mp4',
            'title': 'The Postman Always Brings Rice',
            'description': 'Noah visits the Maui Fair and the Aurora Winter Festival in Vancouver.',
            'season_number': 9,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        # URL-pattern-only check for the host without the optional subdomain,
        # so that both branches of _VALID_URL are covered
        'url': 'https://cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
        'only_matching': True,
    }]

    # NOTE(review): _PRODUCT is presumably read by the base class; its use is
    # not visible in this part of the file - confirm before changing.
    _PRODUCT = 'cook'
    # Forwarded by DiscoveryPlusBaseIE as keyword arguments to _get_disco_api_info()
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.cookingchanneltv.com',
        'realm': 'go',
        'country': 'us',
    }
|
||||
|
||||
|
||||
class HGTVUsaIE(DiscoveryPlusBaseIE):
    # Site-specific configuration for hgtv.com video pages.
    # The "watch." subdomain is optional in the URL pattern; the path part of
    # the pattern is supplied by DPlayBaseIE._PATH_REGEX.
    _VALID_URL = r'https?://(?:watch\.)?hgtv\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house',
        'info_dict': {
            'id': '4289736',
            'display_id': 'home-inspector-joe-hgtv-atve-us/this-mold-house',
            'ext': 'mp4',
            'title': 'This Mold House',
            'description': 'Joe and Noel help take a familys dream home from hazardous to fabulous.',
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        # URL-pattern-only check for the host without the optional subdomain,
        # so that both branches of _VALID_URL are covered
        'url': 'https://hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house',
        'only_matching': True,
    }]

    # NOTE(review): _PRODUCT is presumably read by the base class; its use is
    # not visible in this part of the file - confirm before changing.
    _PRODUCT = 'hgtv'
    # Forwarded by DiscoveryPlusBaseIE as keyword arguments to _get_disco_api_info()
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.hgtv.com',
        'realm': 'go',
        'country': 'us',
    }
|
||||
|
||||
|
||||
class FoodNetworkIE(DiscoveryPlusBaseIE):
    # Site-specific configuration for foodnetwork.com video pages.
    # The "watch." subdomain is optional in the URL pattern; the path part of
    # the pattern is supplied by DPlayBaseIE._PATH_REGEX.
    _VALID_URL = r'https?://(?:watch\.)?foodnetwork\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly',
        'info_dict': {
            'id': '4116449',
            'display_id': 'kids-baking-championship-food-network/float-like-a-butterfly',
            'ext': 'mp4',
            'title': 'Float Like a Butterfly',
            'description': 'The 12 kid bakers create colorful carved butterfly cakes.',
            'season_number': 10,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        # URL-pattern-only check for the host without the optional subdomain,
        # so that both branches of _VALID_URL are covered
        'url': 'https://foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly',
        'only_matching': True,
    }]

    # NOTE(review): _PRODUCT is presumably read by the base class; its use is
    # not visible in this part of the file - confirm before changing.
    _PRODUCT = 'food'
    # Forwarded by DiscoveryPlusBaseIE as keyword arguments to _get_disco_api_info()
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.foodnetwork.com',
        'realm': 'go',
        'country': 'us',
    }
|
||||
|
||||
|
||||
class DestinationAmericaIE(DiscoveryPlusBaseIE):
    # Site-specific configuration for destinationamerica.com video pages.
    # The "www." subdomain is optional in the URL pattern; the path part of
    # the pattern is supplied by DPlayBaseIE._PATH_REGEX.
    _VALID_URL = r'https?://(?:www\.)?destinationamerica\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
        'info_dict': {
            'id': '4210904',
            'display_id': 'alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
            'ext': 'mp4',
            'title': 'Central Alaskas Bigfoot',
            'description': 'A team heads to central Alaska to investigate an aggressive Bigfoot.',
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        # URL-pattern-only check for the host without the optional subdomain,
        # so that both branches of _VALID_URL are covered
        'url': 'https://destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
        'only_matching': True,
    }]

    # NOTE(review): _PRODUCT is presumably read by the base class; its use is
    # not visible in this part of the file - confirm before changing.
    _PRODUCT = 'dam'
    # Forwarded by DiscoveryPlusBaseIE as keyword arguments to _get_disco_api_info()
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.destinationamerica.com',
        'realm': 'go',
        'country': 'us',
    }
|
||||
|
||||
|
||||
class InvestigationDiscoveryIE(DiscoveryPlusBaseIE):
    # Site-specific configuration for investigationdiscovery.com video pages.
    # The "www." subdomain is optional in the URL pattern; the path part of
    # the pattern is supplied by DPlayBaseIE._PATH_REGEX.
    _VALID_URL = r'https?://(?:www\.)?investigationdiscovery\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown',
        'info_dict': {
            'id': '2139409',
            'display_id': 'unmasked-investigation-discovery/the-killer-clown',
            'ext': 'mp4',
            'title': 'The Killer Clown',
            'description': 'A wealthy Florida woman is fatally shot in the face by a clown at her door.',
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        # URL-pattern-only check for the host without the optional subdomain,
        # so that both branches of _VALID_URL are covered
        'url': 'https://investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown',
        'only_matching': True,
    }]

    # NOTE(review): _PRODUCT is presumably read by the base class; its use is
    # not visible in this part of the file - confirm before changing.
    _PRODUCT = 'ids'
    # Forwarded by DiscoveryPlusBaseIE as keyword arguments to _get_disco_api_info()
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.investigationdiscovery.com',
        'realm': 'go',
        'country': 'us',
    }
|
||||
|
||||
|
||||
class AmHistoryChannelIE(DiscoveryPlusBaseIE):
    # Site-specific configuration for ahctv.com video pages.
    # The "www." subdomain is optional in the URL pattern; the path part of
    # the pattern is supplied by DPlayBaseIE._PATH_REGEX.
    _VALID_URL = r'https?://(?:www\.)?ahctv\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army',
        'info_dict': {
            'id': '2309730',
            'display_id': 'modern-sniper-ahc/army',
            'ext': 'mp4',
            'title': 'Army',
            'description': 'Snipers today face challenges their predecessors couldve only dreamed of.',
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        # URL-pattern-only check for the host without the optional subdomain,
        # so that both branches of _VALID_URL are covered
        'url': 'https://ahctv.com/video/modern-sniper-ahc/army',
        'only_matching': True,
    }]

    # NOTE(review): _PRODUCT is presumably read by the base class; its use is
    # not visible in this part of the file - confirm before changing.
    _PRODUCT = 'ahc'
    # Forwarded by DiscoveryPlusBaseIE as keyword arguments to _get_disco_api_info()
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.ahctv.com',
        'realm': 'go',
        'country': 'us',
    }
|
||||
|
||||
|
||||
class ScienceChannelIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
@@ -599,9 +383,6 @@ class ScienceChannelIE(DiscoveryPlusBaseIE):
|
||||
'episode_number': 1,
|
||||
},
|
||||
'skip': 'Available for Premium users',
|
||||
}, {
|
||||
'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = 'sci'
|
||||
@@ -626,9 +407,6 @@ class DIYNetworkIE(DiscoveryPlusBaseIE):
|
||||
'episode_number': 2,
|
||||
},
|
||||
'skip': 'Available for Premium users',
|
||||
}, {
|
||||
'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = 'diy'
|
||||
@@ -639,33 +417,6 @@ class DIYNetworkIE(DiscoveryPlusBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class DiscoveryLifeIE(DiscoveryPlusBaseIE):
    # Site-specific configuration for discoverylife.com video pages.
    # The "www." subdomain is optional in the URL pattern; the path part of
    # the pattern is supplied by DPlayBaseIE._PATH_REGEX.
    _VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma',
        'info_dict': {
            'id': '2218238',
            'display_id': 'surviving-death-discovery-life-atve-us/bodily-trauma',
            'ext': 'mp4',
            'title': 'Bodily Trauma',
            'description': 'Meet three people who tested the limits of the human body.',
            'season_number': 1,
            'episode_number': 2,
        },
        'skip': 'Available for Premium users',
    }, {
        # URL-pattern-only check for the host without the optional subdomain,
        # so that both branches of _VALID_URL are covered
        'url': 'https://discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma',
        'only_matching': True,
    }]

    # NOTE(review): _PRODUCT is presumably read by the base class; its use is
    # not visible in this part of the file - confirm before changing.
    _PRODUCT = 'dlf'
    # Forwarded by DiscoveryPlusBaseIE as keyword arguments to _get_disco_api_info()
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.discoverylife.com',
        'realm': 'go',
        'country': 'us',
    }
|
||||
|
||||
|
||||
class AnimalPlanetIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
@@ -680,9 +431,6 @@ class AnimalPlanetIE(DiscoveryPlusBaseIE):
|
||||
'episode_number': 11,
|
||||
},
|
||||
'skip': 'Available for Premium users',
|
||||
}, {
|
||||
'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = 'apl'
|
||||
@@ -693,33 +441,6 @@ class AnimalPlanetIE(DiscoveryPlusBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class TLCIE(DiscoveryPlusBaseIE):
    # Site-specific configuration for tlc.com video pages.
    # The "go." subdomain is optional in the URL pattern; the path part of
    # the pattern is supplied by DPlayBaseIE._PATH_REGEX.
    _VALID_URL = r'https?://(?:go\.)?tlc\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1',
        'info_dict': {
            'id': '2206540',
            'display_id': 'my-600-lb-life-tlc/melissas-story-part-1',
            'ext': 'mp4',
            'title': 'Melissas Story (Part 1)',
            'description': 'At 650 lbs, Melissa is ready to begin her seven-year weight loss journey.',
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        # URL-pattern-only check for the host without the optional subdomain,
        # so that both branches of _VALID_URL are covered
        'url': 'https://tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1',
        'only_matching': True,
    }]

    # NOTE(review): _PRODUCT is presumably read by the base class; its use is
    # not visible in this part of the file - confirm before changing.
    _PRODUCT = 'tlc'
    # Forwarded by DiscoveryPlusBaseIE as keyword arguments to _get_disco_api_info()
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.tlc.com',
        'realm': 'go',
        'country': 'us',
    }
|
||||
|
||||
|
||||
class DiscoveryPlusIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
|
||||
@@ -7,11 +7,13 @@ import re
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
float_or_none,
|
||||
mimetype2ext,
|
||||
str_or_none,
|
||||
@@ -189,11 +191,13 @@ class DRTVIE(InfoExtractor):
|
||||
def decrypt_uri(e):
|
||||
n = int(e[2:10], 16)
|
||||
a = e[10 + n:]
|
||||
data = hex_to_bytes(e[10:10 + n])
|
||||
key = hashlib.sha256(('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest()
|
||||
iv = hex_to_bytes(a)
|
||||
decrypted = unpad_pkcs7(aes_cbc_decrypt_bytes(data, key, iv))
|
||||
return decrypted.decode('utf-8').split('?')[0]
|
||||
data = bytes_to_intlist(hex_to_bytes(e[10:10 + n]))
|
||||
key = bytes_to_intlist(hashlib.sha256(
|
||||
('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest())
|
||||
iv = bytes_to_intlist(hex_to_bytes(a))
|
||||
decrypted = aes_cbc_decrypt(data, key, iv)
|
||||
return intlist_to_bytes(
|
||||
decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0]
|
||||
|
||||
for asset in assets:
|
||||
kind = asset.get('Kind')
|
||||
|
||||
@@ -1,316 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_qs,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
class ERTFlixBaseIE(InfoExtractor):
    # Shared helpers for calling the JSON API at api.app.ertflix.gr.

    def _call_api(
            self, video_id, method='Player/AcquireContent', api_version=1,
            param_headers=None, data=None, headers=None, **params):
        """Call an ERTFLIX API method and return the decoded response.

        @param video_id       id passed to the downloader (used for messages)
        @param method         API method path appended after the version
        @param api_version    number interpolated into the ``/v<N>/`` path part
        @param param_headers  extra entries for the ``$headers`` query parameter
        @param data           dict sent as a JSON request body; when falsy no
                              body is sent
        @param headers        extra HTTP request headers (never modified)
        @param params         additional query parameters
        @returns              the response only if ``Result.Success`` is
                              exactly True; None otherwise (the download
                              itself is non-fatal)
        """
        platform_codename = {'platformCodename': 'www'}
        headers_as_param = {'X-Api-Date-Format': 'iso', 'X-Api-Camel-Case': False}
        headers_as_param.update(param_headers or {})
        # Copy: Content-Type is added below and must not leak into the
        # dict object owned by the caller
        headers = dict(headers or {})
        if data:
            headers['Content-Type'] = headers_as_param['Content-Type'] = 'application/json;charset=utf-8'
            # With a body, the platform codename travels inside the JSON ...
            data = json.dumps(merge_dicts(platform_codename, data)).encode('utf-8')
        query = merge_dicts(
            # ... otherwise it is sent as a query parameter
            {} if data else platform_codename,
            {'$headers': json.dumps(headers_as_param)},
            params)
        response = self._download_json(
            'https://api.app.ertflix.gr/v%s/%s' % (str(api_version), method),
            video_id, fatal=False, query=query, data=data, headers=headers)
        if try_get(response, lambda x: x['Result']['Success']) is True:
            return response

    def _call_api_get_tiles(self, video_id, *tile_ids):
        """Fetch tile records via the ``Tile/GetTiles`` API method.

        Without extra tile_ids, returns the single tile whose ``Id`` equals
        video_id. With extra tile_ids, returns the list of tiles, provided
        the returned ids are exactly the requested ones.

        Raises ExtractorError if the expected tile(s) are not in the response.
        """
        requested_tile_ids = [video_id] + list(tile_ids)
        requested_tiles = [{'Id': tile_id} for tile_id in requested_tile_ids]
        tiles_response = self._call_api(
            video_id, method='Tile/GetTiles', api_version=2,
            data={'RequestedTiles': requested_tiles})
        tiles = try_get(tiles_response, lambda x: x['Tiles'], list) or []
        if tile_ids:
            if sorted([tile['Id'] for tile in tiles]) != sorted(requested_tile_ids):
                raise ExtractorError('Requested tiles not found', video_id=video_id)
            return tiles
        try:
            return next(tile for tile in tiles if tile['Id'] == video_id)
        except StopIteration:
            raise ExtractorError('No matching tile found', video_id=video_id)
|
||||
|
||||
|
||||
class ERTFlixCodenameIE(ERTFlixBaseIE):
    # Resolves internal "ertflix:<codename>" URLs to playable formats.
    IE_NAME = 'ertflix:codename'
    IE_DESC = 'ERTFLIX videos by codename'
    _VALID_URL = r'ertflix:(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'ertflix:monogramma-praxitelis-tzanoylinos',
        'md5': '5b9c2cd171f09126167e4082fc1dd0ef',
        'info_dict': {
            'id': 'monogramma-praxitelis-tzanoylinos',
            'ext': 'mp4',
            'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e',
        },
    },
    ]

    def _extract_formats_and_subs(self, video_id, allow_none=True):
        """Collect formats and subtitles for the given codename.

        Returns a ``(formats, subtitles)`` tuple. Formats are sorted unless
        the list is empty and allow_none is true.
        """
        api_response = self._call_api(video_id, codename=video_id)
        formats, subtitles = [], {}
        media_files = try_get(api_response, lambda x: x['MediaFiles'], list) or []
        for media_file in media_files:
            entries = try_get(media_file, lambda x: x['Formats'], list) or []
            for entry in entries:
                media_url = url_or_none(try_get(entry, lambda x: x['Url']))
                if not media_url:
                    continue
                kind = determine_ext(media_url)
                if kind not in ('m3u8', 'mpd'):
                    # Not a manifest: use the URL as a direct format
                    formats.append({
                        'url': media_url,
                        'format_id': str_or_none(entry.get('Id')),
                    })
                    continue
                if kind == 'm3u8':
                    found_formats, found_subs = self._extract_m3u8_formats_and_subtitles(
                        media_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
                else:
                    found_formats, found_subs = self._extract_mpd_formats_and_subtitles(
                        media_url, video_id, mpd_id='dash', fatal=False)
                formats.extend(found_formats)
                self._merge_subtitles(found_subs, target=subtitles)

        if formats or not allow_none:
            self._sort_formats(formats)
        return formats, subtitles

    def _real_extract(self, url):
        video_id = self._match_id(url)

        formats, subtitles = self._extract_formats_and_subs(video_id)

        # Nothing playable was found: no info dict is produced
        if not formats:
            return None
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'title': self._generic_title(url),
        }
|
||||
|
||||
|
||||
class ERTFlixIE(ERTFlixBaseIE):
    # Handles ertflix.gr page URLs: ids starting with "ser." are expanded to
    # a playlist of episodes, anything else is treated as a single tile.
    IE_NAME = 'ertflix'
    IE_DESC = 'ERTFLIX videos'
    _VALID_URL = r'https?://www\.ertflix\.gr/(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
    _TESTS = [{
        'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
        'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
        'info_dict': {
            'id': 'aoratoi-ergates',
            'ext': 'mp4',
            'title': 'md5:c1433d598fbba0211b0069021517f8b4',
            'description': 'md5:01a64d113c31957eb7eb07719ab18ff4',
            'thumbnail': r're:https?://.+\.jpg',
            'episode_id': 'vod.173258',
            'timestamp': 1639648800,
            'upload_date': '20211216',
            'duration': 3166,
            'age_limit': 8,
        },
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 64,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9',
        'info_dict': {
            'id': 'ser.164991',
            'age_limit': 8,
            'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.',
            'title': 'Το δίκτυο',
        },
        'playlist_mincount': 9,
    }]

    def _extract_episode(self, episode):
        """Build a url_transparent result pointing at ``ertflix:<codename>``.

        Returns None when the record has no codename, no title, or is
        explicitly flagged as having no playable stream.
        """
        codename = try_get(episode, lambda x: x['Codename'], compat_str)
        title = episode.get('Title')
        description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription', )))
        if not codename or not title or not episode.get('HasPlayableStream', True):
            return
        # First image flagged IsMain; variadic() wraps a single image dict
        thumbnail = next((
            url_or_none(thumb.get('Url'))
            for thumb in variadic(dict_get(episode, ('Images', 'Image')) or {})
            if thumb.get('IsMain')),
            None)
        return {
            '_type': 'url_transparent',
            'thumbnail': thumbnail,
            'id': codename,
            'episode_id': episode.get('Id'),
            'title': title,
            'alt_title': episode.get('Subtitle'),
            'description': description,
            'timestamp': parse_iso8601(episode.get('PublishDate')),
            'duration': episode.get('DurationSeconds'),
            'age_limit': self._parse_age_rating(episode),
            'url': 'ertflix:%s' % (codename, ),
        }

    @staticmethod
    def _parse_age_rating(info_dict):
        """Return the age limit parsed from ``AgeRating``, falling back to
        18 for ``IsAdultContent`` and 0 for ``IsKidsContent``."""
        return parse_age_limit(
            info_dict.get('AgeRating')
            or (info_dict.get('IsAdultContent') and 18)
            or (info_dict.get('IsKidsContent') and 0))

    def _extract_series(self, video_id, season_titles=None, season_numbers=None):
        """Return a playlist of the series' episodes.

        @param season_titles   episode-group titles to keep (all if falsy)
        @param season_numbers  season numbers to keep; resolved to titles via
                               the series' ``Seasons`` list
        """
        media_info = self._call_api(video_id, method='Tile/GetSeriesDetails', id=video_id)

        series = try_get(media_info, lambda x: x['Series'], dict) or {}
        series_info = {
            'age_limit': self._parse_age_rating(series),
            'title': series.get('Title'),
            'description': dict_get(series, ('ShortDescription', 'TinyDescription', )),
        }
        if season_numbers:
            # Copy so that the list passed in by the caller is never appended to
            season_titles = list(season_titles or [])
            for season in try_get(series, lambda x: x['Seasons'], list) or []:
                if season.get('SeasonNumber') in season_numbers and season.get('Title'):
                    season_titles.append(season['Title'])

        def gen_episode(m_info, season_titles):
            for episode_group in try_get(m_info, lambda x: x['EpisodeGroups'], list) or []:
                if season_titles and episode_group.get('Title') not in season_titles:
                    continue
                episodes = try_get(episode_group, lambda x: x['Episodes'], list)
                if not episodes:
                    continue
                season_info = {
                    'season': episode_group.get('Title'),
                    'season_number': int_or_none(episode_group.get('SeasonNumber')),
                }
                try:
                    numbered = [(int(ep['EpisodeNumber']), ep) for ep in episodes]
                except (KeyError, TypeError, ValueError):
                    # Missing, null or non-numeric episode numbers: keep the
                    # API order and number the episodes sequentially
                    numbered = enumerate(episodes, 1)
                else:
                    # Sort on the number only. Sorting the bare tuples would
                    # compare the episode dicts whenever two numbers are
                    # equal, which raises TypeError.
                    numbered.sort(key=lambda pair: pair[0])
                for n, episode in numbered:
                    info = self._extract_episode(episode)
                    if info is None:
                        continue
                    info['episode_number'] = n
                    info.update(season_info)
                    yield info

        return self.playlist_result(
            gen_episode(media_info, season_titles), playlist_id=video_id, **series_info)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        if video_id.startswith('ser.'):
            # Each "season" query value selects a season, either by number
            # (when it parses as an integer) or by title
            season_titles, season_numbers = [], []
            for value in parse_qs(url).get('season') or []:
                number = int_or_none(value)
                if number is not None:
                    season_numbers.append(number)
                else:
                    season_titles.append(str_or_none(value))
            return self._extract_series(
                video_id,
                season_titles=season_titles or None,
                season_numbers=season_numbers or None)

        return self._extract_episode(self._call_api_get_tiles(video_id))
|
||||
|
||||
|
||||
class ERTWebtvEmbedIE(InfoExtractor):
    # Extractor for the ert.gr webtv VOD player page, normally met as an iframe.
    IE_NAME = 'ertwebtv:embed'
    IE_DESC = 'ert.gr webtv embedded videos'
    _BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
    _VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'

    _TESTS = [{
        'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
        'md5': 'f9e9900c25c26f4ecfbddbb4b6305854',
        'info_dict': {
            'id': 'trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4',
            'title': 'md5:914f06a73cd8b62fbcd6fb90c636e497',
            'ext': 'mp4',
            'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg'
        },
    }]

    @classmethod
    def _extract_urls(cls, webpage):
        """Yield every player URL found in an iframe ``src`` attribute of
        webpage that this extractor considers suitable."""
        # The URL part runs up to the quote character that opened the
        # attribute value (named group _q1 of the iframe pattern)
        player_url_re = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
        iframe_re = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{player_url_re})(?P=_q1)'

        for match in re.finditer(iframe_re, webpage):
            candidate = unescapeHTML(match.group('url'))
            if cls.suitable(candidate):
                yield candidate

    def _real_extract(self, url):
        video_id = self._match_id(url)
        manifest_url = f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8'
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            manifest_url, video_id, 'mp4')
        self._sort_formats(formats)
        # The optional "bgimg" query value is the thumbnail; values that do
        # not start with "http" are prefixed with the program.ert.gr host
        thumbnail = parse_qs(url).get('bgimg', [None])[0]
        if thumbnail and not thumbnail.startswith('http'):
            thumbnail = f'https://program.ert.gr{thumbnail}'
        return {
            'id': video_id,
            'title': f'VOD - {video_id}',
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
@@ -361,19 +361,9 @@ from .dplay import (
|
||||
DPlayIE,
|
||||
DiscoveryPlusIE,
|
||||
HGTVDeIE,
|
||||
GoDiscoveryIE,
|
||||
TravelChannelIE,
|
||||
CookingChannelIE,
|
||||
HGTVUsaIE,
|
||||
FoodNetworkIE,
|
||||
InvestigationDiscoveryIE,
|
||||
DestinationAmericaIE,
|
||||
AmHistoryChannelIE,
|
||||
ScienceChannelIE,
|
||||
DIYNetworkIE,
|
||||
DiscoveryLifeIE,
|
||||
AnimalPlanetIE,
|
||||
TLCIE,
|
||||
DiscoveryPlusIndiaIE,
|
||||
DiscoveryNetworksDeIE,
|
||||
DiscoveryPlusItalyIE,
|
||||
@@ -397,6 +387,11 @@ from .dumpert import DumpertIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .digitalconcerthall import DigitalConcertHallIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .discoverygo import (
|
||||
DiscoveryGoIE,
|
||||
DiscoveryGoPlaylistIE,
|
||||
)
|
||||
from .discoveryvr import DiscoveryVRIE
|
||||
from .disney import DisneyIE
|
||||
from .dispeak import DigitallySpeakingIE
|
||||
from .doodstream import DoodStreamIE
|
||||
@@ -438,11 +433,6 @@ from .eroprofile import (
|
||||
EroProfileIE,
|
||||
EroProfileAlbumIE,
|
||||
)
|
||||
from .ertgr import (
|
||||
ERTFlixCodenameIE,
|
||||
ERTFlixIE,
|
||||
ERTWebtvEmbedIE,
|
||||
)
|
||||
from .escapist import EscapistIE
|
||||
from .espn import (
|
||||
ESPNIE,
|
||||
@@ -891,12 +881,6 @@ from .mtv import (
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .musescore import MuseScoreIE
|
||||
from .musicdex import (
|
||||
MusicdexSongIE,
|
||||
MusicdexAlbumIE,
|
||||
MusicdexArtistIE,
|
||||
MusicdexPlaylistIE,
|
||||
)
|
||||
from .mwave import MwaveIE, MwaveMeetGreetIE
|
||||
from .mxplayer import (
|
||||
MxplayerIE,
|
||||
@@ -1845,10 +1829,7 @@ from .vimeo import (
|
||||
VimeoWatchLaterIE,
|
||||
VHXEmbedIE,
|
||||
)
|
||||
from .vimm import (
|
||||
VimmIE,
|
||||
VimmRecordingIE,
|
||||
)
|
||||
from .vimm import VimmIE
|
||||
from .vimple import VimpleIE
|
||||
from .vine import (
|
||||
VineIE,
|
||||
@@ -1995,7 +1976,6 @@ from .yandexmusic import (
|
||||
)
|
||||
from .yandexvideo import (
|
||||
YandexVideoIE,
|
||||
YandexVideoPreviewIE,
|
||||
ZenYandexIE,
|
||||
ZenYandexChannelIE,
|
||||
)
|
||||
@@ -2028,7 +2008,6 @@ from .youtube import (
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeMusicSearchURLIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -80,32 +82,41 @@ class FC2IE(InfoExtractor):
|
||||
self._downloader.cookiejar.clear_session_cookies() # must clear
|
||||
self._login()
|
||||
|
||||
title, thumbnail, description = None, None, None
|
||||
title = 'FC2 video %s' % video_id
|
||||
thumbnail = None
|
||||
if webpage is not None:
|
||||
title = self._html_search_regex(
|
||||
(r'<h2\s+class="videoCnt_title">([^<]+?)</h2>',
|
||||
r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*<img',
|
||||
# there's two matches in the webpage
|
||||
r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*\1'),
|
||||
webpage,
|
||||
'title', fatal=False)
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url
|
||||
|
||||
vidplaylist = self._download_json(
|
||||
'https://video.fc2.com/api/v3/videoplaylist/%s?sh=1&fs=0' % video_id, video_id,
|
||||
note='Downloading info page')
|
||||
vid_url = traverse_obj(vidplaylist, ('playlist', 'nq'))
|
||||
if not vid_url:
|
||||
raise ExtractorError('Unable to extract video URL')
|
||||
vid_url = urljoin('https://video.fc2.com/', vid_url)
|
||||
mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
|
||||
|
||||
info_url = (
|
||||
'http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&'.
|
||||
format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E')))
|
||||
|
||||
info_webpage = self._download_webpage(
|
||||
info_url, video_id, note='Downloading info page')
|
||||
info = compat_urlparse.parse_qs(info_webpage)
|
||||
|
||||
if 'err_code' in info:
|
||||
# most of the time we can still download wideo even if err_code is 403 or 602
|
||||
self.report_warning(
|
||||
'Error code was: %s... but still trying' % info['err_code'][0])
|
||||
|
||||
if 'filepath' not in info:
|
||||
raise ExtractorError('Cannot download file. Are you logged in?')
|
||||
|
||||
video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
|
||||
title_info = info.get('title')
|
||||
if title_info:
|
||||
title = title_info[0]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': vid_url,
|
||||
'ext': 'mp4',
|
||||
'description': description,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,52 +1,48 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
from ..utils import HEADRequest
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FujiTVFODPlus7IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://fod\.fujitv\.co\.jp/title/(?P<sid>[0-9a-z]{4})/(?P<id>[0-9a-z]+)'
|
||||
_BASE_URL = 'https://i.fod.fujitv.co.jp/'
|
||||
_VALID_URL = r'https?://fod\.fujitv\.co\.jp/title/[0-9a-z]{4}/(?P<id>[0-9a-z]+)'
|
||||
_BASE_URL = 'http://i.fod.fujitv.co.jp/'
|
||||
_BITRATE_MAP = {
|
||||
300: (320, 180),
|
||||
800: (640, 360),
|
||||
1200: (1280, 720),
|
||||
2000: (1280, 720),
|
||||
4000: (1920, 1080),
|
||||
}
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076',
|
||||
'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810075',
|
||||
'info_dict': {
|
||||
'id': '5d40110076',
|
||||
'id': '5d40810075',
|
||||
'title': '5d40810075',
|
||||
'ext': 'mp4',
|
||||
'title': '#1318 『まる子、まぼろしの洋館を見る』の巻',
|
||||
'series': 'ちびまる子ちゃん',
|
||||
'series_id': '5d40',
|
||||
'description': 'md5:b3f51dbfdda162ac4f789e0ff4d65750',
|
||||
'thumbnail': 'https://i.fod.fujitv.co.jp/img/program/5d40/episode/5d40110076_a.jpg',
|
||||
'format_id': '4000',
|
||||
'thumbnail': 'http://i.fod.fujitv.co.jp/pc/image/wbtn/wbtn_5d40810075.jpg'
|
||||
},
|
||||
'skip': 'Expires after a week'
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id, video_id = self._match_valid_url(url).groups()
|
||||
self._request_webpage(HEADRequest(url), video_id)
|
||||
json_info = {}
|
||||
token = self._get_cookies(url).get('CT')
|
||||
if token:
|
||||
json_info = self._download_json('https://fod-sp.fujitv.co.jp/apps/api/episode/detail/?ep_id=%s&is_premium=false' % video_id, video_id, headers={'x-authorization': f'Bearer {token.value}'}, fatal=False)
|
||||
else:
|
||||
self.report_warning(f'The token cookie is needed to extract video metadata. {self._LOGIN_HINTS["cookies"]}')
|
||||
formats, subtitles = [], {}
|
||||
src_json = self._download_json(f'{self._BASE_URL}abrjson_v2/tv_android/{video_id}', video_id)
|
||||
for src in src_json['video_selector']:
|
||||
if not src.get('url'):
|
||||
continue
|
||||
fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'mp4')
|
||||
formats.extend(fmt)
|
||||
subtitles = self._merge_subtitles(subtitles, subs)
|
||||
self._sort_formats(formats, ['tbr'])
|
||||
video_id = self._match_id(url)
|
||||
formats = self._extract_m3u8_formats(
|
||||
self._BASE_URL + 'abr/tv_android/%s.m3u8' % video_id, video_id, 'mp4')
|
||||
for f in formats:
|
||||
wh = self._BITRATE_MAP.get(f.get('tbr'))
|
||||
if wh:
|
||||
f.update({
|
||||
'width': wh[0],
|
||||
'height': wh[1],
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': json_info.get('ep_title'),
|
||||
'series': json_info.get('lu_title'),
|
||||
'series_id': series_id,
|
||||
'description': json_info.get('ep_description'),
|
||||
'title': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': f'{self._BASE_URL}img/program/{series_id}/episode/{video_id}_a.jpg',
|
||||
'thumbnail': self._BASE_URL + 'pc/image/wbtn/wbtn_%s.jpg' % video_id,
|
||||
}
|
||||
|
||||
@@ -140,7 +140,6 @@ from .medialaan import MedialaanIE
|
||||
from .simplecast import SimplecastIE
|
||||
from .wimtv import WimTVIE
|
||||
from .tvopengr import TVOpenGrEmbedIE
|
||||
from .ertgr import ERTWebtvEmbedIE
|
||||
from .tvp import TVPEmbedIE
|
||||
from .blogger import BloggerIE
|
||||
from .mainstreaming import MainStreamingIE
|
||||
@@ -1924,15 +1923,6 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
},
|
||||
{
|
||||
'url': 'https://www.ertnews.gr/video/manolis-goyalles-o-anthropos-piso-apo-ti-diadiktyaki-vasilopita/',
|
||||
'info_dict': {
|
||||
'id': '2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:df64f5b61c06d0e9556c0cdd5cf14464',
|
||||
'thumbnail': 'https://www.ert.gr/themata/photos/2021/20220114-apotis6-gouales-pita.jpg',
|
||||
},
|
||||
},
|
||||
{
|
||||
# ThePlatform embedded with whitespaces in URLs
|
||||
'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
|
||||
@@ -3703,13 +3693,6 @@ class GenericIE(InfoExtractor):
|
||||
if tvopengr_urls:
|
||||
return self.playlist_from_matches(tvopengr_urls, video_id, video_title, ie=TVOpenGrEmbedIE.ie_key())
|
||||
|
||||
# Look for ert.gr webtv embeds
|
||||
ertwebtv_urls = list(ERTWebtvEmbedIE._extract_urls(webpage))
|
||||
if len(ertwebtv_urls) == 1:
|
||||
return self.url_result(self._proto_relative_url(ertwebtv_urls[0]), video_title=video_title, url_transparent=True)
|
||||
elif ertwebtv_urls:
|
||||
return self.playlist_from_matches(ertwebtv_urls, video_id, video_title, ie=ERTWebtvEmbedIE.ie_key())
|
||||
|
||||
tvp_urls = TVPEmbedIE._extract_urls(webpage)
|
||||
if tvp_urls:
|
||||
return self.playlist_from_matches(tvp_urls, video_id, video_title, ie=TVPEmbedIE.ie_key())
|
||||
@@ -3815,16 +3798,13 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
if json_ld.get('url') not in (url, None):
|
||||
if json_ld.get('url'):
|
||||
self.report_detected('JSON LD')
|
||||
if determine_ext(json_ld['url']) == 'm3u8':
|
||||
if determine_ext(json_ld.get('url')) == 'm3u8':
|
||||
json_ld['formats'], json_ld['subtitles'] = self._extract_m3u8_formats_and_subtitles(
|
||||
json_ld['url'], video_id, 'mp4')
|
||||
json_ld.pop('url')
|
||||
self._sort_formats(json_ld['formats'])
|
||||
else:
|
||||
json_ld['_type'] = 'url_transparent'
|
||||
json_ld['url'] = smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True})
|
||||
return merge_dicts(json_ld, info_dict)
|
||||
|
||||
def check_video(vurl):
|
||||
|
||||
@@ -12,7 +12,6 @@ from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
orderedSet,
|
||||
@@ -68,28 +67,11 @@ class GloboIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'globo:3607726',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://globoplay.globo.com/v/10248083/',
|
||||
'info_dict': {
|
||||
'id': '10248083',
|
||||
'ext': 'mp4',
|
||||
'title': 'Melhores momentos: Equador 1 x 1 Brasil pelas Eliminatórias da Copa do Mundo 2022',
|
||||
'duration': 530.964,
|
||||
'uploader': 'SporTV',
|
||||
'uploader_id': '698',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
self._request_webpage(
|
||||
HEADRequest('https://globo-ab.globo.com/v2/selected-alternatives?experiments=player-isolated-experiment-02&skipImpressions=true'),
|
||||
video_id, 'Getting cookies')
|
||||
|
||||
video = self._download_json(
|
||||
'http://api.globovideos.com/videos/%s/playlist' % video_id,
|
||||
video_id)['videos'][0]
|
||||
@@ -100,7 +82,7 @@ class GloboIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
security = self._download_json(
|
||||
'https://playback.video.globo.com/v2/video-session', video_id, 'Downloading security hash for %s' % video_id,
|
||||
'https://playback.video.globo.com/v1/video-session', video_id, 'Downloading security hash for %s' % video_id,
|
||||
headers={'content-type': 'application/json'}, data=json.dumps({
|
||||
"player_type": "desktop",
|
||||
"video_id": video_id,
|
||||
@@ -110,9 +92,7 @@ class GloboIE(InfoExtractor):
|
||||
"tz": "-3.0:00"
|
||||
}).encode())
|
||||
|
||||
self._request_webpage(HEADRequest(security['sources'][0]['url_template']), video_id, 'Getting locksession cookie')
|
||||
|
||||
security_hash = security['sources'][0]['token']
|
||||
security_hash = security['source']['token']
|
||||
if not security_hash:
|
||||
message = security.get('message')
|
||||
if message:
|
||||
@@ -135,7 +115,7 @@ class GloboIE(InfoExtractor):
|
||||
md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
|
||||
signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
|
||||
signed_hash = hash_prefix + padded_sign_time + signed_md5
|
||||
source = security['sources'][0]['url_parts']
|
||||
source = security['source']['url_parts']
|
||||
resource_url = source['scheme'] + '://' + source['domain'] + source['path']
|
||||
signed_url = '%s?h=%s&k=html5&a=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A')
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@ import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
@@ -178,38 +177,44 @@ class GlomexEmbedIE(GlomexBaseIE):
|
||||
|
||||
@classmethod
|
||||
def _extract_urls(cls, webpage, origin_url):
|
||||
VALID_SRC = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
|
||||
|
||||
# https://docs.glomex.com/publisher/video-player-integration/javascript-api/
|
||||
quot_re = r'["\']'
|
||||
EMBED_RE = r'''(?x)(?:
|
||||
<iframe[^>]+?src=(?P<_q1>%(quot_re)s)(?P<url>%(url_re)s)(?P=_q1)|
|
||||
<(?P<html_tag>glomex-player|div)(?:
|
||||
data-integration-id=(?P<_q2>%(quot_re)s)(?P<integration_html>(?:(?!(?P=_q2)).)+)(?P=_q2)|
|
||||
data-playlist-id=(?P<_q3>%(quot_re)s)(?P<id_html>(?:(?!(?P=_q3)).)+)(?P=_q3)|
|
||||
data-glomex-player=(?P<_q4>%(quot_re)s)(?P<glomex_player>true)(?P=_q4)|
|
||||
[^>]*?
|
||||
)+>|
|
||||
# naive parsing of inline scripts for hard-coded integration parameters
|
||||
<(?P<script_tag>script)[^<]*?>(?:
|
||||
(?P<_stjs1>dataset\.)?integrationId\s*(?(_stjs1)=|:)\s*
|
||||
(?P<_q5>%(quot_re)s)(?P<integration_js>(?:(?!(?P=_q5)).)+)(?P=_q5)\s*(?(_stjs1);|,)?|
|
||||
(?P<_stjs2>dataset\.)?playlistId\s*(?(_stjs2)=|:)\s*
|
||||
(?P<_q6>%(quot_re)s)(?P<id_js>(?:(?!(?P=_q6)).)+)(?P=_q6)\s*(?(_stjs2);|,)?|
|
||||
(?:\s|.)*?
|
||||
)+</script>
|
||||
)''' % {'quot_re': r'["\']', 'url_re': VALID_SRC}
|
||||
|
||||
regex = fr'''(?x)
|
||||
<iframe[^>]+?src=(?P<q>{quot_re})(?P<url>
|
||||
(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=q)).)+
|
||||
)(?P=q)'''
|
||||
for mobj in re.finditer(regex, webpage):
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
if cls.suitable(url):
|
||||
for mobj in re.finditer(EMBED_RE, webpage):
|
||||
mdict = mobj.groupdict()
|
||||
if mdict.get('url'):
|
||||
url = unescapeHTML(mdict['url'])
|
||||
if not cls.suitable(url):
|
||||
continue
|
||||
yield cls._smuggle_origin_url(url, origin_url)
|
||||
|
||||
regex = fr'''(?x)
|
||||
<glomex-player [^>]+?>|
|
||||
<div[^>]* data-glomex-player=(?P<q>{quot_re})true(?P=q)[^>]*>'''
|
||||
for mobj in re.finditer(regex, webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
if attrs.get('data-integration-id') and attrs.get('data-playlist-id'):
|
||||
yield cls.build_player_url(attrs['data-playlist-id'], attrs['data-integration-id'], origin_url)
|
||||
|
||||
# naive parsing of inline scripts for hard-coded integration parameters
|
||||
regex = fr'''(?x)
|
||||
(?P<is_js>dataset\.)?%s\s*(?(is_js)=|:)\s*
|
||||
(?P<q>{quot_re})(?P<id>(?:(?!(?P=q)).)+)(?P=q)\s'''
|
||||
for mobj in re.finditer(r'(?x)<script[^<]*>.+?</script>', webpage):
|
||||
script = mobj.group(0)
|
||||
integration_id = re.search(regex % 'integrationId', script)
|
||||
if not integration_id:
|
||||
continue
|
||||
playlist_id = re.search(regex % 'playlistId', script)
|
||||
if playlist_id:
|
||||
yield cls.build_player_url(playlist_id, integration_id, origin_url)
|
||||
elif mdict.get('html_tag'):
|
||||
if mdict['html_tag'] == 'div' and not mdict.get('glomex_player'):
|
||||
continue
|
||||
if not mdict.get('video_id_html') or not mdict.get('integration_html'):
|
||||
continue
|
||||
yield cls.build_player_url(mdict['video_id_html'], mdict['integration_html'], origin_url)
|
||||
elif mdict.get('script_tag'):
|
||||
if not mdict.get('video_id_js') or not mdict.get('integration_js'):
|
||||
continue
|
||||
yield cls.build_player_url(mdict['video_id_js'], mdict['integration_js'], origin_url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, origin_url = self._unsmuggle_origin_url(url)
|
||||
|
||||
@@ -8,33 +8,36 @@ from .common import SearchInfoExtractor
|
||||
|
||||
class GoogleSearchIE(SearchInfoExtractor):
|
||||
IE_DESC = 'Google Video search'
|
||||
_MAX_RESULTS = 1000
|
||||
IE_NAME = 'video.google:search'
|
||||
_SEARCH_KEY = 'gvsearch'
|
||||
_TESTS = [{
|
||||
_WORKING = False
|
||||
_TEST = {
|
||||
'url': 'gvsearch15:python language',
|
||||
'info_dict': {
|
||||
'id': 'python language',
|
||||
'title': 'python language',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
}
|
||||
|
||||
def _search_results(self, query):
|
||||
for pagenum in itertools.count():
|
||||
webpage = self._download_webpage(
|
||||
'http://www.google.com/search', f'gvsearch:{query}',
|
||||
note=f'Downloading result page {pagenum + 1}',
|
||||
'http://www.google.com/search',
|
||||
'gvsearch:' + query,
|
||||
note='Downloading result page %s' % (pagenum + 1),
|
||||
query={
|
||||
'tbm': 'vid',
|
||||
'q': query,
|
||||
'start': pagenum * self._PAGE_SIZE,
|
||||
'num': self._PAGE_SIZE,
|
||||
'start': pagenum * 10,
|
||||
'hl': 'en',
|
||||
})
|
||||
|
||||
for url in re.findall(r'<div[^>]* class="dXiKIc"[^>]*><a href="([^"]+)"', webpage):
|
||||
yield self.url_result(url)
|
||||
for hit_idx, mobj in enumerate(re.finditer(
|
||||
r'<h3 class="r"><a href="([^"]+)"', webpage)):
|
||||
if re.search(f'id="vidthumb{hit_idx + 1}"', webpage):
|
||||
yield self.url_result(mobj.group(1))
|
||||
|
||||
if not re.search(r'id="pnnext"', webpage):
|
||||
return
|
||||
|
||||
@@ -64,7 +64,10 @@ class ImgGamingBaseIE(InfoExtractor):
|
||||
domain, media_type, media_id, playlist_id = self._match_valid_url(url).groups()
|
||||
|
||||
if playlist_id:
|
||||
if self._yes_playlist(playlist_id, media_id):
|
||||
if self.get_param('noplaylist'):
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % media_id)
|
||||
else:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
|
||||
media_type, media_id = 'playlist', playlist_id
|
||||
|
||||
if media_type == 'playlist':
|
||||
|
||||
@@ -18,7 +18,6 @@ from ..utils import (
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
std_headers,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
@@ -128,74 +127,6 @@ class InstagramBaseIE(InfoExtractor):
|
||||
'like_count': self._get_count(node, 'likes', 'preview_like'),
|
||||
}
|
||||
|
||||
def _extract_product_media(self, product_media):
|
||||
media_id = product_media.get('code') or product_media.get('id')
|
||||
vcodec = product_media.get('video_codec')
|
||||
dash_manifest_raw = product_media.get('video_dash_manifest')
|
||||
videos_list = product_media.get('video_versions')
|
||||
if not (dash_manifest_raw or videos_list):
|
||||
return {}
|
||||
|
||||
formats = [{
|
||||
'format_id': format.get('id'),
|
||||
'url': format.get('url'),
|
||||
'width': format.get('width'),
|
||||
'height': format.get('height'),
|
||||
'vcodec': vcodec,
|
||||
} for format in videos_list or []]
|
||||
if dash_manifest_raw:
|
||||
formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, media_id), mpd_id='dash'))
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
'url': thumbnail.get('url'),
|
||||
'width': thumbnail.get('width'),
|
||||
'height': thumbnail.get('height')
|
||||
} for thumbnail in traverse_obj(product_media, ('image_versions2', 'candidates')) or []]
|
||||
return {
|
||||
'id': media_id,
|
||||
'duration': float_or_none(product_media.get('video_duration')),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails
|
||||
}
|
||||
|
||||
def _extract_product(self, product_info):
|
||||
if isinstance(product_info, list):
|
||||
product_info = product_info[0]
|
||||
|
||||
user_info = product_info.get('user') or {}
|
||||
info_dict = {
|
||||
'id': product_info.get('code') or product_info.get('id'),
|
||||
'title': product_info.get('title') or f'Video by {user_info.get("username")}',
|
||||
'description': traverse_obj(product_info, ('caption', 'text'), expected_type=str_or_none),
|
||||
'timestamp': int_or_none(product_info.get('taken_at')),
|
||||
'channel': user_info.get('username'),
|
||||
'uploader': user_info.get('full_name'),
|
||||
'uploader_id': str_or_none(user_info.get('pk')),
|
||||
'view_count': int_or_none(product_info.get('view_count')),
|
||||
'like_count': int_or_none(product_info.get('like_count')),
|
||||
'comment_count': int_or_none(product_info.get('comment_count')),
|
||||
'http_headers': {
|
||||
'Referer': 'https://www.instagram.com/',
|
||||
}
|
||||
}
|
||||
carousel_media = product_info.get('carousel_media')
|
||||
if carousel_media:
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
**info_dict,
|
||||
'title': f'Post by {user_info.get("username")}',
|
||||
'entries': [{
|
||||
**info_dict,
|
||||
**self._extract_product_media(product_media),
|
||||
} for product_media in carousel_media],
|
||||
}
|
||||
|
||||
return {
|
||||
**info_dict,
|
||||
**self._extract_product_media(product_info)
|
||||
}
|
||||
|
||||
|
||||
class InstagramIOSIE(InfoExtractor):
|
||||
IE_DESC = 'IOS instagram:// URL'
|
||||
@@ -254,9 +185,8 @@ class InstagramIE(InstagramBaseIE):
|
||||
'duration': 0,
|
||||
'timestamp': 1371748545,
|
||||
'upload_date': '20130620',
|
||||
'uploader_id': '2815873',
|
||||
'uploader_id': 'naomipq',
|
||||
'uploader': 'B E A U T Y F O R A S H E S',
|
||||
'channel': 'naomipq',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
@@ -272,9 +202,8 @@ class InstagramIE(InstagramBaseIE):
|
||||
'duration': 0,
|
||||
'timestamp': 1453760977,
|
||||
'upload_date': '20160125',
|
||||
'uploader_id': '12246775',
|
||||
'uploader_id': 'britneyspears',
|
||||
'uploader': 'Britney Spears',
|
||||
'channel': 'britneyspears',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
@@ -320,9 +249,8 @@ class InstagramIE(InstagramBaseIE):
|
||||
'duration': 53.83,
|
||||
'timestamp': 1530032919,
|
||||
'upload_date': '20180626',
|
||||
'uploader_id': '25025320',
|
||||
'uploader_id': 'instagram',
|
||||
'uploader': 'Instagram',
|
||||
'channel': 'instagram',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
@@ -388,19 +316,16 @@ class InstagramIE(InstagramBaseIE):
|
||||
if not media:
|
||||
additional_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\);',
|
||||
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
|
||||
webpage, 'additional data', default='{}'),
|
||||
video_id, fatal=False)
|
||||
product_item = traverse_obj(additional_data, ('items', 0), expected_type=dict)
|
||||
if product_item:
|
||||
return self._extract_product(product_item)
|
||||
media = traverse_obj(additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {}
|
||||
|
||||
if not media and 'www.instagram.com/accounts/login' in urlh.geturl():
|
||||
self.raise_login_required('You need to log in to access this content')
|
||||
|
||||
username = traverse_obj(media, ('owner', 'username')) or self._search_regex(
|
||||
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'username', fatal=False)
|
||||
uploader_id = traverse_obj(media, ('owner', 'username')) or self._search_regex(
|
||||
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'uploader id', fatal=False)
|
||||
|
||||
description = (
|
||||
traverse_obj(media, ('edge_media_to_caption', 'edges', 0, 'node', 'text'), expected_type=str)
|
||||
@@ -417,7 +342,7 @@ class InstagramIE(InstagramBaseIE):
|
||||
if nodes:
|
||||
return self.playlist_result(
|
||||
self._extract_nodes(nodes, True), video_id,
|
||||
format_field(username, template='Post by %s'), description)
|
||||
format_field(uploader_id, template='Post by %s'), description)
|
||||
|
||||
video_url = self._og_search_video_url(webpage, secure=False)
|
||||
|
||||
@@ -453,13 +378,12 @@ class InstagramIE(InstagramBaseIE):
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': media.get('title') or 'Video by %s' % username,
|
||||
'title': media.get('title') or 'Video by %s' % uploader_id,
|
||||
'description': description,
|
||||
'duration': float_or_none(media.get('video_duration')),
|
||||
'timestamp': traverse_obj(media, 'taken_at_timestamp', 'date', expected_type=int_or_none),
|
||||
'uploader_id': traverse_obj(media, ('owner', 'id')),
|
||||
'uploader_id': uploader_id,
|
||||
'uploader': traverse_obj(media, ('owner', 'full_name')),
|
||||
'channel': username,
|
||||
'like_count': self._get_count(media, 'likes', 'preview_like') or str_to_int(self._search_regex(
|
||||
r'data-log-event="likeCountClick"[^>]*>[^\d]*([\d,\.]+)', webpage, 'like count', fatal=False)),
|
||||
'comment_count': self._get_count(media, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'),
|
||||
@@ -654,6 +578,7 @@ class InstagramStoryIE(InstagramBaseIE):
|
||||
'X-ASBD-ID': 198387,
|
||||
'X-IG-WWW-Claim': 0,
|
||||
})['reels']
|
||||
entites = []
|
||||
|
||||
full_name = traverse_obj(videos, ('user', 'full_name'))
|
||||
|
||||
@@ -667,10 +592,41 @@ class InstagramStoryIE(InstagramBaseIE):
|
||||
username = traverse_obj(user_info, ('user', 'username')) or username
|
||||
full_name = traverse_obj(user_info, ('user', 'full_name')) or full_name
|
||||
|
||||
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
|
||||
return self.playlist_result([{
|
||||
**self._extract_product(highlight),
|
||||
'title': f'Story by {username}',
|
||||
'uploader': full_name,
|
||||
'uploader_id': user_id,
|
||||
} for highlight in highlights], playlist_id=story_id, playlist_title=highlight_title)
|
||||
videos = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
|
||||
for video_info in videos:
|
||||
formats = []
|
||||
if isinstance(video_info, list):
|
||||
video_info = video_info[0]
|
||||
vcodec = video_info.get('video_codec')
|
||||
dash_manifest_raw = video_info.get('video_dash_manifest')
|
||||
videos_list = video_info.get('video_versions')
|
||||
if not (dash_manifest_raw or videos_list):
|
||||
continue
|
||||
for format in videos_list:
|
||||
formats.append({
|
||||
'url': format.get('url'),
|
||||
'width': format.get('width'),
|
||||
'height': format.get('height'),
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
if dash_manifest_raw:
|
||||
formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, story_id), mpd_id='dash'))
|
||||
self._sort_formats(formats)
|
||||
thumbnails = [{
|
||||
'url': thumbnail.get('url'),
|
||||
'width': thumbnail.get('width'),
|
||||
'height': thumbnail.get('height')
|
||||
} for thumbnail in traverse_obj(video_info, ('image_versions2', 'candidates')) or []]
|
||||
entites.append({
|
||||
'id': video_info.get('id'),
|
||||
'title': f'Story by {username}',
|
||||
'timestamp': int_or_none(video_info.get('taken_at')),
|
||||
'channel': username,
|
||||
'uploader': full_name,
|
||||
'duration': float_or_none(video_info.get('video_duration')),
|
||||
'uploader_id': user_id,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
return self.playlist_result(entites, playlist_id=story_id, playlist_title=highlight_title)
|
||||
|
||||
@@ -10,7 +10,6 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_parse_unquote
|
||||
)
|
||||
from .openload import PhantomJSwrapper
|
||||
from ..utils import (
|
||||
@@ -18,7 +17,6 @@ from ..utils import (
|
||||
decode_packed_codes,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
format_field,
|
||||
get_element_by_id,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
@@ -433,10 +431,6 @@ class IqIE(InfoExtractor):
|
||||
'format': '500',
|
||||
},
|
||||
'expected_warnings': ['format is restricted']
|
||||
}, {
|
||||
# VIP-restricted video
|
||||
'url': 'https://www.iq.com/play/mermaid-in-the-fog-2021-gbdpx13bs4',
|
||||
'only_matching': True
|
||||
}]
|
||||
_BID_TAGS = {
|
||||
'100': '240P',
|
||||
@@ -463,8 +457,8 @@ class IqIE(InfoExtractor):
|
||||
_DASH_JS = '''
|
||||
console.log(page.evaluate(function() {
|
||||
var tvid = "%(tvid)s"; var vid = "%(vid)s"; var src = "%(src)s";
|
||||
var uid = "%(uid)s"; var dfp = "%(dfp)s"; var mode = "%(mode)s"; var lang = "%(lang)s";
|
||||
var bid_list = %(bid_list)s; var ut_list = %(ut_list)s; var tm = new Date().getTime();
|
||||
var dfp = "%(dfp)s"; var mode = "%(mode)s"; var lang = "%(lang)s"; var bid_list = %(bid_list)s;
|
||||
var tm = new Date().getTime();
|
||||
var cmd5x_func = %(cmd5x_func)s; var cmd5x_exporter = {}; cmd5x_func({}, cmd5x_exporter, {}); var cmd5x = cmd5x_exporter.cmd5x;
|
||||
var authKey = cmd5x(cmd5x('') + tm + '' + tvid);
|
||||
var k_uid = Array.apply(null, Array(32)).map(function() {return Math.floor(Math.random() * 15).toString(16)}).join('');
|
||||
@@ -478,7 +472,7 @@ class IqIE(InfoExtractor):
|
||||
'src': src,
|
||||
'vt': 0,
|
||||
'rs': 1,
|
||||
'uid': uid,
|
||||
'uid': 0,
|
||||
'ori': 'pcw',
|
||||
'ps': 1,
|
||||
'k_uid': k_uid,
|
||||
@@ -515,14 +509,12 @@ class IqIE(InfoExtractor):
|
||||
'version': '10.0',
|
||||
'dfp': dfp
|
||||
}),
|
||||
'ut': 0, // TODO: Set ut param for VIP members
|
||||
};
|
||||
var enc_params = [];
|
||||
for (var prop in query) {
|
||||
enc_params.push(encodeURIComponent(prop) + '=' + encodeURIComponent(query[prop]));
|
||||
}
|
||||
ut_list.forEach(function(ut) {
|
||||
enc_params.push('ut=' + ut);
|
||||
})
|
||||
var dash_path = '/dash?' + enc_params.join('&'); dash_path += '&vf=' + cmd5x(dash_path);
|
||||
dash_paths[bid] = dash_path;
|
||||
});
|
||||
@@ -579,37 +571,17 @@ class IqIE(InfoExtractor):
|
||||
page_data = next_props['initialState']['play']
|
||||
video_info = page_data['curVideoInfo']
|
||||
|
||||
uid = traverse_obj(
|
||||
self._parse_json(
|
||||
self._get_cookie('I00002', '{}'), video_id, transform_source=compat_urllib_parse_unquote, fatal=False),
|
||||
('data', 'uid'), default=0)
|
||||
|
||||
if uid:
|
||||
vip_data = self._download_json(
|
||||
'https://pcw-api.iq.com/api/vtype', video_id, note='Downloading VIP data', errnote='Unable to download VIP data', query={
|
||||
'batch': 1,
|
||||
'platformId': 3,
|
||||
'modeCode': self._get_cookie('mod', 'intl'),
|
||||
'langCode': self._get_cookie('lang', 'en_us'),
|
||||
'deviceId': self._get_cookie('QC005', '')
|
||||
}, fatal=False)
|
||||
ut_list = traverse_obj(vip_data, ('data', 'all_vip', ..., 'vipType'), expected_type=str_or_none, default=[])
|
||||
else:
|
||||
ut_list = ['0']
|
||||
|
||||
# bid 0 as an initial format checker
|
||||
dash_paths = self._parse_json(PhantomJSwrapper(self).get(
|
||||
url, html='<!DOCTYPE html>', video_id=video_id, note2='Executing signature code', jscode=self._DASH_JS % {
|
||||
'tvid': video_info['tvId'],
|
||||
'vid': video_info['vid'],
|
||||
'src': traverse_obj(next_props, ('initialProps', 'pageProps', 'ptid'),
|
||||
expected_type=str, default='04022001010011000000'),
|
||||
'uid': uid,
|
||||
expected_type=str, default='01010031010018000000'),
|
||||
'dfp': self._get_cookie('dfp', ''),
|
||||
'mode': self._get_cookie('mod', 'intl'),
|
||||
'lang': self._get_cookie('lang', 'en_us'),
|
||||
'bid_list': '[' + ','.join(['0', *self._BID_TAGS.keys()]) + ']',
|
||||
'ut_list': '[' + ','.join(ut_list) + ']',
|
||||
'cmd5x_func': self._extract_cmd5x_function(webpage, video_id),
|
||||
})[1].strip(), video_id)
|
||||
|
||||
@@ -618,10 +590,9 @@ class IqIE(InfoExtractor):
|
||||
urljoin('https://cache-video.iq.com', dash_paths['0']), video_id,
|
||||
note='Downloading initial video format info', errnote='Unable to download initial video format info')['data']
|
||||
|
||||
preview_time = traverse_obj(
|
||||
initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False)
|
||||
if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none):
|
||||
self.report_warning('This preview video is limited%s' % format_field(preview_time, template='to %s seconds'))
|
||||
preview_time = traverse_obj(initial_format_data, ('boss_ts', 'data', 'previewTime'), expected_type=float_or_none)
|
||||
if preview_time:
|
||||
self.report_warning(f'This preview video is limited to {preview_time} seconds')
|
||||
|
||||
# TODO: Extract audio-only formats
|
||||
for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])):
|
||||
|
||||
@@ -7,7 +7,6 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
)
|
||||
@@ -56,6 +55,9 @@ class LiTVIE(InfoExtractor):
|
||||
episode_title = program_info['title']
|
||||
content_id = season_list['contentId']
|
||||
|
||||
if prompt:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (content_id, video_id))
|
||||
|
||||
all_episodes = [
|
||||
self.url_result(smuggle_url(
|
||||
self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']),
|
||||
@@ -65,10 +67,16 @@ class LiTVIE(InfoExtractor):
|
||||
return self.playlist_result(all_episodes, content_id, episode_title)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
url, data = unsmuggle_url(url, {})
|
||||
|
||||
video_id = self._match_id(url)
|
||||
|
||||
noplaylist = self.get_param('noplaylist')
|
||||
noplaylist_prompt = True
|
||||
if 'force_noplaylist' in data:
|
||||
noplaylist = data['force_noplaylist']
|
||||
noplaylist_prompt = False
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
program_info = self._parse_json(self._search_regex(
|
||||
@@ -76,9 +84,14 @@ class LiTVIE(InfoExtractor):
|
||||
video_id)
|
||||
|
||||
season_list = list(program_info.get('seasonList', {}).values())
|
||||
playlist_id = traverse_obj(season_list, 0, 'contentId')
|
||||
if self._yes_playlist(playlist_id, video_id, smuggled_data):
|
||||
return self._extract_playlist(season_list[0], video_id, program_info)
|
||||
if season_list:
|
||||
if not noplaylist:
|
||||
return self._extract_playlist(
|
||||
season_list[0], video_id, program_info,
|
||||
prompt=noplaylist_prompt)
|
||||
|
||||
if noplaylist_prompt:
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
|
||||
# In browsers `getMainUrl` request is always issued. Usually this
|
||||
# endpoint gives the same result as the data embedded in the webpage.
|
||||
|
||||
@@ -7,7 +7,6 @@ import re
|
||||
from .theplatform import ThePlatformBaseIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_qs,
|
||||
@@ -38,7 +37,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
'id': 'F310575103000102',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episodio 1',
|
||||
'description': 'md5:e8017b7d7194e9bfb75299c2b8d81e02',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 2682.0,
|
||||
'upload_date': '20210530',
|
||||
@@ -46,11 +45,6 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
'timestamp': 1622413946,
|
||||
'uploader': 'Canale 5',
|
||||
'uploader_id': 'C5',
|
||||
'season': 'Season 1',
|
||||
'episode': 'Episode 1',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'chapters': [{'start_time': 0.0, 'end_time': 439.88}, {'start_time': 439.88, 'end_time': 1685.84}, {'start_time': 1685.84, 'end_time': 2682.0}],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
|
||||
@@ -59,7 +53,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
'id': 'F309013801000501',
|
||||
'ext': 'mp4',
|
||||
'title': 'Puntata del 25 maggio',
|
||||
'description': 'md5:ee2e456e3eb1dba5e814596655bb5296',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 6565.008,
|
||||
'upload_date': '20200903',
|
||||
@@ -67,11 +61,6 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
'timestamp': 1599172492,
|
||||
'uploader': 'Canale 5',
|
||||
'uploader_id': 'C5',
|
||||
'season': 'Season 5',
|
||||
'episode': 'Episode 5',
|
||||
'season_number': 5,
|
||||
'episode_number': 5,
|
||||
'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/cameracafe5/episodio-69-pezzo-di-luna_F303843101017801',
|
||||
@@ -80,7 +69,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
'id': 'F303843101017801',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episodio 69 - Pezzo di luna',
|
||||
'description': 'md5:7c32c8ec4118b72588b9412f11353f73',
|
||||
'description': '',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 263.008,
|
||||
'upload_date': '20200902',
|
||||
@@ -88,11 +77,6 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
'timestamp': 1599064700,
|
||||
'uploader': 'Italia 1',
|
||||
'uploader_id': 'I1',
|
||||
'season': 'Season 5',
|
||||
'episode': 'Episode 178',
|
||||
'season_number': 5,
|
||||
'episode_number': 178,
|
||||
'chapters': [{'start_time': 0.0, 'end_time': 261.88}, {'start_time': 261.88, 'end_time': 263.008}],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/cameracafe5/episodio-51-tu-chi-sei_F303843107000601',
|
||||
@@ -101,7 +85,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
'id': 'F303843107000601',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episodio 51 - Tu chi sei?',
|
||||
'description': 'md5:42ef006e56824cc31787a547590923f4',
|
||||
'description': '',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 367.021,
|
||||
'upload_date': '20200902',
|
||||
@@ -109,28 +93,6 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
'timestamp': 1599069817,
|
||||
'uploader': 'Italia 1',
|
||||
'uploader_id': 'I1',
|
||||
'season': 'Season 5',
|
||||
'episode': 'Episode 6',
|
||||
'season_number': 5,
|
||||
'episode_number': 6,
|
||||
'chapters': [{'start_time': 0.0, 'end_time': 358.68}, {'start_time': 358.68, 'end_time': 367.021}],
|
||||
},
|
||||
}, {
|
||||
# movie
|
||||
'url': 'https://www.mediasetplay.mediaset.it/movie/selvaggi/selvaggi_F006474501000101',
|
||||
'md5': '720440187a2ae26af8148eb9e6b901ed',
|
||||
'info_dict': {
|
||||
'id': 'F006474501000101',
|
||||
'ext': 'mp4',
|
||||
'title': 'Selvaggi',
|
||||
'description': 'md5:cfdedbbfdd12d4d0e5dcf1fa1b75284f',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 5233.01,
|
||||
'upload_date': '20210729',
|
||||
'timestamp': 1627594716,
|
||||
'uploader': 'Cine34',
|
||||
'uploader_id': 'B6',
|
||||
'chapters': [{'start_time': 0.0, 'end_time': 1938.56}, {'start_time': 1938.56, 'end_time': 5233.01}],
|
||||
},
|
||||
}, {
|
||||
# clip
|
||||
@@ -198,22 +160,6 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src'])
|
||||
return super(MediasetIE, self)._parse_smil_formats(smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
|
||||
|
||||
def _check_drm_formats(self, tp_formats, video_id):
|
||||
has_nondrm, drm_manifest = False, ''
|
||||
for f in tp_formats:
|
||||
if '_sampleaes/' in (f.get('manifest_url') or ''):
|
||||
drm_manifest = drm_manifest or f['manifest_url']
|
||||
f['has_drm'] = True
|
||||
if not f.get('has_drm') and f.get('manifest_url'):
|
||||
has_nondrm = True
|
||||
|
||||
nodrm_manifest = re.sub(r'_sampleaes/(\w+)_fp_', r'/\1_no_', drm_manifest)
|
||||
if has_nondrm or nodrm_manifest == drm_manifest:
|
||||
return
|
||||
|
||||
tp_formats.extend(self._extract_m3u8_formats(
|
||||
nodrm_manifest, video_id, m3u8_id='hls', fatal=False) or [])
|
||||
|
||||
def _real_extract(self, url):
|
||||
guid = self._match_id(url)
|
||||
tp_path = 'PR1GhC/media/guid/2702976343/' + guid
|
||||
@@ -221,10 +167,10 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
first_e = geo_e = None
|
||||
first_e = None
|
||||
asset_type = 'geoNo:HD,browser,geoIT|geoNo:HD,geoIT|geoNo:SD,browser,geoIT|geoNo:SD,geoIT|geoNo|HD|SD'
|
||||
# TODO: fixup ISM+none manifest URLs
|
||||
for f in ('MPEG4', 'M3U'):
|
||||
for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'):
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
|
||||
@@ -233,19 +179,13 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
'assetTypes': asset_type,
|
||||
}), guid, 'Downloading %s SMIL data' % (f.split('+')[0]))
|
||||
except ExtractorError as e:
|
||||
if not geo_e and isinstance(e, GeoRestrictedError):
|
||||
geo_e = e
|
||||
if not first_e:
|
||||
first_e = e
|
||||
continue
|
||||
self._check_drm_formats(tp_formats, guid)
|
||||
break
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
|
||||
# check for errors and report them
|
||||
if (first_e or geo_e) and not formats:
|
||||
raise geo_e or first_e
|
||||
|
||||
if first_e and not formats:
|
||||
raise first_e
|
||||
self._sort_formats(formats)
|
||||
|
||||
feed_data = self._download_json(
|
||||
@@ -261,22 +201,15 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
break
|
||||
|
||||
info.update({
|
||||
'description': info.get('description') or feed_data.get('description') or feed_data.get('longDescription'),
|
||||
'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
|
||||
'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
|
||||
'series': feed_data.get('mediasetprogram$brandTitle'),
|
||||
'uploader': publish_info.get('description'),
|
||||
'uploader_id': publish_info.get('channel'),
|
||||
'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
|
||||
'thumbnail': thumbnail,
|
||||
})
|
||||
|
||||
if feed_data.get('programType') == 'episode':
|
||||
info.update({
|
||||
'episode_number': int_or_none(
|
||||
feed_data.get('tvSeasonEpisodeNumber')),
|
||||
'season_number': int_or_none(
|
||||
feed_data.get('tvSeasonNumber')),
|
||||
'series': feed_data.get('mediasetprogram$brandTitle'),
|
||||
})
|
||||
|
||||
info.update({
|
||||
'id': guid,
|
||||
'formats': formats,
|
||||
@@ -291,29 +224,37 @@ class MediasetShowIE(MediasetIE):
|
||||
https?://
|
||||
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
|
||||
(?:
|
||||
(?:fiction|programmi-tv|serie-tv|kids)/(?:.+?/)?
|
||||
(?:[a-z-]+)_SE(?P<id>\d{12})
|
||||
(?:fiction|programmi-tv|serie-tv)/(?:.+?/)?
|
||||
(?:[a-z]+)_SE(?P<id>\d{12})
|
||||
(?:,ST(?P<st>\d{12}))?
|
||||
(?:,sb(?P<sb>\d{9}))?$
|
||||
)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# TV Show webpage (general webpage)
|
||||
'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/leiene_SE000000000061',
|
||||
# TV Show webpage (with a single playlist)
|
||||
'url': 'https://www.mediasetplay.mediaset.it/serie-tv/fireforce/episodi_SE000000001556',
|
||||
'info_dict': {
|
||||
'id': '000000000061',
|
||||
'title': 'Le Iene',
|
||||
'id': '000000001556',
|
||||
'title': 'Fire Force',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
# TV Show webpage (specific season)
|
||||
# TV Show webpage (with multiple playlists)
|
||||
'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/leiene_SE000000000061,ST000000002763',
|
||||
'info_dict': {
|
||||
'id': '000000002763',
|
||||
'title': 'Le Iene',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
'playlist_count': 7,
|
||||
}, {
|
||||
# TV Show specific playlist (single page)
|
||||
'url': 'https://www.mediasetplay.mediaset.it/serie-tv/fireforce/episodi_SE000000001556,ST000000002738,sb100013107',
|
||||
'info_dict': {
|
||||
'id': '100013107',
|
||||
'title': 'Episodi',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
# TV Show specific playlist (with multiple pages)
|
||||
'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/iservizi_SE000000000061,ST000000002763,sb100013375',
|
||||
@@ -321,7 +262,7 @@ class MediasetShowIE(MediasetIE):
|
||||
'id': '100013375',
|
||||
'title': 'I servizi',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
'playlist_count': 53,
|
||||
}]
|
||||
|
||||
_BY_SUBBRAND = 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2?byCustomValue={subBrandId}{%s}&sort=:publishInfo_lastPublished|desc,tvSeasonEpisodeNumber|desc&range=%d-%d'
|
||||
@@ -340,7 +281,7 @@ class MediasetShowIE(MediasetIE):
|
||||
def _real_extract(self, url):
|
||||
playlist_id, st, sb = self._match_valid_url(url).group('id', 'st', 'sb')
|
||||
if not sb:
|
||||
page = self._download_webpage(url, st or playlist_id)
|
||||
page = self._download_webpage(url, playlist_id)
|
||||
entries = [self.url_result(urljoin('https://www.mediasetplay.mediaset.it', url))
|
||||
for url in re.findall(r'href="([^<>=]+SE\d{12},ST\d{12},sb\d{9})">[^<]+<', page)]
|
||||
title = (self._html_search_regex(r'(?s)<h1[^>]*>(.+?)</h1>', page, 'title', default=None)
|
||||
|
||||
@@ -12,8 +12,6 @@ from ..utils import (
|
||||
update_url_query,
|
||||
random_uuidv4,
|
||||
try_get,
|
||||
float_or_none,
|
||||
dict_get
|
||||
)
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
@@ -24,18 +22,9 @@ class MildomBaseIE(InfoExtractor):
|
||||
_GUEST_ID = None
|
||||
_DISPATCHER_CONFIG = None
|
||||
|
||||
def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', init=False):
|
||||
query = query or {}
|
||||
if query:
|
||||
query['__platform'] = 'web'
|
||||
def _call_api(self, url, video_id, query={}, note='Downloading JSON metadata', init=False):
|
||||
url = update_url_query(url, self._common_queries(query, init=init))
|
||||
content = self._download_json(url, video_id, note=note)
|
||||
if content['code'] == 0:
|
||||
return content['body']
|
||||
else:
|
||||
self.raise_no_formats(
|
||||
f'Video not found or premium content. {content["code"]} - {content["message"]}',
|
||||
expected=True)
|
||||
return self._download_json(url, video_id, note=note)['body']
|
||||
|
||||
def _common_queries(self, query={}, init=False):
|
||||
dc = self._fetch_dispatcher_config()
|
||||
@@ -159,7 +148,6 @@ class MildomIE(MildomBaseIE):
|
||||
'id': result_video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': float_or_none(enterstudio.get('live_start_ms'), scale=1000),
|
||||
'uploader': uploader,
|
||||
'uploader_id': video_id,
|
||||
'formats': formats,
|
||||
@@ -170,50 +158,7 @@ class MildomIE(MildomBaseIE):
|
||||
class MildomVodIE(MildomBaseIE):
|
||||
IE_NAME = 'mildom:vod'
|
||||
IE_DESC = 'Download a VOD in Mildom'
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
|
||||
'info_dict': {
|
||||
'id': '10882672-1597662269',
|
||||
'ext': 'mp4',
|
||||
'title': '始めてのミルダム配信じゃぃ!',
|
||||
'thumbnail': r're:^https?://.*\.(png|jpg)$',
|
||||
'upload_date': '20200817',
|
||||
'duration': 4138.37,
|
||||
'description': 'ゲームをしたくて!',
|
||||
'timestamp': 1597662269.0,
|
||||
'uploader_id': '10882672',
|
||||
'uploader': 'kson組長(けいそん)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477',
|
||||
'info_dict': {
|
||||
'id': '10882672-1597758589870-477',
|
||||
'ext': 'mp4',
|
||||
'title': '【kson】感染メイズ!麻酔銃で無双する',
|
||||
'thumbnail': r're:^https?://.*\.(png|jpg)$',
|
||||
'timestamp': 1597759093.0,
|
||||
'uploader': 'kson組長(けいそん)',
|
||||
'duration': 4302.58,
|
||||
'uploader_id': '10882672',
|
||||
'description': 'このステージ絶対乗り越えたい',
|
||||
'upload_date': '20200818',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0',
|
||||
'info_dict': {
|
||||
'id': '10882672-buha9td2lrn97fk2jme0',
|
||||
'ext': 'mp4',
|
||||
'title': '【kson組長】CART RACER!!!',
|
||||
'thumbnail': r're:^https?://.*\.(png|jpg)$',
|
||||
'uploader_id': '10882672',
|
||||
'uploader': 'kson組長(けいそん)',
|
||||
'upload_date': '20201104',
|
||||
'timestamp': 1604494797.0,
|
||||
'duration': 4657.25,
|
||||
'description': 'WTF',
|
||||
},
|
||||
}]
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = self._match_valid_url(url)
|
||||
@@ -268,9 +213,6 @@ class MildomVodIE(MildomBaseIE):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': float_or_none(autoplay['publish_time'], scale=1000),
|
||||
'duration': float_or_none(autoplay['video_length'], scale=1000),
|
||||
'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
|
||||
'uploader': uploader,
|
||||
'uploader_id': user_id,
|
||||
'formats': formats,
|
||||
@@ -288,13 +230,6 @@ class MildomUserVodIE(MildomBaseIE):
|
||||
'title': 'Uploads from ねこばたけ',
|
||||
},
|
||||
'playlist_mincount': 351,
|
||||
}, {
|
||||
'url': 'https://www.mildom.com/profile/10882672',
|
||||
'info_dict': {
|
||||
'id': '10882672',
|
||||
'title': 'Uploads from kson組長(けいそん)',
|
||||
},
|
||||
'playlist_mincount': 191,
|
||||
}]
|
||||
|
||||
def _entries(self, user_id):
|
||||
|
||||
@@ -1,175 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
date_from_str,
|
||||
format_field,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class MusicdexBaseIE(InfoExtractor):
|
||||
def _return_info(self, track_json, album_json, id):
|
||||
return {
|
||||
'id': str(id),
|
||||
'title': track_json.get('name'),
|
||||
'track': track_json.get('name'),
|
||||
'description': track_json.get('description'),
|
||||
'track_number': track_json.get('number'),
|
||||
'url': format_field(track_json, 'url', 'https://www.musicdex.org/%s'),
|
||||
'duration': track_json.get('duration'),
|
||||
'genre': [genre.get('name') for genre in track_json.get('genres') or []],
|
||||
'like_count': track_json.get('likes_count'),
|
||||
'view_count': track_json.get('plays'),
|
||||
'artist': [artist.get('name') for artist in track_json.get('artists') or []],
|
||||
'album_artist': [artist.get('name') for artist in album_json.get('artists') or []],
|
||||
'thumbnail': format_field(album_json, 'image', 'https://www.musicdex.org/%s'),
|
||||
'album': album_json.get('name'),
|
||||
'release_year': try_get(album_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
|
||||
'extractor_key': MusicdexSongIE.ie_key(),
|
||||
'extractor': 'MusicdexSong',
|
||||
}
|
||||
|
||||
|
||||
class MusicdexSongIE(MusicdexBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?musicdex\.org/track/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.musicdex.org/track/306/dual-existence',
|
||||
'info_dict': {
|
||||
'id': '306',
|
||||
'ext': 'mp3',
|
||||
'title': 'dual existence',
|
||||
'description': '#NIPPONSEI @ IRC.RIZON.NET',
|
||||
'track': 'dual existence',
|
||||
'track_number': 1,
|
||||
'duration': 266000,
|
||||
'genre': ['Anime'],
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'artist': ['fripSide'],
|
||||
'album_artist': ['fripSide'],
|
||||
'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png',
|
||||
'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence',
|
||||
'release_year': 2020
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
data_json = self._download_json(f'https://www.musicdex.org/secure/tracks/{id}?defaultRelations=true', id)['track']
|
||||
return self._return_info(data_json, data_json.get('album') or {}, id)
|
||||
|
||||
|
||||
class MusicdexAlbumIE(MusicdexBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?musicdex\.org/album/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.musicdex.org/album/56/tenmon-and-eiichiro-yanagi-minori/ef-a-tale-of-memories-original-soundtrack-2-fortissimo',
|
||||
'playlist_mincount': 28,
|
||||
'info_dict': {
|
||||
'id': '56',
|
||||
'genre': ['OST'],
|
||||
'view_count': int,
|
||||
'artist': ['TENMON & Eiichiro Yanagi / minori'],
|
||||
'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~',
|
||||
'release_year': 2008,
|
||||
'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
data_json = self._download_json(f'https://www.musicdex.org/secure/albums/{id}?defaultRelations=true', id)['album']
|
||||
entries = [self._return_info(track, data_json, track['id']) for track in data_json.get('tracks') or [] if track.get('id')]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': id,
|
||||
'title': data_json.get('name'),
|
||||
'description': data_json.get('description'),
|
||||
'genre': [genre.get('name') for genre in data_json.get('genres') or []],
|
||||
'view_count': data_json.get('plays'),
|
||||
'artist': [artist.get('name') for artist in data_json.get('artists') or []],
|
||||
'thumbnail': format_field(data_json, 'image', 'https://www.musicdex.org/%s'),
|
||||
'release_year': try_get(data_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class MusicdexPageIE(MusicdexBaseIE):
|
||||
def _entries(self, id):
|
||||
next_page_url = self._API_URL % id
|
||||
while next_page_url:
|
||||
data_json = self._download_json(next_page_url, id)['pagination']
|
||||
for data in data_json.get('data') or []:
|
||||
yield data
|
||||
next_page_url = data_json.get('next_page_url')
|
||||
|
||||
|
||||
class MusicdexArtistIE(MusicdexPageIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?musicdex\.org/artist/(?P<id>\d+)'
|
||||
_API_URL = 'https://www.musicdex.org/secure/artists/%s/albums?page=1'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.musicdex.org/artist/11/fripside',
|
||||
'playlist_mincount': 28,
|
||||
'info_dict': {
|
||||
'id': '11',
|
||||
'view_count': int,
|
||||
'title': 'fripSide',
|
||||
'thumbnail': 'https://www.musicdex.org/storage/artist/ZmOz0lN2vsweegB660em3xWffCjLPmTQHqJls5Xx.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
data_json = self._download_json(f'https://www.musicdex.org/secure/artists/{id}', id)['artist']
|
||||
entries = []
|
||||
for album in self._entries(id):
|
||||
entries.extend(self._return_info(track, album, track['id']) for track in album.get('tracks') or [] if track.get('id'))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': id,
|
||||
'title': data_json.get('name'),
|
||||
'view_count': data_json.get('plays'),
|
||||
'thumbnail': format_field(data_json, 'image_small', 'https://www.musicdex.org/%s'),
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class MusicdexPlaylistIE(MusicdexPageIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?musicdex\.org/playlist/(?P<id>\d+)'
|
||||
_API_URL = 'https://www.musicdex.org/secure/playlists/%s/tracks?perPage=10000&page=1'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.musicdex.org/playlist/9/test',
|
||||
'playlist_mincount': 73,
|
||||
'info_dict': {
|
||||
'id': '9',
|
||||
'view_count': int,
|
||||
'title': 'Test',
|
||||
'thumbnail': 'https://www.musicdex.org/storage/album/jXATI79f0IbQ2sgsKYOYRCW3zRwF3XsfHhzITCuJ.jpg',
|
||||
'description': 'Test 123 123 21312 32121321321321312',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
data_json = self._download_json(f'https://www.musicdex.org/secure/playlists/{id}', id)['playlist']
|
||||
entries = [self._return_info(track, track.get('album') or {}, track['id'])
|
||||
for track in self._entries(id) or [] if track.get('id')]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': id,
|
||||
'title': data_json.get('name'),
|
||||
'description': data_json.get('description'),
|
||||
'view_count': data_json.get('plays'),
|
||||
'thumbnail': format_field(data_json, 'image', 'https://www.musicdex.org/%s'),
|
||||
'entries': entries,
|
||||
}
|
||||
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
@@ -11,74 +13,33 @@ from ..utils import (
|
||||
|
||||
|
||||
class MySpassIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?myspass\.de/(?:[^/]+/)*(?P<id>\d+)/?[^/]*$'
|
||||
_TESTS = [{
|
||||
_VALID_URL = r'https?://(?:www\.)?myspass\.de/([^/]+/)*(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
|
||||
'md5': '0b49f4844a068f8b33f4b7c88405862b',
|
||||
'info_dict': {
|
||||
'id': '11741',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:9f0db5044c8fe73f528a390498f7ce9b',
|
||||
'description': 'Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?',
|
||||
'title': '17.02.2013 - Die Highlights, Teil 2',
|
||||
'thumbnail': r're:.*\.jpg',
|
||||
'duration': 323.0,
|
||||
'episode': '17.02.2013 - Die Highlights, Teil 2',
|
||||
'season_id': '544',
|
||||
'episode_number': 1,
|
||||
'series': 'Absolute Mehrheit',
|
||||
'season_number': 2,
|
||||
'season': 'Season 2',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.myspass.de/shows/tvshows/tv-total/Novak-Puffovic-bei-bester-Laune--/44996/',
|
||||
'md5': 'eb28b7c5e254192046e86ebaf7deac8f',
|
||||
'info_dict': {
|
||||
'id': '44996',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:74c7f886e00834417f1e427ab0da6121',
|
||||
'title': 'Novak Puffovic bei bester Laune',
|
||||
'thumbnail': r're:.*\.jpg',
|
||||
'episode_number': 8,
|
||||
'episode': 'Novak Puffovic bei bester Laune',
|
||||
'series': 'TV total',
|
||||
'season': 'Season 19',
|
||||
'season_id': '987',
|
||||
'duration': 2941.0,
|
||||
'season_number': 19,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.myspass.de/channels/tv-total-raabigramm/17033/20831/',
|
||||
'md5': '7b293a6b9f3a7acdd29304c8d0dbb7cc',
|
||||
'info_dict': {
|
||||
'id': '20831',
|
||||
'ext': 'mp4',
|
||||
'description': 'Gefühle pur: Schaut euch die ungeschnittene Version von Stefans Liebesbeweis an die Moderationsgrazie von Welt, Verona Feldbusch, an.',
|
||||
'title': 'Raabigramm Verona Feldbusch',
|
||||
'thumbnail': r're:.*\.jpg',
|
||||
'episode_number': 6,
|
||||
'episode': 'Raabigramm Verona Feldbusch',
|
||||
'series': 'TV total',
|
||||
'season': 'Season 1',
|
||||
'season_id': '34',
|
||||
'duration': 105.0,
|
||||
'season_number': 1,
|
||||
},
|
||||
}]
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
metadata = self._download_xml('http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=' + video_id, video_id)
|
||||
metadata = self._download_xml(
|
||||
'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=' + video_id,
|
||||
video_id)
|
||||
|
||||
title = xpath_text(metadata, 'title', fatal=True)
|
||||
video_url = xpath_text(metadata, 'url_flv', 'download url', True)
|
||||
video_id_int = int(video_id)
|
||||
for group in self._search_regex(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url, 'myspass', group=(1, 2, 3), default=[]):
|
||||
for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups():
|
||||
group_int = int(group)
|
||||
if group_int > video_id_int:
|
||||
video_url = video_url.replace(group, compat_str(group_int // video_id_int))
|
||||
video_url = video_url.replace(
|
||||
group, compat_str(group_int // video_id_int))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -165,10 +165,14 @@ class NBAWatchIE(NBAWatchBaseIE):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
collection_id = parse_qs(url).get('collection', [None])[0]
|
||||
if self._yes_playlist(collection_id, display_id):
|
||||
return self.url_result(
|
||||
'https://www.nba.com/watch/list/collection/' + collection_id,
|
||||
NBAWatchCollectionIE.ie_key(), collection_id)
|
||||
if collection_id:
|
||||
if self.get_param('noplaylist'):
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % display_id)
|
||||
else:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % collection_id)
|
||||
return self.url_result(
|
||||
'https://www.nba.com/watch/list/collection/' + collection_id,
|
||||
NBAWatchCollectionIE.ie_key(), collection_id)
|
||||
return self._extract_video('seoName', display_id)
|
||||
|
||||
|
||||
|
||||
@@ -405,12 +405,17 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
name = info['name']
|
||||
description = info['description']
|
||||
|
||||
if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
|
||||
if not info['songs'] or self.get_param('noplaylist'):
|
||||
if info['songs']:
|
||||
self.to_screen(
|
||||
'Downloading just the main audio %s because of --no-playlist'
|
||||
% info['mainSong']['id'])
|
||||
|
||||
formats = self.extract_formats(info['mainSong'])
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': info['mainSong']['id'],
|
||||
'id': program_id,
|
||||
'title': name,
|
||||
'description': description,
|
||||
'creator': info['dj']['brand'],
|
||||
@@ -420,6 +425,10 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
self.to_screen(
|
||||
'Downloading playlist %s - add --no-playlist to just download the main audio %s'
|
||||
% (program_id, info['mainSong']['id']))
|
||||
|
||||
song_ids = [info['mainSong']['id']]
|
||||
song_ids.extend([song['id'] for song in info['songs']])
|
||||
entries = [
|
||||
|
||||
@@ -5,9 +5,11 @@ import base64
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
parse_codecs,
|
||||
parse_duration,
|
||||
)
|
||||
@@ -45,8 +47,10 @@ class NewstubeIE(InfoExtractor):
|
||||
}))
|
||||
key = hashlib.pbkdf2_hmac(
|
||||
'sha1', video_guid.replace('-', '').encode(), enc_data[:16], 1)[:16]
|
||||
dec_data = unpad_pkcs7(aes_cbc_decrypt_bytes(enc_data[32:], key, enc_data[16:32]))
|
||||
sources = self._parse_json(dec_data, video_guid)
|
||||
dec_data = aes_cbc_decrypt(
|
||||
bytes_to_intlist(enc_data[32:]), bytes_to_intlist(key),
|
||||
bytes_to_intlist(enc_data[16:32]))
|
||||
sources = self._parse_json(intlist_to_bytes(dec_data[:-dec_data[-1]]), video_guid)
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
|
||||
@@ -35,38 +35,6 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
(?P<id>[\d-]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'note': 'Coub embedded',
|
||||
'url': 'http://ok.ru/video/1484130554189',
|
||||
'info_dict': {
|
||||
'id': '1keok9',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1545580896,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://coub-anubis-a.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
|
||||
'title': 'Народная забава',
|
||||
'uploader': 'Nevata',
|
||||
'upload_date': '20181223',
|
||||
'age_limit': 0,
|
||||
'uploader_id': 'nevata.s',
|
||||
'like_count': int,
|
||||
'duration': 8.08,
|
||||
'repost_count': int,
|
||||
},
|
||||
}, {
|
||||
'note': 'vk.com embedded',
|
||||
'url': 'https://ok.ru/video/3568183087575',
|
||||
'info_dict': {
|
||||
'id': '-165101755_456243749',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': '-165101755',
|
||||
'duration': 132,
|
||||
'timestamp': 1642869935,
|
||||
'upload_date': '20220122',
|
||||
'thumbnail': str,
|
||||
'title': str,
|
||||
'uploader': str,
|
||||
},
|
||||
}, {
|
||||
# metadata in JSON
|
||||
'url': 'http://ok.ru/video/20079905452',
|
||||
'md5': '0b62089b479e06681abaaca9d204f152',
|
||||
@@ -203,10 +171,6 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
webpage, 'player', group='player')),
|
||||
video_id)
|
||||
|
||||
# embedded external player
|
||||
if player.get('isExternalPlayer') and player.get('url'):
|
||||
return self.url_result(player['url'])
|
||||
|
||||
flashvars = player['flashvars']
|
||||
|
||||
metadata = flashvars.get('metadata')
|
||||
@@ -262,14 +226,6 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'start_time': start_time,
|
||||
}
|
||||
|
||||
# pladform
|
||||
if provider == 'OPEN_GRAPH':
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': movie['contentId'],
|
||||
})
|
||||
return info
|
||||
|
||||
if provider == 'USER_YOUTUBE':
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
|
||||
@@ -182,9 +182,14 @@ class OnetChannelIE(OnetBaseIE):
|
||||
video_id = remove_start(current_clip_info['ckmId'], 'mvp:')
|
||||
video_name = url_basename(current_clip_info['url'])
|
||||
|
||||
if not self._yes_playlist(channel_id, video_name, playlist_label='channel'):
|
||||
if self.get_param('noplaylist'):
|
||||
self.to_screen(
|
||||
'Downloading just video %s because of --no-playlist' % video_name)
|
||||
return self._extract_from_id(video_id, webpage)
|
||||
|
||||
self.to_screen(
|
||||
'Downloading channel %s - add --no-playlist to just download video %s' % (
|
||||
channel_id, video_name))
|
||||
matches = re.findall(
|
||||
r'<a[^>]+href=[\'"](%s[a-z]+/[0-9a-z-]+/[0-9a-z]+)' % self._URL_BASE_RE,
|
||||
webpage)
|
||||
|
||||
@@ -1,26 +1,23 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
orderedSet,
|
||||
remove_end,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -28,40 +25,9 @@ from ..utils import (
|
||||
class ORFTVthekIE(InfoExtractor):
|
||||
IE_NAME = 'orf:tvthek'
|
||||
IE_DESC = 'ORF TVthek'
|
||||
_VALID_URL = r'(?P<url>https?://tvthek\.orf\.at/(?:(?:[^/]+/){2}){1,2}(?P<id>\d+))(/[^/]+/(?P<vid>\d+))?(?:$|[?#])'
|
||||
_VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079',
|
||||
'info_dict': {
|
||||
'id': '14121079',
|
||||
},
|
||||
'playlist_count': 11,
|
||||
'params': {'noplaylist': True}
|
||||
}, {
|
||||
'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150',
|
||||
'info_dict': {
|
||||
'id': '14121079',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'params': {'playlist_items': '5'}
|
||||
}, {
|
||||
'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150',
|
||||
'info_dict': {
|
||||
'id': '14121079',
|
||||
'playlist_count': 1
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '15083150',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:7be1c485425f5f255a5e4e4815e77d04',
|
||||
'thumbnail': 'https://api-tvthek.orf.at/uploads/media/segments/0130/59/824271ea35cd8931a0fb08ab316a5b0a1562342c.jpeg',
|
||||
'title': 'Umfrage: Welches Tier ist Sebastian Kurz?',
|
||||
}
|
||||
}],
|
||||
'playlist_count': 1,
|
||||
'params': {'noplaylist': True, 'skip_download': 'm3u8'}
|
||||
}, {
|
||||
'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
|
||||
'playlist': [{
|
||||
'md5': '2942210346ed779588f428a92db88712',
|
||||
@@ -96,90 +62,8 @@ class ORFTVthekIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _pagefunc(self, url, data_jsb, n, *, image=None):
|
||||
sd = data_jsb[n]
|
||||
video_id, title = str(sd['id']), sd['title']
|
||||
formats = []
|
||||
for fd in sd['sources']:
|
||||
src = url_or_none(fd.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd)
|
||||
ext = determine_ext(src)
|
||||
if ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', m3u8_id=format_id, fatal=False, note=f'Downloading {format_id} m3u8 manifest')
|
||||
if any('/geoprotection' in f['url'] for f in m3u8_formats):
|
||||
self.raise_geo_restricted()
|
||||
formats.extend(m3u8_formats)
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
src, video_id, f4m_id=format_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src, video_id, mpd_id=format_id, fatal=False, note=f'Downloading {format_id} mpd manifest'))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': src,
|
||||
'protocol': fd.get('protocol'),
|
||||
})
|
||||
|
||||
# Check for geoblocking.
|
||||
# There is a property is_geoprotection, but that's always false
|
||||
geo_str = sd.get('geoprotection_string')
|
||||
http_url = next(
|
||||
(f['url'] for f in formats if re.match(r'^https?://.*\.mp4$', f['url'])),
|
||||
None) if geo_str else None
|
||||
if http_url:
|
||||
self._request_webpage(
|
||||
HEADRequest(http_url), video_id, fatal=False, note='Testing for geoblocking',
|
||||
errnote=f'This video seems to be blocked outside of {geo_str}. You may want to try the streaming-* formats')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for sub in sd.get('subtitles', []):
|
||||
sub_src = sub.get('src')
|
||||
if not sub_src:
|
||||
continue
|
||||
subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
|
||||
'url': sub_src,
|
||||
})
|
||||
|
||||
upload_date = unified_strdate(sd.get('created_date'))
|
||||
|
||||
thumbnails = []
|
||||
preview = sd.get('preview_image_url')
|
||||
if preview:
|
||||
thumbnails.append({
|
||||
'id': 'preview',
|
||||
'url': preview,
|
||||
'preference': 0,
|
||||
})
|
||||
image = sd.get('image_full_url') or image
|
||||
if image:
|
||||
thumbnails.append({
|
||||
'id': 'full',
|
||||
'url': image,
|
||||
'preference': 1,
|
||||
})
|
||||
|
||||
yield {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'webpage_url': smuggle_url(f'{url}/part/{video_id}', {'force_noplaylist': True}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'description': sd.get('description'),
|
||||
'duration': int_or_none(sd.get('duration_in_seconds')),
|
||||
'upload_date': upload_date,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url)
|
||||
playlist_id, video_id, base_url = self._match_valid_url(url).group('id', 'vid', 'url')
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
data_jsb = self._parse_json(
|
||||
@@ -188,16 +72,107 @@ class ORFTVthekIE(InfoExtractor):
|
||||
webpage, 'playlist', group='json'),
|
||||
playlist_id, transform_source=unescapeHTML)['playlist']['videos']
|
||||
|
||||
if not self._yes_playlist(playlist_id, video_id, smuggled_data):
|
||||
data_jsb = [sd for sd in data_jsb if str(sd.get('id')) == video_id]
|
||||
entries = []
|
||||
for sd in data_jsb:
|
||||
video_id, title = sd.get('id'), sd.get('title')
|
||||
if not video_id or not title:
|
||||
continue
|
||||
video_id = compat_str(video_id)
|
||||
formats = []
|
||||
for fd in sd['sources']:
|
||||
src = url_or_none(fd.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd)
|
||||
ext = determine_ext(src)
|
||||
if ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', m3u8_id=format_id, fatal=False)
|
||||
if any('/geoprotection' in f['url'] for f in m3u8_formats):
|
||||
self.raise_geo_restricted()
|
||||
formats.extend(m3u8_formats)
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
src, video_id, f4m_id=format_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src, video_id, mpd_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': src,
|
||||
'protocol': fd.get('protocol'),
|
||||
})
|
||||
|
||||
playlist_count = len(data_jsb)
|
||||
image = self._og_search_thumbnail(webpage) if playlist_count == 1 else None
|
||||
# Check for geoblocking.
|
||||
# There is a property is_geoprotection, but that's always false
|
||||
geo_str = sd.get('geoprotection_string')
|
||||
if geo_str:
|
||||
try:
|
||||
http_url = next(
|
||||
f['url']
|
||||
for f in formats
|
||||
if re.match(r'^https?://.*\.mp4$', f['url']))
|
||||
except StopIteration:
|
||||
pass
|
||||
else:
|
||||
req = HEADRequest(http_url)
|
||||
self._request_webpage(
|
||||
req, video_id,
|
||||
note='Testing for geoblocking',
|
||||
errnote=((
|
||||
'This video seems to be blocked outside of %s. '
|
||||
'You may want to try the streaming-* formats.')
|
||||
% geo_str),
|
||||
fatal=False)
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for sub in sd.get('subtitles', []):
|
||||
sub_src = sub.get('src')
|
||||
if not sub_src:
|
||||
continue
|
||||
subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
|
||||
'url': sub_src,
|
||||
})
|
||||
|
||||
upload_date = unified_strdate(sd.get('created_date'))
|
||||
|
||||
thumbnails = []
|
||||
preview = sd.get('preview_image_url')
|
||||
if preview:
|
||||
thumbnails.append({
|
||||
'id': 'preview',
|
||||
'url': preview,
|
||||
'preference': 0,
|
||||
})
|
||||
image = sd.get('image_full_url')
|
||||
if not image and len(data_jsb) == 1:
|
||||
image = self._og_search_thumbnail(webpage)
|
||||
if image:
|
||||
thumbnails.append({
|
||||
'id': 'full',
|
||||
'url': image,
|
||||
'preference': 1,
|
||||
})
|
||||
|
||||
entries.append({
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'description': sd.get('description'),
|
||||
'duration': int_or_none(sd.get('duration_in_seconds')),
|
||||
'upload_date': upload_date,
|
||||
'thumbnails': thumbnails,
|
||||
})
|
||||
|
||||
page_func = functools.partial(self._pagefunc, base_url, data_jsb, image=image)
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': InAdvancePagedList(page_func, playlist_count, 1),
|
||||
'entries': entries,
|
||||
'id': playlist_id,
|
||||
}
|
||||
|
||||
|
||||
@@ -28,24 +28,6 @@ class PladformIE(InfoExtractor):
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://out.pladform.ru/player?pl=18079&type=html5&videoid=100231282',
|
||||
'info_dict': {
|
||||
'id': '6216d548e755edae6e8280667d774791',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1406117012,
|
||||
'title': 'Гарик Мартиросян и Гарик Харламов - Кастинг на концерт ко Дню милиции',
|
||||
'age_limit': 0,
|
||||
'upload_date': '20140723',
|
||||
'thumbnail': str,
|
||||
'view_count': int,
|
||||
'description': str,
|
||||
'category': list,
|
||||
'uploader_id': '12082',
|
||||
'uploader': 'Comedy Club',
|
||||
'duration': 367,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404: Not Found']
|
||||
}, {
|
||||
'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0',
|
||||
'md5': '53362fac3a27352da20fa2803cc5cd6f',
|
||||
'info_dict': {
|
||||
@@ -81,19 +63,13 @@ class PladformIE(InfoExtractor):
|
||||
'http://out.pladform.ru/getVideo', video_id, query={
|
||||
'pl': pl,
|
||||
'videoid': video_id,
|
||||
}, fatal=False)
|
||||
})
|
||||
|
||||
def fail(text):
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, text),
|
||||
expected=True)
|
||||
|
||||
if not video:
|
||||
targetUrl = self._request_webpage(url, video_id, note='Resolving final URL').geturl()
|
||||
if targetUrl == url:
|
||||
raise ExtractorError('Can\'t parse page')
|
||||
return self.url_result(targetUrl)
|
||||
|
||||
if video.tag == 'error':
|
||||
fail(video.text)
|
||||
|
||||
|
||||
@@ -4,13 +4,16 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
ExtractorError,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
)
|
||||
@@ -139,12 +142,17 @@ class RTL2YouIE(RTL2YouBaseIE):
|
||||
self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)
|
||||
|
||||
data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':')
|
||||
stream_url = unpad_pkcs7(aes_cbc_decrypt_bytes(
|
||||
compat_b64decode(data), self._AES_KEY, compat_b64decode(iv)))
|
||||
stream_url = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(compat_b64decode(data)),
|
||||
bytes_to_intlist(self._AES_KEY),
|
||||
bytes_to_intlist(compat_b64decode(iv))
|
||||
))
|
||||
if b'rtl2_you_video_not_found' in stream_url:
|
||||
raise ExtractorError('video not found', expected=True)
|
||||
|
||||
formats = self._extract_m3u8_formats(stream_url.decode(), video_id, 'mp4', 'm3u8_native')
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream_url[:-compat_ord(stream_url[-1])].decode(),
|
||||
video_id, 'mp4', 'm3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_data = self._download_json(
|
||||
|
||||
@@ -2,9 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt, unpad_pkcs7
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
)
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
@@ -75,7 +76,8 @@ class ShemarooMeIE(InfoExtractor):
|
||||
url_data = bytes_to_intlist(compat_b64decode(data_json['new_play_url']))
|
||||
key = bytes_to_intlist(compat_b64decode(data_json['key']))
|
||||
iv = [0] * 16
|
||||
m3u8_url = unpad_pkcs7(intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))).decode('ascii')
|
||||
m3u8_url = intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))
|
||||
m3u8_url = m3u8_url[:-compat_ord((m3u8_url[-1]))].decode('ascii')
|
||||
formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -59,8 +59,12 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
videos = asset.get('videos') or []
|
||||
if len(videos) > 1:
|
||||
playlist_id = parse_qs(url).get('playlistId', [None])[0]
|
||||
if not self._yes_playlist(playlist_id, asset_id):
|
||||
videos = [videos[int(playlist_id)]]
|
||||
if playlist_id:
|
||||
if self.get_param('noplaylist'):
|
||||
videos = [videos[int(playlist_id)]]
|
||||
self.to_screen('Downloading just a single video because of --no-playlist')
|
||||
else:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id)
|
||||
|
||||
def entries():
|
||||
for i, video in enumerate(videos, 1):
|
||||
|
||||
@@ -52,7 +52,7 @@ class ThisOldHouseIE(InfoExtractor):
|
||||
video_url = self._search_regex(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
|
||||
webpage, 'video url')
|
||||
if 'subscription_required=true' in video_url or 'c-entry-group-labels__image' in webpage:
|
||||
if 'subscription_required=true' in video_url:
|
||||
return self.url_result(self._request_webpage(HEADRequest(video_url), display_id).geturl(), 'Zype', display_id)
|
||||
video_id = self._search_regex(r'(?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})', video_url, 'video id')
|
||||
return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)
|
||||
|
||||
@@ -27,8 +27,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class TikTokBaseIE(InfoExtractor):
|
||||
_APP_VERSIONS = [('20.9.3', '293'), ('20.4.3', '243'), ('20.2.1', '221'), ('20.1.2', '212'), ('20.0.4', '204')]
|
||||
_WORKING_APP_VERSION = None
|
||||
_APP_VERSION = '20.1.0'
|
||||
_MANIFEST_APP_VERSION = '210'
|
||||
_APP_NAME = 'trill'
|
||||
_AID = 1180
|
||||
_API_HOSTNAME = 'api-h2.tiktokv.com'
|
||||
@@ -36,27 +36,15 @@ class TikTokBaseIE(InfoExtractor):
|
||||
_WEBPAGE_HOST = 'https://www.tiktok.com/'
|
||||
QUALITIES = ('360p', '540p', '720p', '1080p')
|
||||
|
||||
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
|
||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160)))
|
||||
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
|
||||
if webpage_cookies.get('sid_tt'):
|
||||
self._set_cookie(self._API_HOSTNAME, 'sid_tt', webpage_cookies['sid_tt'].value)
|
||||
return self._download_json(
|
||||
'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
|
||||
fatal=fatal, note=note, errnote=errnote, headers={
|
||||
'User-Agent': f'com.ss.android.ugc.trill/{manifest_app_version} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)',
|
||||
'Accept': 'application/json',
|
||||
}, query=query)
|
||||
|
||||
def _build_api_query(self, query, app_version, manifest_app_version):
|
||||
return {
|
||||
def _call_api(self, ep, query, video_id, fatal=True,
|
||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||
real_query = {
|
||||
**query,
|
||||
'version_name': app_version,
|
||||
'version_code': manifest_app_version,
|
||||
'build_number': app_version,
|
||||
'manifest_version_code': manifest_app_version,
|
||||
'update_version_code': manifest_app_version,
|
||||
'version_name': self._APP_VERSION,
|
||||
'version_code': self._MANIFEST_APP_VERSION,
|
||||
'build_number': self._APP_VERSION,
|
||||
'manifest_version_code': self._MANIFEST_APP_VERSION,
|
||||
'update_version_code': self._MANIFEST_APP_VERSION,
|
||||
'openudid': ''.join(random.choice('0123456789abcdef') for _ in range(16)),
|
||||
'uuid': ''.join([random.choice(string.digits) for _ in range(16)]),
|
||||
'_rticket': int(time.time() * 1000),
|
||||
@@ -85,40 +73,16 @@ class TikTokBaseIE(InfoExtractor):
|
||||
'as': 'a1qwert123',
|
||||
'cp': 'cbfhckdckkde1',
|
||||
}
|
||||
|
||||
def _call_api(self, ep, query, video_id, fatal=True,
|
||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||
if not self._WORKING_APP_VERSION:
|
||||
app_version = self._configuration_arg('app_version', [''], ie_key=TikTokIE.ie_key())[0]
|
||||
manifest_app_version = self._configuration_arg('manifest_app_version', [''], ie_key=TikTokIE.ie_key())[0]
|
||||
if app_version and manifest_app_version:
|
||||
self._WORKING_APP_VERSION = (app_version, manifest_app_version)
|
||||
self.write_debug('Imported app version combo from extractor arguments')
|
||||
elif app_version or manifest_app_version:
|
||||
self.report_warning('Only one of the two required version params are passed as extractor arguments', only_once=True)
|
||||
|
||||
if self._WORKING_APP_VERSION:
|
||||
app_version, manifest_app_version = self._WORKING_APP_VERSION
|
||||
real_query = self._build_api_query(query, app_version, manifest_app_version)
|
||||
return self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
|
||||
|
||||
for count, (app_version, manifest_app_version) in enumerate(self._APP_VERSIONS, start=1):
|
||||
real_query = self._build_api_query(query, app_version, manifest_app_version)
|
||||
try:
|
||||
res = self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
|
||||
self._WORKING_APP_VERSION = (app_version, manifest_app_version)
|
||||
return res
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
||||
if count == len(self._APP_VERSIONS):
|
||||
if fatal:
|
||||
raise e
|
||||
else:
|
||||
self.report_warning(str(e.cause or e.msg))
|
||||
return
|
||||
self.report_warning('%s. Retrying... (attempt %s of %s)' % (str(e.cause or e.msg), count, len(self._APP_VERSIONS)))
|
||||
continue
|
||||
raise e
|
||||
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160)))
|
||||
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
|
||||
if webpage_cookies.get('sid_tt'):
|
||||
self._set_cookie(self._API_HOSTNAME, 'sid_tt', webpage_cookies['sid_tt'].value)
|
||||
return self._download_json(
|
||||
'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
|
||||
fatal=fatal, note=note, errnote=errnote, headers={
|
||||
'User-Agent': f'com.ss.android.ugc.trill/{self._MANIFEST_APP_VERSION} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)',
|
||||
'Accept': 'application/json',
|
||||
}, query=real_query)
|
||||
|
||||
def _get_subtitles(self, aweme_detail, aweme_id):
|
||||
# TODO: Extract text positioning info
|
||||
@@ -790,7 +754,8 @@ class DouyinIE(TikTokIE):
|
||||
'comment_count': int,
|
||||
}
|
||||
}]
|
||||
_APP_VERSIONS = [('9.6.0', '960')]
|
||||
_APP_VERSION = '9.6.0'
|
||||
_MANIFEST_APP_VERSION = '960'
|
||||
_APP_NAME = 'aweme'
|
||||
_AID = 1128
|
||||
_API_HOSTNAME = 'aweme.snssdk.com'
|
||||
|
||||
@@ -21,20 +21,28 @@ class TumblrIE(InfoExtractor):
|
||||
'id': '54196191430',
|
||||
'ext': 'mp4',
|
||||
'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...',
|
||||
'description': 'md5:390ab77358960235b6937ab3b8528956',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 127,
|
||||
'description': 'md5:37db8211e40b50c7c44e95da14f630b7',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://maskofthedragon.tumblr.com/post/626907179849564160/mona-talking-in-english',
|
||||
'md5': 'f43ff8a8861712b6cf0e0c2bd84cfc68',
|
||||
'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all',
|
||||
'md5': 'bf348ef8c0ef84fbf1cbd6fa6e000359',
|
||||
'info_dict': {
|
||||
'id': '626907179849564160',
|
||||
'id': '90208453769',
|
||||
'ext': 'mp4',
|
||||
'title': 'Me roast is buggered!, Mona\xa0“talking” in\xa0“english”',
|
||||
'description': 'md5:082a3a621530cb786ad2b7592a6d9e2c',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 7,
|
||||
'title': '5SOS STRUM ;]',
|
||||
'description': 'md5:dba62ac8639482759c8eb10ce474586a',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://hdvideotest.tumblr.com/post/130323439814/test-description-for-my-hd-video',
|
||||
'md5': '7ae503065ad150122dc3089f8cf1546c',
|
||||
'info_dict': {
|
||||
'id': '130323439814',
|
||||
'ext': 'mp4',
|
||||
'title': 'HD Video Testing \u2014 Test description for my HD video',
|
||||
'description': 'md5:97cc3ab5fcd27ee4af6356701541319c',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'format': 'hd',
|
||||
@@ -52,20 +60,16 @@ class TumblrIE(InfoExtractor):
|
||||
'uploader_id': '1638622',
|
||||
'uploader': 'naked-yogi',
|
||||
},
|
||||
# 'add_ie': ['Vidme'],
|
||||
'skip': 'dead embedded video host'
|
||||
'add_ie': ['Vidme'],
|
||||
}, {
|
||||
'url': 'https://dominustempori.tumblr.com/post/673572712813297664/youtubes-all-right-for-some-pretty-cool',
|
||||
'md5': '5e45724c70b748f64f5a1731ac72c84a',
|
||||
'url': 'http://camdamage.tumblr.com/post/98846056295/',
|
||||
'md5': 'a9e0c8371ea1ca306d6554e3fecf50b6',
|
||||
'info_dict': {
|
||||
'id': '87816359',
|
||||
'id': '105463834',
|
||||
'ext': 'mp4',
|
||||
'title': 'Harold Ramis',
|
||||
'uploader': 'Resolution Productions Group',
|
||||
'uploader_id': 'resolutionproductions',
|
||||
'uploader_url': 'https://vimeo.com/resolutionproductions',
|
||||
'thumbnail': r're:^https?://i.vimeocdn.com/video/.*',
|
||||
'duration': 291,
|
||||
'title': 'Cam Damage-HD 720p',
|
||||
'uploader': 'John Moyer',
|
||||
'uploader_id': 'user32021558',
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}, {
|
||||
@@ -82,27 +86,18 @@ class TumblrIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1455940159,
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Vine'],
|
||||
}, {
|
||||
'url': 'https://silami.tumblr.com/post/84250043974/my-bad-river-flows-in-you-impression-on-maschine',
|
||||
'md5': '3c92d7c3d867f14ccbeefa2119022277',
|
||||
'url': 'http://vitasidorkina.tumblr.com/post/134652425014/joskriver-victoriassecret-invisibility-or',
|
||||
'md5': '01c12ceb82cbf6b2fe0703aa56b3ad72',
|
||||
'info_dict': {
|
||||
'id': 'nYtvtTPuTl',
|
||||
'id': '-7LnUPGlSo',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video by silbulterman',
|
||||
'description': '#maschine',
|
||||
'uploader_id': '242859024',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1398801174,
|
||||
'like_count': int,
|
||||
'uploader': 'Sil',
|
||||
'channel': 'silbulterman',
|
||||
'comment_count': int,
|
||||
'upload_date': '20140429',
|
||||
'title': 'Video by victoriassecret',
|
||||
'description': 'Invisibility or flight…which superpower would YOU choose? #VSFashionShow #ThisOrThat',
|
||||
'uploader_id': 'victoriassecret',
|
||||
'thumbnail': r're:^https?://.*\.jpg'
|
||||
},
|
||||
'add_ie': ['Instagram'],
|
||||
}]
|
||||
@@ -166,14 +161,9 @@ class TumblrIE(InfoExtractor):
|
||||
r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
|
||||
webpage, 'iframe url', default=None)
|
||||
if iframe_url is None:
|
||||
iframe_url = self._search_regex(
|
||||
r'src=["\'](https?://safe\.txmblr\.com/svc/embed/inline/[^"\']+)["\']',
|
||||
webpage, 'embed iframe url', default=None)
|
||||
return self.url_result(iframe_url or redirect_url, 'Generic')
|
||||
return self.url_result(redirect_url, 'Generic')
|
||||
|
||||
iframe = self._download_webpage(
|
||||
iframe_url, video_id, 'Downloading iframe page',
|
||||
headers={'Referer': redirect_url})
|
||||
iframe = self._download_webpage(iframe_url, video_id, 'Downloading iframe page')
|
||||
|
||||
duration = None
|
||||
sources = []
|
||||
|
||||
@@ -85,7 +85,7 @@ class TwitCastingIE(InfoExtractor):
|
||||
if video_password:
|
||||
request_data = urlencode_postdata({
|
||||
'password': video_password,
|
||||
}, encoding='utf-8')
|
||||
})
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, data=request_data,
|
||||
headers={'Origin': 'https://twitcasting.tv'})
|
||||
|
||||
@@ -19,7 +19,7 @@ class VikiBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
|
||||
_API_URL_TEMPLATE = 'https://api.viki.io%s'
|
||||
|
||||
_DEVICE_ID = '112395910d'
|
||||
_DEVICE_ID = '86085977d' # used for android api
|
||||
_APP = '100005a'
|
||||
_APP_VERSION = '6.11.3'
|
||||
_APP_SECRET = 'd96704b180208dbb2efa30fe44c48bd8690441af9f567ba8fd710a72badc85198f7472'
|
||||
@@ -253,7 +253,7 @@ class VikiIE(VikiBaseIE):
|
||||
} for thumbnail_id, thumbnail in (video.get('images') or {}).items() if thumbnail.get('url')]
|
||||
|
||||
resp = self._call_api(
|
||||
'playback_streams/%s.json?drms=dt3&device_id=%s' % (video_id, self._DEVICE_ID),
|
||||
'playback_streams/%s.json?drms=dt1,dt2&device_id=%s' % (video_id, self._DEVICE_ID),
|
||||
video_id, 'Downloading video streams JSON')['main'][0]
|
||||
|
||||
stream_id = try_get(resp, lambda x: x['properties']['track']['stream_id'])
|
||||
@@ -264,13 +264,10 @@ class VikiIE(VikiBaseIE):
|
||||
} for ext in ('srt', 'vtt')]) for lang in (video.get('subtitle_completions') or {}).keys())
|
||||
|
||||
mpd_url = resp['url']
|
||||
# 720p is hidden in another MPD which can be found in the current manifest content
|
||||
# 1080p is hidden in another mpd which can be found in the current manifest content
|
||||
mpd_content = self._download_webpage(mpd_url, video_id, note='Downloading initial MPD manifest')
|
||||
mpd_url = self._search_regex(
|
||||
r'(?mi)<BaseURL>(http.+.mpd)', mpd_content, 'new manifest', default=mpd_url)
|
||||
if 'mpdhd_high' not in mpd_url:
|
||||
# Modify the URL to get 1080p
|
||||
mpd_url = mpd_url.replace('mpdhd', 'mpdhd_high')
|
||||
formats = self._extract_mpd_formats(mpd_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -3,8 +3,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class VimmIE(InfoExtractor):
|
||||
IE_NAME = 'Vimm:stream'
|
||||
_VALID_URL = r'https?://(?:www\.)?vimm\.tv/(?:c/)?(?P<id>[0-9a-z-]+)$'
|
||||
_VALID_URL = r'https?://(?:www\.)?vimm\.tv/c/(?P<id>[0-9a-z-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.vimm.tv/c/calimeatwagon',
|
||||
'info_dict': {
|
||||
@@ -14,9 +13,6 @@ class VimmIE(InfoExtractor):
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'skip': 'Live',
|
||||
}, {
|
||||
'url': 'https://www.vimm.tv/octaafradio',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -33,37 +29,3 @@ class VimmIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'subtitles': subs,
|
||||
}
|
||||
|
||||
|
||||
class VimmRecordingIE(InfoExtractor):
|
||||
IE_NAME = 'Vimm:recording'
|
||||
_VALID_URL = r'https?://(?:www\.)?vimm\.tv/c/(?P<channel_id>[0-9a-z-]+)\?v=(?P<video_id>[0-9A-Za-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.vimm.tv/c/kaldewei?v=2JZsrPTFxsSz',
|
||||
'md5': '15122ee95baa32a548e4a3e120b598f1',
|
||||
'info_dict': {
|
||||
'id': '2JZsrPTFxsSz',
|
||||
'ext': 'mp4',
|
||||
'title': 'VIMM - [DE/GER] Kaldewei Live - In Farbe und Bunt',
|
||||
'uploader_id': 'kaldewei',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
f'https://d211qfrkztakg3.cloudfront.net/{channel_id}/{video_id}/index.m3u8', video_id, 'mp4', m3u8_id='hls', live=False)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'is_live': False,
|
||||
'uploader_id': channel_id,
|
||||
'formats': formats,
|
||||
'subtitles': subs,
|
||||
}
|
||||
|
||||
@@ -287,7 +287,8 @@ class ViuOTTIE(InfoExtractor):
|
||||
raise ExtractorError('This video is not available in your region.', expected=True)
|
||||
|
||||
series_id = video_data.get('series_id')
|
||||
if self._yes_playlist(series_id, video_id, idata):
|
||||
if not self.get_param('noplaylist') and not idata.get('force_noplaylist'):
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % series_id)
|
||||
series = product_data.get('series', {})
|
||||
product = series.get('product')
|
||||
if product:
|
||||
@@ -307,6 +308,9 @@ class ViuOTTIE(InfoExtractor):
|
||||
|
||||
return self.playlist_result(entries, series_id, series.get('name'), series.get('description'))
|
||||
|
||||
if self.get_param('noplaylist'):
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
|
||||
duration_limit = False
|
||||
query = {
|
||||
'ccs_product_id': video_data['ccs_product_id'],
|
||||
|
||||
@@ -146,24 +146,30 @@ class VLiveIE(VLiveBaseIE):
|
||||
'post/v1.0/officialVideoPost-%s', video_id,
|
||||
'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId},playlist{playlistSeq,totalCount,name}')
|
||||
|
||||
playlist_id = str_or_none(try_get(post, lambda x: x['playlist']['playlistSeq']))
|
||||
if not self._yes_playlist(playlist_id, video_id):
|
||||
playlist = post.get('playlist')
|
||||
if not playlist or self.get_param('noplaylist'):
|
||||
if playlist:
|
||||
self.to_screen(
|
||||
'Downloading just video %s because of --no-playlist'
|
||||
% video_id)
|
||||
|
||||
video = post['officialVideo']
|
||||
return self._get_vlive_info(post, video, video_id)
|
||||
else:
|
||||
playlist_name = playlist.get('name')
|
||||
playlist_id = str_or_none(playlist.get('playlistSeq'))
|
||||
playlist_count = str_or_none(playlist.get('totalCount'))
|
||||
|
||||
playlist_name = str_or_none(try_get(post, lambda x: x['playlist']['name']))
|
||||
playlist_count = str_or_none(try_get(post, lambda x: x['playlist']['totalCount']))
|
||||
playlist = self._call_api(
|
||||
'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', {'limit': playlist_count})
|
||||
|
||||
playlist = self._call_api(
|
||||
'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', {'limit': playlist_count})
|
||||
entries = []
|
||||
for video_data in playlist['data']:
|
||||
video = video_data.get('officialVideo')
|
||||
video_id = str_or_none(video.get('videoSeq'))
|
||||
entries.append(self._get_vlive_info(video_data, video, video_id))
|
||||
|
||||
entries = []
|
||||
for video_data in playlist['data']:
|
||||
video = video_data.get('officialVideo')
|
||||
video_id = str_or_none(video.get('videoSeq'))
|
||||
entries.append(self._get_vlive_info(video_data, video, video_id))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_name)
|
||||
return self.playlist_result(entries, playlist_id, playlist_name)
|
||||
|
||||
def _get_vlive_info(self, post, video, video_id):
|
||||
def get_common_fields():
|
||||
|
||||
@@ -11,7 +11,6 @@ from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
lowercase_escape,
|
||||
)
|
||||
|
||||
|
||||
@@ -149,45 +148,6 @@ class YandexVideoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class YandexVideoPreviewIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?yandex\.ru/video/preview(?:/?\?.*?filmId=|/)(?P<id>\d+)'
|
||||
_TESTS = [{ # Odnoklassniki
|
||||
'url': 'https://yandex.ru/video/preview/?filmId=10682852472978372885&text=summer',
|
||||
'info_dict': {
|
||||
'id': '1352565459459',
|
||||
'ext': 'mp4',
|
||||
'like_count': int,
|
||||
'upload_date': '20191202',
|
||||
'age_limit': 0,
|
||||
'duration': 196,
|
||||
'thumbnail': 'https://i.mycdn.me/videoPreview?id=544866765315&type=37&idx=13&tkn=TY5qjLYZHxpmcnK8U2LgzYkgmaU&fn=external_8',
|
||||
'uploader_id': '481054701571',
|
||||
'title': 'LOFT - summer, summer, summer HD',
|
||||
'manifest_stream_number': 0,
|
||||
'uploader': 'АРТЁМ КУДРОВ',
|
||||
},
|
||||
}, { # youtube
|
||||
'url': 'https://yandex.ru/video/preview/?filmId=4479424425337895262&source=main_redirect&text=видео&utm_source=main_stripe_big',
|
||||
'only_matching': True,
|
||||
}, { # YandexVideo
|
||||
'url': 'https://yandex.ru/video/preview/5275069442094787341',
|
||||
'only_matching': True,
|
||||
}, { # youtube
|
||||
'url': 'https://yandex.ru/video/preview/?filmId=16658118429797832897&from=tabbar&p=1&text=%D0%BF%D1%80%D0%BE%D1%81%D0%BC%D0%BE%D1%82%D1%80+%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82%D0%B0+%D0%BC%D0%B0%D0%BB%D0%B5%D0%BD%D1%8C%D0%BA%D0%B8%D0%B9+%D0%BF%D1%80%D0%B8%D0%BD%D1%86+%D0%BC%D1%8B+%D0%B2+%D0%BE%D1%82%D0%B2%D0%B5%D1%82%D0%B5+%D0%B7%D0%B0+%D1%82%D0%B5%D1%85+%D0%BA%D0%BE%D0%B3%D0%BE+%D0%BF%D1%80%D0%B8%D1%80%D1%83%D1%87%D0%B8%D0%BB%D0%B8',
|
||||
'only_matching': True,
|
||||
}, { # Odnoklassniki
|
||||
'url': 'https://yandex.ru/video/preview/?text=Francis%20Lai%20-%20Le%20Bon%20Et%20Les%20MC)chants&path=wizard&parent-reqid=1643208087979310-1481782809207673478-sas3-0931-2f9-sas-l7-balancer-8080-BAL-9380&wiz_type=vital&filmId=12508152936505397283',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
data_raw = self._search_regex(r'window.Ya.__inline_params__\s*=\s*JSON.parse\(\'([^"]+?\\u0022video\\u0022:[^"]+?})\'\);', webpage, 'data_raw')
|
||||
data_json = self._parse_json(data_raw, id, transform_source=lowercase_escape)
|
||||
return self.url_result(data_json['video']['url'])
|
||||
|
||||
|
||||
class ZenYandexIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://zen\.yandex\.ru(?:/video)?/(media|watch)/(?:(?:id/[^/]+/|[^/]+/)(?:[a-z0-9-]+)-)?(?P<id>[a-z0-9-]+)'
|
||||
_TESTS = [{
|
||||
|
||||
@@ -42,7 +42,6 @@ from ..utils import (
|
||||
int_or_none,
|
||||
is_html,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
network_exceptions,
|
||||
NO_DEFAULT,
|
||||
@@ -258,7 +257,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
_RESERVED_NAMES = (
|
||||
r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
|
||||
r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
|
||||
r'shorts|movies|results|shared|hashtag|trending|explore|feed|feeds|'
|
||||
r'browse|oembed|get_video_info|iframe_api|s/player|'
|
||||
r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
|
||||
|
||||
@@ -761,15 +760,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, network_exceptions):
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
first_bytes = e.cause.read(512)
|
||||
if not is_html(first_bytes):
|
||||
yt_error = try_get(
|
||||
self._parse_json(
|
||||
self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
|
||||
lambda x: x['error']['message'], compat_str)
|
||||
if yt_error:
|
||||
self._report_alerts([('ERROR', yt_error)], fatal=False)
|
||||
if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
|
||||
e.cause.seek(0)
|
||||
yt_error = try_get(
|
||||
self._parse_json(e.cause.read().decode(), item_id, fatal=False),
|
||||
lambda x: x['error']['message'], compat_str)
|
||||
if yt_error:
|
||||
self._report_alerts([('ERROR', yt_error)], fatal=False)
|
||||
# Downloading page may result in intermittent 5xx HTTP error
|
||||
# Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
|
||||
# We also want to catch all other network exceptions since errors in later pages can be troublesome
|
||||
@@ -2421,14 +2418,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
|
||||
|
||||
def _extract_n_function_name(self, jscode):
|
||||
nfunc, idx = self._search_regex(
|
||||
r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
|
||||
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
|
||||
if not idx:
|
||||
return nfunc
|
||||
return json.loads(js_to_json(self._search_regex(
|
||||
rf'var {nfunc}\s*=\s*(\[.+?\]);', jscode,
|
||||
f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]
|
||||
return self._search_regex(
|
||||
(r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
|
||||
jscode, 'Initial JS player n function name', group='nfunc')
|
||||
|
||||
def _extract_n_function(self, video_id, player_url):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
@@ -3601,26 +3593,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def passthrough_smuggled_data(func):
|
||||
def _smuggle(entries, smuggled_data):
|
||||
for entry in entries:
|
||||
# TODO: Convert URL to music.youtube instead.
|
||||
# Do we need to passthrough any other smuggled_data?
|
||||
entry['url'] = smuggle_url(entry['url'], smuggled_data)
|
||||
yield entry
|
||||
|
||||
@functools.wraps(func)
|
||||
def wrapper(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
if self.is_music_url(url):
|
||||
smuggled_data['is_music_url'] = True
|
||||
info_dict = func(self, url, smuggled_data)
|
||||
if smuggled_data and info_dict.get('entries'):
|
||||
info_dict['entries'] = _smuggle(info_dict['entries'], smuggled_data)
|
||||
return info_dict
|
||||
return wrapper
|
||||
|
||||
def _extract_channel_id(self, webpage):
|
||||
channel_id = self._html_search_meta(
|
||||
'channelId', webpage, 'channel id', default=None)
|
||||
@@ -3688,24 +3660,6 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
|
||||
break
|
||||
|
||||
def _music_reponsive_list_entry(self, renderer):
|
||||
video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
|
||||
if video_id:
|
||||
return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
|
||||
ie=YoutubeIE.ie_key(), video_id=video_id)
|
||||
playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
|
||||
if playlist_id:
|
||||
video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
|
||||
if video_id:
|
||||
return self.url_result(f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}',
|
||||
ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
|
||||
return self.url_result(f'https://music.youtube.com/playlist?list={playlist_id}',
|
||||
ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
|
||||
browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
|
||||
if browse_id:
|
||||
return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
|
||||
ie=YoutubeTabIE.ie_key(), video_id=browse_id)
|
||||
|
||||
def _shelf_entries_from_content(self, shelf_renderer):
|
||||
content = shelf_renderer.get('content')
|
||||
if not isinstance(content, dict):
|
||||
@@ -3827,9 +3781,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
for content in contents:
|
||||
if not isinstance(content, dict):
|
||||
continue
|
||||
is_renderer = traverse_obj(
|
||||
content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
|
||||
expected_type=dict)
|
||||
is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
|
||||
if not is_renderer:
|
||||
renderer = content.get('richItemRenderer')
|
||||
if renderer:
|
||||
@@ -3846,7 +3798,6 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
'playlistVideoListRenderer': self._playlist_entries,
|
||||
'gridRenderer': self._grid_entries,
|
||||
'shelfRenderer': lambda x: self._shelf_entries(x),
|
||||
'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
|
||||
'backstagePostThreadRenderer': self._post_thread_entries,
|
||||
'videoRenderer': lambda x: [self._video_entry(x)],
|
||||
'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
|
||||
@@ -4271,32 +4222,33 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
raise ExtractorError(err_note, expected=True)
|
||||
self.report_warning(err_note, item_id)
|
||||
|
||||
@staticmethod
|
||||
def _smuggle_data(entries, data):
|
||||
for entry in entries:
|
||||
if data:
|
||||
entry['url'] = smuggle_url(entry['url'], data)
|
||||
yield entry
|
||||
|
||||
_SEARCH_PARAMS = None
|
||||
|
||||
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
|
||||
def _search_results(self, query, params=NO_DEFAULT):
|
||||
data = {'query': query}
|
||||
if params is NO_DEFAULT:
|
||||
params = self._SEARCH_PARAMS
|
||||
if params:
|
||||
data['params'] = params
|
||||
|
||||
content_keys = (
|
||||
('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
|
||||
('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
|
||||
# ytmusic search
|
||||
('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
|
||||
('continuationContents', ),
|
||||
)
|
||||
check_get_keys = tuple(set(keys[0] for keys in content_keys))
|
||||
|
||||
continuation_list = [None]
|
||||
for page_num in itertools.count(1):
|
||||
data.update(continuation_list[0] or {})
|
||||
search = self._extract_response(
|
||||
item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
|
||||
default_client=default_client, check_get_keys=check_get_keys)
|
||||
slr_contents = traverse_obj(search, *content_keys)
|
||||
yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
|
||||
check_get_keys=('contents', 'onResponseReceivedCommands'))
|
||||
slr_contents = try_get(
|
||||
search,
|
||||
(lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
|
||||
lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
|
||||
list)
|
||||
yield from self._extract_entries({'contents': slr_contents}, continuation_list)
|
||||
if not continuation_list[0]:
|
||||
break
|
||||
|
||||
@@ -4973,10 +4925,18 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
return False if YoutubeIE.suitable(url) else super(
|
||||
YoutubeTabIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
if self.is_music_url(url):
|
||||
smuggled_data['is_music_url'] = True
|
||||
info_dict = self.__real_extract(url, smuggled_data)
|
||||
if info_dict.get('entries'):
|
||||
info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
|
||||
return info_dict
|
||||
|
||||
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
|
||||
|
||||
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
|
||||
def _real_extract(self, url, smuggled_data):
|
||||
def __real_extract(self, url, smuggled_data):
|
||||
item_id = self._match_id(url)
|
||||
url = compat_urlparse.urlunparse(
|
||||
compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
|
||||
@@ -5311,14 +5271,7 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
|
||||
IE_NAME = 'youtube:search'
|
||||
_SEARCH_KEY = 'ytsearch'
|
||||
_SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
|
||||
_TESTS = [{
|
||||
'url': 'ytsearch5:youtube-dl test video',
|
||||
'playlist_count': 5,
|
||||
'info_dict': {
|
||||
'id': 'youtube-dl test video',
|
||||
'title': 'youtube-dl test video',
|
||||
}
|
||||
}]
|
||||
_TESTS = []
|
||||
|
||||
|
||||
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
|
||||
@@ -5326,20 +5279,12 @@ class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
|
||||
_SEARCH_KEY = 'ytsearchdate'
|
||||
IE_DESC = 'YouTube search, newest videos first'
|
||||
_SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
|
||||
_TESTS = [{
|
||||
'url': 'ytsearchdate5:youtube-dl test video',
|
||||
'playlist_count': 5,
|
||||
'info_dict': {
|
||||
'id': 'youtube-dl test video',
|
||||
'title': 'youtube-dl test video',
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube search URLs with sorting and filter support'
|
||||
IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
|
||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
|
||||
'playlist_mincount': 5,
|
||||
@@ -5366,60 +5311,7 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
|
||||
return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query)
|
||||
|
||||
|
||||
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
|
||||
IE_NAME = 'youtube:music:search_url'
|
||||
_VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://music.youtube.com/search?q=royalty+free+music',
|
||||
'playlist_count': 16,
|
||||
'info_dict': {
|
||||
'id': 'royalty free music',
|
||||
'title': 'royalty free music',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
|
||||
'playlist_mincount': 30,
|
||||
'info_dict': {
|
||||
'id': 'royalty free music - songs',
|
||||
'title': 'royalty free music - songs',
|
||||
},
|
||||
'params': {'extract_flat': 'in_playlist'}
|
||||
}, {
|
||||
'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
|
||||
'playlist_mincount': 30,
|
||||
'info_dict': {
|
||||
'id': 'royalty free music - community playlists',
|
||||
'title': 'royalty free music - community playlists',
|
||||
},
|
||||
'params': {'extract_flat': 'in_playlist'}
|
||||
}]
|
||||
|
||||
_SECTIONS = {
|
||||
'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
|
||||
'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
|
||||
'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
|
||||
'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
|
||||
'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
|
||||
'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = parse_qs(url)
|
||||
query = (qs.get('search_query') or qs.get('q'))[0]
|
||||
params = qs.get('sp', (None,))[0]
|
||||
if params:
|
||||
section = next((k for k, v in self._SECTIONS.items() if v == params), params)
|
||||
else:
|
||||
section = compat_urllib_parse_unquote_plus((url.split('#') + [''])[1]).lower()
|
||||
params = self._SECTIONS.get(section)
|
||||
if not params:
|
||||
section = None
|
||||
title = join_nonempty(query, section, delim=' - ')
|
||||
return self.playlist_result(self._search_results(query, params, default_client='web_music'), title, title)
|
||||
|
||||
|
||||
class YoutubeFeedsInfoExtractor(InfoExtractor):
|
||||
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
|
||||
"""
|
||||
Base class for feed extractors
|
||||
Subclasses must define the _FEED_NAME property.
|
||||
@@ -5433,7 +5325,8 @@ class YoutubeFeedsInfoExtractor(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result(
|
||||
f'https://www.youtube.com/feed/{self._FEED_NAME}', ie=YoutubeTabIE.ie_key())
|
||||
'https://www.youtube.com/feed/%s' % self._FEED_NAME,
|
||||
ie=YoutubeTabIE.ie_key())
|
||||
|
||||
|
||||
class YoutubeWatchLaterIE(InfoExtractor):
|
||||
|
||||
@@ -173,16 +173,11 @@ def create_parser():
|
||||
process_key=str.lower, append=False):
|
||||
|
||||
out_dict = dict(getattr(parser.values, option.dest))
|
||||
multiple_args = not isinstance(value, str)
|
||||
if multiple_keys:
|
||||
allowed_keys = r'(%s)(,(%s))*' % (allowed_keys, allowed_keys)
|
||||
mobj = re.match(
|
||||
r'(?i)(?P<keys>%s)%s(?P<val>.*)$' % (allowed_keys, delimiter),
|
||||
value[0] if multiple_args else value)
|
||||
mobj = re.match(r'(?i)(?P<keys>%s)%s(?P<val>.*)$' % (allowed_keys, delimiter), value)
|
||||
if mobj is not None:
|
||||
keys, val = mobj.group('keys').split(','), mobj.group('val')
|
||||
if multiple_args:
|
||||
val = [val, *value[1:]]
|
||||
elif default_key is not None:
|
||||
keys, val = [default_key], value
|
||||
else:
|
||||
@@ -345,7 +340,7 @@ def create_parser():
|
||||
help=(
|
||||
'Use the specified HTTP/HTTPS/SOCKS proxy. To enable '
|
||||
'SOCKS proxy, specify a proper scheme. For example '
|
||||
'socks5://user:pass@127.0.0.1:1080/. Pass in an empty string (--proxy "") '
|
||||
'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") '
|
||||
'for direct connection'))
|
||||
network.add_option(
|
||||
'--socket-timeout',
|
||||
@@ -381,10 +376,10 @@ def create_parser():
|
||||
geo.add_option(
|
||||
'--geo-bypass',
|
||||
action='store_true', dest='geo_bypass', default=True,
|
||||
help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (default)')
|
||||
help='Bypass geographic restriction via faking X-Forwarded-For HTTP header')
|
||||
geo.add_option(
|
||||
'--no-geo-bypass',
|
||||
action='store_false', dest='geo_bypass',
|
||||
action='store_false', dest='geo_bypass', default=True,
|
||||
help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header')
|
||||
geo.add_option(
|
||||
'--geo-bypass-country', metavar='CODE',
|
||||
@@ -928,18 +923,6 @@ def create_parser():
|
||||
'Field name or output template to print to screen, optionally prefixed with when to print it, separated by a ":". '
|
||||
'Supported values of "WHEN" are the same as that of --use-postprocessor, and "video" (default). '
|
||||
'Implies --quiet and --simulate (unless --no-simulate is used). This option can be used multiple times'))
|
||||
verbosity.add_option(
|
||||
'--print-to-file',
|
||||
metavar='[WHEN:]TEMPLATE FILE', dest='print_to_file', default={}, type='str', nargs=2,
|
||||
action='callback', callback=_dict_from_options_callback,
|
||||
callback_kwargs={
|
||||
'allowed_keys': 'video|' + '|'.join(map(re.escape, POSTPROCESS_WHEN)),
|
||||
'default_key': 'video',
|
||||
'multiple_keys': False,
|
||||
'append': True,
|
||||
}, help=(
|
||||
'Append given template to the file. The values of WHEN and TEMPLATE are same as that of --print. '
|
||||
'FILE uses the same syntax as the output template. This option can be used multiple times'))
|
||||
verbosity.add_option(
|
||||
'-g', '--get-url',
|
||||
action='store_true', dest='geturl', default=False,
|
||||
|
||||
@@ -1149,19 +1149,20 @@ class FFmpegConcatPP(FFmpegPostProcessor):
|
||||
|
||||
@PostProcessor._restrict_to(images=False)
|
||||
def run(self, info):
|
||||
entries = info.get('entries') or []
|
||||
if (self.get_param('skip_download') or not any(entries)
|
||||
or self._only_multi_video and info['_type'] != 'multi_video'):
|
||||
if not info.get('entries') or self._only_multi_video and info['_type'] != 'multi_video':
|
||||
return [], info
|
||||
elif any(len(entry) > 1 for entry in traverse_obj(entries, (..., 'requested_downloads')) or []):
|
||||
elif None in info['entries']:
|
||||
raise PostProcessingError('Aborting concatenation because some downloads failed')
|
||||
elif any(len(entry) > 1 for entry in traverse_obj(info, ('entries', ..., 'requested_downloads')) or []):
|
||||
raise PostProcessingError('Concatenation is not supported when downloading multiple separate formats')
|
||||
|
||||
in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath'))
|
||||
if len(in_files) < len(entries):
|
||||
raise PostProcessingError('Aborting concatenation because some downloads failed')
|
||||
in_files = traverse_obj(info, ('entries', ..., 'requested_downloads', 0, 'filepath'))
|
||||
if not in_files:
|
||||
self.to_screen('There are no files to concatenate')
|
||||
return [], info
|
||||
|
||||
ie_copy = self._downloader._playlist_infodict(info)
|
||||
exts = traverse_obj(entries, (..., 'requested_downloads', 0, 'ext'), (..., 'ext'))
|
||||
exts = [traverse_obj(entry, ('requested_downloads', 0, 'ext'), 'ext') for entry in info['entries']]
|
||||
ie_copy['ext'] = exts[0] if len(set(exts)) == 1 else 'mkv'
|
||||
out_file = self._downloader.prepare_filename(ie_copy, 'pl_video')
|
||||
|
||||
|
||||
@@ -66,7 +66,7 @@ class MetadataParserPP(PostProcessor):
|
||||
self.write_debug(f'Searching for {out_re.pattern!r} in {template!r}')
|
||||
match = out_re.search(data_to_parse)
|
||||
if match is None:
|
||||
self.to_screen(f'Could not interpret {inp!r} as {out!r}')
|
||||
self.report_warning(f'Could not interpret {inp!r} as {out!r}')
|
||||
return
|
||||
for attribute, value in match.groupdict().items():
|
||||
info[attribute] = value
|
||||
@@ -80,7 +80,7 @@ class MetadataParserPP(PostProcessor):
|
||||
def f(info):
|
||||
val = info.get(field)
|
||||
if val is None:
|
||||
self.to_screen(f'Video does not have a {field}')
|
||||
self.report_warning(f'Video does not have a {field}')
|
||||
return
|
||||
elif not isinstance(val, str):
|
||||
self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
|
||||
|
||||
@@ -54,7 +54,7 @@ _NON_UPDATEABLE_REASONS = {
|
||||
'win_dir': 'Auto-update is not supported for unpackaged windows executable; Re-download the latest release',
|
||||
'mac_dir': 'Auto-update is not supported for unpackaged MacOS executable; Re-download the latest release',
|
||||
'source': 'You cannot update when running from source code; Use git to pull the latest changes',
|
||||
'unknown': 'It looks like you installed yt-dlp with a package manager, pip or setup.py; Use that to update',
|
||||
'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball; Use that to update',
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -639,9 +639,10 @@ def clean_html(html):
|
||||
if html is None: # Convenience for sanitizing descriptions etc.
|
||||
return html
|
||||
|
||||
html = re.sub(r'\s+', ' ', html)
|
||||
html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
|
||||
html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
|
||||
# Newline vs <br />
|
||||
html = html.replace('\n', ' ')
|
||||
html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
|
||||
html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
|
||||
# Strip html tags
|
||||
html = re.sub('<.*?>', '', html)
|
||||
# Replace html entities
|
||||
@@ -1018,9 +1019,13 @@ def make_HTTPS_handler(params, **kwargs):
|
||||
|
||||
|
||||
def bug_reports_message(before=';'):
|
||||
msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp , '
|
||||
'filling out the "Broken site" issue template properly. '
|
||||
'Confirm you are on the latest version using -U')
|
||||
if ytdl_is_updateable():
|
||||
update_cmd = 'type yt-dlp -U to update'
|
||||
else:
|
||||
update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
|
||||
msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
|
||||
msg += ' Make sure you are using the latest version; %s.' % update_cmd
|
||||
msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
|
||||
|
||||
before = before.rstrip()
|
||||
if not before or before.endswith(('.', '!', '?')):
|
||||
@@ -2840,7 +2845,8 @@ class InAdvancePagedList(PagedList):
|
||||
|
||||
def _getslice(self, start, end):
|
||||
start_page = start // self._pagesize
|
||||
end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
|
||||
end_page = (
|
||||
self._pagecount if end is None else (end // self._pagesize + 1))
|
||||
skip_elems = start - start_page * self._pagesize
|
||||
only_more = None if end is None else end - start
|
||||
for pagenum in range(start_page, end_page):
|
||||
@@ -3425,11 +3431,12 @@ def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
|
||||
return [max(width(str(v)) for v in col) for col in zip(*table)]
|
||||
|
||||
def filter_using_list(row, filterArray):
|
||||
return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
|
||||
return [col for (take, col) in zip(filterArray, row) if take]
|
||||
|
||||
max_lens = get_max_lens(data) if hide_empty else []
|
||||
header_row = filter_using_list(header_row, max_lens)
|
||||
data = [filter_using_list(row, max_lens) for row in data]
|
||||
if hide_empty:
|
||||
max_lens = get_max_lens(data)
|
||||
header_row = filter_using_list(header_row, max_lens)
|
||||
data = [filter_using_list(row, max_lens) for row in data]
|
||||
|
||||
table = [header_row] + data
|
||||
max_lens = get_max_lens(table)
|
||||
@@ -5217,10 +5224,8 @@ class Config:
|
||||
|
||||
def init(self, args=None, filename=None):
|
||||
assert not self.__initialized
|
||||
directory = ''
|
||||
if filename:
|
||||
location = os.path.realpath(filename)
|
||||
directory = os.path.dirname(location)
|
||||
if location in self._loaded_paths:
|
||||
return False
|
||||
self._loaded_paths.add(location)
|
||||
@@ -5228,7 +5233,7 @@ class Config:
|
||||
self.__initialized = True
|
||||
self.own_args, self.filename = args, filename
|
||||
for location in self._parser.parse_args(args)[0].config_locations or []:
|
||||
location = os.path.join(directory, expand_path(location))
|
||||
location = compat_expanduser(location)
|
||||
if os.path.isdir(location):
|
||||
location = os.path.join(location, 'yt-dlp.conf')
|
||||
if not os.path.exists(location):
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Autogenerated by devscripts/update-version.py
|
||||
|
||||
__version__ = '2022.02.04'
|
||||
__version__ = '2022.01.21'
|
||||
|
||||
RELEASE_GIT_HEAD = 'c1653e9ef'
|
||||
RELEASE_GIT_HEAD = 'f20d607b0'
|
||||
|
||||
Reference in New Issue
Block a user