Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-12-18 03:42:23 +01:00)

Compare commits: 2021.03.24 ... 2021.04.11 (44 commits)
Commits in this range (SHA1):

a0f30f194a, b31fdeedfd, 8fa43c73d8, 56d868dbb7, f4f751af40, 1988fab7e3, 9de3ea3126, e01d6aa435, f7ad71607d, 68379de561, d9aa233295, f37468c41f, 52a8a1e1b9, d818eb7473, f8d4ad9ab0, 3ffc7c89b0, f1823403b0, 384fb069ec, a4ddaf231e, 7e60c06925, d92f5d5a90, 9e62f283ff, c24ce07a84, de6758128e, 73d4343e39, 57d104424f, 02aabd45d0, 39ed931e53, b28f8d244a, 73cd218f5a, 84601bb72b, 54df8fc5b2, 5d39972ed0, 0481374e1d, eff635394a, df0c81513e, 3f6a90eb63, b050d210df, f4e4be19f0, cce889b900, a6ae61a4c2, b23b9eefd9, a2f0b0c672, b704fc1a68
.github/ISSUE_TEMPLATE/1_broken_site.md (vendored, 6 changed lines)
@@ -21,7 +21,7 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.21. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.03. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
|
||||
- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -29,7 +29,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.21**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.04.03**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -44,7 +44,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your com
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] yt-dlp version 2021.03.21
|
||||
[debug] yt-dlp version 2021.04.03
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
@@ -21,7 +21,7 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.21. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.03. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://github.com/yt-dlp/yt-dlp. yt-dlp does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -29,7 +29,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.21**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.04.03**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
||||
@@ -21,13 +21,13 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.21. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.03. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space)
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.21**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.04.03**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
.github/ISSUE_TEMPLATE/4_bug_report.md (vendored, 6 changed lines)
@@ -21,7 +21,7 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.21. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.03. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
|
||||
- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -30,7 +30,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.21**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.04.03**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -46,7 +46,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your com
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] yt-dlp version 2021.03.21
|
||||
[debug] yt-dlp version 2021.04.03
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
.github/ISSUE_TEMPLATE/5_feature_request.md (vendored, 4 changed lines)
@@ -21,13 +21,13 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.21. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.03. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space)
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.21**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.04.03**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
CONTRIBUTORS (3 additions)
@@ -35,3 +35,6 @@ damianoamatruda
2ShedsJackson
CXwudi
xtkoba
llacb47
hheimbuerger
B0pol

Changelog.md (55 changed lines)
@@ -6,8 +6,10 @@
* Run `make doc`
* Update Changelog.md and CONTRIBUTORS
* Change "Merged with ytdl" version in Readme.md if needed
* Add new/fixed extractors in "new features" section of Readme.md
* Commit to master as `Release <version>`
* Push to origin/release - build task will now run
* Push to origin/release using `git push origin master:release`
build task will now run
* Update version.py using devscripts\update-version.py
* Run `make issuetemplates`
* Commit to master as `[version] update :ci skip all`
@@ -17,13 +19,57 @@
-->

### 2021.04.11
* Add option `--convert-thumbnails` (only jpg currently supported)
* Format selector `mergeall` to download and merge all formats
* Pass any field to `--exec` using similar syntax to output template
* Choose downloader for each protocol using `--downloader PROTO:NAME`
* Alias `--downloader` for `--external-downloader`
* Added `native` as an option for the downloader
* Merge youtube-dl: Upto [commit/4fb25ff](https://github.com/ytdl-org/youtube-dl/commit/4fb25ff5a3be5206bb72e5c4046715b1529fb2c7) (except vimeo)
* [DiscoveryPlusIndia] Add DiscoveryPlusIndiaShowIE by [Ashish0804](https://github.com/Ashish0804)
* [NFHSNetwork] Add extractor by [llacb47](https://github.com/llacb47)
* [nebula] Add extractor (watchnebula.com) by [hheimbuerger](https://github.com/hheimbuerger)
* [nitter] Fix extraction of reply tweets and update instance list by [B0pol](https://github.com/B0pol)
* [nitter] Fix thumbnails by [B0pol](https://github.com/B0pol)
* [youtube] Fix thumbnail URL
* [youtube] Parse API parameters from initial webpage by [colethedj](https://github.com/colethedj)
* [youtube] Extract comments' approximate timestamp by [colethedj](https://github.com/colethedj)
* [youtube] Fix `\_extract_alerts`
* [bilibili] Fix uploader
* [utils] Add `datetime_from_str` and `datetime_add_months` by [colethedj](https://github.com/colethedj)
* Run some `postprocessors` before actual download
* Improve argument parsing for `-P`, `-o`, `-S`
* Fix some `m3u8` not obeying `--allow-unplayable-formats`
* Fix default of `dynamic_mpd`
* Deprecate `--all-formats`, `--include-ads`, `--hls-prefer-native`, `--hls-prefer-ffmpeg`
* [documentation] Improvements

### 2021.04.03
* Merge youtube-dl: Upto [commit/654b4f4](https://github.com/ytdl-org/youtube-dl/commit/654b4f4ff2718f38b3182c1188c5d569c14cc70a)
* Ability to set a specific field in the file's metadata using `--parse-metadata`
* Ability to select n'th best format like `-f bv*.2`
* [DiscoveryPlus] Add discoveryplus.in
* [la7] Add podcasts and podcast playlists by [nixxo](https://github.com/nixxo)
* [mildom] Update extractor with current proxy by [nao20010128nao](https://github.com/nao20010128nao)
* [ard:mediathek] Fix video id extraction
* [generic] Detect Invidious' link element
* [youtube] Show premium state in `availability` by [colethedj](https://github.com/colethedj)
* [viewsource] Add extractor to handle `view-source:`
* [sponskrub] Run before embedding thumbnail
* [documentation] Improve `--parse-metadata` documentation

### 2021.03.24.1
* Revert [commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf)

### 2021.03.24
* Merge youtube-dl: Upto [commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf)
* Merge youtube-dl: Upto 2021.03.25 ([commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf))
* Parse metadata from multiple fields using `--parse-metadata`
* Ability to load playlist infojson using `--load-info-json`
* Write current epoch to infojson when using `--no-clean-infojson`
* [youtube_live_chat] fix bug when trying to set cookies
* [niconico] Fix for when logged in by: @CXwudi and @xtkoba
* [niconico] Fix for when logged in by [CXwudi](https://github.com/CXwudi) and [xtkoba](https://github.com/xtkoba)
* [linuxacadamy] Fix login

@@ -98,7 +144,6 @@
### 2021.03.03.2
* [build] Fix bug

### 2021.03.03
* [youtube] Use new browse API for continuation page extraction by [colethedj](https://github.com/colethedj) and [pukkandan](https://github.com/pukkandan)
* Fix HLS playlist downloading by [shirt](https://github.com/shirt-dev)
@@ -353,7 +398,7 @@

### 2021.01.08
* Merge youtube-dl: Upto [2021.01.08](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.08) except stitcher ([1](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc), [2](https://github.com/ytdl-org/youtube-dl/commit/a563c97c5cddf55f8989ed7ea8314ef78e30107f))
* Moved changelog to seperate file
* Moved changelog to separate file

### 2021.01.07-1

README.md (244 changed lines)
@@ -1,39 +1,44 @@
<div align="center">

# YT-DLP
A command-line program to download videos from YouTube and many other [video platforms](supportedsites.md)

[](https://github.com/yt-dlp/yt-dlp/releases/latest)
[](LICENSE)
<!-- GHA doesnot have for-the-badge style
[](https://github.com/yt-dlp/yt-dlp/actions)
[](https://discord.gg/H5MNcFW63r)
-->
[](https://github.com/yt-dlp/yt-dlp/releases/latest)
[](LICENSE)
[](https://yt-dlp.readthedocs.io)
[](https://discord.gg/H5MNcFW63r)

[](https://github.com/yt-dlp/yt-dlp/commits)
[](https://github.com/yt-dlp/yt-dlp/commits)
[](https://github.com/yt-dlp/yt-dlp/releases/latest)
[](https://pypi.org/project/yt-dlp)
[](https://yt-dlp.readthedocs.io)
[](https://github.com/yt-dlp/yt-dlp/commits)
[](https://github.com/yt-dlp/yt-dlp/commits)
[](https://github.com/yt-dlp/yt-dlp/releases/latest)
[](https://pypi.org/project/yt-dlp)

A command-line program to download videos from youtube.com and many other [video platforms](supportedsites.md)
</div>

This is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). The main focus of this project is adding new features and patches while also keeping up to date with the original project
yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). The main focus of this project is adding new features and patches while also keeping up to date with the original project

* [NEW FEATURES](#new-features)
|
||||
* [INSTALLATION](#installation)
|
||||
* [Dependancies](#dependancies)
|
||||
* [Update](#update)
|
||||
* [Compile](#compile)
|
||||
* [DESCRIPTION](#description)
|
||||
* [OPTIONS](#options)
|
||||
* [USAGE AND OPTIONS](#usage-and-options)
|
||||
* [General Options](#general-options)
|
||||
* [Network Options](#network-options)
|
||||
* [Geo Restriction](#geo-restriction)
|
||||
* [Geo-restriction](#geo-restriction)
|
||||
* [Video Selection](#video-selection)
|
||||
* [Download Options](#download-options)
|
||||
* [Filesystem Options](#filesystem-options)
|
||||
* [Thumbnail images](#thumbnail-images)
|
||||
* [Thumbnail Options](#thumbnail-options)
|
||||
* [Internet Shortcut Options](#internet-shortcut-options)
|
||||
* [Verbosity and Simulation Options](#verbosity-and-simulation-options)
|
||||
* [Workarounds](#workarounds)
|
||||
* [Video Format Options](#video-format-options)
|
||||
* [Subtitle Options](#subtitle-options)
|
||||
* [Authentication Options](#authentication-options)
|
||||
* [Adobe Pass Options](#adobe-pass-options)
|
||||
* [Post-processing Options](#post-processing-options)
|
||||
* [SponSkrub (SponsorBlock) Options](#sponskrub-sponsorblock-options)
|
||||
* [Extractor Options](#extractor-options)
|
||||
@@ -46,9 +51,12 @@ This is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the
|
||||
* [Filtering Formats](#filtering-formats)
|
||||
* [Sorting Formats](#sorting-formats)
|
||||
* [Format Selection examples](#format-selection-examples)
|
||||
* [MODIFYING METADATA](#modifying-metadata)
|
||||
* [Modifying metadata examples](#modifying-metadata-examples)
|
||||
* [PLUGINS](#plugins)
|
||||
* [DEPRECATED OPTIONS](#deprecated-options)
|
||||
* [MORE](#more)
|
||||
</div>
|
||||
|
||||
|
||||
# NEW FEATURES
|
||||
@@ -58,7 +66,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/
|
||||
|
||||
* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection that what is possible by simply using `--format` ([examples](#format-selection-examples))
|
||||
|
||||
* **Merged with youtube-dl v2021.03.25**: You get all the latest features and patches of [youtube-dl](https://github.com/ytdl-org/youtube-dl) in addition to all the features of [youtube-dlc](https://github.com/blackjack4494/yt-dlc)
|
||||
* **Merged with youtube-dl v2021.04.07**: You get all the latest features and patches of [youtube-dl](https://github.com/ytdl-org/youtube-dl) in addition to all the features of [youtube-dlc](https://github.com/blackjack4494/yt-dlc)
|
||||
|
||||
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--get-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, Playlist infojson etc. Note that the NicoNico improvements are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
|
||||
|
||||
@@ -69,15 +77,13 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/
|
||||
|
||||
* **Split video by chapters**: Videos can be split into multiple files based on chapters using `--split-chapters`
|
||||
|
||||
* **Multithreaded fragment downloads**: Fragment downloads can be natively multi-threaded. Use `--concurrent-fragments` (`-N`) option to set the number of threads used
|
||||
* **Multi-threaded fragment downloads**: Download multiple fragments of m3u8/mpd videos in parallel. Use `--concurrent-fragments` (`-N`) option to set the number of threads used
|
||||
|
||||
* **Aria2c with HLS/DASH**: You can use aria2c as the external downloader for DASH(mpd) and HLS(m3u8) formats. No more slow ffmpeg/native downloads
|
||||
* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats
|
||||
|
||||
* **New extractors**: AnimeLab, Philo MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv
|
||||
* **New extractors**: AnimeLab, Philo MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula
|
||||
|
||||
* **Fixed extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, tiktok, akamai, ina, rumble, tennistv, amcnetworks
|
||||
|
||||
* **Plugin extractors**: Extractors can be loaded from an external file. See [plugins](#plugins) for details
|
||||
* **Fixed extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter
|
||||
|
||||
* **Multiple paths and output templates**: You can give different [output templates](#output-template) and download paths for different types of files. You can also set a temporary path where intermediary files are downloaded to using `--paths` (`-P`)
|
||||
|
||||
@@ -87,6 +93,8 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/
|
||||
|
||||
* **Improvements**: Multiple `--postprocessor-args` and `--external-downloader-args`, Date/time formatting in `-o`, faster archive checking, more [format selection options](#format-selection) etc
|
||||
|
||||
* **Plugin extractors**: Extractors can be loaded from an external file. See [plugins](#plugins) for details
|
||||
|
||||
* **Self-updater**: The releases can be updated using `yt-dlp -U`
|
||||
|
||||
|
||||
@@ -99,6 +107,7 @@ If you are coming from [youtube-dl](https://github.com/ytdl-org/youtube-dl), the
|
||||
|
||||
|
||||
# INSTALLATION
|
||||
yt-dlp is not platform specific. So it should work on your Unix box, on Windows or on macOS
|
||||
|
||||
You can install yt-dlp using one of the following methods:
|
||||
* Download the binary from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) (recommended method)
|
||||
@@ -123,8 +132,14 @@ sudo aria2c https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o
|
||||
sudo chmod a+rx /usr/local/bin/yt-dlp
|
||||
```
|
||||
|
||||
### DEPENDANCIES
|
||||
|
||||
Python versions 2.6, 2.7, or 3.2+ are currently supported. However, 3.2+ is strongly recommended and python2 support will be deprecated in the future.
|
||||
|
||||
Although there are no required dependancies, `ffmpeg` and `ffprobe` are highly recommended. Other optional dependancies are `sponskrub`, `AtomicParsley`, `mutagen`, `pycryptodome` and any of the supported external downloaders. Note that the windows releases are already built with the python interpreter, mutagen and pycryptodome included.
|
||||
|
||||
### UPDATE
|
||||
Starting from version `2021.02.09`, you can use `yt-dlp -U` to update if you are using the provided release.
|
||||
You can use `yt-dlp -U` to update if you are using the provided release.
|
||||
If you are using `pip`, simply re-run the same command that was used to install the program.
|
||||
|
||||
### COMPILE
|
||||
@@ -146,13 +161,10 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
|
||||
**Note**: In either platform, `devscripts\update-version.py` can be used to automatically update the version number
|
||||
|
||||
# DESCRIPTION
|
||||
**yt-dlp** is a command-line program to download videos from youtube.com many other [video platforms](supportedsites.md). It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
|
||||
# USAGE AND OPTIONS
|
||||
|
||||
yt-dlp [OPTIONS] [--] URL [URL...]
|
||||
|
||||
|
||||
# OPTIONS
|
||||
`Ctrl+F` is your friend :D
|
||||
<!-- Autogenerated -->
|
||||
|
||||
@@ -211,7 +223,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
-4, --force-ipv4 Make all connections via IPv4
|
||||
-6, --force-ipv6 Make all connections via IPv6
|
||||
|
||||
## Geo Restriction:
|
||||
## Geo-restriction:
|
||||
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
||||
some geo-restricted sites. The default
|
||||
proxy specified by --proxy (or none, if the
|
||||
@@ -295,13 +307,11 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--break-on-reject Stop the download process when encountering
|
||||
a file that has been filtered out
|
||||
--no-download-archive Do not use archive file (default)
|
||||
--include-ads Download advertisements as well
|
||||
(experimental)
|
||||
--no-include-ads Do not download advertisements (default)
|
||||
|
||||
## Download Options:
|
||||
-N, --concurrent-fragments N Number of fragments to download
|
||||
concurrently (default is 1)
|
||||
-N, --concurrent-fragments N Number of fragments of a dash/hlsnative
|
||||
video that should be download concurrently
|
||||
(default is 1)
|
||||
-r, --limit-rate RATE Maximum download rate in bytes per second
|
||||
(e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
@@ -335,10 +345,6 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--playlist-random Download playlist videos in random order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||
expected file size
|
||||
--hls-prefer-native Use the native HLS downloader instead of
|
||||
ffmpeg
|
||||
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
|
||||
downloader
|
||||
--hls-use-mpegts Use the mpegts container for HLS videos;
|
||||
allowing some players to play the video
|
||||
while downloading, and reducing the chance
|
||||
@@ -348,10 +354,19 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--no-hls-use-mpegts Do not use the mpegts container for HLS
|
||||
videos. This is default when not
|
||||
downloading live streams
|
||||
--external-downloader NAME Name or path of the external downloader to
|
||||
use. Currently supports aria2c, avconv,
|
||||
axel, curl, ffmpeg, httpie, wget
|
||||
(Recommended: aria2c)
|
||||
--downloader [PROTO:]NAME Name or path of the external downloader to
|
||||
use (optionally) prefixed by the protocols
|
||||
(http, ftp, m3u8, dash, rstp, rtmp, mms) to
|
||||
use it for. Currently supports native,
|
||||
aria2c, avconv, axel, curl, ffmpeg, httpie,
|
||||
wget (Recommended: aria2c). You can use
|
||||
this option multiple times to set different
|
||||
downloaders for different protocols. For
|
||||
example, --downloader aria2c --downloader
|
||||
"dash,m3u8:native" will use aria2c for
|
||||
http/ftp downloads, and the native
|
||||
downloader for dash/m3u8 downloads
|
||||
(Alias: --external-downloader)
|
||||
--downloader-args NAME:ARGS Give these arguments to the external
|
||||
downloader. Specify the downloader name and
|
||||
the arguments separated by a colon ":". You
|
||||
@@ -363,7 +378,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
stdin), one URL per line. Lines starting
|
||||
with '#', ';' or ']' are considered as
|
||||
comments and ignored
|
||||
-P, --paths TYPE:PATH The paths where the files should be
|
||||
-P, --paths TYPES:PATH The paths where the files should be
|
||||
downloaded. Specify the type of file and
|
||||
the path separated by a colon ":". All the
|
||||
same types as --output are supported.
|
||||
@@ -374,7 +389,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
home path after download is finished. This
|
||||
option is ignored if --output is an
|
||||
absolute path
|
||||
-o, --output [TYPE:]TEMPLATE Output filename template, see "OUTPUT
|
||||
-o, --output [TYPES:]TEMPLATE Output filename template; see "OUTPUT
|
||||
TEMPLATE" for details
|
||||
--output-na-placeholder TEXT Placeholder value for unavailable meta
|
||||
fields in output filename template
|
||||
@@ -450,7 +465,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
## Thumbnail Images:
|
||||
## Thumbnail Options:
|
||||
--write-thumbnail Write thumbnail image to disk
|
||||
--no-write-thumbnail Do not write thumbnail image to disk
|
||||
(default)
|
||||
@@ -551,7 +566,6 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
into a single file
|
||||
--no-audio-multistreams Only one audio stream is downloaded for
|
||||
each output file (default)
|
||||
--all-formats Download all available video formats
|
||||
--prefer-free-formats Prefer video formats with free containers
|
||||
over non-free ones of same quality. Use
|
||||
with "-S ext" to strictly prefer free
|
||||
@@ -598,8 +612,6 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
-2, --twofactor TWOFACTOR Two-factor authentication code
|
||||
-n, --netrc Use .netrc authentication data
|
||||
--video-password PASSWORD Video password (vimeo, youku)
|
||||
|
||||
## Adobe Pass Options:
|
||||
--ap-mso MSO Adobe Pass multiple-system operator (TV
|
||||
provider) identifier, use --ap-list-mso for
|
||||
a list of available MSOs
|
||||
@@ -636,24 +648,24 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
Specify the postprocessor/executable name
|
||||
and the arguments separated by a colon ":"
|
||||
to give the argument to the specified
|
||||
postprocessor/executable. Supported
|
||||
postprocessors are: SponSkrub,
|
||||
ExtractAudio, VideoRemuxer, VideoConvertor,
|
||||
EmbedSubtitle, Metadata, Merger,
|
||||
FixupStretched, FixupM4a, FixupM3u8,
|
||||
SubtitlesConvertor, EmbedThumbnail and
|
||||
SplitChapters. The supported executables
|
||||
are: SponSkrub, FFmpeg, FFprobe, and
|
||||
AtomicParsley. You can also specify
|
||||
"PP+EXE:ARGS" to give the arguments to the
|
||||
specified executable only when being used
|
||||
by the specified postprocessor.
|
||||
Additionally, for ffmpeg/ffprobe, "_i"/"_o"
|
||||
can be appended to the prefix optionally
|
||||
followed by a number to pass the argument
|
||||
before the specified input/output file. Eg:
|
||||
--ppa "Merger+ffmpeg_i1:-v quiet". You can
|
||||
use this option multiple times to give
|
||||
postprocessor/executable. Supported PP are:
|
||||
Merger, ExtractAudio, SplitChapters,
|
||||
Metadata, EmbedSubtitle, EmbedThumbnail,
|
||||
SubtitlesConvertor, ThumbnailsConvertor,
|
||||
VideoRemuxer, VideoConvertor, SponSkrub,
|
||||
FixupStretched, FixupM4a and FixupM3u8. The
|
||||
supported executables are: AtomicParsley,
|
||||
FFmpeg, FFprobe, and SponSkrub. You can
|
||||
also specify "PP+EXE:ARGS" to give the
|
||||
arguments to the specified executable only
|
||||
when being used by the specified
|
||||
postprocessor. Additionally, for
|
||||
ffmpeg/ffprobe, "_i"/"_o" can be appended
|
||||
to the prefix optionally followed by a
|
||||
number to pass the argument before the
|
||||
specified input/output file. Eg: --ppa
|
||||
"Merger+ffmpeg_i1:-v quiet". You can use
|
||||
this option multiple times to give
|
||||
different arguments to different
|
||||
postprocessors. (Alias: --ppa)
|
||||
-k, --keep-video Keep the intermediate video file on disk
|
||||
@@ -669,26 +681,9 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--no-embed-thumbnail Do not embed thumbnail (default)
|
||||
--add-metadata Write metadata to the video file
|
||||
--no-add-metadata Do not write metadata (default)
|
||||
--parse-metadata FIELD:FORMAT Parse additional metadata like title/artist
|
||||
from other fields. Give a template or field
|
||||
name to extract data from and the format to
|
||||
interpret it as, seperated by a ":". Either
|
||||
regular expression with named capture
|
||||
groups or a similar syntax to the output
|
||||
template can be used for the FORMAT.
|
||||
Similarly, the syntax for output template
|
||||
can be used for FIELD to parse the data
|
||||
from multiple fields. The parsed parameters
|
||||
replace any existing values and can be used
|
||||
in output templates. This option can be
|
||||
used multiple times. Example: --parse-
|
||||
metadata "title:%(artist)s - %(title)s"
|
||||
matches a title like "Coldplay - Paradise".
|
||||
Example: --parse-metadata "%(series)s
|
||||
%(episode_number)s:%(title)s" sets the
|
||||
title using series and episode number.
|
||||
Example (regex): --parse-metadata
|
||||
"description:Artist - (?P<artist>.+?)"
|
||||
--parse-metadata FROM:TO Parse additional metadata like title/artist
|
||||
from other fields; see "MODIFYING METADATA"
|
||||
for details
|
||||
--xattrs Write metadata to the video file's xattrs
|
||||
(using dublin core and xdg standards)
|
||||
--fixup POLICY Automatically correct known faults of the
|
||||
@@ -700,12 +695,19 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
path to the binary or its containing
|
||||
directory
|
||||
--exec CMD Execute a command on the file after
|
||||
downloading and post-processing, similar to
|
||||
find's -exec syntax. Example: --exec 'adb
|
||||
push {} /sdcard/Music/ && rm {}'
|
||||
downloading and post-processing. Similar
|
||||
syntax to the output template can be used
|
||||
to pass any field as arguments to the
|
||||
command. An additional field "filepath"
|
||||
that contains the final path of the
|
||||
downloaded file is also available. If no
|
||||
fields are passed, "%(filepath)s" is
|
||||
appended to the end of the command
|
||||
--convert-subs FORMAT Convert the subtitles to another format
|
||||
(currently supported: srt|ass|vtt|lrc)
|
||||
(Alias: --convert-subtitles)
|
||||
--convert-thumbnails FORMAT Convert the thumbnails to another format
|
||||
(currently supported: jpg)
|
||||
--split-chapters Split video into multiple files based on
|
||||
internal chapters. The "chapter:" prefix
|
||||
can be used with "--paths" and "--output"
|
||||
@@ -828,9 +830,9 @@ The `-o` option is used to indicate a template for the output file names while `
|
||||
|
||||
**tl;dr:** [navigate me to examples](#output-template-examples).
|
||||
|
||||
The basic usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is not recommended). However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Date/time fields can also be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it inside the parantheses seperated from the field name using a `>`. For example, `%(duration>%H-%M-%S)s`.
|
||||
The basic usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is not recommended). However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Date/time fields can also be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it inside the parantheses separated from the field name using a `>`. For example, `%(duration>%H-%M-%S)s`.
|
||||
|
||||
Additionally, you can set different output templates for the various metadata files seperately from the general output template by specifying the type of file followed by the template seperated by a colon ":". The different filetypes supported are `subtitle`, `thumbnail`, `description`, `annotation`, `infojson`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video.
|
||||
Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different filetypes supported are `subtitle`, `thumbnail`, `description`, `annotation`, `infojson`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video.
|
||||
|
||||
The available fields are:
|
||||
|
||||
@@ -923,7 +925,7 @@ Available for the media that is a track or a part of a music album:
|
||||
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
||||
- `release_year` (numeric): Year (YYYY) when the album was released
|
||||
|
||||
Available when using `--split-chapters` for videos with internal chapters:
|
||||
Available for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
|
||||
|
||||
- `section_title` (string): Title of the chapter
|
||||
- `section_number` (numeric): Number of the chapter within the file
|
||||
@@ -962,7 +964,7 @@ youtube-dl_test_video_.mp4 # A simple file name
|
||||
# Download YouTube playlist videos in separate directory indexed by video order in a playlist
|
||||
$ yt-dlp -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
|
||||
|
||||
# Download YouTube playlist videos in seperate directories according to their uploaded year
|
||||
# Download YouTube playlist videos in separate directories according to their uploaded year
|
||||
$ yt-dlp -o '%(upload_date>%Y)s/%(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
|
||||
|
||||
# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
|
||||
@@ -983,7 +985,7 @@ $ yt-dlp -o - BaW_jenozKc
|
||||
By default, yt-dlp tries to download the best available quality if you **don't** pass any options.
|
||||
This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, if multiple audiostreams is enabled (`--audio-multistreams`), the default format changes to `-f bestvideo+bestaudio/best`. Similarly, if ffmpeg is unavailable, or if you use yt-dlp to stream to `stdout` (`-o -`), the default becomes `-f best/bestvideo+bestaudio`.
|
||||
|
||||
The general syntax for format selection is `--f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
|
||||
The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
|
||||
|
||||
**tl;dr:** [navigate me to examples](#format-selection-examples).
|
||||
|
||||
@@ -994,8 +996,9 @@ You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`,
|
||||
You can also use special names to select particular edge case formats:
|
||||
|
||||
- `all`: Select all formats
|
||||
- `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio.
|
||||
- `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio.
|
||||
- `mergeall`: Select and merge all formats (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
|
||||
- `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio
|
||||
- `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio
|
||||
- `b`, `best`: Select the best quality format that contains both video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
|
||||
- `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]`
|
||||
- `bv`, `bestvideo`: Select the best quality video-only format. Equivalent to `best*[acodec=none]`
|
||||
@@ -1007,7 +1010,9 @@ You can also use special names to select particular edge case formats:
|
||||
- `ba*`, `bestaudio*`: Select the best quality format that contains audio. It may also contain video. Equivalent to `best*[acodec!=none]`
|
||||
- `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]`
|
||||
|
||||
For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recomended to never actually use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details.
|
||||
For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recomended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details.
|
||||
|
||||
You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.
|
||||
|
||||
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
|
||||
|
||||
@@ -1078,7 +1083,7 @@ You can change the criteria for being considered the `best` by using `-S` (`--fo
|
||||
- `br`: Equivalent to using `tbr,vbr,abr`
|
||||
- `asr`: Audio sample rate in Hz
|
||||
|
||||
Note that any other **numerical** field made available by the extractor can also be used. All fields, unless specified otherwise, are sorted in decending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers format with the smallest resolution. Additionally, you can suffix a prefered value for the fields, seperated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two prefered values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB.
|
||||
Note that any other **numerical** field made available by the extractor can also be used. All fields, unless specified otherwise, are sorted in decending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers format with the smallest resolution. Additionally, you can suffix a prefered value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two prefered values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB.
|
||||
|
||||
The fields `hasvid`, `ie_pref`, `lang` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--force-format-sort`. Apart from these, the default order used is: `quality,res,fps,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. Note that the extractors may override this default order, but they cannot override the user-provided order.
|
||||
|
||||
@@ -1107,10 +1112,17 @@ $ yt-dlp
|
||||
# by default, bestvideo and bestaudio will have the same file name.
|
||||
$ yt-dlp -f 'bv,ba' -o '%(title)s.f%(format_id)s.%(ext)s'
|
||||
|
||||
# Download and merge the best format that has a video stream,
|
||||
# and all audio-only formats into one file
|
||||
$ yt-dlp -f 'bv*+mergeall[vcodec=none]' --audio-multistreams
|
||||
|
||||
# Download and merge the best format that has a video stream,
|
||||
# and the best 2 audio-only formats into one file
|
||||
$ yt-dlp -f 'bv*+ba+ba.2' --audio-multistreams
|
||||
|
||||
|
||||
# The following examples show the old method (without -S) of format selection
|
||||
# and how to use -S to achieve a similar but better result
|
||||
# and how to use -S to achieve a similar but (generally) better result
|
||||
|
||||
# Download the worst video available (old method)
|
||||
$ yt-dlp -f 'wv*+wa/w'
|
||||
@@ -1191,7 +1203,7 @@ $ yt-dlp -S '+codec:h264'
|
||||
$ yt-dlp -f '((bv*[fps>30]/bv*)[height<=720]/(wv*[fps>30]/wv*)) + ba / (b[fps>30]/b)[height<=720]/(w[fps>30]/w)'
|
||||
|
||||
# Download the video with the largest resolution no better than 720p,
|
||||
# or the video with the smallest resolution available if there is no such video,
|
||||
# or the video with the smallest resolution available if there is no such video,
|
||||
# prefering larger framerate for formats with the same resolution
|
||||
$ yt-dlp -S 'res:720,fps'
|
||||
|
||||
@@ -1203,6 +1215,33 @@ $ yt-dlp -S 'res:720,fps'
|
||||
$ yt-dlp -S '+res:480,codec,br'
|
||||
```
|
||||
|
||||
# MODIFYING METADATA

The metadata obtained the the extractors can be modified by using `--parse-metadata FROM:TO`. The general syntax is to give the name of a field or a template (with similar syntax to [output template](#output-template)) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.

Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--add-metadata`.

You can also use this to change only the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. You can use this to set a different "description" and "synopsis", for example.

## Modifying metadata examples

Note that on Windows you may need to use double quotes instead of single.

```bash
# Interpret the title as "Artist - Title"
$ yt-dlp --parse-metadata 'title:%(artist)s - %(title)s'

# Regex example
$ yt-dlp --parse-metadata 'description:Artist - (?P<artist>.+)'

# Set title as "Series name S01E05"
$ yt-dlp --parse-metadata '%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s'

# Set "comment" field in video metadata using description instead of webpage_url
$ yt-dlp --parse-metadata 'description:(?s)(?P<meta_comment>.+)' --add-metadata

```

# PLUGINS

Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`. Currently only `extractor` plugins are supported. Support for `downloader` and `postprocessor` plugins may be added in the future. See [ytdlp_plugins](ytdlp_plugins) for example.
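As a rough illustration of that layout, a minimal plugin extractor might look like the sketch below. The class, URL pattern, and site are invented for the example, and the exact loading rules are an assumption based on the description above; see the bundled ytdlp_plugins directory for the real sample.

```python
# ytdlp_plugins/extractor/__init__.py -- hypothetical plugin file, per the path above
from yt_dlp.extractor.common import InfoExtractor


class SamplePluginIE(InfoExtractor):
    """Toy extractor loaded as a plugin; the URL pattern and site are made up."""
    _VALID_URL = r'https?://(?:www\.)?example\.com/video/(?P<id>\d+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': self._og_search_video_url(webpage),
        }
```

With such a file in place under the directory named above, a URL matching `_VALID_URL` should be handled by the plugin rather than the built-in extractors.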
@@ -1218,13 +1257,18 @@ These are all the deprecated options and the current alternative to achieve the
|
||||
-A, --auto-number -o "%(autonumber)s-%(id)s.%(ext)s"
|
||||
-t, --title -o "%(title)s-%(id)s.%(ext)s"
|
||||
-l, --literal -o accepts literal names
|
||||
--all-formats -f all
|
||||
--autonumber-size NUMBER Use string formatting. Eg: %(autonumber)03d
|
||||
--metadata-from-title FORMAT --parse-metadata "title:FORMAT"
|
||||
--metadata-from-title FORMAT --parse-metadata "%(title)s:FORMAT"
|
||||
--prefer-avconv avconv is no longer officially supported (Alias: --no-prefer-ffmpeg)
|
||||
--prefer-ffmpeg Default (Alias: --no-prefer-avconv)
|
||||
--hls-prefer-native --downloader "m3u8:native"
|
||||
--hls-prefer-ffmpeg --downloader "m3u8:ffmpeg"
|
||||
--avconv-location avconv is no longer officially supported
|
||||
-C, --call-home Not implemented
|
||||
--no-call-home Default
|
||||
--include-ads Not implemented
|
||||
--no-include-ads Default
|
||||
--write-srt --write-subs
|
||||
--no-write-srt --no-write-subs
|
||||
--srt-lang LANGS --sub-langs LANGS
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
- **20min**
|
||||
- **220.ro**
|
||||
- **23video**
|
||||
- **247sports**
|
||||
- **24video**
|
||||
- **3qsdn**: 3Q SDN
|
||||
- **3sat**
|
||||
@@ -171,7 +172,8 @@
|
||||
- **cbsnews**: CBS News
|
||||
- **cbsnews:embed**
|
||||
- **cbsnews:livevideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
- **cbssports**
|
||||
- **cbssports:embed**
|
||||
- **CCMA**
|
||||
- **CCTV**: 央视网
|
||||
- **CDA**
|
||||
@@ -249,6 +251,8 @@
|
||||
- **DiscoveryGoPlaylist**
|
||||
- **DiscoveryNetworksDe**
|
||||
- **DiscoveryPlus**
|
||||
- **DiscoveryPlusIndia**
|
||||
- **DiscoveryPlusIndiaShow**
|
||||
- **DiscoveryVR**
|
||||
- **Disney**
|
||||
- **dlive:stream**
|
||||
@@ -457,6 +461,8 @@
|
||||
- **kuwo:singer**: 酷我音乐 - 歌手
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.it**
|
||||
- **la7.it:pod:episode**
|
||||
- **la7.it:podcast**
|
||||
- **laola1tv**
|
||||
- **laola1tv:embed**
|
||||
- **lbry**
|
||||
@@ -480,6 +486,8 @@
|
||||
- **limelight**
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
- **LineLive**
|
||||
- **LineLiveChannel**
|
||||
- **LineTV**
|
||||
- **linkedin:learning**
|
||||
- **linkedin:learning:course**
|
||||
@@ -506,6 +514,7 @@
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **ManyVids**
|
||||
- **MaoriTV**
|
||||
- **Markiza**
|
||||
- **MarkizaPage**
|
||||
- **massengeschmack.tv**
|
||||
@@ -606,6 +615,7 @@
|
||||
- **ndr:embed**
|
||||
- **ndr:embed:base**
|
||||
- **NDTV**
|
||||
- **Nebula**
|
||||
- **NerdCubedFeed**
|
||||
- **netease:album**: 网易云音乐 - 专辑
|
||||
- **netease:djradio**: 网易云音乐 - 电台
|
||||
@@ -624,6 +634,7 @@
|
||||
- **NextTV**: 壹電視
|
||||
- **Nexx**
|
||||
- **NexxEmbed**
|
||||
- **NFHSNetwork**
|
||||
- **nfl.com** (Currently broken)
|
||||
- **nfl.com:article** (Currently broken)
|
||||
- **NhkVod**
|
||||
@@ -707,6 +718,9 @@
|
||||
- **OutsideTV**
|
||||
- **PacktPub**
|
||||
- **PacktPubCourse**
|
||||
- **PalcoMP3:artist**
|
||||
- **PalcoMP3:song**
|
||||
- **PalcoMP3:video**
|
||||
- **pandora.tv**: 판도라TV
|
||||
- **ParamountNetwork**
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
@@ -1093,6 +1107,7 @@
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
- **video.arnes.si**: Arnes Video
|
||||
- **video.google:search**: Google Video search
|
||||
- **video.sky.it**
|
||||
- **video.sky.it:live**
|
||||
|
||||
test/test_postprocessors.py
@@ -14,10 +14,10 @@ from yt_dlp.postprocessor import MetadataFromFieldPP, MetadataFromTitlePP
class TestMetadataFromField(unittest.TestCase):
def test_format_to_regex(self):
pp = MetadataFromFieldPP(None, ['title:%(title)s - %(artist)s'])
self.assertEqual(pp._data[0]['regex'], r'(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)')
self.assertEqual(pp._data[0]['regex'], r'(?P<title>.+)\ \-\ (?P<artist>.+)')


class TestMetadataFromTitle(unittest.TestCase):
def test_format_to_regex(self):
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
self.assertEqual(pp._titleregex, r'(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)')
self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
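The expected patterns in these assertions come from translating an output-template-style format string into a regex with named capture groups. The standalone sketch below (the helper `template_to_regex` is mine, not the actual MetadataFromFieldPP code) reproduces the updated expected value:

```python
import re


def template_to_regex(fmt):
    """Replace each %(field)s placeholder with a named capture group and
    escape the literal text in between, as the updated test expects."""
    regex, last = '', 0
    for mobj in re.finditer(r'%\((\w+)\)s', fmt):
        regex += re.escape(fmt[last:mobj.start()])
        regex += r'(?P<%s>.+)' % mobj.group(1)
        last = mobj.end()
    return regex + re.escape(fmt[last:])


print(template_to_regex('%(title)s - %(artist)s'))
# (?P<title>.+)\ \-\ (?P<artist>.+)
```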
test/test_utils.py
@@ -23,6 +23,7 @@ from yt_dlp.utils import (
clean_html,
clean_podcast_url,
date_from_str,
datetime_from_str,
DateRange,
detect_exe_version,
determine_ext,
@@ -311,8 +312,18 @@ class TestUtil(unittest.TestCase):
self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week'))
self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week'))
self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year'))
self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month'))
self.assertEqual(date_from_str('20200229+365day'), date_from_str('20200229+1year'))
self.assertEqual(date_from_str('20210131+28day'), date_from_str('20210131+1month'))

def test_datetime_from_str(self):
self.assertEqual(datetime_from_str('yesterday', precision='day'), datetime_from_str('now-1day', precision='auto'))
self.assertEqual(datetime_from_str('now+7day', precision='day'), datetime_from_str('now+1week', precision='auto'))
self.assertEqual(datetime_from_str('now+14day', precision='day'), datetime_from_str('now+2week', precision='auto'))
self.assertEqual(datetime_from_str('20200229+365day', precision='day'), datetime_from_str('20200229+1year', precision='auto'))
self.assertEqual(datetime_from_str('20210131+28day', precision='day'), datetime_from_str('20210131+1month', precision='auto'))
self.assertEqual(datetime_from_str('20210131+59day', precision='day'), datetime_from_str('20210131+2month', precision='auto'))
self.assertEqual(datetime_from_str('now+1day', precision='hour'), datetime_from_str('now+24hours', precision='auto'))
self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto'))
|
||||
|
||||
def test_daterange(self):
|
||||
_20century = DateRange("19000101", "20000101")
|
||||
|
||||
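The week/month/year units exercised above can be used anywhere a date expression is accepted; a small usage sketch (the printed values depend on the current date):

```python
from yt_dlp.utils import DateRange, date_from_str

print(date_from_str('now-1week'))        # a datetime.date, seven days ago
print(date_from_str('20210131+1month'))  # calendar-aware, per the 20210131+28day assertion above

# DateRange understands the same expressions, e.g. "everything from the last month"
last_month = DateRange('now-1month', 'now')
print('20210101' in last_month)
```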
@@ -111,9 +111,17 @@ from .utils import (
|
||||
    process_communicate_or_kill,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
from .extractor import (
    gen_extractor_classes,
    get_info_extractor,
    _LAZY_LOADER,
    _PLUGIN_CLASSES
)
from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader import (
    get_suitable_downloader,
    shorten_protocol_name
)
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    FFmpegFixupM3u8PP,
@@ -283,10 +291,9 @@ class YoutubeDL(object):
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               yt_dlp/postprocessor/__init__.py for a list.
                       * _after_move: Optional. If True, run this post_processor
                               after 'MoveFilesAfterDownload'
                       as well as any further keyword arguments for the
                       postprocessor.
                       * when: When to run the postprocessor. Can be one of
                               pre_process|before_dl|post_process|after_move.
                               Assumed to be 'post_process' if not given
    post_hooks:        A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
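A hedged example of the `postprocessors`/`when` keys documented above (the postprocessor names and the `after_move` value appear elsewhere in this changeset; the URL is the test video used by the ViewSource test further down):

```python
from yt_dlp import YoutubeDL

ydl_opts = {
    'postprocessors': [
        # No 'when' key: runs in the default 'post_process' stage, after download
        {'key': 'FFmpegMetadata'},
        # Runs only after files have been moved to their final location
        {'key': 'ExecAfterDownload', 'exec_cmd': 'echo {}', 'when': 'after_move'},
    ],
}
with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKcj'.rstrip('j')])
```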
@@ -359,9 +366,13 @@ class YoutubeDL(object):
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: Executable of the external downloader to call.
                       None or unset for standard (built-in) downloader.
    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
                       or {'m3u8': 'ffmpeg'} instead.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
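The per-protocol form of `external_downloader` replaces `hls_prefer_native`; a minimal sketch using only keys and values named in the docstring above:

```python
from yt_dlp import YoutubeDL

ydl_opts = {
    'external_downloader': {
        'm3u8': 'native',  # equivalent of the old hls_prefer_native=True
        'dash': 'aria2c',  # hand DASH downloads to aria2c
        # protocols without an entry fall back to the 'default' key,
        # or to the built-in downloader if that is unset too
    },
}
ydl = YoutubeDL(ydl_opts)
```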
@@ -411,7 +422,7 @@ class YoutubeDL(object):

    params = None
    _ies = []
    _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    __prepare_filename_warned = False
    _first_webpage_request = True
    _download_retcode = None
@@ -426,7 +437,7 @@ class YoutubeDL(object):
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self.__prepare_filename_warned = False
        self._first_webpage_request = True
        self._post_hooks = []
@@ -539,7 +550,7 @@ class YoutubeDL(object):
                    when = pp_def['when']
                    del pp_def['when']
                else:
                    when = 'normal'
                    when = 'post_process'
                pp = pp_class(self, **compat_kwargs(pp_def))
                self.add_post_processor(pp, when=when)

@@ -593,7 +604,7 @@ class YoutubeDL(object):
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='normal'):
    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)
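The renamed buckets are also what `add_post_processor()` indexes into; a small sketch showing the new default stage and an explicit one (scheduling metadata embedding before download makes little practical sense and is only meant to show the parameter):

```python
from yt_dlp import YoutubeDL
from yt_dlp.postprocessor import FFmpegMetadataPP

ydl = YoutubeDL({})
ydl.add_post_processor(FFmpegMetadataPP(ydl))                    # default bucket: 'post_process'
ydl.add_post_processor(FFmpegMetadataPP(ydl), when='before_dl')  # runs before the actual download
```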
@@ -1541,6 +1552,66 @@ class YoutubeDL(object):
|
||||
selectors.append(current_selector)
|
||||
return selectors
|
||||
|
||||
def _merge(formats_pair):
|
||||
format_1, format_2 = formats_pair
|
||||
|
||||
formats_info = []
|
||||
formats_info.extend(format_1.get('requested_formats', (format_1,)))
|
||||
formats_info.extend(format_2.get('requested_formats', (format_2,)))
|
||||
|
||||
if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
|
||||
get_no_more = {"video": False, "audio": False}
|
||||
for (i, fmt_info) in enumerate(formats_info):
|
||||
for aud_vid in ["audio", "video"]:
|
||||
if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
|
||||
if get_no_more[aud_vid]:
|
||||
formats_info.pop(i)
|
||||
get_no_more[aud_vid] = True
|
||||
|
||||
if len(formats_info) == 1:
|
||||
return formats_info[0]
|
||||
|
||||
video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
|
||||
audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
|
||||
|
||||
the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
|
||||
the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
|
||||
|
||||
output_ext = self.params.get('merge_output_format')
|
||||
if not output_ext:
|
||||
if the_only_video:
|
||||
output_ext = the_only_video['ext']
|
||||
elif the_only_audio and not video_fmts:
|
||||
output_ext = the_only_audio['ext']
|
||||
else:
|
||||
output_ext = 'mkv'
|
||||
|
||||
new_dict = {
|
||||
'requested_formats': formats_info,
|
||||
'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
|
||||
'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
|
||||
'ext': output_ext,
|
||||
}
|
||||
|
||||
if the_only_video:
|
||||
new_dict.update({
|
||||
'width': the_only_video.get('width'),
|
||||
'height': the_only_video.get('height'),
|
||||
'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
|
||||
'fps': the_only_video.get('fps'),
|
||||
'vcodec': the_only_video.get('vcodec'),
|
||||
'vbr': the_only_video.get('vbr'),
|
||||
'stretched_ratio': the_only_video.get('stretched_ratio'),
|
||||
})
|
||||
|
||||
if the_only_audio:
|
||||
new_dict.update({
|
||||
'acodec': the_only_audio.get('acodec'),
|
||||
'abr': the_only_audio.get('abr'),
|
||||
})
|
||||
|
||||
return new_dict
|
||||
|
||||
def _build_selector_function(selector):
|
||||
if isinstance(selector, list): # ,
|
||||
fs = [_build_selector_function(s) for s in selector]
|
||||
@@ -1565,32 +1636,46 @@ class YoutubeDL(object):
|
||||
return []
|
||||
|
||||
elif selector.type == SINGLE: # atom
|
||||
format_spec = selector.selector if selector.selector is not None else 'best'
|
||||
format_spec = (selector.selector or 'best').lower()
|
||||
|
||||
# TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
|
||||
if format_spec == 'all':
|
||||
def selector_function(ctx):
|
||||
formats = list(ctx['formats'])
|
||||
if formats:
|
||||
for f in formats:
|
||||
yield f
|
||||
elif format_spec == 'mergeall':
|
||||
def selector_function(ctx):
|
||||
formats = list(ctx['formats'])
|
||||
if not formats:
|
||||
return
|
||||
merged_format = formats[0]
|
||||
for f in formats[1:]:
|
||||
merged_format = _merge((merged_format, f))
|
||||
yield merged_format
|
||||
|
||||
else:
|
||||
format_fallback = False
|
||||
format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
|
||||
if format_spec_obj is not None:
|
||||
format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
|
||||
format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
|
||||
not_format_type = 'v' if format_type == 'a' else 'a'
|
||||
format_modified = format_spec_obj.group(3) is not None
|
||||
mobj = re.match(
|
||||
r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
|
||||
format_spec)
|
||||
if mobj is not None:
|
||||
format_idx = int_or_none(mobj.group('n'), default=1)
|
||||
format_idx = format_idx - 1 if mobj.group('bw')[0] == 'w' else -format_idx
|
||||
format_type = (mobj.group('type') or [None])[0]
|
||||
not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
|
||||
format_modified = mobj.group('mod') is not None
|
||||
|
||||
format_fallback = not format_type and not format_modified # for b, w
|
||||
filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
|
||||
if format_type and format_modified # bv*, ba*, wv*, wa*
|
||||
else (lambda f: f.get(not_format_type + 'codec') == 'none')
|
||||
if format_type # bv, ba, wv, wa
|
||||
else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
|
||||
if not format_modified # b, w
|
||||
else None) # b*, w*
|
||||
filter_f = (
|
||||
(lambda f: f.get('%scodec' % format_type) != 'none')
|
||||
if format_type and format_modified # bv*, ba*, wv*, wa*
|
||||
else (lambda f: f.get('%scodec' % not_format_type) == 'none')
|
||||
if format_type # bv, ba, wv, wa
|
||||
else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
|
||||
if not format_modified # b, w
|
||||
else None) # b*, w*
|
||||
else:
|
||||
format_idx = -1
|
||||
filter_f = ((lambda f: f.get('ext') == format_spec)
|
||||
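To summarise what the rewritten atom selector accepts: `all`, `mergeall`, and a new `.<n>` suffix on the best/worst forms (from the `(?P<n>[1-9]\d*)` group). A hedged sketch of format strings this parser understands:

```python
from yt_dlp import YoutubeDL

# 'all'       -> yield every available format individually
# 'mergeall'  -> merge every format into a single file (mkv unless merge_output_format says otherwise)
# 'best.2'    -> the second-best complete format
# 'bv.1+ba.2' -> best video-only stream merged with the second-best audio-only stream
ydl = YoutubeDL({'format': 'mergeall'})
```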
@@ -1602,75 +1687,18 @@ class YoutubeDL(object):
|
||||
if not formats:
|
||||
return
|
||||
matches = list(filter(filter_f, formats)) if filter_f is not None else formats
|
||||
if matches:
|
||||
n = len(matches)
|
||||
if -n <= format_idx < n:
|
||||
yield matches[format_idx]
|
||||
elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
|
||||
elif format_fallback and ctx['incomplete_formats']:
|
||||
# for extractors with incomplete formats (audio only (soundcloud)
|
||||
# or video only (imgur)) best/worst will fallback to
|
||||
# best/worst {video,audio}-only format
|
||||
yield formats[format_idx]
|
||||
n = len(formats)
|
||||
if -n <= format_idx < n:
|
||||
yield formats[format_idx]
|
||||
|
||||
elif selector.type == MERGE: # +
|
||||
def _merge(formats_pair):
|
||||
format_1, format_2 = formats_pair
|
||||
|
||||
formats_info = []
|
||||
formats_info.extend(format_1.get('requested_formats', (format_1,)))
|
||||
formats_info.extend(format_2.get('requested_formats', (format_2,)))
|
||||
|
||||
if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
|
||||
get_no_more = {"video": False, "audio": False}
|
||||
for (i, fmt_info) in enumerate(formats_info):
|
||||
for aud_vid in ["audio", "video"]:
|
||||
if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
|
||||
if get_no_more[aud_vid]:
|
||||
formats_info.pop(i)
|
||||
get_no_more[aud_vid] = True
|
||||
|
||||
if len(formats_info) == 1:
|
||||
return formats_info[0]
|
||||
|
||||
video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
|
||||
audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
|
||||
|
||||
the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
|
||||
the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
|
||||
|
||||
output_ext = self.params.get('merge_output_format')
|
||||
if not output_ext:
|
||||
if the_only_video:
|
||||
output_ext = the_only_video['ext']
|
||||
elif the_only_audio and not video_fmts:
|
||||
output_ext = the_only_audio['ext']
|
||||
else:
|
||||
output_ext = 'mkv'
|
||||
|
||||
new_dict = {
|
||||
'requested_formats': formats_info,
|
||||
'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
|
||||
'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
|
||||
'ext': output_ext,
|
||||
}
|
||||
|
||||
if the_only_video:
|
||||
new_dict.update({
|
||||
'width': the_only_video.get('width'),
|
||||
'height': the_only_video.get('height'),
|
||||
'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
|
||||
'fps': the_only_video.get('fps'),
|
||||
'vcodec': the_only_video.get('vcodec'),
|
||||
'vbr': the_only_video.get('vbr'),
|
||||
'stretched_ratio': the_only_video.get('stretched_ratio'),
|
||||
})
|
||||
|
||||
if the_only_audio:
|
||||
new_dict.update({
|
||||
'acodec': the_only_audio.get('acodec'),
|
||||
'abr': the_only_audio.get('abr'),
|
||||
})
|
||||
|
||||
return new_dict
|
||||
|
||||
selector_1, selector_2 = map(_build_selector_function, selector.selector)
|
||||
|
||||
def selector_function(ctx):
|
||||
@@ -2085,13 +2113,12 @@ class YoutubeDL(object):
|
||||
self.post_extract(info_dict)
|
||||
self._num_downloads += 1
|
||||
|
||||
info_dict = self.pre_process(info_dict)
|
||||
info_dict, _ = self.pre_process(info_dict)
|
||||
|
||||
# info_dict['_filename'] needs to be set for backward compatibility
|
||||
info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
|
||||
temp_filename = self.prepare_filename(info_dict, 'temp')
|
||||
files_to_move = {}
|
||||
skip_dl = self.params.get('skip_download', False)
|
||||
|
||||
# Forced printings
|
||||
self.__forced_printings(info_dict, full_filename, incomplete=False)
|
||||
@@ -2168,11 +2195,9 @@ class YoutubeDL(object):
|
||||
# ie = self.get_info_extractor(info_dict['extractor_key'])
|
||||
for sub_lang, sub_info in subtitles.items():
|
||||
sub_format = sub_info['ext']
|
||||
sub_fn = self.prepare_filename(info_dict, 'subtitle')
|
||||
sub_filename = subtitles_filename(
|
||||
temp_filename if not skip_dl else sub_fn,
|
||||
sub_lang, sub_format, info_dict.get('ext'))
|
||||
sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
|
||||
sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
|
||||
sub_filename_final = subtitles_filename(
|
||||
self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
|
||||
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
|
||||
sub_info['filepath'] = sub_filename
|
||||
@@ -2200,28 +2225,6 @@ class YoutubeDL(object):
|
||||
(sub_lang, error_to_compat_str(err)))
|
||||
continue
|
||||
|
||||
if skip_dl:
|
||||
if self.params.get('convertsubtitles', False):
|
||||
# subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
|
||||
filename_real_ext = os.path.splitext(full_filename)[1][1:]
|
||||
filename_wo_ext = (
|
||||
os.path.splitext(full_filename)[0]
|
||||
if filename_real_ext == info_dict['ext']
|
||||
else full_filename)
|
||||
afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
|
||||
# if subconv.available:
|
||||
# info_dict['__postprocessors'].append(subconv)
|
||||
if os.path.exists(encodeFilename(afilename)):
|
||||
self.to_screen(
|
||||
'[download] %s has already been downloaded and '
|
||||
'converted' % afilename)
|
||||
else:
|
||||
try:
|
||||
self.post_process(full_filename, info_dict, files_to_move)
|
||||
except PostProcessingError as err:
|
||||
self.report_error('Postprocessing: %s' % str(err))
|
||||
return
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = self.prepare_filename(info_dict, 'infojson')
|
||||
if not self._ensure_dir_exists(encodeFilename(infofn)):
|
||||
@@ -2237,11 +2240,10 @@ class YoutubeDL(object):
|
||||
return
|
||||
info_dict['__infojson_filename'] = infofn
|
||||
|
||||
thumbfn = self.prepare_filename(info_dict, 'thumbnail')
|
||||
thumb_fn_temp = temp_filename if not skip_dl else thumbfn
|
||||
for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
|
||||
thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
|
||||
thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
|
||||
for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
|
||||
thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
|
||||
thumb_filename = replace_extension(
|
||||
self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
|
||||
files_to_move[thumb_filename_temp] = thumb_filename
|
||||
|
||||
# Write internet shortcut files
|
||||
@@ -2293,9 +2295,20 @@ class YoutubeDL(object):
|
||||
if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
|
||||
return
|
||||
|
||||
# Download
|
||||
try:
|
||||
info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
|
||||
except PostProcessingError as err:
|
||||
self.report_error('Preprocessing: %s' % str(err))
|
||||
return
|
||||
|
||||
must_record_download_archive = False
|
||||
if not skip_dl:
|
||||
if self.params.get('skip_download', False):
|
||||
info_dict['filepath'] = temp_filename
|
||||
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
|
||||
info_dict['__files_to_move'] = files_to_move
|
||||
info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
|
||||
else:
|
||||
# Download
|
||||
try:
|
||||
|
||||
def existing_file(*filepaths):
|
||||
@@ -2591,7 +2604,7 @@ class YoutubeDL(object):
|
||||
def actual_post_extract(info_dict):
|
||||
if info_dict.get('_type') in ('playlist', 'multi_video'):
|
||||
for video_dict in info_dict.get('entries', {}):
|
||||
actual_post_extract(video_dict)
|
||||
actual_post_extract(video_dict or {})
|
||||
return
|
||||
|
||||
if '__post_extractor' not in info_dict:
|
||||
@@ -2602,13 +2615,14 @@ class YoutubeDL(object):
|
||||
del info_dict['__post_extractor']
|
||||
return
|
||||
|
||||
actual_post_extract(info_dict)
|
||||
actual_post_extract(info_dict or {})
|
||||
|
||||
def pre_process(self, ie_info):
|
||||
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
|
||||
info = dict(ie_info)
|
||||
for pp in self._pps['beforedl']:
|
||||
info['__files_to_move'] = files_to_move or {}
|
||||
for pp in self._pps[key]:
|
||||
info = self.run_pp(pp, info)
|
||||
return info
|
||||
return info, info.pop('__files_to_move', None)
|
||||
|
||||
def post_process(self, filename, ie_info, files_to_move=None):
|
||||
"""Run all the postprocessors on the given file."""
|
||||
@@ -2616,11 +2630,11 @@ class YoutubeDL(object):
|
||||
info['filepath'] = filename
|
||||
info['__files_to_move'] = files_to_move or {}
|
||||
|
||||
for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
|
||||
for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
|
||||
info = self.run_pp(pp, info)
|
||||
info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
|
||||
del info['__files_to_move']
|
||||
for pp in self._pps['aftermove']:
|
||||
for pp in self._pps['after_move']:
|
||||
info = self.run_pp(pp, info)
|
||||
return info
|
||||
|
||||
@@ -2761,7 +2775,7 @@ class YoutubeDL(object):
|
||||
'|',
|
||||
format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
|
||||
format_field(f, 'tbr', '%4dk'),
|
||||
f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n").replace('niconico_', ''),
|
||||
shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
|
||||
'|',
|
||||
format_field(f, 'vcodec', default='unknown').replace('none', ''),
|
||||
format_field(f, 'vbr', '%4dk'),
|
||||
|
||||
@@ -228,8 +228,11 @@ def _real_main(argv=None):
|
||||
if not re.match(remux_regex, opts.remuxvideo):
|
||||
parser.error('invalid video remux format specified')
|
||||
if opts.convertsubtitles is not None:
|
||||
if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
|
||||
if opts.convertsubtitles not in ('srt', 'vtt', 'ass', 'lrc'):
|
||||
parser.error('invalid subtitle format specified')
|
||||
if opts.convertthumbnails is not None:
|
||||
if opts.convertthumbnails not in ('jpg', ):
|
||||
parser.error('invalid thumbnail format specified')
|
||||
|
||||
if opts.date is not None:
|
||||
date = DateRange.day(opts.date)
|
||||
@@ -322,7 +325,22 @@ def _real_main(argv=None):
|
||||
postprocessors.append({
|
||||
'key': 'MetadataFromField',
|
||||
'formats': opts.metafromfield,
|
||||
'when': 'beforedl'
|
||||
# Run this immediately after extraction is complete
|
||||
'when': 'pre_process'
|
||||
})
|
||||
if opts.convertsubtitles:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegSubtitlesConvertor',
|
||||
'format': opts.convertsubtitles,
|
||||
# Run this before the actual video download
|
||||
'when': 'before_dl'
|
||||
})
|
||||
if opts.convertthumbnails:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegThumbnailsConvertor',
|
||||
'format': opts.convertthumbnails,
|
||||
# Run this before the actual video download
|
||||
'when': 'before_dl'
|
||||
})
|
||||
if opts.extractaudio:
|
||||
postprocessors.append({
|
||||
@@ -351,15 +369,11 @@ def _real_main(argv=None):
|
||||
# so metadata can be added here.
|
||||
if opts.addmetadata:
|
||||
postprocessors.append({'key': 'FFmpegMetadata'})
|
||||
if opts.convertsubtitles:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegSubtitlesConvertor',
|
||||
'format': opts.convertsubtitles,
|
||||
})
|
||||
if opts.embedsubtitles:
|
||||
already_have_subtitle = opts.writesubtitles
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
# already_have_subtitle = True prevents the file from being deleted after embedding
|
||||
'already_have_subtitle': already_have_subtitle
|
||||
})
|
||||
if not already_have_subtitle:
|
||||
@@ -368,15 +382,9 @@ def _real_main(argv=None):
|
||||
# this was the old behaviour if only --all-sub was given.
|
||||
if opts.allsubtitles and not opts.writeautomaticsub:
|
||||
opts.writesubtitles = True
|
||||
if opts.embedthumbnail:
|
||||
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
|
||||
postprocessors.append({
|
||||
'key': 'EmbedThumbnail',
|
||||
'already_have_thumbnail': already_have_thumbnail
|
||||
})
|
||||
if not already_have_thumbnail:
|
||||
opts.writethumbnail = True
|
||||
# This should be below most ffmpeg PP because it may cut parts out from the video
|
||||
# This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment
|
||||
# but must be below EmbedSubtitle and FFmpegMetadata
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29
|
||||
# If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found
|
||||
if opts.sponskrub is not False:
|
||||
postprocessors.append({
|
||||
@@ -387,6 +395,15 @@ def _real_main(argv=None):
|
||||
'force': opts.sponskrub_force,
|
||||
'ignoreerror': opts.sponskrub is None,
|
||||
})
|
||||
if opts.embedthumbnail:
|
||||
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
|
||||
postprocessors.append({
|
||||
'key': 'EmbedThumbnail',
|
||||
# already_have_thumbnail = True prevents the file from being deleted after embedding
|
||||
'already_have_thumbnail': already_have_thumbnail
|
||||
})
|
||||
if not already_have_thumbnail:
|
||||
opts.writethumbnail = True
|
||||
if opts.split_chapters:
|
||||
postprocessors.append({'key': 'FFmpegSplitChapters'})
|
||||
# XAttrMetadataPP should be run after post-processors that may change file contents
|
||||
@@ -397,7 +414,8 @@ def _real_main(argv=None):
|
||||
postprocessors.append({
|
||||
'key': 'ExecAfterDownload',
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
'when': 'aftermove'
|
||||
# Run this only after the files have been moved to their final locations
|
||||
'when': 'after_move'
|
||||
})
|
||||
|
||||
def report_args_compat(arg, name):
|
||||
@@ -423,7 +441,6 @@ def _real_main(argv=None):
|
||||
else match_filter_func(opts.match_filter))
|
||||
|
||||
ydl_opts = {
|
||||
'convertsubtitles': opts.convertsubtitles,
|
||||
'usenetrc': opts.usenetrc,
|
||||
'username': opts.username,
|
||||
'password': opts.password,
|
||||
|
||||
@@ -78,6 +78,15 @@ try:
except ImportError:  # Python 2
    import Cookie as compat_cookies

if sys.version_info[0] == 2:
    class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
        def load(self, rawdata):
            if isinstance(rawdata, compat_str):
                rawdata = str(rawdata)
            return super(compat_cookies_SimpleCookie, self).load(rawdata)
else:
    compat_cookies_SimpleCookie = compat_cookies.SimpleCookie

try:
    import html.entities as compat_html_entities
except ImportError:  # Python 2
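For context on the shim: under Python 2, `SimpleCookie.load()` is unreliable with unicode input, so the wrapper coerces `compat_str` to a byte string before delegating; on Python 3 the stock class is used unchanged. A minimal call sketch (the cookie string is made up):

```python
from yt_dlp.compat import compat_cookies_SimpleCookie

cookies = compat_cookies_SimpleCookie()
cookies.load('session=abc123; Path=/')  # same call works for str and unicode input
print(cookies['session'].value)         # abc123
```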
@@ -3020,6 +3029,7 @@ __all__ = [
|
||||
'compat_cookiejar',
|
||||
'compat_cookiejar_Cookie',
|
||||
'compat_cookies',
|
||||
'compat_cookies_SimpleCookie',
|
||||
'compat_ctypes_WINFUNCTYPE',
|
||||
'compat_etree_Element',
|
||||
'compat_etree_fromstring',
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
)
|
||||
@@ -42,6 +43,23 @@ PROTOCOL_MAP = {
}


def shorten_protocol_name(proto, simplify=False):
    short_protocol_names = {
        'm3u8_native': 'm3u8_n',
        'http_dash_segments': 'dash',
        'niconico_dmc': 'dmc',
    }
    if simplify:
        short_protocol_names.update({
            'https': 'http',
            'ftps': 'ftp',
            'm3u8_native': 'm3u8',
            'm3u8_frag_urls': 'm3u8',
            'dash_frag_urls': 'dash',
        })
    return short_protocol_names.get(proto, proto)


def get_suitable_downloader(info_dict, params={}, default=HttpFD):
    """Get the downloader class that can handle the info dict."""
    protocol = determine_protocol(info_dict)
@@ -50,8 +68,14 @@ def get_suitable_downloader(info_dict, params={}, default=HttpFD):
|
||||
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
||||
# return FFmpegFD
|
||||
|
||||
external_downloader = params.get('external_downloader')
|
||||
if external_downloader is not None:
|
||||
downloaders = params.get('external_downloader')
|
||||
external_downloader = (
|
||||
downloaders if isinstance(downloaders, compat_str) or downloaders is None
|
||||
else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default')))
|
||||
if external_downloader and external_downloader.lower() == 'native':
|
||||
external_downloader = 'native'
|
||||
|
||||
if external_downloader not in (None, 'native'):
|
||||
ed = get_external_downloader(external_downloader)
|
||||
if ed.can_download(info_dict, external_downloader):
|
||||
return ed
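The dict lookup above first simplifies the protocol name, so one `'http'` or `'m3u8'` entry also covers `https` and `m3u8_native`. A standalone sketch of that resolution step (the helper name is real, the wrapper function is only illustrative):

```python
from yt_dlp.downloader import shorten_protocol_name

def pick_external_downloader(downloaders, protocol):
    # Mirrors the expression in get_suitable_downloader above (sketch only)
    if downloaders is None or isinstance(downloaders, str):
        return downloaders  # old single-executable style, or nothing configured
    return downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default'))

print(pick_external_downloader({'m3u8': 'native'}, 'm3u8_native'))  # 'native'
print(pick_external_downloader({'http': 'aria2c'}, 'https'))        # 'aria2c' - https simplifies to http
print(pick_external_downloader('aria2c', 'https'))                  # 'aria2c' - legacy string form
```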
|
||||
@@ -59,6 +83,8 @@ def get_suitable_downloader(info_dict, params={}, default=HttpFD):
|
||||
if protocol.startswith('m3u8'):
|
||||
if info_dict.get('is_live'):
|
||||
return FFmpegFD
|
||||
elif external_downloader == 'native':
|
||||
return HlsFD
|
||||
elif _get_real_downloader(info_dict, 'frag_urls', params, None):
|
||||
return HlsFD
|
||||
elif params.get('hls_prefer_native') is True:
|
||||
@@ -70,6 +96,7 @@ def get_suitable_downloader(info_dict, params={}, default=HttpFD):
|
||||
|
||||
|
||||
__all__ = [
|
||||
'get_suitable_downloader',
|
||||
'FileDownloader',
|
||||
'get_suitable_downloader',
|
||||
'shorten_protocol_name',
|
||||
]
|
||||
|
||||
@@ -20,7 +20,7 @@ from ..utils import (
|
||||
class DashSegmentsFD(FragmentFD):
|
||||
"""
|
||||
Download segments in a DASH manifest. External downloaders can take over
|
||||
the fragment downloads by supporting the 'frag_urls' protocol
|
||||
the fragment downloads by supporting the 'dash_frag_urls' protocol
|
||||
"""
|
||||
|
||||
FD_NAME = 'dashsegments'
|
||||
@@ -30,7 +30,7 @@ class DashSegmentsFD(FragmentFD):
|
||||
fragments = info_dict['fragments'][:1] if self.params.get(
|
||||
'test', False) else info_dict['fragments']
|
||||
|
||||
real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
|
||||
real_downloader = _get_real_downloader(info_dict, 'dash_frag_urls', self.params, None)
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
|
||||
@@ -81,11 +81,15 @@ class ExternalFD(FileDownloader):
|
||||
|
||||
@property
|
||||
def exe(self):
|
||||
return self.params.get('external_downloader')
|
||||
return self.get_basename()
|
||||
|
||||
@classmethod
|
||||
def available(cls, path=None):
|
||||
return check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
|
||||
path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
|
||||
if path:
|
||||
cls.exe = path
|
||||
return path
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def supports(cls, info_dict):
|
||||
@@ -259,7 +263,7 @@ class WgetFD(ExternalFD):
|
||||
|
||||
class Aria2cFD(ExternalFD):
|
||||
AVAILABLE_OPT = '-v'
|
||||
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'frag_urls')
|
||||
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls')
|
||||
|
||||
@staticmethod
|
||||
def supports_manifest(manifest):
|
||||
@@ -310,9 +314,11 @@ class Aria2cFD(ExternalFD):
|
||||
|
||||
|
||||
class HttpieFD(ExternalFD):
|
||||
AVAILABLE_OPT = '--version'
|
||||
|
||||
@classmethod
|
||||
def available(cls, path=None):
|
||||
return check_executable(path or 'http', ['--version'])
|
||||
return ExternalFD.available(cls, path or 'http')
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
||||
@@ -327,7 +333,8 @@ class FFmpegFD(ExternalFD):
|
||||
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
|
||||
|
||||
@classmethod
|
||||
def available(cls, path=None): # path is ignored for ffmpeg
|
||||
def available(cls, path=None):
|
||||
# TODO: Fix path for ffmpeg
|
||||
return FFmpegPostProcessor().available
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
@@ -484,4 +491,4 @@ def get_external_downloader(external_downloader):
|
||||
downloader . """
|
||||
# Drop .exe extension on Windows
|
||||
bn = os.path.splitext(os.path.basename(external_downloader))[0]
|
||||
return _BY_NAME[bn]
|
||||
return _BY_NAME.get(bn)
|
||||
|
||||
@@ -32,7 +32,7 @@ from ..utils import (
|
||||
class HlsFD(FragmentFD):
|
||||
"""
|
||||
Download segments in a m3u8 manifest. External downloaders can take over
|
||||
the fragment downloads by supporting the 'frag_urls' protocol and
|
||||
the fragment downloads by supporting the 'm3u8_frag_urls' protocol and
|
||||
re-defining 'supports_manifest' function
|
||||
"""
|
||||
|
||||
@@ -95,7 +95,7 @@ class HlsFD(FragmentFD):
|
||||
# fd.add_progress_hook(ph)
|
||||
return fd.real_download(filename, info_dict)
|
||||
|
||||
real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
|
||||
real_downloader = _get_real_downloader(info_dict, 'm3u8_frag_urls', self.params, None)
|
||||
if real_downloader and not real_downloader.supports_manifest(s):
|
||||
real_downloader = None
|
||||
if real_downloader:
|
||||
|
||||
@@ -272,7 +272,8 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
else: # request JSON file
|
||||
if not document_id:
|
||||
video_id = self._search_regex(
|
||||
r'/play/(?:config|media)/(\d+)', webpage, 'media id')
|
||||
(r'/play/(?:config|media|sola)/(\d+)', r'contentId["\']\s*:\s*(\d+)'),
|
||||
webpage, 'media id', default=None)
|
||||
info = self._extract_media_info(
|
||||
'http://www.ardmediathek.de/play/media/%s' % video_id,
|
||||
webpage, video_id)
|
||||
@@ -413,12 +414,6 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
# playlist of type 'sammlung'
|
||||
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
|
||||
@@ -518,7 +513,13 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
return self.playlist_result(entries, playlist_title=display_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
display_id = mobj.group('display_id')
|
||||
if display_id:
|
||||
display_id = display_id.rstrip('/')
|
||||
if not display_id:
|
||||
display_id = video_id
|
||||
|
||||
if mobj.group('mode') in ('sendung', 'sammlung'):
|
||||
# this is a playlist-URL
|
||||
@@ -529,9 +530,9 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
|
||||
player_page = self._download_json(
|
||||
'https://api.ardmediathek.de/public-gateway',
|
||||
video_id, data=json.dumps({
|
||||
display_id, data=json.dumps({
|
||||
'query': '''{
|
||||
playerPage(client: "ard", clipId: "%s") {
|
||||
playerPage(client:"%s", clipId: "%s") {
|
||||
blockedByFsk
|
||||
broadcastedOn
|
||||
maturityContentRating
|
||||
@@ -561,7 +562,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % video_id,
|
||||
}''' % (mobj.group('client'), video_id),
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json'
|
||||
})['data']['playerPage']
|
||||
@@ -586,6 +587,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
|
||||
info.update({
|
||||
'age_limit': age_limit,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
|
||||
|
||||
yt_dlp/extractor/arnes.py (new file, 101 lines)
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class ArnesIE(InfoExtractor):
|
||||
IE_NAME = 'video.arnes.si'
|
||||
IE_DESC = 'Arnes Video'
|
||||
_VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
|
||||
'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
|
||||
'info_dict': {
|
||||
'id': 'a1qrWTOQfVoU',
|
||||
'ext': 'mp4',
|
||||
'title': 'Linearna neodvisnost, definicija',
|
||||
'description': 'Linearna neodvisnost, definicija',
|
||||
'license': 'PRIVATE',
|
||||
'creator': 'Polona Oblak',
|
||||
'timestamp': 1585063725,
|
||||
'upload_date': '20200324',
|
||||
'channel': 'Polona Oblak',
|
||||
'channel_id': 'q6pc04hw24cj',
|
||||
'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
|
||||
'duration': 596.75,
|
||||
'view_count': int,
|
||||
'tags': ['linearna_algebra'],
|
||||
'start_time': 10,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_BASE_URL = 'https://video.arnes.si'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
|
||||
title = video['title']
|
||||
|
||||
formats = []
|
||||
for media in (video.get('media') or []):
|
||||
media_url = media.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._BASE_URL + media_url,
|
||||
'format_id': remove_start(media.get('format'), 'FORMAT_'),
|
||||
'format_note': media.get('formatTranslation'),
|
||||
'width': int_or_none(media.get('width')),
|
||||
'height': int_or_none(media.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
channel = video.get('channel') or {}
|
||||
channel_id = channel.get('url')
|
||||
thumbnail = video.get('thumbnailUrl')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': self._BASE_URL + thumbnail,
|
||||
'description': video.get('description'),
|
||||
'license': video.get('license'),
|
||||
'creator': video.get('author'),
|
||||
'timestamp': parse_iso8601(video.get('creationTime')),
|
||||
'channel': channel.get('name'),
|
||||
'channel_id': channel_id,
|
||||
'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
|
||||
'duration': float_or_none(video.get('duration'), 1000),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'tags': video.get('hashtags'),
|
||||
'start_time': int_or_none(compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
|
||||
}
|
||||
@@ -120,6 +120,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
'url': 'https://www.bilibili.com/video/BV1bK411W797',
|
||||
'info_dict': {
|
||||
'id': 'BV1bK411W797',
|
||||
'title': '物语中的人物是如何吐槽自己的OP的'
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}]
|
||||
@@ -156,7 +157,8 @@ class BiliBiliIE(InfoExtractor):
|
||||
if r is not None:
|
||||
self.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id)
|
||||
return r
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
else:
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
|
||||
if 'anime/' not in url:
|
||||
cid = self._search_regex(
|
||||
@@ -274,7 +276,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
|
||||
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
|
||||
@@ -1,38 +1,113 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .cbs import CBSBaseIE
|
||||
import re
|
||||
|
||||
# from .cbs import CBSBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class CBSSportsIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P<id>[^/?#&]+)'
|
||||
|
||||
# class CBSSportsEmbedIE(CBSBaseIE):
|
||||
class CBSSportsEmbedIE(InfoExtractor):
|
||||
IE_NAME = 'cbssports:embed'
|
||||
_VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
|
||||
(?:
|
||||
ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
|
||||
pcid%3D(?P<pcid>\d+)
|
||||
)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/',
|
||||
'info_dict': {
|
||||
'id': '1214315075735',
|
||||
'ext': 'mp4',
|
||||
'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder',
|
||||
'description': 'md5:df6f48622612c2d6bd2e295ddef58def',
|
||||
'timestamp': 1524111457,
|
||||
'upload_date': '20180419',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/',
|
||||
'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, filter_query, video_id):
|
||||
return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
||||
# def _extract_video_info(self, filter_query, video_id):
|
||||
# return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
uuid, pcid = re.match(self._VALID_URL, url).groups()
|
||||
query = {'id': uuid} if uuid else {'pcid': pcid}
|
||||
video = self._download_json(
|
||||
'https://www.cbssports.com/api/content/video/',
|
||||
uuid or pcid, query=query)[0]
|
||||
video_id = video['id']
|
||||
title = video['title']
|
||||
metadata = video.get('metaData') or {}
|
||||
# return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
|
||||
# return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
metadata['files'][0]['url'], video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
self._sort_formats(formats)
|
||||
|
||||
image = video.get('image')
|
||||
thumbnails = None
|
||||
if image:
|
||||
image_path = image.get('path')
|
||||
if image_path:
|
||||
thumbnails = [{
|
||||
'url': image_path,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
'filesize': int_or_none(image.get('size')),
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': video.get('description'),
|
||||
'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
|
||||
'duration': int_or_none(metadata.get('duration')),
|
||||
}
|
||||
|
||||
|
||||
class CBSSportsBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
[r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'],
|
||||
webpage, 'video id')
|
||||
return self._extract_video_info('byId=%s' % video_id, video_id)
|
||||
iframe_url = self._search_regex(
|
||||
r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"',
|
||||
webpage, 'embed url')
|
||||
return self.url_result(iframe_url, CBSSportsEmbedIE.ie_key())
|
||||
|
||||
|
||||
class CBSSportsIE(CBSSportsBaseIE):
|
||||
IE_NAME = 'cbssports'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
|
||||
'info_dict': {
|
||||
'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cover 3: Stanford Spring Gleaning',
|
||||
'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
|
||||
'timestamp': 1617218398,
|
||||
'upload_date': '20210331',
|
||||
'duration': 502,
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class TwentyFourSevenSportsIE(CBSSportsBaseIE):
|
||||
IE_NAME = '247sports'
|
||||
_VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
|
||||
'info_dict': {
|
||||
'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
|
||||
'ext': 'mp4',
|
||||
'title': '2021 QB Jake Garcia senior highlights through five games',
|
||||
'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
|
||||
'timestamp': 1607114223,
|
||||
'upload_date': '20201204',
|
||||
'duration': 208,
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -17,7 +17,7 @@ import math
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar_Cookie,
|
||||
compat_cookies,
|
||||
compat_cookies_SimpleCookie,
|
||||
compat_etree_Element,
|
||||
compat_etree_fromstring,
|
||||
compat_getpass,
|
||||
@@ -1308,6 +1308,7 @@ class InfoExtractor(object):
|
||||
|
||||
def extract_video_object(e):
|
||||
assert e['@type'] == 'VideoObject'
|
||||
author = e.get('author')
|
||||
info.update({
|
||||
'url': url_or_none(e.get('contentUrl')),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
@@ -1315,7 +1316,11 @@ class InfoExtractor(object):
|
||||
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'uploader': str_or_none(e.get('author')),
|
||||
# author can be an instance of 'Organization' or 'Person' types.
|
||||
# both types can have 'name' property(inherited from 'Thing' type). [1]
|
||||
# however some websites are using 'Text' type instead.
|
||||
# 1. https://schema.org/VideoObject
|
||||
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
'tbr': int_or_none(e.get('bitrate')),
|
||||
'width': int_or_none(e.get('width')),
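The new `author` handling accepts three shapes of schema.org data; a tiny sketch of the same expression applied to each (the sample names are made up):

```python
def uploader_from_author(author):
    # Mirrors the one-liner above: dicts ('Person'/'Organization') expose 'name',
    # bare strings ('Text') are used as-is, anything else gives None
    return author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None

print(uploader_from_author({'@type': 'Person', 'name': 'Jane Doe'}))            # Jane Doe
print(uploader_from_author({'@type': 'Organization', 'name': 'Example News'}))  # Example News
print(uploader_from_author('Example News'))                                     # Example News
print(uploader_from_author(None))                                               # None
```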
|
||||
@@ -1398,7 +1403,7 @@ class InfoExtractor(object):
|
||||
return self._hidden_inputs(form)
|
||||
|
||||
class FormatSort:
|
||||
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<seperator>[~:])(?P<limit>.*?))?)? *$'
|
||||
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
|
||||
|
||||
default = ('hidden', 'hasvid', 'ie_pref', 'lang', 'quality',
|
||||
'res', 'fps', 'codec:vp9.2', 'size', 'br', 'asr',
|
||||
@@ -1558,7 +1563,7 @@ class InfoExtractor(object):
|
||||
if self._get_field_setting(field, 'type') == 'alias':
|
||||
field = self._get_field_setting(field, 'field')
|
||||
reverse = match.group('reverse') is not None
|
||||
closest = match.group('seperator') == '~'
|
||||
closest = match.group('separator') == '~'
|
||||
limit_text = match.group('limit')
|
||||
|
||||
has_limit = limit_text is not None
|
||||
@@ -1575,7 +1580,8 @@ class InfoExtractor(object):
|
||||
else None)
|
||||
|
||||
def print_verbose_info(self, to_screen):
|
||||
to_screen('[debug] Sort order given by user: %s' % ','.join(self._sort_user))
|
||||
if self._sort_user:
|
||||
to_screen('[debug] Sort order given by user: %s' % ','.join(self._sort_user))
|
||||
if self._sort_extractor:
|
||||
to_screen('[debug] Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
|
||||
to_screen('[debug] Formats sorted by: %s' % ', '.join(['%s%s%s' % (
|
||||
@@ -1889,7 +1895,8 @@ class InfoExtractor(object):
|
||||
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||
return []
|
||||
|
||||
if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay
|
||||
if (not self._downloader.params.get('allow_unplayable_formats')
|
||||
and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)): # Apple FairPlay
|
||||
return []
|
||||
|
||||
formats = []
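The FairPlay bail-out is now gated on `allow_unplayable_formats` (the parameter name comes straight from the check above). A hedged sketch of toggling it from the API; note that listing such formats does not make DRM-protected content decryptable:

```python
from yt_dlp import YoutubeDL

# Default behaviour: HLS manifests with an 'skd://' EXT-X-SESSION-KEY yield no formats.
# With the switch enabled, the extractor no longer drops them early.
ydl = YoutubeDL({'allow_unplayable_formats': True, 'listformats': True})
```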
|
||||
@@ -2406,7 +2413,7 @@ class InfoExtractor(object):
|
||||
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
|
||||
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
|
||||
"""
|
||||
if not self._downloader.params.get('dynamic_mpd'):
|
||||
if not self._downloader.params.get('dynamic_mpd', True):
|
||||
if mpd_doc.get('type') == 'dynamic':
|
||||
return []
|
||||
|
||||
@@ -3218,10 +3225,10 @@ class InfoExtractor(object):
|
||||
self._downloader.cookiejar.set_cookie(cookie)
|
||||
|
||||
def _get_cookies(self, url):
|
||||
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
|
||||
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
|
||||
req = sanitized_Request(url)
|
||||
self._downloader.cookiejar.add_cookie_header(req)
|
||||
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
|
||||
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
|
||||
|
||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||
"""
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
@@ -58,3 +60,16 @@ class MmsIE(InfoExtractor):
|
||||
'title': title,
|
||||
'url': url,
|
||||
}
|
||||
|
||||
|
||||
class ViewSourceIE(InfoExtractor):
|
||||
IE_DESC = False
|
||||
_VALID_URL = r'view-source:(?P<url>.+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'view-source:https://www.youtube.com/watch?v=BaW_jenozKc',
|
||||
'only_matching': True
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result(re.match(self._VALID_URL, url).group('url'))
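The new `ViewSourceIE` simply strips the `view-source:` scheme and re-queues the inner URL, so an address copied from a browser's view-source tab works directly; for example (URL taken from the test above):

```python
from yt_dlp import YoutubeDL

with YoutubeDL({'quiet': True}) as ydl:
    info = ydl.extract_info(
        'view-source:https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
    print(info['id'])  # BaW_jenozKc
```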
|
||||
|
||||
@@ -25,12 +25,12 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
def _call_api(self, path, video_id):
|
||||
def _call_api(self, path, video_id, query=None):
|
||||
headers = {}
|
||||
if self._auth_token:
|
||||
headers['X-Auth-Token'] = self._auth_token
|
||||
result = self._download_json(
|
||||
self._API_BASE_URL + path, video_id, headers=headers)
|
||||
self._API_BASE_URL + path, video_id, headers=headers, query=query)
|
||||
self._handle_errors(result)
|
||||
return result['data']
|
||||
|
||||
@@ -52,62 +52,75 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://app.curiositystream.com/video/2',
|
||||
'md5': '262bb2f257ff301115f1973540de8983',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Did You Develop The Internet?',
|
||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
media = self._call_api('media/' + video_id, video_id)
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
for encoding in media.get('encodings', []):
|
||||
m3u8_url = encoding.get('master_playlist_url')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
encoding_url = encoding.get('url')
|
||||
file_url = encoding.get('file_url')
|
||||
if not encoding_url and not file_url:
|
||||
continue
|
||||
f = {
|
||||
'width': int_or_none(encoding.get('width')),
|
||||
'height': int_or_none(encoding.get('height')),
|
||||
'vbr': int_or_none(encoding.get('video_bitrate')),
|
||||
'abr': int_or_none(encoding.get('audio_bitrate')),
|
||||
'filesize': int_or_none(encoding.get('size_in_bytes')),
|
||||
'vcodec': encoding.get('video_codec'),
|
||||
'acodec': encoding.get('audio_codec'),
|
||||
'container': encoding.get('container_type'),
|
||||
}
|
||||
for f_url in (encoding_url, file_url):
|
||||
if not f_url:
|
||||
for encoding_format in ('m3u8', 'mpd'):
|
||||
media = self._call_api('media/' + video_id, video_id, query={
|
||||
'encodingsNew': 'true',
|
||||
'encodingsFormat': encoding_format,
|
||||
})
|
||||
for encoding in media.get('encodings', []):
|
||||
playlist_url = encoding.get('master_playlist_url')
|
||||
if encoding_format == 'm3u8':
|
||||
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
playlist_url, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif encoding_format == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
playlist_url, video_id, mpd_id='dash', fatal=False))
|
||||
encoding_url = encoding.get('url')
|
||||
file_url = encoding.get('file_url')
|
||||
if not encoding_url and not file_url:
|
||||
continue
|
||||
fmt = f.copy()
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
|
||||
if rtmp:
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'app': rtmp.group('app'),
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'url': f_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
formats.append(fmt)
|
||||
f = {
|
||||
'width': int_or_none(encoding.get('width')),
|
||||
'height': int_or_none(encoding.get('height')),
|
||||
'vbr': int_or_none(encoding.get('video_bitrate')),
|
||||
'abr': int_or_none(encoding.get('audio_bitrate')),
|
||||
'filesize': int_or_none(encoding.get('size_in_bytes')),
|
||||
'vcodec': encoding.get('video_codec'),
|
||||
'acodec': encoding.get('audio_codec'),
|
||||
'container': encoding.get('container_type'),
|
||||
}
|
||||
for f_url in (encoding_url, file_url):
|
||||
if not f_url:
|
||||
continue
|
||||
fmt = f.copy()
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
|
||||
if rtmp:
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'app': rtmp.group('app'),
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'url': f_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = media['title']
|
||||
|
||||
subtitles = {}
|
||||
for closed_caption in media.get('closed_captions', []):
|
||||
sub_url = closed_caption.get('file')
|
||||
@@ -140,7 +153,7 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
'title': 'Curious Minds: The Internet',
|
||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||
},
|
||||
'playlist_mincount': 17,
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
'url': 'https://curiositystream.com/series/2',
|
||||
'only_matching': True,
|
||||
|
||||
yt_dlp/extractor/discoveryplusindia.py (new file, 100 lines)
@@ -0,0 +1,100 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import try_get
|
||||
from .common import InfoExtractor
|
||||
from .dplay import DPlayIE
|
||||
|
||||
|
||||
class DiscoveryPlusIndiaIE(DPlayIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' + DPlayIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE',
|
||||
'info_dict': {
|
||||
'id': '27104',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'how-do-they-do-it/fugu-and-more',
|
||||
'title': 'Fugu and More',
|
||||
'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.',
|
||||
'duration': 1319,
|
||||
'timestamp': 1582309800,
|
||||
'upload_date': '20200221',
|
||||
'series': 'How Do They Do It?',
|
||||
'season_number': 8,
|
||||
'episode_number': 2,
|
||||
'creator': 'Discovery Channel',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Cookies (not necessarily logged in) are needed'
|
||||
}]
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers['x-disco-params'] = 'realm=%s' % realm
|
||||
headers['x-disco-client'] = 'WEB:UNKNOWN:dplus-india:17.0.0'
|
||||
|
||||
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||
return self._download_json(
|
||||
disco_base + 'playback/v3/videoPlaybackInfo',
|
||||
video_id, headers=headers, data=json.dumps({
|
||||
'deviceInfo': {
|
||||
'adBlocker': False,
|
||||
},
|
||||
'videoId': video_id,
|
||||
}).encode('utf-8'))['data']['attributes']['streaming']
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'ap2-prod-direct.discoveryplus.in', 'dplusindia', 'in')
|
||||
|
||||
|
||||
class DiscoveryPlusIndiaShowIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P<show_name>[^/]+)/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.in/show/how-do-they-do-it',
|
||||
'playlist_mincount': 140,
|
||||
'info_dict': {
|
||||
'id': 'how-do-they-do-it',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _entries(self, show_name):
|
||||
headers = {
|
||||
'x-disco-client': 'WEB:UNKNOWN:dplus-india:prod',
|
||||
'x-disco-params': 'realm=dplusindia',
|
||||
'referer': 'https://www.discoveryplus.in/',
|
||||
}
|
||||
show_url = 'https://ap2-prod-direct.discoveryplus.in/cms/routes/show/{}?include=default'.format(show_name)
|
||||
show_json = self._download_json(show_url,
|
||||
video_id=show_name,
|
||||
headers=headers)['included'][4]['attributes']['component']
|
||||
show_id = show_json['mandatoryParams'].split('=')[-1]
|
||||
season_url = 'https://ap2-prod-direct.discoveryplus.in/content/videos?sort=episodeNumber&filter[seasonNumber]={}&filter[show.id]={}&page[size]=100&page[number]={}'
|
||||
for season in show_json['filters'][0]['options']:
|
||||
season_id = season['id']
|
||||
total_pages, page_num = 1, 0
|
||||
while page_num < total_pages:
|
||||
season_json = self._download_json(season_url.format(season_id, show_id, compat_str(page_num + 1)),
|
||||
video_id=show_id, headers=headers,
|
||||
note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
|
||||
if page_num == 0:
|
||||
total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1
|
||||
episodes_json = season_json['data']
|
||||
for episode in episodes_json:
|
||||
video_id = episode['attributes']['path']
|
||||
yield self.url_result(
|
||||
'https://discoveryplus.in/videos/%s' % video_id,
|
||||
ie=DiscoveryPlusIndiaIE.ie_key(), video_id=video_id)
|
||||
page_num += 1
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_name = re.match(self._VALID_URL, url).group('show_name')
|
||||
return self.playlist_result(self._entries(show_name), playlist_id=show_name)
|
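A self-contained sketch of the page loop used by DiscoveryPlusIndiaShowIE._entries above: the first response reports totalPages, and further pages are requested until that count is reached. fetch_page and the data shapes are stand-ins, not the real API:

def iter_episode_paths(fetch_page):
    # fetch_page(page_number) is a hypothetical helper returning one page of
    # season JSON shaped like season_json in the diff:
    # {'data': [{'attributes': {'path': ...}}, ...], 'meta': {'totalPages': N}}
    total_pages, page_num = 1, 0
    while page_num < total_pages:
        page = fetch_page(page_num + 1)
        if page_num == 0:
            # only the first response is consulted for the page count
            total_pages = page.get('meta', {}).get('totalPages') or 1
        for episode in page.get('data', []):
            yield episode['attributes']['path']
        page_num += 1

# canned pages instead of network calls
pages = {
    1: {'meta': {'totalPages': 2}, 'data': [{'attributes': {'path': 'show/ep-1'}}]},
    2: {'data': [{'attributes': {'path': 'show/ep-2'}}]},
}
print(list(iter_episode_paths(lambda n: pages[n])))  # ['show/ep-1', 'show/ep-2']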
||||
@@ -80,6 +80,7 @@ from .arte import (
|
||||
ArteTVEmbedIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
from .arnes import ArnesIE
|
||||
from .asiancrush import (
|
||||
AsianCrushIE,
|
||||
AsianCrushPlaylistIE,
|
||||
@@ -209,7 +210,11 @@ from .cbsnews import (
|
||||
CBSNewsIE,
|
||||
CBSNewsLiveVideoIE,
|
||||
)
|
||||
from .cbssports import CBSSportsIE
|
||||
from .cbssports import (
|
||||
CBSSportsEmbedIE,
|
||||
CBSSportsIE,
|
||||
TwentyFourSevenSportsIE,
|
||||
)
|
||||
from .ccc import (
|
||||
CCCIE,
|
||||
CCCPlaylistIE,
|
||||
@@ -264,6 +269,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import (
|
||||
MmsIE,
|
||||
RtmpIE,
|
||||
ViewSourceIE,
|
||||
)
|
||||
from .condenast import CondeNastIE
|
||||
from .contv import CONtvIE
|
||||
@@ -307,6 +313,10 @@ from .democracynow import DemocracynowIE
|
||||
from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .digg import DiggIE
|
||||
from .discoveryplusindia import (
|
||||
DiscoveryPlusIndiaIE,
|
||||
DiscoveryPlusIndiaShowIE,
|
||||
)
|
||||
from .dotsub import DotsubIE
|
||||
from .douyutv import (
|
||||
DouyuShowIE,
|
||||
@@ -582,7 +592,11 @@ from .kuwo import (
|
||||
KuwoCategoryIE,
|
||||
KuwoMvIE,
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .la7 import (
|
||||
LA7IE,
|
||||
LA7PodcastEpisodeIE,
|
||||
LA7PodcastIE,
|
||||
)
|
||||
from .laola1tv import (
|
||||
Laola1TvEmbedIE,
|
||||
Laola1TvIE,
|
||||
@@ -623,7 +637,11 @@ from .limelight import (
|
||||
LimelightChannelIE,
|
||||
LimelightChannelListIE,
|
||||
)
|
||||
from .line import LineTVIE
|
||||
from .line import (
|
||||
LineTVIE,
|
||||
LineLiveIE,
|
||||
LineLiveChannelIE,
|
||||
)
|
||||
from .linkedin import (
|
||||
LinkedInLearningIE,
|
||||
LinkedInLearningCourseIE,
|
||||
@@ -661,6 +679,7 @@ from .mangomolo import (
|
||||
MangomoloLiveIE,
|
||||
)
|
||||
from .manyvids import ManyVidsIE
|
||||
from .maoritv import MaoriTVIE
|
||||
from .markiza import (
|
||||
MarkizaIE,
|
||||
MarkizaPageIE,
|
||||
@@ -784,8 +803,9 @@ from .ndr import (
|
||||
NJoyEmbedIE,
|
||||
)
|
||||
from .ndtv import NDTVIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .nebula import NebulaIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .neteasemusic import (
|
||||
NetEaseMusicIE,
|
||||
NetEaseMusicAlbumIE,
|
||||
@@ -810,6 +830,7 @@ from .nexx import (
|
||||
NexxIE,
|
||||
NexxEmbedIE,
|
||||
)
|
||||
from .nfhsnetwork import NFHSNetworkIE
|
||||
from .nfl import (
|
||||
NFLIE,
|
||||
NFLArticleIE,
|
||||
@@ -921,6 +942,11 @@ from .packtpub import (
|
||||
PacktPubIE,
|
||||
PacktPubCourseIE,
|
||||
)
|
||||
from .palcomp3 import (
|
||||
PalcoMP3IE,
|
||||
PalcoMP3ArtistIE,
|
||||
PalcoMP3VideoIE,
|
||||
)
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
|
||||
@@ -401,7 +401,7 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
|
||||
r'id-video=([^@]+@[^"]+)',
|
||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
r'data-id="([^"]+)"'),
|
||||
r'data-id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
|
||||
webpage, 'video id')
|
||||
|
||||
return self._make_url_result(video_id)
|
||||
|
||||
@@ -2659,6 +2659,15 @@ class GenericIE(InfoExtractor):
|
||||
if vid_me_embed_url is not None:
|
||||
return self.url_result(vid_me_embed_url, 'Vidme')
|
||||
|
||||
# Invidious Instances
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/195
|
||||
# https://github.com/iv-org/invidious/pull/1730
|
||||
youtube_url = self._search_regex(
|
||||
r'<link rel="alternate" href="(https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
|
||||
webpage, 'youtube link', default=None)
|
||||
if youtube_url:
|
||||
return self.url_result(youtube_url, YoutubeIE.ie_key())
|
||||
|
||||
# Look for YouTube embeds
|
||||
youtube_urls = YoutubeIE._extract_urls(webpage)
|
||||
if youtube_urls:
|
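The Invidious support added to the generic extractor above only needs the canonical YouTube link such pages expose; a tiny sketch running the same regex against a made-up page:

import re

webpage = '<link rel="alternate" href="https://www.youtube.com/watch?v=BaW_jenozKc">'
youtube_url = re.search(
    r'<link rel="alternate" href="(https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
    webpage)
print(youtube_url.group(1) if youtube_url else None)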
||||
|
||||
@@ -12,6 +12,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
@@ -32,6 +33,7 @@ class InstagramIE(InfoExtractor):
|
||||
'title': 'Video by naomipq',
|
||||
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 0,
|
||||
'timestamp': 1371748545,
|
||||
'upload_date': '20130620',
|
||||
'uploader_id': 'naomipq',
|
||||
@@ -48,6 +50,7 @@ class InstagramIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Video by britneyspears',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 0,
|
||||
'timestamp': 1453760977,
|
||||
'upload_date': '20160125',
|
||||
'uploader_id': 'britneyspears',
|
||||
@@ -86,6 +89,24 @@ class InstagramIE(InfoExtractor):
|
||||
'title': 'Post by instagram',
|
||||
'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
|
||||
},
|
||||
}, {
|
||||
# IGTV
|
||||
'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
|
||||
'info_dict': {
|
||||
'id': 'BkfuX9UB-eK',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fingerboarding Tricks with @cass.fb',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 53.83,
|
||||
'timestamp': 1530032919,
|
||||
'upload_date': '20180626',
|
||||
'uploader_id': 'instagram',
|
||||
'uploader': 'Instagram',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
||||
'only_matching': True,
|
||||
@@ -159,7 +180,9 @@ class InstagramIE(InfoExtractor):
|
||||
description = try_get(
|
||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str) or media.get('caption')
|
||||
title = media.get('title')
|
||||
thumbnail = media.get('display_src') or media.get('display_url')
|
||||
duration = float_or_none(media.get('video_duration'))
|
||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||
uploader = media.get('owner', {}).get('full_name')
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
@@ -200,9 +223,10 @@ class InstagramIE(InfoExtractor):
|
||||
continue
|
||||
entries.append({
|
||||
'id': node.get('shortcode') or node['id'],
|
||||
'title': 'Video %d' % edge_num,
|
||||
'title': node.get('title') or 'Video %d' % edge_num,
|
||||
'url': node_video_url,
|
||||
'thumbnail': node.get('display_url'),
|
||||
'duration': float_or_none(node.get('video_duration')),
|
||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||
'view_count': int_or_none(node.get('video_view_count')),
|
||||
@@ -239,8 +263,9 @@ class InstagramIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'ext': 'mp4',
|
||||
'title': 'Video by %s' % uploader_id,
|
||||
'title': title or 'Video by %s' % uploader_id,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'uploader_id': uploader_id,
|
||||
|
||||
@@ -29,34 +29,51 @@ class JamendoIE(InfoExtractor):
|
||||
'id': '196219',
|
||||
'display_id': 'stories-from-emona-i',
|
||||
'ext': 'flac',
|
||||
'title': 'Maya Filipič - Stories from Emona I',
|
||||
'artist': 'Maya Filipič',
|
||||
# 'title': 'Maya Filipič - Stories from Emona I',
|
||||
'title': 'Stories from Emona I',
|
||||
# 'artist': 'Maya Filipič',
|
||||
'track': 'Stories from Emona I',
|
||||
'duration': 210,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1217438117,
|
||||
'upload_date': '20080730',
|
||||
'license': 'by-nc-nd',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'average_rating': int,
|
||||
'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
|
||||
}
|
||||
}, {
|
||||
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api(self, resource, resource_id):
|
||||
path = '/api/%ss' % resource
|
||||
rand = compat_str(random.random())
|
||||
return self._download_json(
|
||||
'https://www.jamendo.com' + path, resource_id, query={
|
||||
'id[]': resource_id,
|
||||
}, headers={
|
||||
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
|
||||
})[0]
|
||||
|
||||
def _real_extract(self, url):
|
||||
track_id, display_id = self._VALID_URL_RE.match(url).groups()
|
||||
webpage = self._download_webpage(
|
||||
'https://www.jamendo.com/track/' + track_id, track_id)
|
||||
models = self._parse_json(self._html_search_regex(
|
||||
r"data-bundled-models='([^']+)",
|
||||
webpage, 'bundled models'), track_id)
|
||||
track = models['track']['models'][0]
|
||||
# webpage = self._download_webpage(
|
||||
# 'https://www.jamendo.com/track/' + track_id, track_id)
|
||||
# models = self._parse_json(self._html_search_regex(
|
||||
# r"data-bundled-models='([^']+)",
|
||||
# webpage, 'bundled models'), track_id)
|
||||
# track = models['track']['models'][0]
|
||||
track = self._call_api('track', track_id)
|
||||
title = track_name = track['name']
|
||||
get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
||||
artist = get_model('artist')
|
||||
artist_name = artist.get('name')
|
||||
if artist_name:
|
||||
title = '%s - %s' % (artist_name, title)
|
||||
album = get_model('album')
|
||||
# get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
||||
# artist = get_model('artist')
|
||||
# artist_name = artist.get('name')
|
||||
# if artist_name:
|
||||
# title = '%s - %s' % (artist_name, title)
|
||||
# album = get_model('album')
|
||||
|
||||
formats = [{
|
||||
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
||||
@@ -74,7 +91,7 @@ class JamendoIE(InfoExtractor):
|
||||
|
||||
urls = []
|
||||
thumbnails = []
|
||||
for _, covers in track.get('cover', {}).items():
|
||||
for covers in (track.get('cover') or {}).values():
|
||||
for cover_id, cover_url in covers.items():
|
||||
if not cover_url or cover_url in urls:
|
||||
continue
|
||||
@@ -88,13 +105,14 @@ class JamendoIE(InfoExtractor):
|
||||
})
|
||||
|
||||
tags = []
|
||||
for tag in track.get('tags', []):
|
||||
for tag in (track.get('tags') or []):
|
||||
tag_name = tag.get('name')
|
||||
if not tag_name:
|
||||
continue
|
||||
tags.append(tag_name)
|
||||
|
||||
stats = track.get('stats') or {}
|
||||
license = track.get('licenseCC') or []
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
@@ -103,11 +121,11 @@ class JamendoIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': track.get('description'),
|
||||
'duration': int_or_none(track.get('duration')),
|
||||
'artist': artist_name,
|
||||
# 'artist': artist_name,
|
||||
'track': track_name,
|
||||
'album': album.get('name'),
|
||||
# 'album': album.get('name'),
|
||||
'formats': formats,
|
||||
'license': '-'.join(track.get('licenseCC', [])) or None,
|
||||
'license': '-'.join(license) if license else None,
|
||||
'timestamp': int_or_none(track.get('dateCreated')),
|
||||
'view_count': int_or_none(stats.get('listenedAll')),
|
||||
'like_count': int_or_none(stats.get('favorited')),
|
||||
@@ -116,9 +134,9 @@ class JamendoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class JamendoAlbumIE(InfoExtractor):
|
||||
class JamendoAlbumIE(JamendoIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
||||
'info_dict': {
|
||||
'id': '121486',
|
||||
@@ -151,17 +169,7 @@ class JamendoAlbumIE(InfoExtractor):
|
||||
'params': {
|
||||
'playlistend': 2
|
||||
}
|
||||
}
|
||||
|
||||
def _call_api(self, resource, resource_id):
|
||||
path = '/api/%ss' % resource
|
||||
rand = compat_str(random.random())
|
||||
return self._download_json(
|
||||
'https://www.jamendo.com' + path, resource_id, query={
|
||||
'id[]': resource_id,
|
||||
}, headers={
|
||||
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
|
||||
})[0]
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
@@ -169,7 +177,7 @@ class JamendoAlbumIE(InfoExtractor):
|
||||
album_name = album.get('name')
|
||||
|
||||
entries = []
|
||||
for track in album.get('tracks', []):
|
||||
for track in (album.get('tracks') or []):
|
||||
track_id = track.get('id')
|
||||
if not track_id:
|
||||
continue
|
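For reference, a small sketch of how the signed X-Jam-Call header used by the new Jamendo _call_api above can be assembled, a SHA-1 of the API path plus a random nonce; the header format is taken from the diff, everything else is illustrative:

import hashlib
import random

def jam_call_header(path):
    # '$<sha1(path + rand)>*<rand>~', as in _call_api
    rand = str(random.random())
    digest = hashlib.sha1((path + rand).encode()).hexdigest()
    return {'X-Jam-Call': '$%s*%s~' % (digest, rand)}

print(jam_call_header('/api/tracks'))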
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -57,3 +63,141 @@ class LA7IE(InfoExtractor):
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'ie_key': 'Kaltura',
|
||||
}
|
||||
|
||||
|
||||
class LA7PodcastEpisodeIE(InfoExtractor):
|
||||
IE_NAME = 'la7.it:pod:episode'
|
||||
_VALID_URL = r'''(?x)(https?://)?
|
||||
(?:www\.)?la7\.it/[^/]+/podcast/([^/]+-)?(?P<id>\d+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.la7.it/voicetown/podcast/la-carezza-delle-memoria-di-carlo-verdone-23-03-2021-371497',
|
||||
'md5': '7737d4d79b3c1a34b3de3e16297119ed',
|
||||
'info_dict': {
|
||||
'id': '371497',
|
||||
'ext': 'mp3',
|
||||
'title': '"La carezza delle memoria" di Carlo Verdone',
|
||||
'description': 'md5:5abf07c3c551a687db80af3f9ceb7d52',
|
||||
'thumbnail': 'https://www.la7.it/sites/default/files/podcast/371497.jpg',
|
||||
'upload_date': '20210323',
|
||||
},
|
||||
}, {
|
||||
# embed url
|
||||
'url': 'https://www.la7.it/embed/podcast/371497',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# date already in the title
|
||||
'url': 'https://www.la7.it/propagandalive/podcast/lintervista-di-diego-bianchi-ad-annalisa-cuzzocrea-puntata-del-1932021-20-03-2021-371130',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# title same as show_title
|
||||
'url': 'https://www.la7.it/otto-e-mezzo/podcast/otto-e-mezzo-26-03-2021-372340',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_info(self, webpage, video_id=None, ppn=None):
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
r'data-nid=([\'"])(?P<vid>\d+)\1',
|
||||
webpage, 'video_id', group='vid')
|
||||
|
||||
media_url = self._search_regex(
|
||||
(r'src:\s*([\'"])(?P<url>.+?mp3.+?)\1',
|
||||
r'data-podcast=([\'"])(?P<url>.+?mp3.+?)\1'),
|
||||
webpage, 'media_url', group='url')
|
||||
ext = determine_ext(media_url)
|
||||
formats = [{
|
||||
'url': media_url,
|
||||
'format_id': ext,
|
||||
'ext': ext,
|
||||
}]
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'<div class="title">(?P<title>.+?)</',
|
||||
r'<title>(?P<title>[^<]+)</title>',
|
||||
r'title:\s*([\'"])(?P<title>.+?)\1'),
|
||||
webpage, 'title', group='title')
|
||||
|
||||
description = (
|
||||
self._html_search_regex(
|
||||
(r'<div class="description">(.+?)</div>',
|
||||
r'<div class="description-mobile">(.+?)</div>',
|
||||
r'<div class="box-txt">([^<]+?)</div>',
|
||||
r'<div class="field-content"><p>(.+?)</p></div>'),
|
||||
webpage, 'description', default=None)
|
||||
or self._html_search_meta('description', webpage))
|
||||
|
||||
thumb = self._html_search_regex(
|
||||
(r'<div class="podcast-image"><img src="(.+?)"></div>',
|
||||
r'<div class="container-embed"[^<]+url\((.+?)\);">',
|
||||
r'<div class="field-content"><img src="(.+?)"'),
|
||||
webpage, 'thumbnail', fatal=False, default=None)
|
||||
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'<span class="(?:durata|duration)">([\d:]+)</span>',
|
||||
webpage, 'duration', fatal=False, default=None))
|
||||
|
||||
date = self._html_search_regex(
|
||||
r'class="data">\s*(?:<span>)?([\d\.]+)\s*</',
|
||||
webpage, 'date', default=None)
|
||||
|
||||
date_alt = self._search_regex(
|
||||
r'(\d+[\./]\d+[\./]\d+)', title, 'date_alt', default=None)
|
||||
ppn = ppn or self._search_regex(
|
||||
r'ppN:\s*([\'"])(?P<ppn>.+?)\1',
|
||||
webpage, 'ppn', group='ppn', default=None)
|
||||
# if the date is not in the title
|
||||
# and title is the same as the show_title
|
||||
# add the date to the title
|
||||
if date and not date_alt and ppn and ppn.lower() == title.lower():
|
||||
title += ' del %s' % date
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': float_or_none(duration),
|
||||
'formats': formats,
|
||||
'thumbnail': thumb,
|
||||
'upload_date': unified_strdate(date),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
return self._extract_info(webpage, video_id)
|
||||
|
||||
|
||||
class LA7PodcastIE(LA7PodcastEpisodeIE):
|
||||
IE_NAME = 'la7.it:podcast'
|
||||
_VALID_URL = r'(https?://)?(www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.la7.it/propagandalive/podcast',
|
||||
'info_dict': {
|
||||
'id': 'propagandalive',
|
||||
'title': "Propaganda Live",
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
title = (
|
||||
self._html_search_regex(
|
||||
r'<h1.*?>(.+?)</h1>', webpage, 'title', fatal=False, default=None)
|
||||
or self._og_search_title(webpage))
|
||||
ppn = self._search_regex(
|
||||
r'window\.ppN\s*=\s*([\'"])(?P<ppn>.+?)\1',
|
||||
webpage, 'ppn', group='ppn', default=None)
|
||||
|
||||
entries = []
|
||||
for episode in re.finditer(
|
||||
r'<div class="container-podcast-property">([\s\S]+?)(?:</div>\s*){3}',
|
||||
webpage):
|
||||
entries.append(self._extract_info(episode.group(1), ppn=ppn))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title)
|
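A compact sketch of the listing pattern LA7PodcastIE._real_extract uses above: every episode box is captured with re.finditer and handed to the per-episode parser. The HTML below is invented; real pages differ:

import re

PAGE = '''
<div class="container-podcast-property">episode one</div>
</div>
</div>
<div class="container-podcast-property">episode two</div>
</div>
</div>
'''

entries = []
for episode in re.finditer(
        r'<div class="container-podcast-property">([\s\S]+?)(?:</div>\s*){3}', PAGE):
    # the extractor feeds each captured block to _extract_info();
    # here we only keep the raw text
    entries.append(episode.group(1).strip())

print(entries)  # ['episode one', 'episode two']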
||||
|
||||
@@ -4,7 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class LineTVIE(InfoExtractor):
|
||||
@@ -88,3 +94,137 @@ class LineTVIE(InfoExtractor):
|
||||
for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
|
||||
'view_count': video_info.get('meta', {}).get('count'),
|
||||
}
|
||||
|
||||
|
||||
class LineLiveBaseIE(InfoExtractor):
|
||||
_API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'
|
||||
|
||||
def _parse_broadcast_item(self, item):
|
||||
broadcast_id = compat_str(item['id'])
|
||||
title = item['title']
|
||||
is_live = item.get('isBroadcastingNow')
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_id, thumbnail_url in (item.get('thumbnailURLs') or {}).items():
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'id': thumbnail_id,
|
||||
'url': thumbnail_url,
|
||||
})
|
||||
|
||||
channel = item.get('channel') or {}
|
||||
channel_id = str_or_none(channel.get('id'))
|
||||
|
||||
return {
|
||||
'id': broadcast_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': int_or_none(item.get('createdAt')),
|
||||
'channel': channel.get('name'),
|
||||
'channel_id': channel_id,
|
||||
'channel_url': 'https://live.line.me/channels/' + channel_id if channel_id else None,
|
||||
'duration': int_or_none(item.get('archiveDuration')),
|
||||
'view_count': int_or_none(item.get('viewerCount')),
|
||||
'comment_count': int_or_none(item.get('chatCount')),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
|
||||
class LineLiveIE(LineLiveBaseIE):
|
||||
_VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
|
||||
'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
|
||||
'info_dict': {
|
||||
'id': '16331360',
|
||||
'title': '振りコピ講座😙😙😙',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1617095132,
|
||||
'upload_date': '20210330',
|
||||
'channel': '白川ゆめか',
|
||||
'channel_id': '4867368',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'is_live': False,
|
||||
}
|
||||
}, {
|
||||
# archiveStatus == 'DELETED'
|
||||
'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
|
||||
broadcast = self._download_json(
|
||||
self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
|
||||
broadcast_id)
|
||||
item = broadcast['item']
|
||||
info = self._parse_broadcast_item(item)
|
||||
protocol = 'm3u8' if info['is_live'] else 'm3u8_native'
|
||||
formats = []
|
||||
for k, v in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items():
|
||||
if not v:
|
||||
continue
|
||||
if k == 'abr':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
v, broadcast_id, 'mp4', protocol,
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
f = {
|
||||
'ext': 'mp4',
|
||||
'format_id': 'hls-' + k,
|
||||
'protocol': protocol,
|
||||
'url': v,
|
||||
}
|
||||
if not k.isdigit():
|
||||
f['vcodec'] = 'none'
|
||||
formats.append(f)
|
||||
if not formats:
|
||||
archive_status = item.get('archiveStatus')
|
||||
if archive_status != 'ARCHIVED':
|
||||
raise ExtractorError('this video has been ' + archive_status.lower(), expected=True)
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
||||
|
||||
class LineLiveChannelIE(LineLiveBaseIE):
|
||||
_VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
|
||||
_TEST = {
|
||||
'url': 'https://live.line.me/channels/5893542',
|
||||
'info_dict': {
|
||||
'id': '5893542',
|
||||
'title': 'いくらちゃん',
|
||||
'description': 'md5:c3a4af801f43b2fac0b02294976580be',
|
||||
},
|
||||
'playlist_mincount': 29
|
||||
}
|
||||
|
||||
def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
|
||||
while True:
|
||||
for row in (archived_broadcasts.get('rows') or []):
|
||||
share_url = str_or_none(row.get('shareURL'))
|
||||
if not share_url:
|
||||
continue
|
||||
info = self._parse_broadcast_item(row)
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': share_url,
|
||||
'ie_key': LineLiveIE.ie_key(),
|
||||
})
|
||||
yield info
|
||||
if not archived_broadcasts.get('hasNextPage'):
|
||||
return
|
||||
archived_broadcasts = self._download_json(
|
||||
self._API_BASE_URL + channel_id + '/archived_broadcasts',
|
||||
channel_id, query={
|
||||
'lastId': info['id'],
|
||||
})
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
|
||||
return self.playlist_result(
|
||||
self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
|
||||
channel_id, channel.get('title'), channel.get('information'))
|
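Sketch of the cursor paging in _archived_broadcasts_entries above: each page reports hasNextPage, and the id of the last yielded row is passed back as lastId for the next request. fetch_next and the row shapes are placeholders for the real API:

def iter_rows(first_page, fetch_next):
    page = first_page
    while True:
        last_id = None
        for row in page.get('rows') or []:
            last_id = row['id']
            yield row
        if not page.get('hasNextPage'):
            return
        # the real extractor passes this as the lastId query parameter
        page = fetch_next(last_id)

pages = [
    {'rows': [{'id': 1}, {'id': 2}], 'hasNextPage': True},
    {'rows': [{'id': 3}]},
]
print([row['id'] for row in iter_rows(pages[0], lambda last_id: pages[1])])  # [1, 2, 3]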
||||
|
||||
31
yt_dlp/extractor/maoritv.py
Normal file
@@ -0,0 +1,31 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MaoriTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?maoritelevision\.com/shows/(?:[^/]+/)+(?P<id>[^/?&#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.maoritelevision.com/shows/korero-mai/S01E054/korero-mai-series-1-episode-54',
|
||||
'md5': '5ade8ef53851b6a132c051b1cd858899',
|
||||
'info_dict': {
|
||||
'id': '4774724855001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kōrero Mai, Series 1 Episode 54',
|
||||
'upload_date': '20160226',
|
||||
'timestamp': 1456455018,
|
||||
'description': 'md5:59bde32fd066d637a1a55794c56d8dcb',
|
||||
'uploader_id': '1614493167001',
|
||||
},
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
brightcove_id = self._search_regex(
|
||||
r'data-main-video-id=["\'](\d+)', webpage, 'brightcove id')
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
'BrightcoveNew', brightcove_id)
|
||||
@@ -5,6 +5,7 @@ from datetime import datetime
|
||||
import itertools
|
||||
import json
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -68,7 +69,7 @@ class MildomBaseIE(InfoExtractor):
|
||||
self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
|
||||
except ExtractorError:
|
||||
self._DISPATCHER_CONFIG = self._download_json(
|
||||
'https://bookish-octo-barnacle.vercel.app/api/dispatcher_config', 'initialization',
|
||||
'https://bookish-octo-barnacle.vercel.app/api/mildom/dispatcher_config', 'initialization',
|
||||
note='Downloading dispatcher_config fallback')
|
||||
return self._DISPATCHER_CONFIG
|
||||
|
||||
@@ -110,6 +111,7 @@ class MildomIE(MildomBaseIE):
|
||||
enterstudio = self._call_api(
|
||||
'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
|
||||
note='Downloading live metadata', query={'user_id': video_id})
|
||||
result_video_id = enterstudio.get('log_id', video_id)
|
||||
|
||||
title = try_get(
|
||||
enterstudio, (
|
||||
@@ -128,7 +130,7 @@ class MildomIE(MildomBaseIE):
|
||||
), compat_str)
|
||||
|
||||
servers = self._call_api(
|
||||
'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', video_id,
|
||||
'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
|
||||
note='Downloading live server list', query={
|
||||
'user_id': video_id,
|
||||
'live_server_type': 'hls',
|
||||
@@ -139,7 +141,7 @@ class MildomIE(MildomBaseIE):
|
||||
'is_lhls': '0',
|
||||
})
|
||||
m3u8_url = update_url_query(servers['stream_server'] + '/%s_master.m3u8' % video_id, stream_query)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', headers={
|
||||
formats = self._extract_m3u8_formats(m3u8_url, result_video_id, 'mp4', headers={
|
||||
'Referer': 'https://www.mildom.com/',
|
||||
'Origin': 'https://www.mildom.com',
|
||||
}, note='Downloading m3u8 information')
|
||||
@@ -150,13 +152,13 @@ class MildomIE(MildomBaseIE):
|
||||
parsed = parsed._replace(
|
||||
netloc='bookish-octo-barnacle.vercel.app',
|
||||
query=compat_urllib_parse_urlencode(stream_query, True),
|
||||
path='/api' + parsed.path)
|
||||
path='/api/mildom' + parsed.path)
|
||||
fmt['url'] = compat_urlparse.urlunparse(parsed)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': result_video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
@@ -172,9 +174,8 @@ class MildomVodIE(MildomBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
m = self._VALID_URL_RE.match(url)
|
||||
user_id = m.group('user_id')
|
||||
m = re.match(self._VALID_URL, url)
|
||||
user_id, video_id = m.group('user_id'), m.group('id')
|
||||
url = 'https://www.mildom.com/playback/%s/%s' % (user_id, video_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@@ -230,7 +231,7 @@ class MildomVodIE(MildomBaseIE):
|
||||
parsed = parsed._replace(
|
||||
netloc='bookish-octo-barnacle.vercel.app',
|
||||
query=compat_urllib_parse_urlencode(stream_query, True),
|
||||
path='/api/vod2/proxy')
|
||||
path='/api/mildom/vod2/proxy')
|
||||
fmt['url'] = compat_urlparse.urlunparse(parsed)
|
||||
|
||||
self._sort_formats(formats)
|
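The Mildom change above reroutes format URLs through the /api/mildom proxy by swapping the host and prefixing the path; an equivalent standalone sketch using only the standard library (the proxy host is the one in the diff, the input URL is invented):

from urllib.parse import urlparse, urlunparse, urlencode

def to_proxy(url, stream_query):
    parsed = urlparse(url)
    parsed = parsed._replace(
        netloc='bookish-octo-barnacle.vercel.app',
        query=urlencode(stream_query, doseq=True),
        path='/api/mildom' + parsed.path)
    return urlunparse(parsed)

print(to_proxy('https://cdn.example.com/stream/123_master.m3u8', {'timestamp': ['1']}))
# https://bookish-octo-barnacle.vercel.app/api/mildom/stream/123_master.m3u8?timestamp=1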
||||
|
||||
@@ -275,7 +275,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_child_with_type(parent, t):
|
||||
return next(c for c in parent['children'] if c.get('type') == t)
|
||||
for c in parent['children']:
|
||||
if c.get('type') == t:
|
||||
return c
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
try:
|
||||
@@ -306,7 +308,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||
main_container = self._extract_child_with_type(data, 'MainContainer')
|
||||
video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
|
||||
ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
|
||||
video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
|
||||
mgid = video_player['props']['media']['video']['config']['uri']
|
||||
|
||||
return mgid
|
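The _extract_child_with_type change above replaces a next() over a generator expression, which raises StopIteration when nothing matches, with a loop that simply returns None, so the caller can fall back to MainContainer when the ABTesting node is absent. A small sketch of that behaviour (data shape assumed):

def child_with_type(parent, t):
    # returns None instead of raising when no child matches
    for c in parent['children']:
        if c.get('type') == t:
            return c

data = {'children': [{'type': 'MainContainer', 'id': 1}]}
ab_testing = child_with_type(data, 'ABTesting')      # None, no exception
main_container = child_with_type(data, 'MainContainer')
print((ab_testing or main_container)['id'])          # 1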
||||
|
||||
197
yt_dlp/extractor/nebula.py
Normal file
@@ -0,0 +1,197 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class NebulaIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?watchnebula\.com/videos/(?P<id>[-\w]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://watchnebula.com/videos/that-time-disney-remade-beauty-and-the-beast',
|
||||
'md5': 'fe79c4df8b3aa2fea98a93d027465c7e',
|
||||
'info_dict': {
|
||||
'id': '5c271b40b13fd613090034fd',
|
||||
'ext': 'mp4',
|
||||
'title': 'That Time Disney Remade Beauty and the Beast',
|
||||
'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
|
||||
'upload_date': '20180731',
|
||||
'timestamp': 1533009600,
|
||||
'channel': 'Lindsay Ellis',
|
||||
'uploader': 'Lindsay Ellis',
|
||||
},
|
||||
'params': {
|
||||
'usenetrc': True,
|
||||
},
|
||||
'skip': 'All Nebula content requires authentication',
|
||||
},
|
||||
{
|
||||
'url': 'https://watchnebula.com/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
|
||||
'md5': '6d4edd14ce65720fa63aba5c583fb328',
|
||||
'info_dict': {
|
||||
'id': '5e7e78171aaf320001fbd6be',
|
||||
'ext': 'mp4',
|
||||
'title': 'Landing Craft - How The Allies Got Ashore',
|
||||
'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
|
||||
'upload_date': '20200327',
|
||||
'timestamp': 1585348140,
|
||||
'channel': 'The Logistics of D-Day',
|
||||
'uploader': 'The Logistics of D-Day',
|
||||
},
|
||||
'params': {
|
||||
'usenetrc': True,
|
||||
},
|
||||
'skip': 'All Nebula content requires authentication',
|
||||
},
|
||||
{
|
||||
'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
|
||||
'md5': '8c7d272910eea320f6f8e6d3084eecf5',
|
||||
'info_dict': {
|
||||
'id': '5e779ebdd157bc0001d1c75a',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 1: The Draw',
|
||||
'description': r'contains:There’s free money on offer… if the players can all work together.',
|
||||
'upload_date': '20200323',
|
||||
'timestamp': 1584980400,
|
||||
'channel': 'Tom Scott Presents: Money',
|
||||
'uploader': 'Tom Scott Presents: Money',
|
||||
},
|
||||
'params': {
|
||||
'usenetrc': True,
|
||||
},
|
||||
'skip': 'All Nebula content requires authentication',
|
||||
},
|
||||
]
|
||||
_NETRC_MACHINE = 'watchnebula'
|
||||
|
||||
def _retrieve_nebula_auth(self, video_id):
|
||||
"""
|
||||
Log in to Nebula, and returns a Nebula API token
|
||||
"""
|
||||
|
||||
username, password = self._get_login_info()
|
||||
if not (username and password):
|
||||
self.raise_login_required()
|
||||
|
||||
self.report_login()
|
||||
data = json.dumps({'email': username, 'password': password}).encode('utf8')
|
||||
response = self._download_json(
|
||||
'https://api.watchnebula.com/api/v1/auth/login/',
|
||||
data=data, fatal=False, video_id=video_id,
|
||||
headers={
|
||||
'content-type': 'application/json',
|
||||
# Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
|
||||
'cookie': ''
|
||||
},
|
||||
note='Authenticating to Nebula with supplied credentials',
|
||||
errnote='Authentication failed or rejected')
|
||||
if not response or not response.get('key'):
|
||||
self.raise_login_required()
|
||||
return response['key']
|
||||
|
||||
def _retrieve_zype_api_key(self, page_url, display_id):
|
||||
"""
|
||||
Retrieves the Zype API key
|
||||
"""
|
||||
|
||||
# Find the js that has the API key from the webpage and download it
|
||||
webpage = self._download_webpage(page_url, video_id=display_id)
|
||||
main_script_relpath = self._search_regex(
|
||||
r'<script[^>]*src="(?P<script_relpath>[^"]*main.[0-9a-f]*.chunk.js)"[^>]*>', webpage,
|
||||
group='script_relpath', name='script relative path', fatal=True)
|
||||
main_script_abspath = urljoin(page_url, main_script_relpath)
|
||||
main_script = self._download_webpage(main_script_abspath, video_id=display_id,
|
||||
note='Retrieving Zype API key')
|
||||
|
||||
api_key = self._search_regex(
|
||||
r'REACT_APP_ZYPE_API_KEY\s*:\s*"(?P<api_key>[\w-]*)"', main_script,
|
||||
group='api_key', name='API key', fatal=True)
|
||||
|
||||
return api_key
|
||||
|
||||
def _call_zype_api(self, path, params, video_id, api_key, note):
|
||||
"""
|
||||
A helper for making calls to the Zype API.
|
||||
"""
|
||||
query = {'api_key': api_key, 'per_page': 1}
|
||||
query.update(params)
|
||||
return self._download_json('https://api.zype.com' + path, video_id, query=query, note=note)
|
||||
|
||||
def _call_nebula_api(self, path, video_id, access_token, note):
|
||||
"""
|
||||
A helper for making calls to the Nebula API.
|
||||
"""
|
||||
return self._download_json('https://api.watchnebula.com/api/v1' + path, video_id, headers={
|
||||
'Authorization': 'Token {access_token}'.format(access_token=access_token)
|
||||
}, note=note)
|
||||
|
||||
def _fetch_zype_access_token(self, video_id, nebula_token):
|
||||
user_object = self._call_nebula_api('/auth/user/', video_id, nebula_token, note='Retrieving Zype access token')
|
||||
access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str)
|
||||
if not access_token:
|
||||
if try_get(user_object, lambda x: x['is_subscribed'], bool):
|
||||
# TODO: Reimplement the same Zype token polling the Nebula frontend implements
|
||||
# see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
|
||||
raise ExtractorError(
|
||||
'Unable to extract Zype access token from Nebula API authentication endpoint. '
|
||||
'Open an arbitrary video in a browser with this account to generate a token',
|
||||
expected=True)
|
||||
raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
|
||||
return access_token
|
||||
|
||||
def _extract_channel_title(self, video_meta):
|
||||
# TODO: Implement the API calls giving us the channel list,
|
||||
# so that we can do the title lookup and then figure out the channel URL
|
||||
categories = video_meta.get('categories', []) if video_meta else []
|
||||
# the channel name is the value of the first category
|
||||
for category in categories:
|
||||
if category.get('value'):
|
||||
return category['value'][0]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
nebula_token = self._retrieve_nebula_auth(display_id)
|
||||
api_key = self._retrieve_zype_api_key(url, display_id)
|
||||
|
||||
response = self._call_zype_api('/videos', {'friendly_title': display_id},
|
||||
display_id, api_key, note='Retrieving metadata from Zype')
|
||||
if len(response.get('response') or []) != 1:
|
||||
raise ExtractorError('Unable to find video on Zype API')
|
||||
video_meta = response['response'][0]
|
||||
|
||||
video_id = video_meta['_id']
|
||||
zype_access_token = self._fetch_zype_access_token(display_id, nebula_token=nebula_token)
|
||||
|
||||
channel_title = self._extract_channel_title(video_meta)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Zype',
|
||||
'url': 'https://player.zype.com/embed/%s.html?access_token=%s' % (video_id, zype_access_token),
|
||||
'title': video_meta.get('title'),
|
||||
'description': video_meta.get('description'),
|
||||
'timestamp': parse_iso8601(video_meta.get('published_at')),
|
||||
'thumbnails': [
|
||||
{
|
||||
'id': tn.get('name'), # this appears to be null
|
||||
'url': tn['url'],
|
||||
'width': tn.get('width'),
|
||||
'height': tn.get('height'),
|
||||
} for tn in video_meta.get('thumbnails', [])],
|
||||
'duration': video_meta.get('duration'),
|
||||
'channel': channel_title,
|
||||
'uploader': channel_title, # we chose uploader = channel name
|
||||
# TODO: uploader_url, channel_id, channel_url
|
||||
}
|
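The new Nebula extractor above chains three services: a Nebula login yields a token, the Nebula user endpoint yields a Zype access token, and the result is handed off as a Zype embed URL. A condensed sketch of that chain; post_json and get_json stand in for the real HTTP calls, and the credentials and ids are placeholders:

def zype_embed_url(post_json, get_json, zype_video_id):
    login = post_json('https://api.watchnebula.com/api/v1/auth/login/',
                      {'email': 'user@example.com', 'password': 'secret'})
    nebula_token = login['key']
    user = get_json('https://api.watchnebula.com/api/v1/auth/user/',
                    {'Authorization': 'Token %s' % nebula_token})
    zype_token = user['zype_auth_info']['access_token']
    return 'https://player.zype.com/embed/%s.html?access_token=%s' % (
        zype_video_id, zype_token)

# canned responses instead of real network calls
print(zype_embed_url(
    lambda url, body: {'key': 'NEBULA_TOKEN'},
    lambda url, headers: {'zype_auth_info': {'access_token': 'ZYPE_TOKEN'}},
    '5c271b40b13fd613090034fd'))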
||||
144
yt_dlp/extractor/nfhsnetwork.py
Normal file
@@ -0,0 +1,144 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
from ..utils import (
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp
|
||||
)
|
||||
|
||||
|
||||
class NFHSNetworkIE(InfoExtractor):
|
||||
IE_NAME = 'NFHSNetwork'
|
||||
_VALID_URL = r'https?://(?:www\.)?nfhsnetwork\.com/events/[\w-]+/(?P<id>(?:gam|evt|dd|)?[\w\d]{0,10})'
|
||||
_TESTS = [{
|
||||
# Auto-generated two-team sport (pixellot)
|
||||
'url': 'https://www.nfhsnetwork.com/events/rockford-high-school-rockford-mi/gamcf7e54cfbc',
|
||||
'info_dict': {
|
||||
'id': 'gamcf7e54cfbc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rockford vs Spring Lake - Girls Varsity Lacrosse 03/27/2021',
|
||||
'uploader': 'MHSAA - Michigan: Rockford High School, Rockford, MI',
|
||||
'uploader_id': 'cd2622cf76',
|
||||
'uploader_url': 'https://www.nfhsnetwork.com/schools/rockford-high-school-rockford-mi',
|
||||
'location': 'Rockford, Michigan',
|
||||
'timestamp': 1616859000,
|
||||
'upload_date': '20210327'
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# Non-sport activity with description
|
||||
'url': 'https://www.nfhsnetwork.com/events/limon-high-school-limon-co/evt4a30e3726c',
|
||||
'info_dict': {
|
||||
'id': 'evt4a30e3726c',
|
||||
'ext': 'mp4',
|
||||
'title': 'Drama Performance Limon High School vs. Limon High School - 12/13/2020',
|
||||
'description': 'Join the broadcast of the Limon High School Musical Performance at 2 PM.',
|
||||
'uploader': 'CHSAA: Limon High School, Limon, CO',
|
||||
'uploader_id': '7d2d121332',
|
||||
'uploader_url': 'https://www.nfhsnetwork.com/schools/limon-high-school-limon-co',
|
||||
'location': 'Limon, Colorado',
|
||||
'timestamp': 1607893200,
|
||||
'upload_date': '20201213'
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# Postseason game
|
||||
'url': 'https://www.nfhsnetwork.com/events/nfhs-network-special-events/dd8de71d45',
|
||||
'info_dict': {
|
||||
'id': 'dd8de71d45',
|
||||
'ext': 'mp4',
|
||||
'title': '2015 UA Holiday Classic Tournament: National Division - 12/26/2015',
|
||||
'uploader': 'SoCal Sports Productions',
|
||||
'uploader_id': '063dba0150',
|
||||
'uploader_url': 'https://www.nfhsnetwork.com/affiliates/socal-sports-productions',
|
||||
'location': 'San Diego, California',
|
||||
'timestamp': 1451187000,
|
||||
'upload_date': '20151226'
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# Video with no broadcasts object
|
||||
'url': 'https://www.nfhsnetwork.com/events/wiaa-wi/9aa2f92f82',
|
||||
'info_dict': {
|
||||
'id': '9aa2f92f82',
|
||||
'ext': 'mp4',
|
||||
'title': 'Competitive Equity - 01/21/2015',
|
||||
'description': 'Committee members discuss points of their research regarding a competitive equity plan',
|
||||
'uploader': 'WIAA - Wisconsin: Wisconsin Interscholastic Athletic Association',
|
||||
'uploader_id': 'a49f7d1002',
|
||||
'uploader_url': 'https://www.nfhsnetwork.com/associations/wiaa-wi',
|
||||
'location': 'Stevens Point, Wisconsin',
|
||||
'timestamp': 1421856000,
|
||||
'upload_date': '20150121'
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data = self._download_json(
|
||||
'https://cfunity.nfhsnetwork.com/v2/game_or_event/' + video_id,
|
||||
video_id)
|
||||
publisher = data.get('publishers')[0] # always exists
|
||||
broadcast = (publisher.get('broadcasts') or publisher.get('vods'))[0] # some (older) videos don't have a broadcasts object
|
||||
uploader = publisher.get('formatted_name') or publisher.get('name')
|
||||
uploaderID = publisher.get('publisher_key')
|
||||
pubType = publisher.get('type')
|
||||
uploaderPrefix = (
|
||||
"schools" if pubType == "school"
|
||||
else "associations" if "association" in pubType
|
||||
else "affiliates" if (pubType == "publisher" or pubType == "affiliate")
|
||||
else "schools")
|
||||
uploaderPage = 'https://www.nfhsnetwork.com/%s/%s' % (uploaderPrefix, publisher.get('slug'))
|
||||
location = '%s, %s' % (data.get('city'), data.get('state_name'))
|
||||
description = broadcast.get('description')
|
||||
isLive = broadcast.get('on_air') or broadcast.get('status') == 'on_air' or False
|
||||
|
||||
timestamp = unified_timestamp(data.get('local_start_time'))
|
||||
upload_date = unified_strdate(data.get('local_start_time'))
|
||||
|
||||
title = (
|
||||
self._og_search_title(webpage)
|
||||
or self._html_search_regex(r'<h1 class="sr-hidden">(.*?)</h1>', webpage, 'title'))
|
||||
title = title.split('|')[0].strip()
|
||||
|
||||
video_type = 'broadcasts' if isLive else 'vods'
|
||||
key = broadcast.get('key') if isLive else try_get(publisher, lambda x: x['vods'][0]['key'])
|
||||
m3u8_url = self._download_json(
|
||||
'https://cfunity.nfhsnetwork.com/v2/%s/%s/url' % (video_type, key),
|
||||
video_id).get('video_url')
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=isLive)
|
||||
self._sort_formats(formats, ['res', 'tbr'])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploaderID,
|
||||
'uploader_url': uploaderPage,
|
||||
'location': location,
|
||||
'upload_date': upload_date,
|
||||
'is_live': isLive
|
||||
}
|
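The uploaderPrefix expression in the NFHS extractor above maps a publisher type onto a URL path segment; the same mapping written out as a plain function for readability (behaviour copied from the diff):

def uploader_prefix(pub_type):
    if pub_type == 'school':
        return 'schools'
    if 'association' in pub_type:
        return 'associations'
    if pub_type in ('publisher', 'affiliate'):
        return 'affiliates'
    return 'schools'

print(uploader_prefix('state_association'))  # associations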
||||
@@ -11,60 +11,100 @@ from ..utils import (
|
||||
determine_ext,
|
||||
)
|
||||
import re
|
||||
import random
|
||||
|
||||
|
||||
class NitterIE(InfoExtractor):
|
||||
# Taken from https://github.com/zedeus/nitter/wiki/Instances
|
||||
INSTANCES = ('nitter.net',
|
||||
'nitter.snopyta.org',
|
||||
'nitter.42l.fr',
|
||||
'nitter.nixnet.services',
|
||||
'nitter.13ad.de',
|
||||
'nitter.pussthecat.org',
|
||||
'nitter.mastodont.cat',
|
||||
'nitter.dark.fail',
|
||||
'nitter.tedomum.net',
|
||||
'nitter.cattube.org',
|
||||
'nitter.fdn.fr',
|
||||
'nitter.1d4.us',
|
||||
'nitter.kavin.rocks',
|
||||
'tweet.lambda.dance',
|
||||
'nitter.cc',
|
||||
'nitter.weaponizedhumiliation.com',
|
||||
'3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
|
||||
'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
|
||||
'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion')
|
||||
|
||||
NON_HTTP_INSTANCES = (
|
||||
'3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
|
||||
'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
|
||||
'nitter7bryz3jv7e3uekphigvmoyoem4al3fynerxkj22dmoxoq553qd.onion',
|
||||
'npf37k3mtzwxreiw52ccs5ay4e6qt2fkcs2ndieurdyn2cuzzsfyfvid.onion',
|
||||
'nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion',
|
||||
'i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion',
|
||||
'26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion',
|
||||
|
||||
'nitter.i2p',
|
||||
'u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p',
|
||||
|
||||
'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion',
|
||||
)
|
||||
|
||||
HTTP_INSTANCES = (
|
||||
'nitter.42l.fr',
|
||||
'nitter.pussthecat.org',
|
||||
'nitter.nixnet.services',
|
||||
'nitter.mastodont.cat',
|
||||
'nitter.tedomum.net',
|
||||
'nitter.fdn.fr',
|
||||
'nitter.1d4.us',
|
||||
'nitter.kavin.rocks',
|
||||
'tweet.lambda.dance',
|
||||
'nitter.cc',
|
||||
'nitter.vxempire.xyz',
|
||||
'nitter.unixfox.eu',
|
||||
'nitter.domain.glass',
|
||||
'nitter.himiko.cloud',
|
||||
'nitter.eu',
|
||||
'nitter.namazso.eu',
|
||||
'nitter.mailstation.de',
|
||||
'nitter.actionsack.com',
|
||||
'nitter.cattube.org',
|
||||
'nitter.dark.fail',
|
||||
'birdsite.xanny.family',
|
||||
'nitter.40two.app',
|
||||
'nitter.skrep.in',
|
||||
|
||||
# not in the list anymore
|
||||
'nitter.snopyta.org',
|
||||
)
|
||||
|
||||
DEAD_INSTANCES = (
|
||||
# maintenance
|
||||
'nitter.ethibox.fr',
|
||||
|
||||
# official, rate limited
|
||||
'nitter.net',
|
||||
# offline
|
||||
'nitter.13ad.de',
|
||||
'nitter.weaponizedhumiliation.com',
|
||||
)
|
||||
|
||||
INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES
|
||||
|
||||
_INSTANCES_RE = '(?:' + '|'.join([re.escape(instance) for instance in INSTANCES]) + ')'
|
||||
_VALID_URL = r'https?://%(instance)s/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?' % {'instance': _INSTANCES_RE}
|
||||
current_instance = INSTANCES[0] # the test and official instance
|
||||
current_instance = random.choice(HTTP_INSTANCES)
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
# GIF (wrapped in mp4)
|
||||
'url': 'https://' + current_instance + '/firefox/status/1314279897502629888#m',
|
||||
'url': 'https://%s/firefox/status/1314279897502629888#m' % current_instance,
|
||||
'info_dict': {
|
||||
'id': '1314279897502629888',
|
||||
'ext': 'mp4',
|
||||
'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
|
||||
'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
|
||||
'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n\nReport harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg\n\n#UnfckTheInternet',
|
||||
'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n\nReport harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg\n\n#UnfckTheInternet',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Firefox 🔥',
|
||||
'uploader_id': 'firefox',
|
||||
'uploader_url': 'https://' + current_instance + '/firefox',
|
||||
'uploader_url': 'https://%s/firefox' % current_instance,
|
||||
'upload_date': '20201008',
|
||||
'timestamp': 1602183720,
|
||||
},
|
||||
}, { # normal video
|
||||
'url': 'https://' + current_instance + '/Le___Doc/status/1299715685392756737#m',
|
||||
'url': 'https://%s/Le___Doc/status/1299715685392756737#m' % current_instance,
|
||||
'info_dict': {
|
||||
'id': '1299715685392756737',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le Doc - "Je ne prédis jamais rien" D Raoult, Août 2020...',
|
||||
'description': '"Je ne prédis jamais rien" D Raoult, Août 2020...',
|
||||
'title': 'Le Doc - "Je ne prédis jamais rien"\nD Raoult, Août 2020...',
|
||||
'description': '"Je ne prédis jamais rien"\nD Raoult, Août 2020...',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Le Doc',
|
||||
'uploader_id': 'Le___Doc',
|
||||
'uploader_url': 'https://' + current_instance + '/Le___Doc',
|
||||
'uploader_url': 'https://%s/Le___Doc' % current_instance,
|
||||
'upload_date': '20200829',
|
||||
'timestamp': 1598711341,
|
||||
'view_count': int,
|
||||
@@ -73,31 +113,51 @@ class NitterIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
},
|
||||
}, { # video embed in a "Streaming Political Ads" box
|
||||
'url': 'https://' + current_instance + '/mozilla/status/1321147074491092994#m',
|
||||
'url': 'https://%s/mozilla/status/1321147074491092994#m' % current_instance,
|
||||
'info_dict': {
|
||||
'id': '1321147074491092994',
|
||||
'ext': 'mp4',
|
||||
'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
|
||||
'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
|
||||
'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?\n\nThis isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. \n\nLearn more ➡️ https://mzl.la/StreamingAds",
|
||||
'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?\n\nThis isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. \n\nLearn more ➡️ https://mzl.la/StreamingAds",
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Mozilla',
|
||||
'uploader_id': 'mozilla',
|
||||
'uploader_url': 'https://' + current_instance + '/mozilla',
|
||||
'uploader_url': 'https://%s/mozilla' % current_instance,
|
||||
'upload_date': '20201027',
|
||||
'timestamp': 1603820982
|
||||
},
|
||||
},
|
||||
}, { # not the first tweet but main-tweet
|
||||
'url': 'https://%s/TheNaturalNu/status/1379050895539724290#m' % current_instance,
|
||||
'info_dict': {
|
||||
'id': '1379050895539724290',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dorothy Zbornak - This had me hollering!!',
|
||||
'description': 'This had me hollering!!',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Dorothy Zbornak',
|
||||
'uploader_id': 'TheNaturalNu',
|
||||
'uploader_url': 'https://%s/TheNaturalNu' % current_instance,
|
||||
'timestamp': 1617626329,
|
||||
'upload_date': '20210405'
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
base_url = parsed_url.scheme + '://' + parsed_url.netloc
|
||||
base_url = '%s://%s' % (parsed_url.scheme, parsed_url.netloc)
|
||||
|
||||
self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
full_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = base_url + self._html_search_regex(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url')
|
||||
main_tweet_start = full_webpage.find('class="main-tweet"')
|
||||
if main_tweet_start > 0:
|
||||
webpage = full_webpage[main_tweet_start:]
|
||||
if not webpage:
|
||||
webpage = full_webpage
|
||||
|
||||
video_url = '%s%s' % (base_url, self._html_search_regex(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url'))
|
||||
ext = determine_ext(video_url)
|
||||
|
||||
if ext == 'unknown_video':
|
||||
@@ -108,33 +168,34 @@ class NitterIE(InfoExtractor):
|
||||
'ext': ext
|
||||
}]
|
||||
|
||||
title = (
|
||||
self._og_search_description(webpage).replace('\n', ' ')
|
||||
or self._html_search_regex(r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title'))
|
||||
title = self._og_search_description(full_webpage)
|
||||
if not title:
|
||||
title = self._html_search_regex(r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title')
|
||||
description = title
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader_id = (
|
||||
mobj.group('uploader_id')
|
||||
or self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False))
|
||||
or self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
|
||||
)
|
||||
|
||||
if uploader_id:
|
||||
uploader_url = base_url + '/' + uploader_id
|
||||
uploader_url = '%s/%s' % (base_url, uploader_id)
|
||||
|
||||
uploader = self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
|
||||
|
||||
if uploader:
|
||||
title = uploader + ' - ' + title
|
||||
title = '%s - %s' % (uploader, title)
|
||||
|
||||
view_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-play[^>]*></span>\s([^<]+)</div>', webpage, 'view count', fatal=False))
|
||||
like_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-heart[^>]*></span>\s([^<]+)</div>', webpage, 'like count', fatal=False))
|
||||
repost_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-retweet[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
|
||||
comment_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-comment[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
|
||||
|
||||
thumbnail = base_url + (self._html_search_meta('og:image', webpage, 'thumbnail url')
|
||||
or self._html_search_regex(r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False))
|
||||
|
||||
thumbnail = remove_end(thumbnail, '%3Asmall') # if parsed with regex, it should contain this
|
||||
thumbnail = self._html_search_meta('og:image', full_webpage, 'thumbnail url')
|
||||
if not thumbnail:
|
||||
thumbnail = '%s%s' % (base_url, self._html_search_regex(r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False))
|
||||
thumbnail = remove_end(thumbnail, '%3Asmall')
|
||||
|
||||
thumbnails = []
|
||||
thumbnail_ids = ('thumb', 'small', 'large', 'medium', 'orig')
|
||||
|
||||
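For reference, a minimal standalone sketch of the main-tweet isolation introduced above; the sample HTML and the helper name are illustrative, not part of the extractor.

import re

def isolate_main_tweet(full_webpage):
    # Narrow the search to the main tweet when a status page also embeds replies
    start = full_webpage.find('class="main-tweet"')
    webpage = full_webpage[start:] if start > 0 else full_webpage
    return webpage or full_webpage

sample = ('<div class="timeline">reply markup</div>'
          '<div class="main-tweet"><video data-url="/video/1.mp4"></video></div>')
section = isolate_main_tweet(sample)
print(re.search(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', section).group(1))
# -> /video/1.mp4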
yt_dlp/extractor/palcomp3.py (new file, 148 lines)
@@ -0,0 +1,148 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class PalcoMP3BaseIE(InfoExtractor):
|
||||
_GQL_QUERY_TMPL = '''{
|
||||
artist(slug: "%s") {
|
||||
%s
|
||||
}
|
||||
}'''
|
||||
_ARTIST_FIELDS_TMPL = '''music(slug: "%%s") {
|
||||
%s
|
||||
}'''
|
||||
_MUSIC_FIELDS = '''duration
|
||||
hls
|
||||
mp3File
|
||||
musicID
|
||||
plays
|
||||
title'''
|
||||
|
||||
def _call_api(self, artist_slug, artist_fields):
|
||||
return self._download_json(
|
||||
'https://www.palcomp3.com.br/graphql/', artist_slug, query={
|
||||
'query': self._GQL_QUERY_TMPL % (artist_slug, artist_fields),
|
||||
})['data']
|
||||
|
||||
def _parse_music(self, music):
|
||||
music_id = compat_str(music['musicID'])
|
||||
title = music['title']
|
||||
|
||||
formats = []
|
||||
hls_url = music.get('hls')
|
||||
if hls_url:
|
||||
formats.append({
|
||||
'url': hls_url,
|
||||
'protocol': 'm3u8_native',
|
||||
'ext': 'mp4',
|
||||
})
|
||||
mp3_file = music.get('mp3File')
|
||||
if mp3_file:
|
||||
formats.append({
|
||||
'url': mp3_file,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': music_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'duration': int_or_none(music.get('duration')),
|
||||
'view_count': int_or_none(music.get('plays')),
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._ARTIST_FIELDS_TMPL = self._ARTIST_FIELDS_TMPL % self._MUSIC_FIELDS
|
||||
|
||||
def _real_extract(self, url):
|
||||
artist_slug, music_slug = re.match(self._VALID_URL, url).groups()
|
||||
artist_fields = self._ARTIST_FIELDS_TMPL % music_slug
|
||||
music = self._call_api(artist_slug, artist_fields)['artist']['music']
|
||||
return self._parse_music(music)
|
||||
|
||||
|
||||
class PalcoMP3IE(PalcoMP3BaseIE):
|
||||
IE_NAME = 'PalcoMP3:song'
|
||||
_VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
|
||||
'md5': '99fd6405b2d8fd589670f6db1ba3b358',
|
||||
'info_dict': {
|
||||
'id': '3162927',
|
||||
'ext': 'mp3',
|
||||
'title': 'Nossas Composições - CUIDA BEM DELA',
|
||||
'duration': 210,
|
||||
'view_count': int,
|
||||
}
|
||||
}]
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if PalcoMP3VideoIE.suitable(url) else super(PalcoMP3IE, cls).suitable(url)
class PalcoMP3ArtistIE(PalcoMP3BaseIE):
|
||||
IE_NAME = 'PalcoMP3:artist'
|
||||
_VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.palcomp3.com.br/condedoforro/',
|
||||
'info_dict': {
|
||||
'id': '358396',
|
||||
'title': 'Conde do Forró',
|
||||
},
|
||||
'playlist_mincount': 188,
|
||||
}]
|
||||
_ARTIST_FIELDS_TMPL = '''artistID
|
||||
musics {
|
||||
nodes {
|
||||
%s
|
||||
}
|
||||
}
|
||||
name'''
|
||||
|
||||
@ classmethod
|
||||
def suitable(cls, url):
|
||||
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
artist_slug = self._match_id(url)
|
||||
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
|
||||
|
||||
def entries():
|
||||
for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
|
||||
yield self._parse_music(music)
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), str_or_none(artist.get('artistID')), artist.get('name'))
class PalcoMP3VideoIE(PalcoMP3BaseIE):
|
||||
IE_NAME = 'PalcoMP3:video'
|
||||
_VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)/?#clipe'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',
|
||||
'add_ie': ['Youtube'],
|
||||
'info_dict': {
|
||||
'id': '_pD1nR2qqPg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
|
||||
'description': 'md5:7043342c09a224598e93546e98e49282',
|
||||
'upload_date': '20161107',
|
||||
'uploader_id': 'maiaramaraisaoficial',
|
||||
'uploader': 'Maiara e Maraisa',
|
||||
}
|
||||
}]
|
||||
_MUSIC_FIELDS = 'youtubeID'
def _parse_music(self, music):
|
||||
youtube_id = music['youtubeID']
|
||||
return self.url_result(youtube_id, 'Youtube', youtube_id)
|
||||
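For context, the three query templates above are composed in two passes (note the escaped %%s that survives the first substitution); a hedged sketch using the slugs from the test URLs:

GQL_QUERY_TMPL = '''{
  artist(slug: "%s") {
    %s
  }
}'''
ARTIST_FIELDS_TMPL = '''music(slug: "%%s") {
  %s
}'''
MUSIC_FIELDS = 'musicID\ntitle\nhls\nmp3File\nduration\nplays'

# First pass (done once in _real_initialize): drop the music fields into the artist template
artist_fields_tmpl = ARTIST_FIELDS_TMPL % MUSIC_FIELDS
# Second pass (per request): fill in the music slug, then the artist slug
artist_fields = artist_fields_tmpl % 'nossas-composicoes-cuida-bem-dela'
query = GQL_QUERY_TMPL % ('maiaraemaraisaoficial', artist_fields)
print(query)  # the string sent as the `query` parameter to the /graphql/ endpoint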
@@ -1,22 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
try_get,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class PicartoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://picarto.tv/Setz',
|
||||
'info_dict': {
|
||||
@@ -34,65 +27,46 @@ class PicartoIE(InfoExtractor):
|
||||
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://api.picarto.tv/v1/channel/name/' + channel_id,
|
||||
channel_id)
|
||||
data = self._download_json(
|
||||
'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
|
||||
'query': '''{
|
||||
channel(name: "%s") {
|
||||
adult
|
||||
id
|
||||
online
|
||||
stream_name
|
||||
title
|
||||
}
|
||||
getLoadBalancerUrl(channel_name: "%s") {
|
||||
url
|
||||
}
|
||||
}''' % (channel_id, channel_id),
|
||||
})['data']
|
||||
metadata = data['channel']
|
||||
|
||||
if metadata.get('online') is False:
|
||||
if metadata.get('online') == 0:
|
||||
raise ExtractorError('Stream is offline', expected=True)
|
||||
title = metadata['title']
|
||||
|
||||
cdn_data = self._download_json(
|
||||
'https://picarto.tv/process/channel', channel_id,
|
||||
data=urlencode_postdata({'loadbalancinginfo': channel_id}),
|
||||
note='Downloading load balancing info')
|
||||
data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
|
||||
channel_id, 'Downloading load balancing info')
|
||||
|
||||
token = mobj.group('token') or 'public'
|
||||
params = {
|
||||
'con': int(time.time() * 1000),
|
||||
'token': token,
|
||||
}
|
||||
|
||||
prefered_edge = cdn_data.get('preferedEdge')
|
||||
formats = []
|
||||
|
||||
for edge in cdn_data['edges']:
|
||||
edge_ep = edge.get('ep')
|
||||
if not edge_ep or not isinstance(edge_ep, compat_str):
|
||||
for source in (cdn_data.get('source') or []):
|
||||
source_url = source.get('url')
|
||||
if not source_url:
|
||||
continue
|
||||
edge_id = edge.get('id')
|
||||
for tech in cdn_data['techs']:
|
||||
tech_label = tech.get('label')
|
||||
tech_type = tech.get('type')
|
||||
preference = 0
|
||||
if edge_id == prefered_edge:
|
||||
preference += 1
|
||||
format_id = []
|
||||
if edge_id:
|
||||
format_id.append(edge_id)
|
||||
if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
|
||||
format_id.append('hls')
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
update_url_query(
|
||||
'https://%s/hls/%s/index.m3u8'
|
||||
% (edge_ep, channel_id), params),
|
||||
channel_id, 'mp4', quality=preference,
|
||||
m3u8_id='-'.join(format_id), fatal=False))
|
||||
continue
|
||||
elif tech_type == 'video/mp4' or tech_label == 'MP4':
|
||||
format_id.append('mp4')
|
||||
formats.append({
|
||||
'url': update_url_query(
|
||||
'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
|
||||
params),
|
||||
'format_id': '-'.join(format_id),
|
||||
'quality': preference,
|
||||
})
|
||||
else:
|
||||
# rtmp format does not seem to work
|
||||
continue
|
||||
source_type = source.get('type')
|
||||
if source_type == 'html5/application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif source_type == 'html5/video/mp4':
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
mature = metadata.get('adult')
|
||||
@@ -103,10 +77,10 @@ class PicartoIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': channel_id,
|
||||
'title': self._live_title(metadata.get('title') or channel_id),
|
||||
'title': self._live_title(title.strip()),
|
||||
'is_live': True,
|
||||
'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
|
||||
'channel': channel_id,
|
||||
'channel_id': metadata.get('id'),
|
||||
'channel_url': 'https://picarto.tv/%s' % channel_id,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
|
||||
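A rough sketch of how the new `source` list from the ptvapi response maps onto format dicts in the rewrite above; the sample data is invented and the expansion of the HLS master playlist (done via _extract_m3u8_formats in the extractor) is left out:

def build_formats(sources):
    formats = []
    for source in sources or []:
        source_url = source.get('url')
        if not source_url:
            continue
        source_type = source.get('type')
        if source_type == 'html5/application/vnd.apple.mpegurl':
            # the extractor expands this master playlist into one format per variant
            formats.append({'url': source_url, 'ext': 'mp4',
                            'protocol': 'm3u8_native', 'format_id': 'hls'})
        elif source_type == 'html5/video/mp4':
            formats.append({'url': source_url, 'ext': 'mp4'})
    return formats

print(build_formats([
    {'type': 'html5/application/vnd.apple.mpegurl', 'url': 'https://edge.example/hls/index.m3u8'},
    {'type': 'html5/video/mp4', 'url': 'https://edge.example/mp4/stream.mp4'},
]))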
@@ -398,6 +398,16 @@ class PornHubIE(PornHubBaseIE):
|
||||
formats = []
|
||||
|
||||
def add_format(format_url, height=None):
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
return
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
return
|
||||
tbr = None
|
||||
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
|
||||
if mobj:
|
||||
@@ -417,16 +427,6 @@ class PornHubIE(PornHubBaseIE):
|
||||
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
|
||||
if upload_date:
|
||||
upload_date = upload_date.replace('/', '')
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id='dash', fatal=False))
|
||||
continue
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
if '/video/get_media' in video_url:
|
||||
medias = self._download_json(video_url, video_id, fatal=False)
|
||||
if isinstance(medias, list):
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
class SBSIE(InfoExtractor):
|
||||
IE_DESC = 'sbs.com.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=|/watch/)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Original URL is handled by the generic IE which finds the iframe:
|
||||
@@ -43,6 +43,9 @@ class SBSIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -2,12 +2,18 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
from ..utils import (
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class ScreencastOMaticIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://screencast-o-matic\.com/(?:(?:watch|player)/|embed\?.*?\bsc=)(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
|
||||
'md5': '483583cb80d92588f15ccbedd90f0c18',
|
||||
'info_dict': {
|
||||
@@ -16,22 +22,30 @@ class ScreencastOMaticIE(InfoExtractor):
|
||||
'title': 'Welcome to 3-4 Philosophy @ DECV!',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
|
||||
'duration': 369.163,
|
||||
'duration': 369,
|
||||
'upload_date': '20141216',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://screencast-o-matic.com/embed?ff=true&sc=cbV2r4Q5TL&fromPH=true&a=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
jwplayer_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", webpage, 'setup code'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
|
||||
info_dict.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
webpage = self._download_webpage(
|
||||
'https://screencast-o-matic.com/player/' + video_id, video_id)
|
||||
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': get_element_by_class('overlayTitle', webpage),
|
||||
'description': strip_or_none(get_element_by_class('overlayDescription', webpage)) or None,
|
||||
'duration': int_or_none(self._search_regex(
|
||||
r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};',
|
||||
webpage, 'duration', default=None)),
|
||||
'upload_date': unified_strdate(remove_start(
|
||||
get_element_by_class('overlayPublished', webpage), 'Published: ')),
|
||||
})
|
||||
return info_dict
|
||||
return info
|
||||
|
||||
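The rewrite above scrapes the /player/ page instead of parsing the jwplayer setup; a simplified sketch of the class-based lookups, with get_element_by_class approximated by a small regex and invented HTML:

import re

def get_element_by_class(klass, html):
    # crude stand-in for yt_dlp.utils.get_element_by_class
    mobj = re.search(r'class="[^"]*\b%s\b[^"]*"[^>]*>([^<]*)<' % re.escape(klass), html)
    return mobj.group(1) if mobj else None

sample = ('<h2 class="overlayTitle">Welcome to 3-4 Philosophy @ DECV!</h2>'
          '<span class="overlayPublished">Published: Dec 16, 2014</span>')
print(get_element_by_class('overlayTitle', sample))
print(get_element_by_class('overlayPublished', sample).replace('Published: ', ''))
# unified_strdate() would turn 'Dec 16, 2014' into '20141216'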
@@ -24,6 +24,7 @@ from ..utils import (
|
||||
merge_dicts,
|
||||
OnDemandPagedList,
|
||||
parse_filesize,
|
||||
parse_iso8601,
|
||||
RegexNotFoundError,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
@@ -74,25 +75,28 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
def _get_video_password(self):
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
data = urlencode_postdata({
|
||||
'password': password,
|
||||
'token': token,
|
||||
})
|
||||
raise ExtractorError(
|
||||
'This video is protected by a password, use the --video-password option',
|
||||
expected=True)
|
||||
return password
|
||||
|
||||
def _verify_video_password(self, url, video_id, password, token, vuid):
|
||||
if url.startswith('http://'):
|
||||
# vimeo only supports https now, but the user can give an http url
|
||||
url = url.replace('http://', 'https://')
|
||||
password_request = sanitized_Request(url + '/password', data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Referer', url)
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
return self._download_webpage(
|
||||
password_request, video_id,
|
||||
'Verifying the password', 'Wrong password')
|
||||
url + '/password', video_id, 'Verifying the password',
|
||||
'Wrong password', data=urlencode_postdata({
|
||||
'password': password,
|
||||
'token': token,
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Referer': url,
|
||||
})
|
||||
|
||||
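For illustration, what the refactored _verify_video_password() sends; the request is built by hand here instead of going through _download_webpage(), and the password/token values are placeholders:

from urllib.parse import urlencode
from urllib.request import Request

def build_password_request(url, password, token):
    if url.startswith('http://'):
        url = url.replace('http://', 'https://')  # Vimeo only serves https
    return Request(
        url + '/password',
        data=urlencode({'password': password, 'token': token}).encode(),
        headers={'Content-Type': 'application/x-www-form-urlencoded', 'Referer': url})

req = build_password_request('http://vimeo.com/123456789', 'hunter2', 'xsrft-token')
print(req.full_url)  # -> https://vimeo.com/123456789/password
# the extractor also sets the `vuid` cookie on vimeo.com before issuing this POST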
def _extract_xsrft_and_vuid(self, webpage):
|
||||
xsrft = self._search_regex(
|
||||
@@ -273,7 +277,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
)?
|
||||
(?:videos?/)?
|
||||
(?P<id>[0-9]+)
|
||||
(?:/[\da-f]+)?
|
||||
(?:/(?P<unlisted_hash>[\da-f]{10}))?
|
||||
/?(?:[?&].*)?(?:[#].*)?$
|
||||
'''
|
||||
IE_NAME = 'vimeo'
|
||||
@@ -326,9 +330,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'id': '54469442',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
|
||||
'uploader': 'The BLN & Business of Software',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/theblnbusinessofsoftware',
|
||||
'uploader_id': 'theblnbusinessofsoftware',
|
||||
'uploader': 'Business of Software',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/businessofsoftware',
|
||||
'uploader_id': 'businessofsoftware',
|
||||
'duration': 3610,
|
||||
'description': None,
|
||||
},
|
||||
@@ -463,6 +467,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
'skip': 'this page is no longer available.',
|
||||
},
|
||||
{
|
||||
'url': 'http://player.vimeo.com/video/68375962',
|
||||
@@ -563,9 +568,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
return urls[0] if urls else None
|
||||
|
||||
def _verify_player_video_password(self, url, video_id, headers):
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
password = self._get_video_password()
|
||||
data = urlencode_postdata({
|
||||
'password': base64.b64encode(password.encode()),
|
||||
})
|
||||
@@ -628,11 +631,37 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if 'Referer' not in headers:
|
||||
headers['Referer'] = url
|
||||
|
||||
channel_id = self._search_regex(
|
||||
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
|
||||
|
||||
# Extract ID from URL
|
||||
video_id = self._match_id(url)
|
||||
video_id, unlisted_hash = re.match(self._VALID_URL, url).groups()
|
||||
if unlisted_hash:
|
||||
token = self._download_json(
|
||||
'https://vimeo.com/_rv/jwt', video_id, headers={
|
||||
'X-Requested-With': 'XMLHttpRequest'
|
||||
})['token']
|
||||
video = self._download_json(
|
||||
'https://api.vimeo.com/videos/%s:%s' % (video_id, unlisted_hash),
|
||||
video_id, headers={
|
||||
'Authorization': 'jwt ' + token,
|
||||
}, query={
|
||||
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
|
||||
})
|
||||
info = self._parse_config(self._download_json(
|
||||
video['config_url'], video_id), video_id)
|
||||
self._vimeo_sort_formats(info['formats'])
|
||||
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
|
||||
info.update({
|
||||
'description': video.get('description'),
|
||||
'license': video.get('license'),
|
||||
'release_timestamp': get_timestamp('release'),
|
||||
'timestamp': get_timestamp('created'),
|
||||
'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
|
||||
})
|
||||
connections = try_get(
|
||||
video, lambda x: x['metadata']['connections'], dict) or {}
|
||||
for k in ('comment', 'like'):
|
||||
info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
|
||||
return info
|
||||
|
||||
orig_url = url
|
||||
is_pro = 'vimeopro.com/' in url
|
||||
is_player = '://player.vimeo.com/video/' in url
|
||||
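A hedged sketch of the unlisted-video path added above: fetch a JWT from /_rv/jwt, then ask the API for the clip's config_url. Endpoints and header names are taken from the diff; plain urllib stands in for the extractor's download helpers, and no request is made here:

import json
from urllib.request import Request, urlopen

def fetch_unlisted_config_url(video_id, unlisted_hash):
    token_req = Request('https://vimeo.com/_rv/jwt',
                        headers={'X-Requested-With': 'XMLHttpRequest'})
    token = json.load(urlopen(token_req))['token']
    api_req = Request(
        'https://api.vimeo.com/videos/%s:%s?fields=config_url' % (video_id, unlisted_hash),
        headers={'Authorization': 'jwt ' + token})
    return json.load(urlopen(api_req))['config_url']

# usage (requires network access and a valid id/hash pair):
# config_url = fetch_unlisted_config_url('123456789', 'abcdef1234')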
@@ -722,7 +751,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
|
||||
if '_video_password_verified' in data:
|
||||
raise ExtractorError('video password verification failed!')
|
||||
self._verify_video_password(redirect_url, video_id, webpage)
|
||||
video_password = self._get_video_password()
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
self._verify_video_password(
|
||||
redirect_url, video_id, video_password, token, vuid)
|
||||
return self._real_extract(
|
||||
smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
|
||||
else:
|
||||
@@ -808,6 +840,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
|
||||
webpage, 'license', default=None, group='license')
|
||||
|
||||
channel_id = self._search_regex(
|
||||
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
|
||||
channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
info_dict = {
|
||||
@@ -1114,10 +1148,23 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_url, video_id = re.match(self._VALID_URL, url).groups()
|
||||
clip_data = self._download_json(
|
||||
page_url.replace('/review/', '/review/data/'),
|
||||
video_id)['clipData']
|
||||
config_url = clip_data['configUrl']
|
||||
data = self._download_json(
|
||||
page_url.replace('/review/', '/review/data/'), video_id)
|
||||
if data.get('isLocked') is True:
|
||||
video_password = self._get_video_password()
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', video_id)
|
||||
webpage = self._verify_video_password(
|
||||
'https://vimeo.com/' + video_id, video_id,
|
||||
video_password, viewer['xsrft'], viewer['vuid'])
|
||||
clip_page_config = self._parse_json(self._search_regex(
|
||||
r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
|
||||
webpage, 'clip page config'), video_id)
|
||||
config_url = clip_page_config['player']['config_url']
|
||||
clip_data = clip_page_config.get('clip') or {}
|
||||
else:
|
||||
clip_data = data['clipData']
|
||||
config_url = clip_data['configUrl']
|
||||
config = self._download_json(config_url, video_id)
|
||||
info_dict = self._parse_config(config, video_id)
|
||||
source_format = self._extract_original_format(
|
||||
|
||||
@@ -113,7 +113,7 @@ class VLiveIE(VLiveBaseIE):
|
||||
raise ExtractorError('Unable to log in', expected=True)
|
||||
|
||||
def _call_api(self, path_template, video_id, fields=None, limit=None):
|
||||
query = {'appId': self._APP_ID, 'gcc': 'KR'}
|
||||
query = {'appId': self._APP_ID, 'gcc': 'KR', 'platformType': 'PC'}
|
||||
if fields:
|
||||
query['fields'] = fields
|
||||
if limit:
|
||||
|
||||
@@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor):
|
||||
# request basic data
|
||||
basic_data_params = {
|
||||
'vid': video_id,
|
||||
'ccode': '0590',
|
||||
'ccode': '0532',
|
||||
'client_ip': '192.168.1.1',
|
||||
'utid': cna,
|
||||
'client_ts': time.time() / 1000,
|
||||
|
||||
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
@@ -15,7 +16,6 @@ from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_HTTPError,
|
||||
compat_kwargs,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
@@ -28,6 +28,7 @@ from ..utils import (
|
||||
bool_or_none,
|
||||
clean_html,
|
||||
dict_get,
|
||||
datetime_from_str,
|
||||
ExtractorError,
|
||||
format_field,
|
||||
float_or_none,
|
||||
@@ -47,7 +48,7 @@ from ..utils import (
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
urljoin
|
||||
)
|
||||
|
||||
|
||||
@@ -261,33 +262,30 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
return True
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
query = kwargs.get('query', {}).copy()
|
||||
kwargs['query'] = query
|
||||
return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
|
||||
*args, **compat_kwargs(kwargs))
|
||||
def _initialize_consent(self):
|
||||
cookies = self._get_cookies('https://www.youtube.com/')
|
||||
if cookies.get('__Secure-3PSID'):
|
||||
return
|
||||
consent_id = None
|
||||
consent = cookies.get('CONSENT')
|
||||
if consent:
|
||||
if 'YES' in consent.value:
|
||||
return
|
||||
consent_id = self._search_regex(
|
||||
r'PENDING\+(\d+)', consent.value, 'consent', default=None)
|
||||
if not consent_id:
|
||||
consent_id = random.randint(100, 999)
|
||||
self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._initialize_consent()
|
||||
if self._downloader is None:
|
||||
return
|
||||
if not self._login():
|
||||
return
|
||||
|
||||
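The consent bootstrap above can be read in isolation; a standalone sketch where `cookies` is a plain dict of cookie values rather than the cookiejar the extractor reads via _get_cookies():

import random
import re

def build_consent_cookie(cookies):
    if cookies.get('__Secure-3PSID'):      # logged-in sessions are not shown the consent page
        return None
    consent = cookies.get('CONSENT')
    if consent and 'YES' in consent:       # consent already given
        return None
    mobj = re.search(r'PENDING\+(\d+)', consent or '')
    consent_id = mobj.group(1) if mobj else random.randint(100, 999)
    return 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id

print(build_consent_cookie({'CONSENT': 'PENDING+262'}))  # -> YES+cb.20210328-17-p0.en+FX+262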
_YT_WEB_CLIENT_VERSION = '2.20210301.08.00'
|
||||
_DEFAULT_API_DATA = {
|
||||
'context': {
|
||||
'client': {
|
||||
'clientName': 'WEB',
|
||||
'clientVersion': _YT_WEB_CLIENT_VERSION,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
_DEFAULT_BASIC_API_HEADERS = {
|
||||
'X-YouTube-Client-Name': '1',
|
||||
'X-YouTube-Client-Version': _YT_WEB_CLIENT_VERSION
|
||||
}
|
||||
|
||||
_YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
|
||||
_YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
|
||||
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
||||
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
||||
@@ -301,19 +299,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)
|
||||
|
||||
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
|
||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||
data = self._DEFAULT_API_DATA.copy()
|
||||
note='Downloading API JSON', errnote='Unable to download API page',
|
||||
context=None, api_key=None):
|
||||
|
||||
data = {'context': context} if context else {'context': self._extract_context()}
|
||||
data.update(query)
|
||||
headers = headers or {}
|
||||
headers.update({'content-type': 'application/json'})
|
||||
auth = self._generate_sapisidhash_header()
|
||||
if auth is not None:
|
||||
headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'})
|
||||
real_headers = self._generate_api_headers()
|
||||
real_headers.update({'content-type': 'application/json'})
|
||||
if headers:
|
||||
real_headers.update(headers)
|
||||
return self._download_json(
|
||||
'https://www.youtube.com/youtubei/v1/%s' % ep,
|
||||
video_id=video_id, fatal=fatal, note=note, errnote=errnote,
|
||||
data=json.dumps(data).encode('utf8'), headers=headers,
|
||||
query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
|
||||
data=json.dumps(data).encode('utf8'), headers=real_headers,
|
||||
query={'key': api_key or self._extract_api_key()})
|
||||
|
||||
def _extract_api_key(self, ytcfg=None):
|
||||
return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY
|
||||
|
||||
def _extract_yt_initial_data(self, video_id, webpage):
|
||||
return self._parse_json(
|
||||
@@ -347,7 +349,47 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
||||
default='{}'), video_id, fatal=False)
|
||||
default='{}'), video_id, fatal=False) or {}
|
||||
|
||||
def __extract_client_version(self, ytcfg):
|
||||
return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION
|
||||
|
||||
def _extract_context(self, ytcfg=None):
|
||||
context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
|
||||
if context:
|
||||
return context
|
||||
|
||||
# Recreate the client context (required)
|
||||
client_version = self.__extract_client_version(ytcfg)
|
||||
client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
|
||||
context = {
|
||||
'client': {
|
||||
'clientName': client_name,
|
||||
'clientVersion': client_version,
|
||||
}
|
||||
}
|
||||
visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
|
||||
if visitor_data:
|
||||
context['client']['visitorData'] = visitor_data
|
||||
return context
|
||||
|
||||
def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
|
||||
headers = {
|
||||
'X-YouTube-Client-Name': '1',
|
||||
'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
|
||||
}
|
||||
if identity_token:
|
||||
headers['x-youtube-identity-token'] = identity_token
|
||||
if account_syncid:
|
||||
headers['X-Goog-PageId'] = account_syncid
|
||||
headers['X-Goog-AuthUser'] = 0
|
||||
if visitor_data:
|
||||
headers['x-goog-visitor-id'] = visitor_data
|
||||
auth = self._generate_sapisidhash_header()
|
||||
if auth is not None:
|
||||
headers['Authorization'] = auth
|
||||
headers['X-Origin'] = 'https://www.youtube.com'
|
||||
return headers
|
||||
|
||||
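For reference, a trimmed, self-contained version of the header-building logic above; ytcfg is just a dict here, the fallback client version matches the constant in the diff, and the SAPISIDHASH authorization step is omitted:

def generate_api_headers(ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
    ytcfg = ytcfg or {}
    headers = {
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': ytcfg.get('INNERTUBE_CLIENT_VERSION') or '2.20210407.08.00',
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0
    if visitor_data:
        headers['x-goog-visitor-id'] = visitor_data
    return headers

print(generate_api_headers(visitor_data='CgtX0example'))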
def _extract_video(self, renderer):
|
||||
video_id = renderer.get('videoId')
|
||||
@@ -370,7 +412,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
(lambda x: x['ownerText']['runs'][0]['text'],
|
||||
lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'_type': 'url',
|
||||
'ie_key': YoutubeIE.ie_key(),
|
||||
'id': video_id,
|
||||
'url': video_id,
|
||||
@@ -1237,6 +1279,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
|
||||
'url': 'cBvYw8_A0vQ',
|
||||
'info_dict': {
|
||||
'id': 'cBvYw8_A0vQ',
|
||||
'ext': 'mp4',
|
||||
'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
|
||||
'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
|
||||
'upload_date': '20201120',
|
||||
'uploader': 'Walk around Japan',
|
||||
'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@@ -1490,6 +1549,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
(r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
|
||||
regex), webpage, name, default='{}'), video_id, fatal=False)
|
||||
|
||||
@staticmethod
|
||||
def parse_time_text(time_text):
|
||||
"""
|
||||
Parse the comment time text
|
||||
time_text is in the format 'X units ago (edited)'
|
||||
"""
|
||||
time_text_split = time_text.split(' ')
|
||||
if len(time_text_split) >= 3:
|
||||
return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
|
||||
|
||||
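datetime_from_str() is a yt-dlp helper; an approximation of what parse_time_text() above resolves a comment timestamp such as '2 weeks ago (edited)' to, using plain timedelta arithmetic:

from datetime import datetime, timedelta

UNIT_SECONDS = {'second': 1, 'minute': 60, 'hour': 3600, 'day': 86400,
                'week': 604800, 'month': 2592000, 'year': 31536000}

def parse_time_text(time_text):
    # 'X units ago (edited)' -> approximate absolute datetime
    amount, unit = time_text.split(' ')[:2]
    return datetime.utcnow() - timedelta(seconds=int(amount) * UNIT_SECONDS[unit.rstrip('s')])

print(parse_time_text('2 weeks ago (edited)'))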
@staticmethod
|
||||
def _join_text_entries(runs):
|
||||
text = None
|
||||
@@ -1512,7 +1581,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
text = self._join_text_entries(comment_text_runs) or ''
|
||||
comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
|
||||
time_text = self._join_text_entries(comment_time_text)
|
||||
|
||||
timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
|
||||
author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
|
||||
author_id = try_get(comment_renderer,
|
||||
lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
|
||||
@@ -1523,11 +1592,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
|
||||
is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
|
||||
|
||||
return {
|
||||
'id': comment_id,
|
||||
'text': text,
|
||||
# TODO: This should be parsed to timestamp
|
||||
'timestamp': timestamp,
|
||||
'time_text': time_text,
|
||||
'like_count': votes,
|
||||
'is_favorited': is_liked,
|
||||
@@ -1539,7 +1607,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
}
|
||||
|
||||
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
|
||||
session_token_list, parent=None, comment_counts=None):
|
||||
ytcfg, session_token_list, parent=None, comment_counts=None):
|
||||
|
||||
def extract_thread(parent_renderer):
|
||||
contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
|
||||
@@ -1565,7 +1633,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if comment_replies_renderer:
|
||||
comment_counts[2] += 1
|
||||
comment_entries_iter = self._comment_entries(
|
||||
comment_replies_renderer, identity_token, account_syncid,
|
||||
comment_replies_renderer, identity_token, account_syncid, ytcfg,
|
||||
parent=comment.get('id'), session_token_list=session_token_list,
|
||||
comment_counts=comment_counts)
|
||||
|
||||
@@ -1575,16 +1643,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if not comment_counts:
|
||||
# comment so far, est. total comments, current comment thread #
|
||||
comment_counts = [0, 0, 0]
|
||||
headers = self._DEFAULT_BASIC_API_HEADERS.copy()
|
||||
|
||||
# TODO: Generalize the download code with TabIE
|
||||
if identity_token:
|
||||
headers['x-youtube-identity-token'] = identity_token
|
||||
|
||||
if account_syncid:
|
||||
headers['X-Goog-PageId'] = account_syncid
|
||||
headers['X-Goog-AuthUser'] = 0
|
||||
|
||||
context = self._extract_context(ytcfg)
|
||||
visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
|
||||
continuation = YoutubeTabIE._extract_continuation(root_continuation_data) # TODO
|
||||
first_continuation = False
|
||||
if parent is None:
|
||||
@@ -1593,6 +1655,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
for page_num in itertools.count(0):
|
||||
if not continuation:
|
||||
break
|
||||
headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
|
||||
retries = self._downloader.params.get('extractor_retries', 3)
|
||||
count = -1
|
||||
last_error = None
|
||||
@@ -1615,12 +1678,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
|
||||
if page_num == 0:
|
||||
if first_continuation:
|
||||
note_prefix = "Downloading initial comment continuation page"
|
||||
note_prefix = 'Downloading initial comment continuation page'
|
||||
else:
|
||||
note_prefix = " Downloading comment reply thread %d %s" % (comment_counts[2], comment_prog_str)
|
||||
note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
|
||||
else:
|
||||
note_prefix = "%sDownloading comment%s page %d %s" % (
|
||||
" " if parent else "",
|
||||
note_prefix = '%sDownloading comment%s page %d %s' % (
|
||||
' ' if parent else '',
|
||||
' replies' if parent else '',
|
||||
page_num,
|
||||
comment_prog_str)
|
||||
@@ -1635,13 +1698,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
|
||||
if e.cause.code == 413:
|
||||
self.report_warning("Assumed end of comments (received HTTP Error 413)")
|
||||
self.report_warning('Assumed end of comments (received HTTP Error 413)')
|
||||
return
|
||||
# Downloading page may result in intermittent 5xx HTTP error
|
||||
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
|
||||
last_error = 'HTTP Error %s' % e.cause.code
|
||||
if e.cause.code == 404:
|
||||
last_error = last_error + " (this API is probably deprecated)"
|
||||
last_error = last_error + ' (this API is probably deprecated)'
|
||||
if count < retries:
|
||||
continue
|
||||
raise
|
||||
@@ -1659,7 +1722,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
# YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
|
||||
if browse.get('reload'):
|
||||
raise ExtractorError("Invalid or missing params in continuation request", expected=False)
|
||||
raise ExtractorError('Invalid or missing params in continuation request', expected=False)
|
||||
|
||||
# TODO: not tested, merged from old extractor
|
||||
err_msg = browse.get('externalErrorMessage')
|
||||
@@ -1674,6 +1737,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
if not response:
|
||||
break
|
||||
visitor_data = try_get(
|
||||
response,
|
||||
lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
|
||||
compat_str) or visitor_data
|
||||
|
||||
known_continuation_renderers = {
|
||||
'itemSectionContinuation': extract_thread,
|
||||
@@ -1699,7 +1766,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
if expected_comment_count:
|
||||
comment_counts[1] = str_to_int(expected_comment_count)
|
||||
self.to_screen("Downloading ~%d comments" % str_to_int(expected_comment_count))
|
||||
self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
|
||||
yield comment_counts[1]
|
||||
|
||||
# TODO: cli arg.
|
||||
@@ -1715,7 +1782,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
continuation = YoutubeTabIE._build_continuation_query(
|
||||
continuation=sort_continuation_renderer.get('continuation'),
|
||||
ctp=sort_continuation_renderer.get('clickTrackingParams'))
|
||||
self.to_screen("Sorting comments by %s" % ('popular' if comment_sort_index == 0 else 'newest'))
|
||||
self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
|
||||
break
|
||||
|
||||
for entry in known_continuation_renderers[key](continuation_renderer):
|
||||
@@ -1740,6 +1807,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
renderer,
|
||||
identity_token=self._extract_identity_token(webpage, item_id=video_id),
|
||||
account_syncid=self._extract_account_syncid(ytcfg),
|
||||
ytcfg=ytcfg,
|
||||
session_token_list=[xsrf_token])
|
||||
|
||||
for comment in comment_iter:
|
||||
@@ -1748,7 +1816,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
continue
|
||||
comments.append(comment)
|
||||
break
|
||||
self.to_screen("Downloaded %d/%d comments" % (len(comments), estimated_total))
|
||||
self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
|
||||
return {
|
||||
'comments': comments,
|
||||
'comment_count': len(comments),
|
||||
@@ -1760,17 +1828,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
base_url = self.http_scheme() + '//www.youtube.com/'
|
||||
webpage_url = base_url + 'watch?v=' + video_id
|
||||
webpage = self._download_webpage(
|
||||
webpage_url + '&has_verified=1&bpctr=9999999999',
|
||||
video_id, fatal=False)
|
||||
webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
|
||||
|
||||
player_response = None
|
||||
if webpage:
|
||||
player_response = self._extract_yt_initial_variable(
|
||||
webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
|
||||
video_id, 'initial player response')
|
||||
|
||||
ytcfg = self._extract_ytcfg(video_id, webpage)
|
||||
if not player_response:
|
||||
player_response = self._call_api(
|
||||
'player', {'videoId': video_id}, video_id)
|
||||
'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
|
||||
|
||||
playability_status = player_response.get('playabilityStatus') or {}
|
||||
if playability_status.get('reason') == 'Sign in to confirm your age':
|
||||
@@ -1798,7 +1867,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
def get_text(x):
|
||||
if not x:
|
||||
return
|
||||
return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])
|
||||
text = x.get('simpleText')
|
||||
if text and isinstance(text, compat_str):
|
||||
return text
|
||||
runs = x.get('runs')
|
||||
if not isinstance(runs, list):
|
||||
return
|
||||
return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
|
||||
|
||||
search_meta = (
|
||||
lambda x: self._html_search_meta(x, webpage, default=None)) \
|
||||
@@ -2015,6 +2090,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
# Sometimes youtube gives a wrong thumbnail URL. See:
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/233
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/28023
|
||||
if 'maxresdefault' in thumbnail_url:
|
||||
thumbnail_url = thumbnail_url.split('?')[0]
|
||||
thumbnails.append({
|
||||
'height': int_or_none(thumbnail.get('height')),
|
||||
'url': thumbnail_url,
|
||||
@@ -2143,7 +2223,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'yt initial data')
|
||||
if not initial_data:
|
||||
initial_data = self._call_api(
|
||||
'next', {'videoId': video_id}, video_id, fatal=False)
|
||||
'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
|
||||
|
||||
if not is_live:
|
||||
try:
|
||||
@@ -2244,7 +2324,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
info['channel'] = get_text(try_get(
|
||||
vsir,
|
||||
lambda x: x['owner']['videoOwnerRenderer']['title'],
|
||||
compat_str))
|
||||
dict))
|
||||
rows = try_get(
|
||||
vsir,
|
||||
lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
|
||||
@@ -2288,8 +2368,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
is_private = bool_or_none(video_details.get('isPrivate'))
|
||||
is_unlisted = bool_or_none(microformat.get('isUnlisted'))
|
||||
is_membersonly = None
|
||||
is_premium = None
|
||||
if initial_data and is_private is not None:
|
||||
is_membersonly = False
|
||||
is_premium = False
|
||||
contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
|
||||
for content in contents or []:
|
||||
badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
|
||||
@@ -2298,13 +2380,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if label.lower() == 'members only':
|
||||
is_membersonly = True
|
||||
break
|
||||
if is_membersonly:
|
||||
elif label.lower() == 'premium':
|
||||
is_premium = True
|
||||
break
|
||||
if is_membersonly or is_premium:
|
||||
break
|
||||
|
||||
# TODO: Add this for playlists
|
||||
info['availability'] = self._availability(
|
||||
is_private=is_private,
|
||||
needs_premium=False, # Youtube no longer have premium-only videos?
|
||||
needs_premium=is_premium,
|
||||
needs_subscription=is_membersonly,
|
||||
needs_auth=info['age_limit'] >= 18,
|
||||
is_unlisted=None if is_private is None else is_unlisted)
|
||||
@@ -2668,6 +2753,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/TheYoungTurks/live',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/hashtag/cctv9',
|
||||
'info_dict': {
|
||||
'id': 'cctv9',
|
||||
'title': '#cctv9',
|
||||
},
|
||||
'playlist_mincount': 350,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -2828,6 +2920,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
for entry in self._post_thread_entries(renderer):
|
||||
yield entry
|
||||
|
||||
r''' # unused
|
||||
def _rich_grid_entries(self, contents):
|
||||
for content in contents:
|
||||
video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
|
||||
if video_renderer:
|
||||
entry = self._video_entry(video_renderer)
|
||||
if entry:
|
||||
yield entry
|
||||
'''
|
||||
|
||||
@staticmethod
|
||||
def _build_continuation_query(continuation, ctp=None):
|
||||
query = {
|
||||
@@ -2873,7 +2975,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
ctp = continuation_ep.get('clickTrackingParams')
|
||||
return YoutubeTabIE._build_continuation_query(continuation, ctp)
|
||||
|
||||
def _entries(self, tab, item_id, identity_token, account_syncid):
|
||||
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
|
||||
|
||||
def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
|
||||
contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
|
||||
@@ -2925,21 +3027,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
for entry in extract_entries(parent_renderer):
|
||||
yield entry
|
||||
continuation = continuation_list[0]
|
||||
|
||||
headers = {
|
||||
'x-youtube-client-name': '1',
|
||||
'x-youtube-client-version': '2.20201112.04.01',
|
||||
}
|
||||
if identity_token:
|
||||
headers['x-youtube-identity-token'] = identity_token
|
||||
|
||||
if account_syncid:
|
||||
headers['X-Goog-PageId'] = account_syncid
|
||||
headers['X-Goog-AuthUser'] = 0
|
||||
context = self._extract_context(ytcfg)
|
||||
visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
|
||||
|
||||
for page_num in itertools.count(1):
|
||||
if not continuation:
|
||||
break
|
||||
headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
|
||||
retries = self._downloader.params.get('extractor_retries', 3)
|
||||
count = -1
|
||||
last_error = None
|
||||
@@ -2949,12 +3043,14 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
self.report_warning('%s. Retrying ...' % last_error)
|
||||
try:
|
||||
response = self._call_api(
|
||||
ep="browse", fatal=True, headers=headers,
|
||||
ep='browse', fatal=True, headers=headers,
|
||||
video_id='%s page %s' % (item_id, page_num),
|
||||
query={
|
||||
'continuation': continuation['continuation'],
|
||||
'clickTracking': {'clickTrackingParams': continuation['itct']},
|
||||
},
|
||||
context=context,
|
||||
api_key=self._extract_api_key(ytcfg),
|
||||
note='Downloading API JSON%s' % (' (retry #%d)' % count if count else ''))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
|
||||
@@ -2980,6 +3076,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
if not response:
|
||||
break
|
||||
visitor_data = try_get(
|
||||
response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
|
||||
|
||||
known_continuation_renderers = {
|
||||
'playlistVideoListContinuation': self._playlist_entries,
|
||||
@@ -3010,9 +3108,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
'richItemRenderer': (extract_entries, 'contents'), # for hashtag
|
||||
'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
|
||||
}
|
||||
on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
|
||||
continuation_items = try_get(
|
||||
response,
|
||||
lambda x: dict_get(x, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))[0]['appendContinuationItemsAction']['continuationItems'], list)
|
||||
on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
|
||||
continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
|
||||
video_items_renderer = None
|
||||
for key, value in continuation_item.items():
|
||||
@@ -3070,10 +3168,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
channel_name = renderer.get('title')
|
||||
channel_url = renderer.get('channelUrl')
|
||||
channel_id = renderer.get('externalId')
|
||||
|
||||
if not renderer:
|
||||
else:
|
||||
renderer = try_get(
|
||||
data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
|
||||
|
||||
if renderer:
|
||||
title = renderer.get('title')
|
||||
description = renderer.get('description', '')
|
||||
@@ -3099,11 +3197,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
'width': int_or_none(t.get('width')),
|
||||
'height': int_or_none(t.get('height')),
|
||||
})
|
||||
|
||||
if playlist_id is None:
|
||||
playlist_id = item_id
|
||||
if title is None:
|
||||
title = playlist_id
|
||||
title = (
|
||||
try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
|
||||
or playlist_id)
|
||||
title += format_field(selected_tab, 'title', ' - %s')
|
||||
|
||||
metadata = {
|
||||
@@ -3126,7 +3225,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
self._entries(
|
||||
selected_tab, playlist_id,
|
||||
self._extract_identity_token(webpage, item_id),
|
||||
self._extract_account_syncid(data)),
|
||||
self._extract_account_syncid(data),
|
||||
self._extract_ytcfg(item_id, webpage)),
|
||||
**metadata)
|
||||
|
||||
def _extract_mix_playlist(self, playlist, playlist_id):
|
||||
@@ -3180,25 +3280,26 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
alert_type = alert.get('type')
|
||||
if not alert_type:
|
||||
continue
|
||||
message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
|
||||
message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
|
||||
if message:
|
||||
yield alert_type, message
|
||||
for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
|
||||
message = try_get(run, lambda x: x['text'], compat_str)
|
||||
if message:
|
||||
yield alert_type, message
|
||||
message += try_get(run, lambda x: x['text'], compat_str)
|
||||
if message:
|
||||
yield alert_type, message
|
||||
|
||||
err_msg = None
|
||||
errors = []
|
||||
warnings = []
|
||||
for alert_type, alert_message in _real_extract_alerts():
|
||||
if alert_type.lower() == 'error':
|
||||
if err_msg:
|
||||
self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
|
||||
err_msg = alert_message
|
||||
errors.append([alert_type, alert_message])
|
||||
else:
|
||||
self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
|
||||
warnings.append([alert_type, alert_message])
|
||||
|
||||
if err_msg:
|
||||
raise ExtractorError('YouTube said: %s' % err_msg, expected=expected)
|
||||
for alert_type, alert_message in (warnings + errors[:-1]):
|
||||
self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
|
||||
if errors:
|
||||
raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
|
||||
|
||||
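The alert handling above now collects everything before acting; in isolation the behaviour is roughly this (report_warning and ExtractorError replaced with print and RuntimeError):

def process_alerts(alerts):
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        (errors if alert_type.lower() == 'error' else warnings).append((alert_type, alert_message))
    for alert_type, alert_message in warnings + errors[:-1]:
        print('WARNING: YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        raise RuntimeError('YouTube said: %s' % errors[-1][1])

process_alerts([('INFO', 'Unavailable videos are hidden')])  # warns only, does not raise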
def _extract_webpage(self, url, item_id):
|
||||
retries = self._downloader.params.get('extractor_retries', 3)
|
||||
|
||||
@@ -107,22 +107,31 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
return ''.join(opts)
|
||||
|
||||
def _comma_separated_values_options_callback(option, opt_str, value, parser):
|
||||
setattr(parser.values, option.dest, value.split(','))
|
||||
def _comma_separated_values_options_callback(option, opt_str, value, parser, prepend=True):
|
||||
setattr(
|
||||
parser.values, option.dest,
|
||||
value.split(',') if not prepend
|
||||
else value.split(',') + getattr(parser.values, option.dest))
|
||||
|
||||
def _dict_from_multiple_values_options_callback(
|
||||
option, opt_str, value, parser, allowed_keys=r'[\w-]+', delimiter=':', default_key=None, process=None):
|
||||
option, opt_str, value, parser,
|
||||
allowed_keys=r'[\w-]+', delimiter=':', default_key=None, process=None, multiple_keys=True):
|
||||
|
||||
out_dict = getattr(parser.values, option.dest)
|
||||
mobj = re.match(r'(?i)(?P<key>%s)%s(?P<val>.*)$' % (allowed_keys, delimiter), value)
|
||||
if multiple_keys:
|
||||
allowed_keys = r'(%s)(,(%s))*' % (allowed_keys, allowed_keys)
|
||||
mobj = re.match(r'(?i)(?P<keys>%s)%s(?P<val>.*)$' % (allowed_keys, delimiter), value)
|
||||
if mobj is not None:
|
||||
key, val = mobj.group('key').lower(), mobj.group('val')
|
||||
keys = [k.strip() for k in mobj.group('keys').lower().split(',')]
|
||||
val = mobj.group('val')
|
||||
elif default_key is not None:
|
||||
key, val = default_key, value
|
||||
keys, val = [default_key], value
|
||||
else:
|
||||
raise optparse.OptionValueError(
|
||||
'wrong %s formatting; it should be %s, not "%s"' % (opt_str, option.metavar, value))
|
||||
out_dict[key] = process(val) if callable(process) else val
|
||||
val = process(val) if callable(process) else val
|
||||
for key in keys:
|
||||
out_dict[key] = val
|
||||
|
||||
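The multiple_keys handling added above lets one option value target several keys at once (e.g. several protocols for --downloader); a self-contained sketch of the parsing, with the regex mirroring the one in the diff:

import re

def parse_multi_key_value(value, allowed_keys=r'[\w-]+', delimiter=':', default_key=None):
    # e.g. 'dash,m3u8:native' -> {'dash': 'native', 'm3u8': 'native'}
    keys_re = r'(%s)(,(%s))*' % (allowed_keys, allowed_keys)
    mobj = re.match(r'(?i)(?P<keys>%s)%s(?P<val>.*)$' % (keys_re, delimiter), value)
    if mobj:
        keys = [k.strip() for k in mobj.group('keys').lower().split(',')]
        val = mobj.group('val')
    elif default_key is not None:
        keys, val = [default_key], value
    else:
        raise ValueError('wrong formatting: %r' % value)
    return {key: val for key in keys}

print(parse_multi_key_value('dash,m3u8:native', default_key='default'))
# -> {'dash': 'native', 'm3u8': 'native'}
print(parse_multi_key_value('aria2c', default_key='default'))
# -> {'default': 'aria2c'}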
# No need to wrap help messages if we're on a wide console
|
||||
columns = compat_get_terminal_size().columns
|
||||
@@ -250,7 +259,7 @@ def parseOpts(overrideArguments=None):
|
||||
help='Make all connections via IPv6',
|
||||
)
|
||||
|
||||
geo = optparse.OptionGroup(parser, 'Geo Restriction')
|
||||
geo = optparse.OptionGroup(parser, 'Geo-restriction')
|
||||
geo.add_option(
|
||||
'--geo-verification-proxy',
|
||||
dest='geo_verification_proxy', default=None, metavar='URL',
|
||||
@@ -393,11 +402,11 @@ def parseOpts(overrideArguments=None):
|
||||
selection.add_option(
|
||||
'--include-ads',
|
||||
dest='include_ads', action='store_true',
|
||||
help='Download advertisements as well (experimental)')
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
selection.add_option(
|
||||
'--no-include-ads',
|
||||
dest='include_ads', action='store_false',
|
||||
help='Do not download advertisements (default)')
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
|
||||
authentication = optparse.OptionGroup(parser, 'Authentication Options')
|
||||
authentication.add_option(
|
||||
@@ -420,21 +429,19 @@ def parseOpts(overrideArguments=None):
|
||||
'--video-password',
|
||||
dest='videopassword', metavar='PASSWORD',
|
||||
help='Video password (vimeo, youku)')
|
||||
|
||||
adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options')
|
||||
adobe_pass.add_option(
|
||||
authentication.add_option(
|
||||
'--ap-mso',
|
||||
dest='ap_mso', metavar='MSO',
|
||||
help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs')
|
||||
adobe_pass.add_option(
|
||||
authentication.add_option(
|
||||
'--ap-username',
|
||||
dest='ap_username', metavar='USERNAME',
|
||||
help='Multiple-system operator account login')
|
||||
adobe_pass.add_option(
|
||||
authentication.add_option(
|
||||
'--ap-password',
|
||||
dest='ap_password', metavar='PASSWORD',
|
||||
help='Multiple-system operator account password. If this option is left out, yt-dlp will ask interactively')
|
||||
adobe_pass.add_option(
|
||||
authentication.add_option(
|
||||
'--ap-list-mso',
|
||||
action='store_true', dest='ap_list_mso', default=False,
|
||||
help='List all supported multiple-system operators')
|
||||
@@ -480,7 +487,7 @@ def parseOpts(overrideArguments=None):
|
||||
video_format.add_option(
|
||||
'--all-formats',
|
||||
action='store_const', dest='format', const='all',
|
||||
help='Download all available video formats')
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
video_format.add_option(
|
||||
'--prefer-free-formats',
|
||||
action='store_true', dest='prefer_free_formats', default=False,
|
||||
@@ -560,7 +567,7 @@ def parseOpts(overrideArguments=None):
|
||||
downloader.add_option(
|
||||
'-N', '--concurrent-fragments',
|
||||
dest='concurrent_fragment_downloads', metavar='N', default=1, type=int,
|
||||
help='Number of fragments to download concurrently (default is %default)')
|
||||
help='Number of fragments of a dash/hlsnative video that should be downloaded concurrently (default is %default)')
|
||||
downloader.add_option(
|
||||
'-r', '--limit-rate', '--rate-limit',
|
||||
dest='ratelimit', metavar='RATE',
|
||||
@@ -630,11 +637,11 @@ def parseOpts(overrideArguments=None):
|
||||
downloader.add_option(
|
||||
'--hls-prefer-native',
|
||||
dest='hls_prefer_native', action='store_true', default=None,
|
||||
help='Use the native HLS downloader instead of ffmpeg')
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
downloader.add_option(
|
||||
'--hls-prefer-ffmpeg',
|
||||
dest='hls_prefer_native', action='store_false', default=None,
|
||||
help='Use ffmpeg instead of the native HLS downloader')
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
downloader.add_option(
|
||||
'--hls-use-mpegts',
|
||||
dest='hls_use_mpegts', action='store_true', default=None,
|
||||
@@ -650,11 +657,20 @@ def parseOpts(overrideArguments=None):
|
||||
'Do not use the mpegts container for HLS videos. '
|
||||
'This is default when not downloading live streams'))
|
||||
downloader.add_option(
|
||||
'--external-downloader',
|
||||
dest='external_downloader', metavar='NAME',
|
||||
'--downloader', '--external-downloader',
|
||||
dest='external_downloader', metavar='[PROTO:]NAME', default={}, type='str',
|
||||
action='callback', callback=_dict_from_multiple_values_options_callback,
|
||||
callback_kwargs={
|
||||
'allowed_keys': 'http|ftp|m3u8|dash|rtsp|rtmp|mms',
|
||||
'default_key': 'default', 'process': lambda x: x.strip()},
|
||||
help=(
|
||||
'Name or path of the external downloader to use. '
|
||||
'Currently supports %s (Recommended: aria2c)' % ', '.join(list_external_downloaders())))
|
||||
'Name or path of the external downloader to use (optionally) prefixed by '
|
||||
'the protocols (http, ftp, m3u8, dash, rstp, rtmp, mms) to use it for. '
|
||||
'Currently supports native, %s (Recommended: aria2c). '
|
||||
'You can use this option multiple times to set different downloaders for different protocols. '
|
||||
'For example, --downloader aria2c --downloader "dash,m3u8:native" will use '
|
||||
'aria2c for http/ftp downloads, and the native downloader for dash/m3u8 downloads '
|
||||
'(Alias: --external-downloader)' % ', '.join(list_external_downloaders())))
|
||||
downloader.add_option(
|
||||
'--downloader-args', '--external-downloader-args',
|
||||
metavar='NAME:ARGS', dest='external_downloader_args', default={}, type='str',
|
||||
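The help text above describes folding repeated "[PROTO:]NAME" values into a protocol-to-downloader map. A minimal sketch of that folding, with a hypothetical build_downloader_map helper standing in for _dict_from_multiple_values_options_callback (whose implementation is not part of this diff):

# Illustrative sketch only: fold repeated "--downloader [PROTO:]NAME" values into a
# protocol -> downloader map, mirroring the behaviour the help text above describes.
def build_downloader_map(values, allowed_keys=('http', 'ftp', 'm3u8', 'dash', 'rtsp', 'rtmp', 'mms')):
    mapping = {}
    for value in values:
        keys, _, name = value.rpartition(':')
        keys = keys.lower() if keys else 'default'
        for key in keys.split(','):
            if key != 'default' and key not in allowed_keys:
                raise ValueError('unsupported protocol: %s' % key)
            mapping[key] = name.strip()
    return mapping

# --downloader aria2c --downloader "dash,m3u8:native"
print(build_downloader_map(['aria2c', 'dash,m3u8:native']))
# {'default': 'aria2c', 'dash': 'native', 'm3u8': 'native'}

The unprefixed value becomes the 'default' entry, so aria2c handles http/ftp while the native downloader handles dash/m3u8, matching the example in the option help.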
@@ -693,6 +709,7 @@ def parseOpts(overrideArguments=None):
'--add-header',
metavar='FIELD:VALUE', dest='headers', default={}, type='str',
action='callback', callback=_dict_from_multiple_values_options_callback,
callback_kwargs={'multiple_keys': False},
help='Specify a custom HTTP header and its value, separated by a colon ":". You can use this option multiple times',
)
workarounds.add_option(
@@ -842,7 +859,7 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='useid', help=optparse.SUPPRESS_HELP)
filesystem.add_option(
'-P', '--paths',
metavar='TYPE:PATH', dest='paths', default={}, type='str',
metavar='TYPES:PATH', dest='paths', default={}, type='str',
action='callback', callback=_dict_from_multiple_values_options_callback,
callback_kwargs={
'allowed_keys': 'home|temp|%s' % '|'.join(OUTTMPL_TYPES.keys()),
@@ -857,12 +874,12 @@ def parseOpts(overrideArguments=None):
'This option is ignored if --output is an absolute path'))
filesystem.add_option(
'-o', '--output',
metavar='[TYPE:]TEMPLATE', dest='outtmpl', default={}, type='str',
metavar='[TYPES:]TEMPLATE', dest='outtmpl', default={}, type='str',
action='callback', callback=_dict_from_multiple_values_options_callback,
callback_kwargs={
'allowed_keys': '|'.join(OUTTMPL_TYPES.keys()),
'default_key': 'default', 'process': lambda x: x.strip()},
help='Output filename template, see "OUTPUT TEMPLATE" for details')
help='Output filename template; see "OUTPUT TEMPLATE" for details')
filesystem.add_option(
'--output-na-placeholder',
dest='outtmpl_na_placeholder', metavar='TEXT', default='NA',
@@ -1018,7 +1035,7 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='rm_cachedir',
help='Delete all filesystem cache files')

thumbnail = optparse.OptionGroup(parser, 'Thumbnail Images')
thumbnail = optparse.OptionGroup(parser, 'Thumbnail Options')
thumbnail.add_option(
'--write-thumbnail',
action='store_true', dest='writethumbnail', default=False,
@@ -1084,14 +1101,17 @@ def parseOpts(overrideArguments=None):
'--postprocessor-args', '--ppa',
metavar='NAME:ARGS', dest='postprocessor_args', default={}, type='str',
action='callback', callback=_dict_from_multiple_values_options_callback,
callback_kwargs={'default_key': 'default-compat', 'allowed_keys': r'\w+(?:\+\w+)?', 'process': compat_shlex_split},
callback_kwargs={
'allowed_keys': r'\w+(?:\+\w+)?', 'default_key': 'default-compat',
'process': compat_shlex_split, 'multiple_keys': False},
help=(
'Give these arguments to the postprocessors. '
'Specify the postprocessor/executable name and the arguments separated by a colon ":" '
'to give the argument to the specified postprocessor/executable. Supported postprocessors are: '
'SponSkrub, ExtractAudio, VideoRemuxer, VideoConvertor, EmbedSubtitle, Metadata, Merger, '
'FixupStretched, FixupM4a, FixupM3u8, SubtitlesConvertor, EmbedThumbnail and SplitChapters. '
'The supported executables are: SponSkrub, FFmpeg, FFprobe, and AtomicParsley. '
'to give the argument to the specified postprocessor/executable. Supported PP are: '
'Merger, ExtractAudio, SplitChapters, Metadata, EmbedSubtitle, EmbedThumbnail, '
'SubtitlesConvertor, ThumbnailsConvertor, VideoRemuxer, VideoConvertor, '
'SponSkrub, FixupStretched, FixupM4a and FixupM3u8. '
'The supported executables are: AtomicParsley, FFmpeg, FFprobe, and SponSkrub. '
'You can also specify "PP+EXE:ARGS" to give the arguments to the specified executable '
'only when being used by the specified postprocessor. Additionally, for ffmpeg/ffprobe, '
'"_i"/"_o" can be appended to the prefix optionally followed by a number to pass the argument '
@@ -1144,22 +1164,10 @@ def parseOpts(overrideArguments=None):
help=optparse.SUPPRESS_HELP)
postproc.add_option(
'--parse-metadata',
metavar='FIELD:FORMAT', dest='metafromfield', action='append',
metavar='FROM:TO', dest='metafromfield', action='append',
help=(
'Parse additional metadata like title/artist from other fields. '
'Give a template or field name to extract data from and the '
'format to interpret it as, seperated by a ":". '
'Either regular expression with named capture groups or a '
'similar syntax to the output template can be used for the FORMAT. '
'Similarly, the syntax for output template can be used for FIELD '
'to parse the data from multiple fields. '
'The parsed parameters replace any existing values and can be used in output templates. '
'This option can be used multiple times. '
'Example: --parse-metadata "title:%(artist)s - %(title)s" matches a title like '
'"Coldplay - Paradise". '
'Example: --parse-metadata "%(series)s %(episode_number)s:%(title)s" '
'sets the title using series and episode number. '
'Example (regex): --parse-metadata "description:Artist - (?P<artist>.+?)"'))
'Parse additional metadata like title/artist from other fields; '
'see "MODIFYING METADATA" for details'))
postproc.add_option(
'--xattrs',
action='store_true', dest='xattrs', default=False,
@@ -1186,11 +1194,19 @@ def parseOpts(overrideArguments=None):
postproc.add_option(
'--exec',
metavar='CMD', dest='exec_cmd',
help='Execute a command on the file after downloading and post-processing, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
help=(
'Execute a command on the file after downloading and post-processing. '
'Similar syntax to the output template can be used to pass any field as arguments to the command. '
'An additional field "filepath" that contains the final path of the downloaded file is also available. '
'If no fields are passed, "%(filepath)s" is appended to the end of the command'))
postproc.add_option(
'--convert-subs', '--convert-sub', '--convert-subtitles',
metavar='FORMAT', dest='convertsubtitles', default=None,
help='Convert the subtitles to another format (currently supported: srt|ass|vtt|lrc) (Alias: --convert-subtitles)')
postproc.add_option(
'--convert-thumbnails',
metavar='FORMAT', dest='convertthumbnails', default=None,
help='Convert the thumbnails to another format (currently supported: jpg)')
postproc.add_option(
'--split-chapters', '--split-tracks',
dest='split_chapters', action='store_true', default=False,
@@ -1292,7 +1308,6 @@ def parseOpts(overrideArguments=None):
parser.add_option_group(video_format)
parser.add_option_group(subtitles)
parser.add_option_group(authentication)
parser.add_option_group(adobe_pass)
parser.add_option_group(postproc)
parser.add_option_group(sponskrub)
parser.add_option_group(extractor)

@@ -10,10 +10,11 @@ from .ffmpeg import (
FFmpegFixupM4aPP,
FFmpegMergerPP,
FFmpegMetadataPP,
FFmpegSubtitlesConvertorPP,
FFmpegThumbnailsConvertorPP,
FFmpegSplitChaptersPP,
FFmpegVideoConvertorPP,
FFmpegVideoRemuxerPP,
FFmpegSubtitlesConvertorPP,
FFmpegSplitChaptersPP,
)
from .xattrpp import XAttrMetadataPP
from .execafterdownload import ExecAfterDownloadPP
@@ -28,6 +29,7 @@ def get_postprocessor(key):


__all__ = [
'FFmpegPostProcessor',
'EmbedThumbnailPP',
'ExecAfterDownloadPP',
'FFmpegEmbedSubtitlePP',
@@ -38,8 +40,8 @@ __all__ = [
'FFmpegFixupStretchedPP',
'FFmpegMergerPP',
'FFmpegMetadataPP',
'FFmpegPostProcessor',
'FFmpegSubtitlesConvertorPP',
'FFmpegThumbnailsConvertorPP',
'FFmpegVideoConvertorPP',
'FFmpegVideoRemuxerPP',
'MetadataFromFieldPP',

@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals


import os
import subprocess
import struct
@@ -14,8 +13,10 @@ try:
except ImportError:
has_mutagen = False

from .ffmpeg import FFmpegPostProcessor

from .ffmpeg import (
FFmpegPostProcessor,
FFmpegThumbnailsConvertorPP,
)
from ..utils import (
check_executable,
encodeArgument,
@@ -24,7 +25,6 @@ from ..utils import (
PostProcessingError,
prepend_extension,
process_communicate_or_kill,
replace_extension,
shell_quote,
)

@@ -36,7 +36,7 @@ class EmbedThumbnailPPError(PostProcessingError):
class EmbedThumbnailPP(FFmpegPostProcessor):

def __init__(self, downloader=None, already_have_thumbnail=False):
super(EmbedThumbnailPP, self).__init__(downloader)
FFmpegPostProcessor.__init__(self, downloader)
self._already_have_thumbnail = already_have_thumbnail

def run(self, info):
@@ -47,44 +47,21 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
self.to_screen('There aren\'t any thumbnails to embed')
return [], info

initial_thumbnail = original_thumbnail = thumbnail_filename = info['thumbnails'][-1]['filepath']

thumbnail_filename = info['thumbnails'][-1]['filepath']
if not os.path.exists(encodeFilename(thumbnail_filename)):
self.report_warning('Skipping embedding the thumbnail because the file is missing.')
return [], info

def is_webp(path):
with open(encodeFilename(path), 'rb') as f:
b = f.read(12)
return b[0:4] == b'RIFF' and b[8:] == b'WEBP'

# Correct extension for WebP file with wrong extension (see #25687, #25717)
_, thumbnail_ext = os.path.splitext(thumbnail_filename)
if thumbnail_ext:
thumbnail_ext = thumbnail_ext[1:].lower()
if thumbnail_ext != 'webp' and is_webp(thumbnail_filename):
self.to_screen('Correcting extension to webp and escaping path for thumbnail "%s"' % thumbnail_filename)
thumbnail_webp_filename = replace_extension(thumbnail_filename, 'webp')
if os.path.exists(thumbnail_webp_filename):
os.remove(thumbnail_webp_filename)
os.rename(encodeFilename(thumbnail_filename), encodeFilename(thumbnail_webp_filename))
original_thumbnail = thumbnail_filename = thumbnail_webp_filename
thumbnail_ext = 'webp'
convertor = FFmpegThumbnailsConvertorPP(self._downloader)
convertor.fixup_webp(info, -1)

original_thumbnail = thumbnail_filename = info['thumbnails'][-1]['filepath']

# Convert unsupported thumbnail formats to JPEG (see #25687, #25717)
if thumbnail_ext not in ['jpg', 'png']:
# NB: % is supposed to be escaped with %% but this does not work
# for input files so working around with standard substitution
escaped_thumbnail_filename = thumbnail_filename.replace('%', '#')
os.rename(encodeFilename(thumbnail_filename), encodeFilename(escaped_thumbnail_filename))
escaped_thumbnail_jpg_filename = replace_extension(escaped_thumbnail_filename, 'jpg')
self.to_screen('Converting thumbnail "%s" to JPEG' % escaped_thumbnail_filename)
self.run_ffmpeg(escaped_thumbnail_filename, escaped_thumbnail_jpg_filename, ['-bsf:v', 'mjpeg2jpeg'])
thumbnail_jpg_filename = replace_extension(thumbnail_filename, 'jpg')
# Rename back to unescaped for further processing
os.rename(encodeFilename(escaped_thumbnail_filename), encodeFilename(thumbnail_filename))
os.rename(encodeFilename(escaped_thumbnail_jpg_filename), encodeFilename(thumbnail_jpg_filename))
thumbnail_filename = thumbnail_jpg_filename
_, thumbnail_ext = os.path.splitext(thumbnail_filename)
if thumbnail_ext not in ('jpg', 'png'):
thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'jpg')
thumbnail_ext = 'jpg'

mtime = os.stat(encodeFilename(filename)).st_mtime
@@ -195,9 +172,6 @@ class EmbedThumbnailPP(FFmpegPostProcessor):

files_to_delete = [thumbnail_filename]
if self._already_have_thumbnail:
info['__files_to_move'][original_thumbnail] = replace_extension(
info['__files_to_move'][initial_thumbnail],
os.path.splitext(original_thumbnail)[1][1:])
if original_thumbnail == thumbnail_filename:
files_to_delete = []
elif original_thumbnail != thumbnail_filename:

@@ -20,12 +20,13 @@ class ExecAfterDownloadPP(PostProcessor):
def pp_key(cls):
return 'Exec'

def run(self, information):
cmd = self.exec_cmd
if '{}' not in cmd:
cmd += ' {}'

cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
def run(self, info):
tmpl, info_copy = self._downloader.prepare_outtmpl(self.exec_cmd, info)
cmd = tmpl % info_copy
if cmd == self.exec_cmd: # No replacements were made
if '{}' not in self.exec_cmd:
self.exec_cmd += ' {}'
cmd = self.exec_cmd.replace('{}', compat_shlex_quote(info['filepath']))

self.to_screen('Executing command: %s' % cmd)
retCode = subprocess.call(encodeArgument(cmd), shell=True)
@@ -33,4 +34,4 @@ class ExecAfterDownloadPP(PostProcessor):
raise PostProcessingError(
'Command returned error code %d' % retCode)

return [], information
return [], info

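The hunk above changes --exec so that output-template fields are expanded first and the old find-style "{}" substitution is only used as a fallback. A simplified, self-contained sketch of that flow (assumptions: plain %-formatting stands in for YoutubeDL.prepare_outtmpl, shlex.quote for compat_shlex_quote, and the field names are illustrative):

import shlex

def build_exec_cmd(exec_cmd, info):
    cmd = exec_cmd % info                      # expand output-template style fields
    if cmd == exec_cmd:                        # no replacements were made
        if '{}' not in exec_cmd:
            exec_cmd += ' {}'                  # fall back to the old find-style syntax
        cmd = exec_cmd.replace('{}', shlex.quote(info['filepath']))
    return cmd

info = {'filepath': '/tmp/My Video.mkv', 'title': 'My Video'}
print(build_exec_cmd('echo %(title)s', info))   # fields are expanded directly
print(build_exec_cmd('touch', info))            # filepath appended via the {} fallback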
@@ -10,7 +10,7 @@ import json

from .common import AudioConversionError, PostProcessor

from ..compat import compat_str
from ..compat import compat_str, compat_numeric_types
from ..utils import (
encodeArgument,
encodeFilename,
@@ -530,6 +530,8 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
metadata = {}

def add(meta_list, info_list=None):
if not meta_list:
return
if not info_list:
info_list = meta_list
if not isinstance(meta_list, (list, tuple)):
@@ -537,7 +539,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
if not isinstance(info_list, (list, tuple)):
info_list = (info_list,)
for info_f in info_list:
if info.get(info_f) is not None:
if isinstance(info.get(info_f), (compat_str, compat_numeric_types)):
for meta_f in meta_list:
metadata[meta_f] = info[info_f]
break
@@ -563,6 +565,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
add('episode_id', ('episode', 'episode_id'))
add('episode_sort', 'episode_number')

prefix = 'meta_'
for key in filter(lambda k: k.startswith(prefix), info.keys()):
add(key[len(prefix):], key)

if not metadata:
self.to_screen('There isn\'t any metadata to add')
return [], info
@@ -577,7 +583,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
else:
options.extend(['-c', 'copy'])

for (name, value) in metadata.items():
for name, value in metadata.items():
options.extend(['-metadata', '%s=%s' % (name, value)])

chapters = info.get('chapters', [])
@@ -810,3 +816,73 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor):
destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info)
self.real_run_ffmpeg([(info['filepath'], opts)], [(destination, ['-c', 'copy'])])
return [], info


class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
def __init__(self, downloader=None, format=None):
super(FFmpegThumbnailsConvertorPP, self).__init__(downloader)
self.format = format

@staticmethod
def is_webp(path):
with open(encodeFilename(path), 'rb') as f:
b = f.read(12)
return b[0:4] == b'RIFF' and b[8:] == b'WEBP'

def fixup_webp(self, info, idx=-1):
thumbnail_filename = info['thumbnails'][idx]['filepath']
_, thumbnail_ext = os.path.splitext(thumbnail_filename)
if thumbnail_ext:
thumbnail_ext = thumbnail_ext[1:].lower()
if thumbnail_ext != 'webp' and self.is_webp(thumbnail_filename):
self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename)
webp_filename = replace_extension(thumbnail_filename, 'webp')
if os.path.exists(webp_filename):
os.remove(webp_filename)
os.rename(encodeFilename(thumbnail_filename), encodeFilename(webp_filename))
info['thumbnails'][idx]['filepath'] = webp_filename
info['__files_to_move'][webp_filename] = replace_extension(
info['__files_to_move'].pop(thumbnail_filename), 'webp')

def convert_thumbnail(self, thumbnail_filename, ext):
if ext != 'jpg':
raise FFmpegPostProcessorError('Only conversion to jpg is currently supported')
# NB: % is supposed to be escaped with %% but this does not work
# for input files so working around with standard substitution
escaped_thumbnail_filename = thumbnail_filename.replace('%', '#')
os.rename(encodeFilename(thumbnail_filename), encodeFilename(escaped_thumbnail_filename))
escaped_thumbnail_jpg_filename = replace_extension(escaped_thumbnail_filename, 'jpg')
self.to_screen('Converting thumbnail "%s" to JPEG' % escaped_thumbnail_filename)
self.run_ffmpeg(escaped_thumbnail_filename, escaped_thumbnail_jpg_filename, ['-bsf:v', 'mjpeg2jpeg'])
thumbnail_jpg_filename = replace_extension(thumbnail_filename, 'jpg')
# Rename back to unescaped
os.rename(encodeFilename(escaped_thumbnail_filename), encodeFilename(thumbnail_filename))
os.rename(encodeFilename(escaped_thumbnail_jpg_filename), encodeFilename(thumbnail_jpg_filename))
return thumbnail_jpg_filename

def run(self, info):
if self.format != 'jpg':
raise FFmpegPostProcessorError('Only conversion to jpg is currently supported')
files_to_delete = []
has_thumbnail = False

for idx, thumbnail_dict in enumerate(info['thumbnails']):
if 'filepath' not in thumbnail_dict:
continue
has_thumbnail = True
self.fixup_webp(info, idx)
original_thumbnail = thumbnail_dict['filepath']
_, thumbnail_ext = os.path.splitext(original_thumbnail)
if thumbnail_ext:
thumbnail_ext = thumbnail_ext[1:].lower()
if thumbnail_ext == self.format:
self.to_screen('Thumbnail "%s" is already in the requested format' % original_thumbnail)
continue
thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, self.format)
files_to_delete.append(original_thumbnail)
info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension(
info['__files_to_move'][original_thumbnail], self.format)

if not has_thumbnail:
self.to_screen('There aren\'t any thumbnails to convert')
return files_to_delete, info

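The is_webp()/fixup_webp() pair above keys off the WebP magic bytes: a RIFF header whose format tag (bytes 8-11) is b'WEBP'. A standalone version of the same check (the file path is only an example):

def looks_like_webp(path):
    # WebP files start with 'RIFF' and carry 'WEBP' as the RIFF format tag
    with open(path, 'rb') as f:
        header = f.read(12)
    return header[0:4] == b'RIFF' and header[8:12] == b'WEBP'

# e.g. looks_like_webp('thumbnail.jpg') returning True means the file is really WebP,
# so fixup_webp() would rename it to thumbnail.webp before any conversion.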
@@ -45,7 +45,7 @@ class MetadataFromFieldPP(PostProcessor):
# replace %(..)s with regex group and escape other string parts
for match in re.finditer(r'%\((\w+)\)s', fmt):
regex += re.escape(fmt[lastpos:match.start()])
regex += r'(?P<%s>[^\r\n]+)' % match.group(1)
regex += r'(?P<%s>.+)' % match.group(1)
lastpos = match.end()
if lastpos < len(fmt):
regex += re.escape(fmt[lastpos:])

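This loop turns an output-template style FORMAT into a regex with named capture groups; the change widens each group from [^\r\n]+ to .+ so multi-line fields can match. A self-contained sketch of the same conversion, using the example from the old --parse-metadata help text ("title:%(artist)s - %(title)s" against "Coldplay - Paradise"):

import re

def template_to_regex(fmt):
    regex, lastpos = '', 0
    for match in re.finditer(r'%\((\w+)\)s', fmt):
        regex += re.escape(fmt[lastpos:match.start()])
        regex += r'(?P<%s>.+)' % match.group(1)   # was [^\r\n]+ before this change
        lastpos = match.end()
    if lastpos < len(fmt):
        regex += re.escape(fmt[lastpos:])
    return regex

m = re.search(template_to_regex('%(artist)s - %(title)s'), 'Coldplay - Paradise')
print(m.groupdict())  # {'artist': 'Coldplay', 'title': 'Paradise'}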
@@ -13,6 +13,10 @@ from ..utils import (

class MoveFilesAfterDownloadPP(PostProcessor):

def __init__(self, downloader=None, downloaded=True):
PostProcessor.__init__(self, downloader)
self._downloaded = downloaded

@classmethod
def pp_key(cls):
return 'MoveFiles'
@@ -21,7 +25,8 @@ class MoveFilesAfterDownloadPP(PostProcessor):
dl_path, dl_name = os.path.split(encodeFilename(info['filepath']))
finaldir = info.get('__finaldir', dl_path)
finalpath = os.path.join(finaldir, dl_name)
info['__files_to_move'][info['filepath']] = decodeFilename(finalpath)
if self._downloaded:
info['__files_to_move'][info['filepath']] = decodeFilename(finalpath)

make_newfilename = lambda old: decodeFilename(os.path.join(finaldir, os.path.basename(encodeFilename(old))))
for oldfile, newfile in info['__files_to_move'].items():

@@ -3052,33 +3052,83 @@ def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)


def date_from_str(date_str):
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
"""
Return a datetime object from a string in the format YYYYMMDD or
(now|today)[+-][0-9](day|week|month|year)(s)?"""
today = datetime.date.today()
(now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

format: string date format used to return datetime object from
precision: round the time portion of a datetime object.
auto|microsecond|second|minute|hour|day.
auto: round to the unit provided in date_str (if applicable).
"""
auto_precision = False
if precision == 'auto':
auto_precision = True
precision = 'microsecond'
today = datetime_round(datetime.datetime.now(), precision)
if date_str in ('now', 'today'):
return today
if date_str == 'yesterday':
return today - datetime.timedelta(days=1)
match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
match = re.match(
r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
date_str)
if match is not None:
sign = match.group('sign')
time = int(match.group('time'))
if sign == '-':
time = -time
start_time = datetime_from_str(match.group('start'), precision, format)
time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
unit = match.group('unit')
# A bad approximation?
if unit == 'month':
if unit == 'month' or unit == 'year':
new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
unit = 'day'
time *= 30
elif unit == 'year':
unit = 'day'
time *= 365
unit += 's'
delta = datetime.timedelta(**{unit: time})
return today + delta
return datetime.datetime.strptime(date_str, '%Y%m%d').date()
else:
if unit == 'week':
unit = 'day'
time *= 7
delta = datetime.timedelta(**{unit + 's': time})
new_date = start_time + delta
if auto_precision:
return datetime_round(new_date, unit)
return new_date

return datetime_round(datetime.datetime.strptime(date_str, format), precision)


def date_from_str(date_str, format='%Y%m%d'):
"""
Return a datetime object from a string in the format YYYYMMDD or
(now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

format: string date format used to return datetime object from
"""
return datetime_from_str(date_str, precision='microsecond', format=format).date()


def datetime_add_months(dt, months):
"""Increment/Decrement a datetime object by months."""
month = dt.month + months - 1
year = dt.year + month // 12
month = month % 12 + 1
day = min(dt.day, calendar.monthrange(year, month)[1])
return dt.replace(year, month, day)


def datetime_round(dt, precision='day'):
"""
Round a datetime object's time to a specific precision
"""
if precision == 'microsecond':
return dt

unit_seconds = {
'day': 86400,
'hour': 3600,
'minute': 60,
'second': 1,
}
roundto = lambda x, n: ((x + n / 2) // n) * n
timestamp = calendar.timegm(dt.timetuple())
return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))


def hyphenate_date(date_str):

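The rewritten helpers above extend the relative-date syntax to more units and make month/year arithmetic calendar-aware via datetime_add_months. A brief usage sketch (assuming these helpers are importable from yt_dlp.utils as in this tree; actual values depend on the current date):

from yt_dlp.utils import date_from_str, datetime_from_str

print(date_from_str('today-1week'))                      # e.g. 2021-03-28 if today is 2021-04-04
print(date_from_str('now-3months'))                      # calendar-aware, via datetime_add_months
print(datetime_from_str('now-1day', precision='hour'))   # time portion rounded to the nearest hour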
@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '2021.03.21'
__version__ = '2021.04.03'