mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-17 11:22:23 +01:00
Compare commits
80 Commits
2021.03.24
...
2021.04.22
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0cf0571560 | ||
|
|
e58c22a0f6 | ||
|
|
e4bdd3377d | ||
|
|
0b2e9d2c30 | ||
|
|
1bdae7d312 | ||
|
|
a471f21da6 | ||
|
|
6efb071135 | ||
|
|
f4536226c1 | ||
|
|
a439a3a45c | ||
|
|
26e2805c3f | ||
|
|
3b4775e021 | ||
|
|
ab406a1c0e | ||
|
|
a3faeb7de4 | ||
|
|
8c54a3051d | ||
|
|
c32b0aab8a | ||
|
|
3097d9e512 | ||
|
|
c1df120eda | ||
|
|
2cff495997 | ||
|
|
d0491a1ebe | ||
|
|
b9d68c199b | ||
|
|
155510fe81 | ||
|
|
201c145953 | ||
|
|
5d34200268 | ||
|
|
b7da73eb19 | ||
|
|
6a39ee13f7 | ||
|
|
33245766ab | ||
|
|
358de58c4d | ||
|
|
a7191c6f57 | ||
|
|
baa5873942 | ||
|
|
c6ce815461 | ||
|
|
79360d99d3 | ||
|
|
46fff7105e | ||
|
|
72e1fe969f | ||
|
|
b5be6dd504 | ||
|
|
8ea3f7b909 | ||
|
|
921b76cab8 | ||
|
|
a31953b0e6 | ||
|
|
54670cf084 | ||
|
|
a0f30f194a | ||
|
|
b31fdeedfd | ||
|
|
8fa43c73d8 | ||
|
|
56d868dbb7 | ||
|
|
f4f751af40 | ||
|
|
1988fab7e3 | ||
|
|
9de3ea3126 | ||
|
|
e01d6aa435 | ||
|
|
f7ad71607d | ||
|
|
68379de561 | ||
|
|
d9aa233295 | ||
|
|
f37468c41f | ||
|
|
52a8a1e1b9 | ||
|
|
d818eb7473 | ||
|
|
f8d4ad9ab0 | ||
|
|
3ffc7c89b0 | ||
|
|
f1823403b0 | ||
|
|
384fb069ec | ||
|
|
a4ddaf231e | ||
|
|
7e60c06925 | ||
|
|
d92f5d5a90 | ||
|
|
9e62f283ff | ||
|
|
c24ce07a84 | ||
|
|
de6758128e | ||
|
|
73d4343e39 | ||
|
|
57d104424f | ||
|
|
02aabd45d0 | ||
|
|
39ed931e53 | ||
|
|
b28f8d244a | ||
|
|
73cd218f5a | ||
|
|
84601bb72b | ||
|
|
54df8fc5b2 | ||
|
|
5d39972ed0 | ||
|
|
0481374e1d | ||
|
|
eff635394a | ||
|
|
df0c81513e | ||
|
|
3f6a90eb63 | ||
|
|
b050d210df | ||
|
|
f4e4be19f0 | ||
|
|
cce889b900 | ||
|
|
a6ae61a4c2 | ||
|
|
b23b9eefd9 |
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -21,7 +21,7 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.11. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
|
||||
- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -29,7 +29,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.24**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.04.11**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -44,7 +44,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your com
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] yt-dlp version 2021.03.24
|
||||
[debug] yt-dlp version 2021.04.11
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
@@ -21,7 +21,7 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.11. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://github.com/yt-dlp/yt-dlp. yt-dlp does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -29,7 +29,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.24**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.04.11**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
||||
@@ -21,13 +21,13 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.11. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes like this [x] (Don't forget to delete the empty space)
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.24**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.04.11**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -21,7 +21,7 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.11. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
|
||||
- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -30,7 +30,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.24**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.04.11**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -46,7 +46,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your com
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] yt-dlp version 2021.03.24
|
||||
[debug] yt-dlp version 2021.04.11
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -21,13 +21,13 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.03.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.11. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes like this [x] (Don't forget to delete the empty space)
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.03.24**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.04.11**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
2
.github/workflows/core.yml
vendored
2
.github/workflows/core.yml
vendored
@@ -6,7 +6,7 @@ jobs:
|
||||
if: "!contains(github.event.head_commit.message, 'ci skip')"
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: true
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-18.04]
|
||||
# TODO: python 2.6
|
||||
|
||||
@@ -35,3 +35,8 @@ damianoamatruda
|
||||
2ShedsJackson
|
||||
CXwudi
|
||||
xtkoba
|
||||
llacb47
|
||||
hheimbuerger
|
||||
B0pol
|
||||
lkho
|
||||
fstirlitz
|
||||
|
||||
92
Changelog.md
92
Changelog.md
@@ -6,8 +6,10 @@
|
||||
* Run `make doc`
|
||||
* Update Changelog.md and CONTRIBUTORS
|
||||
* Change "Merged with ytdl" version in Readme.md if needed
|
||||
* Add new/fixed extractors in "new features" section of Readme.md
|
||||
* Commit to master as `Release <version>`
|
||||
* Push to origin/release - build task will now run
|
||||
* Push to origin/release using `git push origin master:release`
|
||||
build task will now run
|
||||
* Update version.py using devscripts\update-version.py
|
||||
* Run `make issuetemplates`
|
||||
* Commit to master as `[version] update :ci skip all`
|
||||
@@ -17,17 +19,96 @@
|
||||
-->
|
||||
|
||||
|
||||
### 2021.04.22
|
||||
* **Improve output template:**
|
||||
* Objects can be traversed like `%(field.key1.key2)s`
|
||||
* An offset can be added to numeric fields as `%(field+N)s`
|
||||
* Deprecates `--autonumber-start`
|
||||
* **Improve `--sub-langs`:**
|
||||
* Treat `--sub-langs` entries as regex
|
||||
* `all` can be used to refer to all the subtitles
|
||||
* Language codes can be prefixed with `-` to exclude them
|
||||
* Deprecates `--all-subs`
|
||||
* Add option `--ignore-no-formats-error` to ignore the "no video format" and similar errors
|
||||
* Add option `--skip-playlist-after-errors` to skip the rest of a playlist after a given number of errors are encountered
|
||||
* Merge youtube-dl: Upto [commit/7e8b3f9](https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438)
|
||||
* [downloader] Fix bug in downloader selection
|
||||
* [BilibiliChannel] Fix pagination by [nao20010128nao](https://github.com/nao20010128nao) and [pukkandan](https://github.com/pukkandan)
|
||||
* [rai] Add support for http formats by [nixxo](https://github.com/nixxo)
|
||||
* [TubiTv] Add TubiTvShowIE by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [twitcasting] Fix extractor
|
||||
* [viu:ott] Fix extractor and support series by [lkho](https://github.com/lkho) and [pukkandan](https://github.com/pukkandan)
|
||||
* [youtube:tab] Show unavailable videos in playlists by [colethedj](https://github.com/colethedj)
|
||||
* [youtube:tab] Reload with unavailable videos for all playlists
|
||||
* [youtube] Ignore invalid stretch ratio
|
||||
* [youtube] Improve channel syncid extraction to support ytcfg by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Standardize API calls for tabs, mixes and search by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Bugfix in `_extract_ytcfg`
|
||||
* [mildom:user:vod] Download only necessary amount of pages
|
||||
* [mildom] Remove proxy completely by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [go] Fix `_VALID_URL`
|
||||
* [MetadataFromField] Improve regex and add tests
|
||||
* [Exec] Ensure backward compatibility when the command contains `%`
|
||||
* [extractor] Fix inconsistent use of `report_warning`
|
||||
* Ensure `mergeall` selects best format when multistreams are disabled
|
||||
* Improve the yt-dlp.sh script by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [lazy_extractor] Do not load plugins
|
||||
* [ci] Disable fail-fast
|
||||
* [documentation] Clarify which deprecated options still work
|
||||
* [documentation] Fix typos
|
||||
|
||||
|
||||
### 2021.04.11
|
||||
* Add option `--convert-thumbnails` (only jpg currently supported)
|
||||
* Format selector `mergeall` to download and merge all formats
|
||||
* Pass any field to `--exec` using similar syntax to output template
|
||||
* Choose downloader for each protocol using `--downloader PROTO:NAME`
|
||||
* Alias `--downloader` for `--external-downloader`
|
||||
* Added `native` as an option for the downloader
|
||||
* Merge youtube-dl: Upto [commit/4fb25ff](https://github.com/ytdl-org/youtube-dl/commit/4fb25ff5a3be5206bb72e5c4046715b1529fb2c7) (except vimeo)
|
||||
* [DiscoveryPlusIndia] Add DiscoveryPlusIndiaShowIE by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [NFHSNetwork] Add extractor by [llacb47](https://github.com/llacb47)
|
||||
* [nebula] Add extractor (watchnebula.com) by [hheimbuerger](https://github.com/hheimbuerger)
|
||||
* [nitter] Fix extraction of reply tweets and update instance list by [B0pol](https://github.com/B0pol)
|
||||
* [nitter] Fix thumbnails by [B0pol](https://github.com/B0pol)
|
||||
* [youtube] Fix thumbnail URL
|
||||
* [youtube] Parse API parameters from initial webpage by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Extract comments' approximate timestamp by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Fix alert extraction
|
||||
* [bilibili] Fix uploader
|
||||
* [utils] Add `datetime_from_str` and `datetime_add_months` by [colethedj](https://github.com/colethedj)
|
||||
* Run some `postprocessors` before actual download
|
||||
* Improve argument parsing for `-P`, `-o`, `-S`
|
||||
* Fix some `m3u8` not obeying `--allow-unplayable-formats`
|
||||
* Fix default of `dynamic_mpd`
|
||||
* Deprecate `--all-formats`, `--include-ads`, `--hls-prefer-native`, `--hls-prefer-ffmpeg`
|
||||
* [documentation] Improvements
|
||||
|
||||
### 2021.04.03
|
||||
* Merge youtube-dl: Upto [commit/654b4f4](https://github.com/ytdl-org/youtube-dl/commit/654b4f4ff2718f38b3182c1188c5d569c14cc70a)
|
||||
* Ability to set a specific field in the file's metadata using `--parse-metadata`
|
||||
* Ability to select n'th best format like `-f bv*.2`
|
||||
* [DiscoveryPlus] Add discoveryplus.in
|
||||
* [la7] Add podcasts and podcast playlists by [nixxo](https://github.com/nixxo)
|
||||
* [mildom] Update extractor with current proxy by [nao20010128nao](https://github.com/nao20010128nao)
|
||||
* [ard:mediathek] Fix video id extraction
|
||||
* [generic] Detect Invidious' link element
|
||||
* [youtube] Show premium state in `availability` by [colethedj](https://github.com/colethedj)
|
||||
* [viewsource] Add extractor to handle `view-source:`
|
||||
* [sponskrub] Run before embedding thumbnail
|
||||
* [documentation] Improve `--parse-metadata` documentation
|
||||
|
||||
|
||||
### 2021.03.24.1
|
||||
* Revert [commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf)
|
||||
|
||||
|
||||
### 2021.03.24
|
||||
* Merge youtube-dl: Upto [commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf)
|
||||
* Merge youtube-dl: Upto 2021.03.25 ([commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf))
|
||||
* Parse metadata from multiple fields using `--parse-metadata`
|
||||
* Ability to load playlist infojson using `--load-info-json`
|
||||
* Write current epoch to infojson when using `--no-clean-infojson`
|
||||
* [youtube_live_chat] fix bug when trying to set cookies
|
||||
* [niconico] Fix for when logged in by: @CXwudi and @xtkoba
|
||||
* [niconico] Fix for when logged in by [CXwudi](https://github.com/CXwudi) and [xtkoba](https://github.com/xtkoba)
|
||||
* [linuxacademy] Fix login
|
||||
|
||||
|
||||
@@ -102,7 +183,6 @@
|
||||
### 2021.03.03.2
|
||||
* [build] Fix bug
|
||||
|
||||
|
||||
### 2021.03.03
|
||||
* [youtube] Use new browse API for continuation page extraction by [colethedj](https://github.com/colethedj) and [pukkandan](https://github.com/pukkandan)
|
||||
* Fix HLS playlist downloading by [shirt](https://github.com/shirt-dev)
|
||||
@@ -357,7 +437,7 @@
|
||||
|
||||
### 2021.01.08
|
||||
* Merge youtube-dl: Upto [2021.01.08](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.08) except stitcher ([1](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc), [2](https://github.com/ytdl-org/youtube-dl/commit/a563c97c5cddf55f8989ed7ea8314ef78e30107f))
|
||||
* Moved changelog to seperate file
|
||||
* Moved changelog to separate file
|
||||
|
||||
|
||||
### 2021.01.07-1
|
||||
|
||||
375
README.md
375
README.md
@@ -1,39 +1,43 @@
|
||||
<div align="center">
|
||||
|
||||
# YT-DLP
|
||||
A command-line program to download videos from YouTube and many other [video platforms](supportedsites.md)
|
||||
|
||||
[](https://github.com/yt-dlp/yt-dlp/releases/latest)
|
||||
[](LICENSE)
|
||||
<!-- GHA doesn't have for-the-badge style
|
||||
[](https://github.com/yt-dlp/yt-dlp/actions)
|
||||
[](https://discord.gg/H5MNcFW63r)
|
||||
-->
|
||||
[](https://github.com/yt-dlp/yt-dlp/releases/latest)
|
||||
[](LICENSE)
|
||||
[](https://yt-dlp.readthedocs.io)
|
||||
[](https://discord.gg/H5MNcFW63r)
|
||||
[](https://github.com/yt-dlp/yt-dlp/commits)
|
||||
[](https://github.com/yt-dlp/yt-dlp/commits)
|
||||
[](https://github.com/yt-dlp/yt-dlp/releases/latest)
|
||||
[](https://pypi.org/project/yt-dlp)
|
||||
|
||||
[](https://github.com/yt-dlp/yt-dlp/commits)
|
||||
[](https://github.com/yt-dlp/yt-dlp/commits)
|
||||
[](https://github.com/yt-dlp/yt-dlp/releases/latest)
|
||||
[](https://pypi.org/project/yt-dlp)
|
||||
[](https://yt-dlp.readthedocs.io)
|
||||
</div>
|
||||
|
||||
A command-line program to download videos from youtube.com and many other [video platforms](supportedsites.md)
|
||||
|
||||
This is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). The main focus of this project is adding new features and patches while also keeping up to date with the original project
|
||||
yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). The main focus of this project is adding new features and patches while also keeping up to date with the original project
|
||||
|
||||
* [NEW FEATURES](#new-features)
|
||||
* [INSTALLATION](#installation)
|
||||
* [Dependencies](#dependencies)
|
||||
* [Update](#update)
|
||||
* [Compile](#compile)
|
||||
* [DESCRIPTION](#description)
|
||||
* [OPTIONS](#options)
|
||||
* [USAGE AND OPTIONS](#usage-and-options)
|
||||
* [General Options](#general-options)
|
||||
* [Network Options](#network-options)
|
||||
* [Geo Restriction](#geo-restriction)
|
||||
* [Geo-restriction](#geo-restriction)
|
||||
* [Video Selection](#video-selection)
|
||||
* [Download Options](#download-options)
|
||||
* [Filesystem Options](#filesystem-options)
|
||||
* [Thumbnail images](#thumbnail-images)
|
||||
* [Thumbnail Options](#thumbnail-options)
|
||||
* [Internet Shortcut Options](#internet-shortcut-options)
|
||||
* [Verbosity and Simulation Options](#verbosity-and-simulation-options)
|
||||
* [Workarounds](#workarounds)
|
||||
* [Video Format Options](#video-format-options)
|
||||
* [Subtitle Options](#subtitle-options)
|
||||
* [Authentication Options](#authentication-options)
|
||||
* [Adobe Pass Options](#adobe-pass-options)
|
||||
* [Post-processing Options](#post-processing-options)
|
||||
* [SponSkrub (SponsorBlock) Options](#sponskrub-sponsorblock-options)
|
||||
* [Extractor Options](#extractor-options)
|
||||
@@ -46,9 +50,12 @@ This is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the
|
||||
* [Filtering Formats](#filtering-formats)
|
||||
* [Sorting Formats](#sorting-formats)
|
||||
* [Format Selection examples](#format-selection-examples)
|
||||
* [MODIFYING METADATA](#modifying-metadata)
|
||||
* [Modifying metadata examples](#modifying-metadata-examples)
|
||||
* [PLUGINS](#plugins)
|
||||
* [DEPRECATED OPTIONS](#deprecated-options)
|
||||
* [MORE](#more)
|
||||
</div>
|
||||
|
||||
|
||||
# NEW FEATURES
|
||||
@@ -58,26 +65,25 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/
|
||||
|
||||
* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))
|
||||
|
||||
* **Merged with youtube-dl v2021.03.25**: You get all the latest features and patches of [youtube-dl](https://github.com/ytdl-org/youtube-dl) in addition to all the features of [youtube-dlc](https://github.com/blackjack4494/yt-dlc)
|
||||
* **Merged with youtube-dl v2021.04.17**: You get all the latest features and patches of [youtube-dl](https://github.com/ytdl-org/youtube-dl) in addition to all the features of [youtube-dlc](https://github.com/blackjack4494/yt-dlc)
|
||||
|
||||
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--get-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, Playlist infojson etc. Note that the NicoNico improvements are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
|
||||
|
||||
* **Youtube improvements**:
|
||||
* All Youtube Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`) works correctly and supports downloading multiple pages of content
|
||||
* Youtube search (`ytsearch:`, `ytsearchdate:`) along with Search URLs works correctly
|
||||
* All Youtube Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`) work and support downloading multiple pages of content
|
||||
* Youtube search (`ytsearch:`, `ytsearchdate:`) along with Search URLs work
|
||||
* Youtube mixes support downloading multiple pages of content
|
||||
* Redirect channel's home URL automatically to `/video` to preserve the old behaviour
|
||||
|
||||
* **Split video by chapters**: Videos can be split into multiple files based on chapters using `--split-chapters`
|
||||
|
||||
* **Multithreaded fragment downloads**: Fragment downloads can be natively multi-threaded. Use `--concurrent-fragments` (`-N`) option to set the number of threads used
|
||||
* **Multi-threaded fragment downloads**: Download multiple fragments of m3u8/mpd videos in parallel. Use `--concurrent-fragments` (`-N`) option to set the number of threads used
|
||||
|
||||
* **Aria2c with HLS/DASH**: You can use aria2c as the external downloader for DASH(mpd) and HLS(m3u8) formats. No more slow ffmpeg/native downloads
|
||||
* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats
|
||||
|
||||
* **New extractors**: AnimeLab, Philo MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv
|
||||
* **New extractors**: AnimeLab, Philo MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula
|
||||
|
||||
* **Fixed extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, tiktok, akamai, ina, rumble, tennistv, amcnetworks
|
||||
|
||||
* **Plugin extractors**: Extractors can be loaded from an external file. See [plugins](#plugins) for details
|
||||
* **Fixed extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacademy, nitter, twitcasting, viu
|
||||
|
||||
* **Multiple paths and output templates**: You can give different [output templates](#output-template) and download paths for different types of files. You can also set a temporary path where intermediary files are downloaded to using `--paths` (`-P`)
|
||||
|
||||
@@ -87,6 +93,8 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/
|
||||
|
||||
* **Improvements**: Multiple `--postprocessor-args` and `--external-downloader-args`, Date/time formatting in `-o`, faster archive checking, more [format selection options](#format-selection) etc
|
||||
|
||||
* **Plugin extractors**: Extractors can be loaded from an external file. See [plugins](#plugins) for details
|
||||
|
||||
* **Self-updater**: The releases can be updated using `yt-dlp -U`
|
||||
|
||||
|
||||
@@ -99,6 +107,7 @@ If you are coming from [youtube-dl](https://github.com/ytdl-org/youtube-dl), the
|
||||
|
||||
|
||||
# INSTALLATION
|
||||
yt-dlp is not platform specific. So it should work on your Unix box, on Windows or on macOS
|
||||
|
||||
You can install yt-dlp using one of the following methods:
|
||||
* Download the binary from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) (recommended method)
|
||||
@@ -123,8 +132,13 @@ sudo aria2c https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o
|
||||
sudo chmod a+rx /usr/local/bin/yt-dlp
|
||||
```
|
||||
|
||||
### DEPENDENCIES
|
||||
Python versions 2.6, 2.7, or 3.2+ are currently supported. However, 3.2+ is strongly recommended and python2 support will be deprecated in the future.
|
||||
|
||||
Although there are no required dependencies, `ffmpeg` and `ffprobe` are highly recommended. Other optional dependencies are `sponskrub`, `AtomicParsley`, `mutagen`, `pycryptodome` and any of the supported external downloaders. Note that the windows releases are already built with the python interpreter, mutagen and pycryptodome included.
|
||||
|
||||
### UPDATE
|
||||
Starting from version `2021.02.09`, you can use `yt-dlp -U` to update if you are using the provided release.
|
||||
You can use `yt-dlp -U` to update if you are using the provided release.
|
||||
If you are using `pip`, simply re-run the same command that was used to install the program.
|
||||
|
||||
### COMPILE
|
||||
@@ -134,7 +148,7 @@ To build the Windows executable, you must have pyinstaller (and optionally mutag
|
||||
|
||||
python -m pip install --upgrade pyinstaller mutagen pycryptodome
|
||||
|
||||
Once you have all the necessary dependancies installed, just run `py pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. It is strongly reccomended to use python3 although python2.6+ is supported.
|
||||
Once you have all the necessary dependencies installed, just run `py pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. It is strongly recommended to use python3 although python2.6+ is supported.
|
||||
|
||||
You can also build the executable without any version info or metadata by using:
|
||||
|
||||
@@ -146,15 +160,12 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
|
||||
**Note**: In either platform, `devscripts\update-version.py` can be used to automatically update the version number
|
||||
|
||||
# DESCRIPTION
|
||||
**yt-dlp** is a command-line program to download videos from youtube.com many other [video platforms](supportedsites.md). It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
|
||||
# USAGE AND OPTIONS
|
||||
|
||||
yt-dlp [OPTIONS] [--] URL [URL...]
|
||||
|
||||
|
||||
# OPTIONS
|
||||
`Ctrl+F` is your friend :D
|
||||
<!-- Autogenerated -->
|
||||
<!-- Auto generated -->
|
||||
|
||||
## General Options:
|
||||
-h, --help Print this help text and exit
|
||||
@@ -211,7 +222,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
-4, --force-ipv4 Make all connections via IPv4
|
||||
-6, --force-ipv6 Make all connections via IPv6
|
||||
|
||||
## Geo Restriction:
|
||||
## Geo-restriction:
|
||||
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
||||
some geo-restricted sites. The default
|
||||
proxy specified by --proxy (or none, if the
|
||||
@@ -294,14 +305,14 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
a file that is in the archive
|
||||
--break-on-reject Stop the download process when encountering
|
||||
a file that has been filtered out
|
||||
--skip-playlist-after-errors N Number of allowed failures until the rest
|
||||
of the playlist is skipped
|
||||
--no-download-archive Do not use archive file (default)
|
||||
--include-ads Download advertisements as well
|
||||
(experimental)
|
||||
--no-include-ads Do not download advertisements (default)
|
||||
|
||||
## Download Options:
|
||||
-N, --concurrent-fragments N Number of fragments to download
|
||||
concurrently (default is 1)
|
||||
-N, --concurrent-fragments N Number of fragments of a dash/hlsnative
|
||||
video that should be downloaded concurrently
|
||||
(default is 1)
|
||||
-r, --limit-rate RATE Maximum download rate in bytes per second
|
||||
(e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
@@ -335,10 +346,6 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--playlist-random Download playlist videos in random order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||
expected file size
|
||||
--hls-prefer-native Use the native HLS downloader instead of
|
||||
ffmpeg
|
||||
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
|
||||
downloader
|
||||
--hls-use-mpegts Use the mpegts container for HLS videos;
|
||||
allowing some players to play the video
|
||||
while downloading, and reducing the chance
|
||||
@@ -348,10 +355,19 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--no-hls-use-mpegts Do not use the mpegts container for HLS
|
||||
videos. This is default when not
|
||||
downloading live streams
|
||||
--external-downloader NAME Name or path of the external downloader to
|
||||
use. Currently supports aria2c, avconv,
|
||||
axel, curl, ffmpeg, httpie, wget
|
||||
(Recommended: aria2c)
|
||||
--downloader [PROTO:]NAME Name or path of the external downloader to
|
||||
use (optionally) prefixed by the protocols
|
||||
(http, ftp, m3u8, dash, rtsp, rtmp, mms) to
|
||||
use it for. Currently supports native,
|
||||
aria2c, avconv, axel, curl, ffmpeg, httpie,
|
||||
wget (Recommended: aria2c). You can use
|
||||
this option multiple times to set different
|
||||
downloaders for different protocols. For
|
||||
example, --downloader aria2c --downloader
|
||||
"dash,m3u8:native" will use aria2c for
|
||||
http/ftp downloads, and the native
|
||||
downloader for dash/m3u8 downloads
|
||||
(Alias: --external-downloader)
|
||||
--downloader-args NAME:ARGS Give these arguments to the external
|
||||
downloader. Specify the downloader name and
|
||||
the arguments separated by a colon ":". You
|
||||
@@ -363,7 +379,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
stdin), one URL per line. Lines starting
|
||||
with '#', ';' or ']' are considered as
|
||||
comments and ignored
|
||||
-P, --paths TYPE:PATH The paths where the files should be
|
||||
-P, --paths TYPES:PATH The paths where the files should be
|
||||
downloaded. Specify the type of file and
|
||||
the path separated by a colon ":". All the
|
||||
same types as --output are supported.
|
||||
@@ -374,13 +390,11 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
home path after download is finished. This
|
||||
option is ignored if --output is an
|
||||
absolute path
|
||||
-o, --output [TYPE:]TEMPLATE Output filename template, see "OUTPUT
|
||||
-o, --output [TYPES:]TEMPLATE Output filename template; see "OUTPUT
|
||||
TEMPLATE" for details
|
||||
--output-na-placeholder TEXT Placeholder value for unavailable meta
|
||||
fields in output filename template
|
||||
(default: "NA")
|
||||
--autonumber-start NUMBER Specify the start value for %(autonumber)s
|
||||
(default is 1)
|
||||
--restrict-filenames Restrict filenames to only ASCII
|
||||
characters, and avoid "&" and spaces in
|
||||
filenames
|
||||
@@ -400,7 +414,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
-c, --continue Resume partially downloaded files/fragments
|
||||
(default)
|
||||
--no-continue Do not resume partially downloaded
|
||||
fragments. If the file is unfragmented,
|
||||
fragments. If the file is not fragmented,
|
||||
restart download of the entire file
|
||||
--part Use .part files instead of writing directly
|
||||
into output file (default)
|
||||
@@ -450,7 +464,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
## Thumbnail Images:
|
||||
## Thumbnail Options:
|
||||
--write-thumbnail Write thumbnail image to disk
|
||||
--no-write-thumbnail Do not write thumbnail image to disk
|
||||
(default)
|
||||
@@ -472,6 +486,12 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--no-warnings Ignore warnings
|
||||
-s, --simulate Do not download the video and do not write
|
||||
anything to disk
|
||||
--ignore-no-formats-error Ignore "No video formats" error. Useful
|
||||
for extracting metadata even if the video
|
||||
is not actually available for download
|
||||
(experimental)
|
||||
--no-ignore-no-formats-error Throw error when no downloadable video
|
||||
formats are found (default)
|
||||
--skip-download Do not download the video but write all
|
||||
related files (Alias: --no-download)
|
||||
-g, --get-url Simulate, quiet but print URL
|
||||
@@ -492,8 +512,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--print-json Be quiet and print the video information as
|
||||
JSON (video is still being downloaded)
|
||||
--force-write-archive Force download archive entries to be
|
||||
written as far as no errors occur,even if
|
||||
-s or another simulation switch is used
|
||||
written as far as no errors occur, even if
|
||||
-s or another simulation option is used
|
||||
(Alias: --force-download-archive)
|
||||
--newline Output progress bar as new lines
|
||||
--no-progress Do not print progress bar
|
||||
@@ -551,7 +571,6 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
into a single file
|
||||
--no-audio-multistreams Only one audio stream is downloaded for
|
||||
each output file (default)
|
||||
--all-formats Download all available video formats
|
||||
--prefer-free-formats Prefer video formats with free containers
|
||||
over non-free ones of same quality. Use
|
||||
with "-S ext" to strictly prefer free
|
||||
@@ -569,7 +588,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
container format. One of mkv, mp4, ogg,
|
||||
webm, flv. Ignored if no merge is required
|
||||
--allow-unplayable-formats Allow unplayable formats to be listed and
|
||||
downloaded. All video postprocessing will
|
||||
downloaded. All video post-processing will
|
||||
also be turned off
|
||||
--no-allow-unplayable-formats Do not allow unplayable formats to be
|
||||
listed or downloaded (default)
|
||||
@@ -581,15 +600,17 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
(Alias: --write-automatic-subs)
|
||||
--no-write-auto-subs Do not write auto-generated subtitles
|
||||
(default) (Alias: --no-write-automatic-subs)
|
||||
--all-subs Download all the available subtitles of the
|
||||
video
|
||||
--list-subs List all available subtitles for the video
|
||||
--sub-format FORMAT Subtitle format, accepts formats
|
||||
preference, for example: "srt" or
|
||||
"ass/srt/best"
|
||||
--sub-langs LANGS Languages of the subtitles to download
|
||||
(optional) separated by commas, use --list-
|
||||
subs for available language tags
|
||||
--sub-langs LANGS Languages of the subtitles to download (can
|
||||
be regex) or "all" separated by commas.
|
||||
(Eg: --sub-langs en.*,ja) You can prefix
|
||||
the language code with a "-" to exempt it
|
||||
from the requested languages. (Eg: --sub-
|
||||
langs all,-live_chat) Use --list-subs for a
|
||||
list of available language tags
|
||||
|
||||
## Authentication Options:
|
||||
-u, --username USERNAME Login with this account ID
|
||||
@@ -598,8 +619,6 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
-2, --twofactor TWOFACTOR Two-factor authentication code
|
||||
-n, --netrc Use .netrc authentication data
|
||||
--video-password PASSWORD Video password (vimeo, youku)
|
||||
|
||||
## Adobe Pass Options:
|
||||
--ap-mso MSO Adobe Pass multiple-system operator (TV
|
||||
provider) identifier, use --ap-list-mso for
|
||||
a list of available MSOs
|
||||
@@ -636,24 +655,24 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
Specify the postprocessor/executable name
|
||||
and the arguments separated by a colon ":"
|
||||
to give the argument to the specified
|
||||
postprocessor/executable. Supported
|
||||
postprocessors are: SponSkrub,
|
||||
ExtractAudio, VideoRemuxer, VideoConvertor,
|
||||
EmbedSubtitle, Metadata, Merger,
|
||||
FixupStretched, FixupM4a, FixupM3u8,
|
||||
SubtitlesConvertor, EmbedThumbnail and
|
||||
SplitChapters. The supported executables
|
||||
are: SponSkrub, FFmpeg, FFprobe, and
|
||||
AtomicParsley. You can also specify
|
||||
"PP+EXE:ARGS" to give the arguments to the
|
||||
specified executable only when being used
|
||||
by the specified postprocessor.
|
||||
Additionally, for ffmpeg/ffprobe, "_i"/"_o"
|
||||
can be appended to the prefix optionally
|
||||
followed by a number to pass the argument
|
||||
before the specified input/output file. Eg:
|
||||
--ppa "Merger+ffmpeg_i1:-v quiet". You can
|
||||
use this option multiple times to give
|
||||
postprocessor/executable. Supported PP are:
|
||||
Merger, ExtractAudio, SplitChapters,
|
||||
Metadata, EmbedSubtitle, EmbedThumbnail,
|
||||
SubtitlesConvertor, ThumbnailsConvertor,
|
||||
VideoRemuxer, VideoConvertor, SponSkrub,
|
||||
FixupStretched, FixupM4a and FixupM3u8. The
|
||||
supported executables are: AtomicParsley,
|
||||
FFmpeg, FFprobe, and SponSkrub. You can
|
||||
also specify "PP+EXE:ARGS" to give the
|
||||
arguments to the specified executable only
|
||||
when being used by the specified
|
||||
postprocessor. Additionally, for
|
||||
ffmpeg/ffprobe, "_i"/"_o" can be appended
|
||||
to the prefix optionally followed by a
|
||||
number to pass the argument before the
|
||||
specified input/output file. Eg: --ppa
|
||||
"Merger+ffmpeg_i1:-v quiet". You can use
|
||||
this option multiple times to give
|
||||
different arguments to different
|
||||
postprocessors. (Alias: --ppa)
|
||||
-k, --keep-video Keep the intermediate video file on disk
|
||||
@@ -669,26 +688,9 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--no-embed-thumbnail Do not embed thumbnail (default)
|
||||
--add-metadata Write metadata to the video file
|
||||
--no-add-metadata Do not write metadata (default)
|
||||
--parse-metadata FIELD:FORMAT Parse additional metadata like title/artist
|
||||
from other fields. Give a template or field
|
||||
name to extract data from and the format to
|
||||
interpret it as, seperated by a ":". Either
|
||||
regular expression with named capture
|
||||
groups or a similar syntax to the output
|
||||
template can be used for the FORMAT.
|
||||
Similarly, the syntax for output template
|
||||
can be used for FIELD to parse the data
|
||||
from multiple fields. The parsed parameters
|
||||
replace any existing values and can be used
|
||||
in output templates. This option can be
|
||||
used multiple times. Example: --parse-
|
||||
metadata "title:%(artist)s - %(title)s"
|
||||
matches a title like "Coldplay - Paradise".
|
||||
Example: --parse-metadata "%(series)s
|
||||
%(episode_number)s:%(title)s" sets the
|
||||
title using series and episode number.
|
||||
Example (regex): --parse-metadata
|
||||
"description:Artist - (?P<artist>.+?)"
|
||||
--parse-metadata FROM:TO Parse additional metadata like title/artist
|
||||
from other fields; see "MODIFYING METADATA"
|
||||
for details
|
||||
--xattrs Write metadata to the video file's xattrs
|
||||
(using dublin core and xdg standards)
|
||||
--fixup POLICY Automatically correct known faults of the
|
||||
@@ -700,12 +702,19 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
path to the binary or its containing
|
||||
directory
|
||||
--exec CMD Execute a command on the file after
|
||||
downloading and post-processing, similar to
|
||||
find's -exec syntax. Example: --exec 'adb
|
||||
push {} /sdcard/Music/ && rm {}'
|
||||
downloading and post-processing. Similar
|
||||
syntax to the output template can be used
|
||||
to pass any field as arguments to the
|
||||
command. An additional field "filepath"
|
||||
that contains the final path of the
|
||||
downloaded file is also available. If no
|
||||
fields are passed, "%(filepath)s" is
|
||||
appended to the end of the command
|
||||
--convert-subs FORMAT Convert the subtitles to another format
|
||||
(currently supported: srt|ass|vtt|lrc)
|
||||
(Alias: --convert-subtitles)
|
||||
--convert-thumbnails FORMAT Convert the thumbnails to another format
|
||||
(currently supported: jpg)
|
||||
--split-chapters Split video into multiple files based on
|
||||
internal chapters. The "chapter:" prefix
|
||||
can be used with "--paths" and "--output"
|
||||
@@ -828,9 +837,21 @@ The `-o` option is used to indicate a template for the output file names while `
|
||||
|
||||
**tl;dr:** [navigate me to examples](#output-template-examples).
|
||||
|
||||
The basic usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is not recommended). However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Date/time fields can also be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it inside the parantheses seperated from the field name using a `>`. For example, `%(duration>%H-%M-%S)s`.
|
||||
The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is _not_ recommended and could break some post-processing).
|
||||
|
||||
Additionally, you can set different output templates for the various metadata files seperately from the general output template by specifying the type of file followed by the template seperated by a colon ":". The different filetypes supported are `subtitle`, `thumbnail`, `description`, `annotation`, `infojson`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video.
|
||||
It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.
|
||||
|
||||
The field names themselves (the part inside the parentheses) can also have some special formatting:
|
||||
1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s` or `%(upload_date>%Y-%m-%d)s`
|
||||
2. **Offset numbers**: Numeric fields can have an initial offset specified by using a `+` separator. Eg: `%(playlist_index+10)03d`. This can also be used in conjunction with the date-time formatting. Eg: `%(epoch+-3600>%H-%M-%S)s`
|
||||
3. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. Eg: `%(tags.0)s` or `%(subtitles.en.-1.ext)s`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields
|
||||
|
||||
To summarize, the general syntax for a field is:
|
||||
```
|
||||
%(name[.keys][+offset][>strf])[flags][width][.precision][length]type
|
||||
```
|
||||
|
||||
Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation`, `infojson`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video.
|
||||
|
||||
The available fields are:
|
||||
|
||||
@@ -923,7 +944,7 @@ Available for the media that is a track or a part of a music album:
|
||||
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
||||
- `release_year` (numeric): Year (YYYY) when the album was released
|
||||
|
||||
Available when using `--split-chapters` for videos with internal chapters:
|
||||
Available for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
|
||||
|
||||
- `section_title` (string): Title of the chapter
|
||||
- `section_number` (numeric): Number of the chapter within the file
|
||||
@@ -962,7 +983,7 @@ youtube-dl_test_video_.mp4 # A simple file name
|
||||
# Download YouTube playlist videos in separate directory indexed by video order in a playlist
|
||||
$ yt-dlp -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
|
||||
|
||||
# Download YouTube playlist videos in seperate directories according to their uploaded year
|
||||
# Download YouTube playlist videos in separate directories according to their uploaded year
|
||||
$ yt-dlp -o '%(upload_date>%Y)s/%(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
|
||||
|
||||
# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
|
||||
@@ -983,19 +1004,20 @@ $ yt-dlp -o - BaW_jenozKc
|
||||
By default, yt-dlp tries to download the best available quality if you **don't** pass any options.
|
||||
This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, if multiple audio streams are enabled (`--audio-multistreams`), the default format changes to `-f bestvideo+bestaudio/best`. Similarly, if ffmpeg is unavailable, or if you use yt-dlp to stream to `stdout` (`-o -`), the default becomes `-f best/bestvideo+bestaudio`.
|
||||
|
||||
The general syntax for format selection is `--f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
|
||||
The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
|
||||
|
||||
**tl;dr:** [navigate me to examples](#format-selection-examples).
|
||||
|
||||
The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
|
||||
The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
|
||||
|
||||
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
|
||||
|
||||
You can also use special names to select particular edge case formats:
|
||||
|
||||
- `all`: Select all formats
|
||||
- `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio.
|
||||
- `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio.
|
||||
- `mergeall`: Select and merge all formats (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
|
||||
- `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio
|
||||
- `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio
|
||||
- `b`, `best`: Select the best quality format that contains both video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
|
||||
- `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]`
|
||||
- `bv`, `bestvideo`: Select the best quality video-only format. Equivalent to `best*[acodec=none]`
|
||||
@@ -1007,7 +1029,9 @@ You can also use special names to select particular edge case formats:
|
||||
- `ba*`, `bestaudio*`: Select the best quality format that contains audio. It may also contain video. Equivalent to `best*[acodec!=none]`
|
||||
- `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]`
|
||||
|
||||
For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recomended to never actually use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details.
|
||||
For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details.
|
||||
|
||||
You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.
|
||||
|
||||
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
|
||||
|
||||
@@ -1042,7 +1066,7 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends
|
||||
|
||||
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
|
||||
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster. Any other field made available by the extractor can also be used for filtering.
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
|
||||
|
||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter. For example, `-f "all[vcodec=none]"` selects all audio-only formats.
|
||||
|
||||
@@ -1058,7 +1082,7 @@ You can change the criteria for being considered the `best` by using `-S` (`--fo
|
||||
- `lang`: Language preference as given by the extractor
|
||||
- `quality`: The quality of the format as given by the extractor
|
||||
- `source`: Preference of the source as given by the extractor
|
||||
- `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8-native` > `m3u8` > `http-dash-segments` > other > `mms`/`rtsp` > unknown > `f4f`/`f4m`)
|
||||
- `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native` > `m3u8` > `http_dash_segments` > other > `mms`/`rtsp` > unknown > `f4f`/`f4m`)
|
||||
- `vcodec`: Video Codec (`av01` > `vp9.2` > `vp9` > `h265` > `h264` > `vp8` > `h263` > `theora` > other > unknown)
|
||||
- `acodec`: Audio Codec (`opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `ac3` > `dts` > other > unknown)
|
||||
- `codec`: Equivalent to `vcodec,acodec`
|
||||
@@ -1078,11 +1102,11 @@ You can change the criteria for being considered the `best` by using `-S` (`--fo
|
||||
- `br`: Equivalent to using `tbr,vbr,abr`
|
||||
- `asr`: Audio sample rate in Hz
|
||||
|
||||
Note that any other **numerical** field made available by the extractor can also be used. All fields, unless specified otherwise, are sorted in decending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers format with the smallest resolution. Additionally, you can suffix a prefered value for the fields, seperated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two prefered values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB.
|
||||
Note that any other **numerical** field made available by the extractor can also be used. All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB.
|
||||
|
||||
The fields `hasvid`, `ie_pref`, `lang` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--force-format-sort`. Apart from these, the default order used is: `quality,res,fps,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. Note that the extractors may override this default order, but they cannot override the user-provided order.
|
||||
|
||||
If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all repects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.
|
||||
If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.
|
||||
|
||||
**Tip**: You can use `-v -F` to see how the formats have been sorted (worst to best).
|
||||
|
||||
@@ -1091,7 +1115,7 @@ If your format selector is `worst`, the last item is selected after sorting. Thi
|
||||
Note that on Windows you may need to use double quotes instead of single.
|
||||
|
||||
```bash
|
||||
# Download and merge the best best video-only format and the best audio-only format,
|
||||
# Download and merge the best video-only format and the best audio-only format,
|
||||
# or download the best combined format if video-only format is not available
|
||||
$ yt-dlp -f 'bv+ba/b'
|
||||
|
||||
@@ -1107,10 +1131,17 @@ $ yt-dlp
|
||||
# by default, bestvideo and bestaudio will have the same file name.
|
||||
$ yt-dlp -f 'bv,ba' -o '%(title)s.f%(format_id)s.%(ext)s'
|
||||
|
||||
# Download and merge the best format that has a video stream,
|
||||
# and all audio-only formats into one file
|
||||
$ yt-dlp -f 'bv*+mergeall[vcodec=none]' --audio-multistreams
|
||||
|
||||
# Download and merge the best format that has a video stream,
|
||||
# and the best 2 audio-only formats into one file
|
||||
$ yt-dlp -f 'bv*+ba+ba.2' --audio-multistreams
|
||||
|
||||
|
||||
# The following examples show the old method (without -S) of format selection
|
||||
# and how to use -S to achieve a similar but better result
|
||||
# and how to use -S to achieve a similar but (generally) better result
|
||||
|
||||
# Download the worst video available (old method)
|
||||
$ yt-dlp -f 'wv*+wa/w'
|
||||
@@ -1186,23 +1217,50 @@ $ yt-dlp -S '+codec:h264'
|
||||
|
||||
# More complex examples
|
||||
|
||||
# Download the best video no better than 720p prefering framerate greater than 30,
|
||||
# or the worst video (still prefering framerate greater than 30) if there is no such video
|
||||
# Download the best video no better than 720p preferring framerate greater than 30,
|
||||
# or the worst video (still preferring framerate greater than 30) if there is no such video
|
||||
$ yt-dlp -f '((bv*[fps>30]/bv*)[height<=720]/(wv*[fps>30]/wv*)) + ba / (b[fps>30]/b)[height<=720]/(w[fps>30]/w)'
|
||||
|
||||
# Download the video with the largest resolution no better than 720p,
|
||||
# or the video with the smallest resolution available if there is no such video,
|
||||
# prefering larger framerate for formats with the same resolution
|
||||
# or the video with the smallest resolution available if there is no such video,
|
||||
# preferring larger framerate for formats with the same resolution
|
||||
$ yt-dlp -S 'res:720,fps'
|
||||
|
||||
|
||||
|
||||
# Download the video with smallest resolution no worse than 480p,
|
||||
# or the video with the largest resolution available if there is no such video,
|
||||
# prefering better codec and then larger total bitrate for the same resolution
|
||||
# preferring better codec and then larger total bitrate for the same resolution
|
||||
$ yt-dlp -S '+res:480,codec,br'
|
||||
```
|
||||
|
||||
# MODIFYING METADATA
|
||||
|
||||
The metadata obtained by the extractors can be modified by using `--parse-metadata FROM:TO`. The general syntax is to give the name of a field or a template (with similar syntax to [output template](#output-template)) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.
|
||||
|
||||
Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--add-metadata`.
|
||||
|
||||
You can also use this to change only the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. You can use this to set a different "description" and "synopsis", for example.
|
||||
|
||||
## Modifying metadata examples
|
||||
|
||||
Note that on Windows you may need to use double quotes instead of single.
|
||||
|
||||
```bash
|
||||
# Interpret the title as "Artist - Title"
|
||||
$ yt-dlp --parse-metadata 'title:%(artist)s - %(title)s'
|
||||
|
||||
# Regex example
|
||||
$ yt-dlp --parse-metadata 'description:Artist - (?P<artist>.+)'
|
||||
|
||||
# Set title as "Series name S01E05"
|
||||
$ yt-dlp --parse-metadata '%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s'
|
||||
|
||||
# Set "comment" field in video metadata using description instead of webpage_url
|
||||
$ yt-dlp --parse-metadata 'description:(?s)(?P<meta_comment>.+)' --add-metadata
|
||||
|
||||
```
|
||||
|
||||
# PLUGINS
|
||||
|
||||
Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`. Currently only `extractor` plugins are supported. Support for `downloader` and `postprocessor` plugins may be added in the future. See [ytdlp_plugins](ytdlp_plugins) for example.
|
||||
@@ -1213,34 +1271,57 @@ Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`. Currently
|
||||
|
||||
These are all the deprecated options and the current alternative to achieve the same effect
|
||||
|
||||
#### Not recommended
|
||||
While these options still work, their use is not recommended since there are other alternatives to achieve the same effect
|
||||
|
||||
--all-formats -f all
|
||||
--all-subs --sub-langs all --write-subs
|
||||
--autonumber-size NUMBER Use string formatting. Eg: %(autonumber)03d
|
||||
--autonumber-start NUMBER Use internal field formatting like %(autonumber+NUMBER)s
|
||||
--metadata-from-title FORMAT --parse-metadata "%(title)s:FORMAT"
|
||||
--hls-prefer-native --downloader "m3u8:native"
|
||||
--hls-prefer-ffmpeg --downloader "m3u8:ffmpeg"
|
||||
--sponskrub-args ARGS --ppa "sponskrub:ARGS"
|
||||
--test Used by developers for testing extractors. Not intended for the end user
|
||||
|
||||
|
||||
#### Old aliases
|
||||
These are aliases that are no longer documented for various reasons
|
||||
|
||||
--avconv-location --ffmpeg-location
|
||||
--cn-verification-proxy URL --geo-verification-proxy URL
|
||||
--dump-headers --print-traffic
|
||||
--dump-intermediate-pages --dump-pages
|
||||
--force-write-download-archive --force-write-archive
|
||||
--load-info --load-info-json
|
||||
--no-split-tracks --no-split-chapters
|
||||
--no-write-srt --no-write-subs
|
||||
--prefer-unsecure --prefer-insecure
|
||||
--rate-limit RATE --limit-rate RATE
|
||||
--split-tracks --split-chapters
|
||||
--srt-lang LANGS --sub-langs LANGS
|
||||
--trim-file-names LENGTH --trim-filenames LENGTH
|
||||
--write-srt --write-subs
|
||||
--yes-overwrites --force-overwrites
|
||||
|
||||
#### No longer supported
|
||||
These options may no longer work as intended
|
||||
|
||||
--prefer-avconv avconv is not officially supported by yt-dlp (Alias: --no-prefer-ffmpeg)
|
||||
--prefer-ffmpeg Default (Alias: --no-prefer-avconv)
|
||||
-C, --call-home Not implemented
|
||||
--no-call-home Default
|
||||
--include-ads No longer supported
|
||||
--no-include-ads Default
|
||||
--youtube-print-sig-code No longer supported
|
||||
--id -o "%(id)s.%(ext)s"
|
||||
-A, --auto-number -o "%(autonumber)s-%(id)s.%(ext)s"
|
||||
-t, --title -o "%(title)s-%(id)s.%(ext)s"
|
||||
-l, --literal -o accepts literal names
|
||||
--autonumber-size NUMBER Use string formatting. Eg: %(autonumber)03d
|
||||
--metadata-from-title FORMAT --parse-metadata "title:FORMAT"
|
||||
--prefer-avconv avconv is no longer officially supported (Alias: --no-prefer-ffmpeg)
|
||||
--prefer-ffmpeg Default (Alias: --no-prefer-avconv)
|
||||
--avconv-location avconv is no longer officially supported
|
||||
-C, --call-home Not implemented
|
||||
--no-call-home Default
|
||||
--write-srt --write-subs
|
||||
--no-write-srt --no-write-subs
|
||||
--srt-lang LANGS --sub-langs LANGS
|
||||
--prefer-unsecure --prefer-insecure
|
||||
--rate-limit RATE --limit-rate RATE
|
||||
--force-write-download-archive --force-write-archive
|
||||
--dump-intermediate-pages --dump-pages
|
||||
--dump-headers --print-traffic
|
||||
--youtube-print-sig-code No longer supported
|
||||
--trim-file-names LENGTH --trim-filenames LENGTH
|
||||
--yes-overwrites --force-overwrites
|
||||
--load-info --load-info-json
|
||||
--split-tracks --split-chapters
|
||||
--no-split-tracks --no-split-chapters
|
||||
--sponskrub-args ARGS --ppa "sponskrub:ARGS"
|
||||
--test Only used for testing extractors
|
||||
|
||||
#### Removed
|
||||
Currently, there are no options that have been completely removed. But there are plans to remove the old output options `-A`, `-t`, `-l`, `--id` (which have been deprecated since 2014) in the near future. If you are still using these, please move to using `--output` instead.
|
||||
|
||||
|
||||
|
||||
# MORE
|
||||
|
||||
@@ -14,9 +14,14 @@ lazy_extractors_filename = sys.argv[1]
|
||||
if os.path.exists(lazy_extractors_filename):
|
||||
os.remove(lazy_extractors_filename)
|
||||
|
||||
# Block plugins from loading
|
||||
os.rename('ytdlp_plugins', 'ytdlp_plugins_blocked')
|
||||
|
||||
from yt_dlp.extractor import _ALL_CLASSES
|
||||
from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||
|
||||
os.rename('ytdlp_plugins_blocked', 'ytdlp_plugins')
|
||||
|
||||
with open('devscripts/lazy_load_template.py', 'rt') as f:
|
||||
module_template = f.read()
|
||||
|
||||
|
||||
@@ -69,6 +69,7 @@ PyInstaller.__main__.run([
|
||||
'--onefile',
|
||||
'--icon=devscripts/cloud.ico',
|
||||
'--exclude-module=youtube_dl',
|
||||
'--exclude-module=youtube_dlc',
|
||||
'--exclude-module=test',
|
||||
'--exclude-module=ytdlp_plugins',
|
||||
'--hidden-import=mutagen',
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
- **20min**
|
||||
- **220.ro**
|
||||
- **23video**
|
||||
- **247sports**
|
||||
- **24video**
|
||||
- **3qsdn**: 3Q SDN
|
||||
- **3sat**
|
||||
@@ -171,7 +172,8 @@
|
||||
- **cbsnews**: CBS News
|
||||
- **cbsnews:embed**
|
||||
- **cbsnews:livevideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
- **cbssports**
|
||||
- **cbssports:embed**
|
||||
- **CCMA**
|
||||
- **CCTV**: 央视网
|
||||
- **CDA**
|
||||
@@ -249,6 +251,8 @@
|
||||
- **DiscoveryGoPlaylist**
|
||||
- **DiscoveryNetworksDe**
|
||||
- **DiscoveryPlus**
|
||||
- **DiscoveryPlusIndia**
|
||||
- **DiscoveryPlusIndiaShow**
|
||||
- **DiscoveryVR**
|
||||
- **Disney**
|
||||
- **dlive:stream**
|
||||
@@ -457,6 +461,8 @@
|
||||
- **kuwo:singer**: 酷我音乐 - 歌手
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.it**
|
||||
- **la7.it:pod:episode**
|
||||
- **la7.it:podcast**
|
||||
- **laola1tv**
|
||||
- **laola1tv:embed**
|
||||
- **lbry**
|
||||
@@ -480,6 +486,8 @@
|
||||
- **limelight**
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
- **LineLive**
|
||||
- **LineLiveChannel**
|
||||
- **LineTV**
|
||||
- **linkedin:learning**
|
||||
- **linkedin:learning:course**
|
||||
@@ -506,6 +514,7 @@
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **ManyVids**
|
||||
- **MaoriTV**
|
||||
- **Markiza**
|
||||
- **MarkizaPage**
|
||||
- **massengeschmack.tv**
|
||||
@@ -606,6 +615,7 @@
|
||||
- **ndr:embed**
|
||||
- **ndr:embed:base**
|
||||
- **NDTV**
|
||||
- **Nebula**
|
||||
- **NerdCubedFeed**
|
||||
- **netease:album**: 网易云音乐 - 专辑
|
||||
- **netease:djradio**: 网易云音乐 - 电台
|
||||
@@ -624,6 +634,7 @@
|
||||
- **NextTV**: 壹電視
|
||||
- **Nexx**
|
||||
- **NexxEmbed**
|
||||
- **NFHSNetwork**
|
||||
- **nfl.com** (Currently broken)
|
||||
- **nfl.com:article** (Currently broken)
|
||||
- **NhkVod**
|
||||
@@ -707,6 +718,9 @@
|
||||
- **OutsideTV**
|
||||
- **PacktPub**
|
||||
- **PacktPubCourse**
|
||||
- **PalcoMP3:artist**
|
||||
- **PalcoMP3:song**
|
||||
- **PalcoMP3:video**
|
||||
- **pandora.tv**: 판도라TV
|
||||
- **ParamountNetwork**
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
@@ -1002,6 +1016,7 @@
|
||||
- **TruTV**
|
||||
- **Tube8**
|
||||
- **TubiTv**
|
||||
- **TubiTvShow**
|
||||
- **Tumblr**
|
||||
- **tunein:clip**
|
||||
- **tunein:program**
|
||||
@@ -1093,6 +1108,7 @@
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
- **video.arnes.si**: Arnes Video
|
||||
- **video.google:search**: Google Video search
|
||||
- **video.sky.it**
|
||||
- **video.sky.it:live**
|
||||
|
||||
@@ -311,8 +311,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
def test_youtube_format_selection(self):
|
||||
# FIXME: Rewrite in accordance with the new format sorting options
|
||||
return
|
||||
# disabled for now - this needs some changes
|
||||
|
||||
order = [
|
||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
|
||||
@@ -601,6 +601,26 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
self.assertTrue(subs)
|
||||
self.assertEqual(set(subs.keys()), set(['es', 'fr']))
|
||||
|
||||
result = get_info({'writesubtitles': True, 'subtitleslangs': ['all', '-en']})
|
||||
subs = result['requested_subtitles']
|
||||
self.assertTrue(subs)
|
||||
self.assertEqual(set(subs.keys()), set(['es', 'fr']))
|
||||
|
||||
result = get_info({'writesubtitles': True, 'subtitleslangs': ['en', 'fr', '-en']})
|
||||
subs = result['requested_subtitles']
|
||||
self.assertTrue(subs)
|
||||
self.assertEqual(set(subs.keys()), set(['fr']))
|
||||
|
||||
result = get_info({'writesubtitles': True, 'subtitleslangs': ['-en', 'en']})
|
||||
subs = result['requested_subtitles']
|
||||
self.assertTrue(subs)
|
||||
self.assertEqual(set(subs.keys()), set(['en']))
|
||||
|
||||
result = get_info({'writesubtitles': True, 'subtitleslangs': ['e.+']})
|
||||
subs = result['requested_subtitles']
|
||||
self.assertTrue(subs)
|
||||
self.assertEqual(set(subs.keys()), set(['es', 'en']))
|
||||
|
||||
result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
|
||||
subs = result['requested_subtitles']
|
||||
self.assertTrue(subs)
|
||||
@@ -635,6 +655,8 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'height': 1080,
|
||||
'title1': '$PATH',
|
||||
'title2': '%PATH%',
|
||||
'timestamp': 1618488000,
|
||||
'formats': [{'id': 'id1'}, {'id': 'id2'}]
|
||||
}
|
||||
|
||||
def fname(templ, na_placeholder='NA'):
|
||||
@@ -651,6 +673,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
# Or by provided placeholder
|
||||
self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4')
|
||||
self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4')
|
||||
self.assertEqual(fname('%(height)s.%(ext)s'), '1080.mp4')
|
||||
self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
|
||||
self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4')
|
||||
self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')
|
||||
@@ -668,6 +691,12 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
|
||||
self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
|
||||
self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
|
||||
self.assertEqual(fname('%(timestamp+-1000>%H-%M-%S)s'), '11-43-20')
|
||||
self.assertEqual(fname('%(id+1)05d'), '01235')
|
||||
self.assertEqual(fname('%(width+100)05d'), 'NA')
|
||||
self.assertEqual(fname('%(formats.0)s').replace("u", ""), "{'id' - 'id1'}")
|
||||
self.assertEqual(fname('%(formats.-1.id)s'), 'id2')
|
||||
self.assertEqual(fname('%(formats.2)s'), 'NA')
|
||||
|
||||
def test_format_note(self):
|
||||
ydl = YoutubeDL()
|
||||
|
||||
@@ -72,15 +72,6 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||
|
||||
def test_youtube_extract(self):
|
||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||
|
||||
def test_facebook_matching(self):
|
||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
|
||||
|
||||
@@ -39,6 +39,16 @@ class TestExecution(unittest.TestCase):
|
||||
_, stderr = p.communicate()
|
||||
self.assertFalse(stderr)
|
||||
|
||||
def test_lazy_extractors(self):
    """Generate the lazy extractors module and run the URL matching tests against it.

    Both child processes run with ``cwd=rootDir``, so the generated file
    lives under ``rootDir``. The cleanup path is therefore anchored to
    ``rootDir`` as well; a bare relative path would be resolved against the
    current working directory of the test process and could leave the
    generated module behind when the tests are started from elsewhere.
    """
    lazy_extractors_path = os.path.join(rootDir, 'yt_dlp', 'extractor', 'lazy_extractors.py')
    try:
        subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'yt_dlp/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
        subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
    finally:
        # Best-effort cleanup: the file may not exist if generation failed
        try:
            os.remove(lazy_extractors_path)
        except (IOError, OSError):
            pass
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -14,10 +14,19 @@ from yt_dlp.postprocessor import MetadataFromFieldPP, MetadataFromTitlePP
|
||||
class TestMetadataFromField(unittest.TestCase):
|
||||
def test_format_to_regex(self):
|
||||
pp = MetadataFromFieldPP(None, ['title:%(title)s - %(artist)s'])
|
||||
self.assertEqual(pp._data[0]['regex'], r'(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)')
|
||||
self.assertEqual(pp._data[0]['regex'], r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
|
||||
def test_field_to_outtmpl(self):
|
||||
pp = MetadataFromFieldPP(None, ['title:%(title)s : %(artist)s'])
|
||||
self.assertEqual(pp._data[0]['tmpl'], '%(title)s')
|
||||
|
||||
def test_in_out_seperation(self):
|
||||
pp = MetadataFromFieldPP(None, ['%(title)s \\: %(artist)s:%(title)s : %(artist)s'])
|
||||
self.assertEqual(pp._data[0]['in'], '%(title)s : %(artist)s')
|
||||
self.assertEqual(pp._data[0]['out'], '%(title)s : %(artist)s')
|
||||
|
||||
|
||||
class TestMetadataFromTitle(unittest.TestCase):
|
||||
def test_format_to_regex(self):
|
||||
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
|
||||
self.assertEqual(pp._titleregex, r'(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)')
|
||||
self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
|
||||
@@ -23,6 +23,7 @@ from yt_dlp.utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
date_from_str,
|
||||
datetime_from_str,
|
||||
DateRange,
|
||||
detect_exe_version,
|
||||
determine_ext,
|
||||
@@ -311,8 +312,18 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
|
||||
self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week'))
|
||||
self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week'))
|
||||
self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year'))
|
||||
self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month'))
|
||||
self.assertEqual(date_from_str('20200229+365day'), date_from_str('20200229+1year'))
|
||||
self.assertEqual(date_from_str('20210131+28day'), date_from_str('20210131+1month'))
|
||||
|
||||
def test_datetime_from_str(self):
|
||||
self.assertEqual(datetime_from_str('yesterday', precision='day'), datetime_from_str('now-1day', precision='auto'))
|
||||
self.assertEqual(datetime_from_str('now+7day', precision='day'), datetime_from_str('now+1week', precision='auto'))
|
||||
self.assertEqual(datetime_from_str('now+14day', precision='day'), datetime_from_str('now+2week', precision='auto'))
|
||||
self.assertEqual(datetime_from_str('20200229+365day', precision='day'), datetime_from_str('20200229+1year', precision='auto'))
|
||||
self.assertEqual(datetime_from_str('20210131+28day', precision='day'), datetime_from_str('20210131+1month', precision='auto'))
|
||||
self.assertEqual(datetime_from_str('20210131+59day', precision='day'), datetime_from_str('20210131+2month', precision='auto'))
|
||||
self.assertEqual(datetime_from_str('now+1day', precision='hour'), datetime_from_str('now+24hours', precision='auto'))
|
||||
self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto'))
|
||||
|
||||
def test_daterange(self):
|
||||
_20century = DateRange("19000101", "20000101")
|
||||
|
||||
26
test/test_youtube_misc.py
Normal file
26
test/test_youtube_misc.py
Normal file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
from yt_dlp.extractor import YoutubeIE
|
||||
|
||||
|
||||
class TestYoutubeMisc(unittest.TestCase):
    """Miscellaneous checks for the YouTube extractor."""

    def test_youtube_extract(self):
        """Every supported URL form (and a bare ID) yields the same video ID."""
        expected_id = 'BaW_jenozKc'
        inputs = (
            'http://www.youtube.com/watch?&v=BaW_jenozKc',
            'https://www.youtube.com/watch?&v=BaW_jenozKc',
            'https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc',
            'https://www.youtube.com/watch_popup?v=BaW_jenozKc',
            'http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930',
            'BaW_jenozKc',
        )
        for url in inputs:
            self.assertEqual(YoutubeIE.extract_id(url), expected_id)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
4
yt-dlp.sh
Normal file → Executable file
4
yt-dlp.sh
Normal file → Executable file
@@ -1,2 +1,2 @@
|
||||
#!/bin/bash
|
||||
python3 "$(dirname $(realpath $0))/yt_dlp/__main__.py" "$@"
|
||||
#!/bin/sh
|
||||
exec python3 "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@"
|
||||
|
||||
@@ -99,6 +99,7 @@ from .utils import (
|
||||
strftime_or_none,
|
||||
subtitles_filename,
|
||||
to_high_limit_path,
|
||||
traverse_dict,
|
||||
UnavailableVideoError,
|
||||
url_basename,
|
||||
version_tuple,
|
||||
@@ -111,9 +112,17 @@ from .utils import (
|
||||
process_communicate_or_kill,
|
||||
)
|
||||
from .cache import Cache
|
||||
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
|
||||
from .extractor import (
|
||||
gen_extractor_classes,
|
||||
get_info_extractor,
|
||||
_LAZY_LOADER,
|
||||
_PLUGIN_CLASSES
|
||||
)
|
||||
from .extractor.openload import PhantomJSwrapper
|
||||
from .downloader import get_suitable_downloader
|
||||
from .downloader import (
|
||||
get_suitable_downloader,
|
||||
shorten_protocol_name
|
||||
)
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
from .postprocessor import (
|
||||
FFmpegFixupM3u8PP,
|
||||
@@ -183,6 +192,9 @@ class YoutubeDL(object):
|
||||
simulate: Do not download the video files.
|
||||
format: Video format code. see "FORMAT SELECTION" for more details.
|
||||
allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
|
||||
ignore_no_formats_error: Ignore "No video formats" error. Useful for
|
||||
extracting metadata even if the video is not actually
|
||||
available for download (experimental)
|
||||
format_sort: How to sort the video formats. see "Sorting Formats"
|
||||
for more details.
|
||||
format_sort_force: Force the given format_sort. see "Sorting Formats"
|
||||
@@ -203,6 +215,8 @@ class YoutubeDL(object):
|
||||
ignoreerrors: Do not stop on download errors
|
||||
(Default True when running yt-dlp,
|
||||
but False when directly accessing YoutubeDL class)
|
||||
skip_playlist_after_errors: Number of allowed failures until the rest of
|
||||
the playlist is skipped
|
||||
force_generic_extractor: Force downloader to use the generic extractor
|
||||
overwrites: Overwrite all video and metadata files if True,
|
||||
overwrite only non-video files if None
|
||||
@@ -233,11 +247,15 @@ class YoutubeDL(object):
|
||||
writedesktoplink: Write a Linux internet shortcut file (.desktop)
|
||||
writesubtitles: Write the video subtitles to a file
|
||||
writeautomaticsub: Write the automatically generated subtitles to a file
|
||||
allsubtitles: Downloads all the subtitles of the video
|
||||
allsubtitles: Deprecated - Use subtitleslangs = ['all']
|
||||
Downloads all the subtitles of the video
|
||||
(requires writesubtitles or writeautomaticsub)
|
||||
listsubtitles: Lists all available subtitles for the video
|
||||
subtitlesformat: The format code for subtitles
|
||||
subtitleslangs: List of languages of the subtitles to download
|
||||
subtitleslangs: List of languages of the subtitles to download (can be regex).
|
||||
The list may contain "all" to refer to all the available
|
||||
subtitles. The language can be prefixed with a "-" to
|
||||
exclude it from the requested languages. Eg: ['all', '-live_chat']
|
||||
keepvideo: Keep the video file after post-processing
|
||||
daterange: A DateRange object, download only if the upload_date is in the range.
|
||||
skip_download: Skip the actual download of the video file
|
||||
@@ -283,10 +301,9 @@ class YoutubeDL(object):
|
||||
postprocessors: A list of dictionaries, each with an entry
|
||||
* key: The name of the postprocessor. See
|
||||
yt_dlp/postprocessor/__init__.py for a list.
|
||||
* _after_move: Optional. If True, run this post_processor
|
||||
after 'MoveFilesAfterDownload'
|
||||
as well as any further keyword arguments for the
|
||||
postprocessor.
|
||||
* when: When to run the postprocessor. Can be one of
|
||||
pre_process|before_dl|post_process|after_move.
|
||||
Assumed to be 'post_process' if not given
|
||||
post_hooks: A list of functions that get called as the final step
|
||||
for each video file, after all postprocessors have been
|
||||
called. The filename will be passed as the only argument.
|
||||
@@ -359,9 +376,13 @@ class YoutubeDL(object):
|
||||
geo_bypass_country
|
||||
|
||||
The following options determine which downloader is picked:
|
||||
external_downloader: Executable of the external downloader to call.
|
||||
None or unset for standard (built-in) downloader.
|
||||
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
|
||||
external_downloader: A dictionary of protocol keys and the executable of the
|
||||
external downloader to use for it. The allowed protocols
|
||||
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
|
||||
Set the value to 'native' to use the native downloader
|
||||
hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
|
||||
or {'m3u8': 'ffmpeg'} instead.
|
||||
Use the native HLS downloader instead of ffmpeg/avconv
|
||||
if True, otherwise use ffmpeg/avconv if False, otherwise
|
||||
use downloader suggested by extractor if None.
|
||||
|
||||
@@ -411,7 +432,7 @@ class YoutubeDL(object):
|
||||
|
||||
params = None
|
||||
_ies = []
|
||||
_pps = {'beforedl': [], 'aftermove': [], 'normal': []}
|
||||
_pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
|
||||
__prepare_filename_warned = False
|
||||
_first_webpage_request = True
|
||||
_download_retcode = None
|
||||
@@ -426,7 +447,7 @@ class YoutubeDL(object):
|
||||
params = {}
|
||||
self._ies = []
|
||||
self._ies_instances = {}
|
||||
self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
|
||||
self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
|
||||
self.__prepare_filename_warned = False
|
||||
self._first_webpage_request = True
|
||||
self._post_hooks = []
|
||||
@@ -539,7 +560,7 @@ class YoutubeDL(object):
|
||||
when = pp_def['when']
|
||||
del pp_def['when']
|
||||
else:
|
||||
when = 'normal'
|
||||
when = 'post_process'
|
||||
pp = pp_class(self, **compat_kwargs(pp_def))
|
||||
self.add_post_processor(pp, when=when)
|
||||
|
||||
@@ -593,7 +614,7 @@ class YoutubeDL(object):
|
||||
for ie in gen_extractor_classes():
|
||||
self.add_info_extractor(ie)
|
||||
|
||||
def add_post_processor(self, pp, when='normal'):
|
||||
def add_post_processor(self, pp, when='post_process'):
|
||||
"""Add a PostProcessor object to the end of the chain."""
|
||||
self._pps[when].append(pp)
|
||||
pp.set_downloader(self)
|
||||
@@ -776,6 +797,7 @@ class YoutubeDL(object):
|
||||
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
|
||||
""" Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
|
||||
template_dict = dict(info_dict)
|
||||
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
||||
|
||||
# duration_string
|
||||
template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
|
||||
@@ -801,18 +823,10 @@ class YoutubeDL(object):
|
||||
elif template_dict.get('width'):
|
||||
template_dict['resolution'] = '%dx?' % template_dict['width']
|
||||
|
||||
if sanitize is None:
|
||||
sanitize = lambda k, v: v
|
||||
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
||||
for k, v in template_dict.items()
|
||||
if v is not None and not isinstance(v, (list, tuple, dict)))
|
||||
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
||||
template_dict = collections.defaultdict(lambda: na, template_dict)
|
||||
|
||||
# For fields playlist_index and autonumber convert all occurrences
|
||||
# of %(field)s to %(field)0Nd for backward compatibility
|
||||
field_size_compat_map = {
|
||||
'playlist_index': len(str(template_dict['n_entries'])),
|
||||
'playlist_index': len(str(template_dict.get('n_entries', na))),
|
||||
'autonumber': autonumber_size,
|
||||
}
|
||||
FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
|
||||
@@ -824,32 +838,51 @@ class YoutubeDL(object):
|
||||
outtmpl)
|
||||
|
||||
numeric_fields = list(self._NUMERIC_FIELDS)
|
||||
if sanitize is None:
|
||||
sanitize = lambda k, v: v
|
||||
|
||||
# Format date
|
||||
FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
|
||||
for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
|
||||
conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
|
||||
if key in template_dict:
|
||||
continue
|
||||
value = strftime_or_none(template_dict.get(field), frmt, na)
|
||||
if conv_type in 'crs': # string
|
||||
value = sanitize(field, value)
|
||||
else: # number
|
||||
numeric_fields.append(key)
|
||||
value = float_or_none(value, default=None)
|
||||
# Internal Formatting = name.key1.key2+number>strf
|
||||
INTERNAL_FORMAT_RE = FORMAT_RE.format(
|
||||
r'''(?P<final_key>
|
||||
(?P<fields>\w+(?:\.[-\w]+)*)
|
||||
(?:\+(?P<add>-?\d+(?:\.\d+)?))?
|
||||
(?:>(?P<strf_format>.+?))?
|
||||
)''')
|
||||
for mobj in re.finditer(INTERNAL_FORMAT_RE, outtmpl):
|
||||
mobj = mobj.groupdict()
|
||||
# Object traversal
|
||||
fields = mobj['fields'].split('.')
|
||||
final_key = mobj['final_key']
|
||||
value = traverse_dict(template_dict, fields)
|
||||
# Offset the value
|
||||
if mobj['add']:
|
||||
value = float_or_none(value)
|
||||
if value is not None:
|
||||
value = value + float(mobj['add'])
|
||||
# Datetime formatting
|
||||
if mobj['strf_format']:
|
||||
value = strftime_or_none(value, mobj['strf_format'])
|
||||
if mobj['type'] in 'crs' and value is not None: # string
|
||||
value = sanitize('%{}'.format(mobj['type']) % fields[-1], value)
|
||||
else: # numeric
|
||||
numeric_fields.append(final_key)
|
||||
value = float_or_none(value)
|
||||
if value is not None:
|
||||
template_dict[key] = value
|
||||
template_dict[final_key] = value
|
||||
|
||||
# Missing numeric fields used together with integer presentation types
|
||||
# in format specification will break the argument substitution since
|
||||
# string NA placeholder is returned for missing fields. We will patch
|
||||
# output template for missing fields to meet string presentation type.
|
||||
for numeric_field in numeric_fields:
|
||||
if numeric_field not in template_dict:
|
||||
if template_dict.get(numeric_field) is None:
|
||||
outtmpl = re.sub(
|
||||
FORMAT_RE.format(re.escape(numeric_field)),
|
||||
r'%({0})s'.format(numeric_field), outtmpl)
|
||||
|
||||
template_dict = collections.defaultdict(lambda: na, (
|
||||
(k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
||||
for k, v in template_dict.items() if v is not None))
|
||||
return outtmpl, template_dict
|
||||
|
||||
def _prepare_filename(self, info_dict, tmpl_type='default'):
|
||||
@@ -1309,6 +1342,8 @@ class YoutubeDL(object):
|
||||
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
||||
|
||||
self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
|
||||
failures = 0
|
||||
max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
||||
@@ -1333,6 +1368,12 @@ class YoutubeDL(object):
|
||||
continue
|
||||
|
||||
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||
if not entry_result:
|
||||
failures += 1
|
||||
if failures >= max_failures:
|
||||
self.report_error(
|
||||
'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
|
||||
break
|
||||
# TODO: skip failed (empty) entries?
|
||||
playlist_results.append(entry_result)
|
||||
ie_result['entries'] = playlist_results
|
||||
@@ -1541,6 +1582,66 @@ class YoutubeDL(object):
|
||||
selectors.append(current_selector)
|
||||
return selectors
|
||||
|
||||
def _merge(formats_pair):
    """Combine two selected formats into a single merged-format dict.

    formats_pair is a 2-tuple of format dicts. Either of them may itself
    be the result of an earlier merge, in which case its
    'requested_formats' list is flattened into the new result.

    When allow_multiple_streams forbids more than one video and/or audio
    stream, only the first format carrying each restricted stream type is
    kept and later ones are dropped.

    Returns the single remaining format dict unchanged if only one format
    is left, otherwise a new dict describing the merged download
    ('requested_formats', 'format', 'format_id', 'ext' and, when there is
    exactly one video/audio stream, that stream's properties).
    """
    format_1, format_2 = formats_pair

    formats_info = []
    formats_info.extend(format_1.get('requested_formats', (format_1,)))
    formats_info.extend(format_2.get('requested_formats', (format_2,)))

    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
        # Build a filtered list rather than popping from the list being
        # enumerated: removing items mid-iteration shifts the remaining
        # entries so that the one following each removal is never examined
        # (letting surplus streams through), and a format carrying both
        # codecs could trigger two removals.
        get_no_more = {'video': False, 'audio': False}
        kept_formats = []
        for fmt_info in formats_info:
            # Restricted stream types that this format actually contains
            restricted = [
                aud_vid for aud_vid in ('audio', 'video')
                if not allow_multiple_streams[aud_vid]
                and fmt_info.get(aud_vid[0] + 'codec') != 'none']
            if any(get_no_more[aud_vid] for aud_vid in restricted):
                continue  # would duplicate a stream type that is already taken
            for aud_vid in restricted:
                get_no_more[aud_vid] = True
            kept_formats.append(fmt_info)
        formats_info = kept_formats

    if len(formats_info) == 1:
        return formats_info[0]

    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

    # An explicit merge_output_format wins; otherwise reuse the container of
    # the sole video (or sole audio when there is no video) and fall back to
    # mkv for every other combination
    output_ext = self.params.get('merge_output_format')
    if not output_ext:
        if the_only_video:
            output_ext = the_only_video['ext']
        elif the_only_audio and not video_fmts:
            output_ext = the_only_audio['ext']
        else:
            output_ext = 'mkv'

    new_dict = {
        'requested_formats': formats_info,
        'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
        'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
        'ext': output_ext,
    }

    # Stream properties are only meaningful when exactly one stream of that
    # type is present in the merged result
    if the_only_video:
        new_dict.update({
            'width': the_only_video.get('width'),
            'height': the_only_video.get('height'),
            'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
            'fps': the_only_video.get('fps'),
            'vcodec': the_only_video.get('vcodec'),
            'vbr': the_only_video.get('vbr'),
            'stretched_ratio': the_only_video.get('stretched_ratio'),
        })

    if the_only_audio:
        new_dict.update({
            'acodec': the_only_audio.get('acodec'),
            'abr': the_only_audio.get('abr'),
        })

    return new_dict
|
||||
|
||||
def _build_selector_function(selector):
|
||||
if isinstance(selector, list): # ,
|
||||
fs = [_build_selector_function(s) for s in selector]
|
||||
@@ -1565,32 +1666,46 @@ class YoutubeDL(object):
|
||||
return []
|
||||
|
||||
elif selector.type == SINGLE: # atom
|
||||
format_spec = selector.selector if selector.selector is not None else 'best'
|
||||
format_spec = (selector.selector or 'best').lower()
|
||||
|
||||
# TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
|
||||
if format_spec == 'all':
|
||||
def selector_function(ctx):
|
||||
formats = list(ctx['formats'])
|
||||
if formats:
|
||||
for f in formats:
|
||||
yield f
|
||||
elif format_spec == 'mergeall':
|
||||
def selector_function(ctx):
|
||||
formats = list(ctx['formats'])
|
||||
if not formats:
|
||||
return
|
||||
merged_format = formats[-1]
|
||||
for f in formats[-2::-1]:
|
||||
merged_format = _merge((merged_format, f))
|
||||
yield merged_format
|
||||
|
||||
else:
|
||||
format_fallback = False
|
||||
format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
|
||||
if format_spec_obj is not None:
|
||||
format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
|
||||
format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
|
||||
not_format_type = 'v' if format_type == 'a' else 'a'
|
||||
format_modified = format_spec_obj.group(3) is not None
|
||||
mobj = re.match(
|
||||
r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
|
||||
format_spec)
|
||||
if mobj is not None:
|
||||
format_idx = int_or_none(mobj.group('n'), default=1)
|
||||
format_idx = format_idx - 1 if mobj.group('bw')[0] == 'w' else -format_idx
|
||||
format_type = (mobj.group('type') or [None])[0]
|
||||
not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
|
||||
format_modified = mobj.group('mod') is not None
|
||||
|
||||
format_fallback = not format_type and not format_modified # for b, w
|
||||
filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
|
||||
if format_type and format_modified # bv*, ba*, wv*, wa*
|
||||
else (lambda f: f.get(not_format_type + 'codec') == 'none')
|
||||
if format_type # bv, ba, wv, wa
|
||||
else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
|
||||
if not format_modified # b, w
|
||||
else None) # b*, w*
|
||||
filter_f = (
|
||||
(lambda f: f.get('%scodec' % format_type) != 'none')
|
||||
if format_type and format_modified # bv*, ba*, wv*, wa*
|
||||
else (lambda f: f.get('%scodec' % not_format_type) == 'none')
|
||||
if format_type # bv, ba, wv, wa
|
||||
else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
|
||||
if not format_modified # b, w
|
||||
else None) # b*, w*
|
||||
else:
|
||||
format_idx = -1
|
||||
filter_f = ((lambda f: f.get('ext') == format_spec)
|
||||
@@ -1602,75 +1717,18 @@ class YoutubeDL(object):
|
||||
if not formats:
|
||||
return
|
||||
matches = list(filter(filter_f, formats)) if filter_f is not None else formats
|
||||
if matches:
|
||||
n = len(matches)
|
||||
if -n <= format_idx < n:
|
||||
yield matches[format_idx]
|
||||
elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
|
||||
elif format_fallback and ctx['incomplete_formats']:
|
||||
# for extractors with incomplete formats (audio only (soundcloud)
|
||||
# or video only (imgur)) best/worst will fallback to
|
||||
# best/worst {video,audio}-only format
|
||||
yield formats[format_idx]
|
||||
n = len(formats)
|
||||
if -n <= format_idx < n:
|
||||
yield formats[format_idx]
|
||||
|
||||
elif selector.type == MERGE: # +
|
||||
def _merge(formats_pair):
|
||||
format_1, format_2 = formats_pair
|
||||
|
||||
formats_info = []
|
||||
formats_info.extend(format_1.get('requested_formats', (format_1,)))
|
||||
formats_info.extend(format_2.get('requested_formats', (format_2,)))
|
||||
|
||||
if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
|
||||
get_no_more = {"video": False, "audio": False}
|
||||
for (i, fmt_info) in enumerate(formats_info):
|
||||
for aud_vid in ["audio", "video"]:
|
||||
if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
|
||||
if get_no_more[aud_vid]:
|
||||
formats_info.pop(i)
|
||||
get_no_more[aud_vid] = True
|
||||
|
||||
if len(formats_info) == 1:
|
||||
return formats_info[0]
|
||||
|
||||
video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
|
||||
audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
|
||||
|
||||
the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
|
||||
the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
|
||||
|
||||
output_ext = self.params.get('merge_output_format')
|
||||
if not output_ext:
|
||||
if the_only_video:
|
||||
output_ext = the_only_video['ext']
|
||||
elif the_only_audio and not video_fmts:
|
||||
output_ext = the_only_audio['ext']
|
||||
else:
|
||||
output_ext = 'mkv'
|
||||
|
||||
new_dict = {
|
||||
'requested_formats': formats_info,
|
||||
'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
|
||||
'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
|
||||
'ext': output_ext,
|
||||
}
|
||||
|
||||
if the_only_video:
|
||||
new_dict.update({
|
||||
'width': the_only_video.get('width'),
|
||||
'height': the_only_video.get('height'),
|
||||
'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
|
||||
'fps': the_only_video.get('fps'),
|
||||
'vcodec': the_only_video.get('vcodec'),
|
||||
'vbr': the_only_video.get('vbr'),
|
||||
'stretched_ratio': the_only_video.get('stretched_ratio'),
|
||||
})
|
||||
|
||||
if the_only_audio:
|
||||
new_dict.update({
|
||||
'acodec': the_only_audio.get('acodec'),
|
||||
'abr': the_only_audio.get('abr'),
|
||||
})
|
||||
|
||||
return new_dict
|
||||
|
||||
selector_1, selector_2 = map(_build_selector_function, selector.selector)
|
||||
|
||||
def selector_function(ctx):
|
||||
@@ -1856,7 +1914,10 @@ class YoutubeDL(object):
|
||||
formats = info_dict['formats']
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No video formats found!')
|
||||
if not self.params.get('ignore_no_formats_error'):
|
||||
raise ExtractorError('No video formats found!')
|
||||
else:
|
||||
self.report_warning('No video formats found!')
|
||||
|
||||
def is_wellformed(f):
|
||||
url = f.get('url')
|
||||
@@ -1920,13 +1981,15 @@ class YoutubeDL(object):
|
||||
|
||||
# TODO Central sorting goes here
|
||||
|
||||
if formats[0] is not info_dict:
|
||||
if formats and formats[0] is not info_dict:
|
||||
# only set the 'formats' fields if the original info_dict list them
|
||||
# otherwise we end up with a circular reference, the first (and unique)
|
||||
# element in the 'formats' field in info_dict is info_dict itself,
|
||||
# which can't be exported to json
|
||||
info_dict['formats'] = formats
|
||||
if self.params.get('listformats'):
|
||||
if not info_dict.get('formats'):
|
||||
raise ExtractorError('No video formats found', expected=True)
|
||||
self.list_formats(info_dict)
|
||||
return
|
||||
|
||||
@@ -1966,19 +2029,25 @@ class YoutubeDL(object):
|
||||
|
||||
formats_to_download = list(format_selector(ctx))
|
||||
if not formats_to_download:
|
||||
raise ExtractorError('requested format not available',
|
||||
expected=True)
|
||||
|
||||
if download:
|
||||
self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
|
||||
if not self.params.get('ignore_no_formats_error'):
|
||||
raise ExtractorError('Requested format is not available', expected=True)
|
||||
else:
|
||||
self.report_warning('Requested format is not available')
|
||||
elif download:
|
||||
self.to_screen(
|
||||
'[info] %s: Downloading format(s) %s'
|
||||
% (info_dict['id'], ", ".join([f['format_id'] for f in formats_to_download])))
|
||||
if len(formats_to_download) > 1:
|
||||
self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
|
||||
for format in formats_to_download:
|
||||
self.to_screen(
|
||||
'[info] %s: Downloading video in %s formats'
|
||||
% (info_dict['id'], len(formats_to_download)))
|
||||
for fmt in formats_to_download:
|
||||
new_info = dict(info_dict)
|
||||
new_info.update(format)
|
||||
new_info.update(fmt)
|
||||
self.process_info(new_info)
|
||||
# We update the info dict with the best quality format (backwards compatibility)
|
||||
info_dict.update(formats_to_download[-1])
|
||||
if formats_to_download:
|
||||
info_dict.update(formats_to_download[-1])
|
||||
return info_dict
|
||||
|
||||
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
|
||||
@@ -1996,15 +2065,28 @@ class YoutubeDL(object):
|
||||
available_subs):
|
||||
return None
|
||||
|
||||
all_sub_langs = available_subs.keys()
|
||||
if self.params.get('allsubtitles', False):
|
||||
requested_langs = available_subs.keys()
|
||||
requested_langs = all_sub_langs
|
||||
elif self.params.get('subtitleslangs', False):
|
||||
requested_langs = set()
|
||||
for lang in self.params.get('subtitleslangs'):
|
||||
if lang == 'all':
|
||||
requested_langs.update(all_sub_langs)
|
||||
continue
|
||||
discard = lang[0] == '-'
|
||||
if discard:
|
||||
lang = lang[1:]
|
||||
current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
|
||||
if discard:
|
||||
for lang in current_langs:
|
||||
requested_langs.discard(lang)
|
||||
else:
|
||||
requested_langs.update(current_langs)
|
||||
elif 'en' in available_subs:
|
||||
requested_langs = ['en']
|
||||
else:
|
||||
if self.params.get('subtitleslangs', False):
|
||||
requested_langs = self.params.get('subtitleslangs')
|
||||
elif 'en' in available_subs:
|
||||
requested_langs = ['en']
|
||||
else:
|
||||
requested_langs = [list(available_subs.keys())[0]]
|
||||
requested_langs = [list(all_sub_langs)[0]]
|
||||
|
||||
formats_query = self.params.get('subtitlesformat', 'best')
|
||||
formats_preference = formats_query.split('/') if formats_query else []
|
||||
@@ -2085,13 +2167,12 @@ class YoutubeDL(object):
|
||||
self.post_extract(info_dict)
|
||||
self._num_downloads += 1
|
||||
|
||||
info_dict = self.pre_process(info_dict)
|
||||
info_dict, _ = self.pre_process(info_dict)
|
||||
|
||||
# info_dict['_filename'] needs to be set for backward compatibility
|
||||
info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
|
||||
temp_filename = self.prepare_filename(info_dict, 'temp')
|
||||
files_to_move = {}
|
||||
skip_dl = self.params.get('skip_download', False)
|
||||
|
||||
# Forced printings
|
||||
self.__forced_printings(info_dict, full_filename, incomplete=False)
|
||||
@@ -2168,11 +2249,9 @@ class YoutubeDL(object):
|
||||
# ie = self.get_info_extractor(info_dict['extractor_key'])
|
||||
for sub_lang, sub_info in subtitles.items():
|
||||
sub_format = sub_info['ext']
|
||||
sub_fn = self.prepare_filename(info_dict, 'subtitle')
|
||||
sub_filename = subtitles_filename(
|
||||
temp_filename if not skip_dl else sub_fn,
|
||||
sub_lang, sub_format, info_dict.get('ext'))
|
||||
sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
|
||||
sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
|
||||
sub_filename_final = subtitles_filename(
|
||||
self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
|
||||
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
|
||||
sub_info['filepath'] = sub_filename
|
||||
@@ -2200,28 +2279,6 @@ class YoutubeDL(object):
|
||||
(sub_lang, error_to_compat_str(err)))
|
||||
continue
|
||||
|
||||
if skip_dl:
|
||||
if self.params.get('convertsubtitles', False):
|
||||
# subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
|
||||
filename_real_ext = os.path.splitext(full_filename)[1][1:]
|
||||
filename_wo_ext = (
|
||||
os.path.splitext(full_filename)[0]
|
||||
if filename_real_ext == info_dict['ext']
|
||||
else full_filename)
|
||||
afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
|
||||
# if subconv.available:
|
||||
# info_dict['__postprocessors'].append(subconv)
|
||||
if os.path.exists(encodeFilename(afilename)):
|
||||
self.to_screen(
|
||||
'[download] %s has already been downloaded and '
|
||||
'converted' % afilename)
|
||||
else:
|
||||
try:
|
||||
self.post_process(full_filename, info_dict, files_to_move)
|
||||
except PostProcessingError as err:
|
||||
self.report_error('Postprocessing: %s' % str(err))
|
||||
return
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = self.prepare_filename(info_dict, 'infojson')
|
||||
if not self._ensure_dir_exists(encodeFilename(infofn)):
|
||||
@@ -2237,11 +2294,10 @@ class YoutubeDL(object):
|
||||
return
|
||||
info_dict['__infojson_filename'] = infofn
|
||||
|
||||
thumbfn = self.prepare_filename(info_dict, 'thumbnail')
|
||||
thumb_fn_temp = temp_filename if not skip_dl else thumbfn
|
||||
for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
|
||||
thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
|
||||
thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
|
||||
for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
|
||||
thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
|
||||
thumb_filename = replace_extension(
|
||||
self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
|
||||
files_to_move[thumb_filename_temp] = thumb_filename
|
||||
|
||||
# Write internet shortcut files
|
||||
@@ -2293,9 +2349,20 @@ class YoutubeDL(object):
|
||||
if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
|
||||
return
|
||||
|
||||
# Download
|
||||
try:
|
||||
info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
|
||||
except PostProcessingError as err:
|
||||
self.report_error('Preprocessing: %s' % str(err))
|
||||
return
|
||||
|
||||
must_record_download_archive = False
|
||||
if not skip_dl:
|
||||
if self.params.get('skip_download', False):
|
||||
info_dict['filepath'] = temp_filename
|
||||
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
|
||||
info_dict['__files_to_move'] = files_to_move
|
||||
info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
|
||||
else:
|
||||
# Download
|
||||
try:
|
||||
|
||||
def existing_file(*filepaths):
|
||||
@@ -2591,7 +2658,7 @@ class YoutubeDL(object):
|
||||
def actual_post_extract(info_dict):
|
||||
if info_dict.get('_type') in ('playlist', 'multi_video'):
|
||||
for video_dict in info_dict.get('entries', {}):
|
||||
actual_post_extract(video_dict)
|
||||
actual_post_extract(video_dict or {})
|
||||
return
|
||||
|
||||
if '__post_extractor' not in info_dict:
|
||||
@@ -2602,13 +2669,14 @@ class YoutubeDL(object):
|
||||
del info_dict['__post_extractor']
|
||||
return
|
||||
|
||||
actual_post_extract(info_dict)
|
||||
actual_post_extract(info_dict or {})
|
||||
|
||||
def pre_process(self, ie_info):
|
||||
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
|
||||
info = dict(ie_info)
|
||||
for pp in self._pps['beforedl']:
|
||||
info['__files_to_move'] = files_to_move or {}
|
||||
for pp in self._pps[key]:
|
||||
info = self.run_pp(pp, info)
|
||||
return info
|
||||
return info, info.pop('__files_to_move', None)
|
||||
|
||||
def post_process(self, filename, ie_info, files_to_move=None):
|
||||
"""Run all the postprocessors on the given file."""
|
||||
@@ -2616,11 +2684,11 @@ class YoutubeDL(object):
|
||||
info['filepath'] = filename
|
||||
info['__files_to_move'] = files_to_move or {}
|
||||
|
||||
for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
|
||||
for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
|
||||
info = self.run_pp(pp, info)
|
||||
info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
|
||||
del info['__files_to_move']
|
||||
for pp in self._pps['aftermove']:
|
||||
for pp in self._pps['after_move']:
|
||||
info = self.run_pp(pp, info)
|
||||
return info
|
||||
|
||||
@@ -2761,7 +2829,7 @@ class YoutubeDL(object):
|
||||
'|',
|
||||
format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
|
||||
format_field(f, 'tbr', '%4dk'),
|
||||
f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n").replace('niconico_', ''),
|
||||
shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
|
||||
'|',
|
||||
format_field(f, 'vcodec', default='unknown').replace('none', ''),
|
||||
format_field(f, 'vbr', '%4dk'),
|
||||
|
||||
@@ -228,8 +228,11 @@ def _real_main(argv=None):
|
||||
if not re.match(remux_regex, opts.remuxvideo):
|
||||
parser.error('invalid video remux format specified')
|
||||
if opts.convertsubtitles is not None:
|
||||
if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
|
||||
if opts.convertsubtitles not in ('srt', 'vtt', 'ass', 'lrc'):
|
||||
parser.error('invalid subtitle format specified')
|
||||
if opts.convertthumbnails is not None:
|
||||
if opts.convertthumbnails not in ('jpg', ):
|
||||
parser.error('invalid thumbnail format specified')
|
||||
|
||||
if opts.date is not None:
|
||||
date = DateRange.day(opts.date)
|
||||
@@ -322,7 +325,22 @@ def _real_main(argv=None):
|
||||
postprocessors.append({
|
||||
'key': 'MetadataFromField',
|
||||
'formats': opts.metafromfield,
|
||||
'when': 'beforedl'
|
||||
# Run this immediately after extraction is complete
|
||||
'when': 'pre_process'
|
||||
})
|
||||
if opts.convertsubtitles:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegSubtitlesConvertor',
|
||||
'format': opts.convertsubtitles,
|
||||
# Run this before the actual video download
|
||||
'when': 'before_dl'
|
||||
})
|
||||
if opts.convertthumbnails:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegThumbnailsConvertor',
|
||||
'format': opts.convertthumbnails,
|
||||
# Run this before the actual video download
|
||||
'when': 'before_dl'
|
||||
})
|
||||
if opts.extractaudio:
|
||||
postprocessors.append({
|
||||
@@ -351,15 +369,11 @@ def _real_main(argv=None):
|
||||
# so metadata can be added here.
|
||||
if opts.addmetadata:
|
||||
postprocessors.append({'key': 'FFmpegMetadata'})
|
||||
if opts.convertsubtitles:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegSubtitlesConvertor',
|
||||
'format': opts.convertsubtitles,
|
||||
})
|
||||
if opts.embedsubtitles:
|
||||
already_have_subtitle = opts.writesubtitles
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
# already_have_subtitle = True prevents the file from being deleted after embedding
|
||||
'already_have_subtitle': already_have_subtitle
|
||||
})
|
||||
if not already_have_subtitle:
|
||||
@@ -368,15 +382,9 @@ def _real_main(argv=None):
|
||||
# this was the old behaviour if only --all-sub was given.
|
||||
if opts.allsubtitles and not opts.writeautomaticsub:
|
||||
opts.writesubtitles = True
|
||||
if opts.embedthumbnail:
|
||||
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
|
||||
postprocessors.append({
|
||||
'key': 'EmbedThumbnail',
|
||||
'already_have_thumbnail': already_have_thumbnail
|
||||
})
|
||||
if not already_have_thumbnail:
|
||||
opts.writethumbnail = True
|
||||
# This should be below most ffmpeg PP because it may cut parts out from the video
|
||||
# This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment
|
||||
# but must be below EmbedSubtitle and FFmpegMetadata
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29
|
||||
# If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found
|
||||
if opts.sponskrub is not False:
|
||||
postprocessors.append({
|
||||
@@ -387,6 +395,15 @@ def _real_main(argv=None):
|
||||
'force': opts.sponskrub_force,
|
||||
'ignoreerror': opts.sponskrub is None,
|
||||
})
|
||||
if opts.embedthumbnail:
|
||||
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
|
||||
postprocessors.append({
|
||||
'key': 'EmbedThumbnail',
|
||||
# already_have_thumbnail = True prevents the file from being deleted after embedding
|
||||
'already_have_thumbnail': already_have_thumbnail
|
||||
})
|
||||
if not already_have_thumbnail:
|
||||
opts.writethumbnail = True
|
||||
if opts.split_chapters:
|
||||
postprocessors.append({'key': 'FFmpegSplitChapters'})
|
||||
# XAttrMetadataPP should be run after post-processors that may change file contents
|
||||
@@ -397,7 +414,8 @@ def _real_main(argv=None):
|
||||
postprocessors.append({
|
||||
'key': 'ExecAfterDownload',
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
'when': 'aftermove'
|
||||
# Run this only after the files have been moved to their final locations
|
||||
'when': 'after_move'
|
||||
})
|
||||
|
||||
def report_args_compat(arg, name):
|
||||
@@ -423,7 +441,6 @@ def _real_main(argv=None):
|
||||
else match_filter_func(opts.match_filter))
|
||||
|
||||
ydl_opts = {
|
||||
'convertsubtitles': opts.convertsubtitles,
|
||||
'usenetrc': opts.usenetrc,
|
||||
'username': opts.username,
|
||||
'password': opts.password,
|
||||
@@ -449,6 +466,7 @@ def _real_main(argv=None):
|
||||
'skip_download': opts.skip_download,
|
||||
'format': opts.format,
|
||||
'allow_unplayable_formats': opts.allow_unplayable_formats,
|
||||
'ignore_no_formats_error': opts.ignore_no_formats_error,
|
||||
'format_sort': opts.format_sort,
|
||||
'format_sort_force': opts.format_sort_force,
|
||||
'allow_multiple_video_streams': opts.allow_multiple_video_streams,
|
||||
@@ -526,6 +544,7 @@ def _real_main(argv=None):
|
||||
'download_archive': download_archive_fn,
|
||||
'break_on_existing': opts.break_on_existing,
|
||||
'break_on_reject': opts.break_on_reject,
|
||||
'skip_playlist_after_errors': opts.skip_playlist_after_errors,
|
||||
'cookiefile': opts.cookiefile,
|
||||
'nocheckcertificate': opts.no_check_certificate,
|
||||
'prefer_insecure': opts.prefer_insecure,
|
||||
|
||||
@@ -78,6 +78,15 @@ try:
|
||||
except ImportError: # Python 2
|
||||
import Cookie as compat_cookies
|
||||
|
||||
if sys.version_info[0] == 2:
|
||||
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
||||
def load(self, rawdata):
|
||||
if isinstance(rawdata, compat_str):
|
||||
rawdata = str(rawdata)
|
||||
return super(compat_cookies_SimpleCookie, self).load(rawdata)
|
||||
else:
|
||||
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
||||
|
||||
try:
|
||||
import html.entities as compat_html_entities
|
||||
except ImportError: # Python 2
|
||||
@@ -3020,6 +3029,7 @@ __all__ = [
|
||||
'compat_cookiejar',
|
||||
'compat_cookiejar_Cookie',
|
||||
'compat_cookies',
|
||||
'compat_cookies_SimpleCookie',
|
||||
'compat_ctypes_WINFUNCTYPE',
|
||||
'compat_etree_Element',
|
||||
'compat_etree_fromstring',
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
)
|
||||
@@ -42,6 +43,23 @@ PROTOCOL_MAP = {
|
||||
}
|
||||
|
||||
|
||||
def shorten_protocol_name(proto, simplify=False):
|
||||
short_protocol_names = {
|
||||
'm3u8_native': 'm3u8_n',
|
||||
'http_dash_segments': 'dash',
|
||||
'niconico_dmc': 'dmc',
|
||||
}
|
||||
if simplify:
|
||||
short_protocol_names.update({
|
||||
'https': 'http',
|
||||
'ftps': 'ftp',
|
||||
'm3u8_native': 'm3u8',
|
||||
'm3u8_frag_urls': 'm3u8',
|
||||
'dash_frag_urls': 'dash',
|
||||
})
|
||||
return short_protocol_names.get(proto, proto)
|
||||
|
||||
|
||||
def get_suitable_downloader(info_dict, params={}, default=HttpFD):
|
||||
"""Get the downloader class that can handle the info dict."""
|
||||
protocol = determine_protocol(info_dict)
|
||||
@@ -50,16 +68,24 @@ def get_suitable_downloader(info_dict, params={}, default=HttpFD):
|
||||
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
||||
# return FFmpegFD
|
||||
|
||||
external_downloader = params.get('external_downloader')
|
||||
if external_downloader is not None:
|
||||
downloaders = params.get('external_downloader')
|
||||
external_downloader = (
|
||||
downloaders if isinstance(downloaders, compat_str) or downloaders is None
|
||||
else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default')))
|
||||
if external_downloader and external_downloader.lower() == 'native':
|
||||
external_downloader = 'native'
|
||||
|
||||
if external_downloader not in (None, 'native'):
|
||||
ed = get_external_downloader(external_downloader)
|
||||
if ed.can_download(info_dict, external_downloader):
|
||||
return ed
|
||||
|
||||
if protocol.startswith('m3u8'):
|
||||
if protocol in ('m3u8', 'm3u8_native'):
|
||||
if info_dict.get('is_live'):
|
||||
return FFmpegFD
|
||||
elif _get_real_downloader(info_dict, 'frag_urls', params, None):
|
||||
elif external_downloader == 'native':
|
||||
return HlsFD
|
||||
elif _get_real_downloader(info_dict, 'm3u8_frag_urls', params, None):
|
||||
return HlsFD
|
||||
elif params.get('hls_prefer_native') is True:
|
||||
return HlsFD
|
||||
@@ -70,6 +96,7 @@ def get_suitable_downloader(info_dict, params={}, default=HttpFD):
|
||||
|
||||
|
||||
__all__ = [
|
||||
'get_suitable_downloader',
|
||||
'FileDownloader',
|
||||
'get_suitable_downloader',
|
||||
'shorten_protocol_name',
|
||||
]
|
||||
|
||||
@@ -20,7 +20,7 @@ from ..utils import (
|
||||
class DashSegmentsFD(FragmentFD):
|
||||
"""
|
||||
Download segments in a DASH manifest. External downloaders can take over
|
||||
the fragment downloads by supporting the 'frag_urls' protocol
|
||||
the fragment downloads by supporting the 'dash_frag_urls' protocol
|
||||
"""
|
||||
|
||||
FD_NAME = 'dashsegments'
|
||||
@@ -30,7 +30,7 @@ class DashSegmentsFD(FragmentFD):
|
||||
fragments = info_dict['fragments'][:1] if self.params.get(
|
||||
'test', False) else info_dict['fragments']
|
||||
|
||||
real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
|
||||
real_downloader = _get_real_downloader(info_dict, 'dash_frag_urls', self.params, None)
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
|
||||
@@ -81,11 +81,15 @@ class ExternalFD(FileDownloader):
|
||||
|
||||
@property
|
||||
def exe(self):
|
||||
return self.params.get('external_downloader')
|
||||
return self.get_basename()
|
||||
|
||||
@classmethod
|
||||
def available(cls, path=None):
|
||||
return check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
|
||||
path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
|
||||
if path:
|
||||
cls.exe = path
|
||||
return path
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def supports(cls, info_dict):
|
||||
@@ -259,7 +263,7 @@ class WgetFD(ExternalFD):
|
||||
|
||||
class Aria2cFD(ExternalFD):
|
||||
AVAILABLE_OPT = '-v'
|
||||
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'frag_urls')
|
||||
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls')
|
||||
|
||||
@staticmethod
|
||||
def supports_manifest(manifest):
|
||||
@@ -310,9 +314,11 @@ class Aria2cFD(ExternalFD):
|
||||
|
||||
|
||||
class HttpieFD(ExternalFD):
|
||||
AVAILABLE_OPT = '--version'
|
||||
|
||||
@classmethod
|
||||
def available(cls, path=None):
|
||||
return check_executable(path or 'http', ['--version'])
|
||||
return ExternalFD.available(cls, path or 'http')
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
||||
@@ -324,10 +330,11 @@ class HttpieFD(ExternalFD):
|
||||
|
||||
|
||||
class FFmpegFD(ExternalFD):
|
||||
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
|
||||
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'mms')
|
||||
|
||||
@classmethod
|
||||
def available(cls, path=None): # path is ignored for ffmpeg
|
||||
def available(cls, path=None):
|
||||
# TODO: Fix path for ffmpeg
|
||||
return FFmpegPostProcessor().available
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
@@ -484,4 +491,4 @@ def get_external_downloader(external_downloader):
|
||||
downloader . """
|
||||
# Drop .exe extension on Windows
|
||||
bn = os.path.splitext(os.path.basename(external_downloader))[0]
|
||||
return _BY_NAME[bn]
|
||||
return _BY_NAME.get(bn)
|
||||
|
||||
@@ -32,7 +32,7 @@ from ..utils import (
|
||||
class HlsFD(FragmentFD):
|
||||
"""
|
||||
Download segments in a m3u8 manifest. External downloaders can take over
|
||||
the fragment downloads by supporting the 'frag_urls' protocol and
|
||||
the fragment downloads by supporting the 'm3u8_frag_urls' protocol and
|
||||
re-defining 'supports_manifest' function
|
||||
"""
|
||||
|
||||
@@ -95,7 +95,7 @@ class HlsFD(FragmentFD):
|
||||
# fd.add_progress_hook(ph)
|
||||
return fd.real_download(filename, info_dict)
|
||||
|
||||
real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
|
||||
real_downloader = _get_real_downloader(info_dict, 'm3u8_frag_urls', self.params, None)
|
||||
if real_downloader and not real_downloader.supports_manifest(s):
|
||||
real_downloader = None
|
||||
if real_downloader:
|
||||
|
||||
@@ -257,7 +257,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
if flag and flag == 'SUCCEED':
|
||||
break
|
||||
if flag == 'PARTIAL_ADULT':
|
||||
self._downloader.report_warning(
|
||||
self.report_warning(
|
||||
'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
|
||||
'Only content suitable for all ages will be downloaded. '
|
||||
'Provide account credentials if you wish to download restricted content.')
|
||||
@@ -323,7 +323,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
'url': file_url,
|
||||
'format_id': 'http',
|
||||
}]
|
||||
if not formats:
|
||||
if not formats and not self._downloader.params.get('ignore_no_formats'):
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
file_info = common_entry.copy()
|
||||
|
||||
@@ -36,12 +36,12 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
|
||||
if not formats:
|
||||
if fsk:
|
||||
raise ExtractorError(
|
||||
self.raise_no_formats(
|
||||
'This video is only available after 20:00', expected=True)
|
||||
elif media_info.get('_geoblocked'):
|
||||
self.raise_geo_restricted(
|
||||
'This video is not available due to geoblocking',
|
||||
countries=self._GEO_COUNTRIES)
|
||||
countries=self._GEO_COUNTRIES, metadata_available=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -272,7 +272,8 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
else: # request JSON file
|
||||
if not document_id:
|
||||
video_id = self._search_regex(
|
||||
r'/play/(?:config|media)/(\d+)', webpage, 'media id')
|
||||
(r'/play/(?:config|media|sola)/(\d+)', r'contentId["\']\s*:\s*(\d+)'),
|
||||
webpage, 'media id', default=None)
|
||||
info = self._extract_media_info(
|
||||
'http://www.ardmediathek.de/play/media/%s' % video_id,
|
||||
webpage, video_id)
|
||||
|
||||
101
yt_dlp/extractor/arnes.py
Normal file
101
yt_dlp/extractor/arnes.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class ArnesIE(InfoExtractor):
    # Extractor for Arnes Video (video.arnes.si). Watch, embed and direct API
    # URLs all resolve to the same public JSON metadata endpoint.
    IE_NAME = 'video.arnes.si'
    IE_DESC = 'Arnes Video'
    _VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
    _TESTS = [{
        'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
        'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
        'info_dict': {
            'id': 'a1qrWTOQfVoU',
            'ext': 'mp4',
            'title': 'Linearna neodvisnost, definicija',
            'description': 'Linearna neodvisnost, definicija',
            'license': 'PRIVATE',
            'creator': 'Polona Oblak',
            'timestamp': 1585063725,
            'upload_date': '20200324',
            'channel': 'Polona Oblak',
            'channel_id': 'q6pc04hw24cj',
            'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
            'duration': 596.75,
            'view_count': int,
            'tags': ['linearna_algebra'],
            'start_time': 10,
        }
    }, {
        'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
        'only_matching': True,
    }]
    _BASE_URL = 'https://video.arnes.si'

    def _real_extract(self, url):
        """Build the info dict for a single Arnes video.

        The 12-character id from the URL is looked up through the public
        video API. ``title`` is the only mandatory metadata field; every
        other field is optional and becomes None when the API omits it.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
        title = video['title']

        formats = []
        for media in (video.get('media') or []):
            media_url = media.get('url')
            if not media_url:
                continue
            formats.append({
                # media URLs are concatenated onto the site base URL
                'url': self._BASE_URL + media_url,
                'format_id': remove_start(media.get('format'), 'FORMAT_'),
                'format_note': media.get('formatTranslation'),
                'width': int_or_none(media.get('width')),
                'height': int_or_none(media.get('height')),
            })
        self._sort_formats(formats)

        channel = video.get('channel') or {}
        channel_id = channel.get('url')
        thumbnail = video.get('thumbnailUrl')

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            # thumbnailUrl is optional: concatenating None would raise
            # TypeError and abort an otherwise successful extraction
            'thumbnail': self._BASE_URL + thumbnail if thumbnail else None,
            'description': video.get('description'),
            'license': video.get('license'),
            'creator': video.get('author'),
            'timestamp': parse_iso8601(video.get('creationTime')),
            'channel': channel.get('name'),
            'channel_id': channel_id,
            'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
            # the raw duration is divided by 1000 to obtain seconds
            'duration': float_or_none(video.get('duration'), 1000),
            'view_count': int_or_none(video.get('views')),
            'tags': video.get('hashtags'),
            # honour an optional ?t=<seconds> start offset in the page URL
            'start_time': int_or_none(compat_parse_qs(
                compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
        }
|
||||
@@ -11,6 +11,7 @@ from ..compat import (
|
||||
compat_etree_Element,
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -25,8 +26,10 @@ from ..utils import (
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
@@ -761,8 +764,17 @@ class BBCIE(BBCCoUkIE):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# custom redirection to www.bbc.com
|
||||
# also, video with window.__INITIAL_DATA__
|
||||
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': 'p02xzws1',
|
||||
'ext': 'mp4',
|
||||
'title': "Pluto may have 'nitrogen glaciers'",
|
||||
'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1437785037,
|
||||
'upload_date': '20150725',
|
||||
},
|
||||
}, {
|
||||
# single video article embedded with data-media-vpid
|
||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||
@@ -1164,12 +1176,29 @@ class BBCIE(BBCCoUkIE):
|
||||
continue
|
||||
formats, subtitles = self._download_media_selector(item_id)
|
||||
self._sort_formats(formats)
|
||||
item_desc = None
|
||||
blocks = try_get(media, lambda x: x['summary']['blocks'], list)
|
||||
if blocks:
|
||||
summary = []
|
||||
for block in blocks:
|
||||
text = try_get(block, lambda x: x['model']['text'], compat_str)
|
||||
if text:
|
||||
summary.append(text)
|
||||
if summary:
|
||||
item_desc = '\n\n'.join(summary)
|
||||
item_time = None
|
||||
for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
|
||||
if try_get(meta, lambda x: x['label']) == 'Published':
|
||||
item_time = unified_timestamp(meta.get('timestamp'))
|
||||
break
|
||||
entries.append({
|
||||
'id': item_id,
|
||||
'title': item_title,
|
||||
'thumbnail': item.get('holdingImageUrl'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'timestamp': item_time,
|
||||
'description': strip_or_none(item_desc),
|
||||
})
|
||||
for resp in (initial_data.get('data') or {}).values():
|
||||
name = resp.get('name')
|
||||
@@ -1242,7 +1271,7 @@ class BBCIE(BBCCoUkIE):
|
||||
entries = []
|
||||
for num, media_meta in enumerate(medias, start=1):
|
||||
formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
|
||||
if not formats:
|
||||
if not formats and not self._downloader.params.get('ignore_no_formats'):
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
@@ -120,6 +121,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
'url': 'https://www.bilibili.com/video/BV1bK411W797',
|
||||
'info_dict': {
|
||||
'id': 'BV1bK411W797',
|
||||
'title': '物语中的人物是如何吐槽自己的OP的'
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}]
|
||||
@@ -156,7 +158,8 @@ class BiliBiliIE(InfoExtractor):
|
||||
if r is not None:
|
||||
self.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id)
|
||||
return r
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
else:
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
|
||||
if 'anime/' not in url:
|
||||
cid = self._search_regex(
|
||||
@@ -274,7 +277,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
|
||||
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
@@ -496,28 +499,40 @@ class BiliBiliBangumiIE(InfoExtractor):
|
||||
|
||||
class BilibiliChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://space.bilibili\.com/(?P<id>\d+)'
|
||||
# May need to add support for pagination? Need to find a user with many video uploads to test
|
||||
_API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=1&ps=25&jsonp=jsonp"
|
||||
_TEST = {} # TODO: Add tests
|
||||
_API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp"
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/3985676/video',
|
||||
'info_dict': {},
|
||||
'playlist_mincount': 112,
|
||||
}]
|
||||
|
||||
def _entries(self, list_id):
|
||||
count, max_count = 0, None
|
||||
|
||||
for page_num in itertools.count(1):
|
||||
data = self._parse_json(
|
||||
self._download_webpage(
|
||||
self._API_URL % (list_id, page_num), list_id,
|
||||
note='Downloading page %d' % page_num),
|
||||
list_id)['data']
|
||||
|
||||
max_count = max_count or try_get(data, lambda x: x['page']['count'])
|
||||
|
||||
entries = try_get(data, lambda x: x['list']['vlist'])
|
||||
if not entries:
|
||||
return
|
||||
for entry in entries:
|
||||
yield self.url_result(
|
||||
'https://www.bilibili.com/video/%s' % entry['bvid'],
|
||||
BiliBiliIE.ie_key(), entry['bvid'])
|
||||
|
||||
count += len(entries)
|
||||
if max_count and count >= max_count:
|
||||
return
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
json_str = self._download_webpage(self._API_URL % list_id, "None")
|
||||
|
||||
json_parsed = json.loads(json_str)
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'ie_key': BiliBiliIE.ie_key(),
|
||||
'url': ('https://www.bilibili.com/video/%s' %
|
||||
entry['bvid']),
|
||||
'id': entry['bvid'],
|
||||
} for entry in json_parsed['data']['list']['vlist']]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': list_id,
|
||||
'entries': entries
|
||||
}
|
||||
return self.playlist_result(self._entries(list_id), list_id)
|
||||
|
||||
|
||||
class BiliBiliSearchIE(SearchInfoExtractor):
|
||||
|
||||
@@ -114,7 +114,7 @@ class BRIE(InfoExtractor):
|
||||
medias.append(media)
|
||||
|
||||
if len(medias) > 1:
|
||||
self._downloader.report_warning(
|
||||
self.report_warning(
|
||||
'found multiple medias; please '
|
||||
'report this with the video URL to http://yt-dl.org/bug')
|
||||
if not medias:
|
||||
|
||||
@@ -545,9 +545,9 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
errors = json_data.get('errors')
|
||||
if errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
self.raise_no_formats(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
if (not self._downloader.params.get('allow_unplayable_formats')
|
||||
elif (not self._downloader.params.get('allow_unplayable_formats')
|
||||
and sources and num_drm_sources == len(sources)):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ class CBSNewsEmbedIE(CBSIE):
|
||||
def _real_extract(self, url):
|
||||
item = self._parse_json(zlib.decompress(compat_b64decode(
|
||||
compat_urllib_parse_unquote(self._match_id(url))),
|
||||
-zlib.MAX_WBITS), None)['video']['items'][0]
|
||||
-zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0]
|
||||
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
||||
|
||||
|
||||
|
||||
@@ -1,38 +1,113 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .cbs import CBSBaseIE
|
||||
import re
|
||||
|
||||
# from .cbs import CBSBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class CBSSportsIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P<id>[^/?#&]+)'
|
||||
|
||||
# class CBSSportsEmbedIE(CBSBaseIE):
|
||||
class CBSSportsEmbedIE(InfoExtractor):
|
||||
IE_NAME = 'cbssports:embed'
|
||||
_VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
|
||||
(?:
|
||||
ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
|
||||
pcid%3D(?P<pcid>\d+)
|
||||
)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/',
|
||||
'info_dict': {
|
||||
'id': '1214315075735',
|
||||
'ext': 'mp4',
|
||||
'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder',
|
||||
'description': 'md5:df6f48622612c2d6bd2e295ddef58def',
|
||||
'timestamp': 1524111457,
|
||||
'upload_date': '20180419',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/',
|
||||
'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, filter_query, video_id):
|
||||
return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
||||
# def _extract_video_info(self, filter_query, video_id):
|
||||
# return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
uuid, pcid = re.match(self._VALID_URL, url).groups()
|
||||
query = {'id': uuid} if uuid else {'pcid': pcid}
|
||||
video = self._download_json(
|
||||
'https://www.cbssports.com/api/content/video/',
|
||||
uuid or pcid, query=query)[0]
|
||||
video_id = video['id']
|
||||
title = video['title']
|
||||
metadata = video.get('metaData') or {}
|
||||
# return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
|
||||
# return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
metadata['files'][0]['url'], video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
self._sort_formats(formats)
|
||||
|
||||
image = video.get('image')
|
||||
thumbnails = None
|
||||
if image:
|
||||
image_path = image.get('path')
|
||||
if image_path:
|
||||
thumbnails = [{
|
||||
'url': image_path,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
'filesize': int_or_none(image.get('size')),
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': video.get('description'),
|
||||
'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
|
||||
'duration': int_or_none(metadata.get('duration')),
|
||||
}
|
||||
|
||||
|
||||
class CBSSportsBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
[r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'],
|
||||
webpage, 'video id')
|
||||
return self._extract_video_info('byId=%s' % video_id, video_id)
|
||||
iframe_url = self._search_regex(
|
||||
r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"',
|
||||
webpage, 'embed url')
|
||||
return self.url_result(iframe_url, CBSSportsEmbedIE.ie_key())
|
||||
|
||||
|
||||
class CBSSportsIE(CBSSportsBaseIE):
|
||||
IE_NAME = 'cbssports'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
|
||||
'info_dict': {
|
||||
'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cover 3: Stanford Spring Gleaning',
|
||||
'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
|
||||
'timestamp': 1617218398,
|
||||
'upload_date': '20210331',
|
||||
'duration': 502,
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class TwentyFourSevenSportsIE(CBSSportsBaseIE):
|
||||
IE_NAME = '247sports'
|
||||
_VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
|
||||
'info_dict': {
|
||||
'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
|
||||
'ext': 'mp4',
|
||||
'title': '2021 QB Jake Garcia senior highlights through five games',
|
||||
'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
|
||||
'timestamp': 1607114223,
|
||||
'upload_date': '20201204',
|
||||
'duration': 208,
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -5,7 +5,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
@@ -187,14 +186,13 @@ class Channel9IE(InfoExtractor):
|
||||
'quality': quality(q, q_url),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
slides = content_data.get('Slides')
|
||||
zip_file = content_data.get('ZipFile')
|
||||
|
||||
if not formats and not slides and not zip_file:
|
||||
raise ExtractorError(
|
||||
self.raise_no_formats(
|
||||
'None of recording, slides or zip are available for %s' % content_path)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for caption in content_data.get('Captions', []):
|
||||
|
||||
@@ -17,7 +17,7 @@ import math
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar_Cookie,
|
||||
compat_cookies,
|
||||
compat_cookies_SimpleCookie,
|
||||
compat_etree_Element,
|
||||
compat_etree_fromstring,
|
||||
compat_getpass,
|
||||
@@ -683,7 +683,7 @@ class InfoExtractor(object):
|
||||
if fatal:
|
||||
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
|
||||
else:
|
||||
self._downloader.report_warning(errmsg)
|
||||
self.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||
@@ -968,15 +968,27 @@ class InfoExtractor(object):
|
||||
"""Report attempt to log in."""
|
||||
self.to_screen('Logging in')
|
||||
|
||||
@staticmethod
|
||||
def raise_login_required(msg='This video is only available for registered users'):
|
||||
def raise_login_required(
|
||||
self, msg='This video is only available for registered users', metadata_available=False):
|
||||
if metadata_available and self._downloader.params.get('ignore_no_formats_error'):
|
||||
self.report_warning(msg)
|
||||
raise ExtractorError(
|
||||
'%s. Use --username and --password or --netrc to provide account credentials.' % msg,
|
||||
'%s. Use --cookies, --username and --password or --netrc to provide account credentials' % msg,
|
||||
expected=True)
|
||||
|
||||
@staticmethod
|
||||
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
|
||||
raise GeoRestrictedError(msg, countries=countries)
|
||||
def raise_geo_restricted(
|
||||
self, msg='This video is not available from your location due to geo restriction',
|
||||
countries=None, metadata_available=False):
|
||||
if metadata_available and self._downloader.params.get('ignore_no_formats_error'):
|
||||
self.report_warning(msg)
|
||||
else:
|
||||
raise GeoRestrictedError(msg, countries=countries)
|
||||
|
||||
def raise_no_formats(self, msg, expected=False, video_id=None):
    """Report that no formats could be extracted.

    When the failure is expected and the 'ignore_no_formats_error'
    downloader param is set, only a warning is emitted so extraction can
    continue; in every other case an ExtractorError is raised.
    """
    downgrade_to_warning = (
        expected and self._downloader.params.get('ignore_no_formats_error'))
    if not downgrade_to_warning:
        raise ExtractorError(msg, expected=expected, video_id=video_id)
    self.report_warning(msg, video_id)
|
||||
|
||||
# Methods for following #608
|
||||
@staticmethod
|
||||
@@ -1044,7 +1056,7 @@ class InfoExtractor(object):
|
||||
elif fatal:
|
||||
raise RegexNotFoundError('Unable to extract %s' % _name)
|
||||
else:
|
||||
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||
self.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||
return None
|
||||
|
||||
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
@@ -1072,7 +1084,7 @@ class InfoExtractor(object):
|
||||
raise netrc.NetrcParseError(
|
||||
'No authenticators for %s' % netrc_machine)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning(
|
||||
self.report_warning(
|
||||
'parsing .netrc: %s' % error_to_compat_str(err))
|
||||
|
||||
return username, password
|
||||
@@ -1247,7 +1259,7 @@ class InfoExtractor(object):
|
||||
elif fatal:
|
||||
raise RegexNotFoundError('Unable to extract JSON-LD')
|
||||
else:
|
||||
self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
|
||||
self.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
|
||||
return {}
|
||||
|
||||
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||
@@ -1308,6 +1320,7 @@ class InfoExtractor(object):
|
||||
|
||||
def extract_video_object(e):
|
||||
assert e['@type'] == 'VideoObject'
|
||||
author = e.get('author')
|
||||
info.update({
|
||||
'url': url_or_none(e.get('contentUrl')),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
@@ -1315,7 +1328,11 @@ class InfoExtractor(object):
|
||||
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'uploader': str_or_none(e.get('author')),
|
||||
# author can be an instance of 'Organization' or 'Person' types.
|
||||
# both types can have 'name' property(inherited from 'Thing' type). [1]
|
||||
# however some websites are using 'Text' type instead.
|
||||
# 1. https://schema.org/VideoObject
|
||||
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
'tbr': int_or_none(e.get('bitrate')),
|
||||
'width': int_or_none(e.get('width')),
|
||||
@@ -1398,7 +1415,7 @@ class InfoExtractor(object):
|
||||
return self._hidden_inputs(form)
|
||||
|
||||
class FormatSort:
|
||||
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<seperator>[~:])(?P<limit>.*?))?)? *$'
|
||||
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
|
||||
|
||||
default = ('hidden', 'hasvid', 'ie_pref', 'lang', 'quality',
|
||||
'res', 'fps', 'codec:vp9.2', 'size', 'br', 'asr',
|
||||
@@ -1558,7 +1575,7 @@ class InfoExtractor(object):
|
||||
if self._get_field_setting(field, 'type') == 'alias':
|
||||
field = self._get_field_setting(field, 'field')
|
||||
reverse = match.group('reverse') is not None
|
||||
closest = match.group('seperator') == '~'
|
||||
closest = match.group('separator') == '~'
|
||||
limit_text = match.group('limit')
|
||||
|
||||
has_limit = limit_text is not None
|
||||
@@ -1575,7 +1592,8 @@ class InfoExtractor(object):
|
||||
else None)
|
||||
|
||||
def print_verbose_info(self, to_screen):
|
||||
to_screen('[debug] Sort order given by user: %s' % ','.join(self._sort_user))
|
||||
if self._sort_user:
|
||||
to_screen('[debug] Sort order given by user: %s' % ','.join(self._sort_user))
|
||||
if self._sort_extractor:
|
||||
to_screen('[debug] Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
|
||||
to_screen('[debug] Formats sorted by: %s' % ', '.join(['%s%s%s' % (
|
||||
@@ -1664,6 +1682,8 @@ class InfoExtractor(object):
|
||||
|
||||
def _sort_formats(self, formats, field_preference=[]):
|
||||
if not formats:
|
||||
if self._downloader.params.get('ignore_no_formats_error'):
|
||||
return
|
||||
raise ExtractorError('No video formats found')
|
||||
format_sort = self.FormatSort() # params and to_screen are taken from the downloader
|
||||
format_sort.evaluate_params(self._downloader.params, field_preference)
|
||||
@@ -1889,7 +1909,8 @@ class InfoExtractor(object):
|
||||
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||
return []
|
||||
|
||||
if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay
|
||||
if (not self._downloader.params.get('allow_unplayable_formats')
|
||||
and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)): # Apple FairPlay
|
||||
return []
|
||||
|
||||
formats = []
|
||||
@@ -2406,7 +2427,7 @@ class InfoExtractor(object):
|
||||
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
|
||||
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
|
||||
"""
|
||||
if not self._downloader.params.get('dynamic_mpd'):
|
||||
if not self._downloader.params.get('dynamic_mpd', True):
|
||||
if mpd_doc.get('type') == 'dynamic':
|
||||
return []
|
||||
|
||||
@@ -3196,7 +3217,7 @@ class InfoExtractor(object):
|
||||
if fatal:
|
||||
raise ExtractorError(msg)
|
||||
else:
|
||||
self._downloader.report_warning(msg)
|
||||
self.report_warning(msg)
|
||||
return res
|
||||
|
||||
def _float(self, v, name, fatal=False, **kwargs):
|
||||
@@ -3206,7 +3227,7 @@ class InfoExtractor(object):
|
||||
if fatal:
|
||||
raise ExtractorError(msg)
|
||||
else:
|
||||
self._downloader.report_warning(msg)
|
||||
self.report_warning(msg)
|
||||
return res
|
||||
|
||||
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
||||
@@ -3218,10 +3239,10 @@ class InfoExtractor(object):
|
||||
self._downloader.cookiejar.set_cookie(cookie)
|
||||
|
||||
def _get_cookies(self, url):
|
||||
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
|
||||
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
|
||||
req = sanitized_Request(url)
|
||||
self._downloader.cookiejar.add_cookie_header(req)
|
||||
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
|
||||
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
|
||||
|
||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||
"""
|
||||
@@ -3382,7 +3403,7 @@ class SearchInfoExtractor(InfoExtractor):
|
||||
if n <= 0:
|
||||
raise ExtractorError('invalid download number %s for query "%s"' % (n, query))
|
||||
elif n > self._MAX_RESULTS:
|
||||
self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
|
||||
self.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
|
||||
n = self._MAX_RESULTS
|
||||
return self._get_n_results(query, n)
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
@@ -58,3 +60,16 @@ class MmsIE(InfoExtractor):
|
||||
'title': title,
|
||||
'url': url,
|
||||
}
|
||||
|
||||
|
||||
class ViewSourceIE(InfoExtractor):
    # Handles browser-style "view-source:<url>" addresses by stripping the
    # prefix and handing the wrapped URL back for regular extraction.
    IE_DESC = False
    _VALID_URL = r'view-source:(?P<url>.+)'

    _TEST = {
        'url': 'view-source:https://www.youtube.com/watch?v=BaW_jenozKc',
        'only_matching': True
    }

    def _real_extract(self, url):
        """Return a url_result pointing at the URL behind the prefix."""
        mobj = re.match(self._VALID_URL, url)
        wrapped_url = mobj.group('url')
        return self.url_result(wrapped_url)
|
||||
|
||||
@@ -131,7 +131,7 @@ class CorusIE(ThePlatformFeedIE):
|
||||
formats.extend(self._parse_smil_formats(
|
||||
smil, smil_url, video_id, namespace))
|
||||
if not formats and video.get('drm'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
self.raise_no_formats('This video is DRM protected.', expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
||||
@@ -428,7 +428,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
r'<div class="showmedia-trailer-notice">(.+?)</div>',
|
||||
webpage, 'trailer-notice', default='')
|
||||
if note_m:
|
||||
raise ExtractorError(note_m)
|
||||
raise ExtractorError(note_m, expected=True)
|
||||
|
||||
mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
|
||||
if mobj:
|
||||
|
||||
@@ -25,12 +25,12 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
def _call_api(self, path, video_id):
|
||||
def _call_api(self, path, video_id, query=None):
|
||||
headers = {}
|
||||
if self._auth_token:
|
||||
headers['X-Auth-Token'] = self._auth_token
|
||||
result = self._download_json(
|
||||
self._API_BASE_URL + path, video_id, headers=headers)
|
||||
self._API_BASE_URL + path, video_id, headers=headers, query=query)
|
||||
self._handle_errors(result)
|
||||
return result['data']
|
||||
|
||||
@@ -52,62 +52,75 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://app.curiositystream.com/video/2',
|
||||
'md5': '262bb2f257ff301115f1973540de8983',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Did You Develop The Internet?',
|
||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
media = self._call_api('media/' + video_id, video_id)
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
for encoding in media.get('encodings', []):
|
||||
m3u8_url = encoding.get('master_playlist_url')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
encoding_url = encoding.get('url')
|
||||
file_url = encoding.get('file_url')
|
||||
if not encoding_url and not file_url:
|
||||
continue
|
||||
f = {
|
||||
'width': int_or_none(encoding.get('width')),
|
||||
'height': int_or_none(encoding.get('height')),
|
||||
'vbr': int_or_none(encoding.get('video_bitrate')),
|
||||
'abr': int_or_none(encoding.get('audio_bitrate')),
|
||||
'filesize': int_or_none(encoding.get('size_in_bytes')),
|
||||
'vcodec': encoding.get('video_codec'),
|
||||
'acodec': encoding.get('audio_codec'),
|
||||
'container': encoding.get('container_type'),
|
||||
}
|
||||
for f_url in (encoding_url, file_url):
|
||||
if not f_url:
|
||||
for encoding_format in ('m3u8', 'mpd'):
|
||||
media = self._call_api('media/' + video_id, video_id, query={
|
||||
'encodingsNew': 'true',
|
||||
'encodingsFormat': encoding_format,
|
||||
})
|
||||
for encoding in media.get('encodings', []):
|
||||
playlist_url = encoding.get('master_playlist_url')
|
||||
if encoding_format == 'm3u8':
|
||||
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
playlist_url, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif encoding_format == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
playlist_url, video_id, mpd_id='dash', fatal=False))
|
||||
encoding_url = encoding.get('url')
|
||||
file_url = encoding.get('file_url')
|
||||
if not encoding_url and not file_url:
|
||||
continue
|
||||
fmt = f.copy()
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
|
||||
if rtmp:
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'app': rtmp.group('app'),
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'url': f_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
formats.append(fmt)
|
||||
f = {
|
||||
'width': int_or_none(encoding.get('width')),
|
||||
'height': int_or_none(encoding.get('height')),
|
||||
'vbr': int_or_none(encoding.get('video_bitrate')),
|
||||
'abr': int_or_none(encoding.get('audio_bitrate')),
|
||||
'filesize': int_or_none(encoding.get('size_in_bytes')),
|
||||
'vcodec': encoding.get('video_codec'),
|
||||
'acodec': encoding.get('audio_codec'),
|
||||
'container': encoding.get('container_type'),
|
||||
}
|
||||
for f_url in (encoding_url, file_url):
|
||||
if not f_url:
|
||||
continue
|
||||
fmt = f.copy()
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
|
||||
if rtmp:
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'app': rtmp.group('app'),
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'url': f_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = media['title']
|
||||
|
||||
subtitles = {}
|
||||
for closed_caption in media.get('closed_captions', []):
|
||||
sub_url = closed_caption.get('file')
|
||||
@@ -140,7 +153,7 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
'title': 'Curious Minds: The Internet',
|
||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||
},
|
||||
'playlist_mincount': 17,
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
'url': 'https://curiositystream.com/series/2',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
class DeezerBaseInfoExtractor(InfoExtractor):
|
||||
def get_data(self, url):
|
||||
if not self._downloader.params.get('test'):
|
||||
self._downloader.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')
|
||||
self.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
data_id = mobj.group('id')
|
||||
|
||||
100
yt_dlp/extractor/discoveryplusindia.py
Normal file
100
yt_dlp/extractor/discoveryplusindia.py
Normal file
@@ -0,0 +1,100 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import try_get
|
||||
from .common import InfoExtractor
|
||||
from .dplay import DPlayIE
|
||||
|
||||
|
||||
class DiscoveryPlusIndiaIE(DPlayIE):
    """Extractor for single video pages on discoveryplus.in.

    Reuses DPlayIE and overrides the request headers and the playback-info
    request with the values used for the India site.
    """

    _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' + DPlayIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE',
        'info_dict': {
            'id': '27104',
            'ext': 'mp4',
            'display_id': 'how-do-they-do-it/fugu-and-more',
            'title': 'Fugu and More',
            'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.',
            'duration': 1319,
            'timestamp': 1582309800,
            'upload_date': '20200221',
            'series': 'How Do They Do It?',
            'season_number': 8,
            'episode_number': 2,
            'creator': 'Discovery Channel',
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        },
        'skip': 'Cookies (not necessarily logged in) are needed'
    }]

    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
        """Add the realm and client identification headers to *headers* in place.

        disco_base and display_id are accepted but not used here.
        NOTE(review): presumably this is a hook called by DPlayIE; the caller
        is not visible in this file.
        """
        headers['x-disco-params'] = 'realm=%s' % realm
        headers['x-disco-client'] = 'WEB:UNKNOWN:dplus-india:17.0.0'

    def _download_video_playback_info(self, disco_base, video_id, headers):
        """POST the video id to the v3 playback endpoint and return its 'streaming' data."""
        payload = {
            'deviceInfo': {
                'adBlocker': False,
            },
            'videoId': video_id,
        }
        endpoint = disco_base + 'playback/v3/videoPlaybackInfo'
        response = self._download_json(
            endpoint, video_id, headers=headers,
            data=json.dumps(payload).encode('utf-8'))
        return response['data']['attributes']['streaming']

    def _real_extract(self, url):
        """Resolve the display id from *url* and delegate to the shared disco API extraction."""
        return self._get_disco_api_info(
            url, self._match_id(url),
            'ap2-prod-direct.discoveryplus.in', 'dplusindia', 'in')
|
||||
|
||||
|
||||
class DiscoveryPlusIndiaShowIE(InfoExtractor):
    """Playlist extractor for discoveryplus.in show pages (``/show/<show_name>``)."""

    _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P<show_name>[^/]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.discoveryplus.in/show/how-do-they-do-it',
        'playlist_mincount': 140,
        'info_dict': {
            'id': 'how-do-they-do-it',
        },
    }]

    def _entries(self, show_name):
        """Yield one url_result per episode, season by season and page by page.

        The show route is fetched first to obtain the show id and the list of
        seasons; each season is then listed through the paginated
        ``content/videos`` endpoint (100 items per page).
        """
        headers = {
            'x-disco-client': 'WEB:UNKNOWN:dplus-india:prod',
            'x-disco-params': 'realm=dplusindia',
            'referer': 'https://www.discoveryplus.in/',
        }
        show_url = 'https://ap2-prod-direct.discoveryplus.in/cms/routes/show/{}?include=default'.format(show_name)
        # NOTE(review): the component is selected by a fixed position in
        # 'included'; this presumably relies on a stable API ordering - confirm.
        show_json = self._download_json(
            show_url, video_id=show_name,
            headers=headers)['included'][4]['attributes']['component']
        # The show id is whatever follows the last '=' in 'mandatoryParams'.
        show_id = show_json['mandatoryParams'].split('=')[-1]
        season_url = 'https://ap2-prod-direct.discoveryplus.in/content/videos?sort=episodeNumber&filter[seasonNumber]={}&filter[show.id]={}&page[size]=100&page[number]={}'
        for season in show_json['filters'][0]['options']:
            season_id = season['id']
            total_pages, page_num = 1, 0
            while page_num < total_pages:
                # page_num counts pages already fetched; the API page number
                # is 1-based, so report the number that is actually requested.
                api_page = page_num + 1
                season_json = self._download_json(
                    season_url.format(season_id, show_id, compat_str(api_page)),
                    video_id=show_id, headers=headers,
                    note='Downloading JSON metadata%s' % (' page %d' % api_page if page_num else ''))
                if page_num == 0:
                    # Only the first response is used to learn the page count;
                    # fall back to a single page when it is missing.
                    total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1
                episodes_json = season_json['data']
                for episode in episodes_json:
                    video_id = episode['attributes']['path']
                    yield self.url_result(
                        'https://discoveryplus.in/videos/%s' % video_id,
                        ie=DiscoveryPlusIndiaIE.ie_key(), video_id=video_id)
                page_num += 1

    def _real_extract(self, url):
        """Return a playlist whose id is the show name taken from *url*."""
        show_name = re.match(self._VALID_URL, url).group('show_name')
        return self.playlist_result(self._entries(show_name), playlist_id=show_name)
|
||||
@@ -9,7 +9,6 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
compat_str,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@@ -140,7 +139,7 @@ class DisneyIE(InfoExtractor):
|
||||
'vcodec': 'none' if (width == 0 and height == 0) else None,
|
||||
})
|
||||
if not formats and video_data.get('expired'):
|
||||
raise ExtractorError(
|
||||
self.raise_no_formats(
|
||||
'%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']),
|
||||
expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -79,7 +79,7 @@ class ElonetIE(InfoExtractor):
|
||||
elif fmt == 'mpd':
|
||||
subs = self._parse_mpd_subtitles(doc)
|
||||
else:
|
||||
self._downloader.report_warning(
|
||||
self.report_warning(
|
||||
"Cannot download subtitles from '%s' streams." % (fmt))
|
||||
subs = {}
|
||||
return subs
|
||||
|
||||
@@ -80,6 +80,7 @@ from .arte import (
|
||||
ArteTVEmbedIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
from .arnes import ArnesIE
|
||||
from .asiancrush import (
|
||||
AsianCrushIE,
|
||||
AsianCrushPlaylistIE,
|
||||
@@ -209,7 +210,11 @@ from .cbsnews import (
|
||||
CBSNewsIE,
|
||||
CBSNewsLiveVideoIE,
|
||||
)
|
||||
from .cbssports import CBSSportsIE
|
||||
from .cbssports import (
|
||||
CBSSportsEmbedIE,
|
||||
CBSSportsIE,
|
||||
TwentyFourSevenSportsIE,
|
||||
)
|
||||
from .ccc import (
|
||||
CCCIE,
|
||||
CCCPlaylistIE,
|
||||
@@ -264,6 +269,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import (
|
||||
MmsIE,
|
||||
RtmpIE,
|
||||
ViewSourceIE,
|
||||
)
|
||||
from .condenast import CondeNastIE
|
||||
from .contv import CONtvIE
|
||||
@@ -307,6 +313,10 @@ from .democracynow import DemocracynowIE
|
||||
from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .digg import DiggIE
|
||||
from .discoveryplusindia import (
|
||||
DiscoveryPlusIndiaIE,
|
||||
DiscoveryPlusIndiaShowIE,
|
||||
)
|
||||
from .dotsub import DotsubIE
|
||||
from .douyutv import (
|
||||
DouyuShowIE,
|
||||
@@ -582,7 +592,11 @@ from .kuwo import (
|
||||
KuwoCategoryIE,
|
||||
KuwoMvIE,
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .la7 import (
|
||||
LA7IE,
|
||||
LA7PodcastEpisodeIE,
|
||||
LA7PodcastIE,
|
||||
)
|
||||
from .laola1tv import (
|
||||
Laola1TvEmbedIE,
|
||||
Laola1TvIE,
|
||||
@@ -623,7 +637,11 @@ from .limelight import (
|
||||
LimelightChannelIE,
|
||||
LimelightChannelListIE,
|
||||
)
|
||||
from .line import LineTVIE
|
||||
from .line import (
|
||||
LineTVIE,
|
||||
LineLiveIE,
|
||||
LineLiveChannelIE,
|
||||
)
|
||||
from .linkedin import (
|
||||
LinkedInLearningIE,
|
||||
LinkedInLearningCourseIE,
|
||||
@@ -661,6 +679,7 @@ from .mangomolo import (
|
||||
MangomoloLiveIE,
|
||||
)
|
||||
from .manyvids import ManyVidsIE
|
||||
from .maoritv import MaoriTVIE
|
||||
from .markiza import (
|
||||
MarkizaIE,
|
||||
MarkizaPageIE,
|
||||
@@ -784,8 +803,9 @@ from .ndr import (
|
||||
NJoyEmbedIE,
|
||||
)
|
||||
from .ndtv import NDTVIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .nebula import NebulaIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .neteasemusic import (
|
||||
NetEaseMusicIE,
|
||||
NetEaseMusicAlbumIE,
|
||||
@@ -810,6 +830,7 @@ from .nexx import (
|
||||
NexxIE,
|
||||
NexxEmbedIE,
|
||||
)
|
||||
from .nfhsnetwork import NFHSNetworkIE
|
||||
from .nfl import (
|
||||
NFLIE,
|
||||
NFLArticleIE,
|
||||
@@ -921,6 +942,11 @@ from .packtpub import (
|
||||
PacktPubIE,
|
||||
PacktPubCourseIE,
|
||||
)
|
||||
from .palcomp3 import (
|
||||
PalcoMP3IE,
|
||||
PalcoMP3ArtistIE,
|
||||
PalcoMP3VideoIE,
|
||||
)
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
@@ -1314,7 +1340,10 @@ from .trovo import (
|
||||
from .trunews import TruNewsIE
|
||||
from .trutv import TruTVIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tubitv import TubiTvIE
|
||||
from .tubitv import (
|
||||
TubiTvIE,
|
||||
TubiTvShowIE,
|
||||
)
|
||||
from .tumblr import TumblrIE
|
||||
from .tunein import (
|
||||
TuneInClipIE,
|
||||
|
||||
@@ -348,7 +348,7 @@ class FacebookIE(InfoExtractor):
|
||||
login_results, 'login error', default=None, group='error')
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
self._downloader.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
|
||||
self.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
|
||||
return
|
||||
|
||||
fb_dtsg = self._search_regex(
|
||||
@@ -369,9 +369,9 @@ class FacebookIE(InfoExtractor):
|
||||
check_response = self._download_webpage(check_req, None,
|
||||
note='Confirming login')
|
||||
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
||||
self._downloader.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.')
|
||||
self.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err))
|
||||
self.report_warning('unable to log in: %s' % error_to_compat_str(err))
|
||||
return
|
||||
|
||||
def _real_initialize(self):
|
||||
@@ -625,8 +625,6 @@ class FacebookIE(InfoExtractor):
|
||||
subtitles_src = f[0].get('subtitles_src')
|
||||
if subtitles_src:
|
||||
subtitles.setdefault('en', []).append({'url': subtitles_src})
|
||||
if not formats:
|
||||
raise ExtractorError('Cannot find video formats')
|
||||
|
||||
process_formats(formats)
|
||||
|
||||
|
||||
@@ -401,7 +401,7 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
|
||||
r'id-video=([^@]+@[^"]+)',
|
||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
r'data-id="([^"]+)"'),
|
||||
r'data-id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
|
||||
webpage, 'video id')
|
||||
|
||||
return self._make_url_result(video_id)
|
||||
|
||||
@@ -2376,7 +2376,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
|
||||
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
self.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
elif default_search != 'fixup_error':
|
||||
if default_search == 'auto_warning':
|
||||
@@ -2385,7 +2385,7 @@ class GenericIE(InfoExtractor):
|
||||
'Invalid URL: %r . Call yt-dlp like this: yt-dlp -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
|
||||
expected=True)
|
||||
else:
|
||||
self._downloader.report_warning(
|
||||
self.report_warning(
|
||||
'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
|
||||
return self.url_result('ytsearch:' + url)
|
||||
|
||||
@@ -2461,7 +2461,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||
force = self._downloader.params.get('force_generic_extractor', False)
|
||||
self._downloader.report_warning(
|
||||
self.report_warning(
|
||||
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
|
||||
|
||||
if not full_response:
|
||||
@@ -2488,7 +2488,7 @@ class GenericIE(InfoExtractor):
|
||||
# Maybe it's a direct link to a video?
|
||||
# Be careful not to download the whole thing!
|
||||
if not is_html(first_bytes):
|
||||
self._downloader.report_warning(
|
||||
self.report_warning(
|
||||
'URL could be a direct video link, returning it as such.')
|
||||
info_dict.update({
|
||||
'direct': True,
|
||||
@@ -2659,6 +2659,15 @@ class GenericIE(InfoExtractor):
|
||||
if vid_me_embed_url is not None:
|
||||
return self.url_result(vid_me_embed_url, 'Vidme')
|
||||
|
||||
# Invidious Instances
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/195
|
||||
# https://github.com/iv-org/invidious/pull/1730
|
||||
youtube_url = self._search_regex(
|
||||
r'<link rel="alternate" href="(https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
|
||||
webpage, 'youtube link', default=None)
|
||||
if youtube_url:
|
||||
return self.url_result(youtube_url, YoutubeIE.ie_key())
|
||||
|
||||
# Look for YouTube embeds
|
||||
youtube_urls = YoutubeIE._extract_urls(webpage)
|
||||
if youtube_urls:
|
||||
|
||||
@@ -4,10 +4,14 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
parse_age_limit,
|
||||
remove_start,
|
||||
remove_end,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -46,15 +50,15 @@ class GoIE(AdobePassIE):
|
||||
}
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?P<sub_domain>%s)\.)?go|
|
||||
(?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks)
|
||||
(?P<sub_domain>
|
||||
(?:%s\.)?go|fxnow\.fxnetworks|
|
||||
(?:www\.)?(?:abc|freeform|disneynow)
|
||||
)\.com/
|
||||
(?:
|
||||
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
|
||||
(?:[^/]+/)*(?P<display_id>[^/?\#]+)
|
||||
)
|
||||
''' % '|'.join(list(_SITE_INFO.keys()))
|
||||
''' % r'\.|'.join(list(_SITE_INFO.keys()))
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
|
||||
'info_dict': {
|
||||
@@ -116,6 +120,18 @@ class GoIE(AdobePassIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot',
|
||||
'info_dict': {
|
||||
'id': 'VDKA22600213',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pilot',
|
||||
'description': 'md5:74306df917cfc199d76d061d66bebdb4',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||
'only_matching': True,
|
||||
@@ -133,6 +149,9 @@ class GoIE(AdobePassIE):
|
||||
}, {
|
||||
'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.freeform.com/shows/cruel-summer/episode-guide/season-01/01-happy-birthday-jeanette-turner',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_videos(self, brand, video_id='-1', show_id='-1'):
|
||||
@@ -143,24 +162,36 @@ class GoIE(AdobePassIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2')
|
||||
sub_domain = remove_start(remove_end(mobj.group('sub_domain') or '', '.go'), 'www.')
|
||||
video_id, display_id = mobj.group('id', 'display_id')
|
||||
site_info = self._SITE_INFO.get(sub_domain, {})
|
||||
brand = site_info.get('brand')
|
||||
if not video_id or not site_info:
|
||||
webpage = self._download_webpage(url, display_id or video_id)
|
||||
video_id = self._search_regex(
|
||||
(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)',
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/25216/files
|
||||
# The following is based on the pull request on the line above. Changed the ABC.com URL to a show available now.
|
||||
# https://abc.com/shows/the-rookie/episode-guide/season-02/19-the-q-word
|
||||
r'\bvideoIdCode["\']\s*:\s*["\'](vdka\w+)',
|
||||
# Deprecated fallback pattern
|
||||
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
||||
), webpage, 'video id', default=video_id)
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
|
||||
'data', default='{}'),
|
||||
display_id or video_id, fatal=False)
|
||||
# https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
|
||||
layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
|
||||
video_id = None
|
||||
if layout:
|
||||
video_id = try_get(
|
||||
layout,
|
||||
(lambda x: x['videoid'], lambda x: x['video']['id']),
|
||||
compat_str)
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)',
|
||||
# page.analytics.videoIdCode
|
||||
r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
|
||||
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
||||
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
||||
), webpage, 'video id', default=video_id)
|
||||
if not site_info:
|
||||
brand = self._search_regex(
|
||||
(r'data-brand=\s*["\']\s*(\d+)',
|
||||
|
||||
@@ -253,7 +253,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||
or 'unable to extract confirmation code')
|
||||
|
||||
if not formats and reason:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
self.raise_no_formats(reason, expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -184,7 +184,7 @@ class HotStarIE(HotStarBaseIE):
|
||||
geo_restricted = True
|
||||
continue
|
||||
if not formats and geo_restricted:
|
||||
self.raise_geo_restricted(countries=['IN'])
|
||||
self.raise_geo_restricted(countries=['IN'], metadata_available=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
for f in formats:
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
@@ -32,6 +33,7 @@ class InstagramIE(InfoExtractor):
|
||||
'title': 'Video by naomipq',
|
||||
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 0,
|
||||
'timestamp': 1371748545,
|
||||
'upload_date': '20130620',
|
||||
'uploader_id': 'naomipq',
|
||||
@@ -48,6 +50,7 @@ class InstagramIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Video by britneyspears',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 0,
|
||||
'timestamp': 1453760977,
|
||||
'upload_date': '20160125',
|
||||
'uploader_id': 'britneyspears',
|
||||
@@ -86,6 +89,24 @@ class InstagramIE(InfoExtractor):
|
||||
'title': 'Post by instagram',
|
||||
'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
|
||||
},
|
||||
}, {
|
||||
# IGTV
|
||||
'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
|
||||
'info_dict': {
|
||||
'id': 'BkfuX9UB-eK',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fingerboarding Tricks with @cass.fb',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 53.83,
|
||||
'timestamp': 1530032919,
|
||||
'upload_date': '20180626',
|
||||
'uploader_id': 'instagram',
|
||||
'uploader': 'Instagram',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
||||
'only_matching': True,
|
||||
@@ -159,7 +180,9 @@ class InstagramIE(InfoExtractor):
|
||||
description = try_get(
|
||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str) or media.get('caption')
|
||||
title = media.get('title')
|
||||
thumbnail = media.get('display_src') or media.get('display_url')
|
||||
duration = float_or_none(media.get('video_duration'))
|
||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||
uploader = media.get('owner', {}).get('full_name')
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
@@ -200,9 +223,10 @@ class InstagramIE(InfoExtractor):
|
||||
continue
|
||||
entries.append({
|
||||
'id': node.get('shortcode') or node['id'],
|
||||
'title': 'Video %d' % edge_num,
|
||||
'title': node.get('title') or 'Video %d' % edge_num,
|
||||
'url': node_video_url,
|
||||
'thumbnail': node.get('display_url'),
|
||||
'duration': float_or_none(node.get('video_duration')),
|
||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||
'view_count': int_or_none(node.get('video_view_count')),
|
||||
@@ -239,8 +263,9 @@ class InstagramIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'ext': 'mp4',
|
||||
'title': 'Video by %s' % uploader_id,
|
||||
'title': title or 'Video by %s' % uploader_id,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'uploader_id': uploader_id,
|
||||
|
||||
@@ -136,7 +136,7 @@ class IPrimaIE(InfoExtractor):
|
||||
extract_formats(src)
|
||||
|
||||
if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
|
||||
self.raise_geo_restricted(countries=['CZ'])
|
||||
self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -280,7 +280,7 @@ class IqiyiIE(InfoExtractor):
|
||||
msg = 'error %s' % code
|
||||
if validation_result.get('msg'):
|
||||
msg += ': ' + validation_result['msg']
|
||||
self._downloader.report_warning('unable to log in: ' + msg)
|
||||
self.report_warning('unable to log in: ' + msg)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
@@ -29,34 +29,51 @@ class JamendoIE(InfoExtractor):
|
||||
'id': '196219',
|
||||
'display_id': 'stories-from-emona-i',
|
||||
'ext': 'flac',
|
||||
'title': 'Maya Filipič - Stories from Emona I',
|
||||
'artist': 'Maya Filipič',
|
||||
# 'title': 'Maya Filipič - Stories from Emona I',
|
||||
'title': 'Stories from Emona I',
|
||||
# 'artist': 'Maya Filipič',
|
||||
'track': 'Stories from Emona I',
|
||||
'duration': 210,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1217438117,
|
||||
'upload_date': '20080730',
|
||||
'license': 'by-nc-nd',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'average_rating': int,
|
||||
'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
|
||||
}
|
||||
}, {
|
||||
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api(self, resource, resource_id):
    """Fetch one object from the jamendo.com ``/api/<resource>s`` endpoint.

    The request carries an ``X-Jam-Call`` header built from the SHA-1 hex
    digest of the API path concatenated with a random number string,
    followed by that same random string. Returns the first element of the
    JSON response.
    """
    api_path = '/api/%ss' % resource
    nonce = compat_str(random.random())
    digest = hashlib.sha1((api_path + nonce).encode()).hexdigest()
    response = self._download_json(
        'https://www.jamendo.com' + api_path, resource_id,
        query={'id[]': resource_id},
        headers={'X-Jam-Call': '$%s*%s~' % (digest, nonce)})
    return response[0]
|
||||
|
||||
def _real_extract(self, url):
|
||||
track_id, display_id = self._VALID_URL_RE.match(url).groups()
|
||||
webpage = self._download_webpage(
|
||||
'https://www.jamendo.com/track/' + track_id, track_id)
|
||||
models = self._parse_json(self._html_search_regex(
|
||||
r"data-bundled-models='([^']+)",
|
||||
webpage, 'bundled models'), track_id)
|
||||
track = models['track']['models'][0]
|
||||
# webpage = self._download_webpage(
|
||||
# 'https://www.jamendo.com/track/' + track_id, track_id)
|
||||
# models = self._parse_json(self._html_search_regex(
|
||||
# r"data-bundled-models='([^']+)",
|
||||
# webpage, 'bundled models'), track_id)
|
||||
# track = models['track']['models'][0]
|
||||
track = self._call_api('track', track_id)
|
||||
title = track_name = track['name']
|
||||
get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
||||
artist = get_model('artist')
|
||||
artist_name = artist.get('name')
|
||||
if artist_name:
|
||||
title = '%s - %s' % (artist_name, title)
|
||||
album = get_model('album')
|
||||
# get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
||||
# artist = get_model('artist')
|
||||
# artist_name = artist.get('name')
|
||||
# if artist_name:
|
||||
# title = '%s - %s' % (artist_name, title)
|
||||
# album = get_model('album')
|
||||
|
||||
formats = [{
|
||||
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
||||
@@ -74,7 +91,7 @@ class JamendoIE(InfoExtractor):
|
||||
|
||||
urls = []
|
||||
thumbnails = []
|
||||
for _, covers in track.get('cover', {}).items():
|
||||
for covers in (track.get('cover') or {}).values():
|
||||
for cover_id, cover_url in covers.items():
|
||||
if not cover_url or cover_url in urls:
|
||||
continue
|
||||
@@ -88,13 +105,14 @@ class JamendoIE(InfoExtractor):
|
||||
})
|
||||
|
||||
tags = []
|
||||
for tag in track.get('tags', []):
|
||||
for tag in (track.get('tags') or []):
|
||||
tag_name = tag.get('name')
|
||||
if not tag_name:
|
||||
continue
|
||||
tags.append(tag_name)
|
||||
|
||||
stats = track.get('stats') or {}
|
||||
license = track.get('licenseCC') or []
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
@@ -103,11 +121,11 @@ class JamendoIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': track.get('description'),
|
||||
'duration': int_or_none(track.get('duration')),
|
||||
'artist': artist_name,
|
||||
# 'artist': artist_name,
|
||||
'track': track_name,
|
||||
'album': album.get('name'),
|
||||
# 'album': album.get('name'),
|
||||
'formats': formats,
|
||||
'license': '-'.join(track.get('licenseCC', [])) or None,
|
||||
'license': '-'.join(license) if license else None,
|
||||
'timestamp': int_or_none(track.get('dateCreated')),
|
||||
'view_count': int_or_none(stats.get('listenedAll')),
|
||||
'like_count': int_or_none(stats.get('favorited')),
|
||||
@@ -116,9 +134,9 @@ class JamendoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class JamendoAlbumIE(InfoExtractor):
|
||||
class JamendoAlbumIE(JamendoIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
||||
'info_dict': {
|
||||
'id': '121486',
|
||||
@@ -151,17 +169,7 @@ class JamendoAlbumIE(InfoExtractor):
|
||||
'params': {
|
||||
'playlistend': 2
|
||||
}
|
||||
}
|
||||
|
||||
def _call_api(self, resource, resource_id):
|
||||
path = '/api/%ss' % resource
|
||||
rand = compat_str(random.random())
|
||||
return self._download_json(
|
||||
'https://www.jamendo.com' + path, resource_id, query={
|
||||
'id[]': resource_id,
|
||||
}, headers={
|
||||
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
|
||||
})[0]
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
@@ -169,7 +177,7 @@ class JamendoAlbumIE(InfoExtractor):
|
||||
album_name = album.get('name')
|
||||
|
||||
entries = []
|
||||
for track in album.get('tracks', []):
|
||||
for track in (album.get('tracks') or []):
|
||||
track_id = track.get('id')
|
||||
if not track_id:
|
||||
continue
|
||||
|
||||
@@ -101,7 +101,7 @@ class KeezMoviesIE(InfoExtractor):
|
||||
|
||||
if not formats:
|
||||
if 'title="This video is no longer available"' in webpage:
|
||||
raise ExtractorError(
|
||||
self.raise_no_formats(
|
||||
'Video %s is no longer available' % video_id, expected=True)
|
||||
|
||||
try:
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -57,3 +63,141 @@ class LA7IE(InfoExtractor):
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'ie_key': 'Kaltura',
|
||||
}
|
||||
|
||||
|
||||
class LA7PodcastEpisodeIE(InfoExtractor):
|
||||
IE_NAME = 'la7.it:pod:episode'
|
||||
_VALID_URL = r'''(?x)(https?://)?
|
||||
(?:www\.)?la7\.it/[^/]+/podcast/([^/]+-)?(?P<id>\d+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.la7.it/voicetown/podcast/la-carezza-delle-memoria-di-carlo-verdone-23-03-2021-371497',
|
||||
'md5': '7737d4d79b3c1a34b3de3e16297119ed',
|
||||
'info_dict': {
|
||||
'id': '371497',
|
||||
'ext': 'mp3',
|
||||
'title': '"La carezza delle memoria" di Carlo Verdone',
|
||||
'description': 'md5:5abf07c3c551a687db80af3f9ceb7d52',
|
||||
'thumbnail': 'https://www.la7.it/sites/default/files/podcast/371497.jpg',
|
||||
'upload_date': '20210323',
|
||||
},
|
||||
}, {
|
||||
# embed url
|
||||
'url': 'https://www.la7.it/embed/podcast/371497',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# date already in the title
|
||||
'url': 'https://www.la7.it/propagandalive/podcast/lintervista-di-diego-bianchi-ad-annalisa-cuzzocrea-puntata-del-1932021-20-03-2021-371130',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# title same as show_title
|
||||
'url': 'https://www.la7.it/otto-e-mezzo/podcast/otto-e-mezzo-26-03-2021-372340',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_info(self, webpage, video_id=None, ppn=None):
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
r'data-nid=([\'"])(?P<vid>\d+)\1',
|
||||
webpage, 'video_id', group='vid')
|
||||
|
||||
media_url = self._search_regex(
|
||||
(r'src:\s*([\'"])(?P<url>.+?mp3.+?)\1',
|
||||
r'data-podcast=([\'"])(?P<url>.+?mp3.+?)\1'),
|
||||
webpage, 'media_url', group='url')
|
||||
ext = determine_ext(media_url)
|
||||
formats = [{
|
||||
'url': media_url,
|
||||
'format_id': ext,
|
||||
'ext': ext,
|
||||
}]
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'<div class="title">(?P<title>.+?)</',
|
||||
r'<title>(?P<title>[^<]+)</title>',
|
||||
r'title:\s*([\'"])(?P<title>.+?)\1'),
|
||||
webpage, 'title', group='title')
|
||||
|
||||
description = (
|
||||
self._html_search_regex(
|
||||
(r'<div class="description">(.+?)</div>',
|
||||
r'<div class="description-mobile">(.+?)</div>',
|
||||
r'<div class="box-txt">([^<]+?)</div>',
|
||||
r'<div class="field-content"><p>(.+?)</p></div>'),
|
||||
webpage, 'description', default=None)
|
||||
or self._html_search_meta('description', webpage))
|
||||
|
||||
thumb = self._html_search_regex(
|
||||
(r'<div class="podcast-image"><img src="(.+?)"></div>',
|
||||
r'<div class="container-embed"[^<]+url\((.+?)\);">',
|
||||
r'<div class="field-content"><img src="(.+?)"'),
|
||||
webpage, 'thumbnail', fatal=False, default=None)
|
||||
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'<span class="(?:durata|duration)">([\d:]+)</span>',
|
||||
webpage, 'duration', fatal=False, default=None))
|
||||
|
||||
date = self._html_search_regex(
|
||||
r'class="data">\s*(?:<span>)?([\d\.]+)\s*</',
|
||||
webpage, 'date', default=None)
|
||||
|
||||
date_alt = self._search_regex(
|
||||
r'(\d+[\./]\d+[\./]\d+)', title, 'date_alt', default=None)
|
||||
ppn = ppn or self._search_regex(
|
||||
r'ppN:\s*([\'"])(?P<ppn>.+?)\1',
|
||||
webpage, 'ppn', group='ppn', default=None)
|
||||
# if the date is not in the title
|
||||
# and title is the same as the show_title
|
||||
# add the date to the title
|
||||
if date and not date_alt and ppn and ppn.lower() == title.lower():
|
||||
title += ' del %s' % date
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': float_or_none(duration),
|
||||
'formats': formats,
|
||||
'thumbnail': thumb,
|
||||
'upload_date': unified_strdate(date),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
    """Download the page behind *url* and hand it to ``_extract_info``."""
    # The identifier matched from the URL is reused both for the download
    # and as the id passed on to the shared extraction routine.
    episode_id = self._match_id(url)
    return self._extract_info(
        self._download_webpage(url, episode_id), episode_id)
|
||||
|
||||
|
||||
class LA7PodcastIE(LA7PodcastEpisodeIE):
    """Playlist extractor for la7.it podcast index pages.

    Every episode container found on the page is passed to the inherited
    ``_extract_info`` and the results are returned as one playlist.
    """
    IE_NAME = 'la7.it:podcast'
    _VALID_URL = r'(https?://)?(www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])'

    _TESTS = [{
        'url': 'https://www.la7.it/propagandalive/podcast',
        'info_dict': {
            'id': 'propagandalive',
            'title': "Propaganda Live",
        },
        'playlist_count': 10,
    }]

    def _real_extract(self, url):
        """Return a playlist built from all episode blocks on the page."""
        show_id = self._match_id(url)
        page = self._download_webpage(url, show_id)

        # Use the first <h1> when present, otherwise whatever
        # _og_search_title finds in the page.
        heading = self._html_search_regex(
            r'<h1.*?>(.+?)</h1>', page, 'title', fatal=False, default=None)
        playlist_title = heading or self._og_search_title(page)

        # Value of the page's ``window.ppN`` assignment (None when absent);
        # forwarded unchanged to _extract_info for every episode.
        show_name = self._search_regex(
            r'window\.ppN\s*=\s*([\'"])(?P<ppn>.+?)\1',
            page, 'ppn', group='ppn', default=None)

        episode_blocks = re.finditer(
            r'<div class="container-podcast-property">([\s\S]+?)(?:</div>\s*){3}',
            page)
        entries = [
            self._extract_info(block.group(1), ppn=show_name)
            for block in episode_blocks]

        return self.playlist_result(entries, show_id, playlist_title)
|
||||
|
||||
@@ -122,6 +122,26 @@ class LBRYIE(LBRYBaseIE):
|
||||
'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
|
||||
'vcodec': 'none',
|
||||
}
|
||||
}, {
|
||||
# HLS
|
||||
'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e',
|
||||
'md5': 'fc82f45ea54915b1495dd7cb5cc1289f',
|
||||
'info_dict': {
|
||||
'id': 'e51671357333fe22ae88aad320bde2f6f96b1410',
|
||||
'ext': 'mp4',
|
||||
'title': 'PLANTS I WILL NEVER GROW AGAIN. THE BLACK LIST PLANTS FOR A CANADIAN GARDEN | Gardening in Canada 🍁',
|
||||
'description': 'md5:9c539c6a03fb843956de61a4d5288d5e',
|
||||
'timestamp': 1618254123,
|
||||
'upload_date': '20210412',
|
||||
'release_timestamp': 1618254002,
|
||||
'release_date': '20210412',
|
||||
'tags': list,
|
||||
'duration': 554,
|
||||
'channel': 'Gardening In Canada',
|
||||
'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
|
||||
'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
|
||||
'formats': 'mincount:3',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
|
||||
'only_matching': True,
|
||||
@@ -168,10 +188,18 @@ class LBRYIE(LBRYBaseIE):
|
||||
streaming_url = self._call_api_proxy(
|
||||
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
|
||||
info = self._parse_stream(result, url)
|
||||
urlh = self._request_webpage(
|
||||
streaming_url, display_id, note='Downloading streaming redirect url info')
|
||||
if determine_ext(urlh.geturl()) == 'm3u8':
|
||||
info['formats'] = self._extract_m3u8_formats(
|
||||
urlh.geturl(), display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(info['formats'])
|
||||
else:
|
||||
info['url'] = streaming_url
|
||||
info.update({
|
||||
'id': claim_id,
|
||||
'title': title,
|
||||
'url': streaming_url,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
@@ -4,7 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class LineTVIE(InfoExtractor):
|
||||
@@ -71,7 +76,7 @@ class LineTVIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
if not formats[0].get('width'):
|
||||
if formats and not formats[0].get('width'):
|
||||
formats[0]['vcodec'] = 'none'
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
@@ -88,3 +93,137 @@ class LineTVIE(InfoExtractor):
|
||||
for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
|
||||
'view_count': video_info.get('meta', {}).get('count'),
|
||||
}
|
||||
|
||||
|
||||
class LineLiveBaseIE(InfoExtractor):
    """Common base for the LINE LIVE extractors (API URL + item parsing)."""
    _API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'

    def _parse_broadcast_item(self, item):
        """Convert one broadcast object from the API into an info dict.

        ``item['id']`` and ``item['title']`` are required (``KeyError`` if
        missing); all remaining fields are read with ``.get`` and end up as
        ``None`` / an empty list when the API does not provide them.
        """
        broadcast_id = compat_str(item['id'])
        raw_title = item['title']
        live_now = item.get('isBroadcastingNow')

        # Keep only thumbnails that actually carry a URL
        thumbnail_map = item.get('thumbnailURLs') or {}
        thumbnails = [
            {'id': key, 'url': thumb_url}
            for key, thumb_url in thumbnail_map.items()
            if thumb_url]

        channel_info = item.get('channel') or {}
        channel_id = str_or_none(channel_info.get('id'))
        channel_url = None
        if channel_id:
            channel_url = 'https://live.line.me/channels/' + channel_id

        # Only broadcasts flagged as currently on air go through _live_title
        title = raw_title
        if live_now:
            title = self._live_title(raw_title)

        return {
            'id': broadcast_id,
            'title': title,
            'thumbnails': thumbnails,
            'timestamp': int_or_none(item.get('createdAt')),
            'channel': channel_info.get('name'),
            'channel_id': channel_id,
            'channel_url': channel_url,
            'duration': int_or_none(item.get('archiveDuration')),
            'view_count': int_or_none(item.get('viewerCount')),
            'comment_count': int_or_none(item.get('chatCount')),
            'is_live': live_now,
        }
|
||||
|
||||
|
||||
class LineLiveIE(LineLiveBaseIE):
    """Extractor for a single LINE LIVE broadcast (live or archived)."""
    _VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
        'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
        'info_dict': {
            'id': '16331360',
            'title': '振りコピ講座😙😙😙',
            'ext': 'mp4',
            'timestamp': 1617095132,
            'upload_date': '20210330',
            'channel': '白川ゆめか',
            'channel_id': '4867368',
            'view_count': int,
            'comment_count': int,
            'is_live': False,
        }
    }, {
        # archiveStatus == 'DELETED'
        'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the broadcast JSON and build the info dict with HLS formats.

        The ``liveHLSURLs`` / ``archivedHLSURLs`` mapping of the response is
        walked key by key: the ``abr`` entry is expanded as an m3u8 master
        playlist, every other entry becomes a single format.
        """
        channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
        broadcast = self._download_json(
            self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
            broadcast_id)
        item = broadcast['item']
        info = self._parse_broadcast_item(item)
        protocol = 'm3u8' if info['is_live'] else 'm3u8_native'
        formats = []
        for k, v in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items():
            if not v:
                continue
            if k == 'abr':
                formats.extend(self._extract_m3u8_formats(
                    v, broadcast_id, 'mp4', protocol,
                    m3u8_id='hls', fatal=False))
                continue
            f = {
                'ext': 'mp4',
                'format_id': 'hls-' + k,
                'protocol': protocol,
                'url': v,
            }
            # Non-numeric keys are marked as having no video stream
            if not k.isdigit():
                f['vcodec'] = 'none'
            formats.append(f)
        if not formats:
            archive_status = item.get('archiveStatus')
            if archive_status != 'ARCHIVED':
                # 'archiveStatus' may be missing/null in the response; calling
                # .lower() on None would raise AttributeError and hide the
                # real problem, so use a generic message in that case.
                if archive_status:
                    message = 'this video has been ' + archive_status.lower()
                else:
                    message = 'this video is not available'
                self.raise_no_formats(message, expected=True)
        self._sort_formats(formats)
        info['formats'] = formats
        return info
|
||||
|
||||
|
||||
class LineLiveChannelIE(LineLiveBaseIE):
    """Playlist extractor listing the archived broadcasts of a channel."""
    _VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
    _TEST = {
        'url': 'https://live.line.me/channels/5893542',
        'info_dict': {
            'id': '5893542',
            'title': 'いくらちゃん',
            'description': 'md5:c3a4af801f43b2fac0b02294976580be',
        },
        'playlist_mincount': 29
    }

    def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
        """Yield one url-type entry per archived broadcast, following pages.

        ``archived_broadcasts`` is the first page (a dict with ``rows`` and
        ``hasNextPage``); further pages are requested with ``lastId`` until
        ``hasNextPage`` is falsy.
        """
        while True:
            # Cursor for the next request. It is tracked per page and taken
            # from every row (not only the yielded ones) so that a page
            # without any usable row can neither reference an unbound
            # variable nor re-request the same page forever.
            # NOTE(review): assumes `lastId` is a plain "rows after this id"
            # cursor -- confirm against the API.
            last_id = None
            for row in (archived_broadcasts.get('rows') or []):
                last_id = str_or_none(row.get('id')) or last_id
                share_url = str_or_none(row.get('shareURL'))
                if not share_url:
                    continue
                info = self._parse_broadcast_item(row)
                info.update({
                    '_type': 'url',
                    'url': share_url,
                    'ie_key': LineLiveIE.ie_key(),
                })
                yield info
            if not archived_broadcasts.get('hasNextPage') or not last_id:
                return
            archived_broadcasts = self._download_json(
                self._API_BASE_URL + channel_id + '/archived_broadcasts',
                channel_id, query={
                    'lastId': last_id,
                })

    def _real_extract(self, url):
        """Download the channel JSON and return its archive as a playlist."""
        channel_id = self._match_id(url)
        channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
        return self.playlist_result(
            self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
            channel_id, channel.get('title'), channel.get('information'))
|
||||
|
||||
@@ -331,7 +331,7 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
})
|
||||
|
||||
if unaccessible_videos > 0:
|
||||
self._downloader.report_warning(
|
||||
self.report_warning(
|
||||
'%s videos are only available for members (or paid members) and will not be downloaded. '
|
||||
% unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
|
||||
|
||||
|
||||
31
yt_dlp/extractor/maoritv.py
Normal file
31
yt_dlp/extractor/maoritv.py
Normal file
@@ -0,0 +1,31 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MaoriTVIE(InfoExtractor):
    """Resolve a maoritelevision.com show page to its Brightcove player URL."""
    _VALID_URL = r'https?://(?:www\.)?maoritelevision\.com/shows/(?:[^/]+/)+(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.maoritelevision.com/shows/korero-mai/S01E054/korero-mai-series-1-episode-54',
        'md5': '5ade8ef53851b6a132c051b1cd858899',
        'info_dict': {
            'id': '4774724855001',
            'ext': 'mp4',
            'title': 'Kōrero Mai, Series 1 Episode 54',
            'upload_date': '20160226',
            'timestamp': 1456455018,
            'description': 'md5:59bde32fd066d637a1a55794c56d8dcb',
            'uploader_id': '1614493167001',
        },
    }
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Find the video id in the page and delegate to 'BrightcoveNew'."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        # Numeric id taken from the page's data-main-video-id attribute
        bc_id = self._search_regex(
            r'data-main-video-id=["\'](\d+)', page, 'brightcove id')
        player_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id
        return self.url_result(player_url, 'BrightcoveNew', bc_id)
|
||||
@@ -97,11 +97,11 @@ class MedalTVIE(InfoExtractor):
|
||||
error = clip.get('error')
|
||||
if not formats and error:
|
||||
if error == 404:
|
||||
raise ExtractorError(
|
||||
self.raise_no_formats(
|
||||
'That clip does not exist.',
|
||||
expected=True, video_id=video_id)
|
||||
else:
|
||||
raise ExtractorError(
|
||||
self.raise_no_formats(
|
||||
'An unknown error occurred ({0}).'.format(error),
|
||||
video_id=video_id)
|
||||
|
||||
|
||||
@@ -1,21 +1,20 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
from datetime import datetime
|
||||
import itertools
|
||||
import json
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError, std_headers,
|
||||
std_headers,
|
||||
update_url_query,
|
||||
random_uuidv4,
|
||||
try_get,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
@@ -47,29 +46,24 @@ class MildomBaseIE(InfoExtractor):
|
||||
|
||||
def _fetch_dispatcher_config(self):
|
||||
if not self._DISPATCHER_CONFIG:
|
||||
try:
|
||||
tmp = self._download_json(
|
||||
'https://disp.mildom.com/serverListV2', 'initialization',
|
||||
note='Downloading dispatcher_config', data=json.dumps({
|
||||
'protover': 0,
|
||||
'data': base64.b64encode(json.dumps({
|
||||
'fr': 'web',
|
||||
'sfr': 'pc',
|
||||
'devi': 'Windows',
|
||||
'la': 'ja',
|
||||
'gid': None,
|
||||
'loc': '',
|
||||
'clu': '',
|
||||
'wh': '1919*810',
|
||||
'rtm': self.iso_timestamp(),
|
||||
'ua': std_headers['User-Agent'],
|
||||
}).encode('utf8')).decode('utf8').replace('\n', ''),
|
||||
}).encode('utf8'))
|
||||
self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
|
||||
except ExtractorError:
|
||||
self._DISPATCHER_CONFIG = self._download_json(
|
||||
'https://bookish-octo-barnacle.vercel.app/api/dispatcher_config', 'initialization',
|
||||
note='Downloading dispatcher_config fallback')
|
||||
tmp = self._download_json(
|
||||
'https://disp.mildom.com/serverListV2', 'initialization',
|
||||
note='Downloading dispatcher_config', data=json.dumps({
|
||||
'protover': 0,
|
||||
'data': base64.b64encode(json.dumps({
|
||||
'fr': 'web',
|
||||
'sfr': 'pc',
|
||||
'devi': 'Windows',
|
||||
'la': 'ja',
|
||||
'gid': None,
|
||||
'loc': '',
|
||||
'clu': '',
|
||||
'wh': '1919*810',
|
||||
'rtm': self.iso_timestamp(),
|
||||
'ua': std_headers['User-Agent'],
|
||||
}).encode('utf8')).decode('utf8').replace('\n', ''),
|
||||
}).encode('utf8'))
|
||||
self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
|
||||
return self._DISPATCHER_CONFIG
|
||||
|
||||
@staticmethod
|
||||
@@ -110,6 +104,7 @@ class MildomIE(MildomBaseIE):
|
||||
enterstudio = self._call_api(
|
||||
'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
|
||||
note='Downloading live metadata', query={'user_id': video_id})
|
||||
result_video_id = enterstudio.get('log_id', video_id)
|
||||
|
||||
title = try_get(
|
||||
enterstudio, (
|
||||
@@ -128,7 +123,7 @@ class MildomIE(MildomBaseIE):
|
||||
), compat_str)
|
||||
|
||||
servers = self._call_api(
|
||||
'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', video_id,
|
||||
'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
|
||||
note='Downloading live server list', query={
|
||||
'user_id': video_id,
|
||||
'live_server_type': 'hls',
|
||||
@@ -139,24 +134,19 @@ class MildomIE(MildomBaseIE):
|
||||
'is_lhls': '0',
|
||||
})
|
||||
m3u8_url = update_url_query(servers['stream_server'] + '/%s_master.m3u8' % video_id, stream_query)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', headers={
|
||||
formats = self._extract_m3u8_formats(m3u8_url, result_video_id, 'mp4', headers={
|
||||
'Referer': 'https://www.mildom.com/',
|
||||
'Origin': 'https://www.mildom.com',
|
||||
}, note='Downloading m3u8 information')
|
||||
|
||||
del stream_query['streamReqId'], stream_query['timestamp']
|
||||
for fmt in formats:
|
||||
# Uses https://github.com/nao20010128nao/bookish-octo-barnacle by @nao20010128nao as a proxy
|
||||
parsed = compat_urlparse.urlparse(fmt['url'])
|
||||
parsed = parsed._replace(
|
||||
netloc='bookish-octo-barnacle.vercel.app',
|
||||
query=compat_urllib_parse_urlencode(stream_query, True),
|
||||
path='/api' + parsed.path)
|
||||
fmt['url'] = compat_urlparse.urlunparse(parsed)
|
||||
fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': result_video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
@@ -172,9 +162,8 @@ class MildomVodIE(MildomBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
m = self._VALID_URL_RE.match(url)
|
||||
user_id = m.group('user_id')
|
||||
m = re.match(self._VALID_URL, url)
|
||||
user_id, video_id = m.group('user_id'), m.group('id')
|
||||
url = 'https://www.mildom.com/playback/%s/%s' % (user_id, video_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@@ -199,16 +188,16 @@ class MildomVodIE(MildomBaseIE):
|
||||
lambda x: x['author_info']['login_name'],
|
||||
), compat_str)
|
||||
|
||||
audio_formats = [{
|
||||
formats = [{
|
||||
'url': autoplay['audio_url'],
|
||||
'format_id': 'audio',
|
||||
'protocol': 'm3u8_native',
|
||||
'vcodec': 'none',
|
||||
'acodec': 'aac',
|
||||
'ext': 'm4a'
|
||||
}]
|
||||
video_formats = []
|
||||
for fmt in autoplay['video_link']:
|
||||
video_formats.append({
|
||||
formats.append({
|
||||
'format_id': 'video-%s' % fmt['name'],
|
||||
'url': fmt['url'],
|
||||
'protocol': 'm3u8_native',
|
||||
@@ -216,23 +205,9 @@ class MildomVodIE(MildomBaseIE):
|
||||
'height': fmt['level'],
|
||||
'vcodec': 'h264',
|
||||
'acodec': 'aac',
|
||||
'ext': 'mp4'
|
||||
})
|
||||
|
||||
stream_query = self._common_queries({
|
||||
'is_lhls': '0',
|
||||
})
|
||||
del stream_query['timestamp']
|
||||
formats = audio_formats + video_formats
|
||||
for fmt in formats:
|
||||
fmt['ext'] = 'mp4'
|
||||
parsed = compat_urlparse.urlparse(fmt['url'])
|
||||
stream_query['path'] = parsed.path[5:]
|
||||
parsed = parsed._replace(
|
||||
netloc='bookish-octo-barnacle.vercel.app',
|
||||
query=compat_urllib_parse_urlencode(stream_query, True),
|
||||
path='/api/vod2/proxy')
|
||||
fmt['url'] = compat_urlparse.urlunparse(parsed)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
@@ -258,16 +233,7 @@ class MildomUserVodIE(MildomBaseIE):
|
||||
'playlist_mincount': 351,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
|
||||
self._downloader.report_warning('To download ongoing live, please use "https://www.mildom.com/%s" instead. This will list up VODs belonging to user.' % user_id)
|
||||
|
||||
profile = self._call_api(
|
||||
'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
|
||||
query={'user_id': user_id}, note='Downloading user profile')['user_info']
|
||||
|
||||
results = []
|
||||
def _entries(self, user_id):
|
||||
for page in itertools.count(1):
|
||||
reply = self._call_api(
|
||||
'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
|
||||
@@ -278,7 +244,16 @@ class MildomUserVodIE(MildomBaseIE):
|
||||
})
|
||||
if not reply:
|
||||
break
|
||||
results.extend('https://www.mildom.com/playback/%s/%s' % (user_id, x['v_id']) for x in reply)
|
||||
return self.playlist_result([
|
||||
self.url_result(u, ie=MildomVodIE.ie_key()) for u in results
|
||||
], user_id, 'Uploads from %s' % profile['loginname'])
|
||||
for x in reply:
|
||||
yield self.url_result('https://www.mildom.com/playback/%s/%s' % (user_id, x['v_id']))
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
self.to_screen('This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/%s" instead' % user_id)
|
||||
|
||||
profile = self._call_api(
|
||||
'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
|
||||
query={'user_id': user_id}, note='Downloading user profile')['user_info']
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(user_id), user_id, 'Uploads from %s' % profile['loginname'])
|
||||
|
||||
@@ -157,7 +157,7 @@ class MixcloudIE(MixcloudBaseIE):
|
||||
})
|
||||
|
||||
if not formats and cloudcast.get('isExclusive'):
|
||||
self.raise_login_required()
|
||||
self.raise_login_required(metadata_available=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -275,7 +275,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_child_with_type(parent, t):
|
||||
return next(c for c in parent['children'] if c.get('type') == t)
|
||||
for c in parent['children']:
|
||||
if c.get('type') == t:
|
||||
return c
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
try:
|
||||
@@ -306,7 +308,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||
main_container = self._extract_child_with_type(data, 'MainContainer')
|
||||
video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
|
||||
ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
|
||||
video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
|
||||
mgid = video_player['props']['media']['video']['config']['uri']
|
||||
|
||||
return mgid
|
||||
|
||||
197
yt_dlp/extractor/nebula.py
Normal file
197
yt_dlp/extractor/nebula.py
Normal file
@@ -0,0 +1,197 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class NebulaIE(InfoExtractor):
    """Extractor for watchnebula.com video pages.

    Logs in to the Nebula API, looks the video up on the Zype API and returns
    a ``url_transparent`` result pointing at the Zype embed player.
    """

    _VALID_URL = r'https?://(?:www\.)?watchnebula\.com/videos/(?P<id>[-\w]+)'
    _TESTS = [
        {
            'url': 'https://watchnebula.com/videos/that-time-disney-remade-beauty-and-the-beast',
            'md5': 'fe79c4df8b3aa2fea98a93d027465c7e',
            'info_dict': {
                'id': '5c271b40b13fd613090034fd',
                'ext': 'mp4',
                'title': 'That Time Disney Remade Beauty and the Beast',
                'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
                'upload_date': '20180731',
                'timestamp': 1533009600,
                'channel': 'Lindsay Ellis',
                'uploader': 'Lindsay Ellis',
            },
            'params': {
                'usenetrc': True,
            },
            'skip': 'All Nebula content requires authentication',
        },
        {
            'url': 'https://watchnebula.com/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'md5': '6d4edd14ce65720fa63aba5c583fb328',
            'info_dict': {
                'id': '5e7e78171aaf320001fbd6be',
                'ext': 'mp4',
                'title': 'Landing Craft - How The Allies Got Ashore',
                'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
                'upload_date': '20200327',
                'timestamp': 1585348140,
                'channel': 'The Logistics of D-Day',
                'uploader': 'The Logistics of D-Day',
            },
            'params': {
                'usenetrc': True,
            },
            'skip': 'All Nebula content requires authentication',
        },
        {
            'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
            'md5': '8c7d272910eea320f6f8e6d3084eecf5',
            'info_dict': {
                'id': '5e779ebdd157bc0001d1c75a',
                'ext': 'mp4',
                'title': 'Episode 1: The Draw',
                'description': r'contains:There’s free money on offer… if the players can all work together.',
                'upload_date': '20200323',
                'timestamp': 1584980400,
                'channel': 'Tom Scott Presents: Money',
                'uploader': 'Tom Scott Presents: Money',
            },
            'params': {
                'usenetrc': True,
            },
            'skip': 'All Nebula content requires authentication',
        },
    ]
    _NETRC_MACHINE = 'watchnebula'

    def _retrieve_nebula_auth(self, video_id):
        """
        Log in to Nebula with the configured credentials and return the
        ``key`` field of the login response (the Nebula API token).

        ``raise_login_required`` is invoked when no credentials are
        configured or the response carries no key.
        """

        username, password = self._get_login_info()
        if not (username and password):
            self.raise_login_required()

        self.report_login()
        data = json.dumps({'email': username, 'password': password}).encode('utf8')
        response = self._download_json(
            'https://api.watchnebula.com/api/v1/auth/login/',
            data=data, fatal=False, video_id=video_id,
            headers={
                'content-type': 'application/json',
                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
                'cookie': ''
            },
            note='Authenticating to Nebula with supplied credentials',
            errnote='Authentication failed or rejected')
        if not response or not response.get('key'):
            self.raise_login_required()
        return response['key']

    def _retrieve_zype_api_key(self, page_url, display_id):
        """
        Retrieve the Zype API key embedded in the site's main JS chunk.

        The video page is downloaded to locate the ``main.<hex>.chunk.js``
        script, which is then downloaded and searched for
        ``REACT_APP_ZYPE_API_KEY``.
        """

        # Find the js that has the API key from the webpage and download it
        webpage = self._download_webpage(page_url, video_id=display_id)
        # Dots are escaped so that only a literal "main.<hex>.chunk.js"
        # file name matches (an unescaped "." would match any character).
        main_script_relpath = self._search_regex(
            r'<script[^>]*src="(?P<script_relpath>[^"]*main\.[0-9a-f]*\.chunk\.js)"[^>]*>', webpage,
            group='script_relpath', name='script relative path', fatal=True)
        main_script_abspath = urljoin(page_url, main_script_relpath)
        main_script = self._download_webpage(main_script_abspath, video_id=display_id,
                                             note='Retrieving Zype API key')

        api_key = self._search_regex(
            r'REACT_APP_ZYPE_API_KEY\s*:\s*"(?P<api_key>[\w-]*)"', main_script,
            group='api_key', name='API key', fatal=True)

        return api_key

    def _call_zype_api(self, path, params, video_id, api_key, note):
        """
        A helper for making calls to the Zype API.

        ``params`` is merged over the default query (``api_key`` and
        ``per_page=1``), so callers may override either value.
        """
        query = {'api_key': api_key, 'per_page': 1}
        query.update(params)
        return self._download_json('https://api.zype.com' + path, video_id, query=query, note=note)

    def _call_nebula_api(self, path, video_id, access_token, note):
        """
        A helper for making calls to the Nebula API; ``access_token`` is sent
        as a ``Token`` Authorization header.
        """
        return self._download_json('https://api.watchnebula.com/api/v1' + path, video_id, headers={
            'Authorization': 'Token {access_token}'.format(access_token=access_token)
        }, note=note)

    def _fetch_zype_access_token(self, video_id, nebula_token):
        """
        Return the Zype access token stored on the Nebula user object.

        Raises ExtractorError when the user object carries no token; the
        error is flagged as expected for subscribed accounts.
        """
        user_object = self._call_nebula_api('/auth/user/', video_id, nebula_token, note='Retrieving Zype access token')
        access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str)
        if not access_token:
            if try_get(user_object, lambda x: x['is_subscribed'], bool):
                # TODO: Reimplement the same Zype token polling the Nebula frontend implements
                # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
                raise ExtractorError(
                    'Unable to extract Zype access token from Nebula API authentication endpoint. '
                    'Open an arbitrary video in a browser with this account to generate a token',
                    expected=True)
            raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
        return access_token

    def _extract_channel_title(self, video_meta):
        """
        Return the first value of the first category that has values, or
        None when the metadata has no such category.
        """
        # TODO: Implement the API calls giving us the channel list,
        # so that we can do the title lookup and then figure out the channel URL
        # `or []` also covers an explicit null "categories" field
        categories = (video_meta or {}).get('categories') or []
        # the channel name is the value of the first category
        for category in categories:
            if category.get('value'):
                return category['value'][0]

    def _real_extract(self, url):
        """Authenticate, look the video up on Zype and return the result."""
        display_id = self._match_id(url)
        nebula_token = self._retrieve_nebula_auth(display_id)
        api_key = self._retrieve_zype_api_key(url, display_id)

        response = self._call_zype_api('/videos', {'friendly_title': display_id},
                                       display_id, api_key, note='Retrieving metadata from Zype')
        if len(response.get('response') or []) != 1:
            raise ExtractorError('Unable to find video on Zype API')
        video_meta = response['response'][0]

        video_id = video_meta['_id']
        zype_access_token = self._fetch_zype_access_token(display_id, nebula_token=nebula_token)

        channel_title = self._extract_channel_title(video_meta)

        return {
            'id': video_id,
            'display_id': display_id,
            '_type': 'url_transparent',
            'ie_key': 'Zype',
            'url': 'https://player.zype.com/embed/%s.html?access_token=%s' % (video_id, zype_access_token),
            'title': video_meta.get('title'),
            'description': video_meta.get('description'),
            'timestamp': parse_iso8601(video_meta.get('published_at')),
            # A null "thumbnails" field or an entry without a URL must not
            # abort the whole extraction, so both are tolerated here.
            'thumbnails': [
                {
                    'id': tn.get('name'),  # this appears to be null
                    'url': tn['url'],
                    'width': tn.get('width'),
                    'height': tn.get('height'),
                } for tn in (video_meta.get('thumbnails') or []) if tn.get('url')],
            'duration': video_meta.get('duration'),
            'channel': channel_title,
            'uploader': channel_title,  # we chose uploader = channel name
            # TODO: uploader_url, channel_id, channel_url
        }
|
||||
144
yt_dlp/extractor/nfhsnetwork.py
Normal file
144
yt_dlp/extractor/nfhsnetwork.py
Normal file
@@ -0,0 +1,144 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
from ..utils import (
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp
|
||||
)
|
||||
|
||||
|
||||
class NFHSNetworkIE(InfoExtractor):
|
||||
IE_NAME = 'NFHSNetwork'
|
||||
_VALID_URL = r'https?://(?:www\.)?nfhsnetwork\.com/events/[\w-]+/(?P<id>(?:gam|evt|dd|)?[\w\d]{0,10})'
|
||||
_TESTS = [{
|
||||
# Auto-generated two-team sport (pixellot)
|
||||
'url': 'https://www.nfhsnetwork.com/events/rockford-high-school-rockford-mi/gamcf7e54cfbc',
|
||||
'info_dict': {
|
||||
'id': 'gamcf7e54cfbc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rockford vs Spring Lake - Girls Varsity Lacrosse 03/27/2021',
|
||||
'uploader': 'MHSAA - Michigan: Rockford High School, Rockford, MI',
|
||||
'uploader_id': 'cd2622cf76',
|
||||
'uploader_url': 'https://www.nfhsnetwork.com/schools/rockford-high-school-rockford-mi',
|
||||
'location': 'Rockford, Michigan',
|
||||
'timestamp': 1616859000,
|
||||
'upload_date': '20210327'
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# Non-sport activity with description
|
||||
'url': 'https://www.nfhsnetwork.com/events/limon-high-school-limon-co/evt4a30e3726c',
|
||||
'info_dict': {
|
||||
'id': 'evt4a30e3726c',
|
||||
'ext': 'mp4',
|
||||
'title': 'Drama Performance Limon High School vs. Limon High School - 12/13/2020',
|
||||
'description': 'Join the broadcast of the Limon High School Musical Performance at 2 PM.',
|
||||
'uploader': 'CHSAA: Limon High School, Limon, CO',
|
||||
'uploader_id': '7d2d121332',
|
||||
'uploader_url': 'https://www.nfhsnetwork.com/schools/limon-high-school-limon-co',
|
||||
'location': 'Limon, Colorado',
|
||||
'timestamp': 1607893200,
|
||||
'upload_date': '20201213'
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# Postseason game
|
||||
'url': 'https://www.nfhsnetwork.com/events/nfhs-network-special-events/dd8de71d45',
|
||||
'info_dict': {
|
||||
'id': 'dd8de71d45',
|
||||
'ext': 'mp4',
|
||||
'title': '2015 UA Holiday Classic Tournament: National Division - 12/26/2015',
|
||||
'uploader': 'SoCal Sports Productions',
|
||||
'uploader_id': '063dba0150',
|
||||
'uploader_url': 'https://www.nfhsnetwork.com/affiliates/socal-sports-productions',
|
||||
'location': 'San Diego, California',
|
||||
'timestamp': 1451187000,
|
||||
'upload_date': '20151226'
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# Video with no broadcasts object
|
||||
'url': 'https://www.nfhsnetwork.com/events/wiaa-wi/9aa2f92f82',
|
||||
'info_dict': {
|
||||
'id': '9aa2f92f82',
|
||||
'ext': 'mp4',
|
||||
'title': 'Competitive Equity - 01/21/2015',
|
||||
'description': 'Committee members discuss points of their research regarding a competitive equity plan',
|
||||
'uploader': 'WIAA - Wisconsin: Wisconsin Interscholastic Athletic Association',
|
||||
'uploader_id': 'a49f7d1002',
|
||||
'uploader_url': 'https://www.nfhsnetwork.com/associations/wiaa-wi',
|
||||
'location': 'Stevens Point, Wisconsin',
|
||||
'timestamp': 1421856000,
|
||||
'upload_date': '20150121'
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
    """Extract one NFHS Network event (live broadcast or VOD).

    Downloads the event web page (used only for the title) and the
    ``game_or_event`` JSON document, then asks the API for the HLS manifest
    URL of the matching broadcast/VOD and builds the info dict from it.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)
    data = self._download_json(
        'https://cfunity.nfhsnetwork.com/v2/game_or_event/' + video_id,
        video_id)
    publisher = data.get('publishers')[0]  # always exists
    # some (older) videos don't have a broadcasts object
    broadcast = (publisher.get('broadcasts') or publisher.get('vods'))[0]
    uploader = publisher.get('formatted_name') or publisher.get('name')
    uploader_id = publisher.get('publisher_key')

    # Map the publisher type to the URL path segment of its public page.
    # A missing type is treated like an unknown one (falls back to "schools")
    # instead of raising TypeError on the substring test.
    pub_type = publisher.get('type') or ''
    if pub_type == 'school':
        uploader_prefix = 'schools'
    elif 'association' in pub_type:
        uploader_prefix = 'associations'
    elif pub_type in ('publisher', 'affiliate'):
        uploader_prefix = 'affiliates'
    else:
        uploader_prefix = 'schools'

    # Only build the uploader URL when a slug is present; otherwise the
    # result would be a bogus ".../None" link.
    slug = publisher.get('slug')
    uploader_url = (
        'https://www.nfhsnetwork.com/%s/%s' % (uploader_prefix, slug)
        if slug else None)

    # Join only the parts that are present so a missing city/state does not
    # leak the literal string "None" into the metadata.
    location = ', '.join(
        part for part in (data.get('city'), data.get('state_name')) if part) or None
    description = broadcast.get('description')
    is_live = bool(broadcast.get('on_air') or broadcast.get('status') == 'on_air')

    start_time = data.get('local_start_time')
    timestamp = unified_timestamp(start_time)
    upload_date = unified_strdate(start_time)

    # default=None makes the og:title lookup non-fatal so that the <h1>
    # fallback is actually reachable when the meta tag is absent.
    title = (
        self._og_search_title(webpage, default=None)
        or self._html_search_regex(r'<h1 class="sr-hidden">(.*?)</h1>', webpage, 'title'))
    title = title.split('|')[0].strip()

    video_type = 'broadcasts' if is_live else 'vods'
    key = broadcast.get('key') if is_live else try_get(publisher, lambda x: x['vods'][0]['key'])
    m3u8_url = self._download_json(
        'https://cfunity.nfhsnetwork.com/v2/%s/%s/url' % (video_type, key),
        video_id).get('video_url')

    formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=is_live)
    self._sort_formats(formats, ['res', 'tbr'])

    return {
        'id': video_id,
        'title': title,
        'formats': formats,
        'description': description,
        'timestamp': timestamp,
        'uploader': uploader,
        'uploader_id': uploader_id,
        'uploader_url': uploader_url,
        'location': location,
        'upload_date': upload_date,
        'is_live': is_live
    }
|
||||
@@ -190,7 +190,7 @@ class NiconicoIE(InfoExtractor):
|
||||
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
|
||||
login_ok = False
|
||||
if not login_ok:
|
||||
self._downloader.report_warning('unable to log in: bad username or password')
|
||||
self.report_warning('unable to log in: bad username or password')
|
||||
return login_ok
|
||||
|
||||
def _get_heartbeat_info(self, info_dict):
|
||||
|
||||
@@ -11,60 +11,100 @@ from ..utils import (
|
||||
determine_ext,
|
||||
)
|
||||
import re
|
||||
import random
|
||||
|
||||
|
||||
class NitterIE(InfoExtractor):
|
||||
# Taken from https://github.com/zedeus/nitter/wiki/Instances
|
||||
INSTANCES = ('nitter.net',
|
||||
'nitter.snopyta.org',
|
||||
'nitter.42l.fr',
|
||||
'nitter.nixnet.services',
|
||||
'nitter.13ad.de',
|
||||
'nitter.pussthecat.org',
|
||||
'nitter.mastodont.cat',
|
||||
'nitter.dark.fail',
|
||||
'nitter.tedomum.net',
|
||||
'nitter.cattube.org',
|
||||
'nitter.fdn.fr',
|
||||
'nitter.1d4.us',
|
||||
'nitter.kavin.rocks',
|
||||
'tweet.lambda.dance',
|
||||
'nitter.cc',
|
||||
'nitter.weaponizedhumiliation.com',
|
||||
'3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
|
||||
'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
|
||||
'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion')
|
||||
|
||||
NON_HTTP_INSTANCES = (
|
||||
'3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
|
||||
'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
|
||||
'nitter7bryz3jv7e3uekphigvmoyoem4al3fynerxkj22dmoxoq553qd.onion',
|
||||
'npf37k3mtzwxreiw52ccs5ay4e6qt2fkcs2ndieurdyn2cuzzsfyfvid.onion',
|
||||
'nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion',
|
||||
'i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion',
|
||||
'26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion',
|
||||
|
||||
'nitter.i2p',
|
||||
'u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p',
|
||||
|
||||
'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion',
|
||||
)
|
||||
|
||||
HTTP_INSTANCES = (
|
||||
'nitter.42l.fr',
|
||||
'nitter.pussthecat.org',
|
||||
'nitter.nixnet.services',
|
||||
'nitter.mastodont.cat',
|
||||
'nitter.tedomum.net',
|
||||
'nitter.fdn.fr',
|
||||
'nitter.1d4.us',
|
||||
'nitter.kavin.rocks',
|
||||
'tweet.lambda.dance',
|
||||
'nitter.cc',
|
||||
'nitter.vxempire.xyz',
|
||||
'nitter.unixfox.eu',
|
||||
'nitter.domain.glass',
|
||||
'nitter.himiko.cloud',
|
||||
'nitter.eu',
|
||||
'nitter.namazso.eu',
|
||||
'nitter.mailstation.de',
|
||||
'nitter.actionsack.com',
|
||||
'nitter.cattube.org',
|
||||
'nitter.dark.fail',
|
||||
'birdsite.xanny.family',
|
||||
'nitter.40two.app',
|
||||
'nitter.skrep.in',
|
||||
|
||||
# not in the list anymore
|
||||
'nitter.snopyta.org',
|
||||
)
|
||||
|
||||
DEAD_INSTANCES = (
|
||||
# maintenance
|
||||
'nitter.ethibox.fr',
|
||||
|
||||
# official, rate limited
|
||||
'nitter.net',
|
||||
# offline
|
||||
'nitter.13ad.de',
|
||||
'nitter.weaponizedhumiliation.com',
|
||||
)
|
||||
|
||||
INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES
|
||||
|
||||
_INSTANCES_RE = '(?:' + '|'.join([re.escape(instance) for instance in INSTANCES]) + ')'
|
||||
_VALID_URL = r'https?://%(instance)s/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?' % {'instance': _INSTANCES_RE}
|
||||
current_instance = INSTANCES[0] # the test and official instance
|
||||
current_instance = random.choice(HTTP_INSTANCES)
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
# GIF (wrapped in mp4)
|
||||
'url': 'https://' + current_instance + '/firefox/status/1314279897502629888#m',
|
||||
'url': 'https://%s/firefox/status/1314279897502629888#m' % current_instance,
|
||||
'info_dict': {
|
||||
'id': '1314279897502629888',
|
||||
'ext': 'mp4',
|
||||
'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
|
||||
'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
|
||||
'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n\nReport harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg\n\n#UnfckTheInternet',
|
||||
'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n\nReport harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg\n\n#UnfckTheInternet',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Firefox 🔥',
|
||||
'uploader_id': 'firefox',
|
||||
'uploader_url': 'https://' + current_instance + '/firefox',
|
||||
'uploader_url': 'https://%s/firefox' % current_instance,
|
||||
'upload_date': '20201008',
|
||||
'timestamp': 1602183720,
|
||||
},
|
||||
}, { # normal video
|
||||
'url': 'https://' + current_instance + '/Le___Doc/status/1299715685392756737#m',
|
||||
'url': 'https://%s/Le___Doc/status/1299715685392756737#m' % current_instance,
|
||||
'info_dict': {
|
||||
'id': '1299715685392756737',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le Doc - "Je ne prédis jamais rien" D Raoult, Août 2020...',
|
||||
'description': '"Je ne prédis jamais rien" D Raoult, Août 2020...',
|
||||
'title': 'Le Doc - "Je ne prédis jamais rien"\nD Raoult, Août 2020...',
|
||||
'description': '"Je ne prédis jamais rien"\nD Raoult, Août 2020...',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Le Doc',
|
||||
'uploader_id': 'Le___Doc',
|
||||
'uploader_url': 'https://' + current_instance + '/Le___Doc',
|
||||
'uploader_url': 'https://%s/Le___Doc' % current_instance,
|
||||
'upload_date': '20200829',
|
||||
'timestamp': 1598711341,
|
||||
'view_count': int,
|
||||
@@ -73,31 +113,51 @@ class NitterIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
},
|
||||
}, { # video embed in a "Streaming Political Ads" box
|
||||
'url': 'https://' + current_instance + '/mozilla/status/1321147074491092994#m',
|
||||
'url': 'https://%s/mozilla/status/1321147074491092994#m' % current_instance,
|
||||
'info_dict': {
|
||||
'id': '1321147074491092994',
|
||||
'ext': 'mp4',
|
||||
'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
|
||||
'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
|
||||
'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?\n\nThis isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. \n\nLearn more ➡️ https://mzl.la/StreamingAds",
|
||||
'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?\n\nThis isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. \n\nLearn more ➡️ https://mzl.la/StreamingAds",
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Mozilla',
|
||||
'uploader_id': 'mozilla',
|
||||
'uploader_url': 'https://' + current_instance + '/mozilla',
|
||||
'uploader_url': 'https://%s/mozilla' % current_instance,
|
||||
'upload_date': '20201027',
|
||||
'timestamp': 1603820982
|
||||
},
|
||||
},
|
||||
}, { # not the first tweet but main-tweet
|
||||
'url': 'https://%s/TheNaturalNu/status/1379050895539724290#m' % current_instance,
|
||||
'info_dict': {
|
||||
'id': '1379050895539724290',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dorothy Zbornak - This had me hollering!!',
|
||||
'description': 'This had me hollering!!',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Dorothy Zbornak',
|
||||
'uploader_id': 'TheNaturalNu',
|
||||
'uploader_url': 'https://%s/TheNaturalNu' % current_instance,
|
||||
'timestamp': 1617626329,
|
||||
'upload_date': '20210405'
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
base_url = parsed_url.scheme + '://' + parsed_url.netloc
|
||||
base_url = '%s://%s' % (parsed_url.scheme, parsed_url.netloc)
|
||||
|
||||
self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
full_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = base_url + self._html_search_regex(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url')
|
||||
main_tweet_start = full_webpage.find('class="main-tweet"')
|
||||
if main_tweet_start > 0:
|
||||
webpage = full_webpage[main_tweet_start:]
|
||||
if not webpage:
|
||||
webpage = full_webpage
|
||||
|
||||
video_url = '%s%s' % (base_url, self._html_search_regex(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url'))
|
||||
ext = determine_ext(video_url)
|
||||
|
||||
if ext == 'unknown_video':
|
||||
@@ -108,33 +168,34 @@ class NitterIE(InfoExtractor):
|
||||
'ext': ext
|
||||
}]
|
||||
|
||||
title = (
|
||||
self._og_search_description(webpage).replace('\n', ' ')
|
||||
or self._html_search_regex(r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title'))
|
||||
title = self._og_search_description(full_webpage)
|
||||
if not title:
|
||||
title = self._html_search_regex(r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title')
|
||||
description = title
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader_id = (
|
||||
mobj.group('uploader_id')
|
||||
or self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False))
|
||||
or self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
|
||||
)
|
||||
|
||||
if uploader_id:
|
||||
uploader_url = base_url + '/' + uploader_id
|
||||
uploader_url = '%s/%s' % (base_url, uploader_id)
|
||||
|
||||
uploader = self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
|
||||
|
||||
if uploader:
|
||||
title = uploader + ' - ' + title
|
||||
title = '%s - %s' % (uploader, title)
|
||||
|
||||
view_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-play[^>]*></span>\s([^<]+)</div>', webpage, 'view count', fatal=False))
|
||||
like_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-heart[^>]*></span>\s([^<]+)</div>', webpage, 'like count', fatal=False))
|
||||
repost_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-retweet[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
|
||||
comment_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-comment[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
|
||||
|
||||
thumbnail = base_url + (self._html_search_meta('og:image', webpage, 'thumbnail url')
|
||||
or self._html_search_regex(r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False))
|
||||
|
||||
thumbnail = remove_end(thumbnail, '%3Asmall') # if parsed with regex, it should contain this
|
||||
thumbnail = self._html_search_meta('og:image', full_webpage, 'thumbnail url')
|
||||
if not thumbnail:
|
||||
thumbnail = '%s%s' % (base_url, self._html_search_regex(r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False))
|
||||
thumbnail = remove_end(thumbnail, '%3Asmall')
|
||||
|
||||
thumbnails = []
|
||||
thumbnail_ids = ('thumb', 'small', 'large', 'medium', 'orig')
|
||||
|
||||
@@ -247,7 +247,7 @@ class NPOIE(NPOBaseIE):
|
||||
|
||||
if not formats:
|
||||
if not self._downloader.params.get('allow_unplayable_formats') and drm:
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
self.raise_no_formats('This video is DRM protected.', expected=True)
|
||||
return
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -260,7 +260,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
if not formats:
|
||||
payment_info = metadata.get('paymentInfo')
|
||||
if payment_info:
|
||||
raise ExtractorError('This video is paid, subscribe to download it', expected=True)
|
||||
self.raise_no_formats('This video is paid, subscribe to download it', expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -10,7 +10,6 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
@@ -85,7 +84,7 @@ class OoyalaBaseIE(InfoExtractor):
|
||||
'fps': float_or_none(stream.get('framerate')),
|
||||
})
|
||||
if not formats and not auth_data.get('authorized'):
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.raise_no_formats('%s said: %s' % (
|
||||
self.IE_NAME, auth_data['message']), expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
148
yt_dlp/extractor/palcomp3.py
Normal file
148
yt_dlp/extractor/palcomp3.py
Normal file
@@ -0,0 +1,148 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class PalcoMP3BaseIE(InfoExtractor):
    """Common GraphQL plumbing shared by the PalcoMP3 extractors.

    Subclasses tune the query through ``_ARTIST_FIELDS_TMPL`` and
    ``_MUSIC_FIELDS`` and may override ``_parse_music``.
    """

    # Outer query: filled with the artist slug and the artist field selection.
    _GQL_QUERY_TMPL = '''{
  artist(slug: "%s") {
    %s
  }
}'''
    # Artist-level selection. The escaped "%%s" survives the interpolation
    # done in _real_initialize and later receives the music slug.
    _ARTIST_FIELDS_TMPL = '''music(slug: "%%s") {
      %s
    }'''
    # Fields requested for every music node.
    _MUSIC_FIELDS = '''duration
      hls
      mp3File
      musicID
      plays
      title'''

    def _call_api(self, artist_slug, artist_fields):
        """Run the artist query and return the ``data`` member of the reply."""
        gql = self._GQL_QUERY_TMPL % (artist_slug, artist_fields)
        response = self._download_json(
            'https://www.palcomp3.com.br/graphql/', artist_slug,
            query={'query': gql})
        return response['data']

    def _parse_music(self, music):
        """Turn one music node of the API reply into an info dict."""
        music_id = compat_str(music['musicID'])
        title = music['title']

        # HLS entry first, plain MP3 file second (same order as appended before).
        formats = []
        hls_url = music.get('hls')
        if hls_url:
            formats.append(dict(url=hls_url, protocol='m3u8_native', ext='mp4'))
        mp3_file = music.get('mp3File')
        if mp3_file:
            formats.append(dict(url=mp3_file))

        info = {
            'id': music_id,
            'title': title,
            'formats': formats,
        }
        info['duration'] = int_or_none(music.get('duration'))
        info['view_count'] = int_or_none(music.get('plays'))
        return info

    def _real_initialize(self):
        # Bake the music field list into the artist template (stored on the
        # instance), leaving a single "%s" placeholder.
        tmpl = self._ARTIST_FIELDS_TMPL
        self._ARTIST_FIELDS_TMPL = tmpl % self._MUSIC_FIELDS

    def _real_extract(self, url):
        """Fetch a single music entry addressed by artist slug and music slug."""
        mobj = re.match(self._VALID_URL, url)
        artist_slug, music_slug = mobj.groups()
        fields = self._ARTIST_FIELDS_TMPL % music_slug
        api_data = self._call_api(artist_slug, fields)
        return self._parse_music(api_data['artist']['music'])
|
||||
|
||||
|
||||
class PalcoMP3IE(PalcoMP3BaseIE):
    """Extractor for a single PalcoMP3 song page."""

    IE_NAME = 'PalcoMP3:song'
    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
        'md5': '99fd6405b2d8fd589670f6db1ba3b358',
        'info_dict': {
            'id': '3162927',
            'ext': 'mp3',
            'title': 'Nossas Composições - CUIDA BEM DELA',
            'duration': 210,
            'view_count': int,
        }
    }]

    @classmethod
    def suitable(cls, url):
        # URLs accepted by the video extractor are left to it.
        if PalcoMP3VideoIE.suitable(url):
            return False
        return super(PalcoMP3IE, cls).suitable(url)
|
||||
|
||||
|
||||
class PalcoMP3ArtistIE(PalcoMP3BaseIE):
    """Extractor that turns a PalcoMP3 artist page into a playlist."""

    IE_NAME = 'PalcoMP3:artist'
    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://www.palcomp3.com.br/condedoforro/',
        'info_dict': {
            'id': '358396',
            'title': 'Conde do Forró',
        },
        'playlist_mincount': 188,
    }]
    # Artist-level selection: id, name and every music node of the artist.
    _ARTIST_FIELDS_TMPL = '''artistID
      musics {
        nodes {
          %s
        }
      }
      name'''

    @classmethod
    def suitable(cls, url):
        # Anything matching the song URL pattern is not an artist page.
        if re.match(PalcoMP3IE._VALID_URL, url):
            return False
        return super(PalcoMP3ArtistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist with one entry per music node of the artist."""
        artist_slug = self._match_id(url)
        api_data = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)
        artist = api_data['artist']

        nodes = try_get(artist, lambda x: x['musics']['nodes'], list) or []
        # Entries are still produced lazily, one per node.
        entries = (self._parse_music(node) for node in nodes)

        playlist_id = str_or_none(artist.get('artistID'))
        return self.playlist_result(entries, playlist_id, artist.get('name'))
|
||||
|
||||
|
||||
class PalcoMP3VideoIE(PalcoMP3BaseIE):
    """Extractor for the "#clipe" view of a song, which points at YouTube."""

    IE_NAME = 'PalcoMP3:video'
    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)/?#clipe'
    _TESTS = [{
        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',
        'add_ie': ['Youtube'],
        'info_dict': {
            'id': '_pD1nR2qqPg',
            'ext': 'mp4',
            'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
            'description': 'md5:7043342c09a224598e93546e98e49282',
            'upload_date': '20161107',
            'uploader_id': 'maiaramaraisaoficial',
            'uploader': 'Maiara e Maraisa',
        }
    }]
    # Only the YouTube id is requested for the music node.
    _MUSIC_FIELDS = 'youtubeID'

    def _parse_music(self, music):
        """Hand the music's YouTube id over to the Youtube extractor."""
        video_ref = music['youtubeID']
        return self.url_result(video_ref, 'Youtube', video_ref)
|
||||
@@ -79,7 +79,7 @@ class PhilharmonieDeParisIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
if not formats:
|
||||
if not formats and not self._downloader.params.get('ignore_no_formats'):
|
||||
return
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
|
||||
@@ -1,22 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
try_get,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class PicartoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://picarto.tv/Setz',
|
||||
'info_dict': {
|
||||
@@ -34,65 +27,46 @@ class PicartoIE(InfoExtractor):
|
||||
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://api.picarto.tv/v1/channel/name/' + channel_id,
|
||||
channel_id)
|
||||
data = self._download_json(
|
||||
'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
|
||||
'query': '''{
|
||||
channel(name: "%s") {
|
||||
adult
|
||||
id
|
||||
online
|
||||
stream_name
|
||||
title
|
||||
}
|
||||
getLoadBalancerUrl(channel_name: "%s") {
|
||||
url
|
||||
}
|
||||
}''' % (channel_id, channel_id),
|
||||
})['data']
|
||||
metadata = data['channel']
|
||||
|
||||
if metadata.get('online') is False:
|
||||
if metadata.get('online') == 0:
|
||||
raise ExtractorError('Stream is offline', expected=True)
|
||||
title = metadata['title']
|
||||
|
||||
cdn_data = self._download_json(
|
||||
'https://picarto.tv/process/channel', channel_id,
|
||||
data=urlencode_postdata({'loadbalancinginfo': channel_id}),
|
||||
note='Downloading load balancing info')
|
||||
data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
|
||||
channel_id, 'Downloading load balancing info')
|
||||
|
||||
token = mobj.group('token') or 'public'
|
||||
params = {
|
||||
'con': int(time.time() * 1000),
|
||||
'token': token,
|
||||
}
|
||||
|
||||
prefered_edge = cdn_data.get('preferedEdge')
|
||||
formats = []
|
||||
|
||||
for edge in cdn_data['edges']:
|
||||
edge_ep = edge.get('ep')
|
||||
if not edge_ep or not isinstance(edge_ep, compat_str):
|
||||
for source in (cdn_data.get('source') or []):
|
||||
source_url = source.get('url')
|
||||
if not source_url:
|
||||
continue
|
||||
edge_id = edge.get('id')
|
||||
for tech in cdn_data['techs']:
|
||||
tech_label = tech.get('label')
|
||||
tech_type = tech.get('type')
|
||||
preference = 0
|
||||
if edge_id == prefered_edge:
|
||||
preference += 1
|
||||
format_id = []
|
||||
if edge_id:
|
||||
format_id.append(edge_id)
|
||||
if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
|
||||
format_id.append('hls')
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
update_url_query(
|
||||
'https://%s/hls/%s/index.m3u8'
|
||||
% (edge_ep, channel_id), params),
|
||||
channel_id, 'mp4', quality=preference,
|
||||
m3u8_id='-'.join(format_id), fatal=False))
|
||||
continue
|
||||
elif tech_type == 'video/mp4' or tech_label == 'MP4':
|
||||
format_id.append('mp4')
|
||||
formats.append({
|
||||
'url': update_url_query(
|
||||
'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
|
||||
params),
|
||||
'format_id': '-'.join(format_id),
|
||||
'quality': preference,
|
||||
})
|
||||
else:
|
||||
# rtmp format does not seem to work
|
||||
continue
|
||||
source_type = source.get('type')
|
||||
if source_type == 'html5/application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif source_type == 'html5/video/mp4':
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
mature = metadata.get('adult')
|
||||
@@ -103,10 +77,10 @@ class PicartoIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': channel_id,
|
||||
'title': self._live_title(metadata.get('title') or channel_id),
|
||||
'title': self._live_title(title.strip()),
|
||||
'is_live': True,
|
||||
'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
|
||||
'channel': channel_id,
|
||||
'channel_id': metadata.get('id'),
|
||||
'channel_url': 'https://picarto.tv/%s' % channel_id,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
|
||||
@@ -393,7 +393,7 @@ query viewClip {
|
||||
# To somewhat reduce the probability of these consequences
|
||||
# we will sleep random amount of time before each call to ViewClip.
|
||||
self._sleep(
|
||||
random.randint(2, 5), display_id,
|
||||
random.randint(5, 10), display_id,
|
||||
'%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
|
||||
|
||||
if not viewclip:
|
||||
|
||||
@@ -398,6 +398,16 @@ class PornHubIE(PornHubBaseIE):
|
||||
formats = []
|
||||
|
||||
def add_format(format_url, height=None):
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
return
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
return
|
||||
tbr = None
|
||||
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
|
||||
if mobj:
|
||||
@@ -417,16 +427,6 @@ class PornHubIE(PornHubBaseIE):
|
||||
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
|
||||
if upload_date:
|
||||
upload_date = upload_date.replace('/', '')
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id='dash', fatal=False))
|
||||
continue
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
if '/video/get_media' in video_url:
|
||||
medias = self._download_json(video_url, video_id, fatal=False)
|
||||
if isinstance(medias, list):
|
||||
|
||||
@@ -5,15 +5,16 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
GeoRestrictedError,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
remove_start,
|
||||
@@ -94,7 +95,9 @@ class RaiBaseIE(InfoExtractor):
|
||||
})
|
||||
|
||||
if not formats and geoprotection is True:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
|
||||
|
||||
formats.extend(self._create_http_urls(relinker_url, formats))
|
||||
|
||||
return dict((k, v) for k, v in {
|
||||
'is_live': is_live,
|
||||
@@ -102,6 +105,92 @@ class RaiBaseIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
}.items() if v is not None)
|
||||
|
||||
def _create_http_urls(self, relinker_url, fmts):
    """Build direct HTTPS MP4 formats from a relinker URL.

    :param relinker_url: relinker URL of the media.
    :param fmts: formats already extracted (e.g. from the HLS manifest);
        used to copy width/height/codec information onto the MP4 variants.
    :return: list of format dicts, empty when the MP4 override is not
        available or the resolved URL does not match the expected layout.
    """
    _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\d+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
    _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
    _QUALITY = {
        # tbr: w, h
        '250': [352, 198],
        '400': [512, 288],
        '700': [512, 288],
        '800': [700, 394],
        '1200': [736, 414],
        '1800': [1024, 576],
        '2400': [1280, 720],
        '3200': [1440, 810],
        '3600': [1440, 810],
        '5000': [1920, 1080],
        '10000': [1920, 1080],
    }

    def test_url(url):
        """HEAD ``url``; return the redirect target, False or None.

        False: request failed, or 200 without a redirect.
        None: non-200 response.
        """
        resp = self._request_webpage(
            HEADRequest(url), None, headers={'User-Agent': 'Rai'},
            fatal=False, errnote=False, note=False)

        if resp is False:
            return False

        if resp.code == 200:
            return False if resp.url == url else resp.url
        return None

    def get_format_info(tbr):
        """Return width/height/tbr/format_id metadata for quality ``tbr``."""
        br = int_or_none(tbr)
        if len(fmts) == 1 and not br:
            br = fmts[0].get('tbr')
        # br may still be None here (quality '*' with zero or several known
        # formats); comparing None with an int raises on Python 3.
        if br and br > 300:
            # Integer arithmetic keeps the key free of a trailing ".0"
            # (math.floor returns a float on Python 2).
            tbr = compat_str(int(br // 100) * 100)
        else:
            tbr = '250'

        # try extracting info from available m3u8 formats
        format_copy = None
        if br:
            br_limit = int(br // 100)
            for f in fmts:
                f_tbr = f.get('tbr')
                if f_tbr and br_limit - 1 <= int(f_tbr // 100) <= br_limit + 1:
                    format_copy = f.copy()

        if format_copy:
            return {
                'width': format_copy.get('width'),
                'height': format_copy.get('height'),
                'tbr': format_copy.get('tbr'),
                'vcodec': format_copy.get('vcodec'),
                'acodec': format_copy.get('acodec'),
                'fps': format_copy.get('fps'),
                'format_id': 'https-%s' % tbr,
            }

        info = {
            'format_id': 'https-%s' % tbr,
            'tbr': int(tbr),
        }
        # Bitrate buckets missing from the table simply get no resolution
        # instead of aborting the extraction with a KeyError.
        resolution = _QUALITY.get(tbr)
        if resolution:
            info['width'], info['height'] = resolution
        return info

    # The MP4 override is only usable when the relinker redirects for it.
    loc = test_url(_MP4_TMPL % (relinker_url, '*'))
    if not isinstance(loc, compat_str):
        return []

    mobj = re.match(
        _RELINKER_REG,
        test_url(relinker_url) or '')
    if not mobj:
        return []

    available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*']
    available_qualities = [i for i in available_qualities if i]

    formats = []
    for q in available_qualities:
        fmt = {
            'url': _MP4_TMPL % (relinker_url, q),
            'protocol': 'https',
            'ext': 'mp4',
        }
        fmt.update(get_format_info(q))
        formats.append(fmt)
    return formats
|
||||
|
||||
@staticmethod
|
||||
def _extract_subtitles(url, video_data):
|
||||
STL_EXT = 'stl'
|
||||
@@ -151,6 +240,22 @@ class RaiPlayIE(RaiBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# 1080p direct mp4 url
|
||||
'url': 'https://www.raiplay.it/video/2021/03/Leonardo-S1E1-b5703b02-82ee-475a-85b6-c9e4a8adf642.html',
|
||||
'md5': '2e501e8651d72f05ffe8f5d286ad560b',
|
||||
'info_dict': {
|
||||
'id': 'b5703b02-82ee-475a-85b6-c9e4a8adf642',
|
||||
'ext': 'mp4',
|
||||
'title': 'Leonardo - S1E1',
|
||||
'alt_title': 'St 1 Ep 1 - Episodio 1',
|
||||
'description': 'md5:f5360cd267d2de146e4e3879a5a47d31',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Rai 1',
|
||||
'duration': 3229,
|
||||
'series': 'Leonardo',
|
||||
'season': 'Season 1',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
||||
'only_matching': True,
|
||||
@@ -318,7 +423,7 @@ class RaiIE(RaiBaseIE):
|
||||
}, {
|
||||
# with ContentItem in og:url
|
||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
|
||||
'md5': '6865dd00cf0bbf5772fdd89d59bd768a',
|
||||
'md5': '06345bd97c932f19ffb129973d07a020',
|
||||
'info_dict': {
|
||||
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
|
||||
'ext': 'mp4',
|
||||
@@ -350,22 +455,6 @@ class RaiIE(RaiBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# ContentItem in iframe (see #12652) and subtitle at 'subtitlesUrl' key
|
||||
'url': 'http://www.presadiretta.rai.it/dl/portali/site/puntata/ContentItem-3ed19d13-26c2-46ff-a551-b10828262f1b.html',
|
||||
'info_dict': {
|
||||
'id': '1ad6dc64-444a-42a4-9bea-e5419ad2f5fd',
|
||||
'ext': 'mp4',
|
||||
'title': 'Partiti acchiappavoti - Presa diretta del 13/09/2015',
|
||||
'description': 'md5:d291b03407ec505f95f27970c0b025f4',
|
||||
'upload_date': '20150913',
|
||||
'subtitles': {
|
||||
'it': 'count:2',
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Direct MMS URL
|
||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
|
||||
|
||||
@@ -202,7 +202,7 @@ class RuutuIE(InfoExtractor):
|
||||
if not formats:
|
||||
if (not self._downloader.params.get('allow_unplayable_formats')
|
||||
and xpath_text(video_xml, './Clip/DRM', default=None)):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
self.raise_no_formats('This video is DRM protected.', expected=True)
|
||||
ns_st_cds = pv('ns_st_cds')
|
||||
if ns_st_cds != 'free':
|
||||
raise ExtractorError('This video is %s.' % ns_st_cds, expected=True)
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
class SBSIE(InfoExtractor):
|
||||
IE_DESC = 'sbs.com.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=|/watch/)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Original URL is handled by the generic IE which finds the iframe:
|
||||
@@ -43,6 +43,9 @@ class SBSIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -2,12 +2,18 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
from ..utils import (
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class ScreencastOMaticIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://screencast-o-matic\.com/(?:(?:watch|player)/|embed\?.*?\bsc=)(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
|
||||
'md5': '483583cb80d92588f15ccbedd90f0c18',
|
||||
'info_dict': {
|
||||
@@ -16,22 +22,30 @@ class ScreencastOMaticIE(InfoExtractor):
|
||||
'title': 'Welcome to 3-4 Philosophy @ DECV!',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
|
||||
'duration': 369.163,
|
||||
'duration': 369,
|
||||
'upload_date': '20141216',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://screencast-o-matic.com/embed?ff=true&sc=cbV2r4Q5TL&fromPH=true&a=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
jwplayer_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", webpage, 'setup code'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
|
||||
info_dict.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
webpage = self._download_webpage(
|
||||
'https://screencast-o-matic.com/player/' + video_id, video_id)
|
||||
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': get_element_by_class('overlayTitle', webpage),
|
||||
'description': strip_or_none(get_element_by_class('overlayDescription', webpage)) or None,
|
||||
'duration': int_or_none(self._search_regex(
|
||||
r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};',
|
||||
webpage, 'duration', default=None)),
|
||||
'upload_date': unified_strdate(remove_start(
|
||||
get_element_by_class('overlayPublished', webpage), 'Published: ')),
|
||||
})
|
||||
return info_dict
|
||||
return info
|
||||
|
||||
@@ -312,7 +312,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
self._update_client_id()
|
||||
continue
|
||||
elif non_fatal:
|
||||
self._downloader.report_warning(error_to_compat_str(e))
|
||||
self.report_warning(error_to_compat_str(e))
|
||||
return False
|
||||
raise
|
||||
|
||||
@@ -498,7 +498,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
f['vcodec'] = 'none'
|
||||
|
||||
if not formats and info.get('policy') == 'BLOCK':
|
||||
self.raise_geo_restricted()
|
||||
self.raise_geo_restricted(metadata_available=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
user = info.get('user') or {}
|
||||
|
||||
@@ -77,7 +77,7 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
continue
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
|
||||
if not formats:
|
||||
if not formats and not self._downloader.params.get('ignore_no_formats'):
|
||||
continue
|
||||
yield {
|
||||
'id': video_id,
|
||||
|
||||
@@ -139,7 +139,7 @@ class SteamIE(InfoExtractor):
|
||||
'format_id': ext + quality,
|
||||
'url': video_url,
|
||||
})
|
||||
if not formats:
|
||||
if not formats and not self._downloader.params.get('ignore_no_formats'):
|
||||
continue
|
||||
entry['formats'] = formats
|
||||
entries.append(entry)
|
||||
|
||||
@@ -49,7 +49,7 @@ class SVTBaseIE(InfoExtractor):
|
||||
if not formats and rights.get('geoBlockedSweden'):
|
||||
self.raise_geo_restricted(
|
||||
'This video is only available in Sweden',
|
||||
countries=self._GEO_COUNTRIES)
|
||||
countries=self._GEO_COUNTRIES, metadata_available=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
||||
@@ -7,7 +7,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
@@ -156,10 +155,9 @@ class ToggleIE(InfoExtractor):
|
||||
for meta in (info.get('Metas') or []):
|
||||
if (not self._downloader.params.get('allow_unplayable_formats')
|
||||
and meta.get('Key') == 'Encryption' and meta.get('Value') == '1'):
|
||||
raise ExtractorError(
|
||||
self.raise_no_formats(
|
||||
'This video is DRM protected.', expected=True)
|
||||
# Most likely because geo-blocked
|
||||
raise ExtractorError('No downloadable videos found', expected=True)
|
||||
# Most likely because geo-blocked if no formats and no DRM
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
|
||||
@@ -7,13 +7,19 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class TubiTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
tubitv:|
|
||||
https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/
|
||||
)
|
||||
(?P<id>[0-9]+)'''
|
||||
_LOGIN_URL = 'http://tubitv.com/login'
|
||||
_NETRC_MACHINE = 'tubitv'
|
||||
_GEO_COUNTRIES = ['US']
|
||||
@@ -108,3 +114,28 @@ class TubiTvIE(InfoExtractor):
|
||||
'uploader_id': video_data.get('publisher_id'),
|
||||
'release_year': int_or_none(video_data.get('year')),
|
||||
}
|
||||
|
||||
|
||||
class TubiTvShowIE(InfoExtractor):
    # Playlist extractor for tubitv.com series pages. Every key found under
    # video.fullContentById in the page data is handed to TubiTvIE through an
    # internal 'tubitv:<id>' URL.
    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/[0-9]+/(?P<show_name>[^/?#]+)'
    _TESTS = [{
        'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true',
        'playlist_mincount': 390,
        'info_dict': {
            'id': 'the-joy-of-painting-with-bob-ross',
        }
    }]

    def _entries(self, show_url, show_name):
        # Generator: the show page is only downloaded once iteration starts.
        webpage = self._download_webpage(show_url, show_name)
        # The page embeds its state as a JS object literal assigned to
        # window.__data; js_to_json makes it parseable as JSON.
        raw_data = self._search_regex(
            r"window\.__data\s*=\s*({.+?});\s*</script>", webpage, 'data')
        video_section = self._parse_json(
            raw_data, show_name, transform_source=js_to_json)['video']
        for episode_id in video_section['fullContentById']:
            yield self.url_result(
                'tubitv:%s' % episode_id,
                ie=TubiTvIE.ie_key(), video_id=episode_id)

    def _real_extract(self, url):
        # The show slug from the URL doubles as the playlist id.
        mobj = re.match(self._VALID_URL, url)
        show_name = mobj.group('show_name')
        return self.playlist_result(
            self._entries(url, show_name), playlist_id=show_name)
|
||||
|
||||
@@ -103,7 +103,7 @@ class TV2IE(InfoExtractor):
|
||||
'filesize': int_or_none(item.get('fileSize')),
|
||||
})
|
||||
if not formats and data.get('drmProtected'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
self.raise_no_formats('This video is DRM protected.', expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
|
||||
@@ -107,7 +107,7 @@ class TV4IE(InfoExtractor):
|
||||
video_id, ism_id='mss', fatal=False))
|
||||
|
||||
if not formats and info.get('is_geo_restricted'):
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -298,7 +298,8 @@ class TVPlayIE(InfoExtractor):
|
||||
|
||||
if not formats and video.get('is_geo_blocked'):
|
||||
self.raise_geo_restricted(
|
||||
'This content might not be available in your country due to copyright reasons')
|
||||
'This content might not be available in your country due to copyright reasons',
|
||||
metadata_available=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -65,7 +65,9 @@ class TwitCastingIE(InfoExtractor):
|
||||
request_data = urlencode_postdata({
|
||||
'password': video_password,
|
||||
})
|
||||
webpage = self._download_webpage(url, video_id, data=request_data)
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, data=request_data,
|
||||
headers={'Origin': 'https://twitcasting.tv'})
|
||||
|
||||
title = clean_html(get_element_by_id(
|
||||
'movietitle', webpage)) or self._html_search_meta(
|
||||
@@ -77,14 +79,15 @@ class TwitCastingIE(InfoExtractor):
|
||||
webpage, 'm3u8 url', group='url', default=None)
|
||||
if not m3u8_url:
|
||||
video_js_data = self._parse_json(self._search_regex(
|
||||
r"data-movie-playlist='(\[[^']+\])'",
|
||||
webpage, 'movie playlist'), video_id)[0]
|
||||
r'data-movie-playlist=(["\'])(?P<url>(?:(?!\1).)+)',
|
||||
webpage, 'movie playlist', group='url'), video_id)
|
||||
if isinstance(video_js_data, dict):
|
||||
video_js_data = list(video_js_data.values())[0]
|
||||
video_js_data = video_js_data[0]
|
||||
m3u8_url = video_js_data['source']['url']
|
||||
|
||||
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
thumbnail = video_js_data.get('thumbnailUrl') or self._og_search_thumbnail(webpage)
|
||||
description = clean_html(get_element_by_id(
|
||||
'authorcomment', webpage)) or self._html_search_meta(
|
||||
|
||||
@@ -10,7 +10,6 @@ from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
@@ -193,8 +192,8 @@ class VideomoreIE(InfoExtractor):
|
||||
error = item.get('error')
|
||||
if error:
|
||||
if error in ('Данное видео недоступно для просмотра на территории этой страны', 'Данное видео доступно для просмотра только на территории России'):
|
||||
self.raise_geo_restricted(countries=['RU'])
|
||||
raise ExtractorError(error, expected=True)
|
||||
self.raise_geo_restricted(countries=['RU'], metadata_available=True)
|
||||
self.raise_no_formats(error, expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
||||
@@ -24,6 +24,7 @@ from ..utils import (
|
||||
merge_dicts,
|
||||
OnDemandPagedList,
|
||||
parse_filesize,
|
||||
parse_iso8601,
|
||||
RegexNotFoundError,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
@@ -74,25 +75,28 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
def _get_video_password(self):
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
data = urlencode_postdata({
|
||||
'password': password,
|
||||
'token': token,
|
||||
})
|
||||
raise ExtractorError(
|
||||
'This video is protected by a password, use the --video-password option',
|
||||
expected=True)
|
||||
return password
|
||||
|
||||
def _verify_video_password(self, url, video_id, password, token, vuid):
|
||||
if url.startswith('http://'):
|
||||
# vimeo only supports https now, but the user can give an http url
|
||||
url = url.replace('http://', 'https://')
|
||||
password_request = sanitized_Request(url + '/password', data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Referer', url)
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
return self._download_webpage(
|
||||
password_request, video_id,
|
||||
'Verifying the password', 'Wrong password')
|
||||
url + '/password', video_id, 'Verifying the password',
|
||||
'Wrong password', data=urlencode_postdata({
|
||||
'password': password,
|
||||
'token': token,
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Referer': url,
|
||||
})
|
||||
|
||||
def _extract_xsrft_and_vuid(self, webpage):
|
||||
xsrft = self._search_regex(
|
||||
@@ -273,7 +277,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
)?
|
||||
(?:videos?/)?
|
||||
(?P<id>[0-9]+)
|
||||
(?:/[\da-f]+)?
|
||||
(?:/(?P<unlisted_hash>[\da-f]{10}))?
|
||||
/?(?:[?&].*)?(?:[#].*)?$
|
||||
'''
|
||||
IE_NAME = 'vimeo'
|
||||
@@ -326,9 +330,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'id': '54469442',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
|
||||
'uploader': 'The BLN & Business of Software',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/theblnbusinessofsoftware',
|
||||
'uploader_id': 'theblnbusinessofsoftware',
|
||||
'uploader': 'Business of Software',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/businessofsoftware',
|
||||
'uploader_id': 'businessofsoftware',
|
||||
'duration': 3610,
|
||||
'description': None,
|
||||
},
|
||||
@@ -463,6 +467,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
'skip': 'this page is no longer available.',
|
||||
},
|
||||
{
|
||||
'url': 'http://player.vimeo.com/video/68375962',
|
||||
@@ -563,9 +568,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
return urls[0] if urls else None
|
||||
|
||||
def _verify_player_video_password(self, url, video_id, headers):
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
password = self._get_video_password()
|
||||
data = urlencode_postdata({
|
||||
'password': base64.b64encode(password.encode()),
|
||||
})
|
||||
@@ -628,11 +631,37 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if 'Referer' not in headers:
|
||||
headers['Referer'] = url
|
||||
|
||||
channel_id = self._search_regex(
|
||||
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
|
||||
|
||||
# Extract ID from URL
|
||||
video_id = self._match_id(url)
|
||||
video_id, unlisted_hash = re.match(self._VALID_URL, url).groups()
|
||||
if unlisted_hash:
|
||||
token = self._download_json(
|
||||
'https://vimeo.com/_rv/jwt', video_id, headers={
|
||||
'X-Requested-With': 'XMLHttpRequest'
|
||||
})['token']
|
||||
video = self._download_json(
|
||||
'https://api.vimeo.com/videos/%s:%s' % (video_id, unlisted_hash),
|
||||
video_id, headers={
|
||||
'Authorization': 'jwt ' + token,
|
||||
}, query={
|
||||
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
|
||||
})
|
||||
info = self._parse_config(self._download_json(
|
||||
video['config_url'], video_id), video_id)
|
||||
self._vimeo_sort_formats(info['formats'])
|
||||
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
|
||||
info.update({
|
||||
'description': video.get('description'),
|
||||
'license': video.get('license'),
|
||||
'release_timestamp': get_timestamp('release'),
|
||||
'timestamp': get_timestamp('created'),
|
||||
'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
|
||||
})
|
||||
connections = try_get(
|
||||
video, lambda x: x['metadata']['connections'], dict) or {}
|
||||
for k in ('comment', 'like'):
|
||||
info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
|
||||
return info
|
||||
|
||||
orig_url = url
|
||||
is_pro = 'vimeopro.com/' in url
|
||||
is_player = '://player.vimeo.com/video/' in url
|
||||
@@ -722,7 +751,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
|
||||
if '_video_password_verified' in data:
|
||||
raise ExtractorError('video password verification failed!')
|
||||
self._verify_video_password(redirect_url, video_id, webpage)
|
||||
video_password = self._get_video_password()
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
self._verify_video_password(
|
||||
redirect_url, video_id, video_password, token, vuid)
|
||||
return self._real_extract(
|
||||
smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
|
||||
else:
|
||||
@@ -772,7 +804,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
video_description = self._html_search_meta(
|
||||
'description', orig_webpage, default=None)
|
||||
if not video_description and not is_player:
|
||||
self._downloader.report_warning('Cannot find video description')
|
||||
self.report_warning('Cannot find video description')
|
||||
|
||||
# Extract upload date
|
||||
if not timestamp:
|
||||
@@ -808,6 +840,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
|
||||
webpage, 'license', default=None, group='license')
|
||||
|
||||
channel_id = self._search_regex(
|
||||
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
|
||||
channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
|
||||
|
||||
info_dict = {
|
||||
@@ -1114,10 +1148,23 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_url, video_id = re.match(self._VALID_URL, url).groups()
|
||||
clip_data = self._download_json(
|
||||
page_url.replace('/review/', '/review/data/'),
|
||||
video_id)['clipData']
|
||||
config_url = clip_data['configUrl']
|
||||
data = self._download_json(
|
||||
page_url.replace('/review/', '/review/data/'), video_id)
|
||||
if data.get('isLocked') is True:
|
||||
video_password = self._get_video_password()
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', video_id)
|
||||
webpage = self._verify_video_password(
|
||||
'https://vimeo.com/' + video_id, video_id,
|
||||
video_password, viewer['xsrft'], viewer['vuid'])
|
||||
clip_page_config = self._parse_json(self._search_regex(
|
||||
r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
|
||||
webpage, 'clip page config'), video_id)
|
||||
config_url = clip_page_config['player']['config_url']
|
||||
clip_data = clip_page_config.get('clip') or {}
|
||||
else:
|
||||
clip_data = data['clipData']
|
||||
config_url = clip_data['configUrl']
|
||||
config = self._download_json(config_url, video_id)
|
||||
info_dict = self._parse_config(config, video_id)
|
||||
source_format = self._extract_original_format(
|
||||
|
||||
@@ -1,16 +1,22 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_kwargs,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
try_get,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
@@ -168,7 +174,8 @@ class ViuPlaylistIE(ViuBaseIE):
|
||||
|
||||
class ViuOTTIE(InfoExtractor):
|
||||
IE_NAME = 'viu:ott'
|
||||
_VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P<country_code>[a-z]{2})/[a-z]{2}-[a-z]{2}/vod/(?P<id>\d+)'
|
||||
_NETRC_MACHINE = 'viu'
|
||||
_VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P<country_code>[a-z]{2})/(?P<lang_code>[a-z]{2}-[a-z]{2})/vod/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I',
|
||||
'info_dict': {
|
||||
@@ -179,6 +186,7 @@ class ViuOTTIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8 download',
|
||||
'noplaylist': True,
|
||||
},
|
||||
'skip': 'Geo-restricted to Singapore',
|
||||
}, {
|
||||
@@ -191,6 +199,19 @@ class ViuOTTIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8 download',
|
||||
'noplaylist': True,
|
||||
},
|
||||
'skip': 'Geo-restricted to Hong Kong',
|
||||
}, {
|
||||
'url': 'https://www.viu.com/ott/hk/zh-hk/vod/68776/%E6%99%82%E5%B0%9A%E5%AA%BD%E5%92%AA',
|
||||
'playlist_count': 12,
|
||||
'info_dict': {
|
||||
'id': '3916',
|
||||
'title': '時尚媽咪',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8 download',
|
||||
'noplaylist': False,
|
||||
},
|
||||
'skip': 'Geo-restricted to Hong Kong',
|
||||
}]
|
||||
@@ -201,9 +222,51 @@ class ViuOTTIE(InfoExtractor):
|
||||
'TH': 4,
|
||||
'PH': 5,
|
||||
}
|
||||
_LANGUAGE_FLAG = {
|
||||
'zh-hk': 1,
|
||||
'zh-cn': 2,
|
||||
'en-us': 3,
|
||||
}
|
||||
_user_info = None
|
||||
|
||||
def _detect_error(self, response):
|
||||
code = response.get('status', {}).get('code')
|
||||
if code > 0:
|
||||
message = try_get(response, lambda x: x['status']['message'])
|
||||
raise ExtractorError('%s said: %s (%s)' % (
|
||||
self.IE_NAME, message, code), expected=True)
|
||||
return response['data']
|
||||
|
||||
def _raise_login_required(self):
    # Always raises: tells the user which credential options to pass,
    # naming this extractor's netrc machine. Raised with expected=True.
    message = (
        'This video requires login. '
        'Specify --username and --password or --netrc (machine: %s) '
        'to provide account credentials.') % self._NETRC_MACHINE
    raise ExtractorError(message, expected=True)
|
||||
|
||||
def _login(self, country_code, video_id):
    # Log in with the configured credentials and cache the result in
    # self._user_info, so the request is made at most once per instance.
    #
    # Returns the 'user' object from the login response, or None when no
    # username/password is configured or the login request failed.
    # Raises ExtractorError (via _detect_error) when the API reports an error
    # status.
    if not self._user_info:
        username, password = self._get_login_info()
        if username is None or password is None:
            return None

        data = self._download_json(
            compat_urllib_request.Request(
                'https://www.viu.com/ott/%s/index.php' % country_code, method='POST'),
            video_id, 'Logging in', errnote=False, fatal=False,
            query={'r': 'user/login'},
            data=json.dumps({
                'username': username,
                'password': password,
                'platform_flag_label': 'web',
            }).encode())
        if not data:
            # NOTE(review): with fatal=False a failed download is expected to
            # come back as a falsy value instead of raising. Stop here so it
            # is not passed to _detect_error, which would crash on it.
            self.report_warning('Unable to log in')
            return None
        self._user_info = self._detect_error(data)['user']

    return self._user_info
|
||||
|
||||
def _real_extract(self, url):
|
||||
country_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||
url, idata = unsmuggle_url(url, {})
|
||||
country_code, lang_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
query = {
|
||||
'r': 'vod/ajax-detail',
|
||||
@@ -223,20 +286,88 @@ class ViuOTTIE(InfoExtractor):
|
||||
if not video_data:
|
||||
raise ExtractorError('This video is not available in your region.', expected=True)
|
||||
|
||||
stream_data = self._download_json(
|
||||
'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
|
||||
video_id, 'Downloading stream info', query={
|
||||
'ccs_product_id': video_data['ccs_product_id'],
|
||||
}, headers={
|
||||
'Referer': url,
|
||||
'Origin': re.search(r'https?://[^/]+', url).group(0),
|
||||
})['data']['stream']
|
||||
series_id = video_data.get('series_id')
|
||||
if not self._downloader.params.get('noplaylist') and not idata.get('force_noplaylist'):
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % series_id)
|
||||
series = product_data.get('series', {})
|
||||
product = series.get('product')
|
||||
if product:
|
||||
entries = []
|
||||
for entry in sorted(product, key=lambda x: int_or_none(x.get('number', 0))):
|
||||
item_id = entry.get('product_id')
|
||||
if not item_id:
|
||||
continue
|
||||
item_id = compat_str(item_id)
|
||||
entries.append(self.url_result(
|
||||
smuggle_url(
|
||||
'http://www.viu.com/ott/%s/%s/vod/%s/' % (country_code, lang_code, item_id),
|
||||
{'force_noplaylist': True}), # prevent infinite recursion
|
||||
'ViuOTT',
|
||||
item_id,
|
||||
entry.get('synopsis', '').strip()))
|
||||
|
||||
return self.playlist_result(entries, series_id, series.get('name'), series.get('description'))
|
||||
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
|
||||
duration_limit = False
|
||||
query = {
|
||||
'ccs_product_id': video_data['ccs_product_id'],
|
||||
'language_flag_id': self._LANGUAGE_FLAG.get(lang_code.lower()) or '3',
|
||||
}
|
||||
headers = {
|
||||
'Referer': url,
|
||||
'Origin': url,
|
||||
}
|
||||
try:
|
||||
stream_data = self._download_json(
|
||||
'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
|
||||
video_id, 'Downloading stream info', query=query, headers=headers)
|
||||
stream_data = self._detect_error(stream_data)['stream']
|
||||
except (ExtractorError, KeyError):
|
||||
stream_data = None
|
||||
if video_data.get('user_level', 0) > 0:
|
||||
user = self._login(country_code, video_id)
|
||||
if user:
|
||||
query['identity'] = user['identity']
|
||||
stream_data = self._download_json(
|
||||
'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
|
||||
video_id, 'Downloading stream info', query=query, headers=headers)
|
||||
stream_data = self._detect_error(stream_data).get('stream')
|
||||
else:
|
||||
# preview is limited to 3min for non-members
|
||||
# try to bypass the duration limit
|
||||
duration_limit = True
|
||||
query['duration'] = '180'
|
||||
stream_data = self._download_json(
|
||||
'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
|
||||
video_id, 'Downloading stream info', query=query, headers=headers)
|
||||
try:
|
||||
stream_data = self._detect_error(stream_data)['stream']
|
||||
except (ExtractorError, KeyError): # if still not working, give up
|
||||
self._raise_login_required()
|
||||
|
||||
if not stream_data:
|
||||
raise ExtractorError('Cannot get stream info', expected=True)
|
||||
|
||||
stream_sizes = stream_data.get('size', {})
|
||||
formats = []
|
||||
for vid_format, stream_url in stream_data.get('url', {}).items():
|
||||
height = int_or_none(self._search_regex(
|
||||
r's(\d+)p', vid_format, 'height', default=None))
|
||||
|
||||
# bypass preview duration limit
|
||||
if duration_limit:
|
||||
stream_url = compat_urlparse.urlparse(stream_url)
|
||||
query = dict(compat_urlparse.parse_qsl(stream_url.query, keep_blank_values=True))
|
||||
time_duration = int_or_none(video_data.get('time_duration'))
|
||||
query.update({
|
||||
'duration': time_duration if time_duration > 0 else '9999999',
|
||||
'duration_start': '0',
|
||||
})
|
||||
stream_url = stream_url._replace(query=compat_urlparse.urlencode(query)).geturl()
|
||||
|
||||
formats.append({
|
||||
'format_id': vid_format,
|
||||
'url': stream_url,
|
||||
|
||||
@@ -113,7 +113,7 @@ class VLiveIE(VLiveBaseIE):
|
||||
raise ExtractorError('Unable to log in', expected=True)
|
||||
|
||||
def _call_api(self, path_template, video_id, fields=None, limit=None):
|
||||
query = {'appId': self._APP_ID, 'gcc': 'KR'}
|
||||
query = {'appId': self._APP_ID, 'gcc': 'KR', 'platformType': 'PC'}
|
||||
if fields:
|
||||
query['fields'] = fields
|
||||
if limit:
|
||||
|
||||
@@ -8,7 +8,6 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -125,13 +124,13 @@ class VubeIE(InfoExtractor):
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
if not formats and video.get('vst') == 'dmca':
|
||||
raise ExtractorError(
|
||||
self.raise_no_formats(
|
||||
'This video has been removed in response to a complaint received under the US Digital Millennium Copyright Act.',
|
||||
expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video['title']
|
||||
description = video.get('description')
|
||||
thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
|
||||
|
||||
@@ -87,7 +87,7 @@ class WatIE(InfoExtractor):
|
||||
extract_formats({delivery.get('format'): delivery.get('url')})
|
||||
if not formats:
|
||||
if delivery.get('drm'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
self.raise_no_formats('This video is DRM protected.', expected=True)
|
||||
manifest_urls = self._download_json(
|
||||
'http://www.wat.tv/get/webhtml/' + video_id, video_id, fatal=False)
|
||||
if manifest_urls:
|
||||
|
||||
@@ -239,7 +239,7 @@ class YahooIE(InfoExtractor):
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
if not formats and msg == 'geo restricted':
|
||||
self.raise_geo_restricted()
|
||||
self.raise_geo_restricted(metadata_available=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor):
|
||||
# request basic data
|
||||
basic_data_params = {
|
||||
'vid': video_id,
|
||||
'ccode': '0590',
|
||||
'ccode': '0532',
|
||||
'client_ip': '192.168.1.1',
|
||||
'utid': cna,
|
||||
'client_ts': time.time() / 1000,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user