Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-12-17 11:22:23 +01:00)
Compare commits
105 Commits
2021.06.23 ... 2021.07.24
| SHA1 |
|---|
| f703a88055 |
| a353beba83 |
| 052e135029 |
| cb89cfc14b |
| 060ac76257 |
| 063c409dfb |
| 767b02a99b |
| f45e6c1126 |
| 3944e7af92 |
| ad34b2951e |
| c8fa48fd94 |
| 2fd226f6a7 |
| 3ba7740dd8 |
| 29b208f6f9 |
| e4d666d27b |
| 245524e6a3 |
| 9c0d7f4951 |
| e37d0efbd9 |
| c926c9541f |
| 982ee69a74 |
| 7ea6541124 |
| ae30b84072 |
| cc9d1493c6 |
| f6755419d1 |
| 145bd631c5 |
| b35496d825 |
| 352d63fdb5 |
| 11f9be0912 |
| c84aeac6b5 |
| 50fed816dd |
| a1a7907bc0 |
| d61fc64618 |
| 6586bca9b9 |
| da503b7a52 |
| 7c365c2109 |
| 3f698246b2 |
| cca80fe611 |
| c634ad2a3c |
| 8f3343809e |
| 0ba692acc8 |
| d9488f69c1 |
| dce8743677 |
| 5520aa2dc9 |
| 8d9b902243 |
| fe93e2c4cf |
| 314ee30548 |
| 34917076ad |
| ccc7795ca3 |
| da1c94ee45 |
| 3b297919e0 |
| 47193e0298 |
| 49bd8c66d3 |
| 182b6ae8a6 |
| c843e68588 |
| 198f7ea89e |
| c888ffb95a |
| 9752433221 |
| f0ff9979c6 |
| 501dd1ad55 |
| 75722b037d |
| 2d6659b9ea |
| c5370857b3 |
| 00034c146a |
| 325ebc1703 |
| 7dde84f3c9 |
| 6606817a86 |
| 73d829c144 |
| 60bdb7bd9e |
| 4bb6b02f93 |
| b5ac45b197 |
| 38a40c9e16 |
| a8bf9b4dc1 |
| 51f8a31d65 |
| be05d5cff1 |
| 30d569d2ac |
| 08625e4125 |
| 3acf6d3856 |
| 46890374f7 |
| 60755938b3 |
| 723d44b92b |
| bc97cdae67 |
| e010672ab5 |
| 169dbde946 |
| 17f0eb66b8 |
| 981052c9c6 |
| b1e60d1806 |
| 6b6c16ca6c |
| f6745c4980 |
| 109dd3b237 |
| c2603313b1 |
| 1e79316e20 |
| 45261e063b |
| 49c258e18d |
| d3f62c1967 |
| 5d3a0e794b |
| 125728b038 |
| 15a4fd53d3 |
| 4513a41a72 |
| 6033d9808d |
| bd4d1ea398 |
| 8e897ed283 |
| 412cce82b0 |
| d534c4520b |
| 2b18a8c590 |
| dac8b87b0c |
.github/ISSUE_TEMPLATE/1_broken_site.md (vendored): 8 changes

@@ -21,7 +21,7 @@ assignees: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.06.09. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.07.21. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.

@@ -29,7 +29,7 @@ Carefully read and work through this check list in order to prevent the most com
-->

- [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running yt-dlp version **2021.06.09**
- [ ] I've verified that I'm running yt-dlp version **2021.07.21**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones

@@ -42,9 +42,9 @@ Provide the complete verbose output of yt-dlp that clearly demonstrates the prob
Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this:
[debug] System config: []
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] yt-dlp version 2021.06.09
[debug] yt-dlp version 2021.07.21
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
@@ -21,7 +21,7 @@ assignees: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.06.09. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.07.21. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://github.com/yt-dlp/yt-dlp. yt-dlp does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.

@@ -29,7 +29,7 @@ Carefully read and work through this check list in order to prevent the most com
-->

- [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running yt-dlp version **2021.06.09**
- [ ] I've verified that I'm running yt-dlp version **2021.07.21**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones
@@ -21,13 +21,13 @@ assignees: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.06.09. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.07.21. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space)
-->

- [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running yt-dlp version **2021.06.09**
- [ ] I've verified that I'm running yt-dlp version **2021.07.21**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
.github/ISSUE_TEMPLATE/4_bug_report.md (vendored): 8 changes

@@ -21,7 +21,7 @@ assignees: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.06.09. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.07.21. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.

@@ -30,7 +30,7 @@ Carefully read and work through this check list in order to prevent the most com
-->

- [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running yt-dlp version **2021.06.09**
- [ ] I've verified that I'm running yt-dlp version **2021.07.21**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones

@@ -44,9 +44,9 @@ Provide the complete verbose output of yt-dlp that clearly demonstrates the prob
Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this:
[debug] System config: []
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] yt-dlp version 2021.06.09
[debug] yt-dlp version 2021.07.21
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
.github/ISSUE_TEMPLATE/5_feature_request.md (vendored): 4 changes

@@ -21,13 +21,13 @@ assignees: ''

<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.06.09. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.07.21. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space)
-->

- [ ] I'm reporting a feature request
- [ ] I've verified that I'm running yt-dlp version **2021.06.09**
- [ ] I've verified that I'm running yt-dlp version **2021.07.21**
- [ ] I've searched the bugtracker for similar feature requests including closed ones
.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md (vendored): 2 changes

@@ -42,7 +42,7 @@ Provide the complete verbose output of yt-dlp that clearly demonstrates the prob
Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this:
[debug] System config: []
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] yt-dlp version %(version)s
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md (vendored): 2 changes

@@ -44,7 +44,7 @@ Provide the complete verbose output of yt-dlp that clearly demonstrates the prob
Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this:
[debug] System config: []
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] yt-dlp version %(version)s
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
.github/workflows/core.yml (vendored): 10 changes

@@ -10,7 +10,7 @@ jobs:
matrix:
os: [ubuntu-18.04]
# py3.9 is in quick-test
python-version: [3.7, 3.8, pypy-3.6, pypy-3.7]
python-version: [3.7, 3.8, 3.10-dev, pypy-3.6, pypy-3.7]
run-tests-ext: [sh]
include:
# atleast one of the tests must be in windows

@@ -23,11 +23,9 @@ jobs:
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install nose
run: pip install nose
- name: Install pytest
run: pip install pytest
- name: Run tests
continue-on-error: False
env:
YTDL_TEST_SET: core
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} core
# Linter is in quick-test
.github/workflows/download.yml (vendored): 10 changes

@@ -9,7 +9,7 @@ jobs:
fail-fast: true
matrix:
os: [ubuntu-18.04]
python-version: [3.7, 3.8, 3.9, pypy-3.6, pypy-3.7]
python-version: [3.7, 3.8, 3.9, 3.10-dev, pypy-3.6, pypy-3.7]
run-tests-ext: [sh]
include:
- os: windows-latest

@@ -21,10 +21,8 @@ jobs:
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install nose
run: pip install nose
- name: Install pytest
run: pip install pytest
- name: Run tests
continue-on-error: true
env:
YTDL_TEST_SET: download
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} download
.github/workflows/quick-test.yml (vendored): 10 changes

@@ -11,12 +11,10 @@ jobs:
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install nose
run: pip install nose
- name: Install test requirements
run: pip install pytest pycryptodome
- name: Run tests
env:
YTDL_TEST_SET: core
run: ./devscripts/run_tests.sh
run: ./devscripts/run_tests.sh core
flake8:
name: Linter
if: "!contains(github.event.head_commit.message, 'ci skip all')"

@@ -30,4 +28,4 @@ jobs:
- name: Install flake8
run: pip install flake8
- name: Run flake8
run: flake8 .
run: flake8 .
.gitignore (vendored): 3 changes

@@ -33,6 +33,7 @@ cookies.txt
*.info.json
*.live_chat.json
*.jpg
*.jpeg
*.png
*.webp
*.annotations.xml

@@ -44,6 +45,7 @@ cookies.txt
# Python
*.pyc
*.pyo
.pytest_cache
wine-py2exe/
py2exe.log
build/

@@ -78,6 +80,7 @@ README.txt
*.tar.gz
*.zsh
*.spec
test/testdata/player-*.js

# Binary
/youtube-dl
@@ -3,7 +3,7 @@
$ youtube-dl -v <your command line>
[debug] System config: []
[debug] User config: []
[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2015.12.06
[debug] Git HEAD: 135392e

@@ -81,16 +81,17 @@ To run the test, simply invoke your favorite test runner, or execute a test file
python -m unittest discover
python test/test_download.py
nosetests
pytest
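For instance, with the pytest runner that this change switches to, the suite can also be narrowed to a single module or keyword; the module and keyword below are only an illustration, not part of the diff:

```sh
# Run only the offline ("core") tests, then a hypothetical narrower selection.
python3 -m pytest -k "not download"
python3 -m pytest test/test_utils.py -k js_to_json   # illustrative module/keyword
```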
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.

If you want to create a build of youtube-dl yourself, you'll need

* python
* python3
* make (only GNU make is supported)
* pandoc
* zip
* nosetests
* pytest

### Adding support for a new site
CONTRIBUTORS: 11 changes

@@ -52,5 +52,14 @@ hhirtz
louie-github
MinePlayersPE
olifre
rhsmachine
rhsmachine/zenerdi0de
nihil-admirari
krichbanana
ohmybahgosh
nyuszika7h
blackjack4494
pyx
TpmKranz
mzbaulhaque
zackmark29
mbway
Changelog.md: 132 changes

@@ -19,12 +19,142 @@
-->

### 2021.07.24

* [youtube:tab] Extract video duration early
* [downloader] Pass `info_dict` to `progress_hook`s
* [youtube] Fix age-gated videos for API clients when cookies are supplied by [colethedj](https://github.com/colethedj)
* [youtube] Disable `get_video_info` age-gate workaround - This endpoint seems to be completely dead
* [youtube] Try all clients even if age-gated
* [youtube] Fix subtitles only being extracted from the first client
* [youtube] Simplify `_get_text`
* [cookies] bugfix for microsoft edge on macOS
* [cookies] Handle `sqlite` `ImportError` gracefully by [mbway](https://github.com/mbway)
* [cookies] Handle errors when importing `keyring`

### 2021.07.21

* **Add option `--cookies-from-browser`** to load cookies from a browser by [mbway](https://github.com/mbway)
* Usage: `--cookies-from-browser BROWSER[:PROFILE_NAME_OR_PATH]`
* Also added `--no-cookies-from-browser`
* To decrypt chromium cookies, `keyring` is needed for UNIX and `pycryptodome` for Windows
* Add option `--exec-before-download`
* Add field `live_status`
* [FFmpegMetadata] Add language of each stream and some refactoring
* [douyin] Add extractor by [pukkandan](https://github.com/pukkandan), [pyx](https://github.com/pyx)
* [pornflip] Add extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
* **[youtube] Extract data from multiple clients** by [pukkandan](https://github.com/pukkandan), [colethedj](https://github.com/colethedj)
* `player_client` now accepts multiple clients
* Default `player_client` = `android,web`
* This uses twice as many requests, but avoids throttling for most videos while also not losing any formats
* Music clients can be specifically requested and is enabled by default if `music.youtube.com`
* Added `player_client=ios` (Known issue: formats from ios are not sorted correctly)
* Add age-gate bypass for android and ios clients
* [youtube] Extract more thumbnails
* The thumbnail URLs are hard-coded and their actual existence is tested lazily
* Added option `--no-check-formats` to not test them
* [youtube] Misc fixes
* Improve extraction of livestream metadata by [pukkandan](https://github.com/pukkandan), [krichbanana](https://github.com/krichbanana)
* Hide live dash formats since they can't be downloaded anyway
* Fix authentication when using multiple accounts by [colethedj](https://github.com/colethedj)
* Fix controversial videos when requested via API by [colethedj](https://github.com/colethedj)
* Fix session index extraction and headers for non-web player clients by [colethedj](https://github.com/colethedj)
* Make `--extractor-retries` work for more errors
* Fix sorting of 3gp format
* Sanity check `chapters` (and refactor related code)
* Make `parse_time_text` and `_extract_chapters` non-fatal
* Misc cleanup and bug fixes by [colethedj](https://github.com/colethedj)
* [youtube:tab] Fix channels tab
* [youtube:tab] Extract playlist availability by [colethedj](https://github.com/colethedj)
* **[youtube:comments] Move comment extraction to new API** by [colethedj](https://github.com/colethedj)
* Adds extractor-args `comment_sort` (`top`/`new`), `max_comments`, `max_comment_depth`
* [youtube:comments] Fix `is_favorited`, improve `like_count` parsing by [colethedj](https://github.com/colethedj)
* [BravoTV] Improve metadata extraction by [kevinoconnor7](https://github.com/kevinoconnor7)
* [crunchyroll:playlist] Force http
* [yahoo:gyao:player] Relax `_VALID_URL` by [nao20010128nao](https://github.com/nao20010128nao)
* [nebula] Authentication via tokens from cookie jar by [hheimbuerger](https://github.com/hheimbuerger), [TpmKranz](https://github.com/TpmKranz)
* [RTP] Fix extraction and add subtitles by [fstirlitz](https://github.com/fstirlitz)
* [viki] Rewrite extractors and add extractor-arg `video_types` to `vikichannel` by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan)
* [vlive] Extract thumbnail directly in addition to the one from Naver
* [generic] Extract previously missed subtitles by [fstirlitz](https://github.com/fstirlitz)
* [generic] Extract everything in the SMIL manifest and detect discarded subtitles by [fstirlitz](https://github.com/fstirlitz)
* [embedthumbnail] Fix `_get_thumbnail_resolution`
* [metadatafromfield] Do not detect numbers as field names
* Fix selectors `all`, `mergeall` and add tests
* Errors in playlist extraction should obey `--ignore-errors`
* Fix bug where `original_url` was not propagated when `_type`=`url`
* Revert "Merge webm formats into mkv if thumbnails are to be embedded (#173)"
* This was wrongly checking for `write_thumbnail`
* Improve `extractor_args` parsing
* Rename `NOTE` in `-F` to `MORE INFO` since it's often confused to be the same as `format_note`
* Add `only_once` param for `write_debug` and `report_warning`
* [extractor] Allow extracting multiple groups in `_search_regex` by [fstirlitz](https://github.com/fstirlitz)
* [utils] Improve `traverse_obj`
* [utils] Add `variadic`
* [utils] Improve `js_to_json` comment regex by [fstirlitz](https://github.com/fstirlitz)
* [webtt] Fix timestamps
* [compat] Remove unnecessary code
* [doc] fix default of multistreams
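For illustration, the two headline options from this release can be combined as below; the browser profile name and the URL are placeholder values, not taken from the changelog:

```sh
# Reuse cookies from a Firefox profile and query the android player client.
# "my-profile" and the video URL are placeholders.
yt-dlp --cookies-from-browser "firefox:my-profile" \
       --extractor-args "youtube:player_client=android" \
       "https://www.youtube.com/watch?v=BaW_jenozKc"
```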
### 2021.07.07

* Merge youtube-dl: Upto [commit/a803582](https://github.com/ytdl-org/youtube-dl/commit/a8035827177d6b59aca03bd717acb6a9bdd75ada)
* Add `--extractor-args` to pass some extractor-specific arguments. See [readme](https://github.com/yt-dlp/yt-dlp#extractor-arguments)
* Add extractor option `skip` for `youtube`. Eg: `--extractor-args youtube:skip=hls,dash`
* Deprecates `--youtube-skip-dash-manifest`, `--youtube-skip-hls-manifest`, `--youtube-include-dash-manifest`, `--youtube-include-hls-manifest`
* Allow `--list...` options to work with `--print`, `--quiet` and other `--list...` options
* [youtube] Use `player` API for additional video extraction requests by [colethedj](https://github.com/colethedj)
* **Fixes youtube premium music** (format 141) extraction
* Adds extractor option `player_client` = `web`/`android`
* **`--extractor-args youtube:player_client=android` works around the throttling** for the time-being
* Adds extractor option `player_skip=config`
* Adds age-gate fallback using embedded client
* [youtube] Choose correct Live chat API for upcoming streams by [krichbanana](https://github.com/krichbanana)
* [youtube] Fix subtitle names for age-gated videos
* [youtube:comments] Fix error handling and add `itct` to params by [colethedj](https://github.com/colethedj)
* [youtube_live_chat] Fix download with cookies by [siikamiika](https://github.com/siikamiika)
* [youtube_live_chat] use `clickTrackingParams` by [siikamiika](https://github.com/siikamiika)
* [Funimation] Rewrite extractor
* Add `FunimationShowIE` by [Mevious](https://github.com/Mevious)
* **Treat the different versions of an episode as different formats of a single video**
* This changes the video `id` and will break existing archives
* Compat option `seperate-video-versions` to fall back to old behavior including using the old video ids
* Support direct `/player/` URL
* Extractor options `language` and `version` to pre-select them during extraction
* These options may be removed in the future if we can extract all formats without additional network requests
* Do not rely on these for format selection and use `-f` filters instead
* [AdobePass] Add Spectrum MSO by [kevinoconnor7](https://github.com/kevinoconnor7), [ohmybahgosh](https://github.com/ohmybahgosh)
* [facebook] Extract description and fix title
* [fancode] Fix extraction, support live and allow login with refresh token by [zenerdi0de](https://github.com/zenerdi0de)
* [plutotv] Improve `_VALID_URL`
* [RCTIPlus] Add extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
* [Soundcloud] Allow login using oauth token by [blackjack4494](https://github.com/blackjack4494)
* [TBS] Support livestreams by [llacb47](https://github.com/llacb47)
* [videa] Fix extraction by [nyuszika7h](https://github.com/nyuszika7h)
* [yahoo] Fix extraction by [llacb47](https://github.com/llacb47), [pukkandan](https://github.com/pukkandan)
* Process videos when using `--ignore-no-formats-error` by [krichbanana](https://github.com/krichbanana)
* Fix `--throttled-rate` when using `--load-info-json`
* Fix `--flat-playlist` when entry has no `ie_key`
* Fix `check_formats` catching `ExtractorError` instead of `DownloadError`
* Fix deprecated option `--list-formats-old`
* [downloader/ffmpeg] Fix `--ppa` when using simultaneous download
* [extractor] Prevent unnecessary download of hls manifests and refactor `hls_split_discontinuity`
* [fragment] Handle status of download and errors in threads correctly; and minor refactoring
* [thumbnailsconvertor] Treat `jpeg` as `jpg`
* [utils] Fix issues with `LazyList` reversal
* [extractor] Allow extractors to set their own login hint
* [cleanup] Simplify format selector code with `LazyList` and `yield from`
* [cleanup] Clean `extractor.common._merge_subtitles` signature
* [cleanup] Fix some typos

### 2021.06.23

* Merge youtube-dl: Upto [commit/379f52a](https://github.com/ytdl-org/youtube-dl/commit/379f52a4954013767219d25099cce9e0f9401961)
* **Add option `--throttled-rate`** below which video data is re-extracted
* [fragment] **Merge during download for `-N`**, and refactor `hls`/`dash`
* [websockets] Add `WebSocketFragmentFD`by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan)
* [websockets] Add `WebSocketFragmentFD` by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan)
* Allow `images` formats in addition to video/audio
* [downloader/mhtml] Add new downloader for slideshows/storyboards by [fstirlitz](https://github.com/fstirlitz)
* [youtube] Temporary **fix for age-gate**
Makefile: 18 changes

@@ -13,7 +13,7 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites com
.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites

clean-test:
rm -rf *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png *.frag *.frag.urls *.frag.aria2
rm -rf *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png *.frag *.frag.urls *.frag.aria2 test/testdata/player-*.js
clean-dist:
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap
clean-cache:

@@ -49,23 +49,11 @@ codetest:
flake8 .

test:
#nosetests --with-coverage --cover-package=yt_dlp --cover-html --verbose --processes 4 test
nosetests --verbose test
$(PYTHON) -m pytest
$(MAKE) codetest

# Keep this list in sync with devscripts/run_tests.sh
offlinetest: codetest
$(PYTHON) -m nose --verbose test \
--exclude test_age_restriction.py \
--exclude test_download.py \
--exclude test_iqiyi_sdk_interpreter.py \
--exclude test_overwrites.py \
--exclude test_socks.py \
--exclude test_subtitles.py \
--exclude test_write_annotations.py \
--exclude test_youtube_lists.py \
--exclude test_youtube_signature.py \
--exclude test_post_hooks.py
$(PYTHON) -m pytest -k "not download"

yt-dlp: yt_dlp/*.py yt_dlp/*/*.py
mkdir -p zip
README.md: 83 changes

@@ -53,6 +53,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
* [Format Selection examples](#format-selection-examples)
* [MODIFYING METADATA](#modifying-metadata)
* [Modifying metadata examples](#modifying-metadata-examples)
* [EXTRACTOR ARGUMENTS](#extractor-arguments)
* [PLUGINS](#plugins)
* [DEPRECATED OPTIONS](#deprecated-options)
* [MORE](#more)

@@ -74,19 +75,22 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/
* All Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`) supports downloading multiple pages of content
* Search (`ytsearch:`, `ytsearchdate:`), search URLs and in-channel search works
* Mixes supports downloading multiple pages of content
* Partial workaround for throttling issue
* Redirect channel's home URL automatically to `/video` to preserve the old behaviour
* `255kbps` audio is extracted from youtube music if premium cookies are given
* Youtube music Albums, channels etc can be downloaded

* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[:PROFILE]`

* **Split video by chapters**: Videos can be split into multiple files based on chapters using `--split-chapters`

* **Multi-threaded fragment downloads**: Download multiple fragments of m3u8/mpd videos in parallel. Use `--concurrent-fragments` (`-N`) option to set the number of threads used

* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats

* **New extractors**: AnimeLab, Philo MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive
* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip

* **Fixed extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon
* **Fixed extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki

* **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details

@@ -127,6 +131,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-infojson`. Use `--compat-options no-attach-info-json` to revert this
* `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
* The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
* All *experiences* of a funimation episode are considered as a single video. This behavior breaks existing archives. Use `--compat-options seperate-video-versions` to extract information from only the default player
* Youtube live chat (if available) is considered as a subtitle. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent live chat from downloading
* Youtube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
* Unavailable videos are also listed for youtube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this

@@ -183,6 +188,7 @@ While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly
* [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licenced under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
* [**pycryptodome**](https://github.com/Legrandin/pycryptodome) - For decrypting various data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
* [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licenced under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE)
* [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. Licenced under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE)
* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licenced under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](http://rtmpdump.mplayerhq.hu)
* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright)

@@ -209,7 +215,7 @@ You can also build the executable without any version info or metadata by using:
Note that pyinstaller [does not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment

**For Unix**:
You will need the required build tools: `python`, `make` (GNU), `pandoc`, `zip`, `nosetests`
You will need the required build tools: `python`, `make` (GNU), `pandoc`, `zip`, `pytest`
Then simply run `make`. You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files

**Note**: In either platform, `devscripts\update-version.py` can be used to automatically update the version number

@@ -433,7 +439,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
--downloader-args NAME:ARGS Give these arguments to the external
downloader. Specify the downloader name and
the arguments separated by a colon ":". You
can use this option multiple times
can use this option multiple times to give
different arguments to different downloaders
(Alias: --external-downloader-args)

## Filesystem Options:

@@ -517,7 +524,19 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
option)
--cookies FILE File to read cookies from and dump cookie
jar in
--no-cookies Do not read/dump cookies (default)
--no-cookies Do not read/dump cookies from/to file
(default)
--cookies-from-browser BROWSER[:PROFILE]
Load cookies from a user profile of the
given web browser. Currently supported
browsers are: brave|chrome|chromium|edge|fi
refox|opera|safari|vivaldi. You can specify
the user profile name or directory using
"BROWSER:PROFILE_NAME" or
"BROWSER:PROFILE_PATH". If no profile is
given, the most recently accessed one is
used
--no-cookies-from-browser Do not load cookies from browser (default)
--cache-dir DIR Location in the filesystem where youtube-dl
can store some downloaded information (such
as client ids and signatures) permanently.

@@ -635,7 +654,9 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
--no-prefer-free-formats Don't give any special preference to free
containers (default)
--check-formats Check that the formats selected are
actually downloadable (Experimental)
actually downloadable
--no-check-formats Do not check that the formats selected are
actually downloadable
-F, --list-formats List all available formats of requested
videos
--merge-output-format FORMAT If a merge is required (e.g.

@@ -770,6 +791,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
downloaded file is also available. If no
fields are passed, "%(filepath)s" is
appended to the end of the command
--exec-before-download CMD Execute a command before the actual
download. The syntax is the same as --exec
--convert-subs FORMAT Convert the subtitles to another format
(currently supported: srt|vtt|ass|lrc)
(Alias: --convert-subtitles)

@@ -816,18 +839,10 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
--no-hls-split-discontinuity Do not split HLS playlists to different
formats at discontinuities such as ad
breaks (default)
--youtube-include-dash-manifest Download the DASH manifests and related
data on YouTube videos (default)
(Alias: --no-youtube-skip-dash-manifest)
--youtube-skip-dash-manifest Do not download the DASH manifests and
related data on YouTube videos
(Alias: --no-youtube-include-dash-manifest)
--youtube-include-hls-manifest Download the HLS manifests and related data
on YouTube videos (default)
(Alias: --no-youtube-skip-hls-manifest)
--youtube-skip-hls-manifest Do not download the HLS manifests and
related data on YouTube videos
(Alias: --no-youtube-include-hls-manifest)
--extractor-args KEY:ARGS Pass these arguments to the extractor. See
"EXTRACTOR ARGUMENTS" for details. You can
use this option multiple times to give
arguments for different extractors

# CONFIGURATION

@@ -942,6 +957,7 @@ The available fields are:
- `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage
- `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
- `age_limit` (numeric): Age restriction for the video (years)
- `live_status` (string): One of 'is_live', 'was_live', 'upcoming', 'not_live'
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
- `was_live` (boolean): Whether this video was originally a live stream
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites

@@ -1021,7 +1037,7 @@ Available only when used in `--print`:

Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).

For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKcj`, this will result in a `yt-dlp test video-BaW_jenozKcj.mp4` file created in the current directory.
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.

For numeric sequences you can use numeric related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.

@@ -1105,7 +1121,7 @@ If you want to download multiple videos and they don't have the same formats ava

If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.

You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg. If `--no-video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, if `--no-audio-multistreams` is used, all formats with an audio stream except the first one are ignored. For example, `-f bestvideo+best+bestaudio` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download and merge both formats while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.
You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg. Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. For example, `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download and merge both formats while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.

## Filtering Formats

@@ -1331,6 +1347,29 @@ $ yt-dlp --parse-metadata 'description:(?s)(?P<meta_comment>.+)' --add-metadata

```

# EXTRACTOR ARGUMENTS

Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) seperated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:skip=dash,hls;player_client=android" --extractor-args "funimation:version=uncut"`

The following extractors use this feature:
* **youtube**
* `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
* `player_client`: Clients to extract video data from - one or more of `web`, `android`, `ios`, `web_music`, `android_music`, `ios_music`. By default, `android,web` is used. If the URL is from `music.youtube.com`, `android,web,android_music,web_music` is used
* `player_skip`: `configs` - skip any requests for client configs and use defaults
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side).
* `max_comments`: maximum amount of comments to download (default all).
* `max_comment_depth`: maximum depth for nested comments. YouTube supports depths 1 or 2 (default).

* **funimation**
* `language`: Languages to extract. Eg: `funimation:language=english,japanese`
* `version`: The video version to extract - `uncut` or `simulcast`

* **vikiChannel**
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`

NOTE: These options may be changed/removed in the future without concern for backward compatibility
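A possible combination of the youtube arguments documented above, purely for illustration (the URL is a placeholder):

```sh
# Sort comments by "top", fetch at most 100 of them, and skip the HLS manifest.
yt-dlp --extractor-args "youtube:comment_sort=top;max_comments=100;skip=hls" \
       "https://www.youtube.com/watch?v=BaW_jenozKc"
```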
# PLUGINS

Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`. Currently only `extractor` plugins are supported. Support for `downloader` and `postprocessor` plugins may be added in the future. See [ytdlp_plugins](ytdlp_plugins) for example.

@@ -1362,6 +1401,10 @@ While these options still work, their use is not recommended since there are oth
--list-formats-old --compat-options list-formats (Alias: --no-list-formats-as-table)
--list-formats-as-table --compat-options -list-formats [Default] (Alias: --no-list-formats-old)
--sponskrub-args ARGS --ppa "sponskrub:ARGS"
--youtube-skip-dash-manifest --extractor-args "youtube:skip=dash" (Alias: --no-youtube-include-dash-manifest)
--youtube-skip-hls-manifest --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest)
--youtube-include-dash-manifest Default (Alias: --no-youtube-skip-dash-manifest)
--youtube-include-hls-manifest Default (Alias: --no-youtube-skip-hls-manifest)
--test Used by developers for testing extractors. Not intended for the end user
--youtube-print-sig-code Used for testing youtube signatures
@@ -1,17 +1,16 @@
@setlocal
@echo off
cd /d %~dp0..

rem Keep this list in sync with the `offlinetest` target in Makefile
set DOWNLOAD_TESTS="age_restriction^|download^|iqiyi_sdk_interpreter^|socks^|subtitles^|write_annotations^|youtube_lists^|youtube_signature^|post_hooks"

if "%YTDL_TEST_SET%" == "core" (
set test_set="-I test_("%DOWNLOAD_TESTS%")\.py"
set multiprocess_args=""
) else if "%YTDL_TEST_SET%" == "download" (
set test_set="-I test_(?!"%DOWNLOAD_TESTS%").+\.py"
set multiprocess_args="--processes=4 --process-timeout=540"
if ["%~1"]==[""] (
set "test_set="
) else if ["%~1"]==["core"] (
set "test_set=-k "not download""
) else if ["%~1"]==["download"] (
set "test_set=-k download"
) else (
echo YTDL_TEST_SET is not set or invalid
echo.Invalid test type "%~1". Use "core" ^| "download"
exit /b 1
)

nosetests test --verbose %test_set:"=% %multiprocess_args:"=%
pytest %test_set%
@@ -1,22 +1,15 @@
#!/bin/bash
#!/bin/sh

# Keep this list in sync with the `offlinetest` target in Makefile
DOWNLOAD_TESTS="age_restriction|download|iqiyi_sdk_interpreter|overwrites|socks|subtitles|write_annotations|youtube_lists|youtube_signature|post_hooks"
if [ -z $1 ]; then
test_set='test'
elif [ $1 = 'core' ]; then
test_set='not download'
elif [ $1 = 'download' ]; then
test_set='download'
else
echo 'Invalid test type "'$1'". Use "core" | "download"'
exit 1
fi

test_set=""
multiprocess_args=""

case "$YTDL_TEST_SET" in
core)
test_set="-I test_($DOWNLOAD_TESTS)\.py"
;;
download)
test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py"
multiprocess_args="--processes=4 --process-timeout=540"
;;
*)
break
;;
esac

nosetests test --verbose $test_set $multiprocess_args
echo python3 -m pytest -k $test_set
python3 -m pytest -k "$test_set"
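With the rewritten script, the test type is passed as a positional argument instead of through the YTDL_TEST_SET environment variable, for example:

```sh
# Run only the core (offline) tests, or only the download tests.
./devscripts/run_tests.sh core
./devscripts/run_tests.sh download
```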
pytest.ini (new file): 4 changes

@@ -0,0 +1,4 @@
[pytest]
addopts = -ra -v --strict-markers
markers =
download
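Since pytest.ini registers a `download` marker, tests can also be filtered by marker rather than by name; a sketch, assuming pytest's standard `-m` selection:

```sh
# Select or exclude tests carrying the @pytest.mark.download marker.
python3 -m pytest -m download
python3 -m pytest -m "not download"
```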
@@ -259,6 +259,7 @@
- **dlive:vod**
- **DoodStream**
- **Dotsub**
- **Douyin**
- **DouyuShow**
- **DouyuTV**: 斗鱼
- **DPlay**

@@ -306,6 +307,7 @@
- **EyedoTV**
- **facebook**
- **FacebookPluginsVideo**
- **fancode:live**
- **fancode:vod**
- **faz.net**
- **fc2**

@@ -343,6 +345,8 @@
- **FrontendMastersLesson**
- **FujiTVFODPlus7**
- **Funimation**
- **funimation:page**
- **funimation:show**
- **Funk**
- **Fusion**
- **Fux**

@@ -766,6 +770,7 @@
- **PopcornTV**
- **PornCom**
- **PornerBros**
- **PornFlip**
- **PornHd**
- **PornHub**: PornHub and Thumbzilla
- **PornHubPagedVideoList**

@@ -808,6 +813,8 @@
- **RCS**
- **RCSEmbeds**
- **RCSVarious**
- **RCTIPlus**
- **RCTIPlusSeries**
- **RDS**: RDS.ca
- **RedBull**
- **RedBullEmbed**
@@ -22,6 +22,14 @@ from yt_dlp.utils import (
)

if "pytest" in sys.modules:
import pytest
is_download_test = pytest.mark.download
else:
def is_download_test(testClass):
return testClass

def get_params(override=None):
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"parameters.json")
@@ -1,4 +1,5 @@
{
"check_formats": false,
"consoletitle": false,
"continuedl": true,
"forcedescription": false,
@@ -35,13 +35,13 @@ class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler)
assert False

class TestIE(InfoExtractor):
class DummyIE(InfoExtractor):
pass

class TestInfoExtractor(unittest.TestCase):
def setUp(self):
self.ie = TestIE(FakeYDL())
self.ie = DummyIE(FakeYDL())

def test_ie_key(self):
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
@@ -35,6 +35,9 @@ class YDL(FakeYDL):
def to_screen(self, msg):
self.msgs.append(msg)

def dl(self, *args, **kwargs):
assert False, 'Downloader must not be invoked for test_YoutubeDL'

def _make_result(formats, **kwargs):
res = {

@@ -117,35 +120,24 @@ class TestFormatSelection(unittest.TestCase):
]
info_dict = _make_result(formats)

ydl = YDL({'format': '20/47'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], '47')
def test(inp, *expected, multi=False):
ydl = YDL({
'format': inp,
'allow_multiple_video_streams': multi,
'allow_multiple_audio_streams': multi,
})
ydl.process_ie_result(info_dict.copy())
downloaded = map(lambda x: x['format_id'], ydl.downloaded_info_dicts)
self.assertEqual(list(downloaded), list(expected))

ydl = YDL({'format': '20/71/worst'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], '35')

ydl = YDL()
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], '2')

ydl = YDL({'format': 'webm/mp4'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], '47')

ydl = YDL({'format': '3gp/40/mp4'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], '35')

ydl = YDL({'format': 'example-with-dashes'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'example-with-dashes')
test('20/47', '47')
test('20/71/worst', '35')
test(None, '2')
test('webm/mp4', '47')
test('3gp/40/mp4', '35')
test('example-with-dashes', 'example-with-dashes')
test('all', '35', 'example-with-dashes', '45', '47', '2') # Order doesn't actually matter for this
test('mergeall', '2+47+45+example-with-dashes+35', multi=True)

def test_format_selection_audio(self):
formats = [
@@ -7,8 +7,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import try_rm
from test.helper import try_rm, is_download_test

from yt_dlp import YoutubeDL

@@ -32,6 +31,7 @@ def _download_restricted(url, filename, age):
return res

@is_download_test
class TestAgeRestriction(unittest.TestCase):
def _assert_restricted(self, url, filename, age, old_age=None):
self.assertTrue(_download_restricted(url, filename, old_age))
96
test/test_cookies.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import unittest
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from yt_dlp import cookies
|
||||
from yt_dlp.cookies import (
|
||||
CRYPTO_AVAILABLE,
|
||||
LinuxChromeCookieDecryptor,
|
||||
MacChromeCookieDecryptor,
|
||||
WindowsChromeCookieDecryptor,
|
||||
YDLLogger,
|
||||
parse_safari_cookies,
|
||||
pbkdf2_sha1,
|
||||
)
|
||||
|
||||
|
||||
class MonkeyPatch:
|
||||
def __init__(self, module, temporary_values):
|
||||
self._module = module
|
||||
self._temporary_values = temporary_values
|
||||
self._backup_values = {}
|
||||
|
||||
def __enter__(self):
|
||||
for name, temp_value in self._temporary_values.items():
|
||||
self._backup_values[name] = getattr(self._module, name)
|
||||
setattr(self._module, name, temp_value)
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
for name, backup_value in self._backup_values.items():
|
||||
setattr(self._module, name, backup_value)
|
||||
|
||||
|
||||
class TestCookies(unittest.TestCase):
|
||||
def test_chrome_cookie_decryptor_linux_derive_key(self):
|
||||
key = LinuxChromeCookieDecryptor.derive_key(b'abc')
|
||||
self.assertEqual(key, b'7\xa1\xec\xd4m\xfcA\xc7\xb19Z\xd0\x19\xdcM\x17')
|
||||
|
||||
def test_chrome_cookie_decryptor_mac_derive_key(self):
|
||||
key = MacChromeCookieDecryptor.derive_key(b'abc')
|
||||
self.assertEqual(key, b'Y\xe2\xc0\xd0P\xf6\xf4\xe1l\xc1\x8cQ\xcb|\xcdY')
|
||||
|
||||
def test_chrome_cookie_decryptor_linux_v10(self):
|
||||
with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}):
|
||||
encrypted_value = b'v10\xccW%\xcd\xe6\xe6\x9fM" \xa7\xb0\xca\xe4\x07\xd6'
|
||||
value = 'USD'
|
||||
decryptor = LinuxChromeCookieDecryptor('Chrome', YDLLogger())
|
||||
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
||||
|
||||
def test_chrome_cookie_decryptor_linux_v11(self):
|
||||
with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b'',
|
||||
'KEYRING_AVAILABLE': True}):
|
||||
encrypted_value = b'v11#\x81\x10>`w\x8f)\xc0\xb2\xc1\r\xf4\x1al\xdd\x93\xfd\xf8\xf8N\xf2\xa9\x83\xf1\xe9o\x0elVQd'
|
||||
value = 'tz=Europe.London'
|
||||
decryptor = LinuxChromeCookieDecryptor('Chrome', YDLLogger())
|
||||
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
||||
|
||||
@unittest.skipIf(not CRYPTO_AVAILABLE, 'cryptography library not available')
|
||||
def test_chrome_cookie_decryptor_windows_v10(self):
|
||||
with MonkeyPatch(cookies, {
|
||||
'_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&'
|
||||
}):
|
||||
encrypted_value = b'v10T\xb8\xf3\xb8\x01\xa7TtcV\xfc\x88\xb8\xb8\xef\x05\xb5\xfd\x18\xc90\x009\xab\xb1\x893\x85)\x87\xe1\xa9-\xa3\xad='
|
||||
value = '32101439'
|
||||
decryptor = WindowsChromeCookieDecryptor('', YDLLogger())
|
||||
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
||||
|
||||
def test_chrome_cookie_decryptor_mac_v10(self):
|
||||
with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}):
|
||||
encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc'
|
||||
value = '2021-06-01-22'
|
||||
decryptor = MacChromeCookieDecryptor('', YDLLogger())
|
||||
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
||||
|
||||
def test_safari_cookie_parsing(self):
|
||||
cookies = \
|
||||
b'cook\x00\x00\x00\x01\x00\x00\x00i\x00\x00\x01\x00\x01\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00Y' \
|
||||
b'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x008\x00\x00\x00B\x00\x00\x00F\x00\x00\x00H' \
|
||||
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x03\xa5>\xc3A\x00\x00\x80\xc3\x07:\xc3A' \
|
||||
b'localhost\x00foo\x00/\x00test%20%3Bcookie\x00\x00\x00\x054\x07\x17 \x05\x00\x00\x00Kbplist00\xd1\x01' \
|
||||
b'\x02_\x10\x18NSHTTPCookieAcceptPolicy\x10\x02\x08\x0b&\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00' \
|
||||
b'\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00('
|
||||
|
||||
jar = parse_safari_cookies(cookies)
|
||||
self.assertEqual(len(jar), 1)
|
||||
cookie = list(jar)[0]
|
||||
self.assertEqual(cookie.domain, 'localhost')
|
||||
self.assertEqual(cookie.port, None)
|
||||
self.assertEqual(cookie.path, '/')
|
||||
self.assertEqual(cookie.name, 'foo')
|
||||
self.assertEqual(cookie.value, 'test%20%3Bcookie')
|
||||
self.assertFalse(cookie.secure)
|
||||
expected_expiration = datetime(2021, 6, 18, 21, 39, 19, tzinfo=timezone.utc)
|
||||
self.assertEqual(cookie.expires, int(expected_expiration.timestamp()))
|
||||
|
||||
def test_pbkdf2_sha1(self):
|
||||
key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16)
|
||||
self.assertEqual(key, b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34')
|
||||
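The new test file above exercises the Chrome cookie decryptors, the Safari binary-cookie parser and the PBKDF2 helper through a small MonkeyPatch context manager. A hedged usage sketch of the same public helpers (module path as imported in the test; constructing a decryptor outside the test may query the real system keyring):

    from yt_dlp.cookies import pbkdf2_sha1, parse_safari_cookies

    # (password, salt, iterations, key_length), called positionally as in the test
    key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16)
    # parse_safari_cookies(raw_bytes) returns a cookie jar; len(jar) and iteration work as shown above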
@@ -10,12 +10,13 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import (
|
||||
assertGreaterEqual,
|
||||
expect_info_dict,
|
||||
expect_warnings,
|
||||
get_params,
|
||||
gettestcases,
|
||||
expect_info_dict,
|
||||
try_rm,
|
||||
is_download_test,
|
||||
report_warning,
|
||||
try_rm,
|
||||
)
|
||||
|
||||
|
||||
@@ -64,6 +65,7 @@ def _file_md5(fn):
|
||||
defs = gettestcases()
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestDownload(unittest.TestCase):
|
||||
# Parallel testing in nosetests. See
|
||||
# http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html
|
||||
|
||||
@@ -8,7 +8,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from test.helper import FakeYDL, is_download_test
|
||||
from yt_dlp.extractor import IqiyiIE
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ class WarningLogger(object):
|
||||
pass
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestIqiyiSDKInterpreter(unittest.TestCase):
|
||||
def test_iqiyi_sdk_interpreter(self):
|
||||
'''
|
||||
|
||||
@@ -7,7 +7,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import get_params, try_rm
|
||||
from test.helper import get_params, try_rm, is_download_test
|
||||
import yt_dlp.YoutubeDL
|
||||
from yt_dlp.utils import DownloadError
|
||||
|
||||
@@ -22,6 +22,7 @@ TEST_ID = 'gr51aVj-mLg'
|
||||
EXPECTED_NAME = 'gr51aVj-mLg'
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestPostHooks(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.stored_name_1 = None
|
||||
|
||||
@@ -14,6 +14,7 @@ import subprocess
|
||||
from test.helper import (
|
||||
FakeYDL,
|
||||
get_params,
|
||||
is_download_test,
|
||||
)
|
||||
from yt_dlp.compat import (
|
||||
compat_str,
|
||||
@@ -21,6 +22,7 @@ from yt_dlp.compat import (
|
||||
)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestMultipleSocks(unittest.TestCase):
|
||||
@staticmethod
|
||||
def _check_params(attrs):
|
||||
@@ -76,6 +78,7 @@ class TestMultipleSocks(unittest.TestCase):
|
||||
params['secondary_server_ip'])
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestSocks(unittest.TestCase):
|
||||
_SKIP_SOCKS_TEST = True
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, md5
|
||||
from test.helper import FakeYDL, md5, is_download_test
|
||||
|
||||
|
||||
from yt_dlp.extractor import (
|
||||
@@ -30,6 +30,7 @@ from yt_dlp.extractor import (
|
||||
)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class BaseTestSubtitles(unittest.TestCase):
|
||||
url = None
|
||||
IE = None
|
||||
@@ -55,6 +56,7 @@ class BaseTestSubtitles(unittest.TestCase):
|
||||
return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
url = 'QRS8MkLhQmM'
|
||||
IE = YoutubeIE
|
||||
@@ -111,6 +113,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestDailymotionSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.dailymotion.com/video/xczg00'
|
||||
IE = DailymotionIE
|
||||
@@ -134,6 +137,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestTedSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
|
||||
IE = TEDIE
|
||||
@@ -149,6 +153,7 @@ class TestTedSubtitles(BaseTestSubtitles):
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestVimeoSubtitles(BaseTestSubtitles):
|
||||
url = 'http://vimeo.com/76979871'
|
||||
IE = VimeoIE
|
||||
@@ -170,6 +175,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestWallaSubtitles(BaseTestSubtitles):
|
||||
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
|
||||
IE = WallaIE
|
||||
@@ -191,6 +197,7 @@ class TestWallaSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
|
||||
IE = CeskaTelevizeIE
|
||||
@@ -212,6 +219,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestLyndaSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
|
||||
IE = LyndaIE
|
||||
@@ -224,6 +232,7 @@ class TestLyndaSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestNPOSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
|
||||
IE = NPOIE
|
||||
@@ -236,6 +245,7 @@ class TestNPOSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestMTVSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
|
||||
IE = ComedyCentralIE
|
||||
@@ -251,6 +261,7 @@ class TestMTVSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestNRKSubtitles(BaseTestSubtitles):
|
||||
url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
|
||||
IE = NRKTVIE
|
||||
@@ -263,6 +274,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestRaiPlaySubtitles(BaseTestSubtitles):
|
||||
IE = RaiPlayIE
|
||||
|
||||
@@ -283,6 +295,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestVikiSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
|
||||
IE = VikiIE
|
||||
@@ -295,6 +308,7 @@ class TestVikiSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestThePlatformSubtitles(BaseTestSubtitles):
|
||||
# from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
|
||||
# (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
|
||||
@@ -309,6 +323,7 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
|
||||
url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
|
||||
IE = ThePlatformFeedIE
|
||||
@@ -321,6 +336,7 @@ class TestThePlatformFeedSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestRtveSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
|
||||
IE = RTVEALaCartaIE
|
||||
@@ -335,6 +351,7 @@ class TestRtveSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestDemocracynowSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.democracynow.org/shows/2015/7/3'
|
||||
IE = DemocracynowIE
|
||||
|
||||
@@ -1054,6 +1054,9 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('{ "040": "040" }')
|
||||
self.assertEqual(json.loads(on), {'040': '040'})
|
||||
|
||||
on = js_to_json('[1,//{},\n2]')
|
||||
self.assertEqual(json.loads(on), [1, 2])
|
||||
|
||||
def test_js_to_json_malformed(self):
|
||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||
@@ -1545,8 +1548,8 @@ Line 1
|
||||
self.assertEqual(repr(LazyList(it)), repr(it))
|
||||
self.assertEqual(str(LazyList(it)), str(it))
|
||||
|
||||
self.assertEqual(list(reversed(LazyList(it))), it[::-1])
|
||||
self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7])
|
||||
self.assertEqual(list(LazyList(it).reverse()), it[::-1])
|
||||
self.assertEqual(list(LazyList(it).reverse()[1:3:7]), it[::-1][1:3:7])
|
||||
|
||||
def test_LazyList_laziness(self):
|
||||
|
||||
@@ -1559,13 +1562,13 @@ Line 1
|
||||
test(ll, 5, 5, range(6))
|
||||
test(ll, -3, 7, range(10))
|
||||
|
||||
ll = reversed(LazyList(range(10)))
|
||||
ll = LazyList(range(10)).reverse()
|
||||
test(ll, -1, 0, range(1))
|
||||
test(ll, 3, 6, range(10))
|
||||
|
||||
ll = LazyList(itertools.count())
|
||||
test(ll, 10, 10, range(11))
|
||||
reversed(ll)
|
||||
ll.reverse()
|
||||
test(ll, -15, 14, range(15))
|
||||
|
||||
|
||||
|
||||
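The test changes above track an API change in yt_dlp.utils.LazyList: reversal is now an explicit reverse() method that returns the (lazily) reversed list, replacing the reversed() calls. A small sketch, assuming the method behaves exactly as the updated assertions imply:

    from yt_dlp.utils import LazyList

    ll = LazyList(range(10)).reverse()      # replaces reversed(LazyList(range(10)))
    assert list(ll[1:3]) == [8, 7]          # slicing still works on the reversed list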
@@ -8,7 +8,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import get_params, try_rm
|
||||
from test.helper import get_params, try_rm, is_download_test
|
||||
|
||||
|
||||
import io
|
||||
@@ -38,6 +38,7 @@ ANNOTATIONS_FILE = TEST_ID + '.annotations.xml'
|
||||
EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestAnnotations(unittest.TestCase):
|
||||
def setUp(self):
|
||||
# Clear old files
|
||||
|
||||
@@ -7,7 +7,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from test.helper import FakeYDL, is_download_test
|
||||
|
||||
|
||||
from yt_dlp.extractor import (
|
||||
@@ -17,6 +17,7 @@ from yt_dlp.extractor import (
|
||||
)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestYoutubeLists(unittest.TestCase):
|
||||
def assertIsPlaylist(self, info):
|
||||
"""Make sure the info has '_type' set to 'playlist'"""
|
||||
|
||||
@@ -12,7 +12,7 @@ import io
|
||||
import re
|
||||
import string
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from test.helper import FakeYDL, is_download_test
|
||||
from yt_dlp.extractor import YoutubeIE
|
||||
from yt_dlp.compat import compat_str, compat_urlretrieve
|
||||
|
||||
@@ -65,6 +65,7 @@ _TESTS = [
|
||||
]
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestPlayerInfo(unittest.TestCase):
|
||||
def test_youtube_extract_player_info(self):
|
||||
PLAYER_URLS = (
|
||||
@@ -87,6 +88,7 @@ class TestPlayerInfo(unittest.TestCase):
|
||||
self.assertEqual(player_id, expected_player_id)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestSignature(unittest.TestCase):
|
||||
def setUp(self):
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
2
tox.ini
@@ -1,5 +1,7 @@
|
||||
[tox]
|
||||
envlist = py26,py27,py33,py34,py35
|
||||
|
||||
# Needed?
|
||||
[testenv]
|
||||
deps =
|
||||
nose
|
||||
|
||||
@@ -31,7 +31,6 @@ from zipimport import zipimporter
|
||||
|
||||
from .compat import (
|
||||
compat_basestring,
|
||||
compat_cookiejar,
|
||||
compat_get_terminal_size,
|
||||
compat_kwargs,
|
||||
compat_numeric_types,
|
||||
@@ -42,6 +41,7 @@ from .compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_request_DataHandler,
|
||||
)
|
||||
from .cookies import load_cookies
|
||||
from .utils import (
|
||||
age_restricted,
|
||||
args_to_str,
|
||||
@@ -104,12 +104,12 @@ from .utils import (
|
||||
ThrottledDownload,
|
||||
to_high_limit_path,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
UnavailableVideoError,
|
||||
url_basename,
|
||||
version_tuple,
|
||||
write_json_file,
|
||||
write_string,
|
||||
YoutubeDLCookieJar,
|
||||
YoutubeDLCookieProcessor,
|
||||
YoutubeDLHandler,
|
||||
YoutubeDLRedirectHandler,
|
||||
@@ -208,6 +208,9 @@ class YoutubeDL(object):
|
||||
into a single file
|
||||
allow_multiple_audio_streams: Allow multiple audio streams to be merged
|
||||
into a single file
|
||||
check_formats Whether to test if the formats are downloadable.
|
||||
Can be True (check all), False (check none)
|
||||
or None (check only if requested by extractor)
|
||||
paths: Dictionary of output paths. The allowed keys are 'home'
|
||||
'temp' and the keys of OUTTMPL_TYPES (in utils.py)
|
||||
outtmpl: Dictionary of templates for output names. Allowed keys
|
||||
@@ -252,7 +255,7 @@ class YoutubeDL(object):
|
||||
writedesktoplink: Write a Linux internet shortcut file (.desktop)
|
||||
writesubtitles: Write the video subtitles to a file
|
||||
writeautomaticsub: Write the automatically generated subtitles to a file
|
||||
allsubtitles: Deprecated - Use subtitlelangs = ['all']
|
||||
allsubtitles: Deprecated - Use subtitleslangs = ['all']
|
||||
Downloads all the subtitles of the video
|
||||
(requires writesubtitles or writeautomaticsub)
|
||||
listsubtitles: Lists all available subtitles for the video
|
||||
@@ -286,6 +289,9 @@ class YoutubeDL(object):
|
||||
break_on_reject: Stop the download process when encountering a video that
|
||||
has been filtered out.
|
||||
cookiefile: File name where cookies should be read from and dumped to
|
||||
cookiesfrombrowser: A tuple containing the name of the browser and the profile
|
||||
name/path from where cookies are loaded.
|
||||
Eg: ('chrome', ) or (vivaldi, 'default')
|
||||
nocheckcertificate:Do not verify SSL certificates
|
||||
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
||||
At the moment, this is only supported by YouTube.
|
||||
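The new cookiesfrombrowser parameter documented above takes a (browser, profile) tuple; the browser names come from SUPPORTED_BROWSERS in the new cookies module. A minimal sketch using the exact forms given in the docstring:

    from yt_dlp import YoutubeDL

    ydl = YoutubeDL({'cookiesfrombrowser': ('chrome', )})              # default profile
    # ydl = YoutubeDL({'cookiesfrombrowser': ('vivaldi', 'default')})  # named profile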
@@ -316,6 +322,7 @@ class YoutubeDL(object):
|
||||
progress, with a dictionary with the entries
|
||||
* status: One of "downloading", "error", or "finished".
|
||||
Check this first and ignore unknown values.
|
||||
* info_dict: The extracted info_dict
|
||||
|
||||
If status is one of "downloading", or "finished", the
|
||||
following properties may also be present:
|
||||
@@ -391,11 +398,9 @@ class YoutubeDL(object):
|
||||
if True, otherwise use ffmpeg/avconv if False, otherwise
|
||||
use downloader suggested by extractor if None.
|
||||
compat_opts: Compatibility options. See "Differences in default behavior".
|
||||
Note that only format-sort, format-spec, no-live-chat,
|
||||
no-attach-info-json, playlist-index, list-formats,
|
||||
no-direct-merge, embed-thumbnail-atomicparsley,
|
||||
no-youtube-unavailable-videos, no-youtube-channel-redirect,
|
||||
works when used via the API
|
||||
The following options do not work when used through the API:
|
||||
filename, abort-on-error, multistreams, no-live-chat,
|
||||
no-playlist-metafiles. Refer __init__.py for their implementation
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the downloader (see yt_dlp/downloader/common.py):
|
||||
@@ -419,11 +424,16 @@ class YoutubeDL(object):
|
||||
dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
|
||||
hls_split_discontinuity: Split HLS playlists to different formats at
|
||||
discontinuities such as ad breaks (default: False)
|
||||
youtube_include_dash_manifest: If True (default), DASH manifests and related
|
||||
extractor_args: A dictionary of arguments to be passed to the extractors.
|
||||
See "EXTRACTOR ARGUMENTS" for details.
|
||||
Eg: {'youtube': {'skip': ['dash', 'hls']}}
|
||||
youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
|
||||
If True (default), DASH manifests and related
|
||||
data will be downloaded and processed by extractor.
|
||||
You can reduce network I/O by disabling it if you don't
|
||||
care about DASH. (only for youtube)
|
||||
youtube_include_hls_manifest: If True (default), HLS manifests and related
|
||||
youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
|
||||
If True (default), HLS manifests and related
|
||||
data will be downloaded and processed by extractor.
|
||||
You can reduce network I/O by disabling it if you don't
|
||||
care about HLS. (only for youtube)
|
||||
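extractor_args replaces the deprecated youtube_include_dash_manifest / youtube_include_hls_manifest flags; the example value below is taken verbatim from the docstring added above:

    from yt_dlp import YoutubeDL

    # Skip DASH and HLS manifest processing for YouTube
    ydl = YoutubeDL({'extractor_args': {'youtube': {'skip': ['dash', 'hls']}}})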
@@ -443,7 +453,7 @@ class YoutubeDL(object):
|
||||
params = None
|
||||
_ies = []
|
||||
_pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
|
||||
__prepare_filename_warned = False
|
||||
_printed_messages = set()
|
||||
_first_webpage_request = True
|
||||
_download_retcode = None
|
||||
_num_downloads = None
|
||||
@@ -458,7 +468,7 @@ class YoutubeDL(object):
|
||||
self._ies = []
|
||||
self._ies_instances = {}
|
||||
self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
|
||||
self.__prepare_filename_warned = False
|
||||
self._printed_messages = set()
|
||||
self._first_webpage_request = True
|
||||
self._post_hooks = []
|
||||
self._progress_hooks = []
|
||||
@@ -653,8 +663,12 @@ class YoutubeDL(object):
|
||||
for _ in range(line_count))
|
||||
return res[:-len('\n')]
|
||||
|
||||
def _write_string(self, s, out=None):
|
||||
write_string(s, out=out, encoding=self.params.get('encoding'))
|
||||
def _write_string(self, message, out=None, only_once=False):
|
||||
if only_once:
|
||||
if message in self._printed_messages:
|
||||
return
|
||||
self._printed_messages.add(message)
|
||||
write_string(message, out=out, encoding=self.params.get('encoding'))
|
||||
|
||||
def to_stdout(self, message, skip_eol=False, quiet=False):
|
||||
"""Print message to stdout"""
|
||||
@@ -665,13 +679,13 @@ class YoutubeDL(object):
|
||||
'%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
|
||||
self._err_file if quiet else self._screen_file)
|
||||
|
||||
def to_stderr(self, message):
|
||||
def to_stderr(self, message, only_once=False):
|
||||
"""Print message to stderr"""
|
||||
assert isinstance(message, compat_str)
|
||||
if self.params.get('logger'):
|
||||
self.params['logger'].error(message)
|
||||
else:
|
||||
self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
|
||||
self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
|
||||
|
||||
def to_console_title(self, message):
|
||||
if not self.params.get('consoletitle', False):
|
||||
@@ -748,7 +762,7 @@ class YoutubeDL(object):
|
||||
self.to_stdout(
|
||||
message, skip_eol, quiet=self.params.get('quiet', False))
|
||||
|
||||
def report_warning(self, message):
|
||||
def report_warning(self, message, only_once=False):
|
||||
'''
|
||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||
If stderr is a tty file the 'WARNING:' will be colored
|
||||
@@ -763,7 +777,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
_msg_header = 'WARNING:'
|
||||
warning_message = '%s %s' % (_msg_header, message)
|
||||
self.to_stderr(warning_message)
|
||||
self.to_stderr(warning_message, only_once)
|
||||
|
||||
def report_error(self, message, tb=None):
|
||||
'''
|
||||
@@ -777,7 +791,7 @@ class YoutubeDL(object):
|
||||
error_message = '%s %s' % (_msg_header, message)
|
||||
self.trouble(error_message, tb)
|
||||
|
||||
def write_debug(self, message):
|
||||
def write_debug(self, message, only_once=False):
|
||||
'''Log debug message or Print message to stderr'''
|
||||
if not self.params.get('verbose', False):
|
||||
return
|
||||
@@ -785,7 +799,7 @@ class YoutubeDL(object):
|
||||
if self.params.get('logger'):
|
||||
self.params['logger'].debug(message)
|
||||
else:
|
||||
self._write_string('%s\n' % message)
|
||||
self.to_stderr(message, only_once)
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
"""Report file has already been fully downloaded."""
|
||||
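to_stderr, report_warning and write_debug gain an only_once flag that drops any message already recorded in _printed_messages. A sketch of the resulting behaviour (the message text is illustrative, not from the source):

    from yt_dlp import YoutubeDL

    ydl = YoutubeDL({})
    ydl.report_warning('example warning', only_once=True)   # printed
    ydl.report_warning('example warning', only_once=True)   # suppressed: already in _printed_messages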
@@ -1010,13 +1024,13 @@ class YoutubeDL(object):
|
||||
|
||||
filename = self._prepare_filename(info_dict, dir_type or 'default')
|
||||
|
||||
if warn and not self.__prepare_filename_warned:
|
||||
if warn:
|
||||
if not self.params.get('paths'):
|
||||
pass
|
||||
elif filename == '-':
|
||||
self.report_warning('--paths is ignored when an outputting to stdout')
|
||||
self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
|
||||
elif os.path.isabs(filename):
|
||||
self.report_warning('--paths is ignored since an absolute path is given in output template')
|
||||
self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
|
||||
self.__prepare_filename_warned = True
|
||||
if filename == '-' or not filename:
|
||||
return filename
|
||||
@@ -1132,7 +1146,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||
|
||||
def __handle_extraction_exceptions(func):
|
||||
def __handle_extraction_exceptions(func, handle_all_errors=True):
|
||||
def wrapper(self, *args, **kwargs):
|
||||
try:
|
||||
return func(self, *args, **kwargs)
|
||||
@@ -1152,7 +1166,7 @@ class YoutubeDL(object):
|
||||
except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
|
||||
raise
|
||||
except Exception as e:
|
||||
if self.params.get('ignoreerrors', False):
|
||||
if handle_all_errors and self.params.get('ignoreerrors', False):
|
||||
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
|
||||
else:
|
||||
raise
|
||||
@@ -1169,6 +1183,8 @@ class YoutubeDL(object):
|
||||
'_type': 'compat_list',
|
||||
'entries': ie_result,
|
||||
}
|
||||
if extra_info.get('original_url'):
|
||||
ie_result.setdefault('original_url', extra_info['original_url'])
|
||||
self.add_default_extra_info(ie_result, ie, url)
|
||||
if process:
|
||||
return self.process_ie_result(ie_result, download, extra_info)
|
||||
@@ -1176,13 +1192,17 @@ class YoutubeDL(object):
|
||||
return ie_result
|
||||
|
||||
def add_default_extra_info(self, ie_result, ie, url):
|
||||
self.add_extra_info(ie_result, {
|
||||
'extractor': ie.IE_NAME,
|
||||
'webpage_url': url,
|
||||
'original_url': url,
|
||||
'webpage_url_basename': url_basename(url),
|
||||
'extractor_key': ie.ie_key(),
|
||||
})
|
||||
if url is not None:
|
||||
self.add_extra_info(ie_result, {
|
||||
'webpage_url': url,
|
||||
'original_url': url,
|
||||
'webpage_url_basename': url_basename(url),
|
||||
})
|
||||
if ie is not None:
|
||||
self.add_extra_info(ie_result, {
|
||||
'extractor': ie.IE_NAME,
|
||||
'extractor_key': ie.ie_key(),
|
||||
})
|
||||
|
||||
def process_ie_result(self, ie_result, download=True, extra_info={}):
|
||||
"""
|
||||
@@ -1196,13 +1216,16 @@ class YoutubeDL(object):
|
||||
|
||||
if result_type in ('url', 'url_transparent'):
|
||||
ie_result['url'] = sanitize_url(ie_result['url'])
|
||||
if ie_result.get('original_url'):
|
||||
extra_info.setdefault('original_url', ie_result['original_url'])
|
||||
|
||||
extract_flat = self.params.get('extract_flat', False)
|
||||
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
|
||||
or extract_flat is True):
|
||||
info_copy = ie_result.copy()
|
||||
self.add_extra_info(info_copy, extra_info)
|
||||
self.add_default_extra_info(
|
||||
info_copy, self.get_info_extractor(ie_result.get('ie_key')), ie_result['url'])
|
||||
ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
|
||||
self.add_default_extra_info(info_copy, ie, ie_result['url'])
|
||||
self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
|
||||
return ie_result
|
||||
|
||||
@@ -1352,13 +1375,19 @@ class YoutubeDL(object):
|
||||
if not isinstance(ie_entries, (list, PagedList)):
|
||||
ie_entries = LazyList(ie_entries)
|
||||
|
||||
def get_entry(i):
|
||||
return YoutubeDL.__handle_extraction_exceptions(
|
||||
lambda self, i: ie_entries[i - 1],
|
||||
False
|
||||
)(self, i)
|
||||
|
||||
entries = []
|
||||
for i in playlistitems or itertools.count(playliststart):
|
||||
if playlistitems is None and playlistend is not None and playlistend < i:
|
||||
break
|
||||
entry = None
|
||||
try:
|
||||
entry = ie_entries[i - 1]
|
||||
entry = get_entry(i)
|
||||
if entry is None:
|
||||
raise EntryNotInPlaylist()
|
||||
except (IndexError, EntryNotInPlaylist):
|
||||
@@ -1748,6 +1777,9 @@ class YoutubeDL(object):
|
||||
return new_dict
|
||||
|
||||
def _check_formats(formats):
|
||||
if not check_formats:
|
||||
yield from formats
|
||||
return
|
||||
for f in formats:
|
||||
self.to_screen('[info] Testing format %s' % f['format_id'])
|
||||
temp_file = tempfile.NamedTemporaryFile(
|
||||
@@ -1755,16 +1787,16 @@ class YoutubeDL(object):
|
||||
dir=self.get_output_path('temp') or None)
|
||||
temp_file.close()
|
||||
try:
|
||||
dl, _ = self.dl(temp_file.name, f, test=True)
|
||||
except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
|
||||
dl = False
|
||||
success, _ = self.dl(temp_file.name, f, test=True)
|
||||
except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
|
||||
success = False
|
||||
finally:
|
||||
if os.path.exists(temp_file.name):
|
||||
try:
|
||||
os.remove(temp_file.name)
|
||||
except OSError:
|
||||
self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
|
||||
if dl:
|
||||
if success:
|
||||
yield f
|
||||
else:
|
||||
self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
|
||||
@@ -1775,8 +1807,7 @@ class YoutubeDL(object):
|
||||
|
||||
def selector_function(ctx):
|
||||
for f in fs:
|
||||
for format in f(ctx):
|
||||
yield format
|
||||
yield from f(ctx)
|
||||
return selector_function
|
||||
|
||||
elif selector.type == GROUP: # ()
|
||||
@@ -1792,22 +1823,24 @@ class YoutubeDL(object):
|
||||
return picked_formats
|
||||
return []
|
||||
|
||||
elif selector.type == MERGE: # +
|
||||
selector_1, selector_2 = map(_build_selector_function, selector.selector)
|
||||
|
||||
def selector_function(ctx):
|
||||
for pair in itertools.product(
|
||||
selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
|
||||
yield _merge(pair)
|
||||
|
||||
elif selector.type == SINGLE: # atom
|
||||
format_spec = selector.selector or 'best'
|
||||
|
||||
# TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
|
||||
if format_spec == 'all':
|
||||
def selector_function(ctx):
|
||||
formats = list(ctx['formats'])
|
||||
if check_formats:
|
||||
formats = _check_formats(formats)
|
||||
for f in formats:
|
||||
yield f
|
||||
yield from _check_formats(ctx['formats'])
|
||||
elif format_spec == 'mergeall':
|
||||
def selector_function(ctx):
|
||||
formats = ctx['formats']
|
||||
if check_formats:
|
||||
formats = list(_check_formats(formats))
|
||||
formats = list(_check_formats(ctx['formats']))
|
||||
if not formats:
|
||||
return
|
||||
merged_format = formats[-1]
|
||||
@@ -1845,29 +1878,17 @@ class YoutubeDL(object):
|
||||
|
||||
def selector_function(ctx):
|
||||
formats = list(ctx['formats'])
|
||||
if not formats:
|
||||
return
|
||||
matches = list(filter(filter_f, formats)) if filter_f is not None else formats
|
||||
if format_fallback and ctx['incomplete_formats'] and not matches:
|
||||
# for extractors with incomplete formats (audio only (soundcloud)
|
||||
# or video only (imgur)) best/worst will fallback to
|
||||
# best/worst {video,audio}-only format
|
||||
matches = formats
|
||||
if format_reverse:
|
||||
matches = matches[::-1]
|
||||
if check_formats:
|
||||
matches = list(itertools.islice(_check_formats(matches), format_idx))
|
||||
n = len(matches)
|
||||
if -n <= format_idx - 1 < n:
|
||||
matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
|
||||
try:
|
||||
yield matches[format_idx - 1]
|
||||
|
||||
elif selector.type == MERGE: # +
|
||||
selector_1, selector_2 = map(_build_selector_function, selector.selector)
|
||||
|
||||
def selector_function(ctx):
|
||||
for pair in itertools.product(
|
||||
selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
|
||||
yield _merge(pair)
|
||||
except IndexError:
|
||||
return
|
||||
|
||||
filters = [self._build_format_filter(f) for f in selector.filters]
|
||||
|
||||
@@ -1944,15 +1965,27 @@ class YoutubeDL(object):
|
||||
t.get('id') if t.get('id') is not None else '',
|
||||
t.get('url')))
|
||||
|
||||
def test_thumbnail(t):
|
||||
self.to_screen('[info] Testing thumbnail %s' % t['id'])
|
||||
try:
|
||||
self.urlopen(HEADRequest(t['url']))
|
||||
except network_exceptions as err:
|
||||
self.to_screen('[info] Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
|
||||
t['id'], t['url'], error_to_compat_str(err)))
|
||||
return False
|
||||
return True
|
||||
def thumbnail_tester():
|
||||
if self.params.get('check_formats'):
|
||||
test_all = True
|
||||
to_screen = lambda msg: self.to_screen(f'[info] {msg}')
|
||||
else:
|
||||
test_all = False
|
||||
to_screen = self.write_debug
|
||||
|
||||
def test_thumbnail(t):
|
||||
if not test_all and not t.get('_test_url'):
|
||||
return True
|
||||
to_screen('Testing thumbnail %s' % t['id'])
|
||||
try:
|
||||
self.urlopen(HEADRequest(t['url']))
|
||||
except network_exceptions as err:
|
||||
to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
|
||||
t['id'], t['url'], error_to_compat_str(err)))
|
||||
return False
|
||||
return True
|
||||
|
||||
return test_thumbnail
|
||||
|
||||
for i, t in enumerate(thumbnails):
|
||||
if t.get('id') is None:
|
||||
@@ -1960,8 +1993,11 @@ class YoutubeDL(object):
|
||||
if t.get('width') and t.get('height'):
|
||||
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
||||
t['url'] = sanitize_url(t['url'])
|
||||
if self.params.get('check_formats'):
|
||||
info_dict['thumbnails'] = reversed(LazyList(filter(test_thumbnail, thumbnails[::-1])))
|
||||
|
||||
if self.params.get('check_formats') is not False:
|
||||
info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
|
||||
else:
|
||||
info_dict['thumbnails'] = thumbnails
|
||||
|
||||
def process_video_result(self, info_dict, download=True):
|
||||
assert info_dict.get('_type', 'video') == 'video'
|
||||
@@ -2001,10 +2037,6 @@ class YoutubeDL(object):
|
||||
|
||||
self._sanitize_thumbnails(info_dict)
|
||||
|
||||
if self.params.get('list_thumbnails'):
|
||||
self.list_thumbnails(info_dict)
|
||||
return
|
||||
|
||||
thumbnail = info_dict.get('thumbnail')
|
||||
thumbnails = info_dict.get('thumbnails')
|
||||
if thumbnail:
|
||||
@@ -2012,7 +2044,7 @@ class YoutubeDL(object):
|
||||
elif thumbnails:
|
||||
info_dict['thumbnail'] = thumbnails[-1]['url']
|
||||
|
||||
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||
if info_dict.get('display_id') is None and 'id' in info_dict:
|
||||
info_dict['display_id'] = info_dict['id']
|
||||
|
||||
for ts_key, date_key in (
|
||||
@@ -2028,6 +2060,23 @@ class YoutubeDL(object):
|
||||
except (ValueError, OverflowError, OSError):
|
||||
pass
|
||||
|
||||
live_keys = ('is_live', 'was_live')
|
||||
live_status = info_dict.get('live_status')
|
||||
if live_status is None:
|
||||
for key in live_keys:
|
||||
if info_dict.get(key) is False:
|
||||
continue
|
||||
if info_dict.get(key):
|
||||
live_status = key
|
||||
break
|
||||
if all(info_dict.get(key) is False for key in live_keys):
|
||||
live_status = 'not_live'
|
||||
if live_status:
|
||||
info_dict['live_status'] = live_status
|
||||
for key in live_keys:
|
||||
if info_dict.get(key) is None:
|
||||
info_dict[key] = (live_status == key)
|
||||
|
||||
# Auto generate title fields corresponding to the *_number fields when missing
|
||||
# in order to always have clean titles. This is very common for TV series.
|
||||
for field in ('chapter', 'season', 'episode'):
|
||||
@@ -2047,13 +2096,6 @@ class YoutubeDL(object):
|
||||
automatic_captions = info_dict.get('automatic_captions')
|
||||
subtitles = info_dict.get('subtitles')
|
||||
|
||||
if self.params.get('listsubtitles', False):
|
||||
if 'automatic_captions' in info_dict:
|
||||
self.list_subtitles(
|
||||
info_dict['id'], automatic_captions, 'automatic captions')
|
||||
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
|
||||
return
|
||||
|
||||
info_dict['requested_subtitles'] = self.process_subtitles(
|
||||
info_dict['id'], subtitles, automatic_captions)
|
||||
|
||||
@@ -2141,10 +2183,20 @@ class YoutubeDL(object):
|
||||
|
||||
info_dict, _ = self.pre_process(info_dict)
|
||||
|
||||
if self.params.get('listformats'):
|
||||
if not info_dict.get('formats'):
|
||||
raise ExtractorError('No video formats found', expected=True)
|
||||
self.list_formats(info_dict)
|
||||
list_only = self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')
|
||||
if list_only:
|
||||
self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
|
||||
if self.params.get('list_thumbnails'):
|
||||
self.list_thumbnails(info_dict)
|
||||
if self.params.get('listformats'):
|
||||
if not info_dict.get('formats'):
|
||||
raise ExtractorError('No video formats found', expected=True)
|
||||
self.list_formats(info_dict)
|
||||
if self.params.get('listsubtitles'):
|
||||
if 'automatic_captions' in info_dict:
|
||||
self.list_subtitles(
|
||||
info_dict['id'], automatic_captions, 'automatic captions')
|
||||
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
|
||||
return
|
||||
|
||||
format_selector = self.format_selector
|
||||
@@ -2185,6 +2237,8 @@ class YoutubeDL(object):
|
||||
raise ExtractorError('Requested format is not available', expected=True)
|
||||
else:
|
||||
self.report_warning('Requested format is not available')
|
||||
# Process what we can, even without any available formats.
|
||||
self.process_info(dict(info_dict))
|
||||
elif download:
|
||||
self.to_screen(
|
||||
'[info] %s: Downloading %d format(s): %s' % (
|
||||
@@ -2349,7 +2403,7 @@ class YoutubeDL(object):
|
||||
# TODO: backward compatibility, to be removed
|
||||
info_dict['fulltitle'] = info_dict['title']
|
||||
|
||||
if 'format' not in info_dict:
|
||||
if 'format' not in info_dict and 'ext' in info_dict:
|
||||
info_dict['format'] = info_dict['ext']
|
||||
|
||||
if self._match_entry(info_dict) is not None:
|
||||
@@ -2364,7 +2418,7 @@ class YoutubeDL(object):
|
||||
files_to_move = {}
|
||||
|
||||
# Forced printings
|
||||
self.__forced_printings(info_dict, full_filename, incomplete=False)
|
||||
self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
|
||||
|
||||
if self.params.get('simulate', False):
|
||||
if self.params.get('force_write_download_archive', False):
|
||||
@@ -2589,17 +2643,10 @@ class YoutubeDL(object):
|
||||
|
||||
requested_formats = info_dict['requested_formats']
|
||||
old_ext = info_dict['ext']
|
||||
if self.params.get('merge_output_format') is None:
|
||||
if not compatible_formats(requested_formats):
|
||||
info_dict['ext'] = 'mkv'
|
||||
self.report_warning(
|
||||
'Requested formats are incompatible for merge and will be merged into mkv.')
|
||||
if (info_dict['ext'] == 'webm'
|
||||
and self.params.get('writethumbnail', False)
|
||||
and info_dict.get('thumbnails')):
|
||||
info_dict['ext'] = 'mkv'
|
||||
self.report_warning(
|
||||
'webm doesn\'t support embedding a thumbnail, mkv will be used.')
|
||||
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
|
||||
info_dict['ext'] = 'mkv'
|
||||
self.report_warning(
|
||||
'Requested formats are incompatible for merge and will be merged into mkv.')
|
||||
|
||||
def correct_ext(filename):
|
||||
filename_real_ext = os.path.splitext(filename)[1][1:]
|
||||
@@ -2791,7 +2838,7 @@ class YoutubeDL(object):
|
||||
info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
|
||||
try:
|
||||
self.process_ie_result(info, download=True)
|
||||
except (DownloadError, EntryNotInPlaylist):
|
||||
except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
|
||||
webpage_url = info.get('webpage_url')
|
||||
if webpage_url is not None:
|
||||
self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
|
||||
@@ -2995,22 +3042,11 @@ class YoutubeDL(object):
|
||||
res += '~' + format_bytes(fdict['filesize_approx'])
|
||||
return res
|
||||
|
||||
def _format_note_table(self, f):
|
||||
def join_fields(*vargs):
|
||||
return ', '.join((val for val in vargs if val != ''))
|
||||
|
||||
return join_fields(
|
||||
'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
|
||||
format_field(f, 'language', '[%s]'),
|
||||
format_field(f, 'format_note'),
|
||||
format_field(f, 'container', ignore=(None, f.get('ext'))),
|
||||
format_field(f, 'asr', '%5dHz'))
|
||||
|
||||
def list_formats(self, info_dict):
|
||||
formats = info_dict.get('formats', [info_dict])
|
||||
new_format = (
|
||||
'list-formats' not in self.params.get('compat_opts', [])
|
||||
and self.params.get('list_formats_as_table', True) is not False)
|
||||
and self.params.get('listformats_table', True) is not False)
|
||||
if new_format:
|
||||
table = [
|
||||
[
|
||||
@@ -3028,11 +3064,15 @@ class YoutubeDL(object):
|
||||
format_field(f, 'acodec', default='unknown').replace('none', ''),
|
||||
format_field(f, 'abr', '%3dk'),
|
||||
format_field(f, 'asr', '%5dHz'),
|
||||
self._format_note_table(f)]
|
||||
for f in formats
|
||||
if f.get('preference') is None or f['preference'] >= -1000]
|
||||
', '.join(filter(None, (
|
||||
'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
|
||||
format_field(f, 'language', '[%s]'),
|
||||
format_field(f, 'format_note'),
|
||||
format_field(f, 'container', ignore=(None, f.get('ext'))),
|
||||
format_field(f, 'asr', '%5dHz')))),
|
||||
] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
|
||||
header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
|
||||
'|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
|
||||
'|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
|
||||
else:
|
||||
table = [
|
||||
[
|
||||
@@ -3045,12 +3085,9 @@ class YoutubeDL(object):
|
||||
header_line = ['format code', 'extension', 'resolution', 'note']
|
||||
|
||||
self.to_screen(
|
||||
'[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
|
||||
header_line,
|
||||
table,
|
||||
delim=new_format,
|
||||
extraGap=(0 if new_format else 1),
|
||||
hideEmpty=new_format)))
|
||||
'[info] Available formats for %s:' % info_dict['id'])
|
||||
self.to_stdout(render_table(
|
||||
header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
|
||||
|
||||
def list_thumbnails(self, info_dict):
|
||||
thumbnails = list(info_dict.get('thumbnails'))
|
||||
@@ -3060,7 +3097,7 @@ class YoutubeDL(object):
|
||||
|
||||
self.to_screen(
|
||||
'[info] Thumbnails for %s:' % info_dict['id'])
|
||||
self.to_screen(render_table(
|
||||
self.to_stdout(render_table(
|
||||
['ID', 'width', 'height', 'URL'],
|
||||
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
|
||||
|
||||
@@ -3072,12 +3109,12 @@ class YoutubeDL(object):
|
||||
'Available %s for %s:' % (name, video_id))
|
||||
|
||||
def _row(lang, formats):
|
||||
exts, names = zip(*((f['ext'], f.get('name', 'unknown')) for f in reversed(formats)))
|
||||
exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
|
||||
if len(set(names)) == 1:
|
||||
names = [] if names[0] == 'unknown' else names[:1]
|
||||
return [lang, ', '.join(names), ', '.join(exts)]
|
||||
|
||||
self.to_screen(render_table(
|
||||
self.to_stdout(render_table(
|
||||
['Language', 'Name', 'Formats'],
|
||||
[_row(lang, formats) for lang, formats in subtitles.items()],
|
||||
hideEmpty=True))
|
||||
@@ -3182,16 +3219,11 @@ class YoutubeDL(object):
|
||||
timeout_val = self.params.get('socket_timeout')
|
||||
self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
|
||||
|
||||
opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
|
||||
opts_cookiefile = self.params.get('cookiefile')
|
||||
opts_proxy = self.params.get('proxy')
|
||||
|
||||
if opts_cookiefile is None:
|
||||
self.cookiejar = compat_cookiejar.CookieJar()
|
||||
else:
|
||||
opts_cookiefile = expand_path(opts_cookiefile)
|
||||
self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
|
||||
if os.access(opts_cookiefile, os.R_OK):
|
||||
self.cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||
self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
|
||||
|
||||
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
|
||||
if opts_proxy is not None:
|
||||
@@ -3255,7 +3287,7 @@ class YoutubeDL(object):
|
||||
multiple = write_all and len(thumbnails) > 1
|
||||
|
||||
ret = []
|
||||
for t in thumbnails[::1 if write_all else -1]:
|
||||
for t in thumbnails[::-1]:
|
||||
thumb_ext = determine_ext(t['url'], 'jpg')
|
||||
suffix = '%s.' % t['id'] if multiple else ''
|
||||
thumb_display_id = '%s ' % t['id'] if multiple else ''
|
||||
|
||||
@@ -20,6 +20,7 @@ from .compat import (
|
||||
compat_getpass,
|
||||
workaround_optparse_bug9161,
|
||||
)
|
||||
from .cookies import SUPPORTED_BROWSERS
|
||||
from .utils import (
|
||||
DateRange,
|
||||
decodeOption,
|
||||
@@ -242,6 +243,12 @@ def _real_main(argv=None):
|
||||
if opts.convertthumbnails not in FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS:
|
||||
parser.error('invalid thumbnail format specified')
|
||||
|
||||
if opts.cookiesfrombrowser is not None:
|
||||
opts.cookiesfrombrowser = [
|
||||
part.strip() or None for part in opts.cookiesfrombrowser.split(':', 1)]
|
||||
if opts.cookiesfrombrowser[0] not in SUPPORTED_BROWSERS:
|
||||
parser.error('unsupported browser specified for cookies')
|
||||
|
||||
if opts.date is not None:
|
||||
date = DateRange.day(opts.date)
|
||||
else:
|
||||
@@ -273,7 +280,7 @@ def _real_main(argv=None):
|
||||
'filename', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
|
||||
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
|
||||
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json',
|
||||
'embed-thumbnail-atomicparsley',
|
||||
'embed-thumbnail-atomicparsley', 'seperate-video-versions',
|
||||
]
|
||||
compat_opts = parse_compat_opts()
|
||||
|
||||
@@ -415,6 +422,13 @@ def _real_main(argv=None):
|
||||
# Run this before the actual video download
|
||||
'when': 'before_dl'
|
||||
})
|
||||
# Must be after all other before_dl
|
||||
if opts.exec_before_dl_cmd:
|
||||
postprocessors.append({
|
||||
'key': 'ExecAfterDownload',
|
||||
'exec_cmd': opts.exec_before_dl_cmd,
|
||||
'when': 'before_dl'
|
||||
})
|
||||
if opts.extractaudio:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegExtractAudio',
|
||||
@@ -621,6 +635,7 @@ def _real_main(argv=None):
|
||||
'break_on_reject': opts.break_on_reject,
|
||||
'skip_playlist_after_errors': opts.skip_playlist_after_errors,
|
||||
'cookiefile': opts.cookiefile,
|
||||
'cookiesfrombrowser': opts.cookiesfrombrowser,
|
||||
'nocheckcertificate': opts.no_check_certificate,
|
||||
'prefer_insecure': opts.prefer_insecure,
|
||||
'proxy': opts.proxy,
|
||||
@@ -631,6 +646,7 @@ def _real_main(argv=None):
|
||||
'include_ads': opts.include_ads,
|
||||
'default_search': opts.default_search,
|
||||
'dynamic_mpd': opts.dynamic_mpd,
|
||||
'extractor_args': opts.extractor_args,
|
||||
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
|
||||
'youtube_include_hls_manifest': opts.youtube_include_hls_manifest,
|
||||
'encoding': opts.encoding,
|
||||
|
||||
3029
yt_dlp/compat.py
File diff suppressed because it is too large
755
yt_dlp/cookies.py
Normal file
@@ -0,0 +1,755 @@
|
||||
import ctypes
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import struct
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from hashlib import pbkdf2_hmac
|
||||
|
||||
from yt_dlp.aes import aes_cbc_decrypt
|
||||
from yt_dlp.compat import (
|
||||
compat_b64decode,
|
||||
compat_cookiejar_Cookie,
|
||||
)
|
||||
from yt_dlp.utils import (
|
||||
bug_reports_message,
|
||||
bytes_to_intlist,
|
||||
expand_path,
|
||||
intlist_to_bytes,
|
||||
process_communicate_or_kill,
|
||||
YoutubeDLCookieJar,
|
||||
)
|
||||
|
||||
try:
|
||||
import sqlite3
|
||||
SQLITE_AVAILABLE = True
|
||||
except ImportError:
|
||||
# although sqlite3 is part of the standard library, it is possible to compile python without
|
||||
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
|
||||
SQLITE_AVAILABLE = False
|
||||
|
||||
|
||||
try:
|
||||
from Crypto.Cipher import AES
|
||||
CRYPTO_AVAILABLE = True
|
||||
except ImportError:
|
||||
CRYPTO_AVAILABLE = False
|
||||
|
||||
try:
|
||||
import keyring
|
||||
KEYRING_AVAILABLE = True
|
||||
KEYRING_UNAVAILABLE_REASON = f'due to unknown reasons{bug_reports_message()}'
|
||||
except ImportError:
|
||||
KEYRING_AVAILABLE = False
|
||||
KEYRING_UNAVAILABLE_REASON = (
|
||||
'as the `keyring` module is not installed. '
|
||||
'Please install by running `python3 -m pip install keyring`. '
|
||||
'Depending on your platform, additional packages may be required '
|
||||
'to access the keyring; see https://pypi.org/project/keyring')
|
||||
except Exception as _err:
|
||||
KEYRING_AVAILABLE = False
|
||||
KEYRING_UNAVAILABLE_REASON = 'as the `keyring` module could not be initialized: %s' % _err
|
||||
|
||||
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
||||
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
||||
|
||||
|
||||
class YDLLogger:
|
||||
def __init__(self, ydl=None):
|
||||
self._ydl = ydl
|
||||
|
||||
def debug(self, message):
|
||||
if self._ydl:
|
||||
self._ydl.write_debug(message)
|
||||
|
||||
def info(self, message):
|
||||
if self._ydl:
|
||||
self._ydl.to_screen(f'[Cookies] {message}')
|
||||
|
||||
def warning(self, message, only_once=False):
|
||||
if self._ydl:
|
||||
self._ydl.report_warning(message, only_once)
|
||||
|
||||
def error(self, message):
|
||||
if self._ydl:
|
||||
self._ydl.report_error(message)
|
||||
|
||||
|
||||
def load_cookies(cookie_file, browser_specification, ydl):
|
||||
cookie_jars = []
|
||||
if browser_specification is not None:
|
||||
browser_name, profile = _parse_browser_specification(*browser_specification)
|
||||
cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl)))
|
||||
|
||||
if cookie_file is not None:
|
||||
cookie_file = expand_path(cookie_file)
|
||||
jar = YoutubeDLCookieJar(cookie_file)
|
||||
if os.access(cookie_file, os.R_OK):
|
||||
jar.load(ignore_discard=True, ignore_expires=True)
|
||||
cookie_jars.append(jar)
|
||||
|
||||
return _merge_cookie_jars(cookie_jars)
|
||||
|
||||
|
||||
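load_cookies above merges a Netscape cookie file and, optionally, cookies pulled from a browser into a single jar; this is what YoutubeDL now calls with the cookiefile/cookiesfrombrowser options. A sketch, assuming the one-element browser tuple from the docstring example is accepted; passing ydl=None simply leaves the YDLLogger silent:

    from yt_dlp.cookies import load_cookies

    jar = load_cookies('cookies.txt', ('chrome', ), ydl=None)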
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger()):
|
||||
if browser_name == 'firefox':
|
||||
return _extract_firefox_cookies(profile, logger)
|
||||
elif browser_name == 'safari':
|
||||
return _extract_safari_cookies(profile, logger)
|
||||
elif browser_name in CHROMIUM_BASED_BROWSERS:
|
||||
return _extract_chrome_cookies(browser_name, profile, logger)
|
||||
else:
|
||||
raise ValueError('unknown browser: {}'.format(browser_name))
|
||||
|
||||
|
||||
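extract_cookies_from_browser is also usable on its own and dispatches to the firefox/safari/chromium extractors defined below, with the default profile and logger from its signature:

    from yt_dlp.cookies import extract_cookies_from_browser

    jar = extract_cookies_from_browser('firefox')   # searches the platform's default Firefox profile dir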
def _extract_firefox_cookies(profile, logger):
|
||||
logger.info('Extracting cookies from firefox')
|
||||
if not SQLITE_AVAILABLE:
|
||||
logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
|
||||
'Please use a python interpreter compiled with sqlite3 support')
|
||||
return YoutubeDLCookieJar()
|
||||
|
||||
if profile is None:
|
||||
search_root = _firefox_browser_dir()
|
||||
elif _is_path(profile):
|
||||
search_root = profile
|
||||
else:
|
||||
search_root = os.path.join(_firefox_browser_dir(), profile)
|
||||
|
||||
cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite')
|
||||
if cookie_database_path is None:
|
||||
raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root))
|
||||
logger.debug('extracting from: "{}"'.format(cookie_database_path))
|
||||
|
||||
with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir:
|
||||
cursor = None
|
||||
try:
|
||||
cursor = _open_database_copy(cookie_database_path, tmpdir)
|
||||
cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
|
||||
jar = YoutubeDLCookieJar()
|
||||
for host, name, value, path, expiry, is_secure in cursor.fetchall():
|
||||
cookie = compat_cookiejar_Cookie(
|
||||
version=0, name=name, value=value, port=None, port_specified=False,
|
||||
domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
|
||||
path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
|
||||
comment=None, comment_url=None, rest={})
|
||||
jar.set_cookie(cookie)
|
||||
logger.info('Extracted {} cookies from firefox'.format(len(jar)))
|
||||
return jar
|
||||
finally:
|
||||
if cursor is not None:
|
||||
cursor.connection.close()
|
||||
|
||||
|
||||
def _firefox_browser_dir():
    if sys.platform in ('linux', 'linux2'):
        return os.path.expanduser('~/.mozilla/firefox')
    elif sys.platform == 'win32':
        return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles')
    elif sys.platform == 'darwin':
        return os.path.expanduser('~/Library/Application Support/Firefox')
    else:
        raise ValueError('unsupported platform: {}'.format(sys.platform))


def _get_chromium_based_browser_settings(browser_name):
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    if sys.platform in ('linux', 'linux2'):
        config = _config_home()
        browser_dir = {
            'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
            'chrome': os.path.join(config, 'google-chrome'),
            'chromium': os.path.join(config, 'chromium'),
            'edge': os.path.join(config, 'microsoft-edge'),
            'opera': os.path.join(config, 'opera'),
            'vivaldi': os.path.join(config, 'vivaldi'),
        }[browser_name]

    elif sys.platform == 'win32':
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        browser_dir = {
            'brave': os.path.join(appdata_local, r'BraveSoftware\Brave-Browser\User Data'),
            'chrome': os.path.join(appdata_local, r'Google\Chrome\User Data'),
            'chromium': os.path.join(appdata_local, r'Chromium\User Data'),
            'edge': os.path.join(appdata_local, r'Microsoft\Edge\User Data'),
            'opera': os.path.join(appdata_roaming, r'Opera Software\Opera Stable'),
            'vivaldi': os.path.join(appdata_local, r'Vivaldi\User Data'),
        }[browser_name]

    elif sys.platform == 'darwin':
        appdata = os.path.expanduser('~/Library/Application Support')
        browser_dir = {
            'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'),
            'chrome': os.path.join(appdata, 'Google/Chrome'),
            'chromium': os.path.join(appdata, 'Chromium'),
            'edge': os.path.join(appdata, 'Microsoft Edge'),
            'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
            'vivaldi': os.path.join(appdata, 'Vivaldi'),
        }[browser_name]

    else:
        raise ValueError('unsupported platform: {}'.format(sys.platform))

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
        'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
        'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
    }[browser_name]

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles
    }


def _extract_chrome_cookies(browser_name, profile, logger):
    logger.info('Extracting cookies from {}'.format(browser_name))

    if not SQLITE_AVAILABLE:
        logger.warning(('Cannot extract cookies from {} without sqlite3 support. '
                        'Please use a python interpreter compiled with sqlite3 support').format(browser_name))
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
    else:
        if config['supports_profiles']:
            search_root = os.path.join(config['browser_dir'], profile)
        else:
            logger.error('{} does not support profiles'.format(browser_name))
            search_root = config['browser_dir']

    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies')
    if cookie_database_path is None:
        raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root))
    logger.debug('extracting from: "{}"'.format(cookie_database_path))

    decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger)

    with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute('SELECT host_key, name, value, encrypted_value, path, '
                           'expires_utc, {} FROM cookies'.format(secure_column))
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall():
                host_key = host_key.decode('utf-8')
                name = name.decode('utf-8')
                value = value.decode('utf-8')
                path = path.decode('utf-8')

                if not value and encrypted_value:
                    value = decryptor.decrypt(encrypted_value)
                    if value is None:
                        failed_cookies += 1
                        continue

                cookie = compat_cookiejar_Cookie(
                    version=0, name=name, value=value, port=None, port_specified=False,
                    domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
                    path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
                    comment=None, comment_url=None, rest={})
                jar.set_cookie(cookie)
            if failed_cookies > 0:
                failed_message = ' ({} could not be decrypted)'.format(failed_cookies)
            else:
                failed_message = ''
            logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message))
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()


class ChromeCookieDecryptor:
    """
    Overview:

        Linux:
        - cookies are either v10 or v11
        - v10: AES-CBC encrypted with a fixed key
        - v11: AES-CBC encrypted with an OS protected key (keyring)
        - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
        - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
        - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
        - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
        - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    def decrypt(self, encrypted_value):
        raise NotImplementedError


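As a standalone sketch of the Linux v10 scheme described in the docstring above (a hedged illustration, not part of the repository source; it assumes pycryptodome is installed and a helper name of its own), the fixed-key derivation and AES-CBC decryption amount to:

import hashlib

from Crypto.Cipher import AES  # pycryptodome, assumed available


def decrypt_linux_v10_example(encrypted_value):
    # v10 on Linux: AES-CBC with a key derived from the fixed password 'peanuts'
    # (PBKDF2-SHA1, salt 'saltysalt', 1 iteration, 16-byte key, 16-space IV)
    assert encrypted_value[:3] == b'v10'
    key = hashlib.pbkdf2_hmac('sha1', b'peanuts', b'saltysalt', 1, dklen=16)
    plaintext = AES.new(key, AES.MODE_CBC, iv=b' ' * 16).decrypt(encrypted_value[3:])
    return plaintext[:-plaintext[-1]].decode('utf-8')  # strip PKCS#7 padding
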
def get_cookie_decryptor(browser_root, browser_keyring_name, logger):
    if sys.platform in ('linux', 'linux2'):
        return LinuxChromeCookieDecryptor(browser_keyring_name, logger)
    elif sys.platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    elif sys.platform == 'win32':
        return WindowsChromeCookieDecryptor(browser_root, logger)
    else:
        raise NotImplementedError('Chrome cookie decryption is not supported '
                                  'on this platform: {}'.format(sys.platform))


class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        self._v10_key = self.derive_key(b'peanuts')
        if KEYRING_AVAILABLE:
            self._v11_key = self.derive_key(_get_linux_keyring_password(browser_keyring_name))
        else:
            self._v11_key = None

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        elif version == b'v11':
            if self._v11_key is None:
                self._logger.warning(f'cannot decrypt cookie {KEYRING_UNAVAILABLE_REASON}', only_once=True)
                return None
            return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)

        else:
            return None


class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        password = _get_mac_keyring_password(browser_keyring_name)
        self._v10_key = None if password is None else self.derive_key(password)

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)

        else:
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
            return encrypted_value


class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)

    def decrypt(self, encrypted_value):
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None
            elif not CRYPTO_AVAILABLE:
                self._logger.warning('cannot decrypt cookie as the `pycryptodome` module is not installed. '
                                     'Please install by running `python3 -m pip install pycryptodome`',
                                     only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            # kNonceLength
            nonce_length = 96 // 8
            # boringssl
            # EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8')


def _extract_safari_cookies(profile, logger):
    if profile is not None:
        logger.error('safari does not support profiles')
    if sys.platform != 'darwin':
        raise ValueError('unsupported platform: {}'.format(sys.platform))

    cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')

    if not os.path.isfile(cookies_path):
        raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as f:
        cookies_data = f.read()

    jar = parse_safari_cookies(cookies_data, logger=logger)
    logger.info('Extracted {} cookies from safari'.format(len(jar)))
    return jar


class ParserError(Exception):
    pass


class DataParser:
    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        if num_bytes < 0:
            raise ParserError('invalid read of {} bytes'.format(num_bytes))
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError('unexpected value: {} != {} ({})'.format(value, expected_value, message))

    def read_uint(self, big_endian=False):
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        buffer = []
        while True:
            c = self.read_bytes(1)
            if c == b'\x00':
                return b''.join(buffer).decode('utf-8')
            else:
                buffer.append(c)

    def skip(self, num_bytes, description='unknown'):
        if num_bytes > 0:
            self._logger.debug('skipping {} bytes ({}): {}'.format(
                num_bytes, description, self.read_bytes(num_bytes)))
        elif num_bytes < 0:
            raise ParserError('invalid skip of {} bytes'.format(num_bytes))

    def skip_to(self, offset, description='unknown'):
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        self.skip_to(len(self._data), description)


def _mac_absolute_time_to_posix(timestamp):
    return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())


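As a quick sanity check on the epoch shift used above (a hedged aside, not part of the repository source): Mac "absolute time" counts seconds from 2001-01-01 UTC, which sits 978307200 seconds after the POSIX epoch, so a Mac timestamp of 0 must convert to exactly that value.

from datetime import datetime, timezone

# 2001-01-01 00:00:00 UTC == POSIX timestamp 978307200
assert int(datetime(2001, 1, 1, tzinfo=timezone.utc).timestamp()) == 978307200
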
def _parse_safari_cookies_header(data, logger):
    p = DataParser(data, logger)
    p.expect_bytes(b'cook', 'database signature')
    number_of_pages = p.read_uint(big_endian=True)
    page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)]
    return page_sizes, p.cursor


def _parse_safari_cookies_page(data, jar, logger):
    p = DataParser(data, logger)
    p.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = p.read_uint()
    record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
    if number_of_cookies == 0:
        logger.debug('a cookies page of size {} has no cookies'.format(len(data)))
        return

    p.skip_to(record_offsets[0], 'unknown page header field')

    for record_offset in record_offsets:
        p.skip_to(record_offset, 'space between records')
        record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
        p.read_bytes(record_length)
    p.skip_to_end('space in between pages')


def _parse_safari_cookies_record(data, jar, logger):
    p = DataParser(data, logger)
    record_size = p.read_uint()
    p.skip(4, 'unknown record field 1')
    flags = p.read_uint()
    is_secure = bool(flags & 0x0001)
    p.skip(4, 'unknown record field 2')
    domain_offset = p.read_uint()
    name_offset = p.read_uint()
    path_offset = p.read_uint()
    value_offset = p.read_uint()
    p.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(p.read_double())
    _creation_date = _mac_absolute_time_to_posix(p.read_double())  # noqa: F841

    try:
        p.skip_to(domain_offset)
        domain = p.read_cstring()

        p.skip_to(name_offset)
        name = p.read_cstring()

        p.skip_to(path_offset)
        path = p.read_cstring()

        p.skip_to(value_offset)
        value = p.read_cstring()
    except UnicodeDecodeError:
        logger.warning('failed to parse cookie because UTF-8 decoding failed')
        return record_size

    p.skip_to(record_size, 'space at the end of the record')

    cookie = compat_cookiejar_Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={})
    jar.set_cookie(cookie)
    return record_size


def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure are the same
            - there are a few bytes here and there which are skipped during parsing
    """
    if jar is None:
        jar = YoutubeDLCookieJar()
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    p = DataParser(data[body_start:], logger)
    for page_size in page_sizes:
        _parse_safari_cookies_page(p.read_bytes(page_size), jar, logger)
    p.skip_to_end('footer')
    return jar


def _get_linux_keyring_password(browser_keyring_name):
    password = keyring.get_password('{} Keys'.format(browser_keyring_name),
                                    '{} Safe Storage'.format(browser_keyring_name))
    if password is None:
        # this sometimes occurs in KDE because chrome does not check hasEntry and instead
        # just tries to read the value (which kwallet returns "") whereas keyring checks hasEntry
        # to verify this:
        # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
        # while starting chrome.
        # this may be a bug as the intended behaviour is to generate a random password and store
        # it, but that doesn't matter here.
        password = ''
    return password.encode('utf-8')


def _get_mac_keyring_password(browser_keyring_name):
    if KEYRING_AVAILABLE:
        password = keyring.get_password('{} Safe Storage'.format(browser_keyring_name), browser_keyring_name)
        return password.encode('utf-8')
    else:
        proc = subprocess.Popen(['security', 'find-generic-password',
                                 '-w',  # write password to stdout
                                 '-a', browser_keyring_name,  # match 'account'
                                 '-s', '{} Safe Storage'.format(browser_keyring_name)],  # match 'service'
                                stdout=subprocess.PIPE,
                                stderr=subprocess.DEVNULL)
        try:
            stdout, stderr = process_communicate_or_kill(proc)
            return stdout
        except BaseException:
            return None


def _get_windows_v10_key(browser_root, logger):
    path = _find_most_recently_used_file(browser_root, 'Local State')
    if path is None:
        logger.error('could not find local state file')
        return None
    with open(path, 'r') as f:
        data = json.load(f)
    try:
        base64_key = data['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None
    encrypted_key = compat_b64decode(base64_key)
    prefix = b'DPAPI'
    if not encrypted_key.startswith(prefix):
        logger.error('invalid key')
        return None
    return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger)


def pbkdf2_sha1(password, salt, iterations, key_length):
    return pbkdf2_hmac('sha1', password, salt, iterations, key_length)


def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    plaintext = aes_cbc_decrypt(bytes_to_intlist(ciphertext),
                                bytes_to_intlist(key),
                                bytes_to_intlist(initialization_vector))
    padding_length = plaintext[-1]
    try:
        return intlist_to_bytes(plaintext[:-padding_length]).decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie because UTF-8 decoding failed. Possibly the key is wrong?')
        return None


def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    cipher = AES.new(key, AES.MODE_GCM, nonce)
    try:
        plaintext = cipher.decrypt_and_verify(ciphertext, authentication_tag)
    except ValueError:
        logger.warning('failed to decrypt cookie because the MAC check failed. Possibly the key is wrong?')
        return None

    try:
        return plaintext.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie because UTF-8 decoding failed. Possibly the key is wrong?')
        return None


def _decrypt_windows_dpapi(ciphertext, logger):
    """
    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """
    from ctypes.wintypes import DWORD

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    buffer = ctypes.create_string_buffer(ciphertext)
    blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
    blob_out = DATA_BLOB()
    ret = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(blob_in),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(blob_out)  # pDataOut
    )
    if not ret:
        logger.warning('failed to decrypt with DPAPI')
        return None

    result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
    ctypes.windll.kernel32.LocalFree(blob_out.pbData)
    return result


def _config_home():
    return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))


def _open_database_copy(database_path, tmpdir):
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, database_copy_path)
    conn = sqlite3.connect(database_copy_path)
    return conn.cursor()


def _get_column_names(cursor, table_name):
    table_info = cursor.execute('PRAGMA table_info({})'.format(table_name)).fetchall()
    return [row[1].decode('utf-8') for row in table_info]


def _find_most_recently_used_file(root, filename):
    # if there are multiple browser profiles, take the most recently used one
    paths = []
    for root, dirs, files in os.walk(root):
        for file in files:
            if file == filename:
                paths.append(os.path.join(root, file))
    return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)


def _merge_cookie_jars(jars):
    output_jar = YoutubeDLCookieJar()
    for jar in jars:
        for cookie in jar:
            output_jar.set_cookie(cookie)
        if jar.filename is not None:
            output_jar.filename = jar.filename
    return output_jar


def _is_path(value):
    return os.path.sep in value


def _parse_browser_specification(browser_name, profile=None):
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if profile is not None and _is_path(profile):
        profile = os.path.expanduser(profile)
    return browser_name, profile
@@ -1,5 +1,6 @@
|
||||
from __future__ import division, unicode_literals
|
||||
|
||||
import copy
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
@@ -360,7 +361,7 @@ class FileDownloader(object):
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'total_bytes': os.path.getsize(encodeFilename(filename)),
|
||||
})
|
||||
}, info_dict)
|
||||
return True, False
|
||||
|
||||
if subtitle is False:
|
||||
@@ -388,7 +389,16 @@ class FileDownloader(object):
|
||||
"""Real download process. Redefine in subclasses."""
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
def _hook_progress(self, status):
|
||||
def _hook_progress(self, status, info_dict):
|
||||
if not self._progress_hooks:
|
||||
return
|
||||
info_dict = dict(info_dict)
|
||||
for key in ('__original_infodict', '__postprocessors'):
|
||||
info_dict.pop(key, None)
|
||||
# youtube-dl passes the same status object to all the hooks.
|
||||
# Some third party scripts seems to be relying on this.
|
||||
# So keep this behavior if possible
|
||||
status['info_dict'] = copy.deepcopy(info_dict)
|
||||
for ph in self._progress_hooks:
|
||||
ph(status)
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ class DashSegmentsFD(FragmentFD):
|
||||
if real_downloader:
|
||||
self._prepare_external_frag_download(ctx)
|
||||
else:
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
self._prepare_and_start_frag_download(ctx, info_dict)
|
||||
|
||||
fragments_to_download = []
|
||||
frag_index = 0
|
||||
@@ -57,9 +57,6 @@ class DashSegmentsFD(FragmentFD):
|
||||
# TODO: Make progress updates work without hooking twice
|
||||
# for ph in self._progress_hooks:
|
||||
# fd.add_progress_hook(ph)
|
||||
success = fd.real_download(filename, info_copy)
|
||||
if not success:
|
||||
return False
|
||||
else:
|
||||
self.download_and_append_fragments(ctx, fragments_to_download, info_dict)
|
||||
return True
|
||||
return fd.real_download(filename, info_copy)
|
||||
|
||||
return self.download_and_append_fragments(ctx, fragments_to_download, info_dict)
|
||||
|
||||
@@ -67,7 +67,7 @@ class ExternalFD(FileDownloader):
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
})
|
||||
self._hook_progress(status)
|
||||
self._hook_progress(status, info_dict)
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
@@ -377,8 +377,6 @@ class FFmpegFD(ExternalFD):
|
||||
# http://trac.ffmpeg.org/ticket/6125#comment:10
|
||||
args += ['-seekable', '1' if seekable else '0']
|
||||
|
||||
args += self._configuration_args()
|
||||
|
||||
# start_time = info_dict.get('start_time') or 0
|
||||
# if start_time:
|
||||
# args += ['-ss', compat_str(start_time)]
|
||||
@@ -446,7 +444,8 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
for url in urls:
|
||||
args += ['-i', url]
|
||||
args += ['-c', 'copy']
|
||||
|
||||
args += self._configuration_args() + ['-c', 'copy']
|
||||
if info_dict.get('requested_formats'):
|
||||
for (i, fmt) in enumerate(info_dict['requested_formats']):
|
||||
if fmt.get('acodec') != 'none':
|
||||
|
||||
@@ -380,7 +380,7 @@ class F4mFD(FragmentFD):
|
||||
|
||||
base_url_parsed = compat_urllib_parse_urlparse(base_url)
|
||||
|
||||
self._start_frag_download(ctx)
|
||||
self._start_frag_download(ctx, info_dict)
|
||||
|
||||
frag_index = 0
|
||||
while fragments_list:
|
||||
@@ -434,6 +434,6 @@ class F4mFD(FragmentFD):
|
||||
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
|
||||
self.report_warning(msg)
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
self._finish_frag_download(ctx, info_dict)
|
||||
|
||||
return True
|
||||
|
||||
@@ -83,9 +83,9 @@ class FragmentFD(FileDownloader):
|
||||
headers = info_dict.get('http_headers')
|
||||
return sanitized_Request(url, None, headers) if headers else url
|
||||
|
||||
def _prepare_and_start_frag_download(self, ctx):
|
||||
def _prepare_and_start_frag_download(self, ctx, info_dict):
|
||||
self._prepare_frag_download(ctx)
|
||||
self._start_frag_download(ctx)
|
||||
self._start_frag_download(ctx, info_dict)
|
||||
|
||||
def __do_ytdl_file(self, ctx):
|
||||
return not ctx['live'] and not ctx['tmpfilename'] == '-' and not self.params.get('_no_ytdl_file')
|
||||
@@ -219,7 +219,7 @@ class FragmentFD(FileDownloader):
|
||||
'complete_frags_downloaded_bytes': resume_len,
|
||||
})
|
||||
|
||||
def _start_frag_download(self, ctx):
|
||||
def _start_frag_download(self, ctx, info_dict):
|
||||
resume_len = ctx['complete_frags_downloaded_bytes']
|
||||
total_frags = ctx['total_frags']
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
@@ -248,6 +248,7 @@ class FragmentFD(FileDownloader):
|
||||
time_now = time.time()
|
||||
state['elapsed'] = time_now - start
|
||||
frag_total_bytes = s.get('total_bytes') or 0
|
||||
s['fragment_info_dict'] = s.pop('info_dict', {})
|
||||
if not ctx['live']:
|
||||
estimated_size = (
|
||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
|
||||
@@ -270,13 +271,13 @@ class FragmentFD(FileDownloader):
|
||||
state['speed'] = s.get('speed') or ctx.get('speed')
|
||||
ctx['speed'] = state['speed']
|
||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||
self._hook_progress(state)
|
||||
self._hook_progress(state, info_dict)
|
||||
|
||||
ctx['dl'].add_progress_hook(frag_progress_hook)
|
||||
|
||||
return start
|
||||
|
||||
def _finish_frag_download(self, ctx):
|
||||
def _finish_frag_download(self, ctx, info_dict):
|
||||
ctx['dest_stream'].close()
|
||||
if self.__do_ytdl_file(ctx):
|
||||
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
|
||||
@@ -303,7 +304,7 @@ class FragmentFD(FileDownloader):
|
||||
'filename': ctx['filename'],
|
||||
'status': 'finished',
|
||||
'elapsed': elapsed,
|
||||
})
|
||||
}, info_dict)
|
||||
|
||||
def _prepare_external_frag_download(self, ctx):
|
||||
if 'live' not in ctx:
|
||||
@@ -328,8 +329,7 @@ class FragmentFD(FileDownloader):
|
||||
|
||||
def download_and_append_fragments(self, ctx, fragments, info_dict, pack_func=None):
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
test = self.params.get('test', False)
|
||||
is_fatal = (lambda idx: idx == 0) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)
|
||||
if not pack_func:
|
||||
pack_func = lambda frag_content, _: frag_content
|
||||
|
||||
@@ -341,7 +341,7 @@ class FragmentFD(FileDownloader):
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
|
||||
|
||||
# Never skip the first fragment
|
||||
fatal = (fragment.get('index') or frag_index) == 0 or not skip_unavailable_fragments
|
||||
fatal = is_fatal(fragment.get('index') or (frag_index - 1))
|
||||
count, frag_content = 0, None
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
@@ -382,14 +382,13 @@ class FragmentFD(FileDownloader):
|
||||
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
|
||||
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
|
||||
# not what it decrypts to.
|
||||
if test:
|
||||
if self.params.get('test', False):
|
||||
return frag_content
|
||||
return AES.new(decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
|
||||
def append_fragment(frag_content, frag_index, ctx):
|
||||
if not frag_content:
|
||||
fatal = frag_index == 1 or not skip_unavailable_fragments
|
||||
if not fatal:
|
||||
if not is_fatal(frag_index - 1):
|
||||
self.report_skip_fragment(frag_index)
|
||||
return True
|
||||
else:
|
||||
@@ -404,13 +403,9 @@ class FragmentFD(FileDownloader):
|
||||
if can_threaded_download and max_workers > 1:
|
||||
|
||||
def _download_fragment(fragment):
|
||||
try:
|
||||
ctx_copy = ctx.copy()
|
||||
frag_content, frag_index = download_fragment(fragment, ctx_copy)
|
||||
return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized')
|
||||
except Exception:
|
||||
# Return immediately on exception so that it is raised in the main thread
|
||||
return
|
||||
ctx_copy = ctx.copy()
|
||||
frag_content, frag_index = download_fragment(fragment, ctx_copy)
|
||||
return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized')
|
||||
|
||||
self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
|
||||
@@ -427,4 +422,5 @@ class FragmentFD(FileDownloader):
|
||||
if not result:
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
self._finish_frag_download(ctx, info_dict)
|
||||
return True
|
||||
|
||||
@@ -133,7 +133,7 @@ class HlsFD(FragmentFD):
|
||||
if real_downloader:
|
||||
self._prepare_external_frag_download(ctx)
|
||||
else:
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
self._prepare_and_start_frag_download(ctx, info_dict)
|
||||
|
||||
extra_state = ctx.setdefault('extra_state', {})
|
||||
|
||||
@@ -250,78 +250,75 @@ class HlsFD(FragmentFD):
|
||||
# TODO: Make progress updates work without hooking twice
|
||||
# for ph in self._progress_hooks:
|
||||
# fd.add_progress_hook(ph)
|
||||
success = fd.real_download(filename, info_copy)
|
||||
if not success:
|
||||
return False
|
||||
return fd.real_download(filename, info_copy)
|
||||
|
||||
if is_webvtt:
|
||||
def pack_fragment(frag_content, frag_index):
|
||||
output = io.StringIO()
|
||||
adjust = 0
|
||||
for block in webvtt.parse_fragment(frag_content):
|
||||
if isinstance(block, webvtt.CueBlock):
|
||||
block.start += adjust
|
||||
block.end += adjust
|
||||
|
||||
dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
|
||||
cue = block.as_json
|
||||
|
||||
# skip the cue if an identical one appears
|
||||
# in the window of potential duplicates
|
||||
# and prune the window of unviable candidates
|
||||
i = 0
|
||||
skip = True
|
||||
while i < len(dedup_window):
|
||||
window_cue = dedup_window[i]
|
||||
if window_cue == cue:
|
||||
break
|
||||
if window_cue['end'] >= cue['start']:
|
||||
i += 1
|
||||
continue
|
||||
del dedup_window[i]
|
||||
else:
|
||||
skip = False
|
||||
|
||||
if skip:
|
||||
continue
|
||||
|
||||
# add the cue to the window
|
||||
dedup_window.append(cue)
|
||||
elif isinstance(block, webvtt.Magic):
|
||||
# take care of MPEG PES timestamp overflow
|
||||
if block.mpegts is None:
|
||||
block.mpegts = 0
|
||||
extra_state.setdefault('webvtt_mpegts_adjust', 0)
|
||||
block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
|
||||
if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
|
||||
extra_state['webvtt_mpegts_adjust'] += 1
|
||||
block.mpegts += 1 << 33
|
||||
extra_state['webvtt_mpegts_last'] = block.mpegts
|
||||
|
||||
if frag_index == 1:
|
||||
extra_state['webvtt_mpegts'] = block.mpegts or 0
|
||||
extra_state['webvtt_local'] = block.local or 0
|
||||
# XXX: block.local = block.mpegts = None ?
|
||||
else:
|
||||
if block.mpegts is not None and block.local is not None:
|
||||
adjust = (
|
||||
(block.mpegts - extra_state.get('webvtt_mpegts', 0))
|
||||
- (block.local - extra_state.get('webvtt_local', 0))
|
||||
)
|
||||
continue
|
||||
elif isinstance(block, webvtt.HeaderBlock):
|
||||
if frag_index != 1:
|
||||
# XXX: this should probably be silent as well
|
||||
# or verify that all segments contain the same data
|
||||
self.report_warning(bug_reports_message(
|
||||
'Discarding a %s block found in the middle of the stream; '
|
||||
'if the subtitles display incorrectly,'
|
||||
% (type(block).__name__)))
|
||||
continue
|
||||
block.write_into(output)
|
||||
|
||||
return output.getvalue().encode('utf-8')
|
||||
else:
|
||||
if is_webvtt:
|
||||
def pack_fragment(frag_content, frag_index):
|
||||
output = io.StringIO()
|
||||
adjust = 0
|
||||
for block in webvtt.parse_fragment(frag_content):
|
||||
if isinstance(block, webvtt.CueBlock):
|
||||
block.start += adjust
|
||||
block.end += adjust
|
||||
|
||||
dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
|
||||
cue = block.as_json
|
||||
|
||||
# skip the cue if an identical one appears
|
||||
# in the window of potential duplicates
|
||||
# and prune the window of unviable candidates
|
||||
i = 0
|
||||
skip = True
|
||||
while i < len(dedup_window):
|
||||
window_cue = dedup_window[i]
|
||||
if window_cue == cue:
|
||||
break
|
||||
if window_cue['end'] >= cue['start']:
|
||||
i += 1
|
||||
continue
|
||||
del dedup_window[i]
|
||||
else:
|
||||
skip = False
|
||||
|
||||
if skip:
|
||||
continue
|
||||
|
||||
# add the cue to the window
|
||||
dedup_window.append(cue)
|
||||
elif isinstance(block, webvtt.Magic):
|
||||
# take care of MPEG PES timestamp overflow
|
||||
if block.mpegts is None:
|
||||
block.mpegts = 0
|
||||
extra_state.setdefault('webvtt_mpegts_adjust', 0)
|
||||
block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
|
||||
if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
|
||||
extra_state['webvtt_mpegts_adjust'] += 1
|
||||
block.mpegts += 1 << 33
|
||||
extra_state['webvtt_mpegts_last'] = block.mpegts
|
||||
|
||||
if frag_index == 1:
|
||||
extra_state['webvtt_mpegts'] = block.mpegts or 0
|
||||
extra_state['webvtt_local'] = block.local or 0
|
||||
# XXX: block.local = block.mpegts = None ?
|
||||
else:
|
||||
if block.mpegts is not None and block.local is not None:
|
||||
adjust = (
|
||||
(block.mpegts - extra_state.get('webvtt_mpegts', 0))
|
||||
- (block.local - extra_state.get('webvtt_local', 0))
|
||||
)
|
||||
continue
|
||||
elif isinstance(block, webvtt.HeaderBlock):
|
||||
if frag_index != 1:
|
||||
# XXX: this should probably be silent as well
|
||||
# or verify that all segments contain the same data
|
||||
self.report_warning(bug_reports_message(
|
||||
'Discarding a %s block found in the middle of the stream; '
|
||||
'if the subtitles display incorrectly,'
|
||||
% (type(block).__name__)))
|
||||
continue
|
||||
block.write_into(output)
|
||||
|
||||
return output.getvalue().encode('utf-8')
|
||||
else:
|
||||
pack_fragment = None
|
||||
self.download_and_append_fragments(ctx, fragments, info_dict, pack_fragment)
|
||||
return True
|
||||
pack_fragment = None
|
||||
return self.download_and_append_fragments(ctx, fragments, info_dict, pack_fragment)
|
||||
|
||||
@@ -177,7 +177,7 @@ class HttpFD(FileDownloader):
|
||||
'status': 'finished',
|
||||
'downloaded_bytes': ctx.resume_len,
|
||||
'total_bytes': ctx.resume_len,
|
||||
})
|
||||
}, info_dict)
|
||||
raise SucceedDownload()
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
@@ -310,7 +310,7 @@ class HttpFD(FileDownloader):
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
'elapsed': now - ctx.start_time,
|
||||
})
|
||||
}, info_dict)
|
||||
|
||||
if data_len is not None and byte_counter == data_len:
|
||||
break
|
||||
@@ -357,7 +357,7 @@ class HttpFD(FileDownloader):
|
||||
'filename': ctx.filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - ctx.start_time,
|
||||
})
|
||||
}, info_dict)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
@@ -246,7 +246,7 @@ class IsmFD(FragmentFD):
|
||||
'total_frags': len(segments),
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
self._prepare_and_start_frag_download(ctx, info_dict)
|
||||
|
||||
extra_state = ctx.setdefault('extra_state', {
|
||||
'ism_track_written': False,
|
||||
@@ -284,6 +284,6 @@ class IsmFD(FragmentFD):
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
self._finish_frag_download(ctx, info_dict)
|
||||
|
||||
return True
|
||||
|
||||
@@ -122,7 +122,7 @@ body > figure > img {
|
||||
'total_frags': len(fragments),
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
self._prepare_and_start_frag_download(ctx, info_dict)
|
||||
|
||||
extra_state = ctx.setdefault('extra_state', {
|
||||
'header_written': False,
|
||||
@@ -198,5 +198,5 @@ body > figure > img {
|
||||
|
||||
ctx['dest_stream'].write(
|
||||
b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii'))
|
||||
self._finish_frag_download(ctx)
|
||||
self._finish_frag_download(ctx, info_dict)
|
||||
return True
|
||||
|
||||
@@ -66,7 +66,7 @@ class RtmpFD(FileDownloader):
|
||||
'eta': eta,
|
||||
'elapsed': time_now - start,
|
||||
'speed': speed,
|
||||
})
|
||||
}, info_dict)
|
||||
cursor_in_new_line = False
|
||||
else:
|
||||
# no percent for live streams
|
||||
@@ -82,7 +82,7 @@ class RtmpFD(FileDownloader):
|
||||
'status': 'downloading',
|
||||
'elapsed': time_now - start,
|
||||
'speed': speed,
|
||||
})
|
||||
}, info_dict)
|
||||
cursor_in_new_line = False
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
@@ -208,7 +208,7 @@ class RtmpFD(FileDownloader):
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - started,
|
||||
})
|
||||
}, info_dict)
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
|
||||
@@ -39,7 +39,7 @@ class RtspFD(FileDownloader):
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
}, info_dict)
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
|
||||
@@ -44,7 +44,7 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
return self._download_fragment(ctx, url, info_dict, http_headers, data)
|
||||
|
||||
def parse_actions_replay(live_chat_continuation):
|
||||
offset = continuation_id = None
|
||||
offset = continuation_id = click_tracking_params = None
|
||||
processed_fragment = bytearray()
|
||||
for action in live_chat_continuation.get('actions', []):
|
||||
if 'replayChatItemAction' in action:
|
||||
@@ -53,17 +53,34 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
processed_fragment.extend(
|
||||
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
|
||||
if offset is not None:
|
||||
continuation_id = try_get(
|
||||
continuation = try_get(
|
||||
live_chat_continuation,
|
||||
lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
|
||||
lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
|
||||
if continuation:
|
||||
continuation_id = continuation.get('continuation')
|
||||
click_tracking_params = continuation.get('clickTrackingParams')
|
||||
self._append_fragment(ctx, processed_fragment)
|
||||
return continuation_id, offset
|
||||
return continuation_id, offset, click_tracking_params
|
||||
|
||||
def try_refresh_replay_beginning(live_chat_continuation):
|
||||
# choose the second option that contains the unfiltered live chat replay
|
||||
refresh_continuation = try_get(
|
||||
live_chat_continuation,
|
||||
lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
|
||||
if refresh_continuation:
|
||||
# no data yet but required to call _append_fragment
|
||||
self._append_fragment(ctx, b'')
|
||||
refresh_continuation_id = refresh_continuation.get('continuation')
|
||||
offset = 0
|
||||
click_tracking_params = refresh_continuation.get('trackingParams')
|
||||
return refresh_continuation_id, offset, click_tracking_params
|
||||
return parse_actions_replay(live_chat_continuation)
|
||||
|
||||
live_offset = 0
|
||||
|
||||
def parse_actions_live(live_chat_continuation):
|
||||
nonlocal live_offset
|
||||
continuation_id = None
|
||||
continuation_id = click_tracking_params = None
|
||||
processed_fragment = bytearray()
|
||||
for action in live_chat_continuation.get('actions', []):
|
||||
timestamp = self.parse_live_timestamp(action)
|
||||
@@ -84,45 +101,52 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
|
||||
if continuation_data:
|
||||
continuation_id = continuation_data.get('continuation')
|
||||
click_tracking_params = continuation_data.get('clickTrackingParams')
|
||||
timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
|
||||
if timeout_ms is not None:
|
||||
time.sleep(timeout_ms / 1000)
|
||||
self._append_fragment(ctx, processed_fragment)
|
||||
return continuation_id, live_offset
|
||||
return continuation_id, live_offset, click_tracking_params
|
||||
|
||||
if info_dict['protocol'] == 'youtube_live_chat_replay':
|
||||
parse_actions = parse_actions_replay
|
||||
elif info_dict['protocol'] == 'youtube_live_chat':
|
||||
parse_actions = parse_actions_live
|
||||
|
||||
def download_and_parse_fragment(url, frag_index, request_data, headers):
|
||||
def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, raw_fragment = dl_fragment(url, request_data, headers)
|
||||
if not success:
|
||||
return False, None, None
|
||||
data = json.loads(raw_fragment)
|
||||
return False, None, None, None
|
||||
try:
|
||||
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
|
||||
except RegexNotFoundError:
|
||||
data = None
|
||||
if not data:
|
||||
data = json.loads(raw_fragment)
|
||||
live_chat_continuation = try_get(
|
||||
data,
|
||||
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
|
||||
continuation_id, offset = parse_actions(live_chat_continuation)
|
||||
return True, continuation_id, offset
|
||||
if info_dict['protocol'] == 'youtube_live_chat_replay':
|
||||
if frag_index == 1:
|
||||
continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
|
||||
else:
|
||||
continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
|
||||
elif info_dict['protocol'] == 'youtube_live_chat':
|
||||
continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
|
||||
return True, continuation_id, offset, click_tracking_params
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False, None, None
|
||||
return False, None, None, None
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
self._prepare_and_start_frag_download(ctx, info_dict)
|
||||
|
||||
success, raw_fragment = dl_fragment(info_dict['url'])
|
||||
if not success:
|
||||
return False
|
||||
try:
|
||||
data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
|
||||
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
|
||||
except RegexNotFoundError:
|
||||
return False
|
||||
continuation_id = try_get(
|
||||
@@ -131,7 +155,7 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
# no data yet but required to call _append_fragment
|
||||
self._append_fragment(ctx, b'')
|
||||
|
||||
ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
|
||||
ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
|
||||
|
||||
if not ytcfg:
|
||||
return False
|
||||
@@ -142,10 +166,13 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
|
||||
if info_dict['protocol'] == 'youtube_live_chat_replay':
|
||||
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
|
||||
chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
|
||||
elif info_dict['protocol'] == 'youtube_live_chat':
|
||||
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
|
||||
chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
|
||||
|
||||
frag_index = offset = 0
|
||||
click_tracking_params = None
|
||||
while continuation_id is not None:
|
||||
frag_index += 1
|
||||
request_data = {
|
||||
@@ -154,17 +181,22 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
}
|
||||
if frag_index > 1:
|
||||
request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
|
||||
headers = ie._generate_api_headers(ytcfg, visitor_data=visitor_data)
|
||||
headers.update({'content-type': 'application/json'})
|
||||
fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
|
||||
success, continuation_id, offset = download_and_parse_fragment(
|
||||
url, frag_index, fragment_request_data, headers)
|
||||
if click_tracking_params:
|
||||
request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
|
||||
headers = ie.generate_api_headers(ytcfg, visitor_data=visitor_data)
|
||||
headers.update({'content-type': 'application/json'})
|
||||
fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
|
||||
success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
|
||||
url, frag_index, fragment_request_data, headers)
|
||||
else:
|
||||
success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
|
||||
chat_page_url, frag_index)
|
||||
if not success:
|
||||
return False
|
||||
if test:
|
||||
break
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
self._finish_frag_download(ctx, info_dict)
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import xml.etree.ElementTree as etree
|
||||
@@ -61,6 +62,11 @@ MSO_INFO = {
|
||||
'username_field': 'IDToken1',
|
||||
'password_field': 'IDToken2',
|
||||
},
|
||||
'Spectrum': {
|
||||
'name': 'Spectrum',
|
||||
'username_field': 'IDToken1',
|
||||
'password_field': 'IDToken2',
|
||||
},
|
||||
'Philo': {
|
||||
'name': 'Philo',
|
||||
'username_field': 'ident'
|
||||
@@ -1524,6 +1530,41 @@ class AdobePassIE(InfoExtractor):
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
elif mso_id == 'Spectrum':
|
||||
# Spectrum's login form is dynamically loaded via JS so we need to hardcode the flow
|
||||
# as a one-off implementation.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
provider_login_page_res = post_form(
|
||||
provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
|
||||
saml_login_page, urlh = provider_login_page_res
|
||||
relay_state = self._search_regex(
|
||||
r'RelayState\s*=\s*"(?P<relay>.+?)";',
|
||||
saml_login_page, 'RelayState', group='relay')
|
||||
saml_request = self._search_regex(
|
||||
r'SAMLRequest\s*=\s*"(?P<saml_request>.+?)";',
|
||||
saml_login_page, 'SAMLRequest', group='saml_request')
|
||||
login_json = {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
'RelayState': relay_state,
|
||||
'SAMLRequest': saml_request,
|
||||
}
|
||||
saml_response_json = self._download_json(
|
||||
'https://tveauthn.spectrum.net/tveauthentication/api/v1/manualAuth', video_id,
|
||||
'Downloading SAML Response',
|
||||
data=json.dumps(login_json).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Accept': 'application/json',
|
||||
})
|
||||
self._download_webpage(
|
||||
saml_response_json['SAMLRedirectUri'], video_id,
|
||||
'Confirming Login', data=urlencode_postdata({
|
||||
'SAMLResponse': saml_response_json['SAMLResponse'],
|
||||
'RelayState': relay_state,
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
else:
|
||||
# Some providers (e.g. DIRECTV NOW) have another meta refresh
|
||||
# based redirect that should be followed.
|
||||
|
||||
@@ -8,6 +8,9 @@ from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
try_get,
|
||||
dict_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,6 +27,11 @@ class BravoTVIE(AdobePassIE):
|
||||
'uploader': 'NBCU-BRAV',
|
||||
'upload_date': '20190314',
|
||||
'timestamp': 1552591860,
|
||||
'season_number': 16,
|
||||
'episode_number': 15,
|
||||
'series': 'Top Chef',
|
||||
'episode': 'The Top Chef Season 16 Winner Is...',
|
||||
'duration': 190.0,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
@@ -79,12 +87,34 @@ class BravoTVIE(AdobePassIE):
|
||||
'episode_number': int_or_none(metadata.get('episode_num')),
|
||||
})
|
||||
query['switch'] = 'progressive'
|
||||
|
||||
tp_url = 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path)
|
||||
|
||||
tp_metadata = self._download_json(
|
||||
update_url_query(tp_url, {'format': 'preview'}),
|
||||
display_id, fatal=False)
|
||||
if tp_metadata:
|
||||
info.update({
|
||||
'title': tp_metadata.get('title'),
|
||||
'description': tp_metadata.get('description'),
|
||||
'duration': float_or_none(tp_metadata.get('duration'), 1000),
|
||||
'season_number': int_or_none(
|
||||
dict_get(tp_metadata, ('pl1$seasonNumber', 'nbcu$seasonNumber'))),
|
||||
'episode_number': int_or_none(
|
||||
dict_get(tp_metadata, ('pl1$episodeNumber', 'nbcu$episodeNumber'))),
|
||||
# For some reason the series is sometimes wrapped into a single element array.
|
||||
'series': try_get(
|
||||
dict_get(tp_metadata, ('pl1$show', 'nbcu$show')),
|
||||
lambda x: x[0] if isinstance(x, list) else x,
|
||||
expected_type=str),
|
||||
'episode': dict_get(
|
||||
tp_metadata, ('pl1$episodeName', 'nbcu$episodeName', 'title')),
|
||||
})
|
||||
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'id': release_pid,
|
||||
'url': smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path),
|
||||
query), {'force_smil_url': True}),
|
||||
'url': smuggle_url(update_url_query(tp_url, query), {'force_smil_url': True}),
|
||||
'ie_key': 'ThePlatform',
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -19,7 +19,6 @@ from ..compat import (
|
||||
compat_etree_Element,
|
||||
compat_etree_fromstring,
|
||||
compat_getpass,
|
||||
compat_integer_types,
|
||||
compat_http_client,
|
||||
compat_os_name,
|
||||
compat_str,
|
||||
@@ -70,6 +69,7 @@ from ..utils import (
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
@@ -78,6 +78,7 @@ from ..utils import (
|
||||
urljoin,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
variadic,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
@@ -228,6 +229,7 @@ class InfoExtractor(object):
|
||||
* "resolution" (optional, string "{width}x{height}",
|
||||
deprecated)
|
||||
* "filesize" (optional, int)
|
||||
* "_test_url" (optional, bool) - If true, test the URL
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
@@ -295,6 +297,8 @@ class InfoExtractor(object):
|
||||
live stream that goes on instead of a fixed-length video.
|
||||
was_live: True, False, or None (=unknown). Whether this video was
|
||||
originally a live stream.
|
||||
live_status: 'is_live', 'upcoming', 'was_live', 'not_live' or None (=unknown)
|
||||
If absent, automatically set from is_live, was_live
|
||||
start_time: Time in seconds where the reproduction should start, as
|
||||
specified in the URL.
|
||||
end_time: Time in seconds where the reproduction should end, as
|
||||
@@ -627,14 +631,10 @@ class InfoExtractor(object):
|
||||
assert isinstance(err, compat_urllib_error.HTTPError)
|
||||
if expected_status is None:
|
||||
return False
|
||||
if isinstance(expected_status, compat_integer_types):
|
||||
return err.code == expected_status
|
||||
elif isinstance(expected_status, (list, tuple)):
|
||||
return err.code in expected_status
|
||||
elif callable(expected_status):
|
||||
return expected_status(err.code) is True
|
||||
else:
|
||||
assert False
|
||||
return err.code in variadic(expected_status)
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
@@ -1037,7 +1037,9 @@ class InfoExtractor(object):
|
||||
metadata_available=False, method='any'):
|
||||
if metadata_available and self.get_param('ignore_no_formats_error'):
|
||||
self.report_warning(msg)
|
||||
raise ExtractorError('%s. %s' % (msg, self._LOGIN_HINTS[method]), expected=True)
|
||||
if method is not None:
|
||||
msg = '%s. %s' % (msg, self._LOGIN_HINTS[method])
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
def raise_geo_restricted(
|
||||
self, msg='This video is not available from your location due to geo restriction',
|
||||
@@ -1112,6 +1114,8 @@ class InfoExtractor(object):
|
||||
if group is None:
|
||||
# return the first matching group
|
||||
return next(g for g in mobj.groups() if g is not None)
|
||||
elif isinstance(group, (list, tuple)):
|
||||
return tuple(mobj.group(g) for g in group)
|
||||
else:
|
||||
return mobj.group(group)
|
||||
elif default is not NO_DEFAULT:
|
||||
@@ -1204,8 +1208,7 @@ class InfoExtractor(object):
|
||||
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
|
||||
|
||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||
if not isinstance(prop, (list, tuple)):
|
||||
prop = [prop]
|
||||
prop = variadic(prop)
|
||||
if name is None:
|
||||
name = 'OpenGraph %s' % prop[0]
|
||||
og_regexes = []
|
||||
@@ -1235,8 +1238,7 @@ class InfoExtractor(object):
|
||||
return self._og_search_property('url', html, **kargs)
|
||||
|
||||
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
|
||||
if not isinstance(name, (list, tuple)):
|
||||
name = [name]
|
||||
name = variadic(name)
|
||||
if display_name is None:
|
||||
display_name = name[0]
|
||||
return self._html_search_regex(
|
||||
@@ -1978,24 +1980,33 @@ class InfoExtractor(object):
|
||||
preference=None, quality=None, m3u8_id=None, live=False, note=None,
|
||||
errnote=None, fatal=True, data=None, headers={}, query={},
|
||||
video_id=None):
|
||||
formats, subtitles = [], {}
|
||||
|
||||
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||
return [], {}
|
||||
return formats, subtitles
|
||||
|
||||
if (not self.get_param('allow_unplayable_formats')
|
||||
and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)): # Apple FairPlay
|
||||
return [], {}
|
||||
return formats, subtitles
|
||||
|
||||
formats = []
|
||||
def format_url(url):
|
||||
return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url)
|
||||
|
||||
subtitles = {}
|
||||
if self.get_param('hls_split_discontinuity', False):
|
||||
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
|
||||
if not m3u8_doc:
|
||||
if not manifest_url:
|
||||
return []
|
||||
m3u8_doc = self._download_webpage(
|
||||
manifest_url, video_id, fatal=fatal, data=data, headers=headers,
|
||||
note=False, errnote='Failed to download m3u8 playlist information')
|
||||
if m3u8_doc is False:
|
||||
return []
|
||||
return range(1 + sum(line.startswith('#EXT-X-DISCONTINUITY') for line in m3u8_doc.splitlines()))
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
split_discontinuity = self.get_param('hls_split_discontinuity', False)
|
||||
else:
|
||||
def _extract_m3u8_playlist_indices(*args, **kwargs):
|
||||
return [None]
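The split branch counts #EXT-X-DISCONTINUITY tags to decide how many playlist slices to emit, while this fallback yields a single None index so the comprehensions below still run exactly once. A small self-contained sketch of the counting, using a made-up media playlist:

    sample_doc = '#EXTM3U\n#EXT-X-TARGETDURATION:10\nseg0.ts\n#EXT-X-DISCONTINUITY\nseg1.ts\n'
    indices = range(1 + sum(line.startswith('#EXT-X-DISCONTINUITY') for line in sample_doc.splitlines()))
    print(list(indices))  # [0, 1] -> two slices when hls_split_discontinuity is enabled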
|
||||
|
||||
# References:
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
||||
@@ -2013,68 +2024,16 @@ class InfoExtractor(object):
|
||||
# media playlist and MUST NOT appear in master playlist thus we can
|
||||
# clearly detect media playlist with this criterion.
|
||||
|
||||
def _extract_m3u8_playlist_formats(format_url=None, m3u8_doc=None, video_id=None,
|
||||
fatal=True, data=None, headers={}):
|
||||
if not m3u8_doc:
|
||||
if not format_url:
|
||||
return []
|
||||
res = self._download_webpage_handle(
|
||||
format_url, video_id,
|
||||
note=False,
|
||||
errnote='Failed to download m3u8 playlist information',
|
||||
fatal=fatal, data=data, headers=headers)
|
||||
|
||||
if res is False:
|
||||
return []
|
||||
|
||||
m3u8_doc, urlh = res
|
||||
format_url = urlh.geturl()
|
||||
|
||||
playlist_formats = []
|
||||
i = (
|
||||
0
|
||||
if split_discontinuity
|
||||
else None)
|
||||
format_info = {
|
||||
'index': i,
|
||||
'key_data': None,
|
||||
'files': [],
|
||||
}
|
||||
for line in m3u8_doc.splitlines():
|
||||
if not line.startswith('#'):
|
||||
format_info['files'].append(line)
|
||||
elif split_discontinuity and line.startswith('#EXT-X-DISCONTINUITY'):
|
||||
i += 1
|
||||
playlist_formats.append(format_info)
|
||||
format_info = {
|
||||
'index': i,
|
||||
'url': format_url,
|
||||
'files': [],
|
||||
}
|
||||
playlist_formats.append(format_info)
|
||||
return playlist_formats
|
||||
|
||||
if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
|
||||
|
||||
playlist_formats = _extract_m3u8_playlist_formats(m3u8_doc=m3u8_doc)
|
||||
|
||||
for format in playlist_formats:
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
format_index = format.get('index')
|
||||
if format_index:
|
||||
format_id.append(str(format_index))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'format_index': format_index,
|
||||
'url': m3u8_url,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
}
|
||||
formats.append(f)
|
||||
formats = [{
|
||||
'format_id': '-'.join(map(str, filter(None, [m3u8_id, idx]))),
|
||||
'format_index': idx,
|
||||
'url': m3u8_url,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
} for idx in _extract_m3u8_playlist_indices(m3u8_doc=m3u8_doc)]
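The comprehension leans on filter(None, ...): the discontinuity index only reaches format_id when it is truthy, so with splitting disabled (idx is None) the id stays the plain m3u8_id. Illustrative values only:

    print('-'.join(map(str, filter(None, ['hls', None]))))   # 'hls'   (no split index)
    print('-'.join(map(str, filter(None, ['hls', 2]))))       # 'hls-2' (later slice)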
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
@@ -2114,32 +2073,19 @@ class InfoExtractor(object):
|
||||
media_url = media.get('URI')
|
||||
if media_url:
|
||||
manifest_url = format_url(media_url)
|
||||
format_id = []
|
||||
playlist_formats = _extract_m3u8_playlist_formats(manifest_url, video_id=video_id,
|
||||
fatal=fatal, data=data, headers=headers)
|
||||
|
||||
for format in playlist_formats:
|
||||
format_index = format.get('index')
|
||||
for v in (m3u8_id, group_id, name):
|
||||
if v:
|
||||
format_id.append(v)
|
||||
if format_index:
|
||||
format_id.append(str(format_index))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'format_note': name,
|
||||
'format_index': format_index,
|
||||
'url': manifest_url,
|
||||
'manifest_url': m3u8_url,
|
||||
'language': media.get('LANGUAGE'),
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
}
|
||||
if media_type == 'AUDIO':
|
||||
f['vcodec'] = 'none'
|
||||
formats.append(f)
|
||||
formats.extend({
|
||||
'format_id': '-'.join(map(str, filter(None, (m3u8_id, group_id, name, idx)))),
|
||||
'format_note': name,
|
||||
'format_index': idx,
|
||||
'url': manifest_url,
|
||||
'manifest_url': m3u8_url,
|
||||
'language': media.get('LANGUAGE'),
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
'vcodec': 'none' if media_type == 'AUDIO' else None,
|
||||
} for idx in _extract_m3u8_playlist_indices(manifest_url))
|
||||
|
||||
def build_stream_name():
|
||||
# Despite specification does not mention NAME attribute for
|
||||
@@ -2178,25 +2124,17 @@ class InfoExtractor(object):
|
||||
or last_stream_inf.get('BANDWIDTH'), scale=1000)
|
||||
manifest_url = format_url(line.strip())
|
||||
|
||||
playlist_formats = _extract_m3u8_playlist_formats(manifest_url, video_id=video_id,
|
||||
fatal=fatal, data=data, headers=headers)
|
||||
|
||||
for frmt in playlist_formats:
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
format_index = frmt.get('index')
|
||||
stream_name = build_stream_name()
|
||||
for idx in _extract_m3u8_playlist_indices(manifest_url):
|
||||
format_id = [m3u8_id, None, idx]
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
# format_id intact.
|
||||
if not live:
|
||||
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
|
||||
if format_index:
|
||||
format_id.append(str(format_index))
|
||||
stream_name = build_stream_name()
|
||||
format_id[1] = stream_name if stream_name else '%d' % (tbr if tbr else len(formats))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'format_index': format_index,
|
||||
'format_id': '-'.join(map(str, filter(None, format_id))),
|
||||
'format_index': idx,
|
||||
'url': manifest_url,
|
||||
'manifest_url': m3u8_url,
|
||||
'tbr': tbr,
|
||||
@@ -2271,7 +2209,7 @@ class InfoExtractor(object):
|
||||
out.append('{%s}%s' % (namespace, c))
|
||||
return '/'.join(out)
|
||||
|
||||
def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
|
||||
def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
|
||||
smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
|
||||
|
||||
if smil is False:
|
||||
@@ -2280,8 +2218,21 @@ class InfoExtractor(object):
|
||||
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
|
||||
return self._parse_smil_formats(
|
||||
fmts = self._parse_smil_formats(
|
||||
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||
subs = self._parse_smil_subtitles(
|
||||
smil, namespace=namespace)
|
||||
|
||||
return fmts, subs
|
||||
|
||||
def _extract_smil_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
|
||||
if subs:
|
||||
self.report_warning(bug_reports_message(
|
||||
"Ignoring subtitle tracks found in the SMIL manifest; "
|
||||
"if any subtitle tracks are missing,"
|
||||
))
|
||||
return fmts
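This follows the *_and_subtitles convention used elsewhere in the file: the new method returns both values, and the legacy name becomes a thin wrapper that warns when subtitle tracks would otherwise be dropped silently. A hedged usage sketch, assuming it runs inside an extractor method with formats/subtitles accumulators already defined:

    fmts, subs = self._extract_smil_formats_and_subtitles(smil_url, video_id, fatal=False)
    formats.extend(fmts)
    self._merge_subtitles(subs, target=subtitles)  # fold the SMIL subtitles into the running dict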
|
||||
|
||||
def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
|
||||
smil = self._download_smil(smil_url, video_id, fatal=fatal)
|
||||
@@ -3505,16 +3456,8 @@ class InfoExtractor(object):
|
||||
return ret
|
||||
|
||||
@classmethod
|
||||
def _merge_subtitles(cls, *dicts, **kwargs):
|
||||
def _merge_subtitles(cls, *dicts, target=None):
|
||||
""" Merge subtitle dictionaries, language by language. """
|
||||
|
||||
target = (lambda target=None: target)(**kwargs)
|
||||
# The above lambda extracts the keyword argument 'target' from kwargs
|
||||
# while ensuring there are no stray ones. When Python 2 support
|
||||
# is dropped, remove it and change the function signature to:
|
||||
#
|
||||
# def _merge_subtitles(cls, *dicts, target=None):
|
||||
|
||||
if target is None:
|
||||
target = {}
|
||||
for d in dicts:
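With target now a real keyword-only argument, the merge accumulates language-keyed subtitle lists into whatever dict the caller supplies. A standalone sketch of the intended semantics (not the exact method body):

    def merge_subtitles(*dicts, target=None):
        if target is None:
            target = {}
        for d in dicts:
            for lang, subs in d.items():
                target.setdefault(lang, []).extend(subs)
        return target

    combined = {}
    merge_subtitles({'en': [{'url': 'https://example.com/a.vtt'}]}, target=combined)
    merge_subtitles({'en': [{'url': 'https://example.com/b.vtt'}]}, target=combined)
    # combined['en'] now lists both tracks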
|
||||
@@ -3567,6 +3510,19 @@ class InfoExtractor(object):
|
||||
else 'public' if all_known
|
||||
else None)
|
||||
|
||||
def _configuration_arg(self, key, default=NO_DEFAULT, casesense=False):
|
||||
'''
|
||||
@returns A list of values for the extractor argument given by "key"
|
||||
or "default" if no such key is present
|
||||
@param default The default value to return when the key is not present (default: [])
|
||||
@param casesense When false, the values are converted to lower case
|
||||
'''
|
||||
val = traverse_obj(
|
||||
self._downloader.params, ('extractor_args', self.ie_key().lower(), key))
|
||||
if val is None:
|
||||
return [] if default is NO_DEFAULT else default
|
||||
return list(val) if casesense else [x.lower() for x in val]
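_configuration_arg is how extractors read values passed via --extractor-args: the key is looked up under the extractor's lower-cased ie_key and always comes back as a list (lower-cased unless casesense is set). A hedged example of how a call maps to the command line, assuming the option syntax described in the README:

    # yt-dlp --extractor-args "funimation:language=japanese,english" URL
    requested_languages = self._configuration_arg('language')   # ['japanese', 'english']
    if not requested_languages:
        pass  # key absent -> [] unless an explicit default was supplied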
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
||||
@@ -636,7 +636,7 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||
'url': 'https://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||
'info_dict': {
|
||||
'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||
'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
|
||||
@@ -661,7 +661,8 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
show_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
self._add_skip_wall(url), show_id,
|
||||
# https:// gives a 403, but http:// does not
|
||||
self._add_skip_wall(url).replace('https://', 'http://'), show_id,
|
||||
headers=self.geo_verification_headers())
|
||||
title = self._html_search_meta('name', webpage, default=None)
|
||||
|
||||
|
||||
145
yt_dlp/extractor/douyin.py
Normal file
@@ -0,0 +1,145 @@
|
||||
# coding: utf-8
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
from .common import (
|
||||
InfoExtractor,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
|
||||
|
||||
class DouyinIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.douyin.com/video/6961737553342991651',
|
||||
'md5': '10523312c8b8100f353620ac9dc8f067',
|
||||
'info_dict': {
|
||||
'id': '6961737553342991651',
|
||||
'ext': 'mp4',
|
||||
'title': '#杨超越 小小水手带你去远航❤️',
|
||||
'uploader': '杨超越',
|
||||
'upload_date': '20210513',
|
||||
'timestamp': 1620905839,
|
||||
'uploader_id': '110403406559',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6982497745948921092',
|
||||
'md5': 'd78408c984b9b5102904cf6b6bc2d712',
|
||||
'info_dict': {
|
||||
'id': '6982497745948921092',
|
||||
'ext': 'mp4',
|
||||
'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
|
||||
'uploader': '杨超越工作室',
|
||||
'upload_date': '20210708',
|
||||
'timestamp': 1625739481,
|
||||
'uploader_id': '408654318141572',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6953975910773099811',
|
||||
'md5': '72e882e24f75064c218b76c8b713c185',
|
||||
'info_dict': {
|
||||
'id': '6953975910773099811',
|
||||
'ext': 'mp4',
|
||||
'title': '#一起看海 出现在你的夏日里',
|
||||
'uploader': '杨超越',
|
||||
'upload_date': '20210422',
|
||||
'timestamp': 1619098692,
|
||||
'uploader_id': '110403406559',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6950251282489675042',
|
||||
'md5': 'b4db86aec367ef810ddd38b1737d2fed',
|
||||
'info_dict': {
|
||||
'id': '6950251282489675042',
|
||||
'ext': 'mp4',
|
||||
'title': '哈哈哈,成功了哈哈哈哈哈哈',
|
||||
'uploader': '杨超越',
|
||||
'upload_date': '20210412',
|
||||
'timestamp': 1618231483,
|
||||
'uploader_id': '110403406559',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6963263655114722595',
|
||||
'md5': '1abe1c477d05ee62efb40bf2329957cf',
|
||||
'info_dict': {
|
||||
'id': '6963263655114722595',
|
||||
'ext': 'mp4',
|
||||
'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
|
||||
'uploader': '杨超越',
|
||||
'upload_date': '20210517',
|
||||
'timestamp': 1621261163,
|
||||
'uploader_id': '110403406559',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
render_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>(%7B.+%7D)</script>',
|
||||
webpage, 'render data'),
|
||||
video_id, transform_source=compat_urllib_parse_unquote)
|
||||
details = traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False)
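traverse_obj walks nested dicts/lists defensively; the Ellipsis branches over every value at that level and get_all=False returns only the first match instead of a list. A standalone illustration with a made-up render-data shape (not real Douyin data):

    from yt_dlp.utils import traverse_obj
    fake_render_data = {'41': {'aweme': {'detail': {'desc': 'example title'}}}}
    detail = traverse_obj(fake_render_data, (..., 'aweme', 'detail'), get_all=False)
    print(detail['desc'])  # 'example title'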
|
||||
|
||||
thumbnails = [{'url': self._proto_relative_url(url)} for url in traverse_obj(
|
||||
details, ('video', ('cover', 'dynamicCover', 'originCover')), expected_type=url_or_none, default=[])]
|
||||
|
||||
common = {
|
||||
'width': traverse_obj(details, ('video', 'width'), expected_type=int),
|
||||
'height': traverse_obj(details, ('video', 'height'), expected_type=int),
|
||||
'ext': 'mp4',
|
||||
}
|
||||
formats = [{**common, 'url': self._proto_relative_url(url)} for url in traverse_obj(
|
||||
details, ('video', 'playAddr', ..., 'src'), expected_type=url_or_none, default=[]) if url]
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
download_url = traverse_obj(details, ('download', 'url'), expected_type=url_or_none)
|
||||
if download_url:
|
||||
formats.append({
|
||||
**common,
|
||||
'format_id': 'download',
|
||||
'url': self._proto_relative_url(download_url),
|
||||
'quality': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': details.get('desc') or self._html_search_meta('title', webpage),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'uploader': traverse_obj(details, ('authorInfo', 'nickname'), expected_type=str),
|
||||
'uploader_id': traverse_obj(details, ('authorInfo', 'uid'), expected_type=str),
|
||||
'uploader_url': 'https://www.douyin.com/user/%s' % traverse_obj(
|
||||
details, ('authorInfo', 'secUid'), expected_type=str),
|
||||
'timestamp': int_or_none(details.get('createTime')),
|
||||
'duration': traverse_obj(details, ('video', 'duration'), expected_type=int),
|
||||
'view_count': traverse_obj(details, ('stats', 'playCount'), expected_type=int),
|
||||
'like_count': traverse_obj(details, ('stats', 'diggCount'), expected_type=int),
|
||||
'repost_count': traverse_obj(details, ('stats', 'shareCount'), expected_type=int),
|
||||
'comment_count': traverse_obj(details, ('stats', 'commentCount'), expected_type=int),
|
||||
}
|
||||
@@ -321,6 +321,7 @@ from .discoveryplusindia import (
|
||||
DiscoveryPlusIndiaShowIE,
|
||||
)
|
||||
from .dotsub import DotsubIE
|
||||
from .douyin import DouyinIE
|
||||
from .douyutv import (
|
||||
DouyuShowIE,
|
||||
DouyuTVIE,
|
||||
@@ -398,7 +399,11 @@ from .facebook import (
|
||||
FacebookIE,
|
||||
FacebookPluginsVideoIE,
|
||||
)
|
||||
from .fancode import FancodeVodIE
|
||||
from .fancode import (
|
||||
FancodeVodIE,
|
||||
FancodeLiveIE
|
||||
)
|
||||
|
||||
from .faz import FazIE
|
||||
from .fc2 import (
|
||||
FC2IE,
|
||||
@@ -455,7 +460,11 @@ from .frontendmasters import (
|
||||
FrontendMastersCourseIE
|
||||
)
|
||||
from .fujitv import FujiTVFODPlus7IE
|
||||
from .funimation import FunimationIE
|
||||
from .funimation import (
|
||||
FunimationIE,
|
||||
FunimationPageIE,
|
||||
FunimationShowIE,
|
||||
)
|
||||
from .funk import FunkIE
|
||||
from .fusion import FusionIE
|
||||
from .gaia import GaiaIE
|
||||
@@ -1007,6 +1016,7 @@ from .popcorntimes import PopcorntimesIE
|
||||
from .popcorntv import PopcornTVIE
|
||||
from .porn91 import Porn91IE
|
||||
from .porncom import PornComIE
|
||||
from .pornflip import PornFlipIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
@@ -1060,6 +1070,10 @@ from .rcs import (
|
||||
RCSEmbedsIE,
|
||||
RCSVariousIE,
|
||||
)
|
||||
from .rcti import (
|
||||
RCTIPlusIE,
|
||||
RCTIPlusSeriesIE,
|
||||
)
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import (
|
||||
RedBullTVIE,
|
||||
|
||||
@@ -629,16 +629,11 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
process_formats(formats)
|
||||
|
||||
description = self._html_search_meta('description', webpage, default=None)
|
||||
video_title = self._html_search_regex(
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
|
||||
'title', default=None)
|
||||
if not video_title:
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
||||
webpage, 'alternative title', default=None)
|
||||
if not video_title:
|
||||
video_title = self._html_search_meta(
|
||||
'description', webpage, 'title', default=None)
|
||||
(r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>',
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>'),
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage, default=None) or description
|
||||
if video_title:
|
||||
video_title = limit_length(video_title, 80)
|
||||
else:
|
||||
@@ -662,6 +657,7 @@ class FacebookIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
'subtitles': subtitles,
|
||||
|
||||
@@ -7,7 +7,8 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
ExtractorError,
|
||||
try_get
|
||||
try_get,
|
||||
mimetype2ext
|
||||
)
|
||||
|
||||
|
||||
@@ -38,16 +39,63 @@ class FancodeVodIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_ACCESS_TOKEN = None
|
||||
_NETRC_MACHINE = 'fancode'
|
||||
|
||||
_LOGIN_HINT = 'Use "--user refresh --password <refresh_token>" to login using a refresh token'
|
||||
|
||||
headers = {
|
||||
'content-type': 'application/json',
|
||||
'origin': 'https://fancode.com',
|
||||
'referer': 'https://fancode.com',
|
||||
}
|
||||
|
||||
def _login(self):
|
||||
# Access tokens are shortlived, so get them using the refresh token.
|
||||
username, password = self._get_login_info()
|
||||
if username == 'refresh' and password is not None:
|
||||
self.report_login()
|
||||
data = '''{
|
||||
"query":"mutation RefreshToken($refreshToken: String\\u0021) { refreshToken(refreshToken: $refreshToken) { accessToken }}",
|
||||
"variables":{
|
||||
"refreshToken":"%s"
|
||||
},
|
||||
"operationName":"RefreshToken"
|
||||
}''' % password
|
||||
|
||||
token_json = self.download_gql('refresh token', data, "Getting the Access token")
|
||||
self._ACCESS_TOKEN = try_get(token_json, lambda x: x['data']['refreshToken']['accessToken'])
|
||||
if self._ACCESS_TOKEN is None:
|
||||
self.report_warning('Failed to get Access token')
|
||||
else:
|
||||
self.headers.update({'Authorization': 'Bearer %s' % self._ACCESS_TOKEN})
|
||||
elif username is not None:
|
||||
self.report_warning(f'Login using username and password is not currently supported. {self._LOGIN_HINT}')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _check_login_required(self, is_available, is_premium):
|
||||
msg = None
|
||||
if is_premium and self._ACCESS_TOKEN is None:
|
||||
msg = f'This video is only available for registered users. {self._LOGIN_HINT}'
|
||||
elif not is_available and self._ACCESS_TOKEN is not None:
|
||||
msg = 'This video isn\'t available to the current logged in account'
|
||||
if msg:
|
||||
self.raise_login_required(msg, metadata_available=True, method=None)
|
||||
|
||||
def download_gql(self, variable, data, note, fatal=False, headers=headers):
|
||||
return self._download_json(
|
||||
'https://www.fancode.com/graphql', variable,
|
||||
data=data.encode(), note=note,
|
||||
headers=headers, fatal=fatal)
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
brightcove_user_id = self._html_search_regex(
|
||||
r'(?:https?://)?players\.brightcove\.net/(\d+)/default_default/index(?:\.min)?\.js',
|
||||
webpage, 'user id')
|
||||
|
||||
brightcove_user_id = '6008340455001'
|
||||
data = '''{
|
||||
"query":"query Video($id: Int\\u0021, $filter: SegmentFilter) { media(id: $id, filter: $filter) { id contentId title contentId publishedTime totalViews totalUpvotes provider thumbnail { src } mediaSource {brightcove } duration isPremium isUserEntitled tags duration }}",
|
||||
"variables":{
|
||||
@@ -57,15 +105,9 @@ class FancodeVodIE(InfoExtractor):
|
||||
}
|
||||
},
|
||||
"operationName":"Video"
|
||||
}''' % video_id
|
||||
}''' % video_id
|
||||
|
||||
metadata_json = self._download_json(
|
||||
'https://www.fancode.com/graphql', video_id, data=data.encode(), note='Downloading metadata',
|
||||
headers={
|
||||
'content-type': 'application/json',
|
||||
'origin': 'https://fancode.com',
|
||||
'referer': url,
|
||||
})
|
||||
metadata_json = self.download_gql(video_id, data, note='Downloading metadata')
|
||||
|
||||
media = try_get(metadata_json, lambda x: x['data']['media'], dict) or {}
|
||||
brightcove_video_id = try_get(media, lambda x: x['mediaSource']['brightcove'], compat_str)
|
||||
@@ -74,8 +116,8 @@ class FancodeVodIE(InfoExtractor):
|
||||
raise ExtractorError('Unable to extract brightcove Video ID')
|
||||
|
||||
is_premium = media.get('isPremium')
|
||||
if is_premium:
|
||||
self.report_warning('this video requires a premium account', video_id)
|
||||
|
||||
self._check_login_required(media.get('isUserEntitled'), is_premium)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -89,3 +131,57 @@ class FancodeVodIE(InfoExtractor):
|
||||
'release_timestamp': parse_iso8601(media.get('publishedTime')),
|
||||
'availability': self._availability(needs_premium=is_premium),
|
||||
}
|
||||
|
||||
|
||||
class FancodeLiveIE(FancodeVodIE):
|
||||
IE_NAME = 'fancode:live'
|
||||
|
||||
_VALID_URL = r'https?://(www\.)?fancode\.com/match/(?P<id>[0-9]+).+'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://fancode.com/match/35328/cricket-fancode-ecs-hungary-2021-bub-vs-blb?slug=commentary',
|
||||
'info_dict': {
|
||||
'id': '35328',
|
||||
'ext': 'mp4',
|
||||
'title': 'BUB vs BLB',
|
||||
"timestamp": 1624863600,
|
||||
'is_live': True,
|
||||
'upload_date': '20210628',
|
||||
},
|
||||
'skip': 'Ended'
|
||||
}, {
|
||||
'url': 'https://fancode.com/match/35328/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://fancode.com/match/35567?slug=scorecard',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
id = self._match_id(url)
|
||||
data = '''{
|
||||
"query":"query MatchResponse($id: Int\\u0021, $isLoggedIn: Boolean\\u0021) { match: matchWithScores(id: $id) { id matchDesc mediaId videoStreamId videoStreamUrl { ...VideoSource } liveStreams { videoStreamId videoStreamUrl { ...VideoSource } contentId } name startTime streamingStatus isPremium isUserEntitled @include(if: $isLoggedIn) status metaTags bgImage { src } sport { name slug } tour { id name } squads { name shortName } liveStreams { contentId } mediaId }}fragment VideoSource on VideoSource { title description posterUrl url deliveryType playerType}",
|
||||
"variables":{
|
||||
"id":%s,
|
||||
"isLoggedIn":true
|
||||
},
|
||||
"operationName":"MatchResponse"
|
||||
}''' % id
|
||||
|
||||
info_json = self.download_gql(id, data, "Info json")
|
||||
|
||||
match_info = try_get(info_json, lambda x: x['data']['match'])
|
||||
|
||||
if match_info.get('status') != "LIVE":
|
||||
raise ExtractorError('The stream can\'t be accessed', expected=True)
|
||||
self._check_login_required(match_info.get('isUserEntitled'), True) # all live streams are premium only
|
||||
|
||||
return {
|
||||
'id': id,
|
||||
'title': match_info.get('name'),
|
||||
'formats': self._extract_akamai_formats(try_get(match_info, lambda x: x['videoStreamUrl']['url']), id),
|
||||
'ext': mimetype2ext(try_get(match_info, lambda x: x['videoStreamUrl']['deliveryType'])),
|
||||
'is_live': True,
|
||||
'release_timestamp': parse_iso8601(match_info.get('startTime'))
|
||||
}
|
||||
|
||||
@@ -2,60 +2,124 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
str_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class FunimationIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||
|
||||
_NETRC_MACHINE = 'funimation'
|
||||
_TOKEN = None
|
||||
class FunimationPageIE(InfoExtractor):
|
||||
IE_NAME = 'funimation:page'
|
||||
_VALID_URL = r'(?P<origin>https?://(?:www\.)?funimation(?:\.com|now\.uk))/(?P<lang>[^/]+/)?(?P<path>shows/(?P<id>[^/]+/[^/?#&]+).*$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funimation.com/shows/hacksign/role-play/',
|
||||
'info_dict': {
|
||||
'id': '91144',
|
||||
'display_id': 'role-play',
|
||||
'ext': 'mp4',
|
||||
'title': '.hack//SIGN - Role Play',
|
||||
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
|
||||
'info_dict': {
|
||||
'id': '210051',
|
||||
'display_id': 'broadcast-dub-preview',
|
||||
'id': '210050',
|
||||
'ext': 'mp4',
|
||||
'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
|
||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||
'title': 'Broadcast Dub Preview',
|
||||
# Other metadata is tested in FunimationIE
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'add_ie': ['Funimation'],
|
||||
}, {
|
||||
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
||||
# Not available in US
|
||||
'url': 'https://www.funimation.com/shows/hacksign/role-play/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# with lang code
|
||||
'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id').replace('/', '_')
|
||||
if not mobj.group('lang'):
|
||||
url = '%s/en/%s' % (mobj.group('origin'), mobj.group('path'))
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title_data = self._parse_json(self._search_regex(
|
||||
r'TITLE_DATA\s*=\s*({[^}]+})',
|
||||
webpage, 'title data', default=''),
|
||||
display_id, js_to_json, fatal=False) or {}
|
||||
|
||||
video_id = (
|
||||
title_data.get('id')
|
||||
or self._search_regex(
|
||||
(r"KANE_customdimensions.videoID\s*=\s*'(\d+)';", r'<iframe[^>]+src="/player/(\d+)'),
|
||||
webpage, 'video_id', default=None)
|
||||
or self._search_regex(
|
||||
r'/player/(\d+)',
|
||||
self._html_search_meta(['al:web:url', 'og:video:url', 'og:video:secure_url'], webpage, fatal=True),
|
||||
'video id'))
|
||||
return self.url_result(f'https://www.funimation.com/player/{video_id}', FunimationIE.ie_key(), video_id)
|
||||
|
||||
|
||||
class FunimationIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funimation\.com/player/(?P<id>\d+)'
|
||||
|
||||
_NETRC_MACHINE = 'funimation'
|
||||
_TOKEN = None
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funimation.com/player/210051',
|
||||
'info_dict': {
|
||||
'id': '210050',
|
||||
'display_id': 'broadcast-dub-preview',
|
||||
'ext': 'mp4',
|
||||
'title': 'Broadcast Dub Preview',
|
||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||
'episode': 'Broadcast Dub Preview',
|
||||
'episode_id': '210050',
|
||||
'season': 'Extras',
|
||||
'season_id': '166038',
|
||||
'season_number': 99,
|
||||
'series': 'Attack on Titan: Junior High',
|
||||
'description': '',
|
||||
'duration': 154,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'note': 'player_id should be extracted with the relevent compat-opt',
|
||||
'url': 'https://www.funimation.com/player/210051',
|
||||
'info_dict': {
|
||||
'id': '210051',
|
||||
'display_id': 'broadcast-dub-preview',
|
||||
'ext': 'mp4',
|
||||
'title': 'Broadcast Dub Preview',
|
||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||
'episode': 'Broadcast Dub Preview',
|
||||
'episode_id': '210050',
|
||||
'season': 'Extras',
|
||||
'season_id': '166038',
|
||||
'season_number': 99,
|
||||
'series': 'Attack on Titan: Junior High',
|
||||
'description': '',
|
||||
'duration': 154,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
'compat_opts': ['seperate-video-versions'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@@ -79,100 +143,184 @@ class FunimationIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@staticmethod
|
||||
def _get_experiences(episode):
|
||||
for lang, lang_data in episode.get('languages', {}).items():
|
||||
for video_data in lang_data.values():
|
||||
for version, f in video_data.items():
|
||||
yield lang, version.title(), f
|
||||
|
||||
def _get_episode(self, webpage, experience_id=None, episode_id=None, fatal=True):
|
||||
''' Extract the episode, season and show objects given either episode/experience id '''
|
||||
show = self._parse_json(
|
||||
self._search_regex(
|
||||
r'show\s*=\s*({.+?})\s*;', webpage, 'show data', fatal=fatal),
|
||||
experience_id, transform_source=js_to_json, fatal=fatal) or []
|
||||
for season in show.get('seasons', []):
|
||||
for episode in season.get('episodes', []):
|
||||
if episode_id is not None:
|
||||
if str(episode.get('episodePk')) == episode_id:
|
||||
return episode, season, show
|
||||
continue
|
||||
for _, _, f in self._get_experiences(episode):
|
||||
if f.get('experienceId') == experience_id:
|
||||
return episode, season, show
|
||||
if fatal:
|
||||
raise ExtractorError('Unable to find episode information')
|
||||
else:
|
||||
self.report_warning('Unable to find episode information')
|
||||
return {}, {}, {}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
initial_experience_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
url, initial_experience_id, note=f'Downloading player webpage for {initial_experience_id}')
|
||||
episode, season, show = self._get_episode(webpage, experience_id=int(initial_experience_id))
|
||||
episode_id = str(episode['episodePk'])
|
||||
display_id = episode.get('slug') or episode_id
|
||||
|
||||
def _search_kane(name):
|
||||
return self._search_regex(
|
||||
r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name,
|
||||
webpage, name, default=None)
|
||||
formats, subtitles, thumbnails, duration = [], {}, [], 0
|
||||
requested_languages, requested_versions = self._configuration_arg('language'), self._configuration_arg('version')
|
||||
only_initial_experience = 'seperate-video-versions' in self.get_param('compat_opts', [])
|
||||
|
||||
title_data = self._parse_json(self._search_regex(
|
||||
r'TITLE_DATA\s*=\s*({[^}]+})',
|
||||
webpage, 'title data', default=''),
|
||||
display_id, js_to_json, fatal=False) or {}
|
||||
for lang, version, fmt in self._get_experiences(episode):
|
||||
experience_id = str(fmt['experienceId'])
|
||||
if (only_initial_experience and experience_id != initial_experience_id
|
||||
or requested_languages and lang.lower() not in requested_languages
|
||||
or requested_versions and version.lower() not in requested_versions):
|
||||
continue
|
||||
thumbnails.append({'url': fmt.get('poster')})
|
||||
duration = max(duration, fmt.get('duration', 0))
|
||||
format_name = '%s %s (%s)' % (version, lang, experience_id)
|
||||
self.extract_subtitles(
|
||||
subtitles, experience_id, display_id=display_id, format_name=format_name,
|
||||
episode=episode if experience_id == initial_experience_id else episode_id)
|
||||
|
||||
video_id = title_data.get('id') or self._search_regex([
|
||||
r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
|
||||
r'<iframe[^>]+src="/player/(\d+)',
|
||||
], webpage, 'video_id', default=None)
|
||||
if not video_id:
|
||||
player_url = self._html_search_meta([
|
||||
'al:web:url',
|
||||
'og:video:url',
|
||||
'og:video:secure_url',
|
||||
], webpage, fatal=True)
|
||||
video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id')
|
||||
|
||||
title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage)
|
||||
series = _search_kane('showName')
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
|
||||
subtitles = self.extract_subtitles(url, video_id, display_id)
|
||||
|
||||
try:
|
||||
headers = {}
|
||||
if self._TOKEN:
|
||||
headers['Authorization'] = 'Token %s' % self._TOKEN
|
||||
sources = self._download_json(
|
||||
'https://www.funimation.com/api/showexperience/%s/' % video_id,
|
||||
video_id, headers=headers, query={
|
||||
page = self._download_json(
|
||||
'https://www.funimation.com/api/showexperience/%s/' % experience_id,
|
||||
display_id, headers=headers, expected_status=403, query={
|
||||
'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
|
||||
})['items']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
error = self._parse_json(e.cause.read(), video_id)['errors'][0]
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, error.get('detail') or error.get('title')), expected=True)
|
||||
raise
|
||||
}, note=f'Downloading {format_name} JSON')
|
||||
sources = page.get('items') or []
|
||||
if not sources:
|
||||
error = try_get(page, lambda x: x['errors'][0], dict)
|
||||
if error:
|
||||
self.report_warning('%s said: Error %s - %s' % (
|
||||
self.IE_NAME, error.get('code'), error.get('detail') or error.get('title')))
|
||||
else:
|
||||
self.report_warning('No sources found for format')
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
source_url = source.get('src')
|
||||
if not source_url:
|
||||
continue
|
||||
source_type = source.get('videoType') or determine_ext(source_url)
|
||||
if source_type == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': source_type,
|
||||
'url': source_url,
|
||||
})
|
||||
current_formats = []
|
||||
for source in sources:
|
||||
source_url = source.get('src')
|
||||
source_type = source.get('videoType') or determine_ext(source_url)
|
||||
if source_type == 'm3u8':
|
||||
current_formats.extend(self._extract_m3u8_formats(
|
||||
source_url, display_id, 'mp4', m3u8_id='%s-%s' % (experience_id, 'hls'), fatal=False,
|
||||
note=f'Downloading {format_name} m3u8 information'))
|
||||
else:
|
||||
current_formats.append({
|
||||
'format_id': '%s-%s' % (experience_id, source_type),
|
||||
'url': source_url,
|
||||
})
|
||||
for f in current_formats:
|
||||
# TODO: Convert language to code
|
||||
f.update({'language': lang, 'format_note': version})
|
||||
formats.extend(current_formats)
|
||||
self._remove_duplicate_formats(formats)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': initial_experience_id if only_initial_experience else episode_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'series': series,
|
||||
'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
|
||||
'episode_number': int_or_none(title_data.get('episodeNum')),
|
||||
'episode': episode,
|
||||
'subtitles': subtitles,
|
||||
'season_id': title_data.get('seriesId'),
|
||||
'duration': duration,
|
||||
'title': episode['episodeTitle'],
|
||||
'description': episode.get('episodeSummary'),
|
||||
'episode': episode.get('episodeTitle'),
|
||||
'episode_number': int_or_none(episode.get('episodeId')),
|
||||
'episode_id': episode_id,
|
||||
'season': season.get('seasonTitle'),
|
||||
'season_number': int_or_none(season.get('seasonId')),
|
||||
'season_id': str_or_none(season.get('seasonPk')),
|
||||
'series': show.get('showTitle'),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, url, video_id, display_id):
|
||||
player_url = urljoin(url, '/player/' + video_id)
|
||||
player_page = self._download_webpage(player_url, display_id)
|
||||
text_tracks_json_string = self._search_regex(
|
||||
r'"textTracks": (\[{.+?}\])',
|
||||
player_page, 'subtitles data', default='')
|
||||
text_tracks = self._parse_json(
|
||||
text_tracks_json_string, display_id, js_to_json, fatal=False) or []
|
||||
subtitles = {}
|
||||
for text_track in text_tracks:
|
||||
url_element = {'url': text_track.get('src')}
|
||||
language = text_track.get('language')
|
||||
if text_track.get('type') == 'CC':
|
||||
language += '_CC'
|
||||
subtitles.setdefault(language, []).append(url_element)
|
||||
def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_name):
|
||||
if isinstance(episode, str):
|
||||
webpage = self._download_webpage(
|
||||
f'https://www.funimation.com/player/{experience_id}', display_id,
|
||||
fatal=False, note=f'Downloading player webpage for {format_name}')
|
||||
episode, _, _ = self._get_episode(webpage, episode_id=episode, fatal=False)
|
||||
|
||||
for _, version, f in self._get_experiences(episode):
|
||||
for source in f.get('sources'):
|
||||
for text_track in source.get('textTracks'):
|
||||
if not text_track.get('src'):
|
||||
continue
|
||||
sub_type = text_track.get('type').upper()
|
||||
sub_type = sub_type if sub_type != 'FULL' else None
|
||||
current_sub = {
|
||||
'url': text_track['src'],
|
||||
'name': ' '.join(filter(None, (version, text_track.get('label'), sub_type)))
|
||||
}
|
||||
lang = '_'.join(filter(None, (
|
||||
text_track.get('language', 'und'), version if version != 'Simulcast' else None, sub_type)))
|
||||
if current_sub not in subtitles.get(lang, []):
|
||||
subtitles.setdefault(lang, []).append(current_sub)
|
||||
return subtitles
|
||||
|
||||
|
||||
class FunimationShowIE(FunimationIE):
|
||||
IE_NAME = 'funimation:show'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?P<locale>[^/]+)?/?shows/(?P<id>[^/?#&]+))/?(?:[?#]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funimation.com/en/shows/sk8-the-infinity',
|
||||
'info_dict': {
|
||||
'id': 1315000,
|
||||
'title': 'SK8 the Infinity'
|
||||
},
|
||||
'playlist_count': 13,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# without lang code
|
||||
'url': 'https://www.funimation.com/shows/ouran-high-school-host-club/',
|
||||
'info_dict': {
|
||||
'id': 39643,
|
||||
'title': 'Ouran High School Host Club'
|
||||
},
|
||||
'playlist_count': 26,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
base_url, locale, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
show_info = self._download_json(
|
||||
'https://title-api.prd.funimationsvc.com/v2/shows/%s?region=US&deviceType=web&locale=%s'
|
||||
% (display_id, locale or 'en'), display_id)
|
||||
items = self._download_json(
|
||||
'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id=%s'
|
||||
% show_info.get('id'), display_id).get('items')
|
||||
vod_items = map(lambda k: dict_get(k, ('mostRecentSvod', 'mostRecentAvod')).get('item'), items)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': show_info['id'],
|
||||
'title': show_info['name'],
|
||||
'entries': [
|
||||
self.url_result(
|
||||
'%s/%s' % (base_url, vod_item.get('episodeSlug')), FunimationPageIE.ie_key(),
|
||||
vod_item.get('episodeId'), vod_item.get('episodeName'))
|
||||
for vod_item in sorted(vod_items, key=lambda x: x.get('episodeOrder'))],
|
||||
}
|
||||
|
||||
@@ -2462,7 +2462,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Is it an M3U playlist?
|
||||
if first_bytes.startswith(b'#EXTM3U'):
|
||||
info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||
info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
|
||||
self._sort_formats(info_dict['formats'])
|
||||
return info_dict
|
||||
|
||||
@@ -3410,6 +3410,7 @@ class GenericIE(InfoExtractor):
|
||||
if not isinstance(sources, list):
|
||||
sources = [sources]
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for source in sources:
|
||||
src = source.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
@@ -3422,12 +3423,16 @@ class GenericIE(InfoExtractor):
|
||||
if src_type == 'video/youtube':
|
||||
return self.url_result(src, YoutubeIE.ie_key())
|
||||
if src_type == 'application/dash+xml' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src, video_id, mpd_id='dash', fatal=False))
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
src, video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
src, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
formats.append({
|
||||
'url': src,
|
||||
@@ -3437,9 +3442,10 @@ class GenericIE(InfoExtractor):
|
||||
'Referer': full_response.geturl(),
|
||||
},
|
||||
})
|
||||
if formats:
|
||||
if formats or subtitles:
|
||||
self._sort_formats(formats)
|
||||
info_dict['formats'] = formats
|
||||
info_dict['subtitles'] = subtitles
|
||||
return info_dict
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
@@ -3574,13 +3580,13 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'smil':
|
||||
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
|
||||
entry_info_dict = {**self._extract_smil_info(video_url, video_id), **entry_info_dict}
|
||||
elif ext == 'xspf':
|
||||
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
|
||||
elif ext == 'm3u8':
|
||||
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
|
||||
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4')
|
||||
elif ext == 'mpd':
|
||||
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
|
||||
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id)
|
||||
elif ext == 'f4m':
|
||||
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
|
||||
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
|
||||
|
||||
@@ -19,6 +19,7 @@ from ..utils import (
|
||||
std_headers,
|
||||
try_get,
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
@@ -188,9 +189,7 @@ class InstagramIE(InfoExtractor):
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
|
||||
def get_count(keys, kind):
|
||||
if not isinstance(keys, (list, tuple)):
|
||||
keys = [keys]
|
||||
for key in keys:
|
||||
for key in variadic(keys):
|
||||
count = int_or_none(try_get(
|
||||
media, (lambda x: x['edge_media_%s' % key]['count'],
|
||||
lambda x: x['%ss' % kind]['count'])))
|
||||
|
||||
@@ -249,6 +249,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
if info:
|
||||
entries.append(info)
|
||||
|
||||
# TODO: should be multi-video
|
||||
return self.playlist_result(
|
||||
entries, playlist_title=title, playlist_description=description)
|
||||
|
||||
|
||||
@@ -2,9 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import time
|
||||
|
||||
from urllib.error import HTTPError
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_quote
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
@@ -78,7 +80,9 @@ class NebulaIE(InfoExtractor):
|
||||
]
|
||||
_NETRC_MACHINE = 'watchnebula'
|
||||
|
||||
def _retrieve_nebula_auth(self, video_id):
|
||||
_nebula_token = None
|
||||
|
||||
def _retrieve_nebula_auth(self):
|
||||
"""
|
||||
Log in to Nebula, and returns a Nebula API token
|
||||
"""
|
||||
@@ -91,7 +95,7 @@ class NebulaIE(InfoExtractor):
|
||||
data = json.dumps({'email': username, 'password': password}).encode('utf8')
|
||||
response = self._download_json(
|
||||
'https://api.watchnebula.com/api/v1/auth/login/',
|
||||
data=data, fatal=False, video_id=video_id,
|
||||
data=data, fatal=False, video_id=None,
|
||||
headers={
|
||||
'content-type': 'application/json',
|
||||
# Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
|
||||
@@ -101,6 +105,19 @@ class NebulaIE(InfoExtractor):
|
||||
errnote='Authentication failed or rejected')
|
||||
if not response or not response.get('key'):
|
||||
self.raise_login_required()
|
||||
|
||||
# save nebula token as cookie
|
||||
self._set_cookie(
|
||||
'nebula.app', 'nebula-auth',
|
||||
compat_urllib_parse_quote(
|
||||
json.dumps({
|
||||
"apiToken": response["key"],
|
||||
"isLoggingIn": False,
|
||||
"isLoggingOut": False,
|
||||
}, separators=(",", ":"))),
|
||||
expire_time=int(time.time()) + 86400 * 365,
|
||||
)
|
||||
|
||||
return response['key']
|
||||
|
||||
def _retrieve_zype_api_key(self, page_url, display_id):
|
||||
@@ -139,8 +156,17 @@ class NebulaIE(InfoExtractor):
|
||||
'Authorization': 'Token {access_token}'.format(access_token=access_token)
|
||||
}, note=note)
|
||||
|
||||
def _fetch_zype_access_token(self, video_id, nebula_token):
|
||||
user_object = self._call_nebula_api('/auth/user/', video_id, nebula_token, note='Retrieving Zype access token')
|
||||
def _fetch_zype_access_token(self, video_id):
|
||||
try:
|
||||
user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token')
|
||||
except ExtractorError as exc:
|
||||
# if 401, attempt credential auth and retry
|
||||
if exc.cause and isinstance(exc.cause, HTTPError) and exc.cause.code == 401:
|
||||
self._nebula_token = self._retrieve_nebula_auth()
|
||||
user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token')
|
||||
else:
|
||||
raise
|
||||
|
||||
access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str)
|
||||
if not access_token:
|
||||
if try_get(user_object, lambda x: x['is_subscribed'], bool):
|
||||
@@ -162,9 +188,21 @@ class NebulaIE(InfoExtractor):
|
||||
if category.get('value'):
|
||||
return category['value'][0]
|
||||
|
||||
def _real_initialize(self):
|
||||
# check cookie jar for valid token
|
||||
nebula_cookies = self._get_cookies('https://nebula.app')
|
||||
nebula_cookie = nebula_cookies.get('nebula-auth')
|
||||
if nebula_cookie:
|
||||
self.to_screen('Authenticating to Nebula with token from cookie jar')
|
||||
nebula_cookie_value = compat_urllib_parse_unquote(nebula_cookie.value)
|
||||
self._nebula_token = self._parse_json(nebula_cookie_value, None).get('apiToken')
|
||||
|
||||
# try to authenticate using credentials if no valid token has been found
|
||||
if not self._nebula_token:
|
||||
self._nebula_token = self._retrieve_nebula_auth()
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
nebula_token = self._retrieve_nebula_auth(display_id)
|
||||
api_key = self._retrieve_zype_api_key(url, display_id)
|
||||
|
||||
response = self._call_zype_api('/videos', {'friendly_title': display_id},
|
||||
@@ -174,7 +212,7 @@ class NebulaIE(InfoExtractor):
|
||||
video_meta = response['response'][0]
|
||||
|
||||
video_id = video_meta['_id']
|
||||
zype_access_token = self._fetch_zype_access_token(display_id, nebula_token=nebula_token)
|
||||
zype_access_token = self._fetch_zype_access_token(display_id)
|
||||
|
||||
channel_title = self._extract_channel_title(video_meta)
|
||||
|
||||
@@ -187,13 +225,12 @@ class NebulaIE(InfoExtractor):
|
||||
'title': video_meta.get('title'),
|
||||
'description': video_meta.get('description'),
|
||||
'timestamp': parse_iso8601(video_meta.get('published_at')),
|
||||
'thumbnails': [
|
||||
{
|
||||
'id': tn.get('name'), # this appears to be null
|
||||
'url': tn['url'],
|
||||
'width': tn.get('width'),
|
||||
'height': tn.get('height'),
|
||||
} for tn in video_meta.get('thumbnails', [])],
|
||||
'thumbnails': [{
|
||||
'id': tn.get('name'), # this appears to be null
|
||||
'url': tn['url'],
|
||||
'width': tn.get('width'),
|
||||
'height': tn.get('height'),
|
||||
} for tn in video_meta.get('thumbnails', [])],
|
||||
'duration': video_meta.get('duration'),
|
||||
'channel': channel_title,
|
||||
'uploader': channel_title, # we chose uploader = channel name
|
||||
|
||||
@@ -569,15 +569,15 @@ class PeerTubeIE(InfoExtractor):
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
full_description = self._call_api(
|
||||
host, video_id, 'description', note='Downloading description JSON',
|
||||
fatal=False)
|
||||
description = video.get('description')
|
||||
if len(description) >= 250:
|
||||
# description is shortened
|
||||
full_description = self._call_api(
|
||||
host, video_id, 'description', note='Downloading description JSON',
|
||||
fatal=False)
|
||||
|
||||
description = None
|
||||
if isinstance(full_description, dict):
|
||||
description = str_or_none(full_description.get('description'))
|
||||
if not description:
|
||||
description = video.get('description')
|
||||
if isinstance(full_description, dict):
|
||||
description = str_or_none(full_description.get('description')) or description
|
||||
|
||||
subtitles = self.extract_subtitles(host, video_id)
|
||||
|
||||
|
||||
@@ -12,6 +12,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class PeriscopeBaseIE(InfoExtractor):
|
||||
_M3U8_HEADERS = {
|
||||
'Referer': 'https://www.periscope.tv/'
|
||||
}
|
||||
|
||||
def _call_api(self, method, query, item_id):
|
||||
return self._download_json(
|
||||
'https://api.periscope.tv/api/v2/%s' % method,
|
||||
@@ -54,9 +58,11 @@ class PeriscopeBaseIE(InfoExtractor):
|
||||
m3u8_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native'
|
||||
if state in ('ended', 'timed_out') else 'm3u8',
|
||||
m3u8_id=format_id, fatal=fatal)
|
||||
m3u8_id=format_id, fatal=fatal, headers=self._M3U8_HEADERS)
|
||||
if len(m3u8_formats) == 1:
|
||||
self._add_width_and_height(m3u8_formats[0], width, height)
|
||||
for f in m3u8_formats:
|
||||
f.setdefault('http_headers', {}).update(self._M3U8_HEADERS)
|
||||
return m3u8_formats
|
||||
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class PlutoTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pluto\.tv/on-demand/(?P<video_type>movies|series)/(?P<slug>.*)/?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?pluto\.tv(?:/en)?/on-demand/(?P<video_type>movies|series)/(?P<slug>.*)/?$'
|
||||
_INFO_URL = 'https://service-vod.clusters.pluto.tv/v3/vod/slugs/'
|
||||
_INFO_QUERY_PARAMS = {
|
||||
'appName': 'web',
|
||||
@@ -48,24 +48,21 @@ class PlutoTVIE(InfoExtractor):
|
||||
'episode_number': 3,
|
||||
'duration': 3600,
|
||||
}
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'https://pluto.tv/on-demand/series/i-love-money/season/1/',
|
||||
'playlist_count': 11,
|
||||
'info_dict': {
|
||||
'id': '5de6c582e9379ae4912dedbd',
|
||||
'title': 'I Love Money - Season 1',
|
||||
}
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'https://pluto.tv/on-demand/series/i-love-money/',
|
||||
'playlist_count': 26,
|
||||
'info_dict': {
|
||||
'id': '5de6c582e9379ae4912dedbd',
|
||||
'title': 'I Love Money',
|
||||
}
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'https://pluto.tv/on-demand/movies/arrival-2015-1-1',
|
||||
'md5': '3cead001d317a018bf856a896dee1762',
|
||||
'info_dict': {
|
||||
@@ -75,7 +72,10 @@ class PlutoTVIE(InfoExtractor):
|
||||
'description': 'When mysterious spacecraft touch down across the globe, an elite team - led by expert translator Louise Banks (Academy Award® nominee Amy Adams) – races against time to decipher their intent.',
|
||||
'duration': 9000,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pluto.tv/en/on-demand/series/manhunters-fugitive-task-force/seasons/1/episode/third-times-the-charm-1-1',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _to_ad_free_formats(self, video_id, formats, subtitles):
|
||||
|
||||
82
yt_dlp/extractor/pornflip.py
Normal file
@@ -0,0 +1,82 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601
|
||||
)
|
||||
|
||||
|
||||
class PornFlipIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:(embed|sv|v)/)?(?P<id>[^/]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.pornflip.com/dzv9Mtw1qj2/sv/brazzers-double-dare-two-couples-fucked-jenna-reid-maya-bijou',
|
||||
'info_dict': {
|
||||
'id': 'dzv9Mtw1qj2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Brazzers - Double Dare Two couples fucked Jenna Reid Maya Bijou',
|
||||
'description': 'md5:d2b69e6cc743c5fd158e162aa7f05821',
|
||||
'duration': 476,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'view_count': int,
|
||||
'timestamp': 1617846819,
|
||||
'upload_date': '20210408',
|
||||
'uploader': 'Brazzers',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.pornflip.com/v/IrJEC40i21L',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.pornflip.com/Z3jzbChC5-P/sexintaxi-e-sereyna-gomez-czech-naked-couple',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.pornflip.com/embed/bLcDFxnrZnU',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_HOST = 'www.pornflip.com'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'https://{}/sv/{}'.format(self._HOST, video_id), video_id, headers={'host': self._HOST})
|
||||
description = self._html_search_regex(r'&p\[summary\]=(.*?)\s*&p', webpage, 'description', fatal=False)
|
||||
duration = self._search_regex(r'"duration":\s+"([^"]+)",', webpage, 'duration', fatal=False)
|
||||
view_count = self._search_regex(r'"interactionCount":\s+"([^"]+)"', webpage, 'view_count', fatal=False)
|
||||
title = self._html_search_regex(r'id="mediaPlayerTitleLink"[^>]*>(.+)</a>', webpage, 'title', fatal=False)
|
||||
uploader = self._html_search_regex(r'class="title-chanel"[^>]*>[^<]*<a[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
|
||||
upload_date = self._search_regex(r'"uploadDate":\s+"([^"]+)",', webpage, 'upload_date', fatal=False)
|
||||
likes = self._html_search_regex(
|
||||
r'class="btn btn-up-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'like_count', fatal=False)
|
||||
dislikes = self._html_search_regex(
|
||||
r'class="btn btn-down-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'dislike_count', fatal=False)
|
||||
mpd_url = self._search_regex(r'"([^"]+userscontent.net/dash/[0-9]+/manifest.mpd[^"]*)"', webpage, 'mpd_url').replace('&', '&')
|
||||
formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'age_limit': 18,
|
||||
'description': description,
|
||||
'dislike_count': int_or_none(dislikes),
|
||||
'duration': parse_duration(duration),
|
||||
'formats': formats,
|
||||
'id': video_id,
|
||||
'like_count': int_or_none(likes),
|
||||
'timestamp': parse_iso8601(upload_date),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'title': title,
|
||||
'uploader': uploader,
|
||||
'view_count': int_or_none(view_count),
|
||||
}
|
||||
yt_dlp/extractor/rcti.py (new file, 242 lines added)
@@ -0,0 +1,242 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .openload import PhantomJSwrapper
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
RegexNotFoundError,
|
||||
strip_or_none,
|
||||
try_get
|
||||
)
|
||||
|
||||
|
||||
class RCTIPlusBaseIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._AUTH_KEY = self._download_json(
|
||||
'https://api.rctiplus.com/api/v1/visitor?platform=web', # platform can be web, mweb, android, ios
|
||||
None, 'Fetching authorization key')['data']['access_token']
|
||||
|
||||
def _call_api(self, url, video_id, note=None):
|
||||
json = self._download_json(
|
||||
url, video_id, note=note, headers={'Authorization': self._AUTH_KEY})
|
||||
if json.get('status', {}).get('code', 0) != 0:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, json["status"]["message_client"]), cause=json)
|
||||
return json.get('data'), json.get('meta')
|
||||
|
||||
|
||||
class RCTIPlusIE(RCTIPlusBaseIE):
|
||||
_VALID_URL = r'https://www\.rctiplus\.com/programs/\d+?/.*?/(?P<type>episode|clip|extra)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.rctiplus.com/programs/1259/kiko-untuk-lola/episode/22124/untuk-lola',
|
||||
'md5': '56ed45affad45fa18d5592a1bc199997',
|
||||
'info_dict': {
|
||||
'id': 'v_e22124',
|
||||
'title': 'Untuk Lola',
|
||||
'display_id': 'untuk-lola',
|
||||
'description': 'md5:2b809075c0b1e071e228ad6d13e41deb',
|
||||
'ext': 'mp4',
|
||||
'duration': 1400,
|
||||
'timestamp': 1615978800,
|
||||
'upload_date': '20210317',
|
||||
'series': 'Kiko : Untuk Lola',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'channel': 'RCTI',
|
||||
},
|
||||
'params': {
|
||||
'fixup': 'never',
|
||||
},
|
||||
}, { # Clip; Series title doesn't appear on metadata JSON
|
||||
'url': 'https://www.rctiplus.com/programs/316/cahaya-terindah/clip/3921/make-a-wish',
|
||||
'md5': 'd179b2ff356f0e91a53bcc6a4d8504f0',
|
||||
'info_dict': {
|
||||
'id': 'v_c3921',
|
||||
'title': 'Make A Wish',
|
||||
'display_id': 'make-a-wish',
|
||||
'description': 'Make A Wish',
|
||||
'ext': 'mp4',
|
||||
'duration': 288,
|
||||
'timestamp': 1571652600,
|
||||
'upload_date': '20191021',
|
||||
'series': 'Cahaya Terindah',
|
||||
'channel': 'RCTI',
|
||||
},
|
||||
'params': {
|
||||
'fixup': 'never',
|
||||
},
|
||||
}, { # Extra
|
||||
'url': 'https://www.rctiplus.com/programs/616/inews-malam/extra/9438/diungkapkan-melalui-surat-terbuka-ceo-ruangguru-belva-devara-mundur-dari-staf-khusus-presiden',
|
||||
'md5': 'c48106afdbce609749f5e0c007d9278a',
|
||||
'info_dict': {
|
||||
'id': 'v_ex9438',
|
||||
'title': 'md5:2ede828c0f8bde249e0912be150314ca',
|
||||
'display_id': 'md5:62b8d4e9ff096db527a1ad797e8a9933',
|
||||
'description': 'md5:2ede828c0f8bde249e0912be150314ca',
|
||||
'ext': 'mp4',
|
||||
'duration': 93,
|
||||
'timestamp': 1587561540,
|
||||
'upload_date': '20200422',
|
||||
'series': 'iNews Malam',
|
||||
'channel': 'INews',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}]
|
||||
|
||||
def _search_auth_key(self, webpage):
|
||||
try:
|
||||
self._AUTH_KEY = self._search_regex(
|
||||
r'\'Authorization\':"(?P<auth>[^"]+)"', webpage, 'auth-key')
|
||||
except RegexNotFoundError:
|
||||
pass
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_type, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
self._search_auth_key(webpage)
|
||||
|
||||
video_json = self._call_api(
|
||||
'https://api.rctiplus.com/api/v1/%s/%s/url?appierid=.1' % (video_type, video_id), display_id, 'Downloading video URL JSON')[0]
|
||||
video_url = video_json['url']
|
||||
if 'akamaized' in video_url:
|
||||
# Akamai's CDN requires a session to at least be made via Conviva's API
|
||||
# TODO: Reverse-engineer Conviva's heartbeat code to avoid phantomJS
|
||||
phantom = None
|
||||
try:
|
||||
phantom = PhantomJSwrapper(self)
|
||||
phantom.get(url, webpage, display_id, note2='Initiating video session')
|
||||
except ExtractorError:
|
||||
self.report_warning('PhantomJS is highly recommended for this video, as it might load incredibly slowly otherwise. '
|
||||
'You can also try opening the page in this device\'s browser first')
|
||||
|
||||
video_meta, meta_paths = self._call_api(
|
||||
'https://api.rctiplus.com/api/v1/%s/%s' % (video_type, video_id), display_id, 'Downloading video metadata')
|
||||
|
||||
thumbnails, image_path = [], meta_paths.get('image_path', 'https://rstatic.akamaized.net/media/')
|
||||
if video_meta.get('portrait_image'):
|
||||
thumbnails.append({
|
||||
'id': 'portrait_image',
|
||||
'url': '%s%d%s' % (image_path, 2000, video_meta['portrait_image']) # 2000px seems to be the highest resolution that can be given
|
||||
})
|
||||
if video_meta.get('landscape_image'):
|
||||
thumbnails.append({
|
||||
'id': 'landscape_image',
|
||||
'url': '%s%d%s' % (image_path, 2000, video_meta['landscape_image'])
|
||||
})
|
||||
|
||||
formats = self._extract_m3u8_formats(video_url, display_id, 'mp4', headers={'Referer': 'https://www.rctiplus.com/'})
|
||||
for f in formats:
|
||||
if 'akamaized' in f['url']:
|
||||
f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/' # Referer header is required for akamai CDNs
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_meta.get('product_id') or video_json.get('product_id'),
|
||||
'title': video_meta.get('title') or video_json.get('content_name'),
|
||||
'display_id': display_id,
|
||||
'description': video_meta.get('summary'),
|
||||
'timestamp': video_meta.get('release_date'),
|
||||
'duration': video_meta.get('duration'),
|
||||
'categories': [video_meta.get('genre')],
|
||||
'average_rating': video_meta.get('star_rating'),
|
||||
'series': video_meta.get('program_title') or video_json.get('program_title'),
|
||||
'season_number': video_meta.get('season'),
|
||||
'episode_number': video_meta.get('episode'),
|
||||
'channel': video_json.get('tv_name'),
|
||||
'channel_id': video_json.get('tv_id'),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails
|
||||
}
|
||||
|
||||
|
||||
class RCTIPlusSeriesIE(RCTIPlusBaseIE):
|
||||
_VALID_URL = r'https://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)(?:\W)*$'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.rctiplus.com/programs/540/upin-ipin',
|
||||
'playlist_mincount': 417,
|
||||
'info_dict': {
|
||||
'id': '540',
|
||||
'title': 'Upin & Ipin',
|
||||
'description': 'md5:22cc912381f389664416844e1ec4f86b',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.rctiplus.com/programs/540/upin-ipin/#',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_AGE_RATINGS = { # Based off https://id.wikipedia.org/wiki/Sistem_rating_konten_televisi with additional ratings
|
||||
'S-SU': 2,
|
||||
'SU': 2,
|
||||
'P': 2,
|
||||
'A': 7,
|
||||
'R': 13,
|
||||
'R-R/1': 17, # Labelled as 17+ despite being R
|
||||
'D': 18,
|
||||
}
|
||||
|
||||
def _entries(self, url, display_id=None, note='Downloading entries JSON', metadata={}):
|
||||
total_pages = 0
|
||||
try:
|
||||
total_pages = self._call_api(
|
||||
'%s&length=20&page=0' % url,
|
||||
display_id, note)[1]['pagination']['total_page']
|
||||
except ExtractorError as e:
|
||||
if 'not found' in str(e):
|
||||
return []
|
||||
raise e
|
||||
if total_pages <= 0:
|
||||
return []
|
||||
|
||||
for page_num in range(1, total_pages + 1):
|
||||
episode_list = self._call_api(
|
||||
'%s&length=20&page=%s' % (url, page_num),
|
||||
display_id, '%s page %s' % (note, page_num))[0] or []
|
||||
|
||||
for video_json in episode_list:
|
||||
link = video_json['share_link']
|
||||
url_res = self.url_result(link, 'RCTIPlus', video_json.get('product_id'), video_json.get('title'))
|
||||
url_res.update(metadata)
|
||||
yield url_res
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
series_meta, meta_paths = self._call_api(
|
||||
'https://api.rctiplus.com/api/v1/program/%s/detail' % series_id, display_id, 'Downloading series metadata')
|
||||
metadata = {
|
||||
'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']])
|
||||
}
|
||||
|
||||
cast = []
|
||||
for star in series_meta.get('starring', []):
|
||||
cast.append(strip_or_none(star.get('name')))
|
||||
for star in series_meta.get('creator', []):
|
||||
cast.append(strip_or_none(star.get('name')))
|
||||
for star in series_meta.get('writer', []):
|
||||
cast.append(strip_or_none(star.get('name')))
|
||||
metadata['cast'] = cast
|
||||
|
||||
tags = []
|
||||
for tag in series_meta.get('tag', []):
|
||||
tags.append(strip_or_none(tag.get('name')))
|
||||
metadata['tag'] = tags
|
||||
|
||||
entries = []
|
||||
seasons_list = self._call_api(
|
||||
'https://api.rctiplus.com/api/v1/program/%s/season' % series_id, display_id, 'Downloading seasons list JSON')[0]
|
||||
for season in seasons_list:
|
||||
entries.append(self._entries('https://api.rctiplus.com/api/v2/program/%s/episode?season=%s' % (series_id, season['season']),
|
||||
display_id, 'Downloading season %s episode entries' % season['season'], metadata))
|
||||
|
||||
entries.append(self._entries('https://api.rctiplus.com/api/v2/program/%s/clip?content_id=0' % series_id,
|
||||
display_id, 'Downloading clip entries', metadata))
|
||||
entries.append(self._entries('https://api.rctiplus.com/api/v2/program/%s/extra?content_id=0' % series_id,
|
||||
display_id, 'Downloading extra entries', metadata))
|
||||
|
||||
return self.playlist_result(itertools.chain(*entries), series_id, series_meta.get('title'), series_meta.get('summary'), **metadata)
|
||||
@@ -2,10 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
)
|
||||
from ..utils import js_to_json
|
||||
import re
|
||||
import json
|
||||
import urllib.parse
|
||||
import base64
|
||||
|
||||
|
||||
class RTPIE(InfoExtractor):
|
||||
@@ -25,6 +26,22 @@ class RTPIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_RX_OBFUSCATION = re.compile(r'''(?xs)
|
||||
atob\s*\(\s*decodeURIComponent\s*\(\s*
|
||||
(\[[0-9A-Za-z%,'"]*\])
|
||||
\s*\.\s*join\(\s*(?:""|'')\s*\)\s*\)\s*\)
|
||||
''')
|
||||
|
||||
def __unobfuscate(self, data, *, video_id):
|
||||
if data.startswith('{'):
|
||||
data = self._RX_OBFUSCATION.sub(
|
||||
lambda m: json.dumps(
|
||||
base64.b64decode(urllib.parse.unquote(
|
||||
''.join(self._parse_json(m.group(1), video_id))
|
||||
)).decode('iso-8859-1')),
|
||||
data)
|
||||
return js_to_json(data)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -32,30 +49,46 @@ class RTPIE(InfoExtractor):
|
||||
title = self._html_search_meta(
|
||||
'twitter:title', webpage, display_name='title', fatal=True)
|
||||
|
||||
config = self._parse_json(self._search_regex(
|
||||
r'(?s)RTPPlayer\(({.+?})\);', webpage,
|
||||
'player config'), video_id, js_to_json)
|
||||
file_url = config['file']
|
||||
ext = determine_ext(file_url)
|
||||
if ext == 'm3u8':
|
||||
file_key = config.get('fileKey')
|
||||
formats = self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=file_key)
|
||||
if file_key:
|
||||
formats.append({
|
||||
'url': 'https://cdn-ondemand.rtp.pt' + file_key,
|
||||
'quality': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
f, config = self._search_regex(
|
||||
r'''(?sx)
|
||||
var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*
|
||||
var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
|
||||
''', webpage,
|
||||
'player config', group=('f', 'config'))
|
||||
|
||||
f = self._parse_json(
|
||||
f, video_id,
|
||||
lambda data: self.__unobfuscate(data, video_id=video_id))
|
||||
config = self._parse_json(
|
||||
config, video_id,
|
||||
lambda data: self.__unobfuscate(data, video_id=video_id))
|
||||
|
||||
formats = []
|
||||
if isinstance(f, dict):
|
||||
f_hls = f.get('hls')
|
||||
if f_hls is not None:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
f_hls, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'))
|
||||
|
||||
f_dash = f.get('dash')
|
||||
if f_dash is not None:
|
||||
formats.extend(self._extract_mpd_formats(f_dash, video_id, mpd_id='dash'))
|
||||
else:
|
||||
formats = [{
|
||||
'url': file_url,
|
||||
'ext': ext,
|
||||
}]
|
||||
if config.get('mediaType') == 'audio':
|
||||
for f in formats:
|
||||
f['vcodec'] = 'none'
|
||||
formats.append({
|
||||
'format_id': 'f',
|
||||
'url': f,
|
||||
'vcodec': 'none' if config.get('mediaType') == 'audio' else None,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
|
||||
vtt = config.get('vtt')
|
||||
if vtt is not None:
|
||||
for lcode, lname, url in vtt:
|
||||
subtitles.setdefault(lcode, []).append({
|
||||
'name': lname,
|
||||
'url': url,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -63,4 +96,5 @@ class RTPIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'description': self._html_search_meta(['description', 'twitter:description'], webpage),
|
||||
'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
|
||||
'subtitles': subtitles,
|
||||
}
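
The RTP change above decodes an obfuscated player config before JSON-parsing it. An illustrative standalone sketch of that unobfuscation step (not part of the diff; the sample payload is invented, and the real extractor uses _parse_json so single-quoted arrays also work):

# Sketch of the __unobfuscate idea: every atob(decodeURIComponent([...].join("")))
# call in the page source is replaced by the decoded string literal.
import base64
import json
import re
import urllib.parse

_RX = re.compile(r'''(?xs)
    atob\s*\(\s*decodeURIComponent\s*\(\s*
    (\[[0-9A-Za-z%,'"]*\])
    \s*\.\s*join\(\s*(?:""|'')\s*\)\s*\)\s*\)
''')

def unobfuscate(data):
    return _RX.sub(
        lambda m: json.dumps(
            base64.b64decode(urllib.parse.unquote(''.join(json.loads(m.group(1)))))
            .decode('iso-8859-1')),
        data)

sample = '{"hls": atob(decodeURIComponent(["aHR0cHM6Ly9leGFt","cGxlLmNvbS9hLm1wZA%3D%3D"].join("")))}'
print(unobfuscate(sample))  # {"hls": "https://example.com/a.mpd"}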
|
||||
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
|
||||
import itertools
|
||||
import re
|
||||
import json
|
||||
import random
|
||||
# import random
|
||||
|
||||
from .common import (
|
||||
InfoExtractor,
|
||||
@@ -164,23 +164,11 @@ class SoundcloudIE(InfoExtractor):
|
||||
},
|
||||
# downloadable song
|
||||
{
|
||||
'url': 'https://soundcloud.com/oddsamples/bus-brakes',
|
||||
'md5': '7624f2351f8a3b2e7cd51522496e7631',
|
||||
'url': 'https://soundcloud.com/the80m/the-following',
|
||||
'md5': '9ffcddb08c87d74fb5808a3c183a1d04',
|
||||
'info_dict': {
|
||||
'id': '128590877',
|
||||
'ext': 'mp3',
|
||||
'title': 'Bus Brakes',
|
||||
'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
|
||||
'uploader': 'oddsamples',
|
||||
'uploader_id': '73680509',
|
||||
'timestamp': 1389232924,
|
||||
'upload_date': '20140109',
|
||||
'duration': 17.346,
|
||||
'license': 'cc-by-sa',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'id': '343609555',
|
||||
'ext': 'wav',
|
||||
},
|
||||
},
|
||||
# private link, downloadable format
|
||||
@@ -317,12 +305,13 @@ class SoundcloudIE(InfoExtractor):
|
||||
raise
|
||||
|
||||
def _real_initialize(self):
|
||||
self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or "T5R4kgWS2PRf6lzLyIravUMnKlbIxQag" # 'EXLwg5lHTO2dslU5EePe3xkw0m1h86Cd' # 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
|
||||
self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'fXuVKzsVXlc6tzniWWS31etd7VHWFUuN' # persistent `client_id`
|
||||
self._login()
|
||||
|
||||
_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
|
||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
|
||||
_API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
|
||||
_API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
|
||||
_API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
|
||||
_access_token = None
|
||||
_HEADERS = {}
|
||||
_NETRC_MACHINE = 'soundcloud'
|
||||
@@ -332,6 +321,23 @@ class SoundcloudIE(InfoExtractor):
|
||||
if username is None:
|
||||
return
|
||||
|
||||
if username == 'oauth' and password is not None:
|
||||
self._access_token = password
|
||||
query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
|
||||
payload = {'session': {'access_token': self._access_token}}
|
||||
token_verification = sanitized_Request(self._API_VERIFY_AUTH_TOKEN % query, json.dumps(payload).encode('utf-8'))
|
||||
response = self._download_json(token_verification, None, note='Verifying login token...', fatal=False)
|
||||
if response is not False:
|
||||
self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
|
||||
self.report_login()
|
||||
else:
|
||||
self.report_warning('Provided authorization token seems to be invalid. Continue as guest')
|
||||
elif username is not None:
|
||||
self.report_warning(
|
||||
'Login using username and password is not currently supported. '
|
||||
'Use "--user oauth --password <oauth_token>" to login using an oauth token')
|
||||
|
||||
r'''
|
||||
def genDevId():
|
||||
def genNumBlock():
|
||||
return ''.join([str(random.randrange(10)) for i in range(6)])
|
||||
@@ -358,6 +364,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
self.report_warning('Unable to get access token, login may have failed')
|
||||
else:
|
||||
self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
|
||||
'''
|
||||
|
||||
# signature generation
|
||||
def sign(self, user, pw, clid):
|
||||
@@ -370,9 +377,9 @@ class SoundcloudIE(InfoExtractor):
|
||||
b = 37
|
||||
k = 37
|
||||
c = 5
|
||||
n = "0763ed7314c69015fd4a0dc16bbf4b90" # _KEY
|
||||
y = "8" # _REV
|
||||
r = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36" # _USER_AGENT
|
||||
n = '0763ed7314c69015fd4a0dc16bbf4b90' # _KEY
|
||||
y = '8' # _REV
|
||||
r = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36' # _USER_AGENT
|
||||
e = user # _USERNAME
|
||||
t = clid # _CLIENT_ID
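
The oauth-token login added above amounts to one token-verification request followed by reusing the token as an Authorization header on later API calls. A rough standalone sketch (not part of the diff; client id and token values are placeholders):

# Sketch of the '--username oauth --password <token>' flow.
import json
import urllib.error
import urllib.request

CLIENT_ID = '<client_id>'      # placeholder
OAUTH_TOKEN = '<oauth_token>'  # placeholder, taken from a logged-in browser session

def verify_oauth_token(token):
    url = 'https://api-auth.soundcloud.com/connect/session?client_id=' + CLIENT_ID
    payload = json.dumps({'session': {'access_token': token}}).encode('utf-8')
    try:
        urllib.request.urlopen(urllib.request.Request(url, data=payload))
    except urllib.error.HTTPError:
        return {}  # token rejected; the extractor falls back to guest access
    return {'Authorization': 'OAuth ' + token}  # sent with every later API call

headers = verify_oauth_token(OAUTH_TOKEN)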
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class TBSIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|watchtnt|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
|
||||
'info_dict': {
|
||||
@@ -45,7 +45,8 @@ class TBSIE(TurnerBaseIE):
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
|
||||
webpage, 'drupal setting'), display_id)
|
||||
video_data = next(v for v in drupal_settings['turner_playlist'] if v.get('url') == path)
|
||||
isLive = 'watchtnt' in path
|
||||
video_data = next(v for v in drupal_settings['turner_playlist'] if isLive or v.get('url') == path)
|
||||
|
||||
media_id = video_data['mediaID']
|
||||
title = video_data['title']
|
||||
@@ -56,7 +57,8 @@ class TBSIE(TurnerBaseIE):
|
||||
media_id, tokenizer_query, {
|
||||
'url': url,
|
||||
'site_name': site[:3].upper(),
|
||||
'auth_required': video_data.get('authRequired') == '1',
|
||||
'auth_required': video_data.get('authRequired') == '1' or isLive,
|
||||
'is_live': isLive
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
@@ -85,5 +87,6 @@ class TBSIE(TurnerBaseIE):
|
||||
'season_number': int_or_none(video_data.get('season')),
|
||||
'episode_number': int_or_none(video_data.get('episode')),
|
||||
'thumbnails': thumbnails,
|
||||
'is_live': isLive
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -221,6 +221,7 @@ class TurnerBaseIE(AdobePassIE):
|
||||
}
|
||||
|
||||
def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
|
||||
is_live = ap_data.get('is_live')
|
||||
streams_data = self._download_json(
|
||||
'http://medium.ngtv.io/media/%s/tv' % media_id,
|
||||
media_id)['media']['tv']
|
||||
@@ -237,11 +238,11 @@ class TurnerBaseIE(AdobePassIE):
|
||||
'http://token.ngtv.io/token/token_spe',
|
||||
m3u8_url, media_id, ap_data or {}, tokenizer_query)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', live=is_live, fatal=False))
|
||||
|
||||
duration = float_or_none(stream_data.get('totalRuntime'))
|
||||
|
||||
if not chapters:
|
||||
if not chapters and not is_live:
|
||||
for chapter in stream_data.get('contentSegments', []):
|
||||
start_time = float_or_none(chapter.get('start'))
|
||||
chapter_duration = float_or_none(chapter.get('duration'))
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_codecs,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
@@ -19,6 +20,7 @@ from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
compat_struct_pack,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
@@ -95,9 +97,13 @@ class VideaIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
query = {'v': video_id}
|
||||
player_page = self._download_webpage(
|
||||
'https://videa.hu/player', video_id, query=query)
|
||||
|
||||
video_page = self._download_webpage(url, video_id)
|
||||
|
||||
player_url = self._search_regex(
|
||||
r'<iframe.*?src="(/player\?[^"]+)"', video_page, 'player url')
|
||||
player_url = urljoin(url, player_url)
|
||||
player_page = self._download_webpage(player_url, video_id)
|
||||
|
||||
nonce = self._search_regex(
|
||||
r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
|
||||
@@ -107,6 +113,7 @@ class VideaIE(InfoExtractor):
|
||||
for i in range(0, 32):
|
||||
result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
|
||||
|
||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
|
||||
random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
|
||||
query['_s'] = random_seed
|
||||
query['_t'] = result[:16]
|
||||
|
||||
@@ -1,39 +1,28 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import hmac
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
HEADRequest,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class VikiBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
|
||||
_API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
|
||||
_API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
|
||||
_API_URL_TEMPLATE = 'https://api.viki.io%s'
|
||||
|
||||
_DEVICE_ID = '86085977d' # used for android api
|
||||
_APP = '100005a'
|
||||
_APP_VERSION = '6.0.0'
|
||||
_APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
|
||||
_APP_VERSION = '6.11.3'
|
||||
_APP_SECRET = 'd96704b180208dbb2efa30fe44c48bd8690441af9f567ba8fd710a72badc85198f7472'
|
||||
|
||||
_GEO_BYPASS = False
|
||||
_NETRC_MACHINE = 'viki'
|
||||
@@ -46,53 +35,57 @@ class VikiBaseIE(InfoExtractor):
|
||||
'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
|
||||
}
|
||||
|
||||
def _prepare_call(self, path, timestamp=None, post_data=None):
|
||||
def _stream_headers(self, timestamp, sig):
|
||||
return {
|
||||
'X-Viki-manufacturer': 'vivo',
|
||||
'X-Viki-device-model': 'vivo 1606',
|
||||
'X-Viki-device-os-ver': '6.0.1',
|
||||
'X-Viki-connection-type': 'WIFI',
|
||||
'X-Viki-carrier': '',
|
||||
'X-Viki-as-id': '100005a-1625321982-3932',
|
||||
'timestamp': str(timestamp),
|
||||
'signature': str(sig),
|
||||
'x-viki-app-ver': self._APP_VERSION
|
||||
}
|
||||
|
||||
def _api_query(self, path, version=4, **kwargs):
|
||||
path += '?' if '?' not in path else '&'
|
||||
if not timestamp:
|
||||
timestamp = int(time.time())
|
||||
query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
|
||||
query = f'/v{version}/{path}app={self._APP}'
|
||||
if self._token:
|
||||
query += '&token=%s' % self._token
|
||||
return query + ''.join(f'&{name}={val}' for name, val in kwargs.items())
|
||||
|
||||
def _sign_query(self, path):
|
||||
timestamp = int(time.time())
|
||||
query = self._api_query(path, version=5)
|
||||
sig = hmac.new(
|
||||
self._APP_SECRET.encode('ascii'),
|
||||
query.encode('ascii'),
|
||||
hashlib.sha1
|
||||
).hexdigest()
|
||||
url = self._API_URL_TEMPLATE % (query, sig)
|
||||
return sanitized_Request(
|
||||
url, json.dumps(post_data).encode('utf-8')) if post_data else url
|
||||
self._APP_SECRET.encode('ascii'), f'{query}&t={timestamp}'.encode('ascii'), hashlib.sha1).hexdigest()
|
||||
return timestamp, sig, self._API_URL_TEMPLATE % query
|
||||
|
||||
def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
|
||||
def _call_api(
|
||||
self, path, video_id, note='Downloading JSON metadata', data=None, query=None, fatal=True):
|
||||
if query is None:
|
||||
timestamp, sig, url = self._sign_query(path)
|
||||
else:
|
||||
url = self._API_URL_TEMPLATE % self._api_query(path, version=4)
|
||||
resp = self._download_json(
|
||||
self._prepare_call(path, timestamp, post_data),
|
||||
video_id, note,
|
||||
headers={
|
||||
'x-client-user-agent': std_headers['User-Agent'],
|
||||
'x-viki-as-id': self._APP,
|
||||
'x-viki-app-ver': self._APP_VERSION,
|
||||
})
|
||||
|
||||
error = resp.get('error')
|
||||
if error:
|
||||
if error == 'invalid timestamp':
|
||||
resp = self._download_json(
|
||||
self._prepare_call(path, int(resp['current_timestamp']), post_data),
|
||||
video_id, '%s (retry)' % note,
|
||||
headers={
|
||||
'x-client-user-agent': std_headers['User-Agent'],
|
||||
'x-viki-as-id': self._APP,
|
||||
'x-viki-app-ver': self._APP_VERSION,
|
||||
})
|
||||
error = resp.get('error')
|
||||
if error:
|
||||
self._raise_error(resp['error'])
|
||||
url, video_id, note, fatal=fatal, query=query,
|
||||
data=json.dumps(data).encode('utf-8') if data else None,
|
||||
headers=({'x-viki-app-ver': self._APP_VERSION} if data
|
||||
else self._stream_headers(timestamp, sig) if query is None
|
||||
else None)) or {}
|
||||
|
||||
self._raise_error(resp.get('error'), fatal)
|
||||
return resp
|
||||
|
||||
def _raise_error(self, error):
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error),
|
||||
expected=True)
|
||||
def _raise_error(self, error, fatal=True):
|
||||
if error is None:
|
||||
return
|
||||
msg = '%s said: %s' % (self.IE_NAME, error)
|
||||
if fatal:
|
||||
raise ExtractorError(msg, expected=True)
|
||||
else:
|
||||
self.report_warning(msg)
|
||||
|
||||
def _check_errors(self, data):
|
||||
for reason, status in (data.get('blocking') or {}).items():
|
||||
@@ -101,9 +94,10 @@ class VikiBaseIE(InfoExtractor):
|
||||
if reason == 'geo':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
elif reason == 'paywall':
|
||||
if try_get(data, lambda x: x['paywallable']['tvod']):
|
||||
self._raise_error('This video is for rent only or TVOD (Transactional Video On demand)')
|
||||
self.raise_login_required(message)
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, message), expected=True)
|
||||
self._raise_error(message)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@@ -113,29 +107,17 @@ class VikiBaseIE(InfoExtractor):
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'login_id': username,
|
||||
'password': password,
|
||||
}
|
||||
|
||||
login = self._call_api(
|
||||
'sessions.json', None,
|
||||
'Logging in', post_data=login_form)
|
||||
|
||||
self._token = login.get('token')
|
||||
self._token = self._call_api(
|
||||
'sessions.json', None, 'Logging in', fatal=False,
|
||||
data={'username': username, 'password': password}).get('token')
|
||||
if not self._token:
|
||||
self.report_warning('Unable to get session token, login has probably failed')
|
||||
self.report_warning('Login Failed: Unable to get session token')
|
||||
|
||||
@staticmethod
|
||||
def dict_selection(dict_obj, preferred_key, allow_fallback=True):
|
||||
def dict_selection(dict_obj, preferred_key):
|
||||
if preferred_key in dict_obj:
|
||||
return dict_obj.get(preferred_key)
|
||||
|
||||
if not allow_fallback:
|
||||
return
|
||||
|
||||
filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()]))
|
||||
return filtered_dict[0] if filtered_dict else None
|
||||
return dict_obj[preferred_key]
|
||||
return (list(filter(None, dict_obj.values())) or [None])[0]
|
||||
|
||||
|
||||
class VikiIE(VikiBaseIE):
|
||||
@@ -266,18 +248,10 @@ class VikiIE(VikiBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
resp = self._download_json(
|
||||
'https://www.viki.com/api/videos/' + video_id,
|
||||
video_id, 'Downloading video JSON', headers={
|
||||
'x-client-user-agent': std_headers['User-Agent'],
|
||||
'x-viki-app-ver': '3.0.0',
|
||||
})
|
||||
video = resp['video']
|
||||
|
||||
video = self._call_api(f'videos/{video_id}.json', video_id, 'Downloading video JSON', query={})
|
||||
self._check_errors(video)
|
||||
|
||||
title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
|
||||
title = try_get(video, lambda x: x['titles']['en'], str)
|
||||
episode_number = int_or_none(video.get('number'))
|
||||
if not title:
|
||||
title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id
|
||||
@@ -285,116 +259,46 @@ class VikiIE(VikiBaseIE):
|
||||
container_title = self.dict_selection(container_titles, 'en')
|
||||
title = '%s - %s' % (container_title, title)
|
||||
|
||||
description = self.dict_selection(video.get('descriptions', {}), 'en')
|
||||
thumbnails = [{
|
||||
'id': thumbnail_id,
|
||||
'url': thumbnail['url'],
|
||||
} for thumbnail_id, thumbnail in (video.get('images') or {}).items() if thumbnail.get('url')]
|
||||
|
||||
like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
|
||||
resp = self._call_api(
|
||||
'playback_streams/%s.json?drms=dt1,dt2&device_id=%s' % (video_id, self._DEVICE_ID),
|
||||
video_id, 'Downloading video streams JSON')['main'][0]
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_id, thumbnail in (video.get('images') or {}).items():
|
||||
thumbnails.append({
|
||||
'id': thumbnail_id,
|
||||
'url': thumbnail.get('url'),
|
||||
})
|
||||
stream_id = try_get(resp, lambda x: x['properties']['track']['stream_id'])
|
||||
subtitles = dict((lang, [{
|
||||
'ext': ext,
|
||||
'url': self._API_URL_TEMPLATE % self._api_query(
|
||||
f'videos/{video_id}/auth_subtitles/{lang}.{ext}', stream_id=stream_id)
|
||||
} for ext in ('srt', 'vtt')]) for lang in (video.get('subtitle_completions') or {}).keys())
|
||||
|
||||
subtitles = {}
|
||||
for subtitle_lang, _ in (video.get('subtitle_completions') or {}).items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'ext': subtitles_format,
|
||||
'url': self._prepare_call(
|
||||
'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
|
||||
} for subtitles_format in ('srt', 'vtt')]
|
||||
mpd_url = resp['url']
|
||||
# 1080p is hidden in another mpd which can be found in the current manifest content
|
||||
mpd_content = self._download_webpage(mpd_url, video_id, note='Downloading initial MPD manifest')
|
||||
mpd_url = self._search_regex(
|
||||
r'(?mi)<BaseURL>(http.+.mpd)', mpd_content, 'new manifest', default=mpd_url)
|
||||
formats = self._extract_mpd_formats(mpd_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
result = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'description': self.dict_selection(video.get('descriptions', {}), 'en'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'timestamp': parse_iso8601(video.get('created_at')),
|
||||
'uploader': video.get('author'),
|
||||
'uploader_url': video.get('author_url'),
|
||||
'like_count': like_count,
|
||||
'like_count': int_or_none(try_get(video, lambda x: x['likes']['count'])),
|
||||
'age_limit': parse_age_limit(video.get('rating')),
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
formats = []
|
||||
|
||||
def add_format(format_id, format_dict, protocol='http'):
|
||||
# rtmps URLs does not seem to work
|
||||
if protocol == 'rtmps':
|
||||
return
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url:
|
||||
return
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
|
||||
stream = qs.get('stream', [None])[0]
|
||||
if stream:
|
||||
format_url = base64.b64decode(stream).decode()
|
||||
if format_id in ('m3u8', 'hls'):
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native',
|
||||
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
||||
# Despite CODECS metadata in m3u8 all video-only formats
|
||||
# are actually video+audio
|
||||
for f in m3u8_formats:
|
||||
if not self.get_param('allow_unplayable_formats') and '_drm/index_' in f['url']:
|
||||
continue
|
||||
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
||||
f['acodec'] = None
|
||||
formats.append(f)
|
||||
elif format_id in ('mpd', 'dash'):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, 'mpd-%s' % protocol, fatal=False))
|
||||
elif format_url.startswith('rtmp'):
|
||||
mobj = re.search(
|
||||
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
|
||||
format_url)
|
||||
if not mobj:
|
||||
return
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format_id,
|
||||
'ext': 'flv',
|
||||
'url': mobj.group('url'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
'app': mobj.group('app'),
|
||||
'page_url': url,
|
||||
})
|
||||
else:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': '%s-%s' % (format_id, protocol),
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)),
|
||||
'filesize': int_or_none(urlh.headers.get('Content-Length')),
|
||||
})
|
||||
|
||||
for format_id, format_dict in (resp.get('streams') or {}).items():
|
||||
add_format(format_id, format_dict)
|
||||
if not formats:
|
||||
streams = self._call_api(
|
||||
'videos/%s/streams.json' % video_id, video_id,
|
||||
'Downloading video streams JSON')
|
||||
|
||||
if 'external' in streams:
|
||||
result.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': streams['external']['url'],
|
||||
})
|
||||
return result
|
||||
|
||||
for format_id, stream_dict in streams.items():
|
||||
for protocol, format_dict in stream_dict.items():
|
||||
add_format(format_id, format_dict, protocol)
|
||||
self._sort_formats(formats)
|
||||
|
||||
result['formats'] = formats
|
||||
return result
|
||||
|
||||
|
||||
class VikiChannelIE(VikiBaseIE):
|
||||
IE_NAME = 'viki:channel'
|
||||
@@ -406,7 +310,7 @@ class VikiChannelIE(VikiBaseIE):
|
||||
'title': 'Boys Over Flowers',
|
||||
'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
|
||||
},
|
||||
'playlist_mincount': 71,
|
||||
'playlist_mincount': 51,
|
||||
}, {
|
||||
'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
|
||||
'info_dict': {
|
||||
@@ -427,33 +331,35 @@ class VikiChannelIE(VikiBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PER_PAGE = 25
|
||||
_video_types = ('episodes', 'movies', 'clips', 'trailers')
|
||||
|
||||
def _entries(self, channel_id):
|
||||
params = {
|
||||
'app': self._APP, 'token': self._token, 'only_ids': 'true',
|
||||
'direction': 'asc', 'sort': 'number', 'per_page': 30
|
||||
}
|
||||
video_types = self._configuration_arg('video_types') or self._video_types
|
||||
for video_type in video_types:
|
||||
if video_type not in self._video_types:
|
||||
self.report_warning(f'Unknown video_type: {video_type}')
|
||||
page_num = 0
|
||||
while True:
|
||||
page_num += 1
|
||||
params['page'] = page_num
|
||||
res = self._call_api(
|
||||
f'containers/{channel_id}/{video_type}.json', channel_id, query=params, fatal=False,
|
||||
note='Downloading %s JSON page %d' % (video_type.title(), page_num))
|
||||
|
||||
for video_id in res.get('response') or []:
|
||||
yield self.url_result(f'https://www.viki.com/videos/{video_id}', VikiIE.ie_key(), video_id)
|
||||
if not res.get('more'):
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
channel = self._call_api(
|
||||
'containers/%s.json' % channel_id, channel_id,
|
||||
'Downloading channel JSON')
|
||||
|
||||
channel = self._call_api('containers/%s.json' % channel_id, channel_id, 'Downloading channel JSON')
|
||||
self._check_errors(channel)
|
||||
|
||||
title = self.dict_selection(channel['titles'], 'en')
|
||||
|
||||
description = self.dict_selection(channel['descriptions'], 'en')
|
||||
|
||||
entries = []
|
||||
for video_type in ('episodes', 'clips', 'movies'):
|
||||
for page_num in itertools.count(1):
|
||||
page = self._call_api(
|
||||
'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
|
||||
% (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
|
||||
'Downloading %s JSON page #%d' % (video_type, page_num))
|
||||
for video in page['response']:
|
||||
video_id = video['id']
|
||||
entries.append(self.url_result(
|
||||
'https://www.viki.com/videos/%s' % video_id, 'Viki'))
|
||||
if not page['pagination']['next']:
|
||||
break
|
||||
|
||||
return self.playlist_result(entries, channel_id, title, description)
|
||||
return self.playlist_result(
|
||||
self._entries(channel_id), channel_id,
|
||||
self.dict_selection(channel['titles'], 'en'),
|
||||
self.dict_selection(channel['descriptions'], 'en'))
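
The Viki rewrite above moves from pre-signed URLs to per-request signing: the query string is HMAC-SHA1-signed together with a timestamp, and both are sent as headers. A rough sketch of what _sign_query computes (not part of the diff; the path and video id are placeholders, constants are copied from the hunk):

# Sketch of Viki's request signing.
import hashlib
import hmac
import time

APP = '100005a'
APP_SECRET = 'd96704b180208dbb2efa30fe44c48bd8690441af9f567ba8fd710a72badc85198f7472'

def sign_query(path, token=None):
    path += '?' if '?' not in path else '&'
    query = f'/v5/{path}app={APP}'
    if token:
        query += f'&token={token}'
    timestamp = int(time.time())
    sig = hmac.new(APP_SECRET.encode('ascii'),
                   f'{query}&t={timestamp}'.encode('ascii'),
                   hashlib.sha1).hexdigest()
    # timestamp and sig go into the 'timestamp' and 'signature' request headers,
    # alongside the X-Viki-* device headers built by _stream_headers.
    return timestamp, sig, 'https://api.viki.io' + query

ts, sig, url = sign_query('playback_streams/0000000v.json?drms=dt1,dt2&device_id=86085977d')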
|
||||
@@ -178,9 +178,15 @@ class VLiveIE(VLiveBaseIE):
|
||||
if video_type == 'VOD':
|
||||
inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
|
||||
vod_id = video['vodId']
|
||||
return merge_dicts(
|
||||
info_dict = merge_dicts(
|
||||
get_common_fields(),
|
||||
self._extract_video_info(video_id, vod_id, inkey))
|
||||
thumbnail = video.get('thumb')
|
||||
if thumbnail:
|
||||
if not info_dict.get('thumbnails') and info_dict.get('thumbnail'):
|
||||
info_dict['thumbnails'] = [{'url': info_dict.pop('thumbnail')}]
|
||||
info_dict.setdefault('thumbnails', []).append({'url': thumbnail, 'preference': 1})
|
||||
return info_dict
|
||||
elif video_type == 'LIVE':
|
||||
status = video.get('status')
|
||||
if status == 'ON_AIR':
|
||||
|
||||
@@ -22,6 +22,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class YahooIE(InfoExtractor):
|
||||
@@ -38,6 +39,7 @@ class YahooIE(InfoExtractor):
|
||||
'timestamp': 1369812016,
|
||||
'upload_date': '20130529',
|
||||
},
|
||||
'skip': 'No longer exists',
|
||||
}, {
|
||||
'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
|
||||
'md5': '7993e572fac98e044588d0b5260f4352',
|
||||
@@ -50,6 +52,7 @@ class YahooIE(InfoExtractor):
|
||||
'timestamp': 1406838636,
|
||||
'upload_date': '20140731',
|
||||
},
|
||||
'skip': 'Unfortunately, this video is not available in your region',
|
||||
}, {
|
||||
'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
|
||||
'md5': '71298482f7c64cbb7fa064e4553ff1c1',
|
||||
@@ -61,7 +64,8 @@ class YahooIE(InfoExtractor):
|
||||
'duration': 97,
|
||||
'timestamp': 1414489862,
|
||||
'upload_date': '20141028',
|
||||
}
|
||||
},
|
||||
'skip': 'No longer exists',
|
||||
}, {
|
||||
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
|
||||
'md5': '88e209b417f173d86186bef6e4d1f160',
|
||||
@@ -120,6 +124,7 @@ class YahooIE(InfoExtractor):
|
||||
'season_number': 6,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'skip': 'No longer exists',
|
||||
}, {
|
||||
# ytwnews://cavideo/
|
||||
'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
|
||||
@@ -156,7 +161,7 @@ class YahooIE(InfoExtractor):
|
||||
'id': '352CFDOQrKg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kyndal Inskeep "Performs the Hell Out of" Sia\'s "Elastic Heart" - The Voice Knockouts 2019',
|
||||
'description': 'md5:35b61e94c2ae214bc965ff4245f80d11',
|
||||
'description': 'md5:7fe8e3d5806f96002e55f190d1d94479',
|
||||
'uploader': 'The Voice',
|
||||
'uploader_id': 'NBCTheVoice',
|
||||
'upload_date': '20191029',
|
||||
@@ -165,7 +170,7 @@ class YahooIE(InfoExtractor):
|
||||
'params': {
|
||||
'playlistend': 2,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404'],
|
||||
'expected_warnings': ['HTTP Error 404', 'Ignoring subtitle tracks'],
|
||||
}, {
|
||||
'url': 'https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html',
|
||||
'only_matching': True,
|
||||
@@ -280,12 +285,13 @@ class YahooIE(InfoExtractor):
|
||||
else:
|
||||
country = country.split('-')[0]
|
||||
|
||||
item = self._download_json(
|
||||
items = self._download_json(
|
||||
'https://%s.yahoo.com/caas/content/article' % country, display_id,
|
||||
'Downloading content JSON metadata', query={
|
||||
'url': url
|
||||
})['items'][0]['data']['partnerData']
|
||||
})['items'][0]
|
||||
|
||||
item = items['data']['partnerData']
|
||||
if item.get('type') != 'video':
|
||||
entries = []
|
||||
|
||||
@@ -299,9 +305,19 @@ class YahooIE(InfoExtractor):
|
||||
for e in (item.get('body') or []):
|
||||
if e.get('type') == 'videoIframe':
|
||||
iframe_url = e.get('url')
|
||||
if not iframe_url:
|
||||
continue
|
||||
if iframe_url:
|
||||
entries.append(self.url_result(iframe_url))
|
||||
|
||||
if item.get('type') == 'storywithleadvideo':
|
||||
iframe_url = try_get(item, lambda x: x['meta']['player']['url'])
|
||||
if iframe_url:
|
||||
entries.append(self.url_result(iframe_url))
|
||||
else:
|
||||
self.report_warning("Yahoo didn't provide an iframe url for this storywithleadvideo")
|
||||
|
||||
if items.get('markup'):
|
||||
entries.extend(
|
||||
self.url_result(yt_url) for yt_url in YoutubeIE._extract_urls(items['markup']))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, item.get('uuid'),
|
||||
@@ -346,7 +362,7 @@ class YahooSearchIE(SearchInfoExtractor):
|
||||
|
||||
class YahooGyaOPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'yahoo:gyao:player'
|
||||
_VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:player|episode/[^/]+)|streaming\.yahoo\.co\.jp/c/y)/(?P<id>\d+/v\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:player|episode(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/c/y)/(?P<id>\d+/v\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://gyao.yahoo.co.jp/player/00998/v00818/v0000000000000008564/',
|
||||
'info_dict': {
|
||||
@@ -368,6 +384,9 @@ class YahooGyaOPlayerIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://gyao.yahoo.co.jp/episode/5fa1226c-ef8d-4e93-af7a-fd92f4e30597',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
|
||||
File diff suppressed because it is too large
@@ -19,6 +19,7 @@ from .utils import (
|
||||
preferredencoding,
|
||||
write_string,
|
||||
)
|
||||
from .cookies import SUPPORTED_BROWSERS
|
||||
from .version import __version__
|
||||
|
||||
from .downloader.external import list_external_downloaders
|
||||
@@ -137,14 +138,21 @@ def parseOpts(overrideArguments=None):
|
||||
else:
|
||||
raise optparse.OptionValueError(
|
||||
'wrong %s formatting; it should be %s, not "%s"' % (opt_str, option.metavar, value))
|
||||
val = process(val) if callable(process) else val
|
||||
try:
|
||||
val = process(val) if process else val
|
||||
except Exception as err:
|
||||
raise optparse.OptionValueError(
|
||||
'wrong %s formatting; %s' % (opt_str, err))
|
||||
for key in keys:
|
||||
out_dict[key] = val
|
||||
|
||||
# No need to wrap help messages if we're on a wide console
|
||||
columns = compat_get_terminal_size().columns
|
||||
max_width = columns if columns else 80
|
||||
max_help_position = 80
|
||||
# 47% is chosen because that is how README.md is currently formatted
|
||||
# and moving help text even further to the right is undesirable.
|
||||
# This can be reduced in the future to get a prettier output
|
||||
max_help_position = int(0.47 * max_width)
|
||||
|
||||
fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
|
||||
fmt.format_option_strings = _format_option_string
|
||||
@@ -520,8 +528,12 @@ def parseOpts(overrideArguments=None):
|
||||
help="Don't give any special preference to free containers (default)")
|
||||
video_format.add_option(
|
||||
'--check-formats',
|
||||
action='store_true', dest='check_formats', default=False,
|
||||
help="Check that the formats selected are actually downloadable (Experimental)")
|
||||
action='store_true', dest='check_formats', default=None,
|
||||
help='Check that the formats selected are actually downloadable')
|
||||
video_format.add_option(
|
||||
'--no-check-formats',
|
||||
action='store_false', dest='check_formats',
|
||||
help='Do not check that the formats selected are actually downloadable')
|
||||
video_format.add_option(
|
||||
'-F', '--list-formats',
|
||||
action='store_true', dest='listformats',
|
||||
@@ -716,7 +728,8 @@ def parseOpts(overrideArguments=None):
|
||||
help=(
|
||||
'Give these arguments to the external downloader. '
|
||||
'Specify the downloader name and the arguments separated by a colon ":". '
|
||||
'You can use this option multiple times (Alias: --external-downloader-args)'))
|
||||
'You can use this option multiple times to give different arguments to different downloaders '
|
||||
'(Alias: --external-downloader-args)'))
|
||||
|
||||
workarounds = optparse.OptionGroup(parser, 'Workarounds')
|
||||
workarounds.add_option(
|
||||
@@ -1078,7 +1091,21 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option(
|
||||
'--no-cookies',
|
||||
action='store_const', const=None, dest='cookiefile', metavar='FILE',
|
||||
help='Do not read/dump cookies (default)')
|
||||
help='Do not read/dump cookies from/to file (default)')
|
||||
filesystem.add_option(
|
||||
'--cookies-from-browser',
|
||||
dest='cookiesfrombrowser', metavar='BROWSER[:PROFILE]',
|
||||
help=(
|
||||
'Load cookies from a user profile of the given web browser. '
|
||||
'Currently supported browsers are: {}. '
|
||||
'You can specify the user profile name or directory using '
|
||||
'"BROWSER:PROFILE_NAME" or "BROWSER:PROFILE_PATH". '
|
||||
'If no profile is given, the most recently accessed one is used'.format(
|
||||
'|'.join(sorted(SUPPORTED_BROWSERS)))))
|
||||
filesystem.add_option(
|
||||
'--no-cookies-from-browser',
|
||||
action='store_const', const=None, dest='cookiesfrombrowser',
|
||||
help='Do not load cookies from browser (default)')
|
||||
filesystem.add_option(
|
||||
'--cache-dir', dest='cachedir', default=None, metavar='DIR',
|
||||
help='Location in the filesystem where youtube-dl can store some downloaded information (such as client ids and signatures) permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl')
|
||||
@@ -1260,6 +1287,10 @@ def parseOpts(overrideArguments=None):
|
||||
'Similar syntax to the output template can be used to pass any field as arguments to the command. '
|
||||
'An additional field "filepath" that contains the final path of the downloaded file is also available. '
|
||||
'If no fields are passed, "%(filepath)s" is appended to the end of the command'))
|
||||
postproc.add_option(
|
||||
'--exec-before-download',
|
||||
metavar='CMD', dest='exec_before_dl_cmd',
|
||||
help='Execute a command before the actual download. The syntax is the same as --exec')
|
||||
postproc.add_option(
|
||||
'--convert-subs', '--convert-sub', '--convert-subtitles',
|
||||
metavar='FORMAT', dest='convertsubtitles', default=None,
|
||||
@@ -1343,22 +1374,35 @@ def parseOpts(overrideArguments=None):
|
||||
'--no-hls-split-discontinuity',
|
||||
dest='hls_split_discontinuity', action='store_false',
|
||||
help='Do not split HLS playlists to different formats at discontinuities such as ad breaks (default)')
|
||||
_extractor_arg_parser = lambda key, vals='': (key.strip().lower(), [val.strip() for val in vals.split(',')])
|
||||
extractor.add_option(
|
||||
'--extractor-args',
|
||||
metavar='KEY:ARGS', dest='extractor_args', default={}, type='str',
|
||||
action='callback', callback=_dict_from_options_callback,
|
||||
callback_kwargs={
|
||||
'multiple_keys': False,
|
||||
'process': lambda val: dict(
|
||||
_extractor_arg_parser(*arg.split('=', 1)) for arg in val.split(';'))
|
||||
},
|
||||
help=(
|
||||
'Pass these arguments to the extractor. See "EXTRACTOR ARGUMENTS" for details. '
|
||||
'You can use this option multiple times to give arguments for different extractors'))
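
For reference, the value given to --extractor-args is split on ':' into an extractor name and its arguments by _dict_from_options_callback, and the arguments themselves by the lambda above. A standalone illustration (not part of the diff; whether the extractor name itself is lower-cased is handled by that callback and assumed here):

# Illustration of --extractor-args parsing.
def _extractor_arg_parser(key, vals=''):
    return key.strip().lower(), [val.strip() for val in vals.split(',')]

def parse_extractor_args(option_value):
    # 'KEY:ARGS' -> {key: {arg_name: [value, ...], ...}}
    extractor, _, args = option_value.partition(':')
    return {extractor.strip().lower(): dict(
        _extractor_arg_parser(*arg.split('=', 1)) for arg in args.split(';'))}

print(parse_extractor_args('youtube:player_client=android,web;skip=dash'))
# {'youtube': {'player_client': ['android', 'web'], 'skip': ['dash']}}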
extractor.add_option(
|
||||
'--youtube-include-dash-manifest', '--no-youtube-skip-dash-manifest',
|
||||
action='store_true', dest='youtube_include_dash_manifest', default=True,
|
||||
help='Download the DASH manifests and related data on YouTube videos (default) (Alias: --no-youtube-skip-dash-manifest)')
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
extractor.add_option(
|
||||
'--youtube-skip-dash-manifest', '--no-youtube-include-dash-manifest',
|
||||
action='store_false', dest='youtube_include_dash_manifest',
|
||||
help='Do not download the DASH manifests and related data on YouTube videos (Alias: --no-youtube-include-dash-manifest)')
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
extractor.add_option(
|
||||
'--youtube-include-hls-manifest', '--no-youtube-skip-hls-manifest',
|
||||
action='store_true', dest='youtube_include_hls_manifest', default=True,
|
||||
help='Download the HLS manifests and related data on YouTube videos (default) (Alias: --no-youtube-skip-hls-manifest)')
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
extractor.add_option(
|
||||
'--youtube-skip-hls-manifest', '--no-youtube-include-hls-manifest',
|
||||
action='store_false', dest='youtube_include_hls_manifest',
|
||||
help='Do not download the HLS manifests and related data on YouTube videos (Alias: --no-youtube-include-hls-manifest)')
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
|
||||
parser.add_option_group(general)
|
||||
parser.add_option_group(network)
|
||||
|
||||
@@ -51,7 +51,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||
|
||||
try:
|
||||
size_regex = r',\s*(?P<w>\d+)x(?P<h>\d+)\s*[,\[]'
|
||||
size_result = self.run_ffmpeg(filename, filename, ['-hide_banner'])
|
||||
size_result = self.run_ffmpeg(filename, None, ['-hide_banner'], expected_retcodes=(1,))
|
||||
mobj = re.search(size_regex, size_result)
|
||||
if mobj is None:
|
||||
return guess()
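
The probe above works because ffmpeg, given an input but no output file, prints the stream information and exits with status 1 (hence expected_retcodes=(1,)); size_regex then pulls WIDTHxHEIGHT out of that output. A hedged standalone equivalent:

# Sketch of the thumbnail-dimension probe.
import re
import subprocess

def probe_dimensions(filename):
    proc = subprocess.run(
        ['ffmpeg', '-hide_banner', '-i', filename],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    # typical line: "Stream #0:0: Video: png, rgb24(pc), 1280x720 [SAR 1:1 DAR 16:9], ..."
    mobj = re.search(r',\s*(?P<w>\d+)x(?P<h>\d+)\s*[,\[]', proc.stderr)
    return (int(mobj.group('w')), int(mobj.group('h'))) if mobj else None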
@@ -92,7 +92,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||
# format, there will be some additional data loss.
|
||||
# PNG, on the other hand, is lossless.
|
||||
thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:]
|
||||
if thumbnail_ext not in ('jpg', 'png'):
|
||||
if thumbnail_ext not in ('jpg', 'jpeg', 'png'):
|
||||
thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png')
|
||||
thumbnail_ext = 'png'
|
||||
|
||||
|
||||
@@ -28,7 +28,8 @@ class ExecAfterDownloadPP(PostProcessor):
|
||||
# If no replacements are found, replace {} for backward compatibility
|
||||
if '{}' not in cmd:
|
||||
cmd += ' {}'
|
||||
return cmd.replace('{}', compat_shlex_quote(info['filepath']))
|
||||
return cmd.replace('{}', compat_shlex_quote(
|
||||
info.get('filepath') or info['_filename']))
|
||||
|
||||
def run(self, info):
|
||||
cmd = self.parse_cmd(self.exec_cmd, info)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import itertools
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
@@ -24,6 +25,7 @@ from ..utils import (
|
||||
process_communicate_or_kill,
|
||||
replace_extension,
|
||||
traverse_obj,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
@@ -233,16 +235,16 @@ class FFmpegPostProcessor(PostProcessor):
None)
return num, len(streams)

def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, **kwargs):
return self.real_run_ffmpeg(
[(path, []) for path in input_paths],
[(out_path, opts)])
[(out_path, opts)], **kwargs)

def real_run_ffmpeg(self, input_path_opts, output_path_opts):
def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)):
self.check_version()

oldest_mtime = min(
os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts)
os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts if path)

cmd = [encodeFilename(self.executable, True), encodeArgument('-y')]
# avconv does not have repeat option

@@ -261,23 +263,25 @@ class FFmpegPostProcessor(PostProcessor):
+ [encodeFilename(self._ffmpeg_filename_argument(file), True)])

for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)):
cmd += [arg for i, o in enumerate(path_opts)
for arg in make_args(o[0], o[1], arg_type, i + 1)]
cmd += itertools.chain.from_iterable(
make_args(path, list(opts), arg_type, i + 1)
for i, (path, opts) in enumerate(path_opts) if path)

self.write_debug('ffmpeg command line: %s' % shell_quote(cmd))
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, stderr = process_communicate_or_kill(p)
if p.returncode != 0:
if p.returncode not in variadic(expected_retcodes):
stderr = stderr.decode('utf-8', 'replace').strip()
if self.get_param('verbose', False):
self.report_error(stderr)
raise FFmpegPostProcessorError(stderr.split('\n')[-1])
for out_path, _ in output_path_opts:
self.try_utime(out_path, oldest_mtime, oldest_mtime)
if out_path:
self.try_utime(out_path, oldest_mtime, oldest_mtime)
return stderr.decode('utf-8', 'replace')

def run_ffmpeg(self, path, out_path, opts):
return self.run_ffmpeg_multiple_files([path], out_path, opts)
def run_ffmpeg(self, path, out_path, opts, **kwargs):
return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs)

def _ffmpeg_filename_argument(self, fn):
# Always use 'file:' because the filename may contain ':' (ffmpeg
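The new expected_retcodes parameter lets callers treat selected non-zero exit codes as success, which matters because ffmpeg exits with code 1 when invoked without an output file yet still prints the stream information the thumbnail embedder parses above. A rough standalone sketch of the same return-code check (plain subprocess, not the actual PostProcessor plumbing):

import subprocess

def run(cmd, expected_retcodes=(0,)):
    # Accept a single int or any iterable of ints, mirroring variadic()
    if isinstance(expected_retcodes, int):
        expected_retcodes = (expected_retcodes,)
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode not in expected_retcodes:
        raise RuntimeError(proc.stderr.strip().split('\n')[-1])
    return proc.stderr

# run(['ffmpeg', '-hide_banner', '-i', 'video.mp4'], expected_retcodes=(0, 1))
# would succeed even though ffmpeg returns 1, handing back stderr for parsing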
@@ -526,6 +530,15 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):

class FFmpegMetadataPP(FFmpegPostProcessor):

@staticmethod
def _options(target_ext):
yield from ('-map', '0', '-dn')
if target_ext == 'm4a':
yield from ('-vn', '-acodec', 'copy')
else:
yield from ('-c', 'copy')

@PostProcessor._restrict_to(images=False)
def run(self, info):
metadata = {}

@@ -533,15 +546,9 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
def add(meta_list, info_list=None):
if not meta_list:
return
if not info_list:
info_list = meta_list
if not isinstance(meta_list, (list, tuple)):
meta_list = (meta_list,)
if not isinstance(info_list, (list, tuple)):
info_list = (info_list,)
for info_f in info_list:
for info_f in variadic(info_list or meta_list):
if isinstance(info.get(info_f), (compat_str, compat_numeric_types)):
for meta_f in meta_list:
for meta_f in variadic(meta_list):
metadata[meta_f] = info[info_f]
break

@@ -570,22 +577,17 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
for key in filter(lambda k: k.startswith(prefix), info.keys()):
add(key[len(prefix):], key)

if not metadata:
self.to_screen('There isn\'t any metadata to add')
return [], info
filename, metadata_filename = info['filepath'], None
options = [('-metadata', f'{name}={value}') for name, value in metadata.items()]

filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp')
in_filenames = [filename]
options = ['-map', '0', '-dn']

if info['ext'] == 'm4a':
options.extend(['-vn', '-acodec', 'copy'])
else:
options.extend(['-c', 'copy'])

for name, value in metadata.items():
options.extend(['-metadata', '%s=%s' % (name, value)])
stream_idx = 0
for fmt in info.get('requested_formats') or []:
stream_count = 2 if 'none' not in (fmt.get('vcodec'), fmt.get('acodec')) else 1
if fmt.get('language'):
lang = ISO639Utils.short2long(fmt['language']) or fmt['language']
options.extend(('-metadata:s:%d' % (stream_idx + i), 'language=%s' % lang)
for i in range(stream_count))
stream_idx += stream_count

chapters = info.get('chapters', [])
if chapters:

@@ -603,24 +605,29 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
if chapter_title:
metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)
f.write(metadata_file_content)
in_filenames.append(metadata_filename)
options.extend(['-map_metadata', '1'])
options.append(('-map_metadata', '1'))

if ('no-attach-info-json' not in self.get_param('compat_opts', [])
and '__infojson_filename' in info and info['ext'] in ('mkv', 'mka')):
old_stream, new_stream = self.get_stream_number(
filename, ('tags', 'mimetype'), 'application/json')
old_stream, new_stream = self.get_stream_number(filename, ('tags', 'mimetype'), 'application/json')
if old_stream is not None:
options.extend(['-map', '-0:%d' % old_stream])
options.append(('-map', '-0:%d' % old_stream))
new_stream -= 1

options.extend([
options.append((
'-attach', info['__infojson_filename'],
'-metadata:s:%d' % new_stream, 'mimetype=application/json'
])
))

self.to_screen('Adding metadata to \'%s\'' % filename)
self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
if not options:
self.to_screen('There isn\'t any metadata to add')
return [], info

temp_filename = prepend_extension(filename, 'temp')
self.to_screen('Adding metadata to "%s"' % filename)
self.run_ffmpeg_multiple_files(
(filename, metadata_filename), temp_filename,
itertools.chain(self._options(info['ext']), *options))
if chapters:
os.remove(metadata_filename)
os.remove(encodeFilename(filename))
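The simplified add() helper above copies the first info field that holds a string or number into every requested metadata key. A trimmed, self-contained sketch (plain Python types and a stand-in variadic are assumptions):

def variadic(x, allowed_types=(str, bytes)):
    # Wrap scalars in a tuple so callers can always iterate
    return x if hasattr(x, '__iter__') and not isinstance(x, allowed_types) else (x,)

info = {'artist': None, 'uploader': 'Some Channel', 'title': 'A Video'}
metadata = {}

def add(meta_list, info_list=None):
    for info_f in variadic(info_list or meta_list):
        if isinstance(info.get(info_f), (str, int, float)):
            for meta_f in variadic(meta_list):
                metadata[meta_f] = info[info_f]
            break

add('title')
add('artist', ('artist', 'uploader'))
print(metadata)  # {'title': 'A Video', 'artist': 'Some Channel'}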
@@ -896,6 +903,8 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
_, thumbnail_ext = os.path.splitext(original_thumbnail)
if thumbnail_ext:
thumbnail_ext = thumbnail_ext[1:].lower()
if thumbnail_ext == 'jpeg':
thumbnail_ext = 'jpg'
if thumbnail_ext == self.format:
self.to_screen('Thumbnail "%s" is already in the requested format' % original_thumbnail)
continue
@@ -27,7 +27,7 @@ class MetadataFromFieldPP(PostProcessor):

@staticmethod
def field_to_template(tmpl):
if re.match(r'\w+$', tmpl):
if re.match(r'[a-zA-Z_]+$', tmpl):
return '%%(%s)s' % tmpl
return tmpl

@@ -63,7 +63,7 @@ class MetadataFromFieldPP(PostProcessor):
continue
for attribute, value in match.groupdict().items():
info[attribute] = value
self.to_screen('parsed %s from "%s": %s' % (attribute, dictn['in'], value if value is not None else 'NA'))
self.to_screen('parsed %s from "%s": %s' % (attribute, dictn['tmpl'], value if value is not None else 'NA'))
return [], info
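The tightened regex means only bare field names (letters and underscores) get wrapped into an output template, so numeric or already-templated input passes through untouched. A standalone sketch of the same helper:

import re

def field_to_template(tmpl):
    # A bare field name becomes an output template; anything else is
    # assumed to already be a template string
    if re.match(r'[a-zA-Z_]+$', tmpl):
        return '%%(%s)s' % tmpl
    return tmpl

print(field_to_template('title'))               # %(title)s
print(field_to_template('%(id)s - %(title)s'))  # %(id)s - %(title)s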
yt_dlp/utils.py (128 changed lines)
@@ -3964,7 +3964,7 @@ def detect_exe_version(output, version_re=None, unrecognized='present'):
return unrecognized

class LazyList(collections.Sequence):
class LazyList(collections.abc.Sequence):
''' Lazy immutable list from an iterable
Note that slices of a LazyList are lists and not LazyList'''

@@ -3976,20 +3976,23 @@ class LazyList(collections.Sequence):
def __iter__(self):
if self.__reversed:
# We need to consume the entire iterable to iterate in reverse
yield from self.exhaust()[::-1]
yield from self.exhaust()
return
yield from self.__cache
for item in self.__iterable:
self.__cache.append(item)
yield item

def exhaust(self):
''' Evaluate the entire iterable '''
def __exhaust(self):
self.__cache.extend(self.__iterable)
return self.__cache

def exhaust(self):
''' Evaluate the entire iterable '''
return self.__exhaust()[::-1 if self.__reversed else 1]

@staticmethod
def _reverse_index(x):
def __reverse_index(x):
return -(x + 1)

def __getitem__(self, idx):

@@ -3998,18 +4001,18 @@ class LazyList(collections.Sequence):
start = idx.start if idx.start is not None else 0 if step > 0 else -1
stop = idx.stop if idx.stop is not None else -1 if step > 0 else 0
if self.__reversed:
start, stop, step = map(self._reverse_index, (start, stop, step))
(start, stop), step = map(self.__reverse_index, (start, stop)), -step
idx = slice(start, stop, step)
elif isinstance(idx, int):
if self.__reversed:
idx = self._reverse_index(idx)
idx = self.__reverse_index(idx)
start = stop = idx
else:
raise TypeError('indices must be integers or slices')
if start < 0 or stop < 0:
# We need to consume the entire iterable to be able to slice from the end
# Obviously, never use this with infinite iterables
return self.exhaust()[idx]
return self.__exhaust()[idx]

n = max(start, stop) - len(self.__cache) + 1
if n > 0:

@@ -4027,7 +4030,7 @@ class LazyList(collections.Sequence):
self.exhaust()
return len(self.__cache)

def __reversed__(self):
def reverse(self):
self.__reversed = not self.__reversed
return self
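Taken together, these changes make reversal an explicit, chainable reverse() call instead of __reversed__, while keeping evaluation lazy wherever possible. A rough usage sketch, assuming the class as rewritten above:

import itertools

lazy = LazyList(itertools.count())   # an infinite iterable is fine
print(lazy[3])                       # 3 -- only the first four items are consumed

finite = LazyList(range(5)).reverse()
print(finite[0])                     # 4
print(list(finite))                  # [4, 3, 2, 1, 0]
print(finite[1:3])                   # [3, 2] -- slices are plain lists, not LazyList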
@@ -4286,9 +4289,7 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True):

def try_get(src, getter, expected_type=None):
if not isinstance(getter, (list, tuple)):
getter = [getter]
for get in getter:
for get in variadic(getter):
try:
v = get(src)
except (AttributeError, KeyError, TypeError, IndexError):
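With variadic(), try_get now accepts a single getter or any iterable of getters without the explicit list check. A hypothetical call (the sample data is made up; behaviour of the unchanged remainder of try_get is assumed):

data = {'items': [{'id': 7}]}
print(try_get(data, lambda x: x['items'][0]['id'], int))   # 7
# A tuple of getters is tried in order; the first result of the expected type wins
print(try_get(data, (lambda x: x['missing'][0], lambda x: x['items'][0]), dict))  # {'id': 7}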
@@ -4364,7 +4365,7 @@ def strip_jsonp(code):

def js_to_json(code, vars={}):
# vars is a dict of var, val pairs to substitute
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
INTEGER_TABLE = (
(r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),

@@ -4961,11 +4962,9 @@ def cli_configuration_args(argdict, keys, default=[], use_compat=True):

assert isinstance(keys, (list, tuple))
for key_list in keys:
if isinstance(key_list, compat_str):
key_list = (key_list,)
arg_list = list(filter(
lambda x: x is not None,
[argdict.get(key.lower()) for key in key_list]))
[argdict.get(key.lower()) for key in variadic(key_list)]))
if arg_list:
return [arg for args in arg_list for arg in args]
return default
@@ -6225,40 +6224,93 @@ def load_plugins(name, suffix, namespace):
return classes

def traverse_obj(obj, keys, *, casesense=True, is_user_input=False, traverse_string=False):
def traverse_obj(
obj, *path_list, default=None, expected_type=None, get_all=True,
casesense=True, is_user_input=False, traverse_string=False):
''' Traverse nested list/dict/tuple
@param path_list A list of paths which are checked one by one.
Each path is a list of keys where each key is a string,
a tuple of strings or "...". When a tuple is given,
all the keys given in the tuple are traversed, and
"..." traverses all the keys in the object
@param default Default value to return
@param expected_type Only accept final value of this type (Can also be any callable)
@param get_all Return all the values obtained from a path or only the first one
@param casesense Whether to consider dictionary keys as case sensitive
@param is_user_input Whether the keys are generated from user input. If True,
strings are converted to int/slice if necessary
@param traverse_string Whether to traverse inside strings. If True, any
non-compatible object will also be converted into a string
# TODO: Write tests
'''
keys = list(keys)[::-1]
while keys:
key = keys.pop()
if isinstance(obj, dict):
assert isinstance(key, compat_str)
if not casesense:
obj = {k.lower(): v for k, v in obj.items()}
key = key.lower()
obj = obj.get(key)
else:
if is_user_input:
key = (int_or_none(key) if ':' not in key
else slice(*map(int_or_none, key.split(':'))))
if key is None:
if not casesense:
_lower = lambda k: k.lower() if isinstance(k, str) else k
path_list = (map(_lower, variadic(path)) for path in path_list)

def _traverse_obj(obj, path, _current_depth=0):
nonlocal depth
path = tuple(variadic(path))
for i, key in enumerate(path):
if isinstance(key, (list, tuple)):
obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
key = ...
if key is ...:
obj = (obj.values() if isinstance(obj, dict)
else obj if isinstance(obj, (list, tuple, LazyList))
else str(obj) if traverse_string else [])
_current_depth += 1
depth = max(depth, _current_depth)
return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
elif isinstance(obj, dict):
obj = (obj.get(key) if casesense or (key in obj)
else next((v for k, v in obj.items() if _lower(k) == key), None))
else:
if is_user_input:
key = (int_or_none(key) if ':' not in key
else slice(*map(int_or_none, key.split(':'))))
if key == slice(None):
return _traverse_obj(obj, (..., *path[i + 1:]))
if not isinstance(key, (int, slice)):
return None
if not isinstance(obj, (list, tuple)):
if traverse_string:
obj = compat_str(obj)
else:
if not isinstance(obj, (list, tuple, LazyList)):
if not traverse_string:
return None
obj = str(obj)
try:
obj = obj[key]
except IndexError:
return None
assert isinstance(key, (int, slice))
obj = try_get(obj, lambda x: x[key])
return obj
return obj

if isinstance(expected_type, type):
type_test = lambda val: val if isinstance(val, expected_type) else None
elif expected_type is not None:
type_test = expected_type
else:
type_test = lambda val: val

for path in path_list:
depth = 0
val = _traverse_obj(obj, path)
if val is not None:
if depth:
for _ in range(depth - 1):
val = itertools.chain.from_iterable(v for v in val if v is not None)
val = [v for v in map(type_test, val) if v is not None]
if val:
return val if get_all else val[0]
else:
val = type_test(val)
if val is not None:
return val
return default

def traverse_dict(dictn, keys, casesense=True):
''' For backward compatibility. Do not use '''
return traverse_obj(dictn, keys, casesense=casesense,
is_user_input=True, traverse_string=True)

def variadic(x, allowed_types=(str, bytes)):
return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
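To make the path syntax described in the docstring concrete, a few hypothetical calls against the rewritten traverse_obj (the sample info dict is made up):

info = {
    'id': 'abc',
    'formats': [
        {'url': 'https://example.invalid/1', 'height': 360},
        {'url': 'https://example.invalid/2', 'height': 720},
    ],
    'Uploader': {'name': 'someone'},
}

traverse_obj(info, ('formats', 0, 'url'))                  # 'https://example.invalid/1'
traverse_obj(info, ('formats', ..., 'height'))             # [360, 720]
traverse_obj(info, ('uploader', 'name'), casesense=False)  # 'someone'
traverse_obj(info, ('formats', ..., 'fps'), 'id')          # no fps anywhere, falls back to 'abc'
variadic('path'), variadic(('a', 'b'))                     # ('path',), ('a', 'b')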
@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '2021.06.09'
__version__ = '2021.07.21'
@@ -120,12 +120,11 @@ def _format_ts(ts):
Convert an MPEG PES timestamp into a WebVTT timestamp.
This will lose sub-millisecond precision.
"""

ts = int((ts + 45) // 90)
ms , ts = divmod(ts, 1000) # noqa: W504,E221,E222,E203
s , ts = divmod(ts, 60) # noqa: W504,E221,E222,E203
min, h = divmod(ts, 60) # noqa: W504,E221,E222
return '%02u:%02u:%02u.%03u' % (h, min, s, ms)
msec = int((ts + 45) // 90)
secs, msec = divmod(msec, 1000)
mins, secs = divmod(secs, 60)
hrs, mins = divmod(mins, 60)
return '%02u:%02u:%02u.%03u' % (hrs, mins, secs, msec)

class Block(object):
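As a worked example of the rewritten conversion (PES timestamps tick at 90 kHz, so 90 000 ticks equal one second; the sample values are made up):

def format_ts(ts):
    # Round the 90 kHz tick count to the nearest millisecond, then split into h:m:s.ms
    msec = int((ts + 45) // 90)
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return '%02u:%02u:%02u.%03u' % (hrs, mins, secs, msec)

print(format_ts(90000))                     # 00:00:01.000
print(format_ts(3723 * 90000 + 456 * 90))   # 01:02:03.456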