mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-08 15:12:47 +01:00
Compare commits
236 Commits
2021.06.09
...
2021.08.10
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
418964fa91 | ||
|
|
c196640ff1 | ||
|
|
60c8fc73c6 | ||
|
|
bc8745480e | ||
|
|
ff5e16f2f6 | ||
|
|
be2fc5b212 | ||
|
|
7be9ccff0b | ||
|
|
245d43cacf | ||
|
|
246fb276e0 | ||
|
|
6e6e0d95b3 | ||
|
|
25a3f4f5d6 | ||
|
|
ad3dc496bb | ||
|
|
2831b4686c | ||
|
|
8c0ae192a4 | ||
|
|
e9f4ccd19e | ||
|
|
a38bd1defa | ||
|
|
476febeb3a | ||
|
|
b6a35ad83b | ||
|
|
bfd56b74b9 | ||
|
|
858a65ecc1 | ||
|
|
3b34e38813 | ||
|
|
3448870205 | ||
|
|
b868936cd6 | ||
|
|
c681cb5d93 | ||
|
|
379e44ed3c | ||
|
|
243c57cfe8 | ||
|
|
28f436bad0 | ||
|
|
2b8a2973bd | ||
|
|
b7b04c782e | ||
|
|
6e84b21559 | ||
|
|
575e17a1b9 | ||
|
|
57015a4a3f | ||
|
|
9cc1a3130a | ||
|
|
b51d2ae3ca | ||
|
|
fee5f0c909 | ||
|
|
7bb6434767 | ||
|
|
124bc071ee | ||
|
|
a047eeb6d2 | ||
|
|
77b87f0519 | ||
|
|
678da2f21b | ||
|
|
cc3fa8d39d | ||
|
|
89efdc15dd | ||
|
|
8012d892bd | ||
|
|
9d65e7bd6d | ||
|
|
36576d7c4c | ||
|
|
bb36a55c41 | ||
|
|
3dbb2a9dcb | ||
|
|
9997eee4af | ||
|
|
3e376d183e | ||
|
|
888299e6ca | ||
|
|
c31be5b009 | ||
|
|
e5611e8eda | ||
|
|
8e6cc12c80 | ||
|
|
e980017ac8 | ||
|
|
e9d9efc0f2 | ||
|
|
6ccf351a87 | ||
|
|
28dff70b51 | ||
|
|
1aebc0f79e | ||
|
|
cf87314d4e | ||
|
|
1bd3639f69 | ||
|
|
68f5867cf0 | ||
|
|
605cad0be7 | ||
|
|
0855702f3f | ||
|
|
e8384376c0 | ||
|
|
e7e94f2a5c | ||
|
|
a46a815b05 | ||
|
|
96fccc101f | ||
|
|
dbf5416a20 | ||
|
|
d74a58a186 | ||
|
|
f5510afef0 | ||
|
|
e4f0275711 | ||
|
|
e0f2b4b47d | ||
|
|
eca330cb88 | ||
|
|
d24734daea | ||
|
|
d9e6e9481e | ||
|
|
3619f78d2c | ||
|
|
65c2fde23f | ||
|
|
000c15a4ca | ||
|
|
9275f62cf8 | ||
|
|
6552469433 | ||
|
|
11cc45718c | ||
|
|
fe07e2c69f | ||
|
|
89ce723edd | ||
|
|
45d1f15725 | ||
|
|
a318f59d14 | ||
|
|
7d1eb38af1 | ||
|
|
901130bbcf | ||
|
|
c0bc527bca | ||
|
|
2a9c6dcd22 | ||
|
|
5a1fc62b41 | ||
|
|
b4c055bac2 | ||
|
|
ea05b3020d | ||
|
|
9536bc072d | ||
|
|
8242bf220d | ||
|
|
4bfa401d40 | ||
|
|
0222620725 | ||
|
|
1fe3c4c27e | ||
|
|
f703a88055 | ||
|
|
a353beba83 | ||
|
|
052e135029 | ||
|
|
cb89cfc14b | ||
|
|
060ac76257 | ||
|
|
063c409dfb | ||
|
|
767b02a99b | ||
|
|
f45e6c1126 | ||
|
|
3944e7af92 | ||
|
|
ad34b2951e | ||
|
|
c8fa48fd94 | ||
|
|
2fd226f6a7 | ||
|
|
3ba7740dd8 | ||
|
|
29b208f6f9 | ||
|
|
e4d666d27b | ||
|
|
245524e6a3 | ||
|
|
9c0d7f4951 | ||
|
|
e37d0efbd9 | ||
|
|
c926c9541f | ||
|
|
982ee69a74 | ||
|
|
7ea6541124 | ||
|
|
ae30b84072 | ||
|
|
cc9d1493c6 | ||
|
|
f6755419d1 | ||
|
|
145bd631c5 | ||
|
|
b35496d825 | ||
|
|
352d63fdb5 | ||
|
|
11f9be0912 | ||
|
|
c84aeac6b5 | ||
|
|
50fed816dd | ||
|
|
a1a7907bc0 | ||
|
|
d61fc64618 | ||
|
|
6586bca9b9 | ||
|
|
da503b7a52 | ||
|
|
7c365c2109 | ||
|
|
3f698246b2 | ||
|
|
cca80fe611 | ||
|
|
c634ad2a3c | ||
|
|
8f3343809e | ||
|
|
0ba692acc8 | ||
|
|
d9488f69c1 | ||
|
|
dce8743677 | ||
|
|
5520aa2dc9 | ||
|
|
8d9b902243 | ||
|
|
fe93e2c4cf | ||
|
|
314ee30548 | ||
|
|
34917076ad | ||
|
|
ccc7795ca3 | ||
|
|
da1c94ee45 | ||
|
|
3b297919e0 | ||
|
|
47193e0298 | ||
|
|
49bd8c66d3 | ||
|
|
182b6ae8a6 | ||
|
|
c843e68588 | ||
|
|
198f7ea89e | ||
|
|
c888ffb95a | ||
|
|
9752433221 | ||
|
|
f0ff9979c6 | ||
|
|
501dd1ad55 | ||
|
|
75722b037d | ||
|
|
2d6659b9ea | ||
|
|
c5370857b3 | ||
|
|
00034c146a | ||
|
|
325ebc1703 | ||
|
|
7dde84f3c9 | ||
|
|
6606817a86 | ||
|
|
73d829c144 | ||
|
|
60bdb7bd9e | ||
|
|
4bb6b02f93 | ||
|
|
b5ac45b197 | ||
|
|
38a40c9e16 | ||
|
|
a8bf9b4dc1 | ||
|
|
51f8a31d65 | ||
|
|
be05d5cff1 | ||
|
|
30d569d2ac | ||
|
|
08625e4125 | ||
|
|
3acf6d3856 | ||
|
|
46890374f7 | ||
|
|
60755938b3 | ||
|
|
723d44b92b | ||
|
|
bc97cdae67 | ||
|
|
e010672ab5 | ||
|
|
169dbde946 | ||
|
|
17f0eb66b8 | ||
|
|
981052c9c6 | ||
|
|
b1e60d1806 | ||
|
|
6b6c16ca6c | ||
|
|
f6745c4980 | ||
|
|
109dd3b237 | ||
|
|
c2603313b1 | ||
|
|
1e79316e20 | ||
|
|
45261e063b | ||
|
|
49c258e18d | ||
|
|
d3f62c1967 | ||
|
|
5d3a0e794b | ||
|
|
125728b038 | ||
|
|
15a4fd53d3 | ||
|
|
4513a41a72 | ||
|
|
6033d9808d | ||
|
|
bd4d1ea398 | ||
|
|
8e897ed283 | ||
|
|
412cce82b0 | ||
|
|
d534c4520b | ||
|
|
2b18a8c590 | ||
|
|
dac8b87b0c | ||
|
|
6aecd87106 | ||
|
|
ed807c1837 | ||
|
|
29f63c9672 | ||
|
|
9fc0de5796 | ||
|
|
c60ee3a218 | ||
|
|
8a77e5e6bc | ||
|
|
51d9739f80 | ||
|
|
4c7853de14 | ||
|
|
e6779b9400 | ||
|
|
e36d50c5dd | ||
|
|
ff0f78e1fe | ||
|
|
7e067091e8 | ||
|
|
f89b3e2d7a | ||
|
|
fd7cfb6444 | ||
|
|
4e6767b5f2 | ||
|
|
9fea350f0d | ||
|
|
e858a9d6d3 | ||
|
|
7e87e27c52 | ||
|
|
d0fb4bd16f | ||
|
|
3fd4c2a543 | ||
|
|
cdb19aa4c2 | ||
|
|
4d85fbbdbb | ||
|
|
551f93885e | ||
|
|
8326b00aab | ||
|
|
b0249bcaf0 | ||
|
|
21cd8fae49 | ||
|
|
45db527fa6 | ||
|
|
28419ca2c8 | ||
|
|
8ba8714880 | ||
|
|
187986a857 | ||
|
|
4ba001080f | ||
|
|
1974e99f4b | ||
|
|
0181adefc6 | ||
|
|
fd3c633d26 |
3
.gitattributes
vendored
3
.gitattributes
vendored
@@ -1 +1,4 @@
|
||||
* text=auto
|
||||
|
||||
Makefile* text whitespace=-tab-in-indent
|
||||
*.sh text eol=lf
|
||||
|
||||
13
.github/FUNDING.yml
vendored
Normal file
13
.github/FUNDING.yml
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
# These are supported funding model platforms
|
||||
|
||||
github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
|
||||
patreon: # Replace with a single Patreon username
|
||||
open_collective: # Replace with a single Open Collective username
|
||||
ko_fi: # Replace with a single Ko-fi username
|
||||
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
|
||||
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
|
||||
liberapay: # Replace with a single Liberapay username
|
||||
issuehunt: # Replace with a single IssueHunt username
|
||||
otechie: # Replace with a single Otechie username
|
||||
|
||||
custom: ['https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators']
|
||||
8
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
8
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -21,7 +21,7 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.06.08. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.08.02. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
|
||||
- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -29,7 +29,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.06.08**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.08.02**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -42,9 +42,9 @@ Provide the complete verbose output of yt-dlp that clearly demonstrates the prob
|
||||
Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this:
|
||||
[debug] System config: []
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] yt-dlp version 2021.06.08
|
||||
[debug] yt-dlp version 2021.08.02
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
@@ -21,7 +21,7 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.06.08. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.08.02. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://github.com/yt-dlp/yt-dlp. yt-dlp does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -29,7 +29,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.06.08**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.08.02**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
||||
@@ -21,13 +21,13 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.06.08. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.08.02. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space)
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.06.08**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.08.02**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
8
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
8
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -21,7 +21,7 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.06.08. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.08.02. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
|
||||
- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -30,7 +30,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.06.08**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.08.02**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -44,9 +44,9 @@ Provide the complete verbose output of yt-dlp that clearly demonstrates the prob
|
||||
Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this:
|
||||
[debug] System config: []
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] yt-dlp version 2021.06.08
|
||||
[debug] yt-dlp version 2021.08.02
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -21,13 +21,13 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.06.08. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.08.02. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space)
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.06.08**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.08.02**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
2
.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md
vendored
2
.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md
vendored
@@ -42,7 +42,7 @@ Provide the complete verbose output of yt-dlp that clearly demonstrates the prob
|
||||
Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this:
|
||||
[debug] System config: []
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] yt-dlp version %(version)s
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
|
||||
2
.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md
vendored
2
.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md
vendored
@@ -44,7 +44,7 @@ Provide the complete verbose output of yt-dlp that clearly demonstrates the prob
|
||||
Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this:
|
||||
[debug] System config: []
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] yt-dlp version %(version)s
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
|
||||
31
.github/banner.svg
vendored
Normal file
31
.github/banner.svg
vendored
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 24 KiB |
17
.github/workflows/build.yml
vendored
17
.github/workflows/build.yml
vendored
@@ -95,14 +95,16 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python
|
||||
# 3.8 is used for Win7 support
|
||||
- name: Set up Python 3.8
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.8'
|
||||
- name: Upgrade pip and enable wheel support
|
||||
run: python -m pip install --upgrade pip setuptools wheel
|
||||
- name: Install Requirements
|
||||
run: pip install pyinstaller mutagen pycryptodome
|
||||
# Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
|
||||
run: pip install "https://yt-dlp.github.io/pyinstaller-builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodome websockets
|
||||
- name: Bump version
|
||||
id: bump_version
|
||||
run: python devscripts/update-version.py
|
||||
@@ -137,15 +139,16 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python 3.4.4 32-Bit
|
||||
# 3.7 is used for Vista support. See https://github.com/yt-dlp/yt-dlp/issues/390
|
||||
- name: Set up Python 3.7 32-Bit
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.4.4'
|
||||
python-version: '3.7'
|
||||
architecture: 'x86'
|
||||
- name: Upgrade pip and enable wheel support
|
||||
run: python -m pip install pip==19.1.1 setuptools==43.0.0 wheel==0.33.6
|
||||
- name: Install Requirements for 32 Bit
|
||||
run: pip install pyinstaller==3.5 mutagen==1.42.0 pycryptodome==3.9.4 pefile==2019.4.18
|
||||
run: python -m pip install --upgrade pip setuptools wheel
|
||||
- name: Install Requirements
|
||||
run: pip install "https://yt-dlp.github.io/pyinstaller-builds/i686/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodome websockets
|
||||
- name: Bump version
|
||||
id: bump_version
|
||||
run: python devscripts/update-version.py
|
||||
|
||||
14
.github/workflows/core.yml
vendored
14
.github/workflows/core.yml
vendored
@@ -9,11 +9,13 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-18.04]
|
||||
python-version: [3.6, 3.7, 3.8, 3.9, pypy-3.6, pypy-3.7]
|
||||
# py3.9 is in quick-test
|
||||
python-version: [3.7, 3.8, 3.10-dev, pypy-3.6, pypy-3.7]
|
||||
run-tests-ext: [sh]
|
||||
include:
|
||||
# atleast one of the tests must be in windows
|
||||
- os: windows-latest
|
||||
python-version: 3.4 # Windows x86 build is still in 3.4
|
||||
python-version: 3.6
|
||||
run-tests-ext: bat
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
@@ -21,11 +23,9 @@ jobs:
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install nose
|
||||
run: pip install nose
|
||||
- name: Install pytest
|
||||
run: pip install pytest
|
||||
- name: Run tests
|
||||
continue-on-error: False
|
||||
env:
|
||||
YTDL_TEST_SET: core
|
||||
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
|
||||
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} core
|
||||
# Linter is in quick-test
|
||||
|
||||
12
.github/workflows/download.yml
vendored
12
.github/workflows/download.yml
vendored
@@ -9,11 +9,11 @@ jobs:
|
||||
fail-fast: true
|
||||
matrix:
|
||||
os: [ubuntu-18.04]
|
||||
python-version: [3.6, 3.7, 3.8, 3.9, pypy-3.6, pypy-3.7]
|
||||
python-version: [3.7, 3.8, 3.9, 3.10-dev, pypy-3.6, pypy-3.7]
|
||||
run-tests-ext: [sh]
|
||||
include:
|
||||
- os: windows-latest
|
||||
python-version: 3.4 # Windows x86 build is still in 3.4
|
||||
python-version: 3.6
|
||||
run-tests-ext: bat
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
@@ -21,10 +21,8 @@ jobs:
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install nose
|
||||
run: pip install nose
|
||||
- name: Install pytest
|
||||
run: pip install pytest
|
||||
- name: Run tests
|
||||
continue-on-error: true
|
||||
env:
|
||||
YTDL_TEST_SET: download
|
||||
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
|
||||
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} download
|
||||
|
||||
10
.github/workflows/quick-test.yml
vendored
10
.github/workflows/quick-test.yml
vendored
@@ -11,12 +11,10 @@ jobs:
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.9
|
||||
- name: Install nose
|
||||
run: pip install nose
|
||||
- name: Install test requirements
|
||||
run: pip install pytest pycryptodome
|
||||
- name: Run tests
|
||||
env:
|
||||
YTDL_TEST_SET: core
|
||||
run: ./devscripts/run_tests.sh
|
||||
run: ./devscripts/run_tests.sh core
|
||||
flake8:
|
||||
name: Linter
|
||||
if: "!contains(github.event.head_commit.message, 'ci skip all')"
|
||||
@@ -30,4 +28,4 @@ jobs:
|
||||
- name: Install flake8
|
||||
run: pip install flake8
|
||||
- name: Run flake8
|
||||
run: flake8 .
|
||||
run: flake8 .
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -33,6 +33,7 @@ cookies.txt
|
||||
*.info.json
|
||||
*.live_chat.json
|
||||
*.jpg
|
||||
*.jpeg
|
||||
*.png
|
||||
*.webp
|
||||
*.annotations.xml
|
||||
@@ -44,6 +45,7 @@ cookies.txt
|
||||
# Python
|
||||
*.pyc
|
||||
*.pyo
|
||||
.pytest_cache
|
||||
wine-py2exe/
|
||||
py2exe.log
|
||||
build/
|
||||
@@ -78,6 +80,7 @@ README.txt
|
||||
*.tar.gz
|
||||
*.zsh
|
||||
*.spec
|
||||
test/testdata/player-*.js
|
||||
|
||||
# Binary
|
||||
/youtube-dl
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
$ youtube-dl -v <your command line>
|
||||
[debug] System config: []
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKc']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2015.12.06
|
||||
[debug] Git HEAD: 135392e
|
||||
@@ -81,16 +81,17 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
||||
python -m unittest discover
|
||||
python test/test_download.py
|
||||
nosetests
|
||||
pytest
|
||||
|
||||
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
|
||||
|
||||
If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
* python
|
||||
* python3
|
||||
* make (only GNU make is supported)
|
||||
* pandoc
|
||||
* zip
|
||||
* nosetests
|
||||
* pytest
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
|
||||
28
CONTRIBUTORS
28
CONTRIBUTORS
@@ -1,6 +1,6 @@
|
||||
pukkandan (owner)
|
||||
shirt-dev (collaborator)
|
||||
colethedj (collaborator)
|
||||
coletdjnz/colethedj (collaborator)
|
||||
Ashish0804 (collaborator)
|
||||
h-h-h-h
|
||||
pauldubois98
|
||||
@@ -52,5 +52,29 @@ hhirtz
|
||||
louie-github
|
||||
MinePlayersPE
|
||||
olifre
|
||||
rhsmachine
|
||||
rhsmachine/zenerdi0de
|
||||
nihil-admirari
|
||||
krichbanana
|
||||
ohmybahgosh
|
||||
nyuszika7h
|
||||
blackjack4494
|
||||
pyx
|
||||
TpmKranz
|
||||
mzbaulhaque
|
||||
zackmark29
|
||||
mbway
|
||||
zerodytrash
|
||||
wesnm
|
||||
pento
|
||||
rigstot
|
||||
dirkf
|
||||
funniray
|
||||
Jessecar96
|
||||
jhwgh1968
|
||||
kikuyan
|
||||
max-te
|
||||
nchilada
|
||||
pgaig
|
||||
PSlava
|
||||
stdedos
|
||||
u-spec-png
|
||||
|
||||
330
Changelog.md
330
Changelog.md
@@ -19,6 +19,266 @@
|
||||
-->
|
||||
|
||||
|
||||
### 2021.08.10
|
||||
|
||||
* Add option `--replace-in-metadata`
|
||||
* Add option `--no-simulate` to not simulate even when `--print` or `--list...` are used - Deprecates `--print-json`
|
||||
* Allow entire infodict to be printed using `%()s` - makes `--dump-json` redundant
|
||||
* Allow multiple `--exec` and `--exec-before-download`
|
||||
* Add regex to `--match-filter`
|
||||
* Add all format filtering operators also to `--match-filter` by [max-te](https://github.com/max-te)
|
||||
* Add compat-option `no-keep-subs`
|
||||
* [adobepass] Add MSO Cablevision by [Jessecar96](https://github.com/Jessecar96)
|
||||
* [BandCamp] Add BandcampMusicIE by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [blackboardcollaborate] Add new extractor by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [eroprofile] Add album downloader by [jhwgh1968](https://github.com/jhwgh1968)
|
||||
* [mirrativ] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
|
||||
* [openrec] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
|
||||
* [nbcolympics:stream] Fix extractor by [nchilada](https://github.com/nchilada), [pukkandan](https://github.com/pukkandan)
|
||||
* [nbcolympics] Update extractor for 2020 olympics by [wesnm](https://github.com/wesnm)
|
||||
* [paramountplus] Separate extractor and fix some titles by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
|
||||
* [RCTIPlus] Support events and TV by [MinePlayersPE](https://github.com/MinePlayersPE)
|
||||
* [Newgrounds] Improve extractor and fix playlist by [u-spec-png](https://github.com/u-spec-png)
|
||||
* [aenetworks] Update `_THEPLATFORM_KEY` and `_THEPLATFORM_SECRET` by [wesnm](https://github.com/wesnm)
|
||||
* [crunchyroll] Fix thumbnail by [funniray](https://github.com/funniray)
|
||||
* [HotStar] Use API for metadata and extract subtitles by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [instagram] Fix comments extraction by [u-spec-png](https://github.com/u-spec-png)
|
||||
* [peertube] Fix videos without description by [u-spec-png](https://github.com/u-spec-png)
|
||||
* [twitch:clips] Extract `display_id` by [dirkf](https://github.com/dirkf)
|
||||
* [viki] Print error message from API request
|
||||
* [Vine] Remove invalid formats by [u-spec-png](https://github.com/u-spec-png)
|
||||
* [VrtNU] Fix XSRF token by [pgaig](https://github.com/pgaig)
|
||||
* [vrv] Fix thumbnail extraction by [funniray](https://github.com/funniray)
|
||||
* [youtube] Add extractor-arg `include-live-dash` to show live dash formats
|
||||
* [youtube] Improve signature function detection by [PSlava](https://github.com/PSlava)
|
||||
* [youtube] Raise appropriate error when API pages can't be downloaded
|
||||
* Ensure `_write_ytdl_file` closes file handle on error
|
||||
* Fix `--compat-options filename` by [stdedos](https://github.com/stdedos)
|
||||
* Fix issues with infodict sanitization
|
||||
* Fix resuming when using `--no-part`
|
||||
* Fix wrong extension for intermediate files
|
||||
* Handle `BrokenPipeError` by [kikuyan](https://github.com/kikuyan)
|
||||
* Show libraries present in verbose head
|
||||
* [extractor] Detect `sttp` as subtitles in MPD by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [extractor] Reset non-repeating warnings per video
|
||||
* [ffmpeg] Fix streaming `mp4` to `stdout`
|
||||
* [ffpmeg] Allow `--ffmpeg-location` to be a file with different name
|
||||
* [utils] Fix `InAdvancePagedList.__getitem__`
|
||||
* [utils] Fix `traverse_obj` depth when `is_user_input`
|
||||
* [webvtt] Merge daisy-chained duplicate cues by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [build] Use custom build of `pyinstaller` by [shirt](https://github.com/shirt-dev)
|
||||
* [tests:download] Add batch testing for extractors (`test_YourExtractor_all`)
|
||||
* [docs] Document which fields `--add-metadata` adds to the file
|
||||
* [docs] Fix some mistakes and improve doc
|
||||
* [cleanup] Misc code cleanup
|
||||
|
||||
|
||||
### 2021.08.02
|
||||
|
||||
* Add logo, banner and donate links
|
||||
* Expand and escape environment variables correctly in output template
|
||||
* Add format types `j` (json), `l` (comma delimited list), `q` (quoted for terminal) in output template
|
||||
* [downloader] Allow streaming some unmerged formats to stdout using ffmpeg
|
||||
* [youtube] **Age-gate bypass**
|
||||
* Add `agegate` clients by [pukkandan](https://github.com/pukkandan), [MinePlayersPE](https://github.com/MinePlayersPE)
|
||||
* Add `thirdParty` to agegate clients to bypass more videos
|
||||
* Simplify client definitions, expose `embedded` clients
|
||||
* Improve age-gate detection by [coletdjnz](https://github.com/coletdjnz)
|
||||
* Fix default global API key by [coletdjnz](https://github.com/coletdjnz)
|
||||
* Add `creator` clients for age-gate bypass using unverified accounts by [zerodytrash](https://github.com/zerodytrash), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
|
||||
* [adobepass] Add MSO Sling TV by [wesnm](https://github.com/wesnm)
|
||||
* [CBS] Add ParamountPlusSeriesIE by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [dplay] Add `ScienceChannelIE` by [Sipherdrakon](https://github.com/Sipherdrakon)
|
||||
* [UtreonIE] Add extractor by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [youtube] Add `mweb` client by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Add `player_client=all`
|
||||
* [youtube] Force `hl=en` for comments by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Fix format sorting when using alternate clients
|
||||
* [youtube] Misc cleanup by [pukkandan](https://github.com/pukkandan), [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Extract SAPISID only once
|
||||
* [CBS] Add fallback by [llacb47](https://github.com/llacb47), [pukkandan](https://github.com/pukkandan)
|
||||
* [Hotstar] Support cookies by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [HotStarSeriesIE] Fix regex by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [bilibili] Improve `_VALID_URL`
|
||||
* [mediaset] Fix extraction by [nixxo](https://github.com/nixxo)
|
||||
* [Mxplayer] Add h265 formats by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [RCTIPlus] Remove PhantomJS dependency by [MinePlayersPE](https://github.com/MinePlayersPE)
|
||||
* [tenplay] Add MA15+ age limit by [pento](https://github.com/pento)
|
||||
* [vidio] Fix login error detection by [MinePlayersPE](https://github.com/MinePlayersPE)
|
||||
* [vimeo] Better extraction of original file by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [generic] Support KVS player (replaces ThisVidIE) by [rigstot](https://github.com/rigstot)
|
||||
* Add compat-option `no-clean-infojson`
|
||||
* Remove `asr` appearing twice in `-F`
|
||||
* Set `home:` as the default key for `-P`
|
||||
* [utils] Fix slicing of reversed `LazyList`
|
||||
* [FormatSort] Fix bug for audio with unknown codec
|
||||
* [test:download] Support testing with `ignore_no_formats_error`
|
||||
* [cleanup] Refactor some code
|
||||
|
||||
|
||||
### 2021.07.24
|
||||
|
||||
* [youtube:tab] Extract video duration early
|
||||
* [downloader] Pass `info_dict` to `progress_hook`s
|
||||
* [youtube] Fix age-gated videos for API clients when cookies are supplied by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Disable `get_video_info` age-gate workaround - This endpoint seems to be completely dead
|
||||
* [youtube] Try all clients even if age-gated
|
||||
* [youtube] Fix subtitles only being extracted from the first client
|
||||
* [youtube] Simplify `_get_text`
|
||||
* [cookies] bugfix for microsoft edge on macOS
|
||||
* [cookies] Handle `sqlite` `ImportError` gracefully by [mbway](https://github.com/mbway)
|
||||
* [cookies] Handle errors when importing `keyring`
|
||||
|
||||
### 2021.07.21
|
||||
|
||||
* **Add option `--cookies-from-browser`** to load cookies from a browser by [mbway](https://github.com/mbway)
|
||||
* Usage: `--cookies-from-browser BROWSER[:PROFILE_NAME_OR_PATH]`
|
||||
* Also added `--no-cookies-from-browser`
|
||||
* To decrypt chromium cookies, `keyring` is needed for UNIX and `pycryptodome` for Windows
|
||||
* Add option `--exec-before-download`
|
||||
* Add field `live_status`
|
||||
* [FFmpegMetadata] Add language of each stream and some refactoring
|
||||
* [douyin] Add extractor by [pukkandan](https://github.com/pukkandan), [pyx](https://github.com/pyx)
|
||||
* [pornflip] Add extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
|
||||
* **[youtube] Extract data from multiple clients** by [pukkandan](https://github.com/pukkandan), [coletdjnz](https://github.com/coletdjnz)
|
||||
* `player_client` now accepts multiple clients
|
||||
* Default `player_client` = `android,web`
|
||||
* This uses twice as many requests, but avoids throttling for most videos while also not losing any formats
|
||||
* Music clients can be specifically requested and is enabled by default if `music.youtube.com`
|
||||
* Added `player_client=ios` (Known issue: formats from ios are not sorted correctly)
|
||||
* Add age-gate bypass for android and ios clients
|
||||
* [youtube] Extract more thumbnails
|
||||
* The thumbnail URLs are hard-coded and their actual existence is tested lazily
|
||||
* Added option `--no-check-formats` to not test them
|
||||
* [youtube] Misc fixes
|
||||
* Improve extraction of livestream metadata by [pukkandan](https://github.com/pukkandan), [krichbanana](https://github.com/krichbanana)
|
||||
* Hide live dash formats since they can't be downloaded anyway
|
||||
* Fix authentication when using multiple accounts by [coletdjnz](https://github.com/coletdjnz)
|
||||
* Fix controversial videos when requested via API by [coletdjnz](https://github.com/coletdjnz)
|
||||
* Fix session index extraction and headers for non-web player clients by [coletdjnz](https://github.com/coletdjnz)
|
||||
* Make `--extractor-retries` work for more errors
|
||||
* Fix sorting of 3gp format
|
||||
* Sanity check `chapters` (and refactor related code)
|
||||
* Make `parse_time_text` and `_extract_chapters` non-fatal
|
||||
* Misc cleanup and bug fixes by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube:tab] Fix channels tab
|
||||
* [youtube:tab] Extract playlist availability by [coletdjnz](https://github.com/coletdjnz)
|
||||
* **[youtube:comments] Move comment extraction to new API** by [coletdjnz](https://github.com/coletdjnz)
|
||||
* Adds extractor-args `comment_sort` (`top`/`new`), `max_comments`, `max_comment_depth`
|
||||
* [youtube:comments] Fix `is_favorited`, improve `like_count` parsing by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [BravoTV] Improve metadata extraction by [kevinoconnor7](https://github.com/kevinoconnor7)
|
||||
* [crunchyroll:playlist] Force http
|
||||
* [yahoo:gyao:player] Relax `_VALID_URL` by [nao20010128nao](https://github.com/nao20010128nao)
|
||||
* [nebula] Authentication via tokens from cookie jar by [hheimbuerger](https://github.com/hheimbuerger), [TpmKranz](https://github.com/TpmKranz)
|
||||
* [RTP] Fix extraction and add subtitles by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [viki] Rewrite extractors and add extractor-arg `video_types` to `vikichannel` by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan)
|
||||
* [vlive] Extract thumbnail directly in addition to the one from Naver
|
||||
* [generic] Extract previously missed subtitles by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [generic] Extract everything in the SMIL manifest and detect discarded subtitles by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [embedthumbnail] Fix `_get_thumbnail_resolution`
|
||||
* [metadatafromfield] Do not detect numbers as field names
|
||||
* Fix selectors `all`, `mergeall` and add tests
|
||||
* Errors in playlist extraction should obey `--ignore-errors`
|
||||
* Fix bug where `original_url` was not propagated when `_type`=`url`
|
||||
* Revert "Merge webm formats into mkv if thumbnails are to be embedded (#173)"
|
||||
* This was wrongly checking for `write_thumbnail`
|
||||
* Improve `extractor_args` parsing
|
||||
* Rename `NOTE` in `-F` to `MORE INFO` since it's often confused to be the same as `format_note`
|
||||
* Add `only_once` param for `write_debug` and `report_warning`
|
||||
* [extractor] Allow extracting multiple groups in `_search_regex` by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [utils] Improve `traverse_obj`
|
||||
* [utils] Add `variadic`
|
||||
* [utils] Improve `js_to_json` comment regex by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [webtt] Fix timestamps
|
||||
* [compat] Remove unnecessary code
|
||||
* [docs] fix default of multistreams
|
||||
|
||||
|
||||
### 2021.07.07
|
||||
|
||||
* Merge youtube-dl: Upto [commit/a803582](https://github.com/ytdl-org/youtube-dl/commit/a8035827177d6b59aca03bd717acb6a9bdd75ada)
|
||||
* Add `--extractor-args` to pass some extractor-specific arguments. See [readme](https://github.com/yt-dlp/yt-dlp#extractor-arguments)
|
||||
* Add extractor option `skip` for `youtube`. Eg: `--extractor-args youtube:skip=hls,dash`
|
||||
* Deprecates `--youtube-skip-dash-manifest`, `--youtube-skip-hls-manifest`, `--youtube-include-dash-manifest`, `--youtube-include-hls-manifest`
|
||||
* Allow `--list...` options to work with `--print`, `--quiet` and other `--list...` options
|
||||
* [youtube] Use `player` API for additional video extraction requests by [coletdjnz](https://github.com/coletdjnz)
|
||||
* **Fixes youtube premium music** (format 141) extraction
|
||||
* Adds extractor option `player_client` = `web`/`android`
|
||||
* **`--extractor-args youtube:player_client=android` works around the throttling** for the time-being
|
||||
* Adds extractor option `player_skip=config`
|
||||
* Adds age-gate fallback using embedded client
|
||||
* [youtube] Choose correct Live chat API for upcoming streams by [krichbanana](https://github.com/krichbanana)
|
||||
* [youtube] Fix subtitle names for age-gated videos
|
||||
* [youtube:comments] Fix error handling and add `itct` to params by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube_live_chat] Fix download with cookies by [siikamiika](https://github.com/siikamiika)
|
||||
* [youtube_live_chat] use `clickTrackingParams` by [siikamiika](https://github.com/siikamiika)
|
||||
* [Funimation] Rewrite extractor
|
||||
* Add `FunimationShowIE` by [Mevious](https://github.com/Mevious)
|
||||
* **Treat the different versions of an episode as different formats of a single video**
|
||||
* This changes the video `id` and will break break existing archives
|
||||
* Compat option `seperate-video-versions` to fall back to old behavior including using the old video ids
|
||||
* Support direct `/player/` URL
|
||||
* Extractor options `language` and `version` to pre-select them during extraction
|
||||
* These options may be removed in the future if we can extract all formats without additional network requests
|
||||
* Do not rely on these for format selection and use `-f` filters instead
|
||||
* [AdobePass] Add Spectrum MSO by [kevinoconnor7](https://github.com/kevinoconnor7), [ohmybahgosh](https://github.com/ohmybahgosh)
|
||||
* [facebook] Extract description and fix title
|
||||
* [fancode] Fix extraction, support live and allow login with refresh token by [zenerdi0de](https://github.com/zenerdi0de)
|
||||
* [plutotv] Improve `_VALID_URL`
|
||||
* [RCTIPlus] Add extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
|
||||
* [Soundcloud] Allow login using oauth token by [blackjack4494](https://github.com/blackjack4494)
|
||||
* [TBS] Support livestreams by [llacb47](https://github.com/llacb47)
|
||||
* [videa] Fix extraction by [nyuszika7h](https://github.com/nyuszika7h)
|
||||
* [yahoo] Fix extraction by [llacb47](https://github.com/llacb47), [pukkandan](https://github.com/pukkandan)
|
||||
* Process videos when using `--ignore-no-formats-error` by [krichbanana](https://github.com/krichbanana)
|
||||
* Fix `--throttled-rate` when using `--load-info-json`
|
||||
* Fix `--flat-playlist` when entry has no `ie_key`
|
||||
* Fix `check_formats` catching `ExtractorError` instead of `DownloadError`
|
||||
* Fix deprecated option `--list-formats-old`
|
||||
* [downloader/ffmpeg] Fix `--ppa` when using simultaneous download
|
||||
* [extractor] Prevent unnecessary download of hls manifests and refactor `hls_split_discontinuity`
|
||||
* [fragment] Handle status of download and errors in threads correctly; and minor refactoring
|
||||
* [thumbnailsconvertor] Treat `jpeg` as `jpg`
|
||||
* [utils] Fix issues with `LazyList` reversal
|
||||
* [extractor] Allow extractors to set their own login hint
|
||||
* [cleanup] Simplify format selector code with `LazyList` and `yield from`
|
||||
* [cleanup] Clean `extractor.common._merge_subtitles` signature
|
||||
* [cleanup] Fix some typos
|
||||
|
||||
|
||||
### 2021.06.23
|
||||
|
||||
* Merge youtube-dl: Upto [commit/379f52a](https://github.com/ytdl-org/youtube-dl/commit/379f52a4954013767219d25099cce9e0f9401961)
|
||||
* **Add option `--throttled-rate`** below which video data is re-extracted
|
||||
* [fragment] **Merge during download for `-N`**, and refactor `hls`/`dash`
|
||||
* [websockets] Add `WebSocketFragmentFD` by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan)
|
||||
* Allow `images` formats in addition to video/audio
|
||||
* [downloader/mhtml] Add new downloader for slideshows/storyboards by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [youtube] Temporary **fix for age-gate**
|
||||
* [youtube] Support ongoing live chat by [siikamiika](https://github.com/siikamiika)
|
||||
* [youtube] Improve SAPISID cookie handling by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Login is not needed for `:ytrec`
|
||||
* [youtube] Non-fatal alert reporting for unavailable videos page by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [twitcasting] Websocket support by [nao20010128nao](https://github.com/nao20010128nao)
|
||||
* [mediasite] Extract slides by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [funimation] Extract subtitles
|
||||
* [pornhub] Extract `cast`
|
||||
* [hotstar] Use server time for authentication instead of local time
|
||||
* [EmbedThumbnail] Fix for already downloaded thumbnail
|
||||
* [EmbedThumbnail] Add compat-option `embed-thumbnail-atomicparsley`
|
||||
* Expand `--check-formats` to thumbnails
|
||||
* Fix id sanitization in filenames
|
||||
* Skip fixup of existing files and add `--fixup force` to force it
|
||||
* Better error handling of syntax errors in `-f`
|
||||
* Use `NamedTemporaryFile` for `--check-formats`
|
||||
* [aria2c] Lower `--min-split-size` for HTTP downloads
|
||||
* [options] Rename `--add-metadata` to `--embed-metadata`
|
||||
* [utils] Improve `LazyList` and add tests
|
||||
* [build] Build Windows x86 version with py3.7 and remove redundant tests by [pukkandan](https://github.com/pukkandan), [shirt](https://github.com/shirt-dev)
|
||||
* [docs] Clarify that `--embed-metadata` embeds chapter markers
|
||||
* [cleanup] Refactor fixup
|
||||
|
||||
|
||||
### 2021.06.09
|
||||
|
||||
* Fix bug where `%(field)d` in filename template throws error
|
||||
@@ -34,7 +294,7 @@
|
||||
* [extractor] Fix FourCC fallback when parsing ISM by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [twitcasting] Add TwitCastingUserIE, TwitCastingLiveIE by [pukkandan](https://github.com/pukkandan), [nao20010128nao](https://github.com/nao20010128nao)
|
||||
* [vidio] Add VidioPremierIE and VidioLiveIE by [MinePlayersPE](Https://github.com/MinePlayersPE)
|
||||
* [viki] Fix extraction from by [ytdl-org/youtube-dl@59e583f](https://github.com/ytdl-org/youtube-dl/commit/59e583f7e8530ca92776c866897d895c072e2a82)
|
||||
* [viki] Fix extraction from [ytdl-org/youtube-dl@59e583f](https://github.com/ytdl-org/youtube-dl/commit/59e583f7e8530ca92776c866897d895c072e2a82)
|
||||
* [youtube] Support shorts URL
|
||||
* [zoom] Extract transcripts as subtitles
|
||||
* Add field `original_url` with the user-inputted URL
|
||||
@@ -56,7 +316,7 @@
|
||||
* Merge youtube-dl: Upto [commit/d495292](https://github.com/ytdl-org/youtube-dl/commit/d495292852b6c2f1bd58bc2141ff2b0265c952cf)
|
||||
* Pre-check archive and filters during playlist extraction
|
||||
* Handle Basic Auth `user:pass` in URLs by [hhirtz](https://github.com/hhirtz) and [pukkandan](https://github.com/pukkandan)
|
||||
* [archiveorg] Add YoutubeWebArchiveIE by [colethedj](https://github.com/colethedj) and [alex-gedeon](https://github.com/alex-gedeon)
|
||||
* [archiveorg] Add YoutubeWebArchiveIE by [coletdjnz](https://github.com/coletdjnz) and [alex-gedeon](https://github.com/alex-gedeon)
|
||||
* [fancode] Add extractor by [rhsmachine](https://github.com/rhsmachine)
|
||||
* [patreon] Support vimeo embeds by [rhsmachine](https://github.com/rhsmachine)
|
||||
* [Saitosan] Add new extractor by [llacb47](https://github.com/llacb47)
|
||||
@@ -99,7 +359,7 @@
|
||||
|
||||
* **Youtube improvements**:
|
||||
* Support youtube music `MP`, `VL` and `browse` pages
|
||||
* Extract more formats for youtube music by [craftingmod](https://github.com/craftingmod), [colethedj](https://github.com/colethedj) and [pukkandan](https://github.com/pukkandan)
|
||||
* Extract more formats for youtube music by [craftingmod](https://github.com/craftingmod), [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
|
||||
* Extract multiple subtitles in same language by [pukkandan](https://github.com/pukkandan) and [tpikonen](https://github.com/tpikonen)
|
||||
* Redirect channels that doesn't have a `videos` tab to their `UU` playlists
|
||||
* Support in-channel search
|
||||
@@ -108,10 +368,10 @@
|
||||
* Extract audio language
|
||||
* Add subtitle language names by [nixxo](https://github.com/nixxo) and [tpikonen](https://github.com/tpikonen)
|
||||
* Show alerts only from the final webpage
|
||||
* Add `html5=1` param to `get_video_info` page requests by [colethedj](https://github.com/colethedj)
|
||||
* Add `html5=1` param to `get_video_info` page requests by [coletdjnz](https://github.com/coletdjnz)
|
||||
* Better message when login required
|
||||
* **Add option `--print`**: to print any field/template
|
||||
* Deprecates: `--get-description`, `--get-duration`, `--get-filename`, `--get-format`, `--get-id`, `--get-thumbnail`, `--get-title`, `--get-url`
|
||||
* Makes redundant: `--get-description`, `--get-duration`, `--get-filename`, `--get-format`, `--get-id`, `--get-thumbnail`, `--get-title`, `--get-url`
|
||||
* Field `additional_urls` to download additional videos from metadata using [`--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata)
|
||||
* Merge youtube-dl: Upto [commit/dfbbe29](https://github.com/ytdl-org/youtube-dl/commit/dfbbe2902fc67f0f93ee47a8077c148055c67a9b)
|
||||
* Write thumbnail of playlist and add `pl_thumbnail` outtmpl key
|
||||
@@ -205,11 +465,11 @@
|
||||
* [TubiTv] Add TubiTvShowIE by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [twitcasting] Fix extractor
|
||||
* [viu:ott] Fix extractor and support series by [lkho](https://github.com/lkho) and [pukkandan](https://github.com/pukkandan)
|
||||
* [youtube:tab] Show unavailable videos in playlists by [colethedj](https://github.com/colethedj)
|
||||
* [youtube:tab] Show unavailable videos in playlists by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube:tab] Reload with unavailable videos for all playlists
|
||||
* [youtube] Ignore invalid stretch ratio
|
||||
* [youtube] Improve channel syncid extraction to support ytcfg by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Standardize API calls for tabs, mixes and search by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Improve channel syncid extraction to support ytcfg by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Standardize API calls for tabs, mixes and search by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Bugfix in `_extract_ytcfg`
|
||||
* [mildom:user:vod] Download only necessary amount of pages
|
||||
* [mildom] Remove proxy completely by [fstirlitz](https://github.com/fstirlitz)
|
||||
@@ -221,8 +481,8 @@
|
||||
* Improve the yt-dlp.sh script by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [lazy_extractor] Do not load plugins
|
||||
* [ci] Disable fail-fast
|
||||
* [documentation] Clarify which deprecated options still work
|
||||
* [documentation] Fix typos
|
||||
* [docs] Clarify which deprecated options still work
|
||||
* [docs] Fix typos
|
||||
|
||||
|
||||
### 2021.04.11
|
||||
@@ -239,17 +499,17 @@
|
||||
* [nitter] Fix extraction of reply tweets and update instance list by [B0pol](https://github.com/B0pol)
|
||||
* [nitter] Fix thumbnails by [B0pol](https://github.com/B0pol)
|
||||
* [youtube] Fix thumbnail URL
|
||||
* [youtube] Parse API parameters from initial webpage by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Extract comments' approximate timestamp by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Parse API parameters from initial webpage by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Extract comments' approximate timestamp by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Fix alert extraction
|
||||
* [bilibili] Fix uploader
|
||||
* [utils] Add `datetime_from_str` and `datetime_add_months` by [colethedj](https://github.com/colethedj)
|
||||
* [utils] Add `datetime_from_str` and `datetime_add_months` by [coletdjnz](https://github.com/coletdjnz)
|
||||
* Run some `postprocessors` before actual download
|
||||
* Improve argument parsing for `-P`, `-o`, `-S`
|
||||
* Fix some `m3u8` not obeying `--allow-unplayable-formats`
|
||||
* Fix default of `dynamic_mpd`
|
||||
* Deprecate `--all-formats`, `--include-ads`, `--hls-prefer-native`, `--hls-prefer-ffmpeg`
|
||||
* [documentation] Improvements
|
||||
* [docs] Improvements
|
||||
|
||||
### 2021.04.03
|
||||
* Merge youtube-dl: Upto [commit/654b4f4](https://github.com/ytdl-org/youtube-dl/commit/654b4f4ff2718f38b3182c1188c5d569c14cc70a)
|
||||
@@ -260,10 +520,10 @@
|
||||
* [mildom] Update extractor with current proxy by [nao20010128nao](https://github.com/nao20010128nao)
|
||||
* [ard:mediathek] Fix video id extraction
|
||||
* [generic] Detect Invidious' link element
|
||||
* [youtube] Show premium state in `availability` by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Show premium state in `availability` by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [viewsource] Add extractor to handle `view-source:`
|
||||
* [sponskrub] Run before embedding thumbnail
|
||||
* [documentation] Improve `--parse-metadata` documentation
|
||||
* [docs] Improve `--parse-metadata` documentation
|
||||
|
||||
|
||||
### 2021.03.24.1
|
||||
@@ -295,8 +555,8 @@
|
||||
* Use headers and cookies when downloading subtitles by [damianoamatruda](https://github.com/damianoamatruda)
|
||||
* Parse resolution in info dictionary by [damianoamatruda](https://github.com/damianoamatruda)
|
||||
* More consistent warning messages by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan)
|
||||
* [documentation] Add deprecated options and aliases in readme
|
||||
* [documentation] Fix some minor mistakes
|
||||
* [docs] Add deprecated options and aliases in readme
|
||||
* [docs] Fix some minor mistakes
|
||||
|
||||
* [niconico] Partial fix adapted from [animelover1984/youtube-dl@b5eff52](https://github.com/animelover1984/youtube-dl/commit/b5eff52dd9ed5565672ea1694b38c9296db3fade) (login and smile formats still don't work)
|
||||
* [niconico] Add user extractor by [animelover1984](https://github.com/animelover1984)
|
||||
@@ -305,7 +565,7 @@
|
||||
* [stitcher] Merge from youtube-dl by [nixxo](https://github.com/nixxo)
|
||||
* [rcs] Improved extraction by [nixxo](https://github.com/nixxo)
|
||||
* [linuxacadamy] Improve regex
|
||||
* [youtube] Show if video is `private`, `unlisted` etc in info (`availability`) by [colethedj](https://github.com/colethedj) and [pukkandan](https://github.com/pukkandan)
|
||||
* [youtube] Show if video is `private`, `unlisted` etc in info (`availability`) by [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
|
||||
* [youtube] bugfix for channel playlist extraction
|
||||
* [nbc] Improve metadata extraction by [2ShedsJackson](https://github.com/2ShedsJackson)
|
||||
|
||||
@@ -322,15 +582,15 @@
|
||||
* [wimtv] Add extractor by [nixxo](https://github.com/nixxo)
|
||||
* [mtv] Add mtv.it and extract series metadata by [nixxo](https://github.com/nixxo)
|
||||
* [pluto.tv] Add extractor by [kevinoconnor7](https://github.com/kevinoconnor7)
|
||||
* [youtube] Rewrite comment extraction by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Rewrite comment extraction by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [embedthumbnail] Set mtime correctly
|
||||
* Refactor some postprocessor/downloader code by [pukkandan](https://github.com/pukkandan) and [shirt](https://github.com/shirt-dev)
|
||||
|
||||
|
||||
### 2021.03.07
|
||||
* [youtube] Fix history, mixes, community pages and trending by [pukkandan](https://github.com/pukkandan) and [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Fix private feeds/playlists on multi-channel accounts by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Extract alerts from continuation by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Fix history, mixes, community pages and trending by [pukkandan](https://github.com/pukkandan) and [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Fix private feeds/playlists on multi-channel accounts by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Extract alerts from continuation by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [cbs] Add support for ParamountPlus by [shirt](https://github.com/shirt-dev)
|
||||
* [mxplayer] Rewrite extractor with show support by [pukkandan](https://github.com/pukkandan) and [Ashish0804](https://github.com/Ashish0804)
|
||||
* [gedi] Improvements from youtube-dl by [nixxo](https://github.com/nixxo)
|
||||
@@ -342,7 +602,7 @@
|
||||
* [downloader] Fix bug for `ffmpeg`/`httpie`
|
||||
* [update] Fix updater removing the executable bit on some UNIX distros
|
||||
* [update] Fix current build hash for UNIX
|
||||
* [documentation] Include wget/curl/aria2c install instructions for Unix by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [docs] Include wget/curl/aria2c install instructions for Unix by [Ashish0804](https://github.com/Ashish0804)
|
||||
* Fix some videos downloading with `m3u8` extension
|
||||
* Remove "fixup is ignored" warning when fixup wasn't passed by user
|
||||
|
||||
@@ -351,7 +611,7 @@
|
||||
* [build] Fix bug
|
||||
|
||||
### 2021.03.03
|
||||
* [youtube] Use new browse API for continuation page extraction by [colethedj](https://github.com/colethedj) and [pukkandan](https://github.com/pukkandan)
|
||||
* [youtube] Use new browse API for continuation page extraction by [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
|
||||
* Fix HLS playlist downloading by [shirt](https://github.com/shirt-dev)
|
||||
* Merge youtube-dl: Upto [2021.03.03](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.03.03)
|
||||
* [mtv] Fix extractor
|
||||
@@ -399,7 +659,7 @@
|
||||
* [ffmpeg] Allow passing custom arguments before -i using `--ppa "ffmpeg_i1:ARGS"` syntax
|
||||
* Fix `--windows-filenames` removing `/` from UNIX paths
|
||||
* [hls] Show warning if pycryptodome is not found
|
||||
* [documentation] Improvements
|
||||
* [docs] Improvements
|
||||
* Fix documentation of `Extractor Options`
|
||||
* Document `all` in format selection
|
||||
* Document `playable_in_embed` in output templates
|
||||
@@ -427,7 +687,7 @@
|
||||
* Exclude `vcruntime140.dll` from UPX by [jbruchon](https://github.com/jbruchon)
|
||||
* Set version number based on UTC time, not local time
|
||||
* Publish on PyPi only if token is set
|
||||
* [documentation] Better document `--prefer-free-formats` and add `--no-prefer-free-format`
|
||||
* [docs] Better document `--prefer-free-formats` and add `--no-prefer-free-format`
|
||||
|
||||
|
||||
### 2021.02.15
|
||||
@@ -470,7 +730,7 @@
|
||||
* [movefiles] Fix compatibility with python2
|
||||
* [remuxvideo] Fix validation of conditional remux
|
||||
* [sponskrub] Don't raise error when the video does not exist
|
||||
* [documentation] Crypto is an optional dependency
|
||||
* [docs] Crypto is an optional dependency
|
||||
|
||||
|
||||
### 2021.02.04
|
||||
@@ -531,10 +791,10 @@
|
||||
* Merge youtube-dl: Upto [2021.01.24](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
|
||||
* Plugin support ([documentation](https://github.com/yt-dlp/yt-dlp#plugins))
|
||||
* **Multiple paths**: New option `-P`/`--paths` to give different paths for different types of files
|
||||
* The syntax is `-P "type:path" -P "type:path"` ([documentation](https://github.com/yt-dlp/yt-dlp#:~:text=-P,%20--paths%20TYPE:PATH))
|
||||
* The syntax is `-P "type:path" -P "type:path"`
|
||||
* Valid types are: home, temp, description, annotation, subtitle, infojson, thumbnail
|
||||
* Additionally, configuration file is taken from home directory or current directory ([documentation](https://github.com/yt-dlp/yt-dlp#:~:text=Home%20Configuration))
|
||||
* Allow passing different arguments to different external downloaders ([documentation](https://github.com/yt-dlp/yt-dlp#:~:text=--downloader-args%20NAME:ARGS))
|
||||
* Additionally, configuration file is taken from home directory or current directory
|
||||
* Allow passing different arguments to different external downloaders
|
||||
* [mildom] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
|
||||
* Warn when using old style `--external-downloader-args` and `--post-processor-args`
|
||||
* Fix `--no-overwrite` when using `--write-link`
|
||||
@@ -569,9 +829,9 @@
|
||||
* [roosterteeth.com] Fix for bonus episodes by [Zocker1999NET](https://github.com/Zocker1999NET)
|
||||
* [tiktok] Fix for when share_info is empty
|
||||
* [EmbedThumbnail] Fix bug due to incorrect function name
|
||||
* [documentation] Changed sponskrub links to point to [yt-dlp/SponSkrub](https://github.com/yt-dlp/SponSkrub) since I am now providing both linux and windows releases
|
||||
* [documentation] Change all links to correctly point to new fork URL
|
||||
* [documentation] Fixes typos
|
||||
* [docs] Changed sponskrub links to point to [yt-dlp/SponSkrub](https://github.com/yt-dlp/SponSkrub) since I am now providing both linux and windows releases
|
||||
* [docs] Change all links to correctly point to new fork URL
|
||||
* [docs] Fixes typos
|
||||
|
||||
|
||||
### 2021.01.12
|
||||
@@ -667,7 +927,7 @@
|
||||
* Redirect channel home to /video
|
||||
* Print youtube's warning message
|
||||
* Handle Multiple pages for feeds better
|
||||
* [youtube] Fix ytsearch not returning results sometimes due to promoted content by [colethedj](https://github.com/colethedj)
|
||||
* [youtube] Fix ytsearch not returning results sometimes due to promoted content by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [youtube] Temporary fix for automatic captions - disable json3 by [blackjack4494](https://github.com/blackjack4494)
|
||||
* Add --break-on-existing by [gergesh](https://github.com/gergesh)
|
||||
* Pre-check video IDs in the archive before downloading by [pukkandan](https://github.com/pukkandan)
|
||||
|
||||
39
Collaborators.md
Normal file
39
Collaborators.md
Normal file
@@ -0,0 +1,39 @@
|
||||
# Collaborators
|
||||
|
||||
This is a list of the collaborators of the project and their major contributions. See the [Changelog](Changelog.md) for more details.
|
||||
|
||||
You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [authors of youtube-dl](https://github.com/ytdl-org/youtube-dl/blob/master/AUTHORS)
|
||||
|
||||
|
||||
## [pukkandan](https://github.com/pukkandan)
|
||||
|
||||
[](https://ko-fi.com/pukkandan)
|
||||
|
||||
* Owner of the fork
|
||||
|
||||
|
||||
|
||||
## [shirt](https://github.com/shirt-dev)
|
||||
|
||||
[](https://ko-fi.com/shirt)
|
||||
|
||||
* Multithreading (`-N`) and aria2c support for fragment downloads
|
||||
* Support for media initialization and discontinuity in HLS
|
||||
* The self-updater (`-U`)
|
||||
|
||||
|
||||
|
||||
## [coletdjnz](https://github.com/coletdjnz)
|
||||
|
||||
[](https://github.com/sponsors/coletdjnz)
|
||||
|
||||
* YouTube improvements including: age-gate bypass, private playlists, multiple-clients (to avoid throttling) and a lot of under-the-hood improvements
|
||||
|
||||
|
||||
|
||||
## [Ashish0804](https://github.com/Ashish0804)
|
||||
|
||||
[](https://ko-fi.com/ashish0804)
|
||||
|
||||
* Added support for new websites Zee5, MXPlayer, DiscoveryPlusIndia, ShemarooMe, Utreon etc
|
||||
* Added playlist/series downloads for TubiTv, SonyLIV, Voot, HotStar etc
|
||||
18
Makefile
18
Makefile
@@ -13,7 +13,7 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites com
|
||||
.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites
|
||||
|
||||
clean-test:
|
||||
rm -rf *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png *.frag *.frag.urls *.frag.aria2
|
||||
rm -rf *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png *.frag *.frag.urls *.frag.aria2 test/testdata/player-*.js *.opus *.webp *.ttml *.vtt *.jpeg
|
||||
clean-dist:
|
||||
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap
|
||||
clean-cache:
|
||||
@@ -49,23 +49,11 @@ codetest:
|
||||
flake8 .
|
||||
|
||||
test:
|
||||
#nosetests --with-coverage --cover-package=yt_dlp --cover-html --verbose --processes 4 test
|
||||
nosetests --verbose test
|
||||
$(PYTHON) -m pytest
|
||||
$(MAKE) codetest
|
||||
|
||||
# Keep this list in sync with devscripts/run_tests.sh
|
||||
offlinetest: codetest
|
||||
$(PYTHON) -m nose --verbose test \
|
||||
--exclude test_age_restriction.py \
|
||||
--exclude test_download.py \
|
||||
--exclude test_iqiyi_sdk_interpreter.py \
|
||||
--exclude test_overwrites.py \
|
||||
--exclude test_socks.py \
|
||||
--exclude test_subtitles.py \
|
||||
--exclude test_write_annotations.py \
|
||||
--exclude test_youtube_lists.py \
|
||||
--exclude test_youtube_signature.py \
|
||||
--exclude test_post_hooks.py
|
||||
$(PYTHON) -m pytest -k "not download"
|
||||
|
||||
yt-dlp: yt_dlp/*.py yt_dlp/*/*.py
|
||||
mkdir -p zip
|
||||
|
||||
322
README.md
322
README.md
@@ -1,17 +1,16 @@
|
||||
<div align="center">
|
||||
|
||||
# YT-DLP
|
||||
A command-line program to download videos from YouTube and many other [video platforms](supportedsites.md)
|
||||
[](#readme)
|
||||
|
||||
<!-- GHA doesn't have for-the-badge style
|
||||
[](https://github.com/yt-dlp/yt-dlp/actions)
|
||||
-->
|
||||
[](https://github.com/yt-dlp/yt-dlp/releases/latest)
|
||||
[](LICENSE)
|
||||
[](https://github.com/yt-dlp/yt-dlp/releases/latest)
|
||||
[](https://github.com/yt-dlp/yt-dlp/actions)
|
||||
[](LICENSE)
|
||||
[](Collaborators.md#collaborators)
|
||||
[](supportedsites.md)
|
||||
[](https://discord.gg/H5MNcFW63r)
|
||||
[](https://yt-dlp.readthedocs.io)
|
||||
[](https://discord.gg/H5MNcFW63r)
|
||||
[](https://github.com/yt-dlp/yt-dlp/commits)
|
||||
[](https://github.com/yt-dlp/yt-dlp/commits)
|
||||
[](https://github.com/yt-dlp/yt-dlp/commits)
|
||||
[](https://github.com/yt-dlp/yt-dlp/releases/latest)
|
||||
[](https://pypi.org/project/yt-dlp)
|
||||
|
||||
@@ -53,6 +52,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
|
||||
* [Format Selection examples](#format-selection-examples)
|
||||
* [MODIFYING METADATA](#modifying-metadata)
|
||||
* [Modifying metadata examples](#modifying-metadata-examples)
|
||||
* [EXTRACTOR ARGUMENTS](#extractor-arguments)
|
||||
* [PLUGINS](#plugins)
|
||||
* [DEPRECATED OPTIONS](#deprecated-options)
|
||||
* [MORE](#more)
|
||||
@@ -66,27 +66,31 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/
|
||||
|
||||
* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection that what is possible by simply using `--format` ([examples](#format-selection-examples))
|
||||
|
||||
* **Merged with youtube-dl [commit/c2350ca](https://github.com/ytdl-org/youtube-dl/commit/c2350cac243ba1ec1586fe85b0d62d1b700047a2)**: (v2021.06.06) You get all the latest features and patches of [youtube-dl](https://github.com/ytdl-org/youtube-dl) in addition to all the features of [youtube-dlc](https://github.com/blackjack4494/yt-dlc)
|
||||
* **Merged with youtube-dl [commit/379f52a](https://github.com/ytdl-org/youtube-dl/commit/379f52a4954013767219d25099cce9e0f9401961)**: (v2021.06.06) You get all the latest features and patches of [youtube-dl](https://github.com/ytdl-org/youtube-dl) in addition to all the features of [youtube-dlc](https://github.com/blackjack4494/yt-dlc)
|
||||
|
||||
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico improvements are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
|
||||
|
||||
* **Youtube improvements**:
|
||||
* All Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`) supports downloading multiple pages of content
|
||||
* All Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`) and private playlists supports downloading multiple pages of content
|
||||
* Search (`ytsearch:`, `ytsearchdate:`), search URLs and in-channel search works
|
||||
* Mixes supports downloading multiple pages of content
|
||||
* Most (but not all) age-gated content can be downloaded without cookies
|
||||
* Partial workaround for throttling issue
|
||||
* Redirect channel's home URL automatically to `/video` to preserve the old behaviour
|
||||
* `255kbps` audio is extracted from youtube music if premium cookies are given
|
||||
* Youtube music Albums, channels etc can be downloaded
|
||||
|
||||
* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[:PROFILE]`
|
||||
|
||||
* **Split video by chapters**: Videos can be split into multiple files based on chapters using `--split-chapters`
|
||||
|
||||
* **Multi-threaded fragment downloads**: Download multiple fragments of m3u8/mpd videos in parallel. Use `--concurrent-fragments` (`-N`) option to set the number of threads used
|
||||
|
||||
* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats
|
||||
|
||||
* **New extractors**: AnimeLab, Philo MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive
|
||||
* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ
|
||||
|
||||
* **Fixed extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon
|
||||
* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds,
|
||||
|
||||
* **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details
|
||||
|
||||
@@ -94,11 +98,11 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/
|
||||
|
||||
* **Portable Configuration**: Configuration files are automatically loaded from the home and root directories. See [configuration](#configuration) for details
|
||||
|
||||
* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata`
|
||||
* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`
|
||||
|
||||
* **Other new options**: `--sleep-requests`, `--convert-thumbnails`, `--write-link`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc
|
||||
* **Other new options**: `--print`, `--sleep-requests`, `--convert-thumbnails`, `--write-link`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc
|
||||
|
||||
* **Improvements**: Multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection) etc
|
||||
* **Improvements**: Regex and other operators in `--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection) etc
|
||||
|
||||
* **Plugin extractors**: Extractors can be loaded from an external file. See [plugins](#plugins) for details
|
||||
|
||||
@@ -119,7 +123,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
|
||||
* The options `--id`, `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details
|
||||
* `avconv` is not supported as as an alternative to `ffmpeg`
|
||||
* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s.%(id)s.%(ext)s`. Instead, you may use `--compat-options filename`
|
||||
* The default [format sorting](sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order
|
||||
* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order
|
||||
* The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be prefered. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this
|
||||
* Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both
|
||||
* `--ignore-errors` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead
|
||||
@@ -127,10 +131,14 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
|
||||
* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-infojson`. Use `--compat-options no-attach-info-json` to revert this
|
||||
* `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
|
||||
* The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
|
||||
* All *experiences* of a funimation episode are considered as a single video. This behavior breaks existing archives. Use `--compat-options seperate-video-versions` to extract information from only the default player
|
||||
* Youtube live chat (if available) is considered as a subtitle. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent live chat from downloading
|
||||
* Youtube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
|
||||
* Unavailable videos are also listed for youtube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
|
||||
* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this
|
||||
* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead
|
||||
* Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
|
||||
* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the seperate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this.
|
||||
|
||||
For ease of use, a few more compat options are available:
|
||||
* `--compat-options all`: Use all compat options
|
||||
@@ -181,6 +189,8 @@ While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly
|
||||
* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the [sponskrub options](#sponskrub-sponsorblock-options). Licenced under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md)
|
||||
* [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licenced under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
|
||||
* [**pycryptodome**](https://github.com/Legrandin/pycryptodome) - For decrypting various data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
|
||||
* [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licenced under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE)
|
||||
* [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. Licenced under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE)
|
||||
* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licenced under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
|
||||
* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](http://rtmpdump.mplayerhq.hu)
|
||||
* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright)
|
||||
@@ -189,14 +199,14 @@ While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly
|
||||
|
||||
To use or redistribute the dependencies, you must agree to their respective licensing terms.
|
||||
|
||||
Note that the windows releases are already built with the python interpreter, mutagen and pycryptodome included.
|
||||
Note that the windows releases are already built with the python interpreter, mutagen, pycryptodome and websockets included.
|
||||
|
||||
### COMPILE
|
||||
|
||||
**For Windows**:
|
||||
To build the Windows executable, you must have pyinstaller (and optionally mutagen and pycryptodome)
|
||||
To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodome, websockets)
|
||||
|
||||
python3 -m pip install --upgrade pyinstaller mutagen pycryptodome
|
||||
python3 -m pip install --upgrade pyinstaller mutagen pycryptodome websockets
|
||||
|
||||
Once you have all the necessary dependencies installed, just run `py pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it.
|
||||
|
||||
@@ -207,7 +217,7 @@ You can also build the executable without any version info or metadata by using:
|
||||
Note that pyinstaller [does not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment
|
||||
|
||||
**For Unix**:
|
||||
You will need the required build tools: `python`, `make` (GNU), `pandoc`, `zip`, `nosetests`
|
||||
You will need the required build tools: `python`, `make` (GNU), `pandoc`, `zip`, `pytest`
|
||||
Then simply run `make`. You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files
|
||||
|
||||
**Note**: In either platform, `devscripts\update-version.py` can be used to automatically update the version number
|
||||
@@ -230,10 +240,10 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
(default) (Alias: --no-abort-on-error)
|
||||
--abort-on-error Abort downloading of further videos if an
|
||||
error occurs (Alias: --no-ignore-errors)
|
||||
--dump-user-agent Display the current browser identification
|
||||
--list-extractors List all supported extractors
|
||||
--dump-user-agent Display the current user-agent and exit
|
||||
--list-extractors List all supported extractors and exit
|
||||
--extractor-descriptions Output descriptions of all supported
|
||||
extractors
|
||||
extractors and exit
|
||||
--force-generic-extractor Force extraction to use the generic
|
||||
extractor
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For
|
||||
@@ -329,25 +339,24 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
COUNT views
|
||||
--max-views COUNT Do not download any videos with more than
|
||||
COUNT views
|
||||
--match-filter FILTER Generic video filter. Specify any key (see
|
||||
"OUTPUT TEMPLATE" for a list of available
|
||||
keys) to match if the key is present, !key
|
||||
to check if the key is not present,
|
||||
key>NUMBER (like "view_count > 12", also
|
||||
works with >=, <, <=, !=, =) to compare
|
||||
against a number, key = 'LITERAL' (like
|
||||
"uploader = 'Mike Smith'", also works with
|
||||
!=) to match against a string literal and &
|
||||
to require multiple matches. Values which
|
||||
are not known are excluded unless you put a
|
||||
question mark (?) after the operator. For
|
||||
example, to only match videos that have
|
||||
been liked more than 100 times and disliked
|
||||
less than 50 times (or the dislike
|
||||
functionality is not available at the given
|
||||
service), but who also have a description,
|
||||
use --match-filter "like_count > 100 &
|
||||
dislike_count <? 50 & description"
|
||||
--match-filter FILTER Generic video filter. Any field (see
|
||||
"OUTPUT TEMPLATE") can be compared with a
|
||||
number or a string using the operators
|
||||
defined in "Filtering formats". You can
|
||||
also simply specify a field to match if the
|
||||
field is present and "!field" to check if
|
||||
the field is not present. In addition,
|
||||
Python style regular expression matching
|
||||
can be done using "~=", and multiple
|
||||
filters can be checked with "&". Use a "\"
|
||||
to escape "&" or quotes if needed. Eg:
|
||||
--match-filter "!is_live & like_count>?100
|
||||
& description~=\'(?i)\bcats \& dogs\b\'"
|
||||
matches only videos that are not live, has
|
||||
a like count more than 100 (or the like
|
||||
field is not available), and also has a
|
||||
description that contains the phrase "cats
|
||||
& dogs" (ignoring case)
|
||||
--no-match-filter Do not use generic video filter (default)
|
||||
--no-playlist Download only the video, if the URL refers
|
||||
to a video and a playlist
|
||||
@@ -372,6 +381,9 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
(default is 1)
|
||||
-r, --limit-rate RATE Maximum download rate in bytes per second
|
||||
(e.g. 50K or 4.2M)
|
||||
--throttled-rate RATE Minimum download rate in bytes per second
|
||||
below which throttling is assumed and the
|
||||
video data is re-extracted (e.g. 100K)
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
"infinite"
|
||||
--fragment-retries RETRIES Number of retries for a fragment (default
|
||||
@@ -428,7 +440,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--downloader-args NAME:ARGS Give these arguments to the external
|
||||
downloader. Specify the downloader name and
|
||||
the arguments separated by a colon ":". You
|
||||
can use this option multiple times
|
||||
can use this option multiple times to give
|
||||
different arguments to different downloaders
|
||||
(Alias: --external-downloader-args)
|
||||
|
||||
## Filesystem Options:
|
||||
@@ -436,17 +449,17 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
stdin), one URL per line. Lines starting
|
||||
with '#', ';' or ']' are considered as
|
||||
comments and ignored
|
||||
-P, --paths TYPES:PATH The paths where the files should be
|
||||
-P, --paths [TYPES:]PATH The paths where the files should be
|
||||
downloaded. Specify the type of file and
|
||||
the path separated by a colon ":". All the
|
||||
same types as --output are supported.
|
||||
Additionally, you can also provide "home"
|
||||
and "temp" paths. All intermediary files
|
||||
are first downloaded to the temp path and
|
||||
then the final files are moved over to the
|
||||
home path after download is finished. This
|
||||
option is ignored if --output is an
|
||||
absolute path
|
||||
(default) and "temp" paths. All
|
||||
intermediary files are first downloaded to
|
||||
the temp path and then the final files are
|
||||
moved over to the home path after download
|
||||
is finished. This option is ignored if
|
||||
--output is an absolute path
|
||||
-o, --output [TYPES:]TEMPLATE Output filename template; see "OUTPUT
|
||||
TEMPLATE" for details
|
||||
--output-na-placeholder TEXT Placeholder value for unavailable meta
|
||||
@@ -512,7 +525,19 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
option)
|
||||
--cookies FILE File to read cookies from and dump cookie
|
||||
jar in
|
||||
--no-cookies Do not read/dump cookies (default)
|
||||
--no-cookies Do not read/dump cookies from/to file
|
||||
(default)
|
||||
--cookies-from-browser BROWSER[:PROFILE]
|
||||
Load cookies from a user profile of the
|
||||
given web browser. Currently supported
|
||||
browsers are: brave|chrome|chromium|edge|fi
|
||||
refox|opera|safari|vivaldi. You can specify
|
||||
the user profile name or directory using
|
||||
"BROWSER:PROFILE_NAME" or
|
||||
"BROWSER:PROFILE_PATH". If no profile is
|
||||
given, the most recently accessed one is
|
||||
used
|
||||
--no-cookies-from-browser Do not load cookies from browser (default)
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information (such
|
||||
as client ids and signatures) permanently.
|
||||
@@ -526,8 +551,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--no-write-thumbnail Do not write thumbnail image to disk
|
||||
(default)
|
||||
--write-all-thumbnails Write all thumbnail image formats to disk
|
||||
--list-thumbnails Simulate and list all available thumbnail
|
||||
formats
|
||||
--list-thumbnails List available thumbnails of each video.
|
||||
Simulate unless --no-simulate is used
|
||||
|
||||
## Internet Shortcut Options:
|
||||
--write-link Write an internet shortcut file, depending
|
||||
@@ -539,30 +564,34 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--write-desktop-link Write a .desktop Linux internet shortcut
|
||||
|
||||
## Verbosity and Simulation Options:
|
||||
-q, --quiet Activate quiet mode
|
||||
-q, --quiet Activate quiet mode. If used with
|
||||
--verbose, print the log to stderr
|
||||
--no-warnings Ignore warnings
|
||||
-s, --simulate Do not download the video and do not write
|
||||
anything to disk
|
||||
--no-simulate Download the video even if printing/listing
|
||||
options are used
|
||||
--ignore-no-formats-error Ignore "No video formats" error. Usefull
|
||||
for extracting metadata even if the video
|
||||
is not actually available for download
|
||||
for extracting metadata even if the videos
|
||||
are not actually available for download
|
||||
(experimental)
|
||||
--no-ignore-no-formats-error Throw error when no downloadable video
|
||||
formats are found (default)
|
||||
--skip-download Do not download the video but write all
|
||||
related files (Alias: --no-download)
|
||||
-O, --print TEMPLATE Simulate, quiet but print the given fields.
|
||||
Either a field name or similar formatting
|
||||
as the output template can be used
|
||||
-j, --dump-json Simulate, quiet but print JSON information.
|
||||
See "OUTPUT TEMPLATE" for a description of
|
||||
available keys
|
||||
-J, --dump-single-json Simulate, quiet but print JSON information
|
||||
for each command-line argument. If the URL
|
||||
refers to a playlist, dump the whole
|
||||
playlist information in a single line
|
||||
--print-json Be quiet and print the video information as
|
||||
JSON (video is still being downloaded)
|
||||
-O, --print TEMPLATE Quiet, but print the given fields for each
|
||||
video. Simulate unless --no-simulate is
|
||||
used. Either a field name or same syntax as
|
||||
the output template can be used
|
||||
-j, --dump-json Quiet, but print JSON information for each
|
||||
video. Simulate unless --no-simulate is
|
||||
used. See "OUTPUT TEMPLATE" for a
|
||||
description of available keys
|
||||
-J, --dump-single-json Quiet, but print JSON information for each
|
||||
url or infojson passed. Simulate unless
|
||||
--no-simulate is used. If the URL refers to
|
||||
a playlist, the whole playlist information
|
||||
is dumped in a single line
|
||||
--force-write-archive Force download archive entries to be
|
||||
written as far as no errors occur, even if
|
||||
-s or another simulation option is used
|
||||
@@ -630,9 +659,11 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--no-prefer-free-formats Don't give any special preference to free
|
||||
containers (default)
|
||||
--check-formats Check that the formats selected are
|
||||
actually downloadable (Experimental)
|
||||
-F, --list-formats List all available formats of requested
|
||||
videos
|
||||
actually downloadable
|
||||
--no-check-formats Do not check that the formats selected are
|
||||
actually downloadable
|
||||
-F, --list-formats List available formats of each video.
|
||||
Simulate unless --no-simulate is used
|
||||
--merge-output-format FORMAT If a merge is required (e.g.
|
||||
bestvideo+bestaudio), output to given
|
||||
container format. One of mkv, mp4, ogg,
|
||||
@@ -650,7 +681,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
(Alias: --write-automatic-subs)
|
||||
--no-write-auto-subs Do not write auto-generated subtitles
|
||||
(default) (Alias: --no-write-automatic-subs)
|
||||
--list-subs List all available subtitles for the video
|
||||
--list-subs List available subtitles of each video.
|
||||
Simulate unless --no-simulate is used
|
||||
--sub-format FORMAT Subtitle format, accepts formats
|
||||
preference, for example: "srt" or
|
||||
"ass/srt/best"
|
||||
@@ -685,7 +717,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--audio-format FORMAT Specify audio format to convert the audio
|
||||
to when -x is used. Currently supported
|
||||
formats are: best (default) or one of
|
||||
aac|flac|mp3|m4a|opus|vorbis|wav
|
||||
best|aac|flac|mp3|m4a|opus|vorbis|wav
|
||||
--audio-quality QUALITY Specify ffmpeg audio quality, insert a
|
||||
value between 0 (better) and 9 (worse) for
|
||||
VBR or a specific bitrate like 128K
|
||||
@@ -710,7 +742,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
Metadata, EmbedSubtitle, EmbedThumbnail,
|
||||
SubtitlesConvertor, ThumbnailsConvertor,
|
||||
VideoRemuxer, VideoConvertor, SponSkrub,
|
||||
FixupStretched, FixupM4a and FixupM3u8. The
|
||||
FixupStretched, FixupM4a, FixupM3u8,
|
||||
FixupTimestamp and FixupDuration. The
|
||||
supported executables are: AtomicParsley,
|
||||
FFmpeg, FFprobe, and SponSkrub. You can
|
||||
also specify "PP+EXE:ARGS" to give the
|
||||
@@ -734,32 +767,48 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--embed-subs Embed subtitles in the video (only for mp4,
|
||||
webm and mkv videos)
|
||||
--no-embed-subs Do not embed subtitles (default)
|
||||
--embed-thumbnail Embed thumbnail in the audio as cover art
|
||||
--embed-thumbnail Embed thumbnail in the video as cover art
|
||||
--no-embed-thumbnail Do not embed thumbnail (default)
|
||||
--add-metadata Write metadata to the video file
|
||||
--no-add-metadata Do not write metadata (default)
|
||||
--embed-metadata Embed metadata including chapter markers
|
||||
(if supported by the format) to the video
|
||||
file (Alias: --add-metadata)
|
||||
--no-embed-metadata Do not write metadata (default)
|
||||
(Alias: --no-add-metadata)
|
||||
--parse-metadata FROM:TO Parse additional metadata like title/artist
|
||||
from other fields; see "MODIFYING METADATA"
|
||||
for details
|
||||
--replace-in-metadata FIELDS REGEX REPLACE
|
||||
Replace text in a metadata field using the
|
||||
given regex. This option can be used
|
||||
multiple times
|
||||
--xattrs Write metadata to the video file's xattrs
|
||||
(using dublin core and xdg standards)
|
||||
--fixup POLICY Automatically correct known faults of the
|
||||
file. One of never (do nothing), warn (only
|
||||
emit a warning), detect_or_warn (the
|
||||
default; fix file if we can, warn
|
||||
otherwise)
|
||||
otherwise), force (try fixing even if file
|
||||
already exists
|
||||
--ffmpeg-location PATH Location of the ffmpeg binary; either the
|
||||
path to the binary or its containing
|
||||
directory
|
||||
--exec CMD Execute a command on the file after
|
||||
downloading and post-processing. Similar
|
||||
syntax to the output template can be used
|
||||
downloading and post-processing. Same
|
||||
syntax as the output template can be used
|
||||
to pass any field as arguments to the
|
||||
command. An additional field "filepath"
|
||||
that contains the final path of the
|
||||
downloaded file is also available. If no
|
||||
fields are passed, "%(filepath)s" is
|
||||
appended to the end of the command
|
||||
fields are passed, %(filepath)q is appended
|
||||
to the end of the command. This option can
|
||||
be used multiple times
|
||||
--no-exec Remove any previously defined --exec
|
||||
--exec-before-download CMD Execute a command before the actual
|
||||
download. The syntax is the same as --exec
|
||||
but "filepath" is not available. This
|
||||
option can be used multiple times
|
||||
--no-exec-before-download Remove any previously defined
|
||||
--exec-before-download
|
||||
--convert-subs FORMAT Convert the subtitles to another format
|
||||
(currently supported: srt|vtt|ass|lrc)
|
||||
(Alias: --convert-subtitles)
|
||||
@@ -806,18 +855,10 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--no-hls-split-discontinuity Do not split HLS playlists to different
|
||||
formats at discontinuities such as ad
|
||||
breaks (default)
|
||||
--youtube-include-dash-manifest Download the DASH manifests and related
|
||||
data on YouTube videos (default)
|
||||
(Alias: --no-youtube-skip-dash-manifest)
|
||||
--youtube-skip-dash-manifest Do not download the DASH manifests and
|
||||
related data on YouTube videos
|
||||
(Alias: --no-youtube-include-dash-manifest)
|
||||
--youtube-include-hls-manifest Download the HLS manifests and related data
|
||||
on YouTube videos (default)
|
||||
(Alias: --no-youtube-skip-hls-manifest)
|
||||
--youtube-skip-hls-manifest Do not download the HLS manifests and
|
||||
related data on YouTube videos
|
||||
(Alias: --no-youtube-include-hls-manifest)
|
||||
--extractor-args KEY:ARGS Pass these arguments to the extractor. See
|
||||
"EXTRACTOR ARGUMENTS" for details. You can
|
||||
use this option multiple times to give
|
||||
arguments for different extractors
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
@@ -892,10 +933,11 @@ The simplest usage of `-o` is not to set any template arguments when downloading
|
||||
It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.
|
||||
|
||||
The field names themselves (the part inside the parenthesis) can also have some special formatting:
|
||||
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)`, `%(id.3:7:-1)s`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields
|
||||
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
|
||||
1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
|
||||
1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
|
||||
1. **Default**: A default value can be specified for when the field is empty using a `|` seperator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
|
||||
1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `j`, `l`, `q` can be used for converting to **j**son, a comma seperated **l**ist and a string **q**uoted for the terminal respectively
|
||||
|
||||
To summarize, the general syntax for a field is:
|
||||
```
|
||||
@@ -932,6 +974,7 @@ The available fields are:
|
||||
- `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage
|
||||
- `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
|
||||
- `age_limit` (numeric): Age restriction for the video (years)
|
||||
- `live_status` (string): One of 'is_live', 'was_live', 'is_upcoming', 'not_live'
|
||||
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
|
||||
- `was_live` (boolean): Whether this video was originally a live stream
|
||||
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
|
||||
@@ -1011,7 +1054,7 @@ Available only when used in `--print`:
|
||||
|
||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
|
||||
|
||||
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKcj`, this will result in a `yt-dlp test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.
|
||||
|
||||
For numeric sequences you can use numeric related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
|
||||
|
||||
@@ -1095,7 +1138,7 @@ If you want to download multiple videos and they don't have the same formats ava
|
||||
|
||||
If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
|
||||
|
||||
You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg. If `--no-video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, if `--no-audio-multistreams` is used, all formats with an audio stream except the first one are ignored. For example, `-f bestvideo+best+bestaudio` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download and merge both formats while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.
|
||||
You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg. Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. For example, `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download and merge both formats while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.
|
||||
|
||||
## Filtering Formats
|
||||
|
||||
@@ -1140,7 +1183,7 @@ You can change the criteria for being considered the `best` by using `-S` (`--fo
|
||||
- `lang`: Language preference as given by the extractor
|
||||
- `quality`: The quality of the format as given by the extractor
|
||||
- `source`: Preference of the source as given by the extractor
|
||||
- `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native` > `m3u8` > `http_dash_segments` > other > `mms`/`rtsp` > unknown > `f4f`/`f4m`)
|
||||
- `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native`/`m3u8` > `http_dash_segments`> `websocket_frag` > other > `mms`/`rtsp` > unknown > `f4f`/`f4m`)
|
||||
- `vcodec`: Video Codec (`av01` > `vp9.2` > `vp9` > `h265` > `h264` > `vp8` > `h263` > `theora` > other > unknown)
|
||||
- `acodec`: Audio Codec (`opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `ac3` > `dts` > other > unknown)
|
||||
- `codec`: Equivalent to `vcodec,acodec`
|
||||
@@ -1294,13 +1337,39 @@ $ yt-dlp -S '+res:480,codec,br'
|
||||
|
||||
# MODIFYING METADATA
|
||||
|
||||
The metadata obtained the the extractors can be modified by using `--parse-metadata FROM:TO`. The general syntax is to give the name of a field or a template (with similar syntax to [output template](#output-template)) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.
|
||||
The metadata obtained the the extractors can be modified by using `--parse-metadata` and `--replace-in-metadata`
|
||||
|
||||
`--replace-in-metadata FIELDS REGEX REPLACE` is used to replace text in any metadata field using [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax). [Backreferences](https://docs.python.org/3/library/re.html?highlight=backreferences#re.sub) can be used in the replace string for advanced use.
|
||||
|
||||
The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or a template (with same syntax as [output template](#output-template)) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.
|
||||
|
||||
Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--add-metadata`.
|
||||
|
||||
This option also has a few special uses:
|
||||
* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. You can use this to set a different "description" and "synopsis", for example
|
||||
* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. Eg: `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)` will download the first vimeo video found in the description
|
||||
* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis"
|
||||
|
||||
For reference, these are the fields yt-dlp adds by default to the file metadata:
|
||||
|
||||
Metadata fields|From
|
||||
:---|:---
|
||||
`title`|`track` or `title`
|
||||
`date`|`upload_date`
|
||||
`description`, `synopsis`|`description`
|
||||
`purl`, `comment`|`webpage_url`
|
||||
`track`|`track_number`
|
||||
`artist`|`artist`, `creator`, `uploader` or `uploader_id`
|
||||
`genre`|`genre`
|
||||
`album`|`album`
|
||||
`album_artist`|`album_artist`
|
||||
`disc`|`disc_number`
|
||||
`show`|`series`
|
||||
`season_number`|`season_number`
|
||||
`episode_id`|`episode` or `episode_id`
|
||||
`episode_sort`|`episode_number`
|
||||
`language` of each stream|From the format's `language`
|
||||
**Note**: The file format may not support some of these fields
|
||||
|
||||
|
||||
## Modifying metadata examples
|
||||
|
||||
@@ -1319,8 +1388,35 @@ $ yt-dlp --parse-metadata '%(series)s S%(season_number)02dE%(episode_number)02d:
|
||||
# Set "comment" field in video metadata using description instead of webpage_url
|
||||
$ yt-dlp --parse-metadata 'description:(?s)(?P<meta_comment>.+)' --add-metadata
|
||||
|
||||
# Replace all spaces and "_" in title and uploader with a `-`
|
||||
$ yt-dlp --replace-in-metadata 'title,uploader' '[ _]' '-'
|
||||
|
||||
```
|
||||
|
||||
# EXTRACTOR ARGUMENTS
|
||||
|
||||
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) seperated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:player_client=android_agegate,web;include_live_dash" --extractor-args "funimation:version=uncut"`
|
||||
|
||||
The following extractors use this feature:
|
||||
* **youtube**
|
||||
* `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients
|
||||
* `player_skip`: `configs` - skip any requests for client configs and use defaults
|
||||
* `include_live_dash`: Include live dash formats (These formats don't download properly)
|
||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side).
|
||||
* `max_comments`: Maximum amount of comments to download (default all).
|
||||
* `max_comment_depth`: Maximum depth for nested comments. YouTube supports depths 1 or 2 (default).
|
||||
|
||||
* **funimation**
|
||||
* `language`: Languages to extract. Eg: `funimation:language=english,japanese`
|
||||
* `version`: The video version to extract - `uncut` or `simulcast`
|
||||
|
||||
* **vikiChannel**
|
||||
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
|
||||
|
||||
NOTE: These options may be changed/removed in the future without concern for backward compatibility
|
||||
|
||||
|
||||
# PLUGINS
|
||||
|
||||
Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`. Currently only `extractor` plugins are supported. Support for `downloader` and `postprocessor` plugins may be added in the future. See [ytdlp_plugins](ytdlp_plugins) for example.
|
||||
@@ -1331,8 +1427,8 @@ Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`. Currently
|
||||
|
||||
These are all the deprecated options and the current alternative to achieve the same effect
|
||||
|
||||
#### Not recommended
|
||||
While these options still work, their use is not recommended since there are other alternatives to achieve the same
|
||||
#### Redundant options
|
||||
While these options are redundant, they are still expected to be used due to their ease of use
|
||||
|
||||
--get-description --print description
|
||||
--get-duration --print duration_string
|
||||
@@ -1342,8 +1438,15 @@ While these options still work, their use is not recommended since there are oth
|
||||
--get-thumbnail --print thumbnail
|
||||
-e, --get-title --print title
|
||||
-g, --get-url --print urls
|
||||
-j, --dump-json --print "%()j"
|
||||
|
||||
|
||||
#### Not recommended
|
||||
While these options still work, their use is not recommended since there are other alternatives to achieve the same
|
||||
|
||||
--all-formats -f all
|
||||
--all-subs --sub-langs all --write-subs
|
||||
--print-json -j --no-simulate
|
||||
--autonumber-size NUMBER Use string formatting. Eg: %(autonumber)03d
|
||||
--autonumber-start NUMBER Use internal field formatting like %(autonumber+NUMBER)s
|
||||
--metadata-from-title FORMAT --parse-metadata "%(title)s:FORMAT"
|
||||
@@ -1352,8 +1455,17 @@ While these options still work, their use is not recommended since there are oth
|
||||
--list-formats-old --compat-options list-formats (Alias: --no-list-formats-as-table)
|
||||
--list-formats-as-table --compat-options -list-formats [Default] (Alias: --no-list-formats-old)
|
||||
--sponskrub-args ARGS --ppa "sponskrub:ARGS"
|
||||
--test Used by developers for testing extractors. Not intended for the end user
|
||||
--youtube-print-sig-code Used for testing youtube signatures
|
||||
--youtube-skip-dash-manifest --extractor-args "youtube:skip=dash" (Alias: --no-youtube-include-dash-manifest)
|
||||
--youtube-skip-hls-manifest --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest)
|
||||
--youtube-include-dash-manifest Default (Alias: --no-youtube-skip-dash-manifest)
|
||||
--youtube-include-hls-manifest Default (Alias: --no-youtube-skip-hls-manifest)
|
||||
|
||||
|
||||
#### Developer options
|
||||
These options are not intended to be used by the end-user
|
||||
|
||||
--test Download only part of video for testing extractors
|
||||
--youtube-print-sig-code For testing youtube signatures
|
||||
|
||||
|
||||
#### Old aliases
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 4.2 KiB |
BIN
devscripts/logo.ico
Normal file
BIN
devscripts/logo.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 40 KiB |
@@ -1,17 +1,16 @@
|
||||
@setlocal
|
||||
@echo off
|
||||
cd /d %~dp0..
|
||||
|
||||
rem Keep this list in sync with the `offlinetest` target in Makefile
|
||||
set DOWNLOAD_TESTS="age_restriction^|download^|iqiyi_sdk_interpreter^|socks^|subtitles^|write_annotations^|youtube_lists^|youtube_signature^|post_hooks"
|
||||
|
||||
if "%YTDL_TEST_SET%" == "core" (
|
||||
set test_set="-I test_("%DOWNLOAD_TESTS%")\.py"
|
||||
set multiprocess_args=""
|
||||
) else if "%YTDL_TEST_SET%" == "download" (
|
||||
set test_set="-I test_(?!"%DOWNLOAD_TESTS%").+\.py"
|
||||
set multiprocess_args="--processes=4 --process-timeout=540"
|
||||
if ["%~1"]==[""] (
|
||||
set "test_set="
|
||||
) else if ["%~1"]==["core"] (
|
||||
set "test_set=-k "not download""
|
||||
) else if ["%~1"]==["download"] (
|
||||
set "test_set=-k download"
|
||||
) else (
|
||||
echo YTDL_TEST_SET is not set or invalid
|
||||
echo.Invalid test type "%~1". Use "core" ^| "download"
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
nosetests test --verbose %test_set:"=% %multiprocess_args:"=%
|
||||
pytest %test_set%
|
||||
|
||||
@@ -1,22 +1,14 @@
|
||||
#!/bin/bash
|
||||
#!/bin/sh
|
||||
|
||||
# Keep this list in sync with the `offlinetest` target in Makefile
|
||||
DOWNLOAD_TESTS="age_restriction|download|iqiyi_sdk_interpreter|overwrites|socks|subtitles|write_annotations|youtube_lists|youtube_signature|post_hooks"
|
||||
if [ -z $1 ]; then
|
||||
test_set='test'
|
||||
elif [ $1 = 'core' ]; then
|
||||
test_set='not download'
|
||||
elif [ $1 = 'download' ]; then
|
||||
test_set='download'
|
||||
else
|
||||
echo 'Invalid test type "'$1'". Use "core" | "download"'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
test_set=""
|
||||
multiprocess_args=""
|
||||
|
||||
case "$YTDL_TEST_SET" in
|
||||
core)
|
||||
test_set="-I test_($DOWNLOAD_TESTS)\.py"
|
||||
;;
|
||||
download)
|
||||
test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py"
|
||||
multiprocess_args="--processes=4 --process-timeout=540"
|
||||
;;
|
||||
*)
|
||||
break
|
||||
;;
|
||||
esac
|
||||
|
||||
nosetests test --verbose $test_set $multiprocess_args
|
||||
python3 -m pytest -k "$test_set"
|
||||
|
||||
5
docs/Collaborators.md
Normal file
5
docs/Collaborators.md
Normal file
@@ -0,0 +1,5 @@
|
||||
---
|
||||
orphan: true
|
||||
---
|
||||
```{include} ../Collaborators.md
|
||||
```
|
||||
14
pyinst.py
14
pyinst.py
@@ -6,6 +6,7 @@ import sys
|
||||
# import os
|
||||
import platform
|
||||
|
||||
from PyInstaller.utils.hooks import collect_submodules
|
||||
from PyInstaller.utils.win32.versioninfo import (
|
||||
VarStruct, VarFileInfo, StringStruct, StringTable,
|
||||
StringFileInfo, FixedFileInfo, VSVersionInfo, SetVersion,
|
||||
@@ -66,16 +67,15 @@ VERSION_FILE = VSVersionInfo(
|
||||
]
|
||||
)
|
||||
|
||||
dependancies = ['Crypto', 'mutagen'] + collect_submodules('websockets')
|
||||
excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc']
|
||||
|
||||
PyInstaller.__main__.run([
|
||||
'--name=yt-dlp%s' % _x86,
|
||||
'--onefile',
|
||||
'--icon=devscripts/cloud.ico',
|
||||
'--exclude-module=youtube_dl',
|
||||
'--exclude-module=youtube_dlc',
|
||||
'--exclude-module=test',
|
||||
'--exclude-module=ytdlp_plugins',
|
||||
'--hidden-import=mutagen',
|
||||
'--hidden-import=Crypto',
|
||||
'--icon=devscripts/logo.ico',
|
||||
*[f'--exclude-module={module}' for module in excluded_modules],
|
||||
*[f'--hidden-import={module}' for module in dependancies],
|
||||
'--upx-exclude=vcruntime140.dll',
|
||||
'yt_dlp/__main__.py',
|
||||
])
|
||||
|
||||
4
pytest.ini
Normal file
4
pytest.ini
Normal file
@@ -0,0 +1,4 @@
|
||||
[pytest]
|
||||
addopts = -ra -v --strict-markers
|
||||
markers =
|
||||
download
|
||||
@@ -1,2 +1,3 @@
|
||||
mutagen
|
||||
pycryptodome
|
||||
websockets
|
||||
|
||||
2
setup.py
2
setup.py
@@ -19,7 +19,7 @@ LONG_DESCRIPTION = '\n\n'.join((
|
||||
'**PS**: Some links in this document will not work since this is a copy of the README.md from Github',
|
||||
open('README.md', 'r', encoding='utf-8').read()))
|
||||
|
||||
REQUIREMENTS = ['mutagen', 'pycryptodome']
|
||||
REQUIREMENTS = ['mutagen', 'pycryptodome', 'websockets']
|
||||
|
||||
if sys.argv[1:2] == ['py2exe']:
|
||||
raise NotImplementedError('py2exe is not currently supported; instead, use "pyinst.py" to build with pyinstaller')
|
||||
|
||||
@@ -95,6 +95,7 @@
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **Bandcamp:weekly**
|
||||
- **BandcampMusic**
|
||||
- **bangumi.bilibili.com**: BiliBili番剧
|
||||
- **bbc**: BBC
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
@@ -129,6 +130,7 @@
|
||||
- **BitChuteChannel**
|
||||
- **bitwave:replay**
|
||||
- **bitwave:stream**
|
||||
- **BlackboardCollaborate**
|
||||
- **BleacherReport**
|
||||
- **BleacherReportCMS**
|
||||
- **Bloomberg**
|
||||
@@ -225,8 +227,7 @@
|
||||
- **Culturebox**
|
||||
- **CultureUnplugged**
|
||||
- **curiositystream**
|
||||
- **curiositystream:collections**
|
||||
- **curiositystream:series**
|
||||
- **curiositystream:collection**
|
||||
- **CWTV**
|
||||
- **DagelijkseKost**: dagelijksekost.een.be
|
||||
- **DailyMail**
|
||||
@@ -260,6 +261,7 @@
|
||||
- **dlive:vod**
|
||||
- **DoodStream**
|
||||
- **Dotsub**
|
||||
- **Douyin**
|
||||
- **DouyuShow**
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **DPlay**
|
||||
@@ -295,6 +297,7 @@
|
||||
- **Engadget**
|
||||
- **Eporner**
|
||||
- **EroProfile**
|
||||
- **EroProfile:album**
|
||||
- **Escapist**
|
||||
- **ESPN**
|
||||
- **ESPNArticle**
|
||||
@@ -307,6 +310,7 @@
|
||||
- **EyedoTV**
|
||||
- **facebook**
|
||||
- **FacebookPluginsVideo**
|
||||
- **fancode:live**
|
||||
- **fancode:vod**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
@@ -344,6 +348,8 @@
|
||||
- **FrontendMastersLesson**
|
||||
- **FujiTVFODPlus7**
|
||||
- **Funimation**
|
||||
- **funimation:page**
|
||||
- **funimation:show**
|
||||
- **Funk**
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
@@ -497,8 +503,6 @@
|
||||
- **LinuxAcademy**
|
||||
- **LiTV**
|
||||
- **LiveJournal**
|
||||
- **LiveLeak**
|
||||
- **LiveLeakEmbed**
|
||||
- **livestream**
|
||||
- **livestream:original**
|
||||
- **LnkGo**
|
||||
@@ -551,6 +555,8 @@
|
||||
- **MinistryGrid**
|
||||
- **Minoto**
|
||||
- **miomio.tv**
|
||||
- **mirrativ**
|
||||
- **mirrativ:user**
|
||||
- **MiTele**: mitele.es
|
||||
- **mixcloud**
|
||||
- **mixcloud:playlist**
|
||||
@@ -702,6 +708,8 @@
|
||||
- **OnionStudios**
|
||||
- **Ooyala**
|
||||
- **OoyalaExternal**
|
||||
- **openrec**
|
||||
- **openrec:capture**
|
||||
- **OraTV**
|
||||
- **orf:burgenland**: Radio Burgenland
|
||||
- **orf:fm4**: radio FM4
|
||||
@@ -727,6 +735,8 @@
|
||||
- **PalcoMP3:video**
|
||||
- **pandora.tv**: 판도라TV
|
||||
- **ParamountNetwork**
|
||||
- **ParamountPlus**
|
||||
- **ParamountPlusSeries**
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Parlview**
|
||||
- **Patreon**
|
||||
@@ -769,6 +779,7 @@
|
||||
- **PopcornTV**
|
||||
- **PornCom**
|
||||
- **PornerBros**
|
||||
- **PornFlip**
|
||||
- **PornHd**
|
||||
- **PornHub**: PornHub and Thumbzilla
|
||||
- **PornHubPagedVideoList**
|
||||
@@ -811,6 +822,9 @@
|
||||
- **RCS**
|
||||
- **RCSEmbeds**
|
||||
- **RCSVarious**
|
||||
- **RCTIPlus**
|
||||
- **RCTIPlusSeries**
|
||||
- **RCTIPlusTV**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedBull**
|
||||
- **RedBullEmbed**
|
||||
@@ -869,6 +883,7 @@
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
- **schooltv**
|
||||
- **ScienceChannel**
|
||||
- **screen.yahoo:search**: Yahoo screen search
|
||||
- **Screencast**
|
||||
- **ScreencastOMatic**
|
||||
@@ -1007,7 +1022,6 @@
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **ThisOldHouse**
|
||||
- **ThisVid**
|
||||
- **TikTok**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **TMZ**
|
||||
@@ -1104,6 +1118,7 @@
|
||||
- **ustream:channel**
|
||||
- **ustudio**
|
||||
- **ustudio:embed**
|
||||
- **Utreon**
|
||||
- **Varzesh3**
|
||||
- **Vbox7**
|
||||
- **VeeHD**
|
||||
|
||||
@@ -22,6 +22,14 @@ from yt_dlp.utils import (
|
||||
)
|
||||
|
||||
|
||||
if "pytest" in sys.modules:
|
||||
import pytest
|
||||
is_download_test = pytest.mark.download
|
||||
else:
|
||||
def is_download_test(testClass):
|
||||
return testClass
|
||||
|
||||
|
||||
def get_params(override=None):
|
||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
||||
"parameters.json")
|
||||
@@ -190,7 +198,10 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
expect_dict(self, got_dict, expected_dict)
|
||||
# Check for the presence of mandatory fields
|
||||
if got_dict.get('_type') not in ('playlist', 'multi_video'):
|
||||
for key in ('id', 'url', 'title', 'ext'):
|
||||
mandatory_fields = ['id', 'title']
|
||||
if expected_dict.get('ext'):
|
||||
mandatory_fields.extend(('url', 'ext'))
|
||||
for key in mandatory_fields:
|
||||
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
|
||||
# Check for mandatory fields that are automatically set by YoutubeDL
|
||||
for key in ['webpage_url', 'extractor', 'extractor_key']:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
{
|
||||
"check_formats": false,
|
||||
"consoletitle": false,
|
||||
"continuedl": true,
|
||||
"forcedescription": false,
|
||||
|
||||
@@ -35,13 +35,13 @@ class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler)
|
||||
assert False
|
||||
|
||||
|
||||
class TestIE(InfoExtractor):
|
||||
class DummyIE(InfoExtractor):
|
||||
pass
|
||||
|
||||
|
||||
class TestInfoExtractor(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.ie = TestIE(FakeYDL())
|
||||
self.ie = DummyIE(FakeYDL())
|
||||
|
||||
def test_ie_key(self):
|
||||
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
|
||||
|
||||
@@ -10,14 +10,15 @@ import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import copy
|
||||
import json
|
||||
|
||||
from test.helper import FakeYDL, assertRegexpMatches
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.compat import compat_str, compat_urllib_error
|
||||
from yt_dlp.compat import compat_os_name, compat_setenv, compat_str, compat_urllib_error
|
||||
from yt_dlp.extractor import YoutubeIE
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
from yt_dlp.postprocessor.common import PostProcessor
|
||||
from yt_dlp.utils import ExtractorError, int_or_none, match_filter_func
|
||||
from yt_dlp.utils import ExtractorError, int_or_none, match_filter_func, LazyList
|
||||
|
||||
TEST_URL = 'http://localhost/sample.mp4'
|
||||
|
||||
@@ -35,6 +36,9 @@ class YDL(FakeYDL):
|
||||
def to_screen(self, msg):
|
||||
self.msgs.append(msg)
|
||||
|
||||
def dl(self, *args, **kwargs):
|
||||
assert False, 'Downloader must not be invoked for test_YoutubeDL'
|
||||
|
||||
|
||||
def _make_result(formats, **kwargs):
|
||||
res = {
|
||||
@@ -117,35 +121,24 @@ class TestFormatSelection(unittest.TestCase):
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
ydl = YDL({'format': '20/47'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], '47')
|
||||
def test(inp, *expected, multi=False):
|
||||
ydl = YDL({
|
||||
'format': inp,
|
||||
'allow_multiple_video_streams': multi,
|
||||
'allow_multiple_audio_streams': multi,
|
||||
})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = map(lambda x: x['format_id'], ydl.downloaded_info_dicts)
|
||||
self.assertEqual(list(downloaded), list(expected))
|
||||
|
||||
ydl = YDL({'format': '20/71/worst'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], '35')
|
||||
|
||||
ydl = YDL()
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], '2')
|
||||
|
||||
ydl = YDL({'format': 'webm/mp4'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], '47')
|
||||
|
||||
ydl = YDL({'format': '3gp/40/mp4'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], '35')
|
||||
|
||||
ydl = YDL({'format': 'example-with-dashes'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'example-with-dashes')
|
||||
test('20/47', '47')
|
||||
test('20/71/worst', '35')
|
||||
test(None, '2')
|
||||
test('webm/mp4', '47')
|
||||
test('3gp/40/mp4', '35')
|
||||
test('example-with-dashes', 'example-with-dashes')
|
||||
test('all', '35', 'example-with-dashes', '45', '47', '2') # Order doesn't actually matter for this
|
||||
test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
|
||||
|
||||
def test_format_selection_audio(self):
|
||||
formats = [
|
||||
@@ -461,14 +454,13 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def test_invalid_format_specs(self):
|
||||
def assert_syntax_error(format_spec):
|
||||
ydl = YDL({'format': format_spec})
|
||||
info_dict = _make_result([{'format_id': 'foo', 'url': TEST_URL}])
|
||||
self.assertRaises(SyntaxError, ydl.process_ie_result, info_dict)
|
||||
self.assertRaises(SyntaxError, YDL, {'format': format_spec})
|
||||
|
||||
assert_syntax_error('bestvideo,,best')
|
||||
assert_syntax_error('+bestaudio')
|
||||
assert_syntax_error('bestvideo+')
|
||||
assert_syntax_error('/')
|
||||
assert_syntax_error('[720<height]')
|
||||
|
||||
def test_format_filtering(self):
|
||||
formats = [
|
||||
@@ -656,6 +648,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'title1': '$PATH',
|
||||
'title2': '%PATH%',
|
||||
'title3': 'foo/bar\\test',
|
||||
'title4': 'foo "bar" test',
|
||||
'timestamp': 1618488000,
|
||||
'duration': 100000,
|
||||
'playlist_index': 1,
|
||||
@@ -665,28 +658,35 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
}
|
||||
|
||||
def test_prepare_outtmpl_and_filename(self):
|
||||
def test(tmpl, expected, **params):
|
||||
def test(tmpl, expected, *, info=None, **params):
|
||||
params['outtmpl'] = tmpl
|
||||
ydl = YoutubeDL(params)
|
||||
ydl._num_downloads = 1
|
||||
self.assertEqual(ydl.validate_outtmpl(tmpl), None)
|
||||
|
||||
outtmpl, tmpl_dict = ydl.prepare_outtmpl(tmpl, self.outtmpl_info)
|
||||
out = outtmpl % tmpl_dict
|
||||
fname = ydl.prepare_filename(self.outtmpl_info)
|
||||
outtmpl, tmpl_dict = ydl.prepare_outtmpl(tmpl, info or self.outtmpl_info)
|
||||
out = ydl.escape_outtmpl(outtmpl) % tmpl_dict
|
||||
fname = ydl.prepare_filename(info or self.outtmpl_info)
|
||||
|
||||
if callable(expected):
|
||||
self.assertTrue(expected(out))
|
||||
self.assertTrue(expected(fname))
|
||||
elif isinstance(expected, compat_str):
|
||||
self.assertEqual((out, fname), (expected, expected))
|
||||
else:
|
||||
self.assertEqual((out, fname), expected)
|
||||
if not isinstance(expected, (list, tuple)):
|
||||
expected = (expected, expected)
|
||||
for (name, got), expect in zip((('outtmpl', out), ('filename', fname)), expected):
|
||||
if callable(expect):
|
||||
self.assertTrue(expect(got), f'Wrong {name} from {tmpl}')
|
||||
else:
|
||||
self.assertEqual(got, expect, f'Wrong {name} from {tmpl}')
|
||||
|
||||
# Side-effects
|
||||
original_infodict = dict(self.outtmpl_info)
|
||||
test('foo.bar', 'foo.bar')
|
||||
original_infodict['epoch'] = self.outtmpl_info.get('epoch')
|
||||
self.assertTrue(isinstance(original_infodict['epoch'], int))
|
||||
test('%(epoch)d', int_or_none)
|
||||
self.assertEqual(original_infodict, self.outtmpl_info)
|
||||
|
||||
# Auto-generated fields
|
||||
test('%(id)s.%(ext)s', '1234.mp4')
|
||||
test('%(duration_string)s', ('27:46:40', '27-46-40'))
|
||||
test('%(epoch)d', int_or_none)
|
||||
test('%(resolution)s', '1080p')
|
||||
test('%(playlist_index)s', '001')
|
||||
test('%(autonumber)s', '00001')
|
||||
@@ -694,20 +694,41 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
test('%(autonumber)s', '001', autonumber_size=3)
|
||||
|
||||
# Escaping %
|
||||
test('%', '%')
|
||||
test('%%', '%')
|
||||
test('%%%%', '%%')
|
||||
test('%s', '%s')
|
||||
test('%%%s', '%%s')
|
||||
test('%d', '%d')
|
||||
test('%abc%', '%abc%')
|
||||
test('%%(width)06d.%(ext)s', '%(width)06d.mp4')
|
||||
test('%%%(height)s', '%1080')
|
||||
test('%(width)06d.%(ext)s', 'NA.mp4')
|
||||
test('%(width)06d.%%(ext)s', 'NA.%(ext)s')
|
||||
test('%%(width)06d.%(ext)s', '%(width)06d.mp4')
|
||||
|
||||
# ID sanitization
|
||||
test('%(id)s', '_abcd', info={'id': '_abcd'})
|
||||
test('%(some_id)s', '_abcd', info={'some_id': '_abcd'})
|
||||
test('%(formats.0.id)s', '_abcd', info={'formats': [{'id': '_abcd'}]})
|
||||
test('%(id)s', '-abcd', info={'id': '-abcd'})
|
||||
test('%(id)s', '.abcd', info={'id': '.abcd'})
|
||||
test('%(id)s', 'ab__cd', info={'id': 'ab__cd'})
|
||||
test('%(id)s', ('ab:cd', 'ab -cd'), info={'id': 'ab:cd'})
|
||||
|
||||
# Invalid templates
|
||||
self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%'), ValueError))
|
||||
self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError))
|
||||
test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none')
|
||||
test('%()s', 'NA')
|
||||
test('%s', '%s')
|
||||
test('%d', '%d')
|
||||
test('%(..)s', 'NA')
|
||||
|
||||
# Entire info_dict
|
||||
def expect_same_infodict(out):
|
||||
got_dict = json.loads(out)
|
||||
for info_field, expected in self.outtmpl_info.items():
|
||||
self.assertEqual(got_dict.get(info_field), expected, info_field)
|
||||
return True
|
||||
|
||||
test('%()j', (expect_same_infodict, str))
|
||||
|
||||
# NA placeholder
|
||||
NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s'
|
||||
@@ -738,13 +759,26 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
test('%(width|0)04d', '0000')
|
||||
test('a%(width|)d', 'a', outtmpl_na_placeholder='none')
|
||||
|
||||
# Internal formatting
|
||||
FORMATS = self.outtmpl_info['formats']
|
||||
sanitize = lambda x: x.replace(':', ' -').replace('"', "'")
|
||||
|
||||
# Custom type casting
|
||||
test('%(formats.:.id)l', 'id1, id2, id3')
|
||||
test('%(ext)l', 'mp4')
|
||||
test('%(formats.:.id) 15l', ' id1, id2, id3')
|
||||
test('%(formats)j', (json.dumps(FORMATS), sanitize(json.dumps(FORMATS))))
|
||||
if compat_os_name == 'nt':
|
||||
test('%(title4)q', ('"foo \\"bar\\" test"', "'foo _'bar_' test'"))
|
||||
else:
|
||||
test('%(title4)q', ('\'foo "bar" test\'', "'foo 'bar' test'"))
|
||||
|
||||
# Internal formatting
|
||||
test('%(timestamp-1000>%H-%M-%S)s', '11-43-20')
|
||||
test('%(title|%)s %(title|%%)s', '% %%')
|
||||
test('%(id+1-height+3)05d', '00158')
|
||||
test('%(width+100)05d', 'NA')
|
||||
test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % str(FORMATS[0]).replace(':', ' -')))
|
||||
test('%(formats.0)r', (repr(FORMATS[0]), repr(FORMATS[0]).replace(':', ' -')))
|
||||
test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % sanitize(str(FORMATS[0]))))
|
||||
test('%(formats.0)r', (repr(FORMATS[0]), sanitize(repr(FORMATS[0]))))
|
||||
test('%(height.0)03d', '001')
|
||||
test('%(-height.0)04d', '-001')
|
||||
test('%(formats.-1.id)s', FORMATS[-1]['id'])
|
||||
@@ -754,11 +788,22 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
test('%(formats.0.id.-1+id)f', '1235.000000')
|
||||
test('%(formats.0.id.-1+formats.1.id.-1)d', '3')
|
||||
|
||||
# Laziness
|
||||
def gen():
|
||||
yield from range(5)
|
||||
raise self.assertTrue(False, 'LazyList should not be evaluated till here')
|
||||
test('%(key.4)s', '4', info={'key': LazyList(gen())})
|
||||
|
||||
# Empty filename
|
||||
test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4')
|
||||
# test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme
|
||||
# test('%(foo|)s', ('', '_')) # fixme
|
||||
|
||||
# Environment variable expansion for prepare_filename
|
||||
compat_setenv('__yt_dlp_var', 'expanded')
|
||||
envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var'
|
||||
test(envvar, (envvar, 'expanded'))
|
||||
|
||||
# Path expansion and escaping
|
||||
test('Hello %(title1)s', 'Hello $PATH')
|
||||
test('Hello %(title2)s', 'Hello %PATH%')
|
||||
|
||||
@@ -7,8 +7,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import try_rm
|
||||
|
||||
from test.helper import try_rm, is_download_test
|
||||
|
||||
from yt_dlp import YoutubeDL
|
||||
|
||||
@@ -32,6 +31,7 @@ def _download_restricted(url, filename, age):
|
||||
return res
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestAgeRestriction(unittest.TestCase):
|
||||
def _assert_restricted(self, url, filename, age, old_age=None):
|
||||
self.assertTrue(_download_restricted(url, filename, old_age))
|
||||
|
||||
96
test/test_cookies.py
Normal file
96
test/test_cookies.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import unittest
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from yt_dlp import cookies
|
||||
from yt_dlp.cookies import (
|
||||
CRYPTO_AVAILABLE,
|
||||
LinuxChromeCookieDecryptor,
|
||||
MacChromeCookieDecryptor,
|
||||
WindowsChromeCookieDecryptor,
|
||||
YDLLogger,
|
||||
parse_safari_cookies,
|
||||
pbkdf2_sha1,
|
||||
)
|
||||
|
||||
|
||||
class MonkeyPatch:
|
||||
def __init__(self, module, temporary_values):
|
||||
self._module = module
|
||||
self._temporary_values = temporary_values
|
||||
self._backup_values = {}
|
||||
|
||||
def __enter__(self):
|
||||
for name, temp_value in self._temporary_values.items():
|
||||
self._backup_values[name] = getattr(self._module, name)
|
||||
setattr(self._module, name, temp_value)
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
for name, backup_value in self._backup_values.items():
|
||||
setattr(self._module, name, backup_value)
|
||||
|
||||
|
||||
class TestCookies(unittest.TestCase):
|
||||
def test_chrome_cookie_decryptor_linux_derive_key(self):
|
||||
key = LinuxChromeCookieDecryptor.derive_key(b'abc')
|
||||
self.assertEqual(key, b'7\xa1\xec\xd4m\xfcA\xc7\xb19Z\xd0\x19\xdcM\x17')
|
||||
|
||||
def test_chrome_cookie_decryptor_mac_derive_key(self):
|
||||
key = MacChromeCookieDecryptor.derive_key(b'abc')
|
||||
self.assertEqual(key, b'Y\xe2\xc0\xd0P\xf6\xf4\xe1l\xc1\x8cQ\xcb|\xcdY')
|
||||
|
||||
def test_chrome_cookie_decryptor_linux_v10(self):
|
||||
with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}):
|
||||
encrypted_value = b'v10\xccW%\xcd\xe6\xe6\x9fM" \xa7\xb0\xca\xe4\x07\xd6'
|
||||
value = 'USD'
|
||||
decryptor = LinuxChromeCookieDecryptor('Chrome', YDLLogger())
|
||||
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
||||
|
||||
def test_chrome_cookie_decryptor_linux_v11(self):
|
||||
with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b'',
|
||||
'KEYRING_AVAILABLE': True}):
|
||||
encrypted_value = b'v11#\x81\x10>`w\x8f)\xc0\xb2\xc1\r\xf4\x1al\xdd\x93\xfd\xf8\xf8N\xf2\xa9\x83\xf1\xe9o\x0elVQd'
|
||||
value = 'tz=Europe.London'
|
||||
decryptor = LinuxChromeCookieDecryptor('Chrome', YDLLogger())
|
||||
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
||||
|
||||
@unittest.skipIf(not CRYPTO_AVAILABLE, 'cryptography library not available')
|
||||
def test_chrome_cookie_decryptor_windows_v10(self):
|
||||
with MonkeyPatch(cookies, {
|
||||
'_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&'
|
||||
}):
|
||||
encrypted_value = b'v10T\xb8\xf3\xb8\x01\xa7TtcV\xfc\x88\xb8\xb8\xef\x05\xb5\xfd\x18\xc90\x009\xab\xb1\x893\x85)\x87\xe1\xa9-\xa3\xad='
|
||||
value = '32101439'
|
||||
decryptor = WindowsChromeCookieDecryptor('', YDLLogger())
|
||||
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
||||
|
||||
def test_chrome_cookie_decryptor_mac_v10(self):
|
||||
with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}):
|
||||
encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc'
|
||||
value = '2021-06-01-22'
|
||||
decryptor = MacChromeCookieDecryptor('', YDLLogger())
|
||||
self.assertEqual(decryptor.decrypt(encrypted_value), value)
|
||||
|
||||
def test_safari_cookie_parsing(self):
|
||||
cookies = \
|
||||
b'cook\x00\x00\x00\x01\x00\x00\x00i\x00\x00\x01\x00\x01\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00Y' \
|
||||
b'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x008\x00\x00\x00B\x00\x00\x00F\x00\x00\x00H' \
|
||||
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x03\xa5>\xc3A\x00\x00\x80\xc3\x07:\xc3A' \
|
||||
b'localhost\x00foo\x00/\x00test%20%3Bcookie\x00\x00\x00\x054\x07\x17 \x05\x00\x00\x00Kbplist00\xd1\x01' \
|
||||
b'\x02_\x10\x18NSHTTPCookieAcceptPolicy\x10\x02\x08\x0b&\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00' \
|
||||
b'\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00('
|
||||
|
||||
jar = parse_safari_cookies(cookies)
|
||||
self.assertEqual(len(jar), 1)
|
||||
cookie = list(jar)[0]
|
||||
self.assertEqual(cookie.domain, 'localhost')
|
||||
self.assertEqual(cookie.port, None)
|
||||
self.assertEqual(cookie.path, '/')
|
||||
self.assertEqual(cookie.name, 'foo')
|
||||
self.assertEqual(cookie.value, 'test%20%3Bcookie')
|
||||
self.assertFalse(cookie.secure)
|
||||
expected_expiration = datetime(2021, 6, 18, 21, 39, 19, tzinfo=timezone.utc)
|
||||
self.assertEqual(cookie.expires, int(expected_expiration.timestamp()))
|
||||
|
||||
def test_pbkdf2_sha1(self):
|
||||
key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16)
|
||||
self.assertEqual(key, b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34')
|
||||
@@ -10,12 +10,13 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import (
|
||||
assertGreaterEqual,
|
||||
expect_info_dict,
|
||||
expect_warnings,
|
||||
get_params,
|
||||
gettestcases,
|
||||
expect_info_dict,
|
||||
try_rm,
|
||||
is_download_test,
|
||||
report_warning,
|
||||
try_rm,
|
||||
)
|
||||
|
||||
|
||||
@@ -64,6 +65,7 @@ def _file_md5(fn):
|
||||
defs = gettestcases()
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestDownload(unittest.TestCase):
|
||||
# Parallel testing in nosetests. See
|
||||
# http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html
|
||||
@@ -71,6 +73,8 @@ class TestDownload(unittest.TestCase):
|
||||
|
||||
maxDiff = None
|
||||
|
||||
COMPLETED_TESTS = {}
|
||||
|
||||
def __str__(self):
|
||||
"""Identify each test with the `add_ie` attribute, if available."""
|
||||
|
||||
@@ -92,6 +96,9 @@ class TestDownload(unittest.TestCase):
|
||||
def generator(test_case, tname):
|
||||
|
||||
def test_template(self):
|
||||
if self.COMPLETED_TESTS.get(tname):
|
||||
return
|
||||
self.COMPLETED_TESTS[tname] = True
|
||||
ie = yt_dlp.extractor.get_info_extractor(test_case['name'])()
|
||||
other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])]
|
||||
is_playlist = any(k.startswith('playlist') for k in test_case)
|
||||
@@ -106,8 +113,13 @@ def generator(test_case, tname):
|
||||
|
||||
for tc in test_cases:
|
||||
info_dict = tc.get('info_dict', {})
|
||||
if not (info_dict.get('id') and info_dict.get('ext')):
|
||||
raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
|
||||
params = tc.get('params', {})
|
||||
if not info_dict.get('id'):
|
||||
raise Exception('Test definition incorrect. \'id\' key is not present')
|
||||
elif not info_dict.get('ext'):
|
||||
if params.get('skip_download') and params.get('ignore_no_formats_error'):
|
||||
continue
|
||||
raise Exception('Test definition incorrect. The output file cannot be known. \'ext\' key is not present')
|
||||
|
||||
if 'skip' in test_case:
|
||||
print_skipping(test_case['skip'])
|
||||
@@ -135,7 +147,7 @@ def generator(test_case, tname):
|
||||
expect_warnings(ydl, test_case.get('expected_warnings', []))
|
||||
|
||||
def get_tc_filename(tc):
|
||||
return ydl.prepare_filename(tc.get('info_dict', {}))
|
||||
return ydl.prepare_filename(dict(tc.get('info_dict', {})))
|
||||
|
||||
res_dict = None
|
||||
|
||||
@@ -248,12 +260,12 @@ def generator(test_case, tname):
|
||||
|
||||
|
||||
# And add them to TestDownload
|
||||
for n, test_case in enumerate(defs):
|
||||
tname = 'test_' + str(test_case['name'])
|
||||
i = 1
|
||||
while hasattr(TestDownload, tname):
|
||||
tname = 'test_%s_%d' % (test_case['name'], i)
|
||||
i += 1
|
||||
tests_counter = {}
|
||||
for test_case in defs:
|
||||
name = test_case['name']
|
||||
i = tests_counter.get(name, 0)
|
||||
tests_counter[name] = i + 1
|
||||
tname = f'test_{name}_{i}' if i else f'test_{name}'
|
||||
test_method = generator(test_case, tname)
|
||||
test_method.__name__ = str(tname)
|
||||
ie_list = test_case.get('add_ie')
|
||||
@@ -262,5 +274,22 @@ for n, test_case in enumerate(defs):
|
||||
del test_method
|
||||
|
||||
|
||||
def batch_generator(name, num_tests):
|
||||
|
||||
def test_template(self):
|
||||
for i in range(num_tests):
|
||||
getattr(self, f'test_{name}_{i}' if i else f'test_{name}')()
|
||||
|
||||
return test_template
|
||||
|
||||
|
||||
for name, num_tests in tests_counter.items():
|
||||
test_method = batch_generator(name, num_tests)
|
||||
test_method.__name__ = f'test_{name}_all'
|
||||
test_method.add_ie = ''
|
||||
setattr(TestDownload, test_method.__name__, test_method)
|
||||
del test_method
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -8,7 +8,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from test.helper import FakeYDL, is_download_test
|
||||
from yt_dlp.extractor import IqiyiIE
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ class WarningLogger(object):
|
||||
pass
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestIqiyiSDKInterpreter(unittest.TestCase):
|
||||
def test_iqiyi_sdk_interpreter(self):
|
||||
'''
|
||||
|
||||
@@ -8,13 +8,14 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import try_rm
|
||||
from test.helper import is_download_test, try_rm
|
||||
|
||||
|
||||
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
download_file = join(root_dir, 'test.webm')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestOverwrites(unittest.TestCase):
|
||||
def setUp(self):
|
||||
# create an empty file
|
||||
|
||||
@@ -7,7 +7,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import get_params, try_rm
|
||||
from test.helper import get_params, try_rm, is_download_test
|
||||
import yt_dlp.YoutubeDL
|
||||
from yt_dlp.utils import DownloadError
|
||||
|
||||
@@ -22,6 +22,7 @@ TEST_ID = 'gr51aVj-mLg'
|
||||
EXPECTED_NAME = 'gr51aVj-mLg'
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestPostHooks(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.stored_name_1 = None
|
||||
|
||||
@@ -11,32 +11,31 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.compat import compat_shlex_quote
|
||||
from yt_dlp.postprocessor import (
|
||||
ExecAfterDownloadPP,
|
||||
ExecPP,
|
||||
FFmpegThumbnailsConvertorPP,
|
||||
MetadataFromFieldPP,
|
||||
MetadataFromTitlePP,
|
||||
MetadataParserPP,
|
||||
)
|
||||
|
||||
|
||||
class TestMetadataFromField(unittest.TestCase):
|
||||
|
||||
def test_format_to_regex(self):
|
||||
pp = MetadataFromFieldPP(None, ['title:%(title)s - %(artist)s'])
|
||||
self.assertEqual(pp._data[0]['regex'], r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
self.assertEqual(
|
||||
MetadataParserPP.format_to_regex('%(title)s - %(artist)s'),
|
||||
r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
self.assertEqual(MetadataParserPP.format_to_regex(r'(?P<x>.+)'), r'(?P<x>.+)')
|
||||
|
||||
def test_field_to_outtmpl(self):
|
||||
pp = MetadataFromFieldPP(None, ['title:%(title)s : %(artist)s'])
|
||||
self.assertEqual(pp._data[0]['tmpl'], '%(title)s')
|
||||
def test_field_to_template(self):
|
||||
self.assertEqual(MetadataParserPP.field_to_template('title'), '%(title)s')
|
||||
self.assertEqual(MetadataParserPP.field_to_template('1'), '1')
|
||||
self.assertEqual(MetadataParserPP.field_to_template('foo bar'), 'foo bar')
|
||||
self.assertEqual(MetadataParserPP.field_to_template(' literal'), ' literal')
|
||||
|
||||
def test_in_out_seperation(self):
|
||||
pp = MetadataFromFieldPP(None, ['%(title)s \\: %(artist)s:%(title)s : %(artist)s'])
|
||||
self.assertEqual(pp._data[0]['in'], '%(title)s : %(artist)s')
|
||||
self.assertEqual(pp._data[0]['out'], '%(title)s : %(artist)s')
|
||||
|
||||
|
||||
class TestMetadataFromTitle(unittest.TestCase):
|
||||
def test_format_to_regex(self):
|
||||
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
|
||||
self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
def test_metadatafromfield(self):
|
||||
self.assertEqual(
|
||||
MetadataFromFieldPP.to_action('%(title)s \\: %(artist)s:%(title)s : %(artist)s'),
|
||||
(MetadataParserPP.Actions.INTERPRET, '%(title)s : %(artist)s', '%(title)s : %(artist)s'))
|
||||
|
||||
|
||||
class TestConvertThumbnail(unittest.TestCase):
|
||||
@@ -60,12 +59,12 @@ class TestConvertThumbnail(unittest.TestCase):
|
||||
os.remove(file.format(out))
|
||||
|
||||
|
||||
class TestExecAfterDownload(unittest.TestCase):
|
||||
class TestExec(unittest.TestCase):
|
||||
def test_parse_cmd(self):
|
||||
pp = ExecAfterDownloadPP(YoutubeDL(), '')
|
||||
pp = ExecPP(YoutubeDL(), '')
|
||||
info = {'filepath': 'file name'}
|
||||
quoted_filepath = compat_shlex_quote(info['filepath'])
|
||||
cmd = 'echo %s' % compat_shlex_quote(info['filepath'])
|
||||
|
||||
self.assertEqual(pp.parse_cmd('echo', info), 'echo %s' % quoted_filepath)
|
||||
self.assertEqual(pp.parse_cmd('echo.{}', info), 'echo.%s' % quoted_filepath)
|
||||
self.assertEqual(pp.parse_cmd('echo "%(filepath)s"', info), 'echo "%s"' % info['filepath'])
|
||||
self.assertEqual(pp.parse_cmd('echo', info), cmd)
|
||||
self.assertEqual(pp.parse_cmd('echo {}', info), cmd)
|
||||
self.assertEqual(pp.parse_cmd('echo %(filepath)q', info), cmd)
|
||||
|
||||
@@ -14,6 +14,7 @@ import subprocess
|
||||
from test.helper import (
|
||||
FakeYDL,
|
||||
get_params,
|
||||
is_download_test,
|
||||
)
|
||||
from yt_dlp.compat import (
|
||||
compat_str,
|
||||
@@ -21,6 +22,7 @@ from yt_dlp.compat import (
|
||||
)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestMultipleSocks(unittest.TestCase):
|
||||
@staticmethod
|
||||
def _check_params(attrs):
|
||||
@@ -76,6 +78,7 @@ class TestMultipleSocks(unittest.TestCase):
|
||||
params['secondary_server_ip'])
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestSocks(unittest.TestCase):
|
||||
_SKIP_SOCKS_TEST = True
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, md5
|
||||
from test.helper import FakeYDL, md5, is_download_test
|
||||
|
||||
|
||||
from yt_dlp.extractor import (
|
||||
@@ -30,6 +30,7 @@ from yt_dlp.extractor import (
|
||||
)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class BaseTestSubtitles(unittest.TestCase):
|
||||
url = None
|
||||
IE = None
|
||||
@@ -55,6 +56,7 @@ class BaseTestSubtitles(unittest.TestCase):
|
||||
return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
url = 'QRS8MkLhQmM'
|
||||
IE = YoutubeIE
|
||||
@@ -111,6 +113,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestDailymotionSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.dailymotion.com/video/xczg00'
|
||||
IE = DailymotionIE
|
||||
@@ -134,6 +137,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestTedSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
|
||||
IE = TEDIE
|
||||
@@ -149,6 +153,7 @@ class TestTedSubtitles(BaseTestSubtitles):
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestVimeoSubtitles(BaseTestSubtitles):
|
||||
url = 'http://vimeo.com/76979871'
|
||||
IE = VimeoIE
|
||||
@@ -170,6 +175,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestWallaSubtitles(BaseTestSubtitles):
|
||||
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
|
||||
IE = WallaIE
|
||||
@@ -191,6 +197,7 @@ class TestWallaSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
|
||||
IE = CeskaTelevizeIE
|
||||
@@ -212,6 +219,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestLyndaSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
|
||||
IE = LyndaIE
|
||||
@@ -224,6 +232,7 @@ class TestLyndaSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestNPOSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
|
||||
IE = NPOIE
|
||||
@@ -236,6 +245,7 @@ class TestNPOSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestMTVSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
|
||||
IE = ComedyCentralIE
|
||||
@@ -251,6 +261,7 @@ class TestMTVSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestNRKSubtitles(BaseTestSubtitles):
|
||||
url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
|
||||
IE = NRKTVIE
|
||||
@@ -263,6 +274,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestRaiPlaySubtitles(BaseTestSubtitles):
|
||||
IE = RaiPlayIE
|
||||
|
||||
@@ -283,6 +295,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestVikiSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
|
||||
IE = VikiIE
|
||||
@@ -295,6 +308,7 @@ class TestVikiSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestThePlatformSubtitles(BaseTestSubtitles):
|
||||
# from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
|
||||
# (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
|
||||
@@ -309,6 +323,7 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
|
||||
url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
|
||||
IE = ThePlatformFeedIE
|
||||
@@ -321,6 +336,7 @@ class TestThePlatformFeedSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestRtveSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
|
||||
IE = RTVEALaCartaIE
|
||||
@@ -335,6 +351,7 @@ class TestRtveSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestDemocracynowSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.democracynow.org/shows/2015/7/3'
|
||||
IE = DemocracynowIE
|
||||
|
||||
@@ -12,6 +12,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Various small unit tests
|
||||
import io
|
||||
import itertools
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
@@ -108,6 +109,7 @@ from yt_dlp.utils import (
|
||||
cli_bool_option,
|
||||
parse_codecs,
|
||||
iri_to_uri,
|
||||
LazyList,
|
||||
)
|
||||
from yt_dlp.compat import (
|
||||
compat_chr,
|
||||
@@ -1052,6 +1054,9 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('{ "040": "040" }')
|
||||
self.assertEqual(json.loads(on), {'040': '040'})
|
||||
|
||||
on = js_to_json('[1,//{},\n2]')
|
||||
self.assertEqual(json.loads(on), [1, 2])
|
||||
|
||||
def test_js_to_json_malformed(self):
|
||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||
@@ -1202,35 +1207,12 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||
'9999 51')
|
||||
|
||||
def test_match_str(self):
|
||||
self.assertRaises(ValueError, match_str, 'xy>foobar', {})
|
||||
# Unary
|
||||
self.assertFalse(match_str('xy', {'x': 1200}))
|
||||
self.assertTrue(match_str('!xy', {'x': 1200}))
|
||||
self.assertTrue(match_str('x', {'x': 1200}))
|
||||
self.assertFalse(match_str('!x', {'x': 1200}))
|
||||
self.assertTrue(match_str('x', {'x': 0}))
|
||||
self.assertFalse(match_str('x>0', {'x': 0}))
|
||||
self.assertFalse(match_str('x>0', {}))
|
||||
self.assertTrue(match_str('x>?0', {}))
|
||||
self.assertTrue(match_str('x>1K', {'x': 1200}))
|
||||
self.assertFalse(match_str('x>2K', {'x': 1200}))
|
||||
self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
|
||||
self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
|
||||
self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 90, 'description': 'foo'}))
|
||||
self.assertTrue(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'description': 'foo'}))
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'dislike_count': 60, 'description': 'foo'}))
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'dislike_count': 10}))
|
||||
self.assertTrue(match_str('is_live', {'is_live': True}))
|
||||
self.assertFalse(match_str('is_live', {'is_live': False}))
|
||||
self.assertFalse(match_str('is_live', {'is_live': None}))
|
||||
@@ -1244,6 +1226,69 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||
self.assertFalse(match_str('!title', {'title': 'abc'}))
|
||||
self.assertFalse(match_str('!title', {'title': ''}))
|
||||
|
||||
# Numeric
|
||||
self.assertFalse(match_str('x>0', {'x': 0}))
|
||||
self.assertFalse(match_str('x>0', {}))
|
||||
self.assertTrue(match_str('x>?0', {}))
|
||||
self.assertTrue(match_str('x>1K', {'x': 1200}))
|
||||
self.assertFalse(match_str('x>2K', {'x': 1200}))
|
||||
self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
|
||||
self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
|
||||
|
||||
# String
|
||||
self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y^=foo', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y^=bar', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'}))
|
||||
self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42})
|
||||
self.assertTrue(match_str('y*=bar', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y*=baz', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y$=42', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y$=43', {'y': 'foobar42'}))
|
||||
|
||||
# And
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 90, 'description': 'foo'}))
|
||||
self.assertTrue(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'description': 'foo'}))
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'dislike_count': 60, 'description': 'foo'}))
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'dislike_count': 10}))
|
||||
|
||||
# Regex
|
||||
self.assertTrue(match_str(r'x~=\bbar', {'x': 'foo bar'}))
|
||||
self.assertFalse(match_str(r'x~=\bbar.+', {'x': 'foo bar'}))
|
||||
self.assertFalse(match_str(r'x~=^FOO', {'x': 'foo bar'}))
|
||||
self.assertTrue(match_str(r'x~=(?i)^FOO', {'x': 'foo bar'}))
|
||||
|
||||
# Quotes
|
||||
self.assertTrue(match_str(r'x^="foo"', {'x': 'foo "bar"'}))
|
||||
self.assertFalse(match_str(r'x^="foo "', {'x': 'foo "bar"'}))
|
||||
self.assertFalse(match_str(r'x$="bar"', {'x': 'foo "bar"'}))
|
||||
self.assertTrue(match_str(r'x$=" \"bar\""', {'x': 'foo "bar"'}))
|
||||
|
||||
# Escaping &
|
||||
self.assertFalse(match_str(r'x=foo & bar', {'x': 'foo & bar'}))
|
||||
self.assertTrue(match_str(r'x=foo \& bar', {'x': 'foo & bar'}))
|
||||
self.assertTrue(match_str(r'x=foo \& bar & x^=foo', {'x': 'foo & bar'}))
|
||||
self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))
|
||||
|
||||
# Example from docs
|
||||
self.assertTrue(
|
||||
r'!is_live & like_count>?100 & description~=\'(?i)\bcats \& dogs\b\'',
|
||||
{'description': 'Raining Cats & Dogs'})
|
||||
|
||||
def test_parse_dfxp_time_expr(self):
|
||||
self.assertEqual(parse_dfxp_time_expr(None), None)
|
||||
self.assertEqual(parse_dfxp_time_expr(''), None)
|
||||
@@ -1525,6 +1570,51 @@ Line 1
|
||||
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
|
||||
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
|
||||
|
||||
def test_LazyList(self):
|
||||
it = list(range(10))
|
||||
|
||||
self.assertEqual(list(LazyList(it)), it)
|
||||
self.assertEqual(LazyList(it).exhaust(), it)
|
||||
self.assertEqual(LazyList(it)[5], it[5])
|
||||
|
||||
self.assertEqual(LazyList(it)[5:], it[5:])
|
||||
self.assertEqual(LazyList(it)[:5], it[:5])
|
||||
self.assertEqual(LazyList(it)[::2], it[::2])
|
||||
self.assertEqual(LazyList(it)[1::2], it[1::2])
|
||||
self.assertEqual(LazyList(it)[5::-1], it[5::-1])
|
||||
self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2])
|
||||
self.assertEqual(LazyList(it)[::-1], it[::-1])
|
||||
|
||||
self.assertTrue(LazyList(it))
|
||||
self.assertFalse(LazyList(range(0)))
|
||||
self.assertEqual(len(LazyList(it)), len(it))
|
||||
self.assertEqual(repr(LazyList(it)), repr(it))
|
||||
self.assertEqual(str(LazyList(it)), str(it))
|
||||
|
||||
self.assertEqual(list(LazyList(it).reverse()), it[::-1])
|
||||
self.assertEqual(list(LazyList(it).reverse()[1:3:7]), it[::-1][1:3:7])
|
||||
self.assertEqual(list(LazyList(it).reverse()[::-1]), it)
|
||||
|
||||
def test_LazyList_laziness(self):
|
||||
|
||||
def test(ll, idx, val, cache):
|
||||
self.assertEqual(ll[idx], val)
|
||||
self.assertEqual(getattr(ll, '_LazyList__cache'), list(cache))
|
||||
|
||||
ll = LazyList(range(10))
|
||||
test(ll, 0, 0, range(1))
|
||||
test(ll, 5, 5, range(6))
|
||||
test(ll, -3, 7, range(10))
|
||||
|
||||
ll = LazyList(range(10)).reverse()
|
||||
test(ll, -1, 0, range(1))
|
||||
test(ll, 3, 6, range(10))
|
||||
|
||||
ll = LazyList(itertools.count())
|
||||
test(ll, 10, 10, range(11))
|
||||
ll.reverse()
|
||||
test(ll, -15, 14, range(15))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -8,7 +8,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import get_params, try_rm
|
||||
from test.helper import get_params, try_rm, is_download_test
|
||||
|
||||
|
||||
import io
|
||||
@@ -38,6 +38,7 @@ ANNOTATIONS_FILE = TEST_ID + '.annotations.xml'
|
||||
EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestAnnotations(unittest.TestCase):
|
||||
def setUp(self):
|
||||
# Clear old files
|
||||
|
||||
@@ -7,7 +7,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from test.helper import FakeYDL, is_download_test
|
||||
|
||||
|
||||
from yt_dlp.extractor import (
|
||||
@@ -17,6 +17,7 @@ from yt_dlp.extractor import (
|
||||
)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestYoutubeLists(unittest.TestCase):
|
||||
def assertIsPlaylist(self, info):
|
||||
"""Make sure the info has '_type' set to 'playlist'"""
|
||||
|
||||
@@ -12,7 +12,7 @@ import io
|
||||
import re
|
||||
import string
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from test.helper import FakeYDL, is_download_test
|
||||
from yt_dlp.extractor import YoutubeIE
|
||||
from yt_dlp.compat import compat_str, compat_urlretrieve
|
||||
|
||||
@@ -65,6 +65,7 @@ _TESTS = [
|
||||
]
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestPlayerInfo(unittest.TestCase):
|
||||
def test_youtube_extract_player_info(self):
|
||||
PLAYER_URLS = (
|
||||
@@ -87,6 +88,7 @@ class TestPlayerInfo(unittest.TestCase):
|
||||
self.assertEqual(player_id, expected_player_id)
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestSignature(unittest.TestCase):
|
||||
def setUp(self):
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
2
tox.ini
2
tox.ini
@@ -1,5 +1,7 @@
|
||||
[tox]
|
||||
envlist = py26,py27,py33,py34,py35
|
||||
|
||||
# Needed?
|
||||
[testenv]
|
||||
deps =
|
||||
nose
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,6 +7,7 @@ __license__ = 'Public Domain'
|
||||
|
||||
import codecs
|
||||
import io
|
||||
import itertools
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
@@ -18,8 +19,10 @@ from .options import (
|
||||
)
|
||||
from .compat import (
|
||||
compat_getpass,
|
||||
compat_shlex_quote,
|
||||
workaround_optparse_bug9161,
|
||||
)
|
||||
from .cookies import SUPPORTED_BROWSERS
|
||||
from .utils import (
|
||||
DateRange,
|
||||
decodeOption,
|
||||
@@ -45,14 +48,15 @@ from .downloader import (
|
||||
from .extractor import gen_extractors, list_extractors
|
||||
from .extractor.common import InfoExtractor
|
||||
from .extractor.adobepass import MSO_INFO
|
||||
from .postprocessor.ffmpeg import (
|
||||
from .postprocessor import (
|
||||
FFmpegExtractAudioPP,
|
||||
FFmpegSubtitlesConvertorPP,
|
||||
FFmpegThumbnailsConvertorPP,
|
||||
FFmpegVideoConvertorPP,
|
||||
FFmpegVideoRemuxerPP,
|
||||
MetadataFromFieldPP,
|
||||
MetadataParserPP,
|
||||
)
|
||||
from .postprocessor.metadatafromfield import MetadataFromFieldPP
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
|
||||
@@ -151,6 +155,11 @@ def _real_main(argv=None):
|
||||
if numeric_limit is None:
|
||||
parser.error('invalid rate limit specified')
|
||||
opts.ratelimit = numeric_limit
|
||||
if opts.throttledratelimit is not None:
|
||||
numeric_limit = FileDownloader.parse_bytes(opts.throttledratelimit)
|
||||
if numeric_limit is None:
|
||||
parser.error('invalid rate limit specified')
|
||||
opts.throttledratelimit = numeric_limit
|
||||
if opts.min_filesize is not None:
|
||||
numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
|
||||
if numeric_limit is None:
|
||||
@@ -237,6 +246,12 @@ def _real_main(argv=None):
|
||||
if opts.convertthumbnails not in FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS:
|
||||
parser.error('invalid thumbnail format specified')
|
||||
|
||||
if opts.cookiesfrombrowser is not None:
|
||||
opts.cookiesfrombrowser = [
|
||||
part.strip() or None for part in opts.cookiesfrombrowser.split(':', 1)]
|
||||
if opts.cookiesfrombrowser[0] not in SUPPORTED_BROWSERS:
|
||||
parser.error('unsupported browser specified for cookies')
|
||||
|
||||
if opts.date is not None:
|
||||
date = DateRange.day(opts.date)
|
||||
else:
|
||||
@@ -268,6 +283,7 @@ def _real_main(argv=None):
|
||||
'filename', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
|
||||
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
|
||||
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json',
|
||||
'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs',
|
||||
]
|
||||
compat_opts = parse_compat_opts()
|
||||
|
||||
@@ -278,7 +294,7 @@ def _real_main(argv=None):
|
||||
compat_opts.update(['*%s' % name])
|
||||
return True
|
||||
|
||||
def set_default_compat(compat_name, opt_name, default=True, remove_compat=False):
|
||||
def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
|
||||
attr = getattr(opts, opt_name)
|
||||
if compat_name in compat_opts:
|
||||
if attr is None:
|
||||
@@ -294,6 +310,7 @@ def _real_main(argv=None):
|
||||
|
||||
set_default_compat('abort-on-error', 'ignoreerrors')
|
||||
set_default_compat('no-playlist-metafiles', 'allow_playlist_files')
|
||||
set_default_compat('no-clean-infojson', 'clean_infojson')
|
||||
if 'format-sort' in compat_opts:
|
||||
opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default)
|
||||
_video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
|
||||
@@ -303,7 +320,7 @@ def _real_main(argv=None):
|
||||
outtmpl_default = opts.outtmpl.get('default')
|
||||
if 'filename' in compat_opts:
|
||||
if outtmpl_default is None:
|
||||
outtmpl_default = '%(title)s.%(id)s.%(ext)s'
|
||||
outtmpl_default = '%(title)s-%(id)s.%(ext)s'
|
||||
opts.outtmpl.update({'default': outtmpl_default})
|
||||
else:
|
||||
_unused_compat_opt('filename')
|
||||
@@ -315,7 +332,8 @@ def _real_main(argv=None):
|
||||
|
||||
for k, tmpl in opts.outtmpl.items():
|
||||
validate_outtmpl(tmpl, '%s output template' % k)
|
||||
for tmpl in opts.forceprint:
|
||||
opts.forceprint = opts.forceprint or []
|
||||
for tmpl in opts.forceprint or []:
|
||||
validate_outtmpl(tmpl, 'print template')
|
||||
|
||||
if opts.extractaudio and not opts.keepvideo and opts.format is None:
|
||||
@@ -330,13 +348,29 @@ def _real_main(argv=None):
|
||||
if re.match(InfoExtractor.FormatSort.regex, f) is None:
|
||||
parser.error('invalid format sort string "%s" specified' % f)
|
||||
|
||||
if opts.metafromfield is None:
|
||||
opts.metafromfield = []
|
||||
def metadataparser_actions(f):
|
||||
if isinstance(f, str):
|
||||
cmd = '--parse-metadata %s' % compat_shlex_quote(f)
|
||||
try:
|
||||
actions = [MetadataFromFieldPP.to_action(f)]
|
||||
except Exception as err:
|
||||
parser.error(f'{cmd} is invalid; {err}')
|
||||
else:
|
||||
cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f))
|
||||
actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))
|
||||
|
||||
for action in actions:
|
||||
try:
|
||||
MetadataParserPP.validate_action(*action)
|
||||
except Exception as err:
|
||||
parser.error(f'{cmd} is invalid; {err}')
|
||||
yield action
|
||||
|
||||
if opts.parse_metadata is None:
|
||||
opts.parse_metadata = []
|
||||
if opts.metafromtitle is not None:
|
||||
opts.metafromfield.append('title:%s' % opts.metafromtitle)
|
||||
for f in opts.metafromfield:
|
||||
if re.match(MetadataFromFieldPP.regex, f) is None:
|
||||
parser.error('invalid format string "%s" specified for --parse-metadata' % f)
|
||||
opts.parse_metadata.append('title:%s' % opts.metafromtitle)
|
||||
opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, opts.parse_metadata)))
|
||||
|
||||
any_getting = opts.forceprint or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
||||
any_printing = opts.print_json
|
||||
@@ -388,10 +422,10 @@ def _real_main(argv=None):
|
||||
|
||||
# PostProcessors
|
||||
postprocessors = []
|
||||
if opts.metafromfield:
|
||||
if opts.parse_metadata:
|
||||
postprocessors.append({
|
||||
'key': 'MetadataFromField',
|
||||
'formats': opts.metafromfield,
|
||||
'key': 'MetadataParser',
|
||||
'actions': opts.parse_metadata,
|
||||
# Run this immediately after extraction is complete
|
||||
'when': 'pre_process'
|
||||
})
|
||||
@@ -409,6 +443,13 @@ def _real_main(argv=None):
|
||||
# Run this before the actual video download
|
||||
'when': 'before_dl'
|
||||
})
|
||||
# Must be after all other before_dl
|
||||
if opts.exec_before_dl_cmd:
|
||||
postprocessors.append({
|
||||
'key': 'Exec',
|
||||
'exec_cmd': opts.exec_before_dl_cmd,
|
||||
'when': 'before_dl'
|
||||
})
|
||||
if opts.extractaudio:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegExtractAudio',
|
||||
@@ -437,13 +478,13 @@ def _real_main(argv=None):
|
||||
if opts.addmetadata:
|
||||
postprocessors.append({'key': 'FFmpegMetadata'})
|
||||
if opts.embedsubtitles:
|
||||
already_have_subtitle = opts.writesubtitles
|
||||
already_have_subtitle = opts.writesubtitles and 'no-keep-subs' not in compat_opts
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
# already_have_subtitle = True prevents the file from being deleted after embedding
|
||||
'already_have_subtitle': already_have_subtitle
|
||||
})
|
||||
if not already_have_subtitle:
|
||||
if not opts.writeautomaticsub and 'no-keep-subs' not in compat_opts:
|
||||
opts.writesubtitles = True
|
||||
# --all-sub automatically sets --write-sub if --write-auto-sub is not given
|
||||
# this was the old behaviour if only --all-sub was given.
|
||||
@@ -476,10 +517,10 @@ def _real_main(argv=None):
|
||||
# XAttrMetadataPP should be run after post-processors that may change file contents
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
# ExecAfterDownload must be the last PP
|
||||
# Exec must be the last PP
|
||||
if opts.exec_cmd:
|
||||
postprocessors.append({
|
||||
'key': 'ExecAfterDownload',
|
||||
'key': 'Exec',
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
# Run this only after the files have been moved to their final locations
|
||||
'when': 'after_move'
|
||||
@@ -529,7 +570,7 @@ def _real_main(argv=None):
|
||||
'forcejson': opts.dumpjson or opts.print_json,
|
||||
'dump_single_json': opts.dump_single_json,
|
||||
'force_write_download_archive': opts.force_write_download_archive,
|
||||
'simulate': opts.simulate or any_getting,
|
||||
'simulate': (any_getting or None) if opts.simulate is None else opts.simulate,
|
||||
'skip_download': opts.skip_download,
|
||||
'format': opts.format,
|
||||
'allow_unplayable_formats': opts.allow_unplayable_formats,
|
||||
@@ -551,6 +592,7 @@ def _real_main(argv=None):
|
||||
'ignoreerrors': opts.ignoreerrors,
|
||||
'force_generic_extractor': opts.force_generic_extractor,
|
||||
'ratelimit': opts.ratelimit,
|
||||
'throttledratelimit': opts.throttledratelimit,
|
||||
'overwrites': opts.overwrites,
|
||||
'retries': opts.retries,
|
||||
'fragment_retries': opts.fragment_retries,
|
||||
@@ -614,6 +656,7 @@ def _real_main(argv=None):
|
||||
'break_on_reject': opts.break_on_reject,
|
||||
'skip_playlist_after_errors': opts.skip_playlist_after_errors,
|
||||
'cookiefile': opts.cookiefile,
|
||||
'cookiesfrombrowser': opts.cookiesfrombrowser,
|
||||
'nocheckcertificate': opts.no_check_certificate,
|
||||
'prefer_insecure': opts.prefer_insecure,
|
||||
'proxy': opts.proxy,
|
||||
@@ -624,6 +667,7 @@ def _real_main(argv=None):
|
||||
'include_ads': opts.include_ads,
|
||||
'default_search': opts.default_search,
|
||||
'dynamic_mpd': opts.dynamic_mpd,
|
||||
'extractor_args': opts.extractor_args,
|
||||
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
|
||||
'youtube_include_hls_manifest': opts.youtube_include_hls_manifest,
|
||||
'encoding': opts.encoding,
|
||||
@@ -710,6 +754,11 @@ def main(argv=None):
|
||||
sys.exit('ERROR: fixed output name but more than one file to download')
|
||||
except KeyboardInterrupt:
|
||||
sys.exit('\nERROR: Interrupted by user')
|
||||
except BrokenPipeError:
|
||||
# https://docs.python.org/3/library/signal.html#note-on-sigpipe
|
||||
devnull = os.open(os.devnull, os.O_WRONLY)
|
||||
os.dup2(devnull, sys.stdout.fileno())
|
||||
sys.exit(r'\nERROR: {err}')
|
||||
|
||||
|
||||
__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
|
||||
|
||||
3041
yt_dlp/compat.py
3041
yt_dlp/compat.py
File diff suppressed because it is too large
Load Diff
755
yt_dlp/cookies.py
Normal file
755
yt_dlp/cookies.py
Normal file
@@ -0,0 +1,755 @@
|
||||
import ctypes
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import struct
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from hashlib import pbkdf2_hmac
|
||||
|
||||
from yt_dlp.aes import aes_cbc_decrypt
|
||||
from yt_dlp.compat import (
|
||||
compat_b64decode,
|
||||
compat_cookiejar_Cookie,
|
||||
)
|
||||
from yt_dlp.utils import (
|
||||
bug_reports_message,
|
||||
bytes_to_intlist,
|
||||
expand_path,
|
||||
intlist_to_bytes,
|
||||
process_communicate_or_kill,
|
||||
YoutubeDLCookieJar,
|
||||
)
|
||||
|
||||
try:
|
||||
import sqlite3
|
||||
SQLITE_AVAILABLE = True
|
||||
except ImportError:
|
||||
# although sqlite3 is part of the standard library, it is possible to compile python without
|
||||
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
|
||||
SQLITE_AVAILABLE = False
|
||||
|
||||
|
||||
try:
|
||||
from Crypto.Cipher import AES
|
||||
CRYPTO_AVAILABLE = True
|
||||
except ImportError:
|
||||
CRYPTO_AVAILABLE = False
|
||||
|
||||
try:
|
||||
import keyring
|
||||
KEYRING_AVAILABLE = True
|
||||
KEYRING_UNAVAILABLE_REASON = f'due to unknown reasons{bug_reports_message()}'
|
||||
except ImportError:
|
||||
KEYRING_AVAILABLE = False
|
||||
KEYRING_UNAVAILABLE_REASON = (
|
||||
'as the `keyring` module is not installed. '
|
||||
'Please install by running `python3 -m pip install keyring`. '
|
||||
'Depending on your platform, additional packages may be required '
|
||||
'to access the keyring; see https://pypi.org/project/keyring')
|
||||
except Exception as _err:
|
||||
KEYRING_AVAILABLE = False
|
||||
KEYRING_UNAVAILABLE_REASON = 'as the `keyring` module could not be initialized: %s' % _err
|
||||
|
||||
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
||||
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
||||
|
||||
|
||||
class YDLLogger:
|
||||
def __init__(self, ydl=None):
|
||||
self._ydl = ydl
|
||||
|
||||
def debug(self, message):
|
||||
if self._ydl:
|
||||
self._ydl.write_debug(message)
|
||||
|
||||
def info(self, message):
|
||||
if self._ydl:
|
||||
self._ydl.to_screen(f'[Cookies] {message}')
|
||||
|
||||
def warning(self, message, only_once=False):
|
||||
if self._ydl:
|
||||
self._ydl.report_warning(message, only_once)
|
||||
|
||||
def error(self, message):
|
||||
if self._ydl:
|
||||
self._ydl.report_error(message)
|
||||
|
||||
|
||||
def load_cookies(cookie_file, browser_specification, ydl):
|
||||
cookie_jars = []
|
||||
if browser_specification is not None:
|
||||
browser_name, profile = _parse_browser_specification(*browser_specification)
|
||||
cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl)))
|
||||
|
||||
if cookie_file is not None:
|
||||
cookie_file = expand_path(cookie_file)
|
||||
jar = YoutubeDLCookieJar(cookie_file)
|
||||
if os.access(cookie_file, os.R_OK):
|
||||
jar.load(ignore_discard=True, ignore_expires=True)
|
||||
cookie_jars.append(jar)
|
||||
|
||||
return _merge_cookie_jars(cookie_jars)
|
||||
|
||||
|
||||
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger()):
|
||||
if browser_name == 'firefox':
|
||||
return _extract_firefox_cookies(profile, logger)
|
||||
elif browser_name == 'safari':
|
||||
return _extract_safari_cookies(profile, logger)
|
||||
elif browser_name in CHROMIUM_BASED_BROWSERS:
|
||||
return _extract_chrome_cookies(browser_name, profile, logger)
|
||||
else:
|
||||
raise ValueError('unknown browser: {}'.format(browser_name))
|
||||
|
||||
|
||||
def _extract_firefox_cookies(profile, logger):
|
||||
logger.info('Extracting cookies from firefox')
|
||||
if not SQLITE_AVAILABLE:
|
||||
logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
|
||||
'Please use a python interpreter compiled with sqlite3 support')
|
||||
return YoutubeDLCookieJar()
|
||||
|
||||
if profile is None:
|
||||
search_root = _firefox_browser_dir()
|
||||
elif _is_path(profile):
|
||||
search_root = profile
|
||||
else:
|
||||
search_root = os.path.join(_firefox_browser_dir(), profile)
|
||||
|
||||
cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite')
|
||||
if cookie_database_path is None:
|
||||
raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root))
|
||||
logger.debug('extracting from: "{}"'.format(cookie_database_path))
|
||||
|
||||
with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir:
|
||||
cursor = None
|
||||
try:
|
||||
cursor = _open_database_copy(cookie_database_path, tmpdir)
|
||||
cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
|
||||
jar = YoutubeDLCookieJar()
|
||||
for host, name, value, path, expiry, is_secure in cursor.fetchall():
|
||||
cookie = compat_cookiejar_Cookie(
|
||||
version=0, name=name, value=value, port=None, port_specified=False,
|
||||
domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
|
||||
path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
|
||||
comment=None, comment_url=None, rest={})
|
||||
jar.set_cookie(cookie)
|
||||
logger.info('Extracted {} cookies from firefox'.format(len(jar)))
|
||||
return jar
|
||||
finally:
|
||||
if cursor is not None:
|
||||
cursor.connection.close()
|
||||
|
||||
|
||||
def _firefox_browser_dir():
|
||||
if sys.platform in ('linux', 'linux2'):
|
||||
return os.path.expanduser('~/.mozilla/firefox')
|
||||
elif sys.platform == 'win32':
|
||||
return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles')
|
||||
elif sys.platform == 'darwin':
|
||||
return os.path.expanduser('~/Library/Application Support/Firefox')
|
||||
else:
|
||||
raise ValueError('unsupported platform: {}'.format(sys.platform))
|
||||
|
||||
|
||||
def _get_chromium_based_browser_settings(browser_name):
|
||||
# https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
|
||||
if sys.platform in ('linux', 'linux2'):
|
||||
config = _config_home()
|
||||
browser_dir = {
|
||||
'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
|
||||
'chrome': os.path.join(config, 'google-chrome'),
|
||||
'chromium': os.path.join(config, 'chromium'),
|
||||
'edge': os.path.join(config, 'microsoft-edge'),
|
||||
'opera': os.path.join(config, 'opera'),
|
||||
'vivaldi': os.path.join(config, 'vivaldi'),
|
||||
}[browser_name]
|
||||
|
||||
elif sys.platform == 'win32':
|
||||
appdata_local = os.path.expandvars('%LOCALAPPDATA%')
|
||||
appdata_roaming = os.path.expandvars('%APPDATA%')
|
||||
browser_dir = {
|
||||
'brave': os.path.join(appdata_local, r'BraveSoftware\Brave-Browser\User Data'),
|
||||
'chrome': os.path.join(appdata_local, r'Google\Chrome\User Data'),
|
||||
'chromium': os.path.join(appdata_local, r'Chromium\User Data'),
|
||||
'edge': os.path.join(appdata_local, r'Microsoft\Edge\User Data'),
|
||||
'opera': os.path.join(appdata_roaming, r'Opera Software\Opera Stable'),
|
||||
'vivaldi': os.path.join(appdata_local, r'Vivaldi\User Data'),
|
||||
}[browser_name]
|
||||
|
||||
elif sys.platform == 'darwin':
|
||||
appdata = os.path.expanduser('~/Library/Application Support')
|
||||
browser_dir = {
|
||||
'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'),
|
||||
'chrome': os.path.join(appdata, 'Google/Chrome'),
|
||||
'chromium': os.path.join(appdata, 'Chromium'),
|
||||
'edge': os.path.join(appdata, 'Microsoft Edge'),
|
||||
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
|
||||
'vivaldi': os.path.join(appdata, 'Vivaldi'),
|
||||
}[browser_name]
|
||||
|
||||
else:
|
||||
raise ValueError('unsupported platform: {}'.format(sys.platform))
|
||||
|
||||
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
|
||||
# dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
|
||||
keyring_name = {
|
||||
'brave': 'Brave',
|
||||
'chrome': 'Chrome',
|
||||
'chromium': 'Chromium',
|
||||
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
|
||||
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
|
||||
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
|
||||
}[browser_name]
|
||||
|
||||
browsers_without_profiles = {'opera'}
|
||||
|
||||
return {
|
||||
'browser_dir': browser_dir,
|
||||
'keyring_name': keyring_name,
|
||||
'supports_profiles': browser_name not in browsers_without_profiles
|
||||
}
|
||||
|
||||
|
||||
def _extract_chrome_cookies(browser_name, profile, logger):
|
||||
logger.info('Extracting cookies from {}'.format(browser_name))
|
||||
|
||||
if not SQLITE_AVAILABLE:
|
||||
logger.warning(('Cannot extract cookies from {} without sqlite3 support. '
|
||||
'Please use a python interpreter compiled with sqlite3 support').format(browser_name))
|
||||
return YoutubeDLCookieJar()
|
||||
|
||||
config = _get_chromium_based_browser_settings(browser_name)
|
||||
|
||||
if profile is None:
|
||||
search_root = config['browser_dir']
|
||||
elif _is_path(profile):
|
||||
search_root = profile
|
||||
config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
|
||||
else:
|
||||
if config['supports_profiles']:
|
||||
search_root = os.path.join(config['browser_dir'], profile)
|
||||
else:
|
||||
logger.error('{} does not support profiles'.format(browser_name))
|
||||
search_root = config['browser_dir']
|
||||
|
||||
cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies')
|
||||
if cookie_database_path is None:
|
||||
raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root))
|
||||
logger.debug('extracting from: "{}"'.format(cookie_database_path))
|
||||
|
||||
decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger)
|
||||
|
||||
with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir:
|
||||
cursor = None
|
||||
try:
|
||||
cursor = _open_database_copy(cookie_database_path, tmpdir)
|
||||
cursor.connection.text_factory = bytes
|
||||
column_names = _get_column_names(cursor, 'cookies')
|
||||
secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
|
||||
cursor.execute('SELECT host_key, name, value, encrypted_value, path, '
|
||||
'expires_utc, {} FROM cookies'.format(secure_column))
|
||||
jar = YoutubeDLCookieJar()
|
||||
failed_cookies = 0
|
||||
for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall():
|
||||
host_key = host_key.decode('utf-8')
|
||||
name = name.decode('utf-8')
|
||||
value = value.decode('utf-8')
|
||||
path = path.decode('utf-8')
|
||||
|
||||
if not value and encrypted_value:
|
||||
value = decryptor.decrypt(encrypted_value)
|
||||
if value is None:
|
||||
failed_cookies += 1
|
||||
continue
|
||||
|
||||
cookie = compat_cookiejar_Cookie(
|
||||
version=0, name=name, value=value, port=None, port_specified=False,
|
||||
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
|
||||
path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
|
||||
comment=None, comment_url=None, rest={})
|
||||
jar.set_cookie(cookie)
|
||||
if failed_cookies > 0:
|
||||
failed_message = ' ({} could not be decrypted)'.format(failed_cookies)
|
||||
else:
|
||||
failed_message = ''
|
||||
logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message))
|
||||
return jar
|
||||
finally:
|
||||
if cursor is not None:
|
||||
cursor.connection.close()
|
||||
|
||||
|
||||
class ChromeCookieDecryptor:
|
||||
"""
|
||||
Overview:
|
||||
|
||||
Linux:
|
||||
- cookies are either v10 or v11
|
||||
- v10: AES-CBC encrypted with a fixed key
|
||||
- v11: AES-CBC encrypted with an OS protected key (keyring)
|
||||
- v11 keys can be stored in various places depending on the activate desktop environment [2]
|
||||
|
||||
Mac:
|
||||
- cookies are either v10 or not v10
|
||||
- v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
|
||||
- not v10: 'old data' stored as plaintext
|
||||
|
||||
Windows:
|
||||
- cookies are either v10 or not v10
|
||||
- v10: AES-GCM encrypted with a key which is encrypted with DPAPI
|
||||
- not v10: encrypted with DPAPI
|
||||
|
||||
Sources:
|
||||
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
|
||||
- [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
|
||||
- KeyStorageLinux::CreateService
|
||||
"""
|
||||
|
||||
def decrypt(self, encrypted_value):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def get_cookie_decryptor(browser_root, browser_keyring_name, logger):
|
||||
if sys.platform in ('linux', 'linux2'):
|
||||
return LinuxChromeCookieDecryptor(browser_keyring_name, logger)
|
||||
elif sys.platform == 'darwin':
|
||||
return MacChromeCookieDecryptor(browser_keyring_name, logger)
|
||||
elif sys.platform == 'win32':
|
||||
return WindowsChromeCookieDecryptor(browser_root, logger)
|
||||
else:
|
||||
raise NotImplementedError('Chrome cookie decryption is not supported '
|
||||
'on this platform: {}'.format(sys.platform))
|
||||
|
||||
|
||||
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
def __init__(self, browser_keyring_name, logger):
|
||||
self._logger = logger
|
||||
self._v10_key = self.derive_key(b'peanuts')
|
||||
if KEYRING_AVAILABLE:
|
||||
self._v11_key = self.derive_key(_get_linux_keyring_password(browser_keyring_name))
|
||||
else:
|
||||
self._v11_key = None
|
||||
|
||||
@staticmethod
|
||||
def derive_key(password):
|
||||
# values from
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
|
||||
return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)
|
||||
|
||||
def decrypt(self, encrypted_value):
|
||||
version = encrypted_value[:3]
|
||||
ciphertext = encrypted_value[3:]
|
||||
|
||||
if version == b'v10':
|
||||
return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
|
||||
|
||||
elif version == b'v11':
|
||||
if self._v11_key is None:
|
||||
self._logger.warning(f'cannot decrypt cookie {KEYRING_UNAVAILABLE_REASON}', only_once=True)
|
||||
return None
|
||||
return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
|
||||
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
def __init__(self, browser_keyring_name, logger):
|
||||
self._logger = logger
|
||||
password = _get_mac_keyring_password(browser_keyring_name)
|
||||
self._v10_key = None if password is None else self.derive_key(password)
|
||||
|
||||
@staticmethod
|
||||
def derive_key(password):
|
||||
# values from
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
|
||||
return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)
|
||||
|
||||
def decrypt(self, encrypted_value):
|
||||
version = encrypted_value[:3]
|
||||
ciphertext = encrypted_value[3:]
|
||||
|
||||
if version == b'v10':
|
||||
if self._v10_key is None:
|
||||
self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
|
||||
return None
|
||||
|
||||
return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
|
||||
|
||||
else:
|
||||
# other prefixes are considered 'old data' which were stored as plaintext
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
|
||||
return encrypted_value
|
||||
|
||||
|
||||
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
|
||||
def __init__(self, browser_root, logger):
|
||||
self._logger = logger
|
||||
self._v10_key = _get_windows_v10_key(browser_root, logger)
|
||||
|
||||
def decrypt(self, encrypted_value):
|
||||
version = encrypted_value[:3]
|
||||
ciphertext = encrypted_value[3:]
|
||||
|
||||
if version == b'v10':
|
||||
if self._v10_key is None:
|
||||
self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
|
||||
return None
|
||||
elif not CRYPTO_AVAILABLE:
|
||||
self._logger.warning('cannot decrypt cookie as the `pycryptodome` module is not installed. '
|
||||
'Please install by running `python3 -m pip install pycryptodome`',
|
||||
only_once=True)
|
||||
return None
|
||||
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
|
||||
# kNonceLength
|
||||
nonce_length = 96 // 8
|
||||
# boringssl
|
||||
# EVP_AEAD_AES_GCM_TAG_LEN
|
||||
authentication_tag_length = 16
|
||||
|
||||
raw_ciphertext = ciphertext
|
||||
nonce = raw_ciphertext[:nonce_length]
|
||||
ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
|
||||
authentication_tag = raw_ciphertext[-authentication_tag_length:]
|
||||
|
||||
return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
|
||||
|
||||
else:
|
||||
# any other prefix means the data is DPAPI encrypted
|
||||
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
|
||||
return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8')
|
||||
|
||||
|
||||
def _extract_safari_cookies(profile, logger):
|
||||
if profile is not None:
|
||||
logger.error('safari does not support profiles')
|
||||
if sys.platform != 'darwin':
|
||||
raise ValueError('unsupported platform: {}'.format(sys.platform))
|
||||
|
||||
cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
|
||||
|
||||
if not os.path.isfile(cookies_path):
|
||||
raise FileNotFoundError('could not find safari cookies database')
|
||||
|
||||
with open(cookies_path, 'rb') as f:
|
||||
cookies_data = f.read()
|
||||
|
||||
jar = parse_safari_cookies(cookies_data, logger=logger)
|
||||
logger.info('Extracted {} cookies from safari'.format(len(jar)))
|
||||
return jar
|
||||
|
||||
|
||||
class ParserError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class DataParser:
|
||||
def __init__(self, data, logger):
|
||||
self._data = data
|
||||
self.cursor = 0
|
||||
self._logger = logger
|
||||
|
||||
def read_bytes(self, num_bytes):
|
||||
if num_bytes < 0:
|
||||
raise ParserError('invalid read of {} bytes'.format(num_bytes))
|
||||
end = self.cursor + num_bytes
|
||||
if end > len(self._data):
|
||||
raise ParserError('reached end of input')
|
||||
data = self._data[self.cursor:end]
|
||||
self.cursor = end
|
||||
return data
|
||||
|
||||
def expect_bytes(self, expected_value, message):
|
||||
value = self.read_bytes(len(expected_value))
|
||||
if value != expected_value:
|
||||
raise ParserError('unexpected value: {} != {} ({})'.format(value, expected_value, message))
|
||||
|
||||
def read_uint(self, big_endian=False):
|
||||
data_format = '>I' if big_endian else '<I'
|
||||
return struct.unpack(data_format, self.read_bytes(4))[0]
|
||||
|
||||
def read_double(self, big_endian=False):
|
||||
data_format = '>d' if big_endian else '<d'
|
||||
return struct.unpack(data_format, self.read_bytes(8))[0]
|
||||
|
||||
def read_cstring(self):
|
||||
buffer = []
|
||||
while True:
|
||||
c = self.read_bytes(1)
|
||||
if c == b'\x00':
|
||||
return b''.join(buffer).decode('utf-8')
|
||||
else:
|
||||
buffer.append(c)
|
||||
|
||||
def skip(self, num_bytes, description='unknown'):
|
||||
if num_bytes > 0:
|
||||
self._logger.debug('skipping {} bytes ({}): {}'.format(
|
||||
num_bytes, description, self.read_bytes(num_bytes)))
|
||||
elif num_bytes < 0:
|
||||
raise ParserError('invalid skip of {} bytes'.format(num_bytes))
|
||||
|
||||
def skip_to(self, offset, description='unknown'):
|
||||
self.skip(offset - self.cursor, description)
|
||||
|
||||
def skip_to_end(self, description='unknown'):
|
||||
self.skip_to(len(self._data), description)
|
||||
|
||||
|
||||
def _mac_absolute_time_to_posix(timestamp):
|
||||
return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
|
||||
|
||||
|
||||
def _parse_safari_cookies_header(data, logger):
|
||||
p = DataParser(data, logger)
|
||||
p.expect_bytes(b'cook', 'database signature')
|
||||
number_of_pages = p.read_uint(big_endian=True)
|
||||
page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)]
|
||||
return page_sizes, p.cursor
|
||||
|
||||
|
||||
def _parse_safari_cookies_page(data, jar, logger):
|
||||
p = DataParser(data, logger)
|
||||
p.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
|
||||
number_of_cookies = p.read_uint()
|
||||
record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
|
||||
if number_of_cookies == 0:
|
||||
logger.debug('a cookies page of size {} has no cookies'.format(len(data)))
|
||||
return
|
||||
|
||||
p.skip_to(record_offsets[0], 'unknown page header field')
|
||||
|
||||
for record_offset in record_offsets:
|
||||
p.skip_to(record_offset, 'space between records')
|
||||
record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
|
||||
p.read_bytes(record_length)
|
||||
p.skip_to_end('space in between pages')
|
||||
|
||||
|
||||
def _parse_safari_cookies_record(data, jar, logger):
|
||||
p = DataParser(data, logger)
|
||||
record_size = p.read_uint()
|
||||
p.skip(4, 'unknown record field 1')
|
||||
flags = p.read_uint()
|
||||
is_secure = bool(flags & 0x0001)
|
||||
p.skip(4, 'unknown record field 2')
|
||||
domain_offset = p.read_uint()
|
||||
name_offset = p.read_uint()
|
||||
path_offset = p.read_uint()
|
||||
value_offset = p.read_uint()
|
||||
p.skip(8, 'unknown record field 3')
|
||||
expiration_date = _mac_absolute_time_to_posix(p.read_double())
|
||||
_creation_date = _mac_absolute_time_to_posix(p.read_double()) # noqa: F841
|
||||
|
||||
try:
|
||||
p.skip_to(domain_offset)
|
||||
domain = p.read_cstring()
|
||||
|
||||
p.skip_to(name_offset)
|
||||
name = p.read_cstring()
|
||||
|
||||
p.skip_to(path_offset)
|
||||
path = p.read_cstring()
|
||||
|
||||
p.skip_to(value_offset)
|
||||
value = p.read_cstring()
|
||||
except UnicodeDecodeError:
|
||||
logger.warning('failed to parse cookie because UTF-8 decoding failed')
|
||||
return record_size
|
||||
|
||||
p.skip_to(record_size, 'space at the end of the record')
|
||||
|
||||
cookie = compat_cookiejar_Cookie(
|
||||
version=0, name=name, value=value, port=None, port_specified=False,
|
||||
domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
|
||||
path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
|
||||
comment=None, comment_url=None, rest={})
|
||||
jar.set_cookie(cookie)
|
||||
return record_size
|
||||
|
||||
|
||||
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
|
||||
"""
|
||||
References:
|
||||
- https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
|
||||
- this data appears to be out of date but the important parts of the database structure is the same
|
||||
- there are a few bytes here and there which are skipped during parsing
|
||||
"""
|
||||
if jar is None:
|
||||
jar = YoutubeDLCookieJar()
|
||||
page_sizes, body_start = _parse_safari_cookies_header(data, logger)
|
||||
p = DataParser(data[body_start:], logger)
|
||||
for page_size in page_sizes:
|
||||
_parse_safari_cookies_page(p.read_bytes(page_size), jar, logger)
|
||||
p.skip_to_end('footer')
|
||||
return jar
|
||||
|
||||
|
||||
def _get_linux_keyring_password(browser_keyring_name):
|
||||
password = keyring.get_password('{} Keys'.format(browser_keyring_name),
|
||||
'{} Safe Storage'.format(browser_keyring_name))
|
||||
if password is None:
|
||||
# this sometimes occurs in KDE because chrome does not check hasEntry and instead
|
||||
# just tries to read the value (which kwallet returns "") whereas keyring checks hasEntry
|
||||
# to verify this:
|
||||
# dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
|
||||
# while starting chrome.
|
||||
# this may be a bug as the intended behaviour is to generate a random password and store
|
||||
# it, but that doesn't matter here.
|
||||
password = ''
|
||||
return password.encode('utf-8')
|
||||
|
||||
|
||||
def _get_mac_keyring_password(browser_keyring_name):
|
||||
if KEYRING_AVAILABLE:
|
||||
password = keyring.get_password('{} Safe Storage'.format(browser_keyring_name), browser_keyring_name)
|
||||
return password.encode('utf-8')
|
||||
else:
|
||||
proc = subprocess.Popen(['security', 'find-generic-password',
|
||||
'-w', # write password to stdout
|
||||
'-a', browser_keyring_name, # match 'account'
|
||||
'-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service'
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.DEVNULL)
|
||||
try:
|
||||
stdout, stderr = process_communicate_or_kill(proc)
|
||||
return stdout
|
||||
except BaseException:
|
||||
return None
|
||||
|
||||
|
||||
def _get_windows_v10_key(browser_root, logger):
|
||||
path = _find_most_recently_used_file(browser_root, 'Local State')
|
||||
if path is None:
|
||||
logger.error('could not find local state file')
|
||||
return None
|
||||
with open(path, 'r') as f:
|
||||
data = json.load(f)
|
||||
try:
|
||||
base64_key = data['os_crypt']['encrypted_key']
|
||||
except KeyError:
|
||||
logger.error('no encrypted key in Local State')
|
||||
return None
|
||||
encrypted_key = compat_b64decode(base64_key)
|
||||
prefix = b'DPAPI'
|
||||
if not encrypted_key.startswith(prefix):
|
||||
logger.error('invalid key')
|
||||
return None
|
||||
return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger)
|
||||
|
||||
|
||||
def pbkdf2_sha1(password, salt, iterations, key_length):
|
||||
return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
|
||||
|
||||
|
||||
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
|
||||
plaintext = aes_cbc_decrypt(bytes_to_intlist(ciphertext),
|
||||
bytes_to_intlist(key),
|
||||
bytes_to_intlist(initialization_vector))
|
||||
padding_length = plaintext[-1]
|
||||
try:
|
||||
return intlist_to_bytes(plaintext[:-padding_length]).decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
logger.warning('failed to decrypt cookie because UTF-8 decoding failed. Possibly the key is wrong?')
|
||||
return None
|
||||
|
||||
|
||||
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
|
||||
cipher = AES.new(key, AES.MODE_GCM, nonce)
|
||||
try:
|
||||
plaintext = cipher.decrypt_and_verify(ciphertext, authentication_tag)
|
||||
except ValueError:
|
||||
logger.warning('failed to decrypt cookie because the MAC check failed. Possibly the key is wrong?')
|
||||
return None
|
||||
|
||||
try:
|
||||
return plaintext.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
logger.warning('failed to decrypt cookie because UTF-8 decoding failed. Possibly the key is wrong?')
|
||||
return None
|
||||
|
||||
|
||||
def _decrypt_windows_dpapi(ciphertext, logger):
|
||||
"""
|
||||
References:
|
||||
- https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
|
||||
"""
|
||||
from ctypes.wintypes import DWORD
|
||||
|
||||
class DATA_BLOB(ctypes.Structure):
|
||||
_fields_ = [('cbData', DWORD),
|
||||
('pbData', ctypes.POINTER(ctypes.c_char))]
|
||||
|
||||
buffer = ctypes.create_string_buffer(ciphertext)
|
||||
blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
|
||||
blob_out = DATA_BLOB()
|
||||
ret = ctypes.windll.crypt32.CryptUnprotectData(
|
||||
ctypes.byref(blob_in), # pDataIn
|
||||
None, # ppszDataDescr: human readable description of pDataIn
|
||||
None, # pOptionalEntropy: salt?
|
||||
None, # pvReserved: must be NULL
|
||||
None, # pPromptStruct: information about prompts to display
|
||||
0, # dwFlags
|
||||
ctypes.byref(blob_out) # pDataOut
|
||||
)
|
||||
if not ret:
|
||||
logger.warning('failed to decrypt with DPAPI')
|
||||
return None
|
||||
|
||||
result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
|
||||
ctypes.windll.kernel32.LocalFree(blob_out.pbData)
|
||||
return result
|
||||
|
||||
|
||||
def _config_home():
|
||||
return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
|
||||
|
||||
|
||||
def _open_database_copy(database_path, tmpdir):
|
||||
# cannot open sqlite databases if they are already in use (e.g. by the browser)
|
||||
database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
|
||||
shutil.copy(database_path, database_copy_path)
|
||||
conn = sqlite3.connect(database_copy_path)
|
||||
return conn.cursor()
|
||||
|
||||
|
||||
def _get_column_names(cursor, table_name):
|
||||
table_info = cursor.execute('PRAGMA table_info({})'.format(table_name)).fetchall()
|
||||
return [row[1].decode('utf-8') for row in table_info]
|
||||
|
||||
|
||||
def _find_most_recently_used_file(root, filename):
|
||||
# if there are multiple browser profiles, take the most recently used one
|
||||
paths = []
|
||||
for root, dirs, files in os.walk(root):
|
||||
for file in files:
|
||||
if file == filename:
|
||||
paths.append(os.path.join(root, file))
|
||||
return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
|
||||
|
||||
|
||||
def _merge_cookie_jars(jars):
|
||||
output_jar = YoutubeDLCookieJar()
|
||||
for jar in jars:
|
||||
for cookie in jar:
|
||||
output_jar.set_cookie(cookie)
|
||||
if jar.filename is not None:
|
||||
output_jar.filename = jar.filename
|
||||
return output_jar
|
||||
|
||||
|
||||
def _is_path(value):
|
||||
return os.path.sep in value
|
||||
|
||||
|
||||
def _parse_browser_specification(browser_name, profile=None):
|
||||
if browser_name not in SUPPORTED_BROWSERS:
|
||||
raise ValueError(f'unsupported browser: "{browser_name}"')
|
||||
if profile is not None and _is_path(profile):
|
||||
profile = os.path.expanduser(profile)
|
||||
return browser_name, profile
|
||||
@@ -3,17 +3,20 @@ from __future__ import unicode_literals
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
NO_DEFAULT
|
||||
)
|
||||
|
||||
|
||||
def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
|
||||
def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False):
|
||||
info_dict['protocol'] = determine_protocol(info_dict)
|
||||
info_copy = info_dict.copy()
|
||||
if protocol:
|
||||
info_copy['protocol'] = protocol
|
||||
return get_suitable_downloader(info_copy, *args, **kwargs)
|
||||
info_copy['to_stdout'] = to_stdout
|
||||
return _get_suitable_downloader(info_copy, params, default)
|
||||
|
||||
|
||||
# Some of these require _get_real_downloader
|
||||
# Some of these require get_suitable_downloader
|
||||
from .common import FileDownloader
|
||||
from .dash import DashSegmentsFD
|
||||
from .f4m import F4mFD
|
||||
@@ -22,8 +25,10 @@ from .http import HttpFD
|
||||
from .rtmp import RtmpFD
|
||||
from .rtsp import RtspFD
|
||||
from .ism import IsmFD
|
||||
from .mhtml import MhtmlFD
|
||||
from .niconico import NiconicoDmcFD
|
||||
from .youtube_live_chat import YoutubeLiveChatReplayFD
|
||||
from .websocket import WebSocketFragmentFD
|
||||
from .youtube_live_chat import YoutubeLiveChatFD
|
||||
from .external import (
|
||||
get_external_downloader,
|
||||
FFmpegFD,
|
||||
@@ -39,8 +44,11 @@ PROTOCOL_MAP = {
|
||||
'f4m': F4mFD,
|
||||
'http_dash_segments': DashSegmentsFD,
|
||||
'ism': IsmFD,
|
||||
'mhtml': MhtmlFD,
|
||||
'niconico_dmc': NiconicoDmcFD,
|
||||
'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
|
||||
'websocket_frag': WebSocketFragmentFD,
|
||||
'youtube_live_chat': YoutubeLiveChatFD,
|
||||
'youtube_live_chat_replay': YoutubeLiveChatFD,
|
||||
}
|
||||
|
||||
|
||||
@@ -50,6 +58,7 @@ def shorten_protocol_name(proto, simplify=False):
|
||||
'rtmp_ffmpeg': 'rtmp_f',
|
||||
'http_dash_segments': 'dash',
|
||||
'niconico_dmc': 'dmc',
|
||||
'websocket_frag': 'WSfrag',
|
||||
}
|
||||
if simplify:
|
||||
short_protocol_names.update({
|
||||
@@ -63,22 +72,24 @@ def shorten_protocol_name(proto, simplify=False):
|
||||
return short_protocol_names.get(proto, proto)
|
||||
|
||||
|
||||
def get_suitable_downloader(info_dict, params={}, default=HttpFD):
|
||||
def _get_suitable_downloader(info_dict, params, default):
|
||||
"""Get the downloader class that can handle the info dict."""
|
||||
protocol = determine_protocol(info_dict)
|
||||
info_dict['protocol'] = protocol
|
||||
if default is NO_DEFAULT:
|
||||
default = HttpFD
|
||||
|
||||
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
||||
# return FFmpegFD
|
||||
|
||||
protocol = info_dict['protocol']
|
||||
downloaders = params.get('external_downloader')
|
||||
external_downloader = (
|
||||
downloaders if isinstance(downloaders, compat_str) or downloaders is None
|
||||
else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default')))
|
||||
if external_downloader and external_downloader.lower() == 'native':
|
||||
external_downloader = 'native'
|
||||
|
||||
if external_downloader not in (None, 'native'):
|
||||
if external_downloader is None:
|
||||
if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params):
|
||||
return FFmpegFD
|
||||
elif external_downloader.lower() != 'native':
|
||||
ed = get_external_downloader(external_downloader)
|
||||
if ed.can_download(info_dict, external_downloader):
|
||||
return ed
|
||||
@@ -86,9 +97,10 @@ def get_suitable_downloader(info_dict, params={}, default=HttpFD):
|
||||
if protocol in ('m3u8', 'm3u8_native'):
|
||||
if info_dict.get('is_live'):
|
||||
return FFmpegFD
|
||||
elif external_downloader == 'native':
|
||||
elif (external_downloader or '').lower() == 'native':
|
||||
return HlsFD
|
||||
elif _get_real_downloader(info_dict, 'm3u8_frag_urls', params, None):
|
||||
elif get_suitable_downloader(
|
||||
info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']):
|
||||
return HlsFD
|
||||
elif params.get('hls_prefer_native') is True:
|
||||
return HlsFD
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import division, unicode_literals
|
||||
|
||||
import copy
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
@@ -32,6 +33,7 @@ class FileDownloader(object):
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
ratelimit: Download speed limit, in bytes/sec.
|
||||
throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
|
||||
retries: Number of times to retry for HTTP error 5xx
|
||||
buffersize: Size of download buffer in bytes.
|
||||
noresizebuffer: Do not automatically resize the download buffer.
|
||||
@@ -45,8 +47,11 @@ class FileDownloader(object):
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
||||
external_downloader_args: A list of additional command-line arguments for the
|
||||
external downloader.
|
||||
external_downloader_args: A dictionary of downloader keys (in lower case)
|
||||
and a list of additional command-line arguments for the
|
||||
executable. Use 'default' as the name for arguments to be
|
||||
passed to all downloaders. For compatibility with youtube-dl,
|
||||
a single list of args can also be used
|
||||
hls_use_mpegts: Use the mpegts container for HLS videos.
|
||||
http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
|
||||
useful for bypassing bandwidth throttling imposed by
|
||||
@@ -318,12 +323,9 @@ class FileDownloader(object):
|
||||
'[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...'
|
||||
% (error_to_compat_str(err), count, self.format_retries(retries)))
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
def report_file_already_downloaded(self, *args, **kwargs):
|
||||
"""Report file has already been fully downloaded."""
|
||||
try:
|
||||
self.to_screen('[download] %s has already been downloaded' % file_name)
|
||||
except UnicodeEncodeError:
|
||||
self.to_screen('[download] The file has already been downloaded')
|
||||
return self.ydl.report_file_already_downloaded(*args, **kwargs)
|
||||
|
||||
def report_unable_to_resume(self):
|
||||
"""Report it was impossible to resume download."""
|
||||
@@ -341,7 +343,7 @@ class FileDownloader(object):
|
||||
"""
|
||||
|
||||
nooverwrites_and_exists = (
|
||||
not self.params.get('overwrites', subtitle)
|
||||
not self.params.get('overwrites', True)
|
||||
and os.path.exists(encodeFilename(filename))
|
||||
)
|
||||
|
||||
@@ -359,7 +361,7 @@ class FileDownloader(object):
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'total_bytes': os.path.getsize(encodeFilename(filename)),
|
||||
})
|
||||
}, info_dict)
|
||||
return True, False
|
||||
|
||||
if subtitle is False:
|
||||
@@ -387,7 +389,16 @@ class FileDownloader(object):
|
||||
"""Real download process. Redefine in subclasses."""
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
def _hook_progress(self, status):
|
||||
def _hook_progress(self, status, info_dict):
|
||||
if not self._progress_hooks:
|
||||
return
|
||||
info_dict = dict(info_dict)
|
||||
for key in ('__original_infodict', '__postprocessors'):
|
||||
info_dict.pop(key, None)
|
||||
# youtube-dl passes the same status object to all the hooks.
|
||||
# Some third party scripts seems to be relying on this.
|
||||
# So keep this behavior if possible
|
||||
status['info_dict'] = copy.deepcopy(info_dict)
|
||||
for ph in self._progress_hooks:
|
||||
ph(status)
|
||||
|
||||
|
||||
@@ -1,21 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import errno
|
||||
try:
|
||||
import concurrent.futures
|
||||
can_threaded_download = True
|
||||
except ImportError:
|
||||
can_threaded_download = False
|
||||
|
||||
from ..downloader import _get_real_downloader
|
||||
from ..downloader import get_suitable_downloader
|
||||
from .fragment import FragmentFD
|
||||
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import (
|
||||
DownloadError,
|
||||
sanitize_open,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils import urljoin
|
||||
|
||||
|
||||
class DashSegmentsFD(FragmentFD):
|
||||
@@ -27,11 +15,15 @@ class DashSegmentsFD(FragmentFD):
|
||||
FD_NAME = 'dashsegments'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
if info_dict.get('is_live'):
|
||||
self.report_error('Live DASH videos are not supported')
|
||||
|
||||
fragment_base_url = info_dict.get('fragment_base_url')
|
||||
fragments = info_dict['fragments'][:1] if self.params.get(
|
||||
'test', False) else info_dict['fragments']
|
||||
|
||||
real_downloader = _get_real_downloader(info_dict, 'dash_frag_urls', self.params, None)
|
||||
real_downloader = get_suitable_downloader(
|
||||
info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
@@ -41,10 +33,7 @@ class DashSegmentsFD(FragmentFD):
|
||||
if real_downloader:
|
||||
self._prepare_external_frag_download(ctx)
|
||||
else:
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
self._prepare_and_start_frag_download(ctx, info_dict)
|
||||
|
||||
fragments_to_download = []
|
||||
frag_index = 0
|
||||
@@ -69,123 +58,6 @@ class DashSegmentsFD(FragmentFD):
|
||||
info_copy = info_dict.copy()
|
||||
info_copy['fragments'] = fragments_to_download
|
||||
fd = real_downloader(self.ydl, self.params)
|
||||
# TODO: Make progress updates work without hooking twice
|
||||
# for ph in self._progress_hooks:
|
||||
# fd.add_progress_hook(ph)
|
||||
success = fd.real_download(filename, info_copy)
|
||||
if not success:
|
||||
return False
|
||||
else:
|
||||
def download_fragment(fragment):
|
||||
i = fragment['index']
|
||||
frag_index = fragment['frag_index']
|
||||
fragment_url = fragment['url']
|
||||
return fd.real_download(filename, info_copy)
|
||||
|
||||
ctx['fragment_index'] = frag_index
|
||||
|
||||
# In DASH, the first segment contains necessary headers to
|
||||
# generate a valid MP4 file, so always abort for the first segment
|
||||
fatal = i == 0 or not skip_unavailable_fragments
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
|
||||
if not success:
|
||||
return False, frag_index
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||
# whole download to fail. However if the same fragment is immediately
|
||||
# retried with the same request data this usually succeeds (1-2 attempts
|
||||
# is usually enough) thus allowing to download the whole file successfully.
|
||||
# To be future-proof we will retry all fragments that fail with any
|
||||
# HTTP error.
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
except DownloadError:
|
||||
# Don't retry fragment if error occurred during HTTP downloading
|
||||
# itself since it has own retry settings
|
||||
if not fatal:
|
||||
break
|
||||
raise
|
||||
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
return False, frag_index
|
||||
ctx['dest_stream'].close()
|
||||
self.report_error('Giving up after %s fragment retries' % fragment_retries)
|
||||
return False, frag_index
|
||||
|
||||
return frag_content, frag_index
|
||||
|
||||
def append_fragment(frag_content, frag_index):
|
||||
fatal = frag_index == 1 or not skip_unavailable_fragments
|
||||
if frag_content:
|
||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
|
||||
try:
|
||||
file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||
ctx['fragment_filename_sanitized'] = frag_sanitized
|
||||
file.close()
|
||||
self._append_fragment(ctx, frag_content)
|
||||
return True
|
||||
except EnvironmentError as ose:
|
||||
if ose.errno != errno.ENOENT:
|
||||
raise
|
||||
# FileNotFoundError
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
return True
|
||||
else:
|
||||
ctx['dest_stream'].close()
|
||||
self.report_error(
|
||||
'fragment %s not found, unable to continue' % frag_index)
|
||||
return False
|
||||
else:
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
return True
|
||||
else:
|
||||
ctx['dest_stream'].close()
|
||||
self.report_error(
|
||||
'fragment %s not found, unable to continue' % frag_index)
|
||||
return False
|
||||
|
||||
max_workers = self.params.get('concurrent_fragment_downloads', 1)
|
||||
if can_threaded_download and max_workers > 1:
|
||||
self.report_warning('The download speed shown is only of one thread. This is a known issue')
|
||||
_download_fragment = lambda f: (f, download_fragment(f)[1])
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
|
||||
futures = [pool.submit(_download_fragment, fragment) for fragment in fragments_to_download]
|
||||
# timeout must be 0 to return instantly
|
||||
done, not_done = concurrent.futures.wait(futures, timeout=0)
|
||||
try:
|
||||
while not_done:
|
||||
# Check every 1 second for KeyboardInterrupt
|
||||
freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
|
||||
done |= freshly_done
|
||||
except KeyboardInterrupt:
|
||||
for future in not_done:
|
||||
future.cancel()
|
||||
# timeout must be none to cancel
|
||||
concurrent.futures.wait(not_done, timeout=None)
|
||||
raise KeyboardInterrupt
|
||||
|
||||
for fragment, frag_index in map(lambda x: x.result(), futures):
|
||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
|
||||
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||
fragment['fragment_filename_sanitized'] = frag_sanitized
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
result = append_fragment(frag_content, frag_index)
|
||||
if not result:
|
||||
return False
|
||||
else:
|
||||
for fragment in fragments_to_download:
|
||||
frag_content, frag_index = download_fragment(fragment)
|
||||
result = append_fragment(frag_content, frag_index)
|
||||
if not result:
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
return True
|
||||
return self.download_and_append_fragments(ctx, fragments_to_download, info_dict)
|
||||
|
||||
@@ -36,6 +36,7 @@ from ..utils import (
|
||||
|
||||
class ExternalFD(FileDownloader):
|
||||
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
|
||||
can_download_to_stdout = False
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
self.report_destination(filename)
|
||||
@@ -67,7 +68,7 @@ class ExternalFD(FileDownloader):
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
})
|
||||
self._hook_progress(status)
|
||||
self._hook_progress(status, info_dict)
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
@@ -93,7 +94,9 @@ class ExternalFD(FileDownloader):
|
||||
|
||||
@classmethod
|
||||
def supports(cls, info_dict):
|
||||
return info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS
|
||||
return (
|
||||
(cls.can_download_to_stdout or not info_dict.get('to_stdout'))
|
||||
and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS)
|
||||
|
||||
@classmethod
|
||||
def can_download(cls, info_dict, path=None):
|
||||
@@ -280,6 +283,8 @@ class Aria2cFD(ExternalFD):
|
||||
'--file-allocation=none', '-x16', '-j16', '-s16']
|
||||
if 'fragments' in info_dict:
|
||||
cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
|
||||
else:
|
||||
cmd += ['--min-split-size', '1M']
|
||||
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
@@ -339,12 +344,27 @@ class HttpieFD(ExternalFD):
|
||||
|
||||
class FFmpegFD(ExternalFD):
|
||||
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms')
|
||||
can_download_to_stdout = True
|
||||
|
||||
@classmethod
|
||||
def available(cls, path=None):
|
||||
# TODO: Fix path for ffmpeg
|
||||
# Fixme: This may be wrong when --ffmpeg-location is used
|
||||
return FFmpegPostProcessor().available
|
||||
|
||||
def on_process_started(self, proc, stdin):
|
||||
""" Override this in subclasses """
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def can_merge_formats(cls, info_dict, params={}):
|
||||
return (
|
||||
info_dict.get('requested_formats')
|
||||
and info_dict.get('protocol')
|
||||
and not params.get('allow_unplayable_formats')
|
||||
and 'no-direct-merge' not in params.get('compat_opts', [])
|
||||
and cls.can_download(info_dict))
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']]
|
||||
ffpp = FFmpegPostProcessor(downloader=self)
|
||||
@@ -362,6 +382,9 @@ class FFmpegFD(ExternalFD):
|
||||
if not self.params.get('verbose'):
|
||||
args += ['-hide_banner']
|
||||
|
||||
args += info_dict.get('_ffmpeg_args', [])
|
||||
|
||||
# This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead
|
||||
seekable = info_dict.get('_seekable')
|
||||
if seekable is not None:
|
||||
# setting -seekable prevents ffmpeg from guessing if the server
|
||||
@@ -371,8 +394,6 @@ class FFmpegFD(ExternalFD):
|
||||
# http://trac.ffmpeg.org/ticket/6125#comment:10
|
||||
args += ['-seekable', '1' if seekable else '0']
|
||||
|
||||
args += self._configuration_args()
|
||||
|
||||
# start_time = info_dict.get('start_time') or 0
|
||||
# if start_time:
|
||||
# args += ['-ss', compat_str(start_time)]
|
||||
@@ -440,7 +461,8 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
for url in urls:
|
||||
args += ['-i', url]
|
||||
args += ['-c', 'copy']
|
||||
|
||||
args += self._configuration_args() + ['-c', 'copy']
|
||||
if info_dict.get('requested_formats'):
|
||||
for (i, fmt) in enumerate(info_dict['requested_formats']):
|
||||
if fmt.get('acodec') != 'none':
|
||||
@@ -451,6 +473,7 @@ class FFmpegFD(ExternalFD):
|
||||
if self.params.get('test', False):
|
||||
args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
|
||||
|
||||
ext = info_dict['ext']
|
||||
if protocol in ('m3u8', 'm3u8_native'):
|
||||
use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts')
|
||||
if use_mpegts is None:
|
||||
@@ -463,8 +486,10 @@ class FFmpegFD(ExternalFD):
|
||||
args += ['-bsf:a', 'aac_adtstoasc']
|
||||
elif protocol == 'rtmp':
|
||||
args += ['-f', 'flv']
|
||||
elif ext == 'mp4' and tmpfilename == '-':
|
||||
args += ['-f', 'mpegts']
|
||||
else:
|
||||
args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
|
||||
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
|
||||
|
||||
args = [encodeArgument(opt) for opt in args]
|
||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||
@@ -472,6 +497,8 @@ class FFmpegFD(ExternalFD):
|
||||
self._debug_cmd(args)
|
||||
|
||||
proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
|
||||
if url in ('-', 'pipe:'):
|
||||
self.on_process_started(proc, proc.stdin)
|
||||
try:
|
||||
retval = proc.wait()
|
||||
except BaseException as e:
|
||||
@@ -480,7 +507,7 @@ class FFmpegFD(ExternalFD):
|
||||
# produces a file that is playable (this is mostly useful for live
|
||||
# streams). Note that Windows is not affected and produces playable
|
||||
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
|
||||
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
|
||||
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
|
||||
process_communicate_or_kill(proc, b'q')
|
||||
else:
|
||||
proc.kill()
|
||||
|
||||
@@ -380,7 +380,7 @@ class F4mFD(FragmentFD):
|
||||
|
||||
base_url_parsed = compat_urllib_parse_urlparse(base_url)
|
||||
|
||||
self._start_frag_download(ctx)
|
||||
self._start_frag_download(ctx, info_dict)
|
||||
|
||||
frag_index = 0
|
||||
while fragments_list:
|
||||
@@ -434,6 +434,6 @@ class F4mFD(FragmentFD):
|
||||
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
|
||||
self.report_warning(msg)
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
self._finish_frag_download(ctx, info_dict)
|
||||
|
||||
return True
|
||||
|
||||
@@ -4,9 +4,26 @@ import os
|
||||
import time
|
||||
import json
|
||||
|
||||
try:
|
||||
from Crypto.Cipher import AES
|
||||
can_decrypt_frag = True
|
||||
except ImportError:
|
||||
can_decrypt_frag = False
|
||||
|
||||
try:
|
||||
import concurrent.futures
|
||||
can_threaded_download = True
|
||||
except ImportError:
|
||||
can_threaded_download = False
|
||||
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..compat import (
|
||||
compat_urllib_error,
|
||||
compat_struct_pack,
|
||||
)
|
||||
from ..utils import (
|
||||
DownloadError,
|
||||
error_to_compat_str,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
@@ -56,7 +73,7 @@ class FragmentFD(FileDownloader):
|
||||
|
||||
def report_retry_fragment(self, err, frag_index, count, retries):
|
||||
self.to_screen(
|
||||
'[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...'
|
||||
'\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...'
|
||||
% (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
|
||||
|
||||
def report_skip_fragment(self, frag_index):
|
||||
@@ -66,9 +83,9 @@ class FragmentFD(FileDownloader):
|
||||
headers = info_dict.get('http_headers')
|
||||
return sanitized_Request(url, None, headers) if headers else url
|
||||
|
||||
def _prepare_and_start_frag_download(self, ctx):
|
||||
def _prepare_and_start_frag_download(self, ctx, info_dict):
|
||||
self._prepare_frag_download(ctx)
|
||||
self._start_frag_download(ctx)
|
||||
self._start_frag_download(ctx, info_dict)
|
||||
|
||||
def __do_ytdl_file(self, ctx):
|
||||
return not ctx['live'] and not ctx['tmpfilename'] == '-' and not self.params.get('_no_ytdl_file')
|
||||
@@ -88,17 +105,19 @@ class FragmentFD(FileDownloader):
|
||||
|
||||
def _write_ytdl_file(self, ctx):
|
||||
frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
|
||||
downloader = {
|
||||
'current_fragment': {
|
||||
'index': ctx['fragment_index'],
|
||||
},
|
||||
}
|
||||
if 'extra_state' in ctx:
|
||||
downloader['extra_state'] = ctx['extra_state']
|
||||
if ctx.get('fragment_count') is not None:
|
||||
downloader['fragment_count'] = ctx['fragment_count']
|
||||
frag_index_stream.write(json.dumps({'downloader': downloader}))
|
||||
frag_index_stream.close()
|
||||
try:
|
||||
downloader = {
|
||||
'current_fragment': {
|
||||
'index': ctx['fragment_index'],
|
||||
},
|
||||
}
|
||||
if 'extra_state' in ctx:
|
||||
downloader['extra_state'] = ctx['extra_state']
|
||||
if ctx.get('fragment_count') is not None:
|
||||
downloader['fragment_count'] = ctx['fragment_count']
|
||||
frag_index_stream.write(json.dumps({'downloader': downloader}))
|
||||
finally:
|
||||
frag_index_stream.close()
|
||||
|
||||
def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None):
|
||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
|
||||
@@ -112,11 +131,15 @@ class FragmentFD(FileDownloader):
|
||||
return False, None
|
||||
if fragment_info_dict.get('filetime'):
|
||||
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
|
||||
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||
ctx['fragment_filename_sanitized'] = fragment_filename
|
||||
return True, self._read_fragment(ctx)
|
||||
|
||||
def _read_fragment(self, ctx):
|
||||
down, frag_sanitized = sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
|
||||
ctx['fragment_filename_sanitized'] = frag_sanitized
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
return True, frag_content
|
||||
return frag_content
|
||||
|
||||
def _append_fragment(self, ctx, frag_content):
|
||||
try:
|
||||
@@ -198,7 +221,7 @@ class FragmentFD(FileDownloader):
|
||||
'complete_frags_downloaded_bytes': resume_len,
|
||||
})
|
||||
|
||||
def _start_frag_download(self, ctx):
|
||||
def _start_frag_download(self, ctx, info_dict):
|
||||
resume_len = ctx['complete_frags_downloaded_bytes']
|
||||
total_frags = ctx['total_frags']
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
@@ -227,6 +250,7 @@ class FragmentFD(FileDownloader):
|
||||
time_now = time.time()
|
||||
state['elapsed'] = time_now - start
|
||||
frag_total_bytes = s.get('total_bytes') or 0
|
||||
s['fragment_info_dict'] = s.pop('info_dict', {})
|
||||
if not ctx['live']:
|
||||
estimated_size = (
|
||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
|
||||
@@ -249,13 +273,13 @@ class FragmentFD(FileDownloader):
|
||||
state['speed'] = s.get('speed') or ctx.get('speed')
|
||||
ctx['speed'] = state['speed']
|
||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||
self._hook_progress(state)
|
||||
self._hook_progress(state, info_dict)
|
||||
|
||||
ctx['dl'].add_progress_hook(frag_progress_hook)
|
||||
|
||||
return start
|
||||
|
||||
def _finish_frag_download(self, ctx):
|
||||
def _finish_frag_download(self, ctx, info_dict):
|
||||
ctx['dest_stream'].close()
|
||||
if self.__do_ytdl_file(ctx):
|
||||
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
|
||||
@@ -282,7 +306,7 @@ class FragmentFD(FileDownloader):
|
||||
'filename': ctx['filename'],
|
||||
'status': 'finished',
|
||||
'elapsed': elapsed,
|
||||
})
|
||||
}, info_dict)
|
||||
|
||||
def _prepare_external_frag_download(self, ctx):
|
||||
if 'live' not in ctx:
|
||||
@@ -304,3 +328,104 @@ class FragmentFD(FileDownloader):
|
||||
'tmpfilename': tmpfilename,
|
||||
'fragment_index': 0,
|
||||
})
|
||||
|
||||
def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None):
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
is_fatal = (lambda idx: idx == 0) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)
|
||||
if not pack_func:
|
||||
pack_func = lambda frag_content, _: frag_content
|
||||
|
||||
def download_fragment(fragment, ctx):
|
||||
frag_index = ctx['fragment_index'] = fragment['frag_index']
|
||||
headers = info_dict.get('http_headers', {})
|
||||
byte_range = fragment.get('byte_range')
|
||||
if byte_range:
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
|
||||
|
||||
# Never skip the first fragment
|
||||
fatal = is_fatal(fragment.get('index') or (frag_index - 1))
|
||||
count, frag_content = 0, None
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers)
|
||||
if not success:
|
||||
return False, frag_index
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# Unavailable (possibly temporary) fragments may be served.
|
||||
# First we try to retry then either skip or abort.
|
||||
# See https://github.com/ytdl-org/youtube-dl/issues/10165,
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/10448).
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
except DownloadError:
|
||||
# Don't retry fragment if error occurred during HTTP downloading
|
||||
# itself since it has own retry settings
|
||||
if not fatal:
|
||||
break
|
||||
raise
|
||||
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
return False, frag_index
|
||||
ctx['dest_stream'].close()
|
||||
self.report_error('Giving up after %s fragment retries' % fragment_retries)
|
||||
return False, frag_index
|
||||
return frag_content, frag_index
|
||||
|
||||
def decrypt_fragment(fragment, frag_content):
|
||||
decrypt_info = fragment.get('decrypt_info')
|
||||
if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
|
||||
return frag_content
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
|
||||
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
|
||||
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
|
||||
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
|
||||
# not what it decrypts to.
|
||||
if self.params.get('test', False):
|
||||
return frag_content
|
||||
return AES.new(decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
|
||||
def append_fragment(frag_content, frag_index, ctx):
|
||||
if not frag_content:
|
||||
if not is_fatal(frag_index - 1):
|
||||
self.report_skip_fragment(frag_index)
|
||||
return True
|
||||
else:
|
||||
ctx['dest_stream'].close()
|
||||
self.report_error(
|
||||
'fragment %s not found, unable to continue' % frag_index)
|
||||
return False
|
||||
self._append_fragment(ctx, pack_func(frag_content, frag_index))
|
||||
return True
|
||||
|
||||
max_workers = self.params.get('concurrent_fragment_downloads', 1)
|
||||
if can_threaded_download and max_workers > 1:
|
||||
|
||||
def _download_fragment(fragment):
|
||||
ctx_copy = ctx.copy()
|
||||
frag_content, frag_index = download_fragment(fragment, ctx_copy)
|
||||
return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized')
|
||||
|
||||
self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
|
||||
for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments):
|
||||
ctx['fragment_filename_sanitized'] = frag_filename
|
||||
ctx['fragment_index'] = frag_index
|
||||
result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
|
||||
if not result:
|
||||
return False
|
||||
else:
|
||||
for fragment in fragments:
|
||||
frag_content, frag_index = download_fragment(fragment, ctx)
|
||||
result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
|
||||
if not result:
|
||||
return False
|
||||
|
||||
if finish_func is not None:
|
||||
ctx['dest_stream'].write(finish_func())
|
||||
ctx['dest_stream'].flush()
|
||||
self._finish_frag_download(ctx, info_dict)
|
||||
return True
|
||||
|
||||
@@ -1,32 +1,18 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import errno
|
||||
import re
|
||||
import io
|
||||
import binascii
|
||||
try:
|
||||
from Crypto.Cipher import AES
|
||||
can_decrypt_frag = True
|
||||
except ImportError:
|
||||
can_decrypt_frag = False
|
||||
try:
|
||||
import concurrent.futures
|
||||
can_threaded_download = True
|
||||
except ImportError:
|
||||
can_threaded_download = False
|
||||
|
||||
from ..downloader import _get_real_downloader
|
||||
from .fragment import FragmentFD
|
||||
from ..downloader import get_suitable_downloader
|
||||
from .fragment import FragmentFD, can_decrypt_frag
|
||||
from .external import FFmpegFD
|
||||
|
||||
from ..compat import (
|
||||
compat_urllib_error,
|
||||
compat_urlparse,
|
||||
compat_struct_pack,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_m3u8_attributes,
|
||||
sanitize_open,
|
||||
update_url_query,
|
||||
bug_reports_message,
|
||||
)
|
||||
@@ -94,16 +80,14 @@ class HlsFD(FragmentFD):
|
||||
fd = FFmpegFD(self.ydl, self.params)
|
||||
self.report_warning(
|
||||
'%s detected unsupported features; extraction will be delegated to %s' % (self.FD_NAME, fd.get_basename()))
|
||||
# TODO: Make progress updates work without hooking twice
|
||||
# for ph in self._progress_hooks:
|
||||
# fd.add_progress_hook(ph)
|
||||
return fd.real_download(filename, info_dict)
|
||||
|
||||
is_webvtt = info_dict['ext'] == 'vtt'
|
||||
if is_webvtt:
|
||||
real_downloader = None # Packing the fragments is not currently supported for external downloader
|
||||
else:
|
||||
real_downloader = _get_real_downloader(info_dict, 'm3u8_frag_urls', self.params, None)
|
||||
real_downloader = get_suitable_downloader(
|
||||
info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-'))
|
||||
if real_downloader and not real_downloader.supports_manifest(s):
|
||||
real_downloader = None
|
||||
if real_downloader:
|
||||
@@ -147,14 +131,10 @@ class HlsFD(FragmentFD):
|
||||
if real_downloader:
|
||||
self._prepare_external_frag_download(ctx)
|
||||
else:
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
self._prepare_and_start_frag_download(ctx, info_dict)
|
||||
|
||||
extra_state = ctx.setdefault('extra_state', {})
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
test = self.params.get('test', False)
|
||||
|
||||
format_index = info_dict.get('format_index')
|
||||
extra_query = None
|
||||
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||
@@ -258,7 +238,7 @@ class HlsFD(FragmentFD):
|
||||
media_sequence += 1
|
||||
|
||||
# We only download the first fragment during the test
|
||||
if test:
|
||||
if self.params.get('test', False):
|
||||
fragments = [fragments[0] if fragments else None]
|
||||
|
||||
if real_downloader:
|
||||
@@ -268,195 +248,94 @@ class HlsFD(FragmentFD):
|
||||
# TODO: Make progress updates work without hooking twice
|
||||
# for ph in self._progress_hooks:
|
||||
# fd.add_progress_hook(ph)
|
||||
success = fd.real_download(filename, info_copy)
|
||||
if not success:
|
||||
return False
|
||||
else:
|
||||
def decrypt_fragment(fragment, frag_content):
|
||||
decrypt_info = fragment['decrypt_info']
|
||||
if decrypt_info['METHOD'] != 'AES-128':
|
||||
return frag_content
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
|
||||
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
|
||||
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
|
||||
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
|
||||
# not what it decrypts to.
|
||||
if test:
|
||||
return frag_content
|
||||
return AES.new(decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
return fd.real_download(filename, info_copy)
|
||||
|
||||
def download_fragment(fragment):
|
||||
frag_index = fragment['frag_index']
|
||||
frag_url = fragment['url']
|
||||
byte_range = fragment['byte_range']
|
||||
if is_webvtt:
|
||||
def pack_fragment(frag_content, frag_index):
|
||||
output = io.StringIO()
|
||||
adjust = 0
|
||||
for block in webvtt.parse_fragment(frag_content):
|
||||
if isinstance(block, webvtt.CueBlock):
|
||||
block.start += adjust
|
||||
block.end += adjust
|
||||
|
||||
ctx['fragment_index'] = frag_index
|
||||
dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
|
||||
|
||||
count = 0
|
||||
headers = info_dict.get('http_headers', {})
|
||||
if byte_range:
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, frag_content = self._download_fragment(
|
||||
ctx, frag_url, info_dict, headers)
|
||||
if not success:
|
||||
return False, frag_index
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# Unavailable (possibly temporary) fragments may be served.
|
||||
# First we try to retry then either skip or abort.
|
||||
# See https://github.com/ytdl-org/youtube-dl/issues/10165,
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/10448).
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
ctx['dest_stream'].close()
|
||||
self.report_error('Giving up after %s fragment retries' % fragment_retries)
|
||||
return False, frag_index
|
||||
ready = []
|
||||
|
||||
return decrypt_fragment(fragment, frag_content), frag_index
|
||||
|
||||
pack_fragment = lambda frag_content, _: frag_content
|
||||
|
||||
if is_webvtt:
|
||||
def pack_fragment(frag_content, frag_index):
|
||||
output = io.StringIO()
|
||||
adjust = 0
|
||||
for block in webvtt.parse_fragment(frag_content):
|
||||
if isinstance(block, webvtt.CueBlock):
|
||||
block.start += adjust
|
||||
block.end += adjust
|
||||
|
||||
dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
|
||||
cue = block.as_json
|
||||
|
||||
# skip the cue if an identical one appears
|
||||
# in the window of potential duplicates
|
||||
# and prune the window of unviable candidates
|
||||
i = 0
|
||||
skip = True
|
||||
while i < len(dedup_window):
|
||||
window_cue = dedup_window[i]
|
||||
if window_cue == cue:
|
||||
break
|
||||
if window_cue['end'] >= cue['start']:
|
||||
i += 1
|
||||
continue
|
||||
del dedup_window[i]
|
||||
else:
|
||||
skip = False
|
||||
|
||||
if skip:
|
||||
i = 0
|
||||
is_new = True
|
||||
while i < len(dedup_window):
|
||||
wcue = dedup_window[i]
|
||||
wblock = webvtt.CueBlock.from_json(wcue)
|
||||
i += 1
|
||||
if wblock.hinges(block):
|
||||
wcue['end'] = block.end
|
||||
is_new = False
|
||||
continue
|
||||
|
||||
# add the cue to the window
|
||||
dedup_window.append(cue)
|
||||
elif isinstance(block, webvtt.Magic):
|
||||
# take care of MPEG PES timestamp overflow
|
||||
if block.mpegts is None:
|
||||
block.mpegts = 0
|
||||
extra_state.setdefault('webvtt_mpegts_adjust', 0)
|
||||
block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
|
||||
if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
|
||||
extra_state['webvtt_mpegts_adjust'] += 1
|
||||
block.mpegts += 1 << 33
|
||||
extra_state['webvtt_mpegts_last'] = block.mpegts
|
||||
|
||||
if frag_index == 1:
|
||||
extra_state['webvtt_mpegts'] = block.mpegts or 0
|
||||
extra_state['webvtt_local'] = block.local or 0
|
||||
# XXX: block.local = block.mpegts = None ?
|
||||
else:
|
||||
if block.mpegts is not None and block.local is not None:
|
||||
adjust = (
|
||||
(block.mpegts - extra_state.get('webvtt_mpegts', 0))
|
||||
- (block.local - extra_state.get('webvtt_local', 0))
|
||||
)
|
||||
if wblock == block:
|
||||
is_new = False
|
||||
continue
|
||||
elif isinstance(block, webvtt.HeaderBlock):
|
||||
if frag_index != 1:
|
||||
# XXX: this should probably be silent as well
|
||||
# or verify that all segments contain the same data
|
||||
self.report_warning(bug_reports_message(
|
||||
'Discarding a %s block found in the middle of the stream; '
|
||||
'if the subtitles display incorrectly,'
|
||||
% (type(block).__name__)))
|
||||
if wblock.end > block.start:
|
||||
continue
|
||||
block.write_into(output)
|
||||
ready.append(wblock)
|
||||
i -= 1
|
||||
del dedup_window[i]
|
||||
|
||||
return output.getvalue().encode('utf-8')
|
||||
if is_new:
|
||||
dedup_window.append(block.as_json)
|
||||
for block in ready:
|
||||
block.write_into(output)
|
||||
|
||||
def append_fragment(frag_content, frag_index):
|
||||
fatal = frag_index == 1 or not skip_unavailable_fragments
|
||||
if frag_content:
|
||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
|
||||
try:
|
||||
file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||
ctx['fragment_filename_sanitized'] = frag_sanitized
|
||||
file.close()
|
||||
frag_content = pack_fragment(frag_content, frag_index)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
return True
|
||||
except EnvironmentError as ose:
|
||||
if ose.errno != errno.ENOENT:
|
||||
raise
|
||||
# FileNotFoundError
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
return True
|
||||
# we only emit cues once they fall out of the duplicate window
|
||||
continue
|
||||
elif isinstance(block, webvtt.Magic):
|
||||
# take care of MPEG PES timestamp overflow
|
||||
if block.mpegts is None:
|
||||
block.mpegts = 0
|
||||
extra_state.setdefault('webvtt_mpegts_adjust', 0)
|
||||
block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
|
||||
if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
|
||||
extra_state['webvtt_mpegts_adjust'] += 1
|
||||
block.mpegts += 1 << 33
|
||||
extra_state['webvtt_mpegts_last'] = block.mpegts
|
||||
|
||||
if frag_index == 1:
|
||||
extra_state['webvtt_mpegts'] = block.mpegts or 0
|
||||
extra_state['webvtt_local'] = block.local or 0
|
||||
# XXX: block.local = block.mpegts = None ?
|
||||
else:
|
||||
ctx['dest_stream'].close()
|
||||
self.report_error(
|
||||
'fragment %s not found, unable to continue' % frag_index)
|
||||
return False
|
||||
else:
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
return True
|
||||
else:
|
||||
ctx['dest_stream'].close()
|
||||
self.report_error(
|
||||
'fragment %s not found, unable to continue' % frag_index)
|
||||
return False
|
||||
if block.mpegts is not None and block.local is not None:
|
||||
adjust = (
|
||||
(block.mpegts - extra_state.get('webvtt_mpegts', 0))
|
||||
- (block.local - extra_state.get('webvtt_local', 0))
|
||||
)
|
||||
continue
|
||||
elif isinstance(block, webvtt.HeaderBlock):
|
||||
if frag_index != 1:
|
||||
# XXX: this should probably be silent as well
|
||||
# or verify that all segments contain the same data
|
||||
self.report_warning(bug_reports_message(
|
||||
'Discarding a %s block found in the middle of the stream; '
|
||||
'if the subtitles display incorrectly,'
|
||||
% (type(block).__name__)))
|
||||
continue
|
||||
block.write_into(output)
|
||||
|
||||
max_workers = self.params.get('concurrent_fragment_downloads', 1)
|
||||
if can_threaded_download and max_workers > 1:
|
||||
self.report_warning('The download speed shown is only of one thread. This is a known issue')
|
||||
_download_fragment = lambda f: (f, download_fragment(f)[1])
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
|
||||
futures = [pool.submit(_download_fragment, fragment) for fragment in fragments]
|
||||
# timeout must be 0 to return instantly
|
||||
done, not_done = concurrent.futures.wait(futures, timeout=0)
|
||||
try:
|
||||
while not_done:
|
||||
# Check every 1 second for KeyboardInterrupt
|
||||
freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
|
||||
done |= freshly_done
|
||||
except KeyboardInterrupt:
|
||||
for future in not_done:
|
||||
future.cancel()
|
||||
# timeout must be none to cancel
|
||||
concurrent.futures.wait(not_done, timeout=None)
|
||||
raise KeyboardInterrupt
|
||||
return output.getvalue().encode('utf-8')
|
||||
|
||||
for fragment, frag_index in map(lambda x: x.result(), futures):
|
||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
|
||||
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||
fragment['fragment_filename_sanitized'] = frag_sanitized
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index)
|
||||
if not result:
|
||||
return False
|
||||
else:
|
||||
for fragment in fragments:
|
||||
frag_content, frag_index = download_fragment(fragment)
|
||||
result = append_fragment(frag_content, frag_index)
|
||||
if not result:
|
||||
return False
|
||||
def fin_fragments():
|
||||
dedup_window = extra_state.get('webvtt_dedup_window')
|
||||
if not dedup_window:
|
||||
return b''
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
return True
|
||||
output = io.StringIO()
|
||||
for cue in dedup_window:
|
||||
webvtt.CueBlock.from_json(cue).write_into(output)
|
||||
|
||||
return output.getvalue().encode('utf-8')
|
||||
|
||||
self.download_and_append_fragments(
|
||||
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
|
||||
else:
|
||||
return self.download_and_append_fragments(ctx, fragments, info_dict)
|
||||
|
||||
@@ -18,6 +18,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
sanitize_open,
|
||||
sanitized_Request,
|
||||
ThrottledDownload,
|
||||
write_xattr,
|
||||
XAttrMetadataError,
|
||||
XAttrUnavailableError,
|
||||
@@ -176,7 +177,7 @@ class HttpFD(FileDownloader):
|
||||
'status': 'finished',
|
||||
'downloaded_bytes': ctx.resume_len,
|
||||
'total_bytes': ctx.resume_len,
|
||||
})
|
||||
}, info_dict)
|
||||
raise SucceedDownload()
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
@@ -223,6 +224,7 @@ class HttpFD(FileDownloader):
|
||||
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
|
||||
now = None # needed for slow_down() in the first loop run
|
||||
before = start # start measuring
|
||||
throttle_start = None
|
||||
|
||||
def retry(e):
|
||||
to_stdout = ctx.tmpfilename == '-'
|
||||
@@ -308,11 +310,23 @@ class HttpFD(FileDownloader):
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
'elapsed': now - ctx.start_time,
|
||||
})
|
||||
}, info_dict)
|
||||
|
||||
if data_len is not None and byte_counter == data_len:
|
||||
break
|
||||
|
||||
if speed and speed < (self.params.get('throttledratelimit') or 0):
|
||||
# The speed must stay below the limit for 3 seconds
|
||||
# This prevents raising error when the speed temporarily goes down
|
||||
if throttle_start is None:
|
||||
throttle_start = now
|
||||
elif now - throttle_start > 3:
|
||||
if ctx.stream is not None and ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
raise ThrottledDownload()
|
||||
else:
|
||||
throttle_start = None
|
||||
|
||||
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
|
||||
ctx.resume_len = byte_counter
|
||||
# ctx.block_size = block_size
|
||||
@@ -343,7 +357,7 @@ class HttpFD(FileDownloader):
|
||||
'filename': ctx.filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - ctx.start_time,
|
||||
})
|
||||
}, info_dict)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
@@ -246,7 +246,7 @@ class IsmFD(FragmentFD):
|
||||
'total_frags': len(segments),
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
self._prepare_and_start_frag_download(ctx, info_dict)
|
||||
|
||||
extra_state = ctx.setdefault('extra_state', {
|
||||
'ism_track_written': False,
|
||||
@@ -284,6 +284,6 @@ class IsmFD(FragmentFD):
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
self._finish_frag_download(ctx, info_dict)
|
||||
|
||||
return True
|
||||
|
||||
202
yt_dlp/downloader/mhtml.py
Normal file
202
yt_dlp/downloader/mhtml.py
Normal file
@@ -0,0 +1,202 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import quopri
|
||||
import re
|
||||
import uuid
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..utils import (
|
||||
escapeHTML,
|
||||
formatSeconds,
|
||||
srt_subtitles_timecode,
|
||||
urljoin,
|
||||
)
|
||||
from ..version import __version__ as YT_DLP_VERSION
|
||||
|
||||
|
||||
class MhtmlFD(FragmentFD):
|
||||
FD_NAME = 'mhtml'
|
||||
|
||||
_STYLESHEET = """\
|
||||
html, body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
height: 100vh;
|
||||
}
|
||||
|
||||
html {
|
||||
overflow-y: scroll;
|
||||
scroll-snap-type: y mandatory;
|
||||
}
|
||||
|
||||
body {
|
||||
scroll-snap-type: y mandatory;
|
||||
display: flex;
|
||||
flex-flow: column;
|
||||
}
|
||||
|
||||
body > figure {
|
||||
max-width: 100vw;
|
||||
max-height: 100vh;
|
||||
scroll-snap-align: center;
|
||||
}
|
||||
|
||||
body > figure > figcaption {
|
||||
text-align: center;
|
||||
height: 2.5em;
|
||||
}
|
||||
|
||||
body > figure > img {
|
||||
display: block;
|
||||
margin: auto;
|
||||
max-width: 100%;
|
||||
max-height: calc(100vh - 5em);
|
||||
}
|
||||
"""
|
||||
_STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
|
||||
_STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
|
||||
|
||||
@staticmethod
|
||||
def _escape_mime(s):
|
||||
return '=?utf-8?Q?' + (b''.join(
|
||||
bytes((b,)) if b >= 0x20 else b'=%02X' % b
|
||||
for b in quopri.encodestring(s.encode('utf-8'), header=True)
|
||||
)).decode('us-ascii') + '?='
|
||||
|
||||
def _gen_cid(self, i, fragment, frag_boundary):
|
||||
return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary)
|
||||
|
||||
def _gen_stub(self, *, fragments, frag_boundary, title):
|
||||
output = io.StringIO()
|
||||
|
||||
output.write((
|
||||
'<!DOCTYPE html>'
|
||||
'<html>'
|
||||
'<head>'
|
||||
'' '<meta name="generator" content="yt-dlp {version}">'
|
||||
'' '<title>{title}</title>'
|
||||
'' '<style>{styles}</style>'
|
||||
'<body>'
|
||||
).format(
|
||||
version=escapeHTML(YT_DLP_VERSION),
|
||||
styles=self._STYLESHEET,
|
||||
title=escapeHTML(title)
|
||||
))
|
||||
|
||||
t0 = 0
|
||||
for i, frag in enumerate(fragments):
|
||||
output.write('<figure>')
|
||||
try:
|
||||
t1 = t0 + frag['duration']
|
||||
output.write((
|
||||
'<figcaption>Slide #{num}: {t0} – {t1} (duration: {duration})</figcaption>'
|
||||
).format(
|
||||
num=i + 1,
|
||||
t0=srt_subtitles_timecode(t0),
|
||||
t1=srt_subtitles_timecode(t1),
|
||||
duration=formatSeconds(frag['duration'], msec=True)
|
||||
))
|
||||
except (KeyError, ValueError, TypeError):
|
||||
t1 = None
|
||||
output.write((
|
||||
'<figcaption>Slide #{num}</figcaption>'
|
||||
).format(num=i + 1))
|
||||
output.write('<img src="cid:{cid}">'.format(
|
||||
cid=self._gen_cid(i, frag, frag_boundary)))
|
||||
output.write('</figure>')
|
||||
t0 = t1
|
||||
|
||||
return output.getvalue()
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
fragment_base_url = info_dict.get('fragment_base_url')
|
||||
fragments = info_dict['fragments'][:1] if self.params.get(
|
||||
'test', False) else info_dict['fragments']
|
||||
title = info_dict['title']
|
||||
origin = info_dict['webpage_url']
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': len(fragments),
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx, info_dict)
|
||||
|
||||
extra_state = ctx.setdefault('extra_state', {
|
||||
'header_written': False,
|
||||
'mime_boundary': str(uuid.uuid4()).replace('-', ''),
|
||||
})
|
||||
|
||||
frag_boundary = extra_state['mime_boundary']
|
||||
|
||||
if not extra_state['header_written']:
|
||||
stub = self._gen_stub(
|
||||
fragments=fragments,
|
||||
frag_boundary=frag_boundary,
|
||||
title=title
|
||||
)
|
||||
|
||||
ctx['dest_stream'].write((
|
||||
'MIME-Version: 1.0\r\n'
|
||||
'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
|
||||
'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
|
||||
'Subject: {title}\r\n'
|
||||
'Content-type: multipart/related; '
|
||||
'' 'boundary="{boundary}"; '
|
||||
'' 'type="text/html"\r\n'
|
||||
'X.yt-dlp.Origin: {origin}\r\n'
|
||||
'\r\n'
|
||||
'--{boundary}\r\n'
|
||||
'Content-Type: text/html; charset=utf-8\r\n'
|
||||
'Content-Length: {length}\r\n'
|
||||
'\r\n'
|
||||
'{stub}\r\n'
|
||||
).format(
|
||||
origin=origin,
|
||||
boundary=frag_boundary,
|
||||
length=len(stub),
|
||||
title=self._escape_mime(title),
|
||||
stub=stub
|
||||
).encode('utf-8'))
|
||||
extra_state['header_written'] = True
|
||||
|
||||
for i, fragment in enumerate(fragments):
|
||||
if (i + 1) <= ctx['fragment_index']:
|
||||
continue
|
||||
|
||||
fragment_url = urljoin(fragment_base_url, fragment['path'])
|
||||
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
|
||||
if not success:
|
||||
continue
|
||||
|
||||
mime_type = b'image/jpeg'
|
||||
if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
|
||||
mime_type = b'image/png'
|
||||
if frag_content.startswith((b'GIF87a', b'GIF89a')):
|
||||
mime_type = b'image/gif'
|
||||
if frag_content.startswith(b'RIFF') and frag_content[8:12] == 'WEBP':
|
||||
mime_type = b'image/webp'
|
||||
|
||||
frag_header = io.BytesIO()
|
||||
frag_header.write(
|
||||
b'--%b\r\n' % frag_boundary.encode('us-ascii'))
|
||||
frag_header.write(
|
||||
b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii'))
|
||||
frag_header.write(
|
||||
b'Content-type: %b\r\n' % mime_type)
|
||||
frag_header.write(
|
||||
b'Content-length: %u\r\n' % len(frag_content))
|
||||
frag_header.write(
|
||||
b'Content-location: %b\r\n' % fragment_url.encode('us-ascii'))
|
||||
frag_header.write(
|
||||
b'X.yt-dlp.Duration: %f\r\n' % fragment['duration'])
|
||||
frag_header.write(b'\r\n')
|
||||
self._append_fragment(
|
||||
ctx, frag_header.getvalue() + frag_content + b'\r\n')
|
||||
|
||||
ctx['dest_stream'].write(
|
||||
b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii'))
|
||||
self._finish_frag_download(ctx, info_dict)
|
||||
return True
|
||||
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
|
||||
import threading
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..downloader import _get_real_downloader
|
||||
from ..downloader import get_suitable_downloader
|
||||
from ..extractor.niconico import NiconicoIE
|
||||
from ..compat import compat_urllib_request
|
||||
|
||||
@@ -20,7 +20,7 @@ class NiconicoDmcFD(FileDownloader):
|
||||
ie = NiconicoIE(self.ydl)
|
||||
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
|
||||
|
||||
fd = _get_real_downloader(info_dict, params=self.params)(self.ydl, self.params)
|
||||
fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
|
||||
|
||||
success = download_complete = False
|
||||
timer = [None]
|
||||
|
||||
@@ -66,7 +66,7 @@ class RtmpFD(FileDownloader):
|
||||
'eta': eta,
|
||||
'elapsed': time_now - start,
|
||||
'speed': speed,
|
||||
})
|
||||
}, info_dict)
|
||||
cursor_in_new_line = False
|
||||
else:
|
||||
# no percent for live streams
|
||||
@@ -82,7 +82,7 @@ class RtmpFD(FileDownloader):
|
||||
'status': 'downloading',
|
||||
'elapsed': time_now - start,
|
||||
'speed': speed,
|
||||
})
|
||||
}, info_dict)
|
||||
cursor_in_new_line = False
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
@@ -208,7 +208,7 @@ class RtmpFD(FileDownloader):
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - started,
|
||||
})
|
||||
}, info_dict)
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
|
||||
@@ -39,7 +39,7 @@ class RtspFD(FileDownloader):
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
}, info_dict)
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
|
||||
59
yt_dlp/downloader/websocket.py
Normal file
59
yt_dlp/downloader/websocket.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import os
|
||||
import signal
|
||||
import asyncio
|
||||
import threading
|
||||
|
||||
try:
|
||||
import websockets
|
||||
has_websockets = True
|
||||
except ImportError:
|
||||
has_websockets = False
|
||||
|
||||
from .common import FileDownloader
|
||||
from .external import FFmpegFD
|
||||
|
||||
|
||||
class FFmpegSinkFD(FileDownloader):
|
||||
""" A sink to ffmpeg for downloading fragments in any form """
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
info_copy = info_dict.copy()
|
||||
info_copy['url'] = '-'
|
||||
|
||||
async def call_conn(proc, stdin):
|
||||
try:
|
||||
await self.real_connection(stdin, info_dict)
|
||||
except (BrokenPipeError, OSError):
|
||||
pass
|
||||
finally:
|
||||
try:
|
||||
stdin.flush()
|
||||
stdin.close()
|
||||
except OSError:
|
||||
pass
|
||||
os.kill(os.getpid(), signal.SIGINT)
|
||||
|
||||
class FFmpegStdinFD(FFmpegFD):
|
||||
@classmethod
|
||||
def get_basename(cls):
|
||||
return FFmpegFD.get_basename()
|
||||
|
||||
def on_process_started(self, proc, stdin):
|
||||
thread = threading.Thread(target=asyncio.run, daemon=True, args=(call_conn(proc, stdin), ))
|
||||
thread.start()
|
||||
|
||||
return FFmpegStdinFD(self.ydl, self.params or {}).download(filename, info_copy)
|
||||
|
||||
async def real_connection(self, sink, info_dict):
|
||||
""" Override this in subclasses """
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
|
||||
class WebSocketFragmentFD(FFmpegSinkFD):
|
||||
async def real_connection(self, sink, info_dict):
|
||||
async with websockets.connect(info_dict['url'], extra_headers=info_dict.get('http_headers', {})) as ws:
|
||||
while True:
|
||||
recv = await ws.recv()
|
||||
if isinstance(recv, str):
|
||||
recv = recv.encode('utf8')
|
||||
sink.write(recv)
|
||||
@@ -1,20 +1,23 @@
|
||||
from __future__ import division, unicode_literals
|
||||
|
||||
import json
|
||||
import time
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import (
|
||||
try_get,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
)
|
||||
from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
|
||||
|
||||
|
||||
class YoutubeLiveChatReplayFD(FragmentFD):
|
||||
""" Downloads YouTube live chat replays fragment by fragment """
|
||||
class YoutubeLiveChatFD(FragmentFD):
|
||||
""" Downloads YouTube live chats fragment by fragment """
|
||||
|
||||
FD_NAME = 'youtube_live_chat_replay'
|
||||
FD_NAME = 'youtube_live_chat'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
video_id = info_dict['video_id']
|
||||
@@ -31,6 +34,8 @@ class YoutubeLiveChatReplayFD(FragmentFD):
|
||||
|
||||
ie = YT_BaseIE(self.ydl)
|
||||
|
||||
start_time = int(time.time() * 1000)
|
||||
|
||||
def dl_fragment(url, data=None, headers=None):
|
||||
http_headers = info_dict.get('http_headers', {})
|
||||
if headers:
|
||||
@@ -38,15 +43,80 @@ class YoutubeLiveChatReplayFD(FragmentFD):
|
||||
http_headers.update(headers)
|
||||
return self._download_fragment(ctx, url, info_dict, http_headers, data)
|
||||
|
||||
def download_and_parse_fragment(url, frag_index, request_data):
|
||||
def parse_actions_replay(live_chat_continuation):
|
||||
offset = continuation_id = click_tracking_params = None
|
||||
processed_fragment = bytearray()
|
||||
for action in live_chat_continuation.get('actions', []):
|
||||
if 'replayChatItemAction' in action:
|
||||
replay_chat_item_action = action['replayChatItemAction']
|
||||
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
|
||||
processed_fragment.extend(
|
||||
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
|
||||
if offset is not None:
|
||||
continuation = try_get(
|
||||
live_chat_continuation,
|
||||
lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
|
||||
if continuation:
|
||||
continuation_id = continuation.get('continuation')
|
||||
click_tracking_params = continuation.get('clickTrackingParams')
|
||||
self._append_fragment(ctx, processed_fragment)
|
||||
return continuation_id, offset, click_tracking_params
|
||||
|
||||
def try_refresh_replay_beginning(live_chat_continuation):
|
||||
# choose the second option that contains the unfiltered live chat replay
|
||||
refresh_continuation = try_get(
|
||||
live_chat_continuation,
|
||||
lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
|
||||
if refresh_continuation:
|
||||
# no data yet but required to call _append_fragment
|
||||
self._append_fragment(ctx, b'')
|
||||
refresh_continuation_id = refresh_continuation.get('continuation')
|
||||
offset = 0
|
||||
click_tracking_params = refresh_continuation.get('trackingParams')
|
||||
return refresh_continuation_id, offset, click_tracking_params
|
||||
return parse_actions_replay(live_chat_continuation)
|
||||
|
||||
live_offset = 0
|
||||
|
||||
def parse_actions_live(live_chat_continuation):
|
||||
nonlocal live_offset
|
||||
continuation_id = click_tracking_params = None
|
||||
processed_fragment = bytearray()
|
||||
for action in live_chat_continuation.get('actions', []):
|
||||
timestamp = self.parse_live_timestamp(action)
|
||||
if timestamp is not None:
|
||||
live_offset = timestamp - start_time
|
||||
# compatibility with replay format
|
||||
pseudo_action = {
|
||||
'replayChatItemAction': {'actions': [action]},
|
||||
'videoOffsetTimeMsec': str(live_offset),
|
||||
'isLive': True,
|
||||
}
|
||||
processed_fragment.extend(
|
||||
json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
|
||||
continuation_data_getters = [
|
||||
lambda x: x['continuations'][0]['invalidationContinuationData'],
|
||||
lambda x: x['continuations'][0]['timedContinuationData'],
|
||||
]
|
||||
continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
|
||||
if continuation_data:
|
||||
continuation_id = continuation_data.get('continuation')
|
||||
click_tracking_params = continuation_data.get('clickTrackingParams')
|
||||
timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
|
||||
if timeout_ms is not None:
|
||||
time.sleep(timeout_ms / 1000)
|
||||
self._append_fragment(ctx, processed_fragment)
|
||||
return continuation_id, live_offset, click_tracking_params
|
||||
|
||||
def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'})
|
||||
success, raw_fragment = dl_fragment(url, request_data, headers)
|
||||
if not success:
|
||||
return False, None, None
|
||||
return False, None, None, None
|
||||
try:
|
||||
data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
|
||||
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
|
||||
except RegexNotFoundError:
|
||||
data = None
|
||||
if not data:
|
||||
@@ -54,36 +124,29 @@ class YoutubeLiveChatReplayFD(FragmentFD):
|
||||
live_chat_continuation = try_get(
|
||||
data,
|
||||
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
|
||||
offset = continuation_id = None
|
||||
processed_fragment = bytearray()
|
||||
for action in live_chat_continuation.get('actions', []):
|
||||
if 'replayChatItemAction' in action:
|
||||
replay_chat_item_action = action['replayChatItemAction']
|
||||
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
|
||||
processed_fragment.extend(
|
||||
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
|
||||
if offset is not None:
|
||||
continuation_id = try_get(
|
||||
live_chat_continuation,
|
||||
lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
|
||||
self._append_fragment(ctx, processed_fragment)
|
||||
|
||||
return True, continuation_id, offset
|
||||
if info_dict['protocol'] == 'youtube_live_chat_replay':
|
||||
if frag_index == 1:
|
||||
continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
|
||||
else:
|
||||
continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
|
||||
elif info_dict['protocol'] == 'youtube_live_chat':
|
||||
continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
|
||||
return True, continuation_id, offset, click_tracking_params
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False, None, None
|
||||
return False, None, None, None
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
self._prepare_and_start_frag_download(ctx, info_dict)
|
||||
|
||||
success, raw_fragment = dl_fragment(info_dict['url'])
|
||||
if not success:
|
||||
return False
|
||||
try:
|
||||
data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
|
||||
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
|
||||
except RegexNotFoundError:
|
||||
return False
|
||||
continuation_id = try_get(
|
||||
@@ -92,7 +155,7 @@ class YoutubeLiveChatReplayFD(FragmentFD):
|
||||
# no data yet but required to call _append_fragment
|
||||
self._append_fragment(ctx, b'')
|
||||
|
||||
ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
|
||||
ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
|
||||
|
||||
if not ytcfg:
|
||||
return False
|
||||
@@ -100,9 +163,16 @@ class YoutubeLiveChatReplayFD(FragmentFD):
|
||||
innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
|
||||
if not api_key or not innertube_context:
|
||||
return False
|
||||
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
|
||||
visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
|
||||
if info_dict['protocol'] == 'youtube_live_chat_replay':
|
||||
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
|
||||
chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
|
||||
elif info_dict['protocol'] == 'youtube_live_chat':
|
||||
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
|
||||
chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
|
||||
|
||||
frag_index = offset = 0
|
||||
click_tracking_params = None
|
||||
while continuation_id is not None:
|
||||
frag_index += 1
|
||||
request_data = {
|
||||
@@ -111,12 +181,56 @@ class YoutubeLiveChatReplayFD(FragmentFD):
|
||||
}
|
||||
if frag_index > 1:
|
||||
request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
|
||||
success, continuation_id, offset = download_and_parse_fragment(
|
||||
url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n')
|
||||
if click_tracking_params:
|
||||
request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
|
||||
headers = ie.generate_api_headers(ytcfg, visitor_data=visitor_data)
|
||||
headers.update({'content-type': 'application/json'})
|
||||
fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
|
||||
success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
|
||||
url, frag_index, fragment_request_data, headers)
|
||||
else:
|
||||
success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
|
||||
chat_page_url, frag_index)
|
||||
if not success:
|
||||
return False
|
||||
if test:
|
||||
break
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
self._finish_frag_download(ctx, info_dict)
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def parse_live_timestamp(action):
|
||||
action_content = dict_get(
|
||||
action,
|
||||
['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
|
||||
if not isinstance(action_content, dict):
|
||||
return None
|
||||
item = dict_get(action_content, ['item', 'bannerRenderer'])
|
||||
if not isinstance(item, dict):
|
||||
return None
|
||||
renderer = dict_get(item, [
|
||||
# text
|
||||
'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
|
||||
'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
|
||||
# ticker
|
||||
'liveChatTickerPaidMessageItemRenderer',
|
||||
'liveChatTickerSponsorItemRenderer',
|
||||
# banner
|
||||
'liveChatBannerRenderer',
|
||||
])
|
||||
if not isinstance(renderer, dict):
|
||||
return None
|
||||
parent_item_getters = [
|
||||
lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
|
||||
lambda x: x['contents'],
|
||||
]
|
||||
parent_item = try_get(renderer, parent_item_getters, dict)
|
||||
if parent_item:
|
||||
renderer = dict_get(parent_item, [
|
||||
'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
|
||||
'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
|
||||
])
|
||||
if not isinstance(renderer, dict):
|
||||
return None
|
||||
return int_or_none(renderer.get('timestampUsec'), 1000)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import xml.etree.ElementTree as etree
|
||||
@@ -61,6 +62,11 @@ MSO_INFO = {
|
||||
'username_field': 'IDToken1',
|
||||
'password_field': 'IDToken2',
|
||||
},
|
||||
'Spectrum': {
|
||||
'name': 'Spectrum',
|
||||
'username_field': 'IDToken1',
|
||||
'password_field': 'IDToken2',
|
||||
},
|
||||
'Philo': {
|
||||
'name': 'Philo',
|
||||
'username_field': 'ident'
|
||||
@@ -70,6 +76,11 @@ MSO_INFO = {
|
||||
'username_field': 'IDToken1',
|
||||
'password_field': 'IDToken2',
|
||||
},
|
||||
'Cablevision': {
|
||||
'name': 'Optimum/Cablevision',
|
||||
'username_field': 'j_username',
|
||||
'password_field': 'j_password',
|
||||
},
|
||||
'thr030': {
|
||||
'name': '3 Rivers Communications'
|
||||
},
|
||||
@@ -1324,6 +1335,11 @@ MSO_INFO = {
|
||||
'cou060': {
|
||||
'name': 'Zito Media'
|
||||
},
|
||||
'slingtv': {
|
||||
'name': 'Sling TV',
|
||||
'username_field': 'username',
|
||||
'password_field': 'password',
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -1524,6 +1540,75 @@ class AdobePassIE(InfoExtractor):
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
elif mso_id == 'Spectrum':
|
||||
# Spectrum's login for is dynamically loaded via JS so we need to hardcode the flow
|
||||
# as a one-off implementation.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
provider_login_page_res = post_form(
|
||||
provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
|
||||
saml_login_page, urlh = provider_login_page_res
|
||||
relay_state = self._search_regex(
|
||||
r'RelayState\s*=\s*"(?P<relay>.+?)";',
|
||||
saml_login_page, 'RelayState', group='relay')
|
||||
saml_request = self._search_regex(
|
||||
r'SAMLRequest\s*=\s*"(?P<saml_request>.+?)";',
|
||||
saml_login_page, 'SAMLRequest', group='saml_request')
|
||||
login_json = {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
'RelayState': relay_state,
|
||||
'SAMLRequest': saml_request,
|
||||
}
|
||||
saml_response_json = self._download_json(
|
||||
'https://tveauthn.spectrum.net/tveauthentication/api/v1/manualAuth', video_id,
|
||||
'Downloading SAML Response',
|
||||
data=json.dumps(login_json).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Accept': 'application/json',
|
||||
})
|
||||
self._download_webpage(
|
||||
saml_response_json['SAMLRedirectUri'], video_id,
|
||||
'Confirming Login', data=urlencode_postdata({
|
||||
'SAMLResponse': saml_response_json['SAMLResponse'],
|
||||
'RelayState': relay_state,
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
elif mso_id == 'slingtv':
|
||||
# SlingTV has a meta-refresh based authentication, but also
|
||||
# looks at the tab history to count the number of times the
|
||||
# browser has been on a page
|
||||
|
||||
first_bookend_page, urlh = provider_redirect_page_res
|
||||
|
||||
hidden_data = self._hidden_inputs(first_bookend_page)
|
||||
hidden_data['history'] = 1
|
||||
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending first bookend',
|
||||
query=hidden_data)
|
||||
|
||||
provider_association_redirect, urlh = post_form(
|
||||
provider_login_page_res, 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password
|
||||
})
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.geturl())
|
||||
|
||||
last_bookend_page, urlh = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
'Downloading Auth Association Redirect Page')
|
||||
hidden_data = self._hidden_inputs(last_bookend_page)
|
||||
hidden_data['history'] = 3
|
||||
|
||||
mvpd_confirm_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending final bookend',
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
else:
|
||||
# Some providers (e.g. DIRECTV NOW) have another meta refresh
|
||||
# based redirect that should be followed.
|
||||
@@ -1536,10 +1621,13 @@ class AdobePassIE(InfoExtractor):
|
||||
'Downloading Provider Redirect Page (meta refresh)')
|
||||
provider_login_page_res = post_form(
|
||||
provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
|
||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
||||
form_data = {
|
||||
mso_info.get('username_field', 'username'): username,
|
||||
mso_info.get('password_field', 'password'): password,
|
||||
})
|
||||
mso_info.get('password_field', 'password'): password
|
||||
}
|
||||
if mso_id == 'Cablevision':
|
||||
form_data['_eventId_proceed'] = ''
|
||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data)
|
||||
if mso_id != 'Rogers':
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
|
||||
|
||||
@@ -20,8 +20,8 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/'''
|
||||
_THEPLATFORM_KEY = 'crazyjava'
|
||||
_THEPLATFORM_SECRET = 's3cr3t'
|
||||
_THEPLATFORM_KEY = '43jXaGRQud'
|
||||
_THEPLATFORM_SECRET = 'S10BPXHMlb'
|
||||
_DOMAIN_MAP = {
|
||||
'history.com': ('HISTORY', 'history'),
|
||||
'aetv.com': ('AETV', 'aetv'),
|
||||
|
||||
@@ -9,10 +9,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class AppleConnectIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'md5': 'e7c38568a01ea45402570e6029206723',
|
||||
'md5': 'c1d41f72c8bcaf222e089434619316e4',
|
||||
'info_dict': {
|
||||
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'ext': 'm4v',
|
||||
@@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor):
|
||||
'upload_date': '20150710',
|
||||
'timestamp': 1436545535,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor):
|
||||
|
||||
video_data = self._parse_json(video_json, video_id)
|
||||
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
|
||||
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
|
||||
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -212,7 +212,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
class BandcampAlbumIE(BandcampIE):
|
||||
IE_NAME = 'Bandcamp:album'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?!/music)(?:/album/(?P<id>[^/?#&]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
@@ -389,3 +389,43 @@ class BandcampWeeklyIE(BandcampIE):
|
||||
'episode_id': show_id,
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
|
||||
class BandcampMusicIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<id>[^/]+)\.bandcamp\.com/music'
|
||||
_TESTS = [{
|
||||
'url': 'https://steviasphere.bandcamp.com/music',
|
||||
'playlist_mincount': 47,
|
||||
'info_dict': {
|
||||
'id': 'steviasphere',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://coldworldofficial.bandcamp.com/music',
|
||||
'playlist_mincount': 10,
|
||||
'info_dict': {
|
||||
'id': 'coldworldofficial',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://nuclearwarnowproductions.bandcamp.com/music',
|
||||
'playlist_mincount': 399,
|
||||
'info_dict': {
|
||||
'id': 'nuclearwarnowproductions',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
_TYPE_IE_DICT = {
|
||||
'album': BandcampAlbumIE.ie_key(),
|
||||
'track': BandcampIE.ie_key()
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
items = re.findall(r'href\=\"\/(?P<path>(?P<type>album|track)+/[^\"]+)', webpage)
|
||||
entries = [
|
||||
self.url_result(
|
||||
f'https://{id}.bandcamp.com/{item[0]}',
|
||||
ie=self._TYPE_IE_DICT[item[1]])
|
||||
for item in items]
|
||||
return self.playlist_result(entries, id)
|
||||
|
||||
@@ -37,7 +37,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
video/[aA][vV]|
|
||||
anime/(?P<anime_id>\d+)/play\#
|
||||
)(?P<id>\d+)|
|
||||
video/[bB][vV](?P<id_bv>[^/?#&]+)
|
||||
(s/)?video/[bB][vV](?P<id_bv>[^/?#&]+)
|
||||
)
|
||||
(?:/?\?p=(?P<page>\d+))?
|
||||
'''
|
||||
@@ -281,7 +281,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
'uploader': uploader_mobj.group('name'),
|
||||
'uploader': uploader_mobj.group('name').strip(),
|
||||
'uploader_id': uploader_mobj.group('id'),
|
||||
})
|
||||
|
||||
|
||||
68
yt_dlp/extractor/blackboardcollaborate.py
Normal file
68
yt_dlp/extractor/blackboardcollaborate.py
Normal file
@@ -0,0 +1,68 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class BlackboardCollaborateIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<region>[a-z-]+)\.bbcollab\.com/
|
||||
(?:
|
||||
collab/ui/session/playback/load|
|
||||
recording
|
||||
)/
|
||||
(?P<id>[^/]+)'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256',
|
||||
'md5': 'bb7a055682ee4f25fdb5838cdf014541',
|
||||
'info_dict': {
|
||||
'id': '0a633b6a88824deb8c918f470b22b256',
|
||||
'title': 'HESI A2 Information Session - Thursday, May 6, 2021 - recording_1',
|
||||
'ext': 'mp4',
|
||||
'duration': 1896000,
|
||||
'timestamp': 1620331399,
|
||||
'upload_date': '20210506',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://us.bbcollab.com/collab/ui/session/playback/load/76761522adfe4345a0dee6794bbcabda',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://ca.bbcollab.com/collab/ui/session/playback/load/b6399dcb44df4f21b29ebe581e22479d',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://eu.bbcollab.com/recording/51ed7b50810c4444a106e48cefb3e6b5',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://au.bbcollab.com/collab/ui/session/playback/load/2bccf7165d7c419ab87afc1ec3f3bb15',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
region = mobj.group('region')
|
||||
video_id = mobj.group('id')
|
||||
info = self._download_json(
|
||||
'https://{}.bbcollab.com/collab/api/csa/recordings/{}/data'.format(region, video_id), video_id)
|
||||
duration = info.get('duration')
|
||||
title = info['name']
|
||||
upload_date = info.get('created')
|
||||
streams = info['streams']
|
||||
formats = [{'format_id': k, 'url': url} for k, url in streams.items()]
|
||||
|
||||
return {
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'id': video_id,
|
||||
'timestamp': parse_iso8601(upload_date),
|
||||
'title': title,
|
||||
}
|
||||
@@ -8,6 +8,9 @@ from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
try_get,
|
||||
dict_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,6 +27,11 @@ class BravoTVIE(AdobePassIE):
|
||||
'uploader': 'NBCU-BRAV',
|
||||
'upload_date': '20190314',
|
||||
'timestamp': 1552591860,
|
||||
'season_number': 16,
|
||||
'episode_number': 15,
|
||||
'series': 'Top Chef',
|
||||
'episode': 'The Top Chef Season 16 Winner Is...',
|
||||
'duration': 190.0,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
@@ -79,12 +87,34 @@ class BravoTVIE(AdobePassIE):
|
||||
'episode_number': int_or_none(metadata.get('episode_num')),
|
||||
})
|
||||
query['switch'] = 'progressive'
|
||||
|
||||
tp_url = 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path)
|
||||
|
||||
tp_metadata = self._download_json(
|
||||
update_url_query(tp_url, {'format': 'preview'}),
|
||||
display_id, fatal=False)
|
||||
if tp_metadata:
|
||||
info.update({
|
||||
'title': tp_metadata.get('title'),
|
||||
'description': tp_metadata.get('description'),
|
||||
'duration': float_or_none(tp_metadata.get('duration'), 1000),
|
||||
'season_number': int_or_none(
|
||||
dict_get(tp_metadata, ('pl1$seasonNumber', 'nbcu$seasonNumber'))),
|
||||
'episode_number': int_or_none(
|
||||
dict_get(tp_metadata, ('pl1$episodeNumber', 'nbcu$episodeNumber'))),
|
||||
# For some reason the series is sometimes wrapped into a single element array.
|
||||
'series': try_get(
|
||||
dict_get(tp_metadata, ('pl1$show', 'nbcu$show')),
|
||||
lambda x: x[0] if isinstance(x, list) else x,
|
||||
expected_type=str),
|
||||
'episode': dict_get(
|
||||
tp_metadata, ('pl1$episodeName', 'nbcu$episodeName', 'title')),
|
||||
})
|
||||
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'id': release_pid,
|
||||
'url': smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path),
|
||||
query), {'force_smil_url': True}),
|
||||
'url': smuggle_url(update_url_query(tp_url, query), {'force_smil_url': True}),
|
||||
'ie_key': 'ThePlatform',
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .gigya import GigyaBaseIE
|
||||
@@ -17,6 +16,7 @@ from ..utils import (
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata
|
||||
)
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'md5': '68993eda72ef62386a15ea2cf3c93107',
|
||||
'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
@@ -32,9 +32,9 @@ class CanvasIE(InfoExtractor):
|
||||
'title': 'Nachtwacht: De Greystook',
|
||||
'description': 'Nachtwacht: De Greystook',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1468.04,
|
||||
'duration': 1468.02,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'only_matching': True,
|
||||
@@ -265,7 +265,7 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
|
||||
}]
|
||||
_NETRC_MACHINE = 'vrtnu'
|
||||
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
|
||||
_APIKEY = '3_qhEcPa5JGFROVwu5SWKqJ4mVOIkwlFNMSKwzPDAh8QZOtHqu6L4nD5Q7lk0eXOOG'
|
||||
_CONTEXT_ID = 'R3595707040'
|
||||
|
||||
def _real_initialize(self):
|
||||
@@ -276,35 +276,38 @@ class VrtNUIE(GigyaBaseIE):
|
||||
if username is None:
|
||||
return
|
||||
|
||||
auth_data = {
|
||||
'APIKey': self._APIKEY,
|
||||
'targetEnv': 'jssdk',
|
||||
'loginID': username,
|
||||
'password': password,
|
||||
'authMode': 'cookie',
|
||||
}
|
||||
|
||||
auth_info = self._gigya_login(auth_data)
|
||||
auth_info = self._download_json(
|
||||
'https://accounts.vrt.be/accounts.login', None,
|
||||
note='Login data', errnote='Could not get Login data',
|
||||
headers={}, data=urlencode_postdata({
|
||||
'loginID': username,
|
||||
'password': password,
|
||||
'sessionExpiration': '-2',
|
||||
'APIKey': self._APIKEY,
|
||||
'targetEnv': 'jssdk',
|
||||
}))
|
||||
|
||||
# Sometimes authentication fails for no good reason, retry
|
||||
login_attempt = 1
|
||||
while login_attempt <= 3:
|
||||
try:
|
||||
# When requesting a token, no actual token is returned, but the
|
||||
# necessary cookies are set.
|
||||
self._request_webpage('https://token.vrt.be/vrtnuinitlogin',
|
||||
None, note='Requesting XSRF Token', errnote='Could not get XSRF Token',
|
||||
query={'provider': 'site', 'destination': 'https://www.vrt.be/vrtnu/'})
|
||||
|
||||
post_data = {
|
||||
'UID': auth_info['UID'],
|
||||
'UIDSignature': auth_info['UIDSignature'],
|
||||
'signatureTimestamp': auth_info['signatureTimestamp'],
|
||||
'client_id': 'vrtnu-site',
|
||||
'_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
|
||||
}
|
||||
|
||||
self._request_webpage(
|
||||
'https://token.vrt.be',
|
||||
'https://login.vrt.be/perform_login',
|
||||
None, note='Requesting a token', errnote='Could not get a token',
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': 'https://www.vrt.be/vrtnu/',
|
||||
},
|
||||
data=json.dumps({
|
||||
'uid': auth_info['UID'],
|
||||
'uidsig': auth_info['UIDSignature'],
|
||||
'ts': auth_info['signatureTimestamp'],
|
||||
'email': auth_info['profile']['email'],
|
||||
}).encode('utf-8'))
|
||||
headers={}, data=urlencode_postdata(post_data))
|
||||
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
login_attempt += 1
|
||||
|
||||
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,16 +26,62 @@ class CBSBaseIE(ThePlatformFeedIE):
|
||||
})
|
||||
return subtitles
|
||||
|
||||
def _extract_common_video_info(self, content_id, asset_types, mpx_acc, extra_info):
|
||||
tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
|
||||
tp_release_url = f'https://link.theplatform.com/s/{tp_path}'
|
||||
info = self._extract_theplatform_metadata(tp_path, content_id)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
last_e = None
|
||||
for asset_type, query in asset_types.items():
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data' % asset_type)
|
||||
except ExtractorError as e:
|
||||
last_e = e
|
||||
if asset_type != 'fallback':
|
||||
continue
|
||||
query['formats'] = '' # blank query to check if expired
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data, trying again with another format' % asset_type)
|
||||
except ExtractorError as e:
|
||||
last_e = e
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
if last_e and not formats:
|
||||
self.raise_no_formats(last_e, True, content_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
extra_info.update({
|
||||
'id': content_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
info.update({k: v for k, v in extra_info.items() if v is not None})
|
||||
return info
|
||||
|
||||
def _extract_video_info(self, *args, **kwargs):
|
||||
# Extract assets + metadata and call _extract_common_video_info
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_video_info(self._match_id(url))
|
||||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
cbs:|
|
||||
https?://(?:www\.)?(?:
|
||||
(?:cbs|paramountplus)\.com/(?:shows/[^/]+/video|movies/[^/]+)/|
|
||||
cbs\.com/(?:shows/[^/]+/video|movies/[^/]+)/|
|
||||
colbertlateshow\.com/(?:video|podcasts)/)
|
||||
)(?P<id>[\w-]+)'''
|
||||
|
||||
# All tests are blocked outside US
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
'info_dict': {
|
||||
@@ -51,19 +98,28 @@ class CBSIE(CBSBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'_skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'https://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/60icOhMb9NcjbcWnF_gub9XXHdeBcNk2/the-late-show-6-23-21-christine-baranski-joy-oladokun-',
|
||||
'info_dict': {
|
||||
'id': '60icOhMb9NcjbcWnF_gub9XXHdeBcNk2',
|
||||
'title': 'The Late Show - 6/23/21 (Christine Baranski, Joy Oladokun)',
|
||||
'timestamp': 1624507140,
|
||||
'description': 'md5:e01af24e95c74d55e8775aef86117b95',
|
||||
'uploader': 'CBSI-NEW',
|
||||
'upload_date': '20210624',
|
||||
},
|
||||
'params': {
|
||||
'ignore_no_formats_error': True,
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'This content expired on', 'No video formats found', 'Requested format is not available'],
|
||||
}, {
|
||||
'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.paramountplus.com/movies/million-dollar-american-princesses-meghan-and-harry/C0LpgNwXYeB8txxycdWdR9TjxpJOsdCq',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
|
||||
@@ -72,53 +128,34 @@ class CBSIE(CBSBaseIE):
|
||||
content_id, query={'partner': site, 'contentId': content_id})
|
||||
video_data = xpath_element(items_data, './/item')
|
||||
title = xpath_text(video_data, 'videoTitle', 'title') or xpath_text(video_data, 'videotitle', 'title')
|
||||
tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
|
||||
tp_release_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
|
||||
asset_types = []
|
||||
subtitles = {}
|
||||
formats = []
|
||||
last_e = None
|
||||
asset_types = {}
|
||||
for item in items_data.findall('.//item'):
|
||||
asset_type = xpath_text(item, 'assetType')
|
||||
if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type:
|
||||
continue
|
||||
asset_types.append(asset_type)
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'assetTypes': asset_type,
|
||||
}
|
||||
if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
|
||||
if not asset_type:
|
||||
# fallback for content_ids that videoPlayerService doesn't return anything for
|
||||
asset_type = 'fallback'
|
||||
query['formats'] = 'M3U+none,MPEG4,M3U+appleHlsEncryption,MP3'
|
||||
del query['assetTypes']
|
||||
if asset_type in asset_types:
|
||||
continue
|
||||
elif any(excluded in asset_type for excluded in ('HLS_FPS', 'DASH_CENC', 'OnceURL')):
|
||||
continue
|
||||
if asset_type.startswith('HLS') or 'StreamPack' in asset_type:
|
||||
query['formats'] = 'MPEG4,M3U'
|
||||
elif asset_type in ('RTMP', 'WIFI', '3G'):
|
||||
query['formats'] = 'MPEG4,FLV'
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data' % asset_type)
|
||||
except ExtractorError as e:
|
||||
last_e = e
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
if last_e and not formats:
|
||||
raise last_e
|
||||
self._sort_formats(formats)
|
||||
asset_types[asset_type] = query
|
||||
|
||||
info = self._extract_theplatform_metadata(tp_path, content_id)
|
||||
info.update({
|
||||
'id': content_id,
|
||||
return self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info={
|
||||
'title': title,
|
||||
'series': xpath_text(video_data, 'seriesTitle'),
|
||||
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
|
||||
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
|
||||
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
||||
'thumbnail': xpath_text(video_data, 'previewImageURL'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')),
|
||||
})
|
||||
return info
|
||||
|
||||
def _real_extract(self, url):
|
||||
content_id = self._match_id(url)
|
||||
return self._extract_video_info(content_id)
|
||||
|
||||
@@ -19,7 +19,6 @@ from ..compat import (
|
||||
compat_etree_Element,
|
||||
compat_etree_fromstring,
|
||||
compat_getpass,
|
||||
compat_integer_types,
|
||||
compat_http_client,
|
||||
compat_os_name,
|
||||
compat_str,
|
||||
@@ -36,7 +35,6 @@ from ..downloader.f4m import (
|
||||
remove_encrypted_media,
|
||||
)
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
age_restricted,
|
||||
base_url,
|
||||
bug_reports_message,
|
||||
@@ -46,10 +44,11 @@ from ..utils import (
|
||||
determine_protocol,
|
||||
dict_get,
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
format_field,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
int_or_none,
|
||||
@@ -57,6 +56,7 @@ from ..utils import (
|
||||
JSON_LD_RE,
|
||||
mimetype2ext,
|
||||
network_exceptions,
|
||||
NO_DEFAULT,
|
||||
orderedSet,
|
||||
parse_bitrate,
|
||||
parse_codecs,
|
||||
@@ -65,19 +65,21 @@ from ..utils import (
|
||||
parse_m3u8_attributes,
|
||||
parse_resolution,
|
||||
RegexNotFoundError,
|
||||
sanitized_Request,
|
||||
sanitize_filename,
|
||||
sanitized_Request,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_Request,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
@@ -228,6 +230,7 @@ class InfoExtractor(object):
|
||||
* "resolution" (optional, string "{width}x{height}",
|
||||
deprecated)
|
||||
* "filesize" (optional, int)
|
||||
* "_test_url" (optional, bool) - If true, test the URL
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
@@ -290,10 +293,13 @@ class InfoExtractor(object):
|
||||
categories: A list of categories that the video falls in, for example
|
||||
["Sports", "Berlin"]
|
||||
tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
|
||||
cast: A list of the video cast
|
||||
is_live: True, False, or None (=unknown). Whether this video is a
|
||||
live stream that goes on instead of a fixed-length video.
|
||||
was_live: True, False, or None (=unknown). Whether this video was
|
||||
originally a live stream.
|
||||
live_status: 'is_live', 'is_upcoming', 'was_live', 'not_live' or None (=unknown)
|
||||
If absent, automatically set from is_live, was_live
|
||||
start_time: Time in seconds where the reproduction should start, as
|
||||
specified in the URL.
|
||||
end_time: Time in seconds where the reproduction should end, as
|
||||
@@ -437,6 +443,7 @@ class InfoExtractor(object):
|
||||
"""Constructor. Receives an optional downloader."""
|
||||
self._ready = False
|
||||
self._x_forwarded_for_ip = None
|
||||
self._printed_messages = set()
|
||||
self.set_downloader(downloader)
|
||||
|
||||
@classmethod
|
||||
@@ -465,6 +472,7 @@ class InfoExtractor(object):
|
||||
|
||||
def initialize(self):
|
||||
"""Initializes an instance (authentication, etc)."""
|
||||
self._printed_messages = set()
|
||||
self._initialize_geo_bypass({
|
||||
'countries': self._GEO_COUNTRIES,
|
||||
'ip_blocks': self._GEO_IP_BLOCKS,
|
||||
@@ -626,14 +634,10 @@ class InfoExtractor(object):
|
||||
assert isinstance(err, compat_urllib_error.HTTPError)
|
||||
if expected_status is None:
|
||||
return False
|
||||
if isinstance(expected_status, compat_integer_types):
|
||||
return err.code == expected_status
|
||||
elif isinstance(expected_status, (list, tuple)):
|
||||
return err.code in expected_status
|
||||
elif callable(expected_status):
|
||||
return expected_status(err.code) is True
|
||||
else:
|
||||
assert False
|
||||
return err.code in variadic(expected_status)
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
@@ -998,10 +1002,14 @@ class InfoExtractor(object):
|
||||
expected_status=expected_status)
|
||||
return res if res is False else res[0]
|
||||
|
||||
def report_warning(self, msg, video_id=None, *args, **kwargs):
|
||||
idstr = '' if video_id is None else '%s: ' % video_id
|
||||
self._downloader.report_warning(
|
||||
'[%s] %s%s' % (self.IE_NAME, idstr, msg), *args, **kwargs)
|
||||
def report_warning(self, msg, video_id=None, *args, only_once=False, **kwargs):
|
||||
idstr = format_field(video_id, template='%s: ')
|
||||
msg = f'[{self.IE_NAME}] {idstr}{msg}'
|
||||
if only_once:
|
||||
if f'WARNING: {msg}' in self._printed_messages:
|
||||
return
|
||||
self._printed_messages.add(f'WARNING: {msg}')
|
||||
self._downloader.report_warning(msg, *args, **kwargs)
|
||||
|
||||
def to_screen(self, msg, *args, **kwargs):
|
||||
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
||||
@@ -1036,7 +1044,9 @@ class InfoExtractor(object):
|
||||
metadata_available=False, method='any'):
|
||||
if metadata_available and self.get_param('ignore_no_formats_error'):
|
||||
self.report_warning(msg)
|
||||
raise ExtractorError('%s. %s' % (msg, self._LOGIN_HINTS[method]), expected=True)
|
||||
if method is not None:
|
||||
msg = '%s. %s' % (msg, self._LOGIN_HINTS[method])
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
def raise_geo_restricted(
|
||||
self, msg='This video is not available from your location due to geo restriction',
|
||||
@@ -1049,6 +1059,8 @@ class InfoExtractor(object):
|
||||
def raise_no_formats(self, msg, expected=False, video_id=None):
|
||||
if expected and self.get_param('ignore_no_formats_error'):
|
||||
self.report_warning(msg, video_id)
|
||||
elif isinstance(msg, ExtractorError):
|
||||
raise msg
|
||||
else:
|
||||
raise ExtractorError(msg, expected=expected, video_id=video_id)
|
||||
|
||||
@@ -1111,6 +1123,8 @@ class InfoExtractor(object):
|
||||
if group is None:
|
||||
# return the first matching group
|
||||
return next(g for g in mobj.groups() if g is not None)
|
||||
elif isinstance(group, (list, tuple)):
|
||||
return tuple(mobj.group(g) for g in group)
|
||||
else:
|
||||
return mobj.group(group)
|
||||
elif default is not NO_DEFAULT:
|
||||
@@ -1203,8 +1217,7 @@ class InfoExtractor(object):
|
||||
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
|
||||
|
||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||
if not isinstance(prop, (list, tuple)):
|
||||
prop = [prop]
|
||||
prop = variadic(prop)
|
||||
if name is None:
|
||||
name = 'OpenGraph %s' % prop[0]
|
||||
og_regexes = []
|
||||
@@ -1234,8 +1247,7 @@ class InfoExtractor(object):
|
||||
return self._og_search_property('url', html, **kargs)
|
||||
|
||||
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
|
||||
if not isinstance(name, (list, tuple)):
|
||||
name = [name]
|
||||
name = variadic(name)
|
||||
if display_name is None:
|
||||
display_name = name[0]
|
||||
return self._html_search_regex(
|
||||
@@ -1295,7 +1307,7 @@ class InfoExtractor(object):
|
||||
# JSON-LD may be malformed and thus `fatal` should be respected.
|
||||
# At the same time `default` may be passed that assumes `fatal=False`
|
||||
# for _search_regex. Let's simulate the same behavior here as well.
|
||||
fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
|
||||
fatal = kwargs.get('fatal', True) if default is NO_DEFAULT else False
|
||||
json_ld = []
|
||||
for mobj in json_ld_list:
|
||||
json_ld_item = self._parse_json(
|
||||
@@ -1473,7 +1485,7 @@ class InfoExtractor(object):
|
||||
class FormatSort:
|
||||
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
|
||||
|
||||
default = ('hidden', 'hasvid', 'ie_pref', 'lang', 'quality',
|
||||
default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
|
||||
'res', 'fps', 'codec:vp9.2', 'size', 'br', 'asr',
|
||||
'proto', 'ext', 'hasaud', 'source', 'format_id') # These must not be aliases
|
||||
ytdl_default = ('hasaud', 'quality', 'tbr', 'filesize', 'vbr',
|
||||
@@ -1486,7 +1498,7 @@ class InfoExtractor(object):
|
||||
'acodec': {'type': 'ordered', 'regex': True,
|
||||
'order': ['opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e?a?c-?3', 'dts', '', None, 'none']},
|
||||
'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
|
||||
'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', 'm3u8', '.*dash', '', 'mms|rtsp', 'none', 'f4']},
|
||||
'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', '.*dash', 'ws|websocket', '', 'mms|rtsp', 'none', 'f4']},
|
||||
'vext': {'type': 'ordered', 'field': 'video_ext',
|
||||
'order': ('mp4', 'webm', 'flv', '', 'none'),
|
||||
'order_free': ('webm', 'mp4', 'flv', '', 'none')},
|
||||
@@ -1494,6 +1506,9 @@ class InfoExtractor(object):
|
||||
'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'),
|
||||
'order_free': ('opus', 'ogg', 'webm', 'm4a', 'mp3', 'aac', '', 'none')},
|
||||
'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
|
||||
'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
|
||||
'field': ('vcodec', 'acodec'),
|
||||
'function': lambda it: int(any(v != 'none' for v in it))},
|
||||
'ie_pref': {'priority': True, 'type': 'extractor'},
|
||||
'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
|
||||
'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
|
||||
@@ -1515,7 +1530,8 @@ class InfoExtractor(object):
|
||||
'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
|
||||
'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
|
||||
'ext': {'type': 'combined', 'field': ('vext', 'aext')},
|
||||
'res': {'type': 'multiple', 'field': ('height', 'width'), 'function': min},
|
||||
'res': {'type': 'multiple', 'field': ('height', 'width'),
|
||||
'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
|
||||
|
||||
# Most of these exist only for compatibility reasons
|
||||
'dimension': {'type': 'alias', 'field': 'res'},
|
||||
@@ -1559,7 +1575,7 @@ class InfoExtractor(object):
|
||||
elif key == 'convert':
|
||||
default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
|
||||
else:
|
||||
default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,), 'function': max}.get(key, None)
|
||||
default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None)
|
||||
propObj[key] = default
|
||||
return propObj[key]
|
||||
|
||||
@@ -1699,13 +1715,7 @@ class InfoExtractor(object):
|
||||
type = 'field' # Only 'field' is allowed in multiple for now
|
||||
actual_fields = self._get_field_setting(field, 'field')
|
||||
|
||||
def wrapped_function(values):
|
||||
values = tuple(filter(lambda x: x is not None, values))
|
||||
return (self._get_field_setting(field, 'function')(*values) if len(values) > 1
|
||||
else values[0] if values
|
||||
else None)
|
||||
|
||||
value = wrapped_function((get_value(f) for f in actual_fields))
|
||||
value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
|
||||
else:
|
||||
value = get_value(field)
|
||||
return self._calculate_field_preference_from_value(format, field, type, value)
|
||||
@@ -1719,7 +1729,7 @@ class InfoExtractor(object):
|
||||
if not format.get('ext') and 'url' in format:
|
||||
format['ext'] = determine_ext(format['url'])
|
||||
if format.get('vcodec') == 'none':
|
||||
format['audio_ext'] = format['ext']
|
||||
format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
|
||||
format['video_ext'] = 'none'
|
||||
else:
|
||||
format['video_ext'] = format['ext']
|
||||
@@ -1944,7 +1954,7 @@ class InfoExtractor(object):
|
||||
self.report_warning(bug_reports_message(
|
||||
"Ignoring subtitle tracks found in the HLS manifest; "
|
||||
"if any subtitle tracks are missing,"
|
||||
))
|
||||
), only_once=True)
|
||||
return fmts
|
||||
|
||||
def _extract_m3u8_formats_and_subtitles(
|
||||
@@ -1976,24 +1986,33 @@ class InfoExtractor(object):
|
||||
preference=None, quality=None, m3u8_id=None, live=False, note=None,
|
||||
errnote=None, fatal=True, data=None, headers={}, query={},
|
||||
video_id=None):
|
||||
formats, subtitles = [], {}
|
||||
|
||||
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||
return [], {}
|
||||
return formats, subtitles
|
||||
|
||||
if (not self.get_param('allow_unplayable_formats')
|
||||
and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)): # Apple FairPlay
|
||||
return [], {}
|
||||
return formats, subtitles
|
||||
|
||||
formats = []
|
||||
def format_url(url):
|
||||
return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url)
|
||||
|
||||
subtitles = {}
|
||||
if self.get_param('hls_split_discontinuity', False):
|
||||
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
|
||||
if not m3u8_doc:
|
||||
if not manifest_url:
|
||||
return []
|
||||
m3u8_doc = self._download_webpage(
|
||||
manifest_url, video_id, fatal=fatal, data=data, headers=headers,
|
||||
note=False, errnote='Failed to download m3u8 playlist information')
|
||||
if m3u8_doc is False:
|
||||
return []
|
||||
return range(1 + sum(line.startswith('#EXT-X-DISCONTINUITY') for line in m3u8_doc.splitlines()))
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
split_discontinuity = self.get_param('hls_split_discontinuity', False)
|
||||
else:
|
||||
def _extract_m3u8_playlist_indices(*args, **kwargs):
|
||||
return [None]
|
||||
|
||||
# References:
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
||||
@@ -2011,68 +2030,16 @@ class InfoExtractor(object):
|
||||
# media playlist and MUST NOT appear in master playlist thus we can
|
||||
# clearly detect media playlist with this criterion.
|
||||
|
||||
def _extract_m3u8_playlist_formats(format_url=None, m3u8_doc=None, video_id=None,
|
||||
fatal=True, data=None, headers={}):
|
||||
if not m3u8_doc:
|
||||
if not format_url:
|
||||
return []
|
||||
res = self._download_webpage_handle(
|
||||
format_url, video_id,
|
||||
note=False,
|
||||
errnote='Failed to download m3u8 playlist information',
|
||||
fatal=fatal, data=data, headers=headers)
|
||||
|
||||
if res is False:
|
||||
return []
|
||||
|
||||
m3u8_doc, urlh = res
|
||||
format_url = urlh.geturl()
|
||||
|
||||
playlist_formats = []
|
||||
i = (
|
||||
0
|
||||
if split_discontinuity
|
||||
else None)
|
||||
format_info = {
|
||||
'index': i,
|
||||
'key_data': None,
|
||||
'files': [],
|
||||
}
|
||||
for line in m3u8_doc.splitlines():
|
||||
if not line.startswith('#'):
|
||||
format_info['files'].append(line)
|
||||
elif split_discontinuity and line.startswith('#EXT-X-DISCONTINUITY'):
|
||||
i += 1
|
||||
playlist_formats.append(format_info)
|
||||
format_info = {
|
||||
'index': i,
|
||||
'url': format_url,
|
||||
'files': [],
|
||||
}
|
||||
playlist_formats.append(format_info)
|
||||
return playlist_formats
|
||||
|
||||
if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
|
||||
|
||||
playlist_formats = _extract_m3u8_playlist_formats(m3u8_doc=m3u8_doc)
|
||||
|
||||
for format in playlist_formats:
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
format_index = format.get('index')
|
||||
if format_index:
|
||||
format_id.append(str(format_index))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'format_index': format_index,
|
||||
'url': m3u8_url,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
}
|
||||
formats.append(f)
|
||||
formats = [{
|
||||
'format_id': '-'.join(map(str, filter(None, [m3u8_id, idx]))),
|
||||
'format_index': idx,
|
||||
'url': m3u8_url,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
} for idx in _extract_m3u8_playlist_indices(m3u8_doc=m3u8_doc)]
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
@@ -2112,31 +2079,19 @@ class InfoExtractor(object):
|
||||
media_url = media.get('URI')
|
||||
if media_url:
|
||||
manifest_url = format_url(media_url)
|
||||
format_id = []
|
||||
playlist_formats = _extract_m3u8_playlist_formats(manifest_url, video_id=video_id,
|
||||
fatal=fatal, data=data, headers=headers)
|
||||
|
||||
for format in playlist_formats:
|
||||
format_index = format.get('index')
|
||||
for v in (m3u8_id, group_id, name):
|
||||
if v:
|
||||
format_id.append(v)
|
||||
if format_index:
|
||||
format_id.append(str(format_index))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'format_index': format_index,
|
||||
'url': manifest_url,
|
||||
'manifest_url': m3u8_url,
|
||||
'language': media.get('LANGUAGE'),
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
}
|
||||
if media_type == 'AUDIO':
|
||||
f['vcodec'] = 'none'
|
||||
formats.append(f)
|
||||
formats.extend({
|
||||
'format_id': '-'.join(map(str, filter(None, (m3u8_id, group_id, name, idx)))),
|
||||
'format_note': name,
|
||||
'format_index': idx,
|
||||
'url': manifest_url,
|
||||
'manifest_url': m3u8_url,
|
||||
'language': media.get('LANGUAGE'),
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
'vcodec': 'none' if media_type == 'AUDIO' else None,
|
||||
} for idx in _extract_m3u8_playlist_indices(manifest_url))
|
||||
|
||||
def build_stream_name():
|
||||
# Despite specification does not mention NAME attribute for
|
||||
@@ -2175,25 +2130,17 @@ class InfoExtractor(object):
|
||||
or last_stream_inf.get('BANDWIDTH'), scale=1000)
|
||||
manifest_url = format_url(line.strip())
|
||||
|
||||
playlist_formats = _extract_m3u8_playlist_formats(manifest_url, video_id=video_id,
|
||||
fatal=fatal, data=data, headers=headers)
|
||||
|
||||
for frmt in playlist_formats:
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
format_index = frmt.get('index')
|
||||
stream_name = build_stream_name()
|
||||
for idx in _extract_m3u8_playlist_indices(manifest_url):
|
||||
format_id = [m3u8_id, None, idx]
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
# format_id intact.
|
||||
if not live:
|
||||
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
|
||||
if format_index:
|
||||
format_id.append(str(format_index))
|
||||
stream_name = build_stream_name()
|
||||
format_id[1] = stream_name if stream_name else '%d' % (tbr if tbr else len(formats))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'format_index': format_index,
|
||||
'format_id': '-'.join(map(str, filter(None, format_id))),
|
||||
'format_index': idx,
|
||||
'url': manifest_url,
|
||||
'manifest_url': m3u8_url,
|
||||
'tbr': tbr,
|
||||
@@ -2268,7 +2215,7 @@ class InfoExtractor(object):
|
||||
out.append('{%s}%s' % (namespace, c))
|
||||
return '/'.join(out)
|
||||
|
||||
def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
|
||||
def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
|
||||
smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
|
||||
|
||||
if smil is False:
|
||||
@@ -2277,8 +2224,21 @@ class InfoExtractor(object):
|
||||
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
|
||||
return self._parse_smil_formats(
|
||||
fmts = self._parse_smil_formats(
|
||||
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||
subs = self._parse_smil_subtitles(
|
||||
smil, namespace=namespace)
|
||||
|
||||
return fmts, subs
|
||||
|
||||
def _extract_smil_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
|
||||
if subs:
|
||||
self.report_warning(bug_reports_message(
|
||||
"Ignoring subtitle tracks found in the SMIL manifest; "
|
||||
"if any subtitle tracks are missing,"
|
||||
), only_once=True)
|
||||
return fmts
|
||||
|
||||
def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
|
||||
smil = self._download_smil(smil_url, video_id, fatal=fatal)
|
||||
@@ -2503,7 +2463,7 @@ class InfoExtractor(object):
|
||||
self.report_warning(bug_reports_message(
|
||||
"Ignoring subtitle tracks found in the DASH manifest; "
|
||||
"if any subtitle tracks are missing,"
|
||||
))
|
||||
), only_once=True)
|
||||
return fmts
|
||||
|
||||
def _extract_mpd_formats_and_subtitles(
|
||||
@@ -2530,7 +2490,7 @@ class InfoExtractor(object):
|
||||
self.report_warning(bug_reports_message(
|
||||
"Ignoring subtitle tracks found in the DASH manifest; "
|
||||
"if any subtitle tracks are missing,"
|
||||
))
|
||||
), only_once=True)
|
||||
return fmts
|
||||
|
||||
def _parse_mpd_formats_and_subtitles(
|
||||
@@ -2636,197 +2596,223 @@ class InfoExtractor(object):
|
||||
mime_type = representation_attrib['mimeType']
|
||||
content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
|
||||
|
||||
if content_type in ('video', 'audio', 'text'):
|
||||
base_url = ''
|
||||
for element in (representation, adaptation_set, period, mpd_doc):
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
if base_url_e is not None:
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
break
|
||||
if mpd_base_url and not re.match(r'^https?://', base_url):
|
||||
if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
|
||||
mpd_base_url += '/'
|
||||
base_url = mpd_base_url + base_url
|
||||
representation_id = representation_attrib.get('id')
|
||||
lang = representation_attrib.get('lang')
|
||||
url_el = representation.find(_add_ns('BaseURL'))
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
||||
if content_type in ('video', 'audio'):
|
||||
f = {
|
||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||
'manifest_url': mpd_url,
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
'tbr': float_or_none(bandwidth, 1000),
|
||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
'format_note': 'DASH %s' % content_type,
|
||||
'filesize': filesize,
|
||||
'container': mimetype2ext(mime_type) + '_dash',
|
||||
}
|
||||
f.update(parse_codecs(representation_attrib.get('codecs')))
|
||||
elif content_type == 'text':
|
||||
f = {
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'manifest_url': mpd_url,
|
||||
'filesize': filesize,
|
||||
}
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
codecs = representation_attrib.get('codecs', '')
|
||||
if content_type not in ('video', 'audio', 'text'):
|
||||
if mime_type == 'image/jpeg':
|
||||
content_type = 'image/jpeg'
|
||||
if codecs.split('.')[0] == 'stpp':
|
||||
content_type = 'text'
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
continue
|
||||
|
||||
def prepare_template(template_name, identifiers):
|
||||
tmpl = representation_ms_info[template_name]
|
||||
# First of, % characters outside $...$ templates
|
||||
# must be escaped by doubling for proper processing
|
||||
# by % operator string formatting used further (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/16867).
|
||||
t = ''
|
||||
in_template = False
|
||||
for c in tmpl:
|
||||
base_url = ''
|
||||
for element in (representation, adaptation_set, period, mpd_doc):
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
if base_url_e is not None:
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
break
|
||||
if mpd_base_url and not re.match(r'^https?://', base_url):
|
||||
if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
|
||||
mpd_base_url += '/'
|
||||
base_url = mpd_base_url + base_url
|
||||
representation_id = representation_attrib.get('id')
|
||||
lang = representation_attrib.get('lang')
|
||||
url_el = representation.find(_add_ns('BaseURL'))
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
||||
if representation_id is not None:
|
||||
format_id = representation_id
|
||||
else:
|
||||
format_id = content_type
|
||||
if mpd_id:
|
||||
format_id = mpd_id + '-' + format_id
|
||||
if content_type in ('video', 'audio'):
|
||||
f = {
|
||||
'format_id': format_id,
|
||||
'manifest_url': mpd_url,
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
'tbr': float_or_none(bandwidth, 1000),
|
||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
'format_note': 'DASH %s' % content_type,
|
||||
'filesize': filesize,
|
||||
'container': mimetype2ext(mime_type) + '_dash',
|
||||
}
|
||||
f.update(parse_codecs(codecs))
|
||||
elif content_type == 'text':
|
||||
f = {
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'manifest_url': mpd_url,
|
||||
'filesize': filesize,
|
||||
}
|
||||
elif content_type == 'image/jpeg':
|
||||
# See test case in VikiIE
|
||||
# https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
|
||||
f = {
|
||||
'format_id': format_id,
|
||||
'ext': 'mhtml',
|
||||
'manifest_url': mpd_url,
|
||||
'format_note': 'DASH storyboards (jpeg)',
|
||||
'acodec': 'none',
|
||||
'vcodec': 'none',
|
||||
}
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
|
||||
def prepare_template(template_name, identifiers):
|
||||
tmpl = representation_ms_info[template_name]
|
||||
# First of, % characters outside $...$ templates
|
||||
# must be escaped by doubling for proper processing
|
||||
# by % operator string formatting used further (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/16867).
|
||||
t = ''
|
||||
in_template = False
|
||||
for c in tmpl:
|
||||
t += c
|
||||
if c == '$':
|
||||
in_template = not in_template
|
||||
elif c == '%' and not in_template:
|
||||
t += c
|
||||
if c == '$':
|
||||
in_template = not in_template
|
||||
elif c == '%' and not in_template:
|
||||
t += c
|
||||
# Next, $...$ templates are translated to their
|
||||
# %(...) counterparts to be used with % operator
|
||||
# Next, $...$ templates are translated to their
|
||||
# %(...) counterparts to be used with % operator
|
||||
if representation_id is not None:
|
||||
t = t.replace('$RepresentationID$', representation_id)
|
||||
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
|
||||
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
|
||||
t.replace('$$', '$')
|
||||
return t
|
||||
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
|
||||
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
|
||||
t.replace('$$', '$')
|
||||
return t
|
||||
|
||||
# @initialization is a regular template like @media one
|
||||
# so it should be handled just the same way (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/11605)
|
||||
if 'initialization' in representation_ms_info:
|
||||
initialization_template = prepare_template(
|
||||
'initialization',
|
||||
# As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
|
||||
# $Time$ shall not be included for @initialization thus
|
||||
# only $Bandwidth$ remains
|
||||
('Bandwidth', ))
|
||||
representation_ms_info['initialization_url'] = initialization_template % {
|
||||
'Bandwidth': bandwidth,
|
||||
}
|
||||
# @initialization is a regular template like @media one
|
||||
# so it should be handled just the same way (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/11605)
|
||||
if 'initialization' in representation_ms_info:
|
||||
initialization_template = prepare_template(
|
||||
'initialization',
|
||||
# As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
|
||||
# $Time$ shall not be included for @initialization thus
|
||||
# only $Bandwidth$ remains
|
||||
('Bandwidth', ))
|
||||
representation_ms_info['initialization_url'] = initialization_template % {
|
||||
'Bandwidth': bandwidth,
|
||||
}
|
||||
|
||||
def location_key(location):
|
||||
return 'url' if re.match(r'^https?://', location) else 'path'
|
||||
def location_key(location):
|
||||
return 'url' if re.match(r'^https?://', location) else 'path'
|
||||
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
|
||||
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
|
||||
media_location_key = location_key(media_template)
|
||||
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
|
||||
media_location_key = location_key(media_template)
|
||||
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
if '%(Number' in media_template and 's' not in representation_ms_info:
|
||||
segment_duration = None
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
|
||||
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
representation_ms_info['fragments'] = [{
|
||||
media_location_key: media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': bandwidth,
|
||||
},
|
||||
'duration': segment_duration,
|
||||
} for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
else:
|
||||
# $Number*$ or $Time$ in media template with S list available
|
||||
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
|
||||
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
|
||||
representation_ms_info['fragments'] = []
|
||||
segment_time = 0
|
||||
segment_d = None
|
||||
segment_number = representation_ms_info['start_number']
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
if '%(Number' in media_template and 's' not in representation_ms_info:
|
||||
segment_duration = None
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
|
||||
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
representation_ms_info['fragments'] = [{
|
||||
media_location_key: media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': bandwidth,
|
||||
},
|
||||
'duration': segment_duration,
|
||||
} for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
else:
|
||||
# $Number*$ or $Time$ in media template with S list available
|
||||
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
|
||||
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
|
||||
representation_ms_info['fragments'] = []
|
||||
segment_time = 0
|
||||
segment_d = None
|
||||
segment_number = representation_ms_info['start_number']
|
||||
|
||||
def add_segment_url():
|
||||
segment_url = media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': bandwidth,
|
||||
'Number': segment_number,
|
||||
}
|
||||
representation_ms_info['fragments'].append({
|
||||
media_location_key: segment_url,
|
||||
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
||||
})
|
||||
def add_segment_url():
|
||||
segment_url = media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': bandwidth,
|
||||
'Number': segment_number,
|
||||
}
|
||||
representation_ms_info['fragments'].append({
|
||||
media_location_key: segment_url,
|
||||
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
||||
})
|
||||
|
||||
for num, s in enumerate(representation_ms_info['s']):
|
||||
segment_time = s.get('t') or segment_time
|
||||
segment_d = s['d']
|
||||
for num, s in enumerate(representation_ms_info['s']):
|
||||
segment_time = s.get('t') or segment_time
|
||||
segment_d = s['d']
|
||||
add_segment_url()
|
||||
segment_number += 1
|
||||
for r in range(s.get('r', 0)):
|
||||
segment_time += segment_d
|
||||
add_segment_url()
|
||||
segment_number += 1
|
||||
for r in range(s.get('r', 0)):
|
||||
segment_time += segment_d
|
||||
add_segment_url()
|
||||
segment_number += 1
|
||||
segment_time += segment_d
|
||||
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
|
||||
# No media template
|
||||
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||
# or any YouTube dashsegments video
|
||||
fragments = []
|
||||
segment_index = 0
|
||||
timescale = representation_ms_info['timescale']
|
||||
for s in representation_ms_info['s']:
|
||||
duration = float_or_none(s['d'], timescale)
|
||||
for r in range(s.get('r', 0) + 1):
|
||||
segment_uri = representation_ms_info['segment_urls'][segment_index]
|
||||
fragments.append({
|
||||
location_key(segment_uri): segment_uri,
|
||||
'duration': duration,
|
||||
})
|
||||
segment_index += 1
|
||||
representation_ms_info['fragments'] = fragments
|
||||
elif 'segment_urls' in representation_ms_info:
|
||||
# Segment URLs with no SegmentTimeline
|
||||
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/14844
|
||||
fragments = []
|
||||
segment_duration = float_or_none(
|
||||
representation_ms_info['segment_duration'],
|
||||
representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
|
||||
for segment_url in representation_ms_info['segment_urls']:
|
||||
fragment = {
|
||||
location_key(segment_url): segment_url,
|
||||
}
|
||||
if segment_duration:
|
||||
fragment['duration'] = segment_duration
|
||||
fragments.append(fragment)
|
||||
representation_ms_info['fragments'] = fragments
|
||||
# If there is a fragments key available then we correctly recognized fragmented media.
|
||||
# Otherwise we will assume unfragmented media with direct access. Technically, such
|
||||
# assumption is not necessarily correct since we may simply have no support for
|
||||
# some forms of fragmented media renditions yet, but for now we'll use this fallback.
|
||||
if 'fragments' in representation_ms_info:
|
||||
f.update({
|
||||
# NB: mpd_url may be empty when MPD manifest is parsed from a string
|
||||
'url': mpd_url or base_url,
|
||||
'fragment_base_url': base_url,
|
||||
'fragments': [],
|
||||
'protocol': 'http_dash_segments',
|
||||
})
|
||||
if 'initialization_url' in representation_ms_info:
|
||||
initialization_url = representation_ms_info['initialization_url']
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
f['fragments'].append({location_key(initialization_url): initialization_url})
|
||||
f['fragments'].extend(representation_ms_info['fragments'])
|
||||
else:
|
||||
# Assuming direct URL to unfragmented media.
|
||||
f['url'] = base_url
|
||||
if content_type in ('video', 'audio'):
|
||||
formats.append(f)
|
||||
elif content_type == 'text':
|
||||
subtitles.setdefault(lang or 'und', []).append(f)
|
||||
segment_time += segment_d
|
||||
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
|
||||
# No media template
|
||||
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||
# or any YouTube dashsegments video
|
||||
fragments = []
|
||||
segment_index = 0
|
||||
timescale = representation_ms_info['timescale']
|
||||
for s in representation_ms_info['s']:
|
||||
duration = float_or_none(s['d'], timescale)
|
||||
for r in range(s.get('r', 0) + 1):
|
||||
segment_uri = representation_ms_info['segment_urls'][segment_index]
|
||||
fragments.append({
|
||||
location_key(segment_uri): segment_uri,
|
||||
'duration': duration,
|
||||
})
|
||||
segment_index += 1
|
||||
representation_ms_info['fragments'] = fragments
|
||||
elif 'segment_urls' in representation_ms_info:
|
||||
# Segment URLs with no SegmentTimeline
|
||||
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/14844
|
||||
fragments = []
|
||||
segment_duration = float_or_none(
|
||||
representation_ms_info['segment_duration'],
|
||||
representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
|
||||
for segment_url in representation_ms_info['segment_urls']:
|
||||
fragment = {
|
||||
location_key(segment_url): segment_url,
|
||||
}
|
||||
if segment_duration:
|
||||
fragment['duration'] = segment_duration
|
||||
fragments.append(fragment)
|
||||
representation_ms_info['fragments'] = fragments
|
||||
# If there is a fragments key available then we correctly recognized fragmented media.
|
||||
# Otherwise we will assume unfragmented media with direct access. Technically, such
|
||||
# assumption is not necessarily correct since we may simply have no support for
|
||||
# some forms of fragmented media renditions yet, but for now we'll use this fallback.
|
||||
if 'fragments' in representation_ms_info:
|
||||
f.update({
|
||||
# NB: mpd_url may be empty when MPD manifest is parsed from a string
|
||||
'url': mpd_url or base_url,
|
||||
'fragment_base_url': base_url,
|
||||
'fragments': [],
|
||||
'protocol': 'http_dash_segments' if mime_type != 'image/jpeg' else 'mhtml',
|
||||
})
|
||||
if 'initialization_url' in representation_ms_info:
|
||||
initialization_url = representation_ms_info['initialization_url']
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
f['fragments'].append({location_key(initialization_url): initialization_url})
|
||||
f['fragments'].extend(representation_ms_info['fragments'])
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
# Assuming direct URL to unfragmented media.
|
||||
f['url'] = base_url
|
||||
if content_type in ('video', 'audio') or mime_type == 'image/jpeg':
|
||||
formats.append(f)
|
||||
elif content_type == 'text':
|
||||
subtitles.setdefault(lang or 'und', []).append(f)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _extract_ism_formats(self, *args, **kwargs):
|
||||
@@ -3484,16 +3470,8 @@ class InfoExtractor(object):
|
||||
return ret
|
||||
|
||||
@classmethod
|
||||
def _merge_subtitles(cls, *dicts, **kwargs):
|
||||
def _merge_subtitles(cls, *dicts, target=None):
|
||||
""" Merge subtitle dictionaries, language by language. """
|
||||
|
||||
target = (lambda target=None: target)(**kwargs)
|
||||
# The above lambda extracts the keyword argument 'target' from kwargs
|
||||
# while ensuring there are no stray ones. When Python 2 support
|
||||
# is dropped, remove it and change the function signature to:
|
||||
#
|
||||
# def _merge_subtitles(cls, *dicts, target=None):
|
||||
|
||||
if target is None:
|
||||
target = {}
|
||||
for d in dicts:
|
||||
@@ -3546,6 +3524,19 @@ class InfoExtractor(object):
|
||||
else 'public' if all_known
|
||||
else None)
|
||||
|
||||
def _configuration_arg(self, key, default=NO_DEFAULT, casesense=False):
|
||||
'''
|
||||
@returns A list of values for the extractor argument given by "key"
|
||||
or "default" if no such key is present
|
||||
@param default The default value to return when the key is not present (default: [])
|
||||
@param casesense When false, the values are converted to lower case
|
||||
'''
|
||||
val = traverse_obj(
|
||||
self._downloader.params, ('extractor_args', self.ie_key().lower(), key))
|
||||
if val is None:
|
||||
return [] if default is NO_DEFAULT else default
|
||||
return list(val) if casesense else [x.lower() for x in val]
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
||||
@@ -29,6 +29,7 @@ from ..utils import (
|
||||
merge_dicts,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
@@ -458,6 +459,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
video_description = (self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
||||
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
|
||||
|
||||
thumbnails = []
|
||||
thumbnail_url = (self._parse_json(self._html_search_regex(
|
||||
r'<script type="application\/ld\+json">\n\s*(.+?)<\/script>',
|
||||
webpage, 'thumbnail_url', default='{}'), video_id)).get('image')
|
||||
if thumbnail_url:
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'width': 1920,
|
||||
'height': 1080
|
||||
})
|
||||
|
||||
if video_description:
|
||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||
video_uploader = self._html_search_regex(
|
||||
@@ -592,21 +605,25 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
|
||||
webpage, 'series', fatal=False)
|
||||
|
||||
season = episode = episode_number = duration = thumbnail = None
|
||||
season = episode = episode_number = duration = None
|
||||
|
||||
if isinstance(metadata, compat_etree_Element):
|
||||
season = xpath_text(metadata, 'series_title')
|
||||
episode = xpath_text(metadata, 'episode_title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||
duration = float_or_none(media_metadata.get('duration'), 1000)
|
||||
thumbnail = xpath_text(metadata, 'episode_image_url')
|
||||
|
||||
if not episode:
|
||||
episode = media_metadata.get('title')
|
||||
if not episode_number:
|
||||
episode_number = int_or_none(media_metadata.get('episode_number'))
|
||||
if not thumbnail:
|
||||
thumbnail = media_metadata.get('thumbnail', {}).get('url')
|
||||
thumbnail_url = try_get(media, lambda x: x['thumbnail']['url'])
|
||||
if thumbnail_url:
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'width': 640,
|
||||
'height': 360
|
||||
})
|
||||
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||
@@ -619,7 +636,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'uploader': video_uploader,
|
||||
'series': series,
|
||||
'season': season,
|
||||
@@ -636,7 +653,7 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||
'url': 'https://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||
'info_dict': {
|
||||
'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||
'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
|
||||
@@ -661,7 +678,8 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
show_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
self._add_skip_wall(url), show_id,
|
||||
# https:// gives a 403, but http:// does not
|
||||
self._add_skip_wall(url).replace('https://', 'http://'), show_id,
|
||||
headers=self.geo_verification_headers())
|
||||
title = self._html_search_meta('name', webpage, default=None)
|
||||
|
||||
|
||||
@@ -143,9 +143,9 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class CuriosityStreamCollectionsIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream:collections'
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/collections/(?P<id>\d+)'
|
||||
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream:collection'
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
|
||||
_API_BASE_URL = 'https://api.curiositystream.com/v2/collections/'
|
||||
_TESTS = [{
|
||||
'url': 'https://curiositystream.com/collections/86',
|
||||
@@ -155,6 +155,20 @@ class CuriosityStreamCollectionsIE(CuriosityStreamBaseIE):
|
||||
'description': 'Wondering where to start? Here are a few of our favorite series and films... from our couch to yours.',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
'url': 'https://app.curiositystream.com/collection/2',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'title': 'Curious Minds: The Internet',
|
||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
'url': 'https://curiositystream.com/series/2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://curiositystream.com/collections/36',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -163,25 +177,10 @@ class CuriosityStreamCollectionsIE(CuriosityStreamBaseIE):
|
||||
entries = []
|
||||
for media in collection.get('media', []):
|
||||
media_id = compat_str(media.get('id'))
|
||||
media_type, ie = ('series', CuriosityStreamSeriesIE) if media.get('is_collection') else ('video', CuriosityStreamIE)
|
||||
media_type, ie = ('series', CuriosityStreamCollectionIE) if media.get('is_collection') else ('video', CuriosityStreamIE)
|
||||
entries.append(self.url_result(
|
||||
'https://curiositystream.com/%s/%s' % (media_type, media_id),
|
||||
ie=ie.ie_key(), video_id=media_id))
|
||||
return self.playlist_result(
|
||||
entries, collection_id,
|
||||
collection.get('title'), collection.get('description'))
|
||||
|
||||
|
||||
class CuriosityStreamSeriesIE(CuriosityStreamCollectionsIE):
|
||||
IE_NAME = 'curiositystream:series'
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/series/(?P<id>\d+)'
|
||||
_API_BASE_URL = 'https://api.curiositystream.com/v2/series/'
|
||||
_TESTS = [{
|
||||
'url': 'https://app.curiositystream.com/series/2',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'title': 'Curious Minds: The Internet',
|
||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}]
|
||||
|
||||
@@ -63,8 +63,7 @@ class DiscoveryPlusIndiaShowIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'how-do-they-do-it',
|
||||
},
|
||||
}
|
||||
]
|
||||
}]
|
||||
|
||||
def _entries(self, show_name):
|
||||
headers = {
|
||||
|
||||
145
yt_dlp/extractor/douyin.py
Normal file
145
yt_dlp/extractor/douyin.py
Normal file
@@ -0,0 +1,145 @@
|
||||
# coding: utf-8
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
from .common import (
|
||||
InfoExtractor,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
|
||||
|
||||
class DouyinIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.douyin.com/video/6961737553342991651',
|
||||
'md5': '10523312c8b8100f353620ac9dc8f067',
|
||||
'info_dict': {
|
||||
'id': '6961737553342991651',
|
||||
'ext': 'mp4',
|
||||
'title': '#杨超越 小小水手带你去远航❤️',
|
||||
'uploader': '杨超越',
|
||||
'upload_date': '20210513',
|
||||
'timestamp': 1620905839,
|
||||
'uploader_id': '110403406559',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6982497745948921092',
|
||||
'md5': 'd78408c984b9b5102904cf6b6bc2d712',
|
||||
'info_dict': {
|
||||
'id': '6982497745948921092',
|
||||
'ext': 'mp4',
|
||||
'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
|
||||
'uploader': '杨超越工作室',
|
||||
'upload_date': '20210708',
|
||||
'timestamp': 1625739481,
|
||||
'uploader_id': '408654318141572',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6953975910773099811',
|
||||
'md5': '72e882e24f75064c218b76c8b713c185',
|
||||
'info_dict': {
|
||||
'id': '6953975910773099811',
|
||||
'ext': 'mp4',
|
||||
'title': '#一起看海 出现在你的夏日里',
|
||||
'uploader': '杨超越',
|
||||
'upload_date': '20210422',
|
||||
'timestamp': 1619098692,
|
||||
'uploader_id': '110403406559',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6950251282489675042',
|
||||
'md5': 'b4db86aec367ef810ddd38b1737d2fed',
|
||||
'info_dict': {
|
||||
'id': '6950251282489675042',
|
||||
'ext': 'mp4',
|
||||
'title': '哈哈哈,成功了哈哈哈哈哈哈',
|
||||
'uploader': '杨超越',
|
||||
'upload_date': '20210412',
|
||||
'timestamp': 1618231483,
|
||||
'uploader_id': '110403406559',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6963263655114722595',
|
||||
'md5': '1abe1c477d05ee62efb40bf2329957cf',
|
||||
'info_dict': {
|
||||
'id': '6963263655114722595',
|
||||
'ext': 'mp4',
|
||||
'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
|
||||
'uploader': '杨超越',
|
||||
'upload_date': '20210517',
|
||||
'timestamp': 1621261163,
|
||||
'uploader_id': '110403406559',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
render_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>(%7B.+%7D)</script>',
|
||||
webpage, 'render data'),
|
||||
video_id, transform_source=compat_urllib_parse_unquote)
|
||||
details = traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False)
|
||||
|
||||
thumbnails = [{'url': self._proto_relative_url(url)} for url in traverse_obj(
|
||||
details, ('video', ('cover', 'dynamicCover', 'originCover')), expected_type=url_or_none, default=[])]
|
||||
|
||||
common = {
|
||||
'width': traverse_obj(details, ('video', 'width'), expected_type=int),
|
||||
'height': traverse_obj(details, ('video', 'height'), expected_type=int),
|
||||
'ext': 'mp4',
|
||||
}
|
||||
formats = [{**common, 'url': self._proto_relative_url(url)} for url in traverse_obj(
|
||||
details, ('video', 'playAddr', ..., 'src'), expected_type=url_or_none, default=[]) if url]
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
download_url = traverse_obj(details, ('download', 'url'), expected_type=url_or_none)
|
||||
if download_url:
|
||||
formats.append({
|
||||
**common,
|
||||
'format_id': 'download',
|
||||
'url': self._proto_relative_url(download_url),
|
||||
'quality': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': details.get('desc') or self._html_search_meta('title', webpage),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'uploader': traverse_obj(details, ('authorInfo', 'nickname'), expected_type=str),
|
||||
'uploader_id': traverse_obj(details, ('authorInfo', 'uid'), expected_type=str),
|
||||
'uploader_url': 'https://www.douyin.com/user/%s' % traverse_obj(
|
||||
details, ('authorInfo', 'secUid'), expected_type=str),
|
||||
'timestamp': int_or_none(details.get('createTime')),
|
||||
'duration': traverse_obj(details, ('video', 'duration'), expected_type=int),
|
||||
'view_count': traverse_obj(details, ('stats', 'playCount'), expected_type=int),
|
||||
'like_count': traverse_obj(details, ('stats', 'diggCount'), expected_type=int),
|
||||
'repost_count': traverse_obj(details, ('stats', 'shareCount'), expected_type=int),
|
||||
'comment_count': traverse_obj(details, ('stats', 'commentCount'), expected_type=int),
|
||||
}
|
||||
@@ -296,50 +296,6 @@ class DPlayIE(InfoExtractor):
|
||||
url, display_id, host, 'dplay' + country, country)
|
||||
|
||||
|
||||
class DiscoveryPlusIE(DPlayIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
|
||||
'info_dict': {
|
||||
'id': '1140794',
|
||||
'display_id': 'property-brothers-forever-home/food-and-family',
|
||||
'ext': 'mp4',
|
||||
'title': 'Food and Family',
|
||||
'description': 'The brothers help a Richmond family expand their single-level home.',
|
||||
'duration': 2583.113,
|
||||
'timestamp': 1609304400,
|
||||
'upload_date': '20201230',
|
||||
'creator': 'HGTV',
|
||||
'series': 'Property Brothers: Forever Home',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'skip': 'Available for Premium users',
|
||||
}]
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0'
|
||||
|
||||
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||
return self._download_json(
|
||||
disco_base + 'playback/v3/videoPlaybackInfo',
|
||||
video_id, headers=headers, data=json.dumps({
|
||||
'deviceInfo': {
|
||||
'adBlocker': False,
|
||||
},
|
||||
'videoId': video_id,
|
||||
'wisteriaProperties': {
|
||||
'platform': 'desktop',
|
||||
'product': 'dplus_us',
|
||||
},
|
||||
}).encode('utf-8'))['data']['attributes']['streaming']
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us')
|
||||
|
||||
|
||||
class HGTVDeIE(DPlayIE):
|
||||
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
@@ -367,3 +323,70 @@ class HGTVDeIE(DPlayIE):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
|
||||
|
||||
|
||||
class DiscoveryPlusIE(DPlayIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
|
||||
'info_dict': {
|
||||
'id': '1140794',
|
||||
'display_id': 'property-brothers-forever-home/food-and-family',
|
||||
'ext': 'mp4',
|
||||
'title': 'Food and Family',
|
||||
'description': 'The brothers help a Richmond family expand their single-level home.',
|
||||
'duration': 2583.113,
|
||||
'timestamp': 1609304400,
|
||||
'upload_date': '20201230',
|
||||
'creator': 'HGTV',
|
||||
'series': 'Property Brothers: Forever Home',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'skip': 'Available for Premium users',
|
||||
}]
|
||||
|
||||
_PRODUCT = 'dplus_us'
|
||||
_API_URL = 'us1-prod-direct.discoveryplus.com'
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers['x-disco-client'] = f'WEB:UNKNOWN:{self._PRODUCT}:15.0.0'
|
||||
|
||||
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||
return self._download_json(
|
||||
disco_base + 'playback/v3/videoPlaybackInfo',
|
||||
video_id, headers=headers, data=json.dumps({
|
||||
'deviceInfo': {
|
||||
'adBlocker': False,
|
||||
},
|
||||
'videoId': video_id,
|
||||
'wisteriaProperties': {
|
||||
'platform': 'desktop',
|
||||
'product': self._PRODUCT,
|
||||
},
|
||||
}).encode('utf-8'))['data']['attributes']['streaming']
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, self._API_URL, 'go', 'us')
|
||||
|
||||
|
||||
class ScienceChannelIE(DiscoveryPlusIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
|
||||
'info_dict': {
|
||||
'id': '2842849',
|
||||
'display_id': 'strangest-things-science-atve-us/nazi-mystery-machine',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nazi Mystery Machine',
|
||||
'description': 'Experts investigate the secrets of a revolutionary encryption machine.',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'skip': 'Available for Premium users',
|
||||
}]
|
||||
|
||||
_PRODUCT = 'sci'
|
||||
_API_URL = 'us1-prod-direct.sciencechannel.com'
|
||||
|
||||
@@ -22,16 +22,19 @@ class EggheadBaseIE(InfoExtractor):
|
||||
class EggheadCourseIE(EggheadBaseIE):
|
||||
IE_DESC = 'egghead.io course'
|
||||
IE_NAME = 'egghead:course'
|
||||
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
|
||||
'playlist_count': 29,
|
||||
'info_dict': {
|
||||
'id': '72',
|
||||
'id': '432655',
|
||||
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
|
||||
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
@@ -65,7 +68,7 @@ class EggheadCourseIE(EggheadBaseIE):
|
||||
class EggheadLessonIE(EggheadBaseIE):
|
||||
IE_DESC = 'egghead.io lesson'
|
||||
IE_NAME = 'egghead:lesson'
|
||||
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
||||
'info_dict': {
|
||||
@@ -88,6 +91,9 @@ class EggheadLessonIE(EggheadBaseIE):
|
||||
}, {
|
||||
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -90,3 +90,40 @@ class EroProfileIE(InfoExtractor):
|
||||
'title': title,
|
||||
'age_limit': 18,
|
||||
})
|
||||
|
||||
|
||||
class EroProfileAlbumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
|
||||
IE_NAME = 'EroProfile:album'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
|
||||
'info_dict': {
|
||||
'id': 'BBW-2-893',
|
||||
'title': 'BBW 2'
|
||||
},
|
||||
'playlist_mincount': 486,
|
||||
},
|
||||
]
|
||||
|
||||
def _extract_from_page(self, page):
|
||||
for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
|
||||
yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())
|
||||
|
||||
def _entries(self, playlist_id, first_page):
|
||||
yield from self._extract_from_page(first_page)
|
||||
|
||||
page_urls = re.findall(rf'href=".*?(/m/videos/album/{playlist_id}\?pnum=(\d+))"', first_page)
|
||||
|
||||
for url, n in page_urls[1:]:
|
||||
yield from self._extract_from_page(self._download_webpage(
|
||||
f'https://www.eroprofile.com{url}',
|
||||
playlist_id, note=f'Downloading playlist page {int(n) - 1}'))
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
|
||||
playlist_title = self._search_regex(
|
||||
r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')
|
||||
|
||||
return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)
|
||||
|
||||
@@ -109,7 +109,12 @@ from .awaan import (
|
||||
from .azmedien import AZMedienIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bandaichannel import BandaiChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||
from .bandcamp import (
|
||||
BandcampIE,
|
||||
BandcampAlbumIE,
|
||||
BandcampWeeklyIE,
|
||||
BandcampMusicIE,
|
||||
)
|
||||
from .bbc import (
|
||||
BBCCoUkIE,
|
||||
BBCCoUkArticleIE,
|
||||
@@ -151,6 +156,7 @@ from .bitwave import (
|
||||
BitwaveStreamIE,
|
||||
)
|
||||
from .biqle import BIQLEIE
|
||||
from .blackboardcollaborate import BlackboardCollaborateIE
|
||||
from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
BleacherReportCMSIE,
|
||||
@@ -291,8 +297,7 @@ from .ctvnews import CTVNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .curiositystream import (
|
||||
CuriosityStreamIE,
|
||||
CuriosityStreamCollectionsIE,
|
||||
CuriosityStreamSeriesIE,
|
||||
CuriosityStreamCollectionIE,
|
||||
)
|
||||
from .cwtv import CWTVIE
|
||||
from .dailymail import DailyMailIE
|
||||
@@ -322,6 +327,7 @@ from .discoveryplusindia import (
|
||||
DiscoveryPlusIndiaShowIE,
|
||||
)
|
||||
from .dotsub import DotsubIE
|
||||
from .douyin import DouyinIE
|
||||
from .douyutv import (
|
||||
DouyuShowIE,
|
||||
DouyuTVIE,
|
||||
@@ -330,6 +336,7 @@ from .dplay import (
|
||||
DPlayIE,
|
||||
DiscoveryPlusIE,
|
||||
HGTVDeIE,
|
||||
ScienceChannelIE
|
||||
)
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
@@ -382,7 +389,10 @@ from .elpais import ElPaisIE
|
||||
from .embedly import EmbedlyIE
|
||||
from .engadget import EngadgetIE
|
||||
from .eporner import EpornerIE
|
||||
from .eroprofile import EroProfileIE
|
||||
from .eroprofile import (
|
||||
EroProfileIE,
|
||||
EroProfileAlbumIE,
|
||||
)
|
||||
from .escapist import EscapistIE
|
||||
from .espn import (
|
||||
ESPNIE,
|
||||
@@ -399,7 +409,11 @@ from .facebook import (
|
||||
FacebookIE,
|
||||
FacebookPluginsVideoIE,
|
||||
)
|
||||
from .fancode import FancodeVodIE
|
||||
from .fancode import (
|
||||
FancodeVodIE,
|
||||
FancodeLiveIE
|
||||
)
|
||||
|
||||
from .faz import FazIE
|
||||
from .fc2 import (
|
||||
FC2IE,
|
||||
@@ -456,7 +470,11 @@ from .frontendmasters import (
|
||||
FrontendMastersCourseIE
|
||||
)
|
||||
from .fujitv import FujiTVFODPlus7IE
|
||||
from .funimation import FunimationIE
|
||||
from .funimation import (
|
||||
FunimationIE,
|
||||
FunimationPageIE,
|
||||
FunimationShowIE,
|
||||
)
|
||||
from .funk import FunkIE
|
||||
from .fusion import FusionIE
|
||||
from .gaia import GaiaIE
|
||||
@@ -655,10 +673,6 @@ from .linkedin import (
|
||||
from .linuxacademy import LinuxAcademyIE
|
||||
from .litv import LiTVIE
|
||||
from .livejournal import LiveJournalIE
|
||||
from .liveleak import (
|
||||
LiveLeakIE,
|
||||
LiveLeakEmbedIE,
|
||||
)
|
||||
from .livestream import (
|
||||
LivestreamIE,
|
||||
LivestreamOriginalIE,
|
||||
@@ -727,6 +741,10 @@ from .minds import (
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .minoto import MinotoIE
|
||||
from .miomio import MioMioIE
|
||||
from .mirrativ import (
|
||||
MirrativIE,
|
||||
MirrativUserIE,
|
||||
)
|
||||
from .mit import TechTVMITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
from .mixcloud import (
|
||||
@@ -928,6 +946,10 @@ from .ooyala import (
|
||||
OoyalaIE,
|
||||
OoyalaExternalIE,
|
||||
)
|
||||
from .openrec import (
|
||||
OpenRecIE,
|
||||
OpenRecCaptureIE,
|
||||
)
|
||||
from .ora import OraTVIE
|
||||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
@@ -957,6 +979,10 @@ from .palcomp3 import (
|
||||
PalcoMP3VideoIE,
|
||||
)
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .paramountplus import (
|
||||
ParamountPlusIE,
|
||||
ParamountPlusSeriesIE,
|
||||
)
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .parlview import ParlviewIE
|
||||
from .patreon import PatreonIE
|
||||
@@ -1012,6 +1038,7 @@ from .popcorntimes import PopcorntimesIE
|
||||
from .popcorntv import PopcornTVIE
|
||||
from .porn91 import Porn91IE
|
||||
from .porncom import PornComIE
|
||||
from .pornflip import PornFlipIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
@@ -1065,6 +1092,11 @@ from .rcs import (
|
||||
RCSEmbedsIE,
|
||||
RCSVariousIE,
|
||||
)
|
||||
from .rcti import (
|
||||
RCTIPlusIE,
|
||||
RCTIPlusSeriesIE,
|
||||
RCTIPlusTVIE,
|
||||
)
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import (
|
||||
RedBullTVIE,
|
||||
@@ -1329,7 +1361,6 @@ from .theweatherchannel import TheWeatherChannelIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .thisoldhouse import ThisOldHouseIE
|
||||
from .thisvid import ThisVidIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .tiktok import TikTokIE
|
||||
from .tinypic import TinyPicIE
|
||||
@@ -1483,6 +1514,7 @@ from .ustudio import (
|
||||
UstudioIE,
|
||||
UstudioEmbedIE,
|
||||
)
|
||||
from .utreon import UtreonIE
|
||||
from .varzesh3 import Varzesh3IE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veehd import VeeHDIE
|
||||
|
||||
@@ -629,16 +629,11 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
process_formats(formats)
|
||||
|
||||
description = self._html_search_meta('description', webpage, default=None)
|
||||
video_title = self._html_search_regex(
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
|
||||
'title', default=None)
|
||||
if not video_title:
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
||||
webpage, 'alternative title', default=None)
|
||||
if not video_title:
|
||||
video_title = self._html_search_meta(
|
||||
'description', webpage, 'title', default=None)
|
||||
(r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>',
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>'),
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage, default=None) or description
|
||||
if video_title:
|
||||
video_title = limit_length(video_title, 80)
|
||||
else:
|
||||
@@ -662,6 +657,7 @@ class FacebookIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
'subtitles': subtitles,
|
||||
|
||||
@@ -7,7 +7,8 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
ExtractorError,
|
||||
try_get
|
||||
try_get,
|
||||
mimetype2ext
|
||||
)
|
||||
|
||||
|
||||
@@ -38,16 +39,63 @@ class FancodeVodIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_ACCESS_TOKEN = None
|
||||
_NETRC_MACHINE = 'fancode'
|
||||
|
||||
_LOGIN_HINT = 'Use "--user refresh --password <refresh_token>" to login using a refresh token'
|
||||
|
||||
headers = {
|
||||
'content-type': 'application/json',
|
||||
'origin': 'https://fancode.com',
|
||||
'referer': 'https://fancode.com',
|
||||
}
|
||||
|
||||
def _login(self):
|
||||
# Access tokens are shortlived, so get them using the refresh token.
|
||||
username, password = self._get_login_info()
|
||||
if username == 'refresh' and password is not None:
|
||||
self.report_login()
|
||||
data = '''{
|
||||
"query":"mutation RefreshToken($refreshToken: String\\u0021) { refreshToken(refreshToken: $refreshToken) { accessToken }}",
|
||||
"variables":{
|
||||
"refreshToken":"%s"
|
||||
},
|
||||
"operationName":"RefreshToken"
|
||||
}''' % password
|
||||
|
||||
token_json = self.download_gql('refresh token', data, "Getting the Access token")
|
||||
self._ACCESS_TOKEN = try_get(token_json, lambda x: x['data']['refreshToken']['accessToken'])
|
||||
if self._ACCESS_TOKEN is None:
|
||||
self.report_warning('Failed to get Access token')
|
||||
else:
|
||||
self.headers.update({'Authorization': 'Bearer %s' % self._ACCESS_TOKEN})
|
||||
elif username is not None:
|
||||
self.report_warning(f'Login using username and password is not currently supported. {self._LOGIN_HINT}')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _check_login_required(self, is_available, is_premium):
|
||||
msg = None
|
||||
if is_premium and self._ACCESS_TOKEN is None:
|
||||
msg = f'This video is only available for registered users. {self._LOGIN_HINT}'
|
||||
elif not is_available and self._ACCESS_TOKEN is not None:
|
||||
msg = 'This video isn\'t available to the current logged in account'
|
||||
if msg:
|
||||
self.raise_login_required(msg, metadata_available=True, method=None)
|
||||
|
||||
def download_gql(self, variable, data, note, fatal=False, headers=headers):
|
||||
return self._download_json(
|
||||
'https://www.fancode.com/graphql', variable,
|
||||
data=data.encode(), note=note,
|
||||
headers=headers, fatal=fatal)
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
brightcove_user_id = self._html_search_regex(
|
||||
r'(?:https?://)?players\.brightcove\.net/(\d+)/default_default/index(?:\.min)?\.js',
|
||||
webpage, 'user id')
|
||||
|
||||
brightcove_user_id = '6008340455001'
|
||||
data = '''{
|
||||
"query":"query Video($id: Int\\u0021, $filter: SegmentFilter) { media(id: $id, filter: $filter) { id contentId title contentId publishedTime totalViews totalUpvotes provider thumbnail { src } mediaSource {brightcove } duration isPremium isUserEntitled tags duration }}",
|
||||
"variables":{
|
||||
@@ -57,15 +105,9 @@ class FancodeVodIE(InfoExtractor):
|
||||
}
|
||||
},
|
||||
"operationName":"Video"
|
||||
}''' % video_id
|
||||
}''' % video_id
|
||||
|
||||
metadata_json = self._download_json(
|
||||
'https://www.fancode.com/graphql', video_id, data=data.encode(), note='Downloading metadata',
|
||||
headers={
|
||||
'content-type': 'application/json',
|
||||
'origin': 'https://fancode.com',
|
||||
'referer': url,
|
||||
})
|
||||
metadata_json = self.download_gql(video_id, data, note='Downloading metadata')
|
||||
|
||||
media = try_get(metadata_json, lambda x: x['data']['media'], dict) or {}
|
||||
brightcove_video_id = try_get(media, lambda x: x['mediaSource']['brightcove'], compat_str)
|
||||
@@ -74,8 +116,8 @@ class FancodeVodIE(InfoExtractor):
|
||||
raise ExtractorError('Unable to extract brightcove Video ID')
|
||||
|
||||
is_premium = media.get('isPremium')
|
||||
if is_premium:
|
||||
self.report_warning('this video requires a premium account', video_id)
|
||||
|
||||
self._check_login_required(media.get('isUserEntitled'), is_premium)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -89,3 +131,57 @@ class FancodeVodIE(InfoExtractor):
|
||||
'release_timestamp': parse_iso8601(media.get('publishedTime')),
|
||||
'availability': self._availability(needs_premium=is_premium),
|
||||
}
|
||||
|
||||
|
||||
class FancodeLiveIE(FancodeVodIE):
|
||||
IE_NAME = 'fancode:live'
|
||||
|
||||
_VALID_URL = r'https?://(www\.)?fancode\.com/match/(?P<id>[0-9]+).+'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://fancode.com/match/35328/cricket-fancode-ecs-hungary-2021-bub-vs-blb?slug=commentary',
|
||||
'info_dict': {
|
||||
'id': '35328',
|
||||
'ext': 'mp4',
|
||||
'title': 'BUB vs BLB',
|
||||
"timestamp": 1624863600,
|
||||
'is_live': True,
|
||||
'upload_date': '20210628',
|
||||
},
|
||||
'skip': 'Ended'
|
||||
}, {
|
||||
'url': 'https://fancode.com/match/35328/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://fancode.com/match/35567?slug=scorecard',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
id = self._match_id(url)
|
||||
data = '''{
|
||||
"query":"query MatchResponse($id: Int\\u0021, $isLoggedIn: Boolean\\u0021) { match: matchWithScores(id: $id) { id matchDesc mediaId videoStreamId videoStreamUrl { ...VideoSource } liveStreams { videoStreamId videoStreamUrl { ...VideoSource } contentId } name startTime streamingStatus isPremium isUserEntitled @include(if: $isLoggedIn) status metaTags bgImage { src } sport { name slug } tour { id name } squads { name shortName } liveStreams { contentId } mediaId }}fragment VideoSource on VideoSource { title description posterUrl url deliveryType playerType}",
|
||||
"variables":{
|
||||
"id":%s,
|
||||
"isLoggedIn":true
|
||||
},
|
||||
"operationName":"MatchResponse"
|
||||
}''' % id
|
||||
|
||||
info_json = self.download_gql(id, data, "Info json")
|
||||
|
||||
match_info = try_get(info_json, lambda x: x['data']['match'])
|
||||
|
||||
if match_info.get('status') != "LIVE":
|
||||
raise ExtractorError('The stream can\'t be accessed', expected=True)
|
||||
self._check_login_required(match_info.get('isUserEntitled'), True) # all live streams are premium only
|
||||
|
||||
return {
|
||||
'id': id,
|
||||
'title': match_info.get('name'),
|
||||
'formats': self._extract_akamai_formats(try_get(match_info, lambda x: x['videoStreamUrl']['url']), id),
|
||||
'ext': mimetype2ext(try_get(match_info, lambda x: x['videoStreamUrl']['deliveryType'])),
|
||||
'is_live': True,
|
||||
'release_timestamp': parse_iso8601(match_info.get('startTime'))
|
||||
}
|
||||
|
||||
@@ -2,59 +2,124 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
str_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
urlencode_postdata
|
||||
)
|
||||
|
||||
|
||||
class FunimationIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||
|
||||
_NETRC_MACHINE = 'funimation'
|
||||
_TOKEN = None
|
||||
class FunimationPageIE(InfoExtractor):
|
||||
IE_NAME = 'funimation:page'
|
||||
_VALID_URL = r'(?P<origin>https?://(?:www\.)?funimation(?:\.com|now\.uk))/(?P<lang>[^/]+/)?(?P<path>shows/(?P<id>[^/]+/[^/?#&]+).*$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funimation.com/shows/hacksign/role-play/',
|
||||
'info_dict': {
|
||||
'id': '91144',
|
||||
'display_id': 'role-play',
|
||||
'ext': 'mp4',
|
||||
'title': '.hack//SIGN - Role Play',
|
||||
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
|
||||
'info_dict': {
|
||||
'id': '210051',
|
||||
'display_id': 'broadcast-dub-preview',
|
||||
'id': '210050',
|
||||
'ext': 'mp4',
|
||||
'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
|
||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||
'title': 'Broadcast Dub Preview',
|
||||
# Other metadata is tested in FunimationIE
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'add_ie': ['Funimation'],
|
||||
}, {
|
||||
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
||||
# Not available in US
|
||||
'url': 'https://www.funimation.com/shows/hacksign/role-play/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# with lang code
|
||||
'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id').replace('/', '_')
|
||||
if not mobj.group('lang'):
|
||||
url = '%s/en/%s' % (mobj.group('origin'), mobj.group('path'))
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title_data = self._parse_json(self._search_regex(
|
||||
r'TITLE_DATA\s*=\s*({[^}]+})',
|
||||
webpage, 'title data', default=''),
|
||||
display_id, js_to_json, fatal=False) or {}
|
||||
|
||||
video_id = (
|
||||
title_data.get('id')
|
||||
or self._search_regex(
|
||||
(r"KANE_customdimensions.videoID\s*=\s*'(\d+)';", r'<iframe[^>]+src="/player/(\d+)'),
|
||||
webpage, 'video_id', default=None)
|
||||
or self._search_regex(
|
||||
r'/player/(\d+)',
|
||||
self._html_search_meta(['al:web:url', 'og:video:url', 'og:video:secure_url'], webpage, fatal=True),
|
||||
'video id'))
|
||||
return self.url_result(f'https://www.funimation.com/player/{video_id}', FunimationIE.ie_key(), video_id)
|
||||
|
||||
|
||||
class FunimationIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funimation\.com/player/(?P<id>\d+)'
|
||||
|
||||
_NETRC_MACHINE = 'funimation'
|
||||
_TOKEN = None
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funimation.com/player/210051',
|
||||
'info_dict': {
|
||||
'id': '210050',
|
||||
'display_id': 'broadcast-dub-preview',
|
||||
'ext': 'mp4',
|
||||
'title': 'Broadcast Dub Preview',
|
||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||
'episode': 'Broadcast Dub Preview',
|
||||
'episode_id': '210050',
|
||||
'season': 'Extras',
|
||||
'season_id': '166038',
|
||||
'season_number': 99,
|
||||
'series': 'Attack on Titan: Junior High',
|
||||
'description': '',
|
||||
'duration': 154,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'note': 'player_id should be extracted with the relevent compat-opt',
|
||||
'url': 'https://www.funimation.com/player/210051',
|
||||
'info_dict': {
|
||||
'id': '210051',
|
||||
'display_id': 'broadcast-dub-preview',
|
||||
'ext': 'mp4',
|
||||
'title': 'Broadcast Dub Preview',
|
||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||
'episode': 'Broadcast Dub Preview',
|
||||
'episode_id': '210050',
|
||||
'season': 'Extras',
|
||||
'season_id': '166038',
|
||||
'season_number': 99,
|
||||
'series': 'Attack on Titan: Junior High',
|
||||
'description': '',
|
||||
'duration': 154,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
'compat_opts': ['seperate-video-versions'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@@ -78,81 +143,184 @@ class FunimationIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@staticmethod
|
||||
def _get_experiences(episode):
|
||||
for lang, lang_data in episode.get('languages', {}).items():
|
||||
for video_data in lang_data.values():
|
||||
for version, f in video_data.items():
|
||||
yield lang, version.title(), f
|
||||
|
||||
def _get_episode(self, webpage, experience_id=None, episode_id=None, fatal=True):
|
||||
''' Extract the episode, season and show objects given either episode/experience id '''
|
||||
show = self._parse_json(
|
||||
self._search_regex(
|
||||
r'show\s*=\s*({.+?})\s*;', webpage, 'show data', fatal=fatal),
|
||||
experience_id, transform_source=js_to_json, fatal=fatal) or []
|
||||
for season in show.get('seasons', []):
|
||||
for episode in season.get('episodes', []):
|
||||
if episode_id is not None:
|
||||
if str(episode.get('episodePk')) == episode_id:
|
||||
return episode, season, show
|
||||
continue
|
||||
for _, _, f in self._get_experiences(episode):
|
||||
if f.get('experienceId') == experience_id:
|
||||
return episode, season, show
|
||||
if fatal:
|
||||
raise ExtractorError('Unable to find episode information')
|
||||
else:
|
||||
self.report_warning('Unable to find episode information')
|
||||
return {}, {}, {}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
initial_experience_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
url, initial_experience_id, note=f'Downloading player webpage for {initial_experience_id}')
|
||||
episode, season, show = self._get_episode(webpage, experience_id=int(initial_experience_id))
|
||||
episode_id = str(episode['episodePk'])
|
||||
display_id = episode.get('slug') or episode_id
|
||||
|
||||
def _search_kane(name):
|
||||
return self._search_regex(
|
||||
r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name,
|
||||
webpage, name, default=None)
|
||||
formats, subtitles, thumbnails, duration = [], {}, [], 0
|
||||
requested_languages, requested_versions = self._configuration_arg('language'), self._configuration_arg('version')
|
||||
only_initial_experience = 'seperate-video-versions' in self.get_param('compat_opts', [])
|
||||
|
||||
title_data = self._parse_json(self._search_regex(
|
||||
r'TITLE_DATA\s*=\s*({[^}]+})',
|
||||
webpage, 'title data', default=''),
|
||||
display_id, js_to_json, fatal=False) or {}
|
||||
for lang, version, fmt in self._get_experiences(episode):
|
||||
experience_id = str(fmt['experienceId'])
|
||||
if (only_initial_experience and experience_id != initial_experience_id
|
||||
or requested_languages and lang.lower() not in requested_languages
|
||||
or requested_versions and version.lower() not in requested_versions):
|
||||
continue
|
||||
thumbnails.append({'url': fmt.get('poster')})
|
||||
duration = max(duration, fmt.get('duration', 0))
|
||||
format_name = '%s %s (%s)' % (version, lang, experience_id)
|
||||
self.extract_subtitles(
|
||||
subtitles, experience_id, display_id=display_id, format_name=format_name,
|
||||
episode=episode if experience_id == initial_experience_id else episode_id)
|
||||
|
||||
video_id = title_data.get('id') or self._search_regex([
|
||||
r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
|
||||
r'<iframe[^>]+src="/player/(\d+)',
|
||||
], webpage, 'video_id', default=None)
|
||||
if not video_id:
|
||||
player_url = self._html_search_meta([
|
||||
'al:web:url',
|
||||
'og:video:url',
|
||||
'og:video:secure_url',
|
||||
], webpage, fatal=True)
|
||||
video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id')
|
||||
|
||||
title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage)
|
||||
series = _search_kane('showName')
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
|
||||
|
||||
try:
|
||||
headers = {}
|
||||
if self._TOKEN:
|
||||
headers['Authorization'] = 'Token %s' % self._TOKEN
|
||||
sources = self._download_json(
|
||||
'https://www.funimation.com/api/showexperience/%s/' % video_id,
|
||||
video_id, headers=headers, query={
|
||||
page = self._download_json(
|
||||
'https://www.funimation.com/api/showexperience/%s/' % experience_id,
|
||||
display_id, headers=headers, expected_status=403, query={
|
||||
'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
|
||||
})['items']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
error = self._parse_json(e.cause.read(), video_id)['errors'][0]
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, error.get('detail') or error.get('title')), expected=True)
|
||||
raise
|
||||
}, note=f'Downloading {format_name} JSON')
|
||||
sources = page.get('items') or []
|
||||
if not sources:
|
||||
error = try_get(page, lambda x: x['errors'][0], dict)
|
||||
if error:
|
||||
self.report_warning('%s said: Error %s - %s' % (
|
||||
self.IE_NAME, error.get('code'), error.get('detail') or error.get('title')))
|
||||
else:
|
||||
self.report_warning('No sources found for format')
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
source_url = source.get('src')
|
||||
if not source_url:
|
||||
continue
|
||||
source_type = source.get('videoType') or determine_ext(source_url)
|
||||
if source_type == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': source_type,
|
||||
'url': source_url,
|
||||
})
|
||||
current_formats = []
|
||||
for source in sources:
|
||||
source_url = source.get('src')
|
||||
source_type = source.get('videoType') or determine_ext(source_url)
|
||||
if source_type == 'm3u8':
|
||||
current_formats.extend(self._extract_m3u8_formats(
|
||||
source_url, display_id, 'mp4', m3u8_id='%s-%s' % (experience_id, 'hls'), fatal=False,
|
||||
note=f'Downloading {format_name} m3u8 information'))
|
||||
else:
|
||||
current_formats.append({
|
||||
'format_id': '%s-%s' % (experience_id, source_type),
|
||||
'url': source_url,
|
||||
})
|
||||
for f in current_formats:
|
||||
# TODO: Convert language to code
|
||||
f.update({'language': lang, 'format_note': version})
|
||||
formats.extend(current_formats)
|
||||
self._remove_duplicate_formats(formats)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': initial_experience_id if only_initial_experience else episode_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'series': series,
|
||||
'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
|
||||
'episode_number': int_or_none(title_data.get('episodeNum')),
|
||||
'episode': episode,
|
||||
'season_id': title_data.get('seriesId'),
|
||||
'duration': duration,
|
||||
'title': episode['episodeTitle'],
|
||||
'description': episode.get('episodeSummary'),
|
||||
'episode': episode.get('episodeTitle'),
|
||||
'episode_number': int_or_none(episode.get('episodeId')),
|
||||
'episode_id': episode_id,
|
||||
'season': season.get('seasonTitle'),
|
||||
'season_number': int_or_none(season.get('seasonId')),
|
||||
'season_id': str_or_none(season.get('seasonPk')),
|
||||
'series': show.get('showTitle'),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_name):
|
||||
if isinstance(episode, str):
|
||||
webpage = self._download_webpage(
|
||||
f'https://www.funimation.com/player/{experience_id}', display_id,
|
||||
fatal=False, note=f'Downloading player webpage for {format_name}')
|
||||
episode, _, _ = self._get_episode(webpage, episode_id=episode, fatal=False)
|
||||
|
||||
for _, version, f in self._get_experiences(episode):
|
||||
for source in f.get('sources'):
|
||||
for text_track in source.get('textTracks'):
|
||||
if not text_track.get('src'):
|
||||
continue
|
||||
sub_type = text_track.get('type').upper()
|
||||
sub_type = sub_type if sub_type != 'FULL' else None
|
||||
current_sub = {
|
||||
'url': text_track['src'],
|
||||
'name': ' '.join(filter(None, (version, text_track.get('label'), sub_type)))
|
||||
}
|
||||
lang = '_'.join(filter(None, (
|
||||
text_track.get('language', 'und'), version if version != 'Simulcast' else None, sub_type)))
|
||||
if current_sub not in subtitles.get(lang, []):
|
||||
subtitles.setdefault(lang, []).append(current_sub)
|
||||
return subtitles
|
||||
|
||||
|
||||
class FunimationShowIE(FunimationIE):
|
||||
IE_NAME = 'funimation:show'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?P<locale>[^/]+)?/?shows/(?P<id>[^/?#&]+))/?(?:[?#]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funimation.com/en/shows/sk8-the-infinity',
|
||||
'info_dict': {
|
||||
'id': 1315000,
|
||||
'title': 'SK8 the Infinity'
|
||||
},
|
||||
'playlist_count': 13,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# without lang code
|
||||
'url': 'https://www.funimation.com/shows/ouran-high-school-host-club/',
|
||||
'info_dict': {
|
||||
'id': 39643,
|
||||
'title': 'Ouran High School Host Club'
|
||||
},
|
||||
'playlist_count': 26,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
base_url, locale, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
show_info = self._download_json(
|
||||
'https://title-api.prd.funimationsvc.com/v2/shows/%s?region=US&deviceType=web&locale=%s'
|
||||
% (display_id, locale or 'en'), display_id)
|
||||
items = self._download_json(
|
||||
'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id=%s'
|
||||
% show_info.get('id'), display_id).get('items')
|
||||
vod_items = map(lambda k: dict_get(k, ('mostRecentSvod', 'mostRecentAvod')).get('item'), items)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': show_info['id'],
|
||||
'title': show_info['name'],
|
||||
'entries': [
|
||||
self.url_result(
|
||||
'%s/%s' % (base_url, vod_item.get('episodeSlug')), FunimationPageIE.ie_key(),
|
||||
vod_item.get('episodeId'), vod_item.get('episodeName'))
|
||||
for vod_item in sorted(vod_items, key=lambda x: x.get('episodeOrder'))],
|
||||
}
|
||||
|
||||
@@ -84,7 +84,6 @@ from .jwplatform import JWPlatformIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .arkena import ArkenaIE
|
||||
from .instagram import InstagramIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .kaltura import KalturaIE
|
||||
@@ -1632,31 +1631,6 @@ class GenericIE(InfoExtractor):
|
||||
'upload_date': '20160409',
|
||||
},
|
||||
},
|
||||
# LiveLeak embed
|
||||
{
|
||||
'url': 'http://www.wykop.pl/link/3088787/',
|
||||
'md5': '7619da8c820e835bef21a1efa2a0fc71',
|
||||
'info_dict': {
|
||||
'id': '874_1459135191',
|
||||
'ext': 'mp4',
|
||||
'title': 'Man shows poor quality of new apartment building',
|
||||
'description': 'The wall is like a sand pile.',
|
||||
'uploader': 'Lake8737',
|
||||
},
|
||||
'add_ie': [LiveLeakIE.ie_key()],
|
||||
},
|
||||
# Another LiveLeak embed pattern (#13336)
|
||||
{
|
||||
'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
|
||||
'info_dict': {
|
||||
'id': '2eb_1496309988',
|
||||
'ext': 'mp4',
|
||||
'title': 'Thief robs place where everyone was armed',
|
||||
'description': 'md5:694d73ee79e535953cf2488562288eee',
|
||||
'uploader': 'brazilwtf',
|
||||
},
|
||||
'add_ie': [LiveLeakIE.ie_key()],
|
||||
},
|
||||
# Duplicated embedded video URLs
|
||||
{
|
||||
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
|
||||
@@ -2264,6 +2238,87 @@ class GenericIE(InfoExtractor):
|
||||
'title': '#WEAREFMI – PT.2 – 2021 – MsMotorTV',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/',
|
||||
'info_dict': {
|
||||
'id': '105',
|
||||
'display_id': 'kelis-4th-of-july',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kelis - 4th Of July',
|
||||
'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://www.kvs-demo.com/embed/105/',
|
||||
'info_dict': {
|
||||
'id': '105',
|
||||
'display_id': 'kelis-4th-of-july',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kelis - 4th Of July / Embed Player',
|
||||
'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://thisvid.com/videos/french-boy-pantsed/',
|
||||
'md5': '3397979512c682f6b85b3b04989df224',
|
||||
'info_dict': {
|
||||
'id': '2400174',
|
||||
'display_id': 'french-boy-pantsed',
|
||||
'ext': 'mp4',
|
||||
'title': 'French Boy Pantsed - ThisVid.com',
|
||||
'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
|
||||
}
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://thisvid.com/embed/2400174/',
|
||||
'md5': '3397979512c682f6b85b3b04989df224',
|
||||
'info_dict': {
|
||||
'id': '2400174',
|
||||
'display_id': 'french-boy-pantsed',
|
||||
'ext': 'mp4',
|
||||
'title': 'French Boy Pantsed - ThisVid.com',
|
||||
'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
|
||||
}
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://youix.com/video/leningrad-zoj/',
|
||||
'md5': '94f96ba95706dc3880812b27b7d8a2b8',
|
||||
'info_dict': {
|
||||
'id': '18485',
|
||||
'display_id': 'leningrad-zoj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
|
||||
'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
|
||||
}
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://youix.com/embed/18485',
|
||||
'md5': '94f96ba95706dc3880812b27b7d8a2b8',
|
||||
'info_dict': {
|
||||
'id': '18485',
|
||||
'display_id': 'leningrad-zoj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ленинград - ЗОЖ',
|
||||
'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
|
||||
}
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
|
||||
'md5': '94166bdb26b4cb1fb9214319a629fc51',
|
||||
'info_dict': {
|
||||
'id': '21217',
|
||||
'display_id': '40-nochey-40-nights-2016',
|
||||
'ext': 'mp4',
|
||||
'title': '40 ночей (2016) - BogMedia.org',
|
||||
'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
@@ -2369,6 +2424,44 @@ class GenericIE(InfoExtractor):
|
||||
'title': title,
|
||||
}
|
||||
|
||||
def _kvs_getrealurl(self, video_url, license_code):
|
||||
if not video_url.startswith('function/0/'):
|
||||
return video_url # not obfuscated
|
||||
|
||||
url_path, _, url_query = video_url.partition('?')
|
||||
urlparts = url_path.split('/')[2:]
|
||||
license = self._kvs_getlicensetoken(license_code)
|
||||
newmagic = urlparts[5][:32]
|
||||
|
||||
for o in range(len(newmagic) - 1, -1, -1):
|
||||
new = ''
|
||||
l = (o + sum([int(n) for n in license[o:]])) % 32
|
||||
|
||||
for i in range(0, len(newmagic)):
|
||||
if i == o:
|
||||
new += newmagic[l]
|
||||
elif i == l:
|
||||
new += newmagic[o]
|
||||
else:
|
||||
new += newmagic[i]
|
||||
newmagic = new
|
||||
|
||||
urlparts[5] = newmagic + urlparts[5][32:]
|
||||
return '/'.join(urlparts) + '?' + url_query
|
||||
|
||||
def _kvs_getlicensetoken(self, license):
|
||||
modlicense = license.replace('$', '').replace('0', '1')
|
||||
center = int(len(modlicense) / 2)
|
||||
fronthalf = int(modlicense[:center + 1])
|
||||
backhalf = int(modlicense[center:])
|
||||
|
||||
modlicense = str(4 * abs(fronthalf - backhalf))
|
||||
retval = ''
|
||||
for o in range(0, center + 1):
|
||||
for i in range(1, 5):
|
||||
retval += str((int(license[o + i]) + int(modlicense[o])) % 10)
|
||||
return retval
|
||||
|
||||
def _real_extract(self, url):
|
||||
if url.startswith('//'):
|
||||
return self.url_result(self.http_scheme() + url)
|
||||
@@ -2488,7 +2581,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Is it an M3U playlist?
|
||||
if first_bytes.startswith(b'#EXTM3U'):
|
||||
info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||
info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
|
||||
self._sort_formats(info_dict['formats'])
|
||||
return info_dict
|
||||
|
||||
@@ -3204,11 +3297,6 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(
|
||||
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
|
||||
|
||||
# Look for LiveLeak embeds
|
||||
liveleak_urls = LiveLeakIE._extract_urls(webpage)
|
||||
if liveleak_urls:
|
||||
return self.playlist_from_matches(liveleak_urls, video_id, video_title)
|
||||
|
||||
# Look for 3Q SDN embeds
|
||||
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
|
||||
if threeqsdn_url:
|
||||
@@ -3441,6 +3529,7 @@ class GenericIE(InfoExtractor):
|
||||
if not isinstance(sources, list):
|
||||
sources = [sources]
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for source in sources:
|
||||
src = source.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
@@ -3453,12 +3542,16 @@ class GenericIE(InfoExtractor):
|
||||
if src_type == 'video/youtube':
|
||||
return self.url_result(src, YoutubeIE.ie_key())
|
||||
if src_type == 'application/dash+xml' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src, video_id, mpd_id='dash', fatal=False))
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
src, video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
src, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
formats.append({
|
||||
'url': src,
|
||||
@@ -3468,9 +3561,10 @@ class GenericIE(InfoExtractor):
|
||||
'Referer': full_response.geturl(),
|
||||
},
|
||||
})
|
||||
if formats:
|
||||
if formats or subtitles:
|
||||
self._sort_formats(formats)
|
||||
info_dict['formats'] = formats
|
||||
info_dict['subtitles'] = subtitles
|
||||
return info_dict
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
@@ -3503,6 +3597,52 @@ class GenericIE(InfoExtractor):
|
||||
)
|
||||
.*?
|
||||
['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
|
||||
if not found:
|
||||
# Look for generic KVS player
|
||||
found = re.search(r'<script [^>]*?src="https://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage)
|
||||
if found:
|
||||
if found.group('maj_ver') not in ['4', '5']:
|
||||
self.report_warning('Untested major version (%s) in player engine--Download may fail.' % found.group('ver'))
|
||||
flashvars = re.search(r'(?ms)<script.*?>.*?var\s+flashvars\s*=\s*(\{.*?\});.*?</script>', webpage)
|
||||
flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json)
|
||||
|
||||
# extract the part after the last / as the display_id from the
|
||||
# canonical URL.
|
||||
display_id = self._search_regex(
|
||||
r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
|
||||
r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)',
|
||||
webpage, 'display_id', fatal=False
|
||||
)
|
||||
title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')
|
||||
|
||||
thumbnail = flashvars['preview_url']
|
||||
if thumbnail.startswith('//'):
|
||||
protocol, _, _ = url.partition('/')
|
||||
thumbnail = protocol + thumbnail
|
||||
|
||||
formats = []
|
||||
for key in ('video_url', 'video_alt_url', 'video_alt_url2'):
|
||||
if key in flashvars and '/get_file/' in flashvars[key]:
|
||||
next_format = {
|
||||
'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
|
||||
'format_id': flashvars.get(key + '_text', key),
|
||||
'ext': 'mp4',
|
||||
}
|
||||
height = re.search(r'%s_(\d+)p\.mp4(?:/[?].*)?$' % flashvars['video_id'], flashvars[key])
|
||||
if height:
|
||||
next_format['height'] = int(height.group(1))
|
||||
else:
|
||||
next_format['quality'] = 1
|
||||
formats.append(next_format)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': flashvars['video_id'],
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
if not found:
|
||||
# Broaden the search a little bit
|
||||
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
|
||||
@@ -3605,13 +3745,13 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'smil':
|
||||
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
|
||||
entry_info_dict = {**self._extract_smil_info(video_url, video_id), **entry_info_dict}
|
||||
elif ext == 'xspf':
|
||||
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
|
||||
elif ext == 'm3u8':
|
||||
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
|
||||
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4')
|
||||
elif ext == 'mpd':
|
||||
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
|
||||
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id)
|
||||
elif ext == 'f4m':
|
||||
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
|
||||
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
|
||||
|
||||
@@ -7,7 +7,6 @@ import re
|
||||
import time
|
||||
import uuid
|
||||
import json
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -27,34 +26,24 @@ from ..utils import (
|
||||
class HotStarBaseIE(InfoExtractor):
|
||||
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
|
||||
|
||||
def _call_api_impl(self, path, video_id, query):
|
||||
st = int(time.time())
|
||||
def _call_api_impl(self, path, video_id, query, st=None, cookies=None):
|
||||
st = int_or_none(st) or int(time.time())
|
||||
exp = st + 6000
|
||||
auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
|
||||
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
|
||||
|
||||
def _generate_device_id():
|
||||
"""
|
||||
Reversed from javascript library.
|
||||
JS function is generateUUID
|
||||
"""
|
||||
t = int(round(time.time() * 1000))
|
||||
e = "xxxxxxxx-xxxx-4xxx-xxxx-xxxxxxxxxxxx" # 4 seems to be interchangeable
|
||||
|
||||
def _replacer():
|
||||
n = int((t + 16 * random.random())) % 16 | 0
|
||||
return hex(n if "x" == e else 3 & n | 8)[2:]
|
||||
return "".join([_.replace('x', _replacer()) for _ in e])
|
||||
|
||||
token = self._download_json(
|
||||
'https://api.hotstar.com/um/v3/users',
|
||||
video_id, note='Downloading token',
|
||||
data=json.dumps({"device_ids": [{"id": compat_str(uuid.uuid4()), "type": "device_id"}]}).encode('utf-8'),
|
||||
headers={
|
||||
'hotstarauth': auth,
|
||||
'x-hs-platform': 'PCTV', # or 'web'
|
||||
'Content-Type': 'application/json',
|
||||
})['user_identity']
|
||||
if cookies and cookies.get('userUP'):
|
||||
token = cookies.get('userUP').value
|
||||
else:
|
||||
token = self._download_json(
|
||||
'https://api.hotstar.com/um/v3/users',
|
||||
video_id, note='Downloading token',
|
||||
data=json.dumps({"device_ids": [{"id": compat_str(uuid.uuid4()), "type": "device_id"}]}).encode('utf-8'),
|
||||
headers={
|
||||
'hotstarauth': auth,
|
||||
'x-hs-platform': 'PCTV', # or 'web'
|
||||
'Content-Type': 'application/json',
|
||||
})['user_identity']
|
||||
|
||||
response = self._download_json(
|
||||
'https://api.hotstar.com/' + path, video_id, headers={
|
||||
@@ -70,16 +59,19 @@ class HotStarBaseIE(InfoExtractor):
|
||||
return response['data']
|
||||
|
||||
def _call_api(self, path, video_id, query_name='contentId'):
|
||||
return self._call_api_impl(path, video_id, {
|
||||
return self._download_json('https://api.hotstar.com/' + path, video_id=video_id, query={
|
||||
query_name: video_id,
|
||||
'tas': 10000,
|
||||
}, headers={
|
||||
'x-country-code': 'IN',
|
||||
'x-platform-code': 'PCTV',
|
||||
})
|
||||
|
||||
def _call_api_v2(self, path, video_id):
|
||||
def _call_api_v2(self, path, video_id, st=None, cookies=None):
|
||||
return self._call_api_impl(
|
||||
'%s/content/%s' % (path, video_id), video_id, {
|
||||
'%s/content/%s' % (path, video_id), video_id, st=st, cookies=cookies, query={
|
||||
'desired-config': 'audio_channel:stereo|dynamic_range:sdr|encryption:plain|ladder:tv|package:dash|resolution:hd|subs-tag:HotstarVIP|video_codec:vp9',
|
||||
'device-id': compat_str(uuid.uuid4()),
|
||||
'device-id': cookies.get('device_id').value if cookies.get('device_id') else compat_str(uuid.uuid4()),
|
||||
'os-name': 'Windows',
|
||||
'os-version': '10',
|
||||
})
|
||||
@@ -88,15 +80,25 @@ class HotStarBaseIE(InfoExtractor):
|
||||
class HotStarIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
|
||||
(?:
|
||||
tv/(?:[^/?#]+/){3}|
|
||||
(?!tv/)[^?#]+/
|
||||
)?
|
||||
(?P<id>\d{10})
|
||||
(?:
|
||||
hotstar\:|
|
||||
https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
|
||||
)
|
||||
(?:
|
||||
(?P<type>movies|sports|episode|(?P<tv>tv))
|
||||
(?:
|
||||
\:|
|
||||
/[^/?#]+/
|
||||
(?(tv)
|
||||
(?:[^/?#]+/){2}|
|
||||
(?:[^/?#]+/)*
|
||||
)
|
||||
)|
|
||||
[^/?#]+/
|
||||
)?
|
||||
(?P<id>\d{10})
|
||||
'''
|
||||
_TESTS = [{
|
||||
# contentData
|
||||
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
|
||||
'info_dict': {
|
||||
'id': '1000076273',
|
||||
@@ -107,55 +109,89 @@ class HotStarIE(HotStarBaseIE):
|
||||
'upload_date': '20151111',
|
||||
'duration': 381,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# contentDetail
|
||||
'url': 'hotstar:1000076273',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
|
||||
'info_dict': {
|
||||
'id': '1000057157',
|
||||
'ext': 'mp4',
|
||||
'title': 'Radha Gopalam',
|
||||
'description': 'md5:be3bc342cc120bbc95b3b0960e2b0d22',
|
||||
'timestamp': 1140805800,
|
||||
'upload_date': '20060224',
|
||||
'duration': 9182,
|
||||
},
|
||||
}, {
|
||||
'url': 'hotstar:movies:1000057157',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
|
||||
'url': 'https://www.hotstar.com/in/sports/cricket/follow-the-blues-2021/recap-eng-fight-back-on-day-2/1260066104',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/1000000515',
|
||||
'url': 'https://www.hotstar.com/in/sports/football/most-costly-pl-transfers-ft-grealish/1260065956',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# contentData
|
||||
'url': 'hotstar:sports:1260065956',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# contentData
|
||||
'url': 'hotstar:sports:1260066104',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# only available via api v2
|
||||
'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
|
||||
'info_dict': {
|
||||
'id': '1000234847',
|
||||
'ext': 'mp4',
|
||||
'title': 'Janhvi Targets Suman',
|
||||
'description': 'md5:78a85509348910bd1ca31be898c5796b',
|
||||
'timestamp': 1556670600,
|
||||
'upload_date': '20190501',
|
||||
'duration': 1219,
|
||||
'channel': 'StarPlus',
|
||||
'channel_id': 3,
|
||||
'series': 'Ek Bhram - Sarvagun Sampanna',
|
||||
'season': 'Chapter 1',
|
||||
'season_number': 1,
|
||||
'season_id': 6771,
|
||||
'episode': 'Janhvi Targets Suman',
|
||||
'episode_number': 8,
|
||||
},
|
||||
}, {
|
||||
'url': 'hotstar:episode:1000234847',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_TYPE = {
|
||||
'movies': 'movie',
|
||||
'sports': 'match',
|
||||
'episode': 'episode',
|
||||
'tv': 'episode',
|
||||
None: 'content',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
app_state = self._parse_json(self._search_regex(
|
||||
r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
|
||||
webpage, 'app state'), video_id)
|
||||
video_data = {}
|
||||
getters = list(
|
||||
lambda x, k=k: x['initialState']['content%s' % k]['content']
|
||||
for k in ('Data', 'Detail')
|
||||
)
|
||||
for v in app_state.values():
|
||||
content = try_get(v, getters, dict)
|
||||
if content and content.get('contentId') == video_id:
|
||||
video_data = content
|
||||
break
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_type = mobj.group('type')
|
||||
cookies = self._get_cookies(url)
|
||||
video_type = self._TYPE.get(video_type, video_type)
|
||||
video_data = self._call_api(f'o/v1/{video_type}/detail', video_id)['body']['results']['item']
|
||||
title = video_data['title']
|
||||
|
||||
if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
headers = {'Referer': url}
|
||||
headers = {'Referer': 'https://www.hotstar.com/in'}
|
||||
formats = []
|
||||
subs = {}
|
||||
geo_restricted = False
|
||||
_, urlh = self._download_webpage_handle('https://www.hotstar.com/in', video_id)
|
||||
# Required to fix https://github.com/yt-dlp/yt-dlp/issues/396
|
||||
st = urlh.headers.get('x-origin-date')
|
||||
# change to v2 in the future
|
||||
playback_sets = self._call_api_v2('play/v1/playback', video_id)['playBackSets']
|
||||
playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st, cookies=cookies)['playBackSets']
|
||||
for playback_set in playback_sets:
|
||||
if not isinstance(playback_set, dict):
|
||||
continue
|
||||
@@ -170,13 +206,17 @@ class HotStarIE(HotStarBaseIE):
|
||||
ext = determine_ext(format_url)
|
||||
try:
|
||||
if 'package:hls' in tags or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', headers=headers))
|
||||
m3u8_id='hls', headers=headers)
|
||||
formats.extend(hls_formats)
|
||||
subs = self._merge_subtitles(subs, hls_subs)
|
||||
elif 'package:dash' in tags or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', headers=headers))
|
||||
dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
|
||||
format_url, video_id, mpd_id='dash', headers=headers)
|
||||
formats.extend(dash_formats)
|
||||
subs = self._merge_subtitles(subs, dash_subs)
|
||||
elif ext == 'f4m':
|
||||
# produce broken files
|
||||
pass
|
||||
@@ -204,6 +244,7 @@ class HotStarIE(HotStarBaseIE):
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
|
||||
'formats': formats,
|
||||
'subtitles': subs,
|
||||
'channel': video_data.get('channelName'),
|
||||
'channel_id': video_data.get('channelId'),
|
||||
'series': video_data.get('showName'),
|
||||
@@ -232,8 +273,7 @@ class HotStarPlaylistIE(HotStarBaseIE):
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')
|
||||
|
||||
collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')['body']['results']
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://www.hotstar.com/%s' % video['contentId'],
|
||||
@@ -246,7 +286,7 @@ class HotStarPlaylistIE(HotStarBaseIE):
|
||||
|
||||
class HotStarSeriesIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar:series'
|
||||
_VALID_URL = r'(?:https?://)(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d{10})$'
|
||||
_VALID_URL = r'(?:https?://)(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646',
|
||||
'info_dict': {
|
||||
@@ -259,6 +299,12 @@ class HotStarSeriesIE(HotStarBaseIE):
|
||||
'id': '1260050431',
|
||||
},
|
||||
'playlist_mincount': 43,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/tv/mahabharat/435/',
|
||||
'info_dict': {
|
||||
'id': '435',
|
||||
},
|
||||
'playlist_mincount': 269,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -274,7 +320,7 @@ class HotStarSeriesIE(HotStarBaseIE):
|
||||
video_id=series_id, headers=headers)
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://www.hotstar.com/%d' % video['contentId'],
|
||||
'hotstar:episode:%d' % video['contentId'],
|
||||
ie=HotStarIE.ie_key(), video_id=video['contentId'])
|
||||
for video in item_json['body']['results']['items']
|
||||
if video.get('contentId')]
|
||||
|
||||
@@ -19,6 +19,7 @@ from ..utils import (
|
||||
std_headers,
|
||||
try_get,
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
@@ -188,26 +189,29 @@ class InstagramIE(InfoExtractor):
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
|
||||
def get_count(keys, kind):
|
||||
if not isinstance(keys, (list, tuple)):
|
||||
keys = [keys]
|
||||
for key in keys:
|
||||
for key in variadic(keys):
|
||||
count = int_or_none(try_get(
|
||||
media, (lambda x: x['edge_media_%s' % key]['count'],
|
||||
lambda x: x['%ss' % kind]['count'])))
|
||||
if count is not None:
|
||||
return count
|
||||
|
||||
like_count = get_count('preview_like', 'like')
|
||||
comment_count = get_count(
|
||||
('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
|
||||
|
||||
comments = [{
|
||||
'author': comment.get('user', {}).get('username'),
|
||||
'author_id': comment.get('user', {}).get('id'),
|
||||
'id': comment.get('id'),
|
||||
'text': comment.get('text'),
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
comments = []
|
||||
for comment in try_get(media, lambda x: x['edge_media_to_parent_comment']['edges']):
|
||||
comment_dict = comment.get('node', {})
|
||||
comment_text = comment_dict.get('text')
|
||||
if comment_text:
|
||||
comments.append({
|
||||
'author': try_get(comment_dict, lambda x: x['owner']['username']),
|
||||
'author_id': try_get(comment_dict, lambda x: x['owner']['id']),
|
||||
'id': comment_dict.get('id'),
|
||||
'text': comment_text,
|
||||
'timestamp': int_or_none(comment_dict.get('created_at')),
|
||||
})
|
||||
if not video_url:
|
||||
edges = try_get(
|
||||
media, lambda x: x['edge_sidecar_to_children']['edges'],
|
||||
|
||||
@@ -1,191 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class LiveLeakIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
||||
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
||||
'info_dict': {
|
||||
'id': '757_1364311680',
|
||||
'ext': 'mp4',
|
||||
'description': 'extremely bad day for this guy..!',
|
||||
'uploader': 'ljfriel2',
|
||||
'title': 'Most unlucky car accident',
|
||||
'thumbnail': r're:^https?://.*\.jpg$'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
||||
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
|
||||
'info_dict': {
|
||||
'id': 'f93_1390833151',
|
||||
'ext': 'mp4',
|
||||
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
|
||||
'uploader': 'ARD_Stinkt',
|
||||
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
|
||||
'thumbnail': r're:^https?://.*\.jpg$'
|
||||
}
|
||||
}, {
|
||||
# Prochan embed
|
||||
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
|
||||
'md5': '42c6d97d54f1db107958760788c5f48f',
|
||||
'info_dict': {
|
||||
'id': '4f7_1392687779',
|
||||
'ext': 'mp4',
|
||||
'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
|
||||
'uploader': 'CapObveus',
|
||||
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'skip': 'Video is dead',
|
||||
}, {
|
||||
# Covers https://github.com/ytdl-org/youtube-dl/pull/5983
|
||||
# Multiple resolutions
|
||||
'url': 'http://www.liveleak.com/view?i=801_1409392012',
|
||||
'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
|
||||
'info_dict': {
|
||||
'id': '801_1409392012',
|
||||
'ext': 'mp4',
|
||||
'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
|
||||
'uploader': 'bony333',
|
||||
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
|
||||
'thumbnail': r're:^https?://.*\.jpg$'
|
||||
}
|
||||
}, {
|
||||
# Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
|
||||
'url': 'http://m.liveleak.com/view?i=763_1473349649',
|
||||
'add_ie': ['Youtube'],
|
||||
'info_dict': {
|
||||
'id': '763_1473349649',
|
||||
'ext': 'mp4',
|
||||
'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
|
||||
'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
|
||||
'uploader': 'Ziz',
|
||||
'upload_date': '20160908',
|
||||
'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.liveleak.com/view?i=677_1439397581',
|
||||
'info_dict': {
|
||||
'id': '677_1439397581',
|
||||
'title': 'Fuel Depot in China Explosion caught on video',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# No original video
|
||||
'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
|
||||
video_description = self._og_search_description(webpage)
|
||||
video_uploader = self._html_search_regex(
|
||||
r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
|
||||
age_limit = int_or_none(self._search_regex(
|
||||
r'you confirm that you are ([0-9]+) years and over.',
|
||||
webpage, 'age limit', default=None))
|
||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
if not entries:
|
||||
# Maybe an embed?
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
|
||||
webpage, 'embed URL')
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'uploader': video_uploader,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
for idx, info_dict in enumerate(entries):
|
||||
formats = []
|
||||
for a_format in info_dict['formats']:
|
||||
if not a_format.get('height'):
|
||||
a_format['height'] = int_or_none(self._search_regex(
|
||||
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
|
||||
default=None))
|
||||
formats.append(a_format)
|
||||
|
||||
# Removing '.*.mp4' gives the raw video, which is essentially
|
||||
# the same video without the LiveLeak logo at the top (see
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/4768)
|
||||
orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
|
||||
if a_format['url'] != orig_url:
|
||||
format_id = a_format.get('format_id')
|
||||
format_id = 'original' + ('-' + format_id if format_id else '')
|
||||
if self._is_valid_url(orig_url, video_id, format_id):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': orig_url,
|
||||
'quality': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info_dict['formats'] = formats
|
||||
|
||||
# Don't append entry ID for one-video pages to keep backward compatibility
|
||||
if len(entries) > 1:
|
||||
info_dict['id'] = '%s_%s' % (video_id, idx + 1)
|
||||
else:
|
||||
info_dict['id'] = video_id
|
||||
|
||||
info_dict.update({
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'uploader': video_uploader,
|
||||
'age_limit': age_limit,
|
||||
'thumbnail': video_thumbnail,
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, video_id, video_title)
|
||||
|
||||
|
||||
class LiveLeakEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
|
||||
|
||||
# See generic.py for actual test cases
|
||||
_TESTS = [{
|
||||
'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
kind, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
if kind == 'f':
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
liveleak_url = self._search_regex(
|
||||
r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
|
||||
webpage, 'LiveLeak URL', group='url')
|
||||
else:
|
||||
liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
|
||||
|
||||
return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
|
||||
@@ -30,20 +30,20 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
'''
|
||||
_TESTS = [{
|
||||
# full episode
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',
|
||||
'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102',
|
||||
'md5': 'a7e75c6384871f322adb781d3bd72c26',
|
||||
'info_dict': {
|
||||
'id': 'FAFU000000661824',
|
||||
'id': 'F310575103000102',
|
||||
'ext': 'mp4',
|
||||
'title': 'Quarta puntata',
|
||||
'title': 'Episodio 1',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1414.26,
|
||||
'upload_date': '20161107',
|
||||
'series': 'Hello Goodbye',
|
||||
'timestamp': 1478532900,
|
||||
'uploader': 'Rete 4',
|
||||
'uploader_id': 'R4',
|
||||
'duration': 2682.0,
|
||||
'upload_date': '20210530',
|
||||
'series': 'Mr Wrong - Lezioni d\'amore',
|
||||
'timestamp': 1622413946,
|
||||
'uploader': 'Canale 5',
|
||||
'uploader_id': 'C5',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
|
||||
@@ -54,10 +54,10 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
'title': 'Puntata del 25 maggio',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 6565.007,
|
||||
'upload_date': '20180526',
|
||||
'duration': 6565.008,
|
||||
'upload_date': '20200903',
|
||||
'series': 'Matrix',
|
||||
'timestamp': 1527326245,
|
||||
'timestamp': 1599172492,
|
||||
'uploader': 'Canale 5',
|
||||
'uploader_id': 'C5',
|
||||
},
|
||||
@@ -135,36 +135,38 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
formats = []
|
||||
subtitles = {}
|
||||
first_e = None
|
||||
for asset_type in ('SD', 'HD'):
|
||||
# TODO: fixup ISM+none manifest URLs
|
||||
for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'):
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
|
||||
'mbr': 'true',
|
||||
'formats': f,
|
||||
'assetTypes': asset_type,
|
||||
}), guid, 'Downloading %s %s SMIL data' % (f.split('+')[0], asset_type))
|
||||
except ExtractorError as e:
|
||||
if not first_e:
|
||||
first_e = e
|
||||
break
|
||||
for tp_f in tp_formats:
|
||||
tp_f['quality'] = 1 if asset_type == 'HD' else 0
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
asset_type = 'HD,browser,geoIT|SD,browser,geoIT|geoNo:HD,browser,geoIT|geoNo:SD,browser,geoIT|geoNo'
|
||||
# TODO: fixup ISM+none manifest URLs
|
||||
for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'):
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
|
||||
'mbr': 'true',
|
||||
'formats': f,
|
||||
'assetTypes': asset_type,
|
||||
}), guid, 'Downloading %s SMIL data' % (f.split('+')[0]))
|
||||
except ExtractorError as e:
|
||||
if not first_e:
|
||||
first_e = e
|
||||
break
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
if first_e and not formats:
|
||||
raise first_e
|
||||
self._sort_formats(formats)
|
||||
|
||||
fields = []
|
||||
for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))):
|
||||
fields.extend(templ % repl for repl in repls)
|
||||
feed_data = self._download_json(
|
||||
'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid,
|
||||
guid, fatal=False, query={'fields': ','.join(fields)})
|
||||
'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2/guid/-/' + guid,
|
||||
guid, fatal=False)
|
||||
if feed_data:
|
||||
publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
|
||||
thumbnails = feed_data.get('thumbnails') or {}
|
||||
thumbnail = None
|
||||
for key, value in thumbnails.items():
|
||||
if key.startswith('image_keyframe_poster-'):
|
||||
thumbnail = value.get('url')
|
||||
break
|
||||
|
||||
info.update({
|
||||
'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
|
||||
'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
|
||||
@@ -172,6 +174,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
'uploader': publish_info.get('description'),
|
||||
'uploader_id': publish_info.get('channel'),
|
||||
'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
|
||||
'thumbnail': thumbnail,
|
||||
})
|
||||
|
||||
info.update({
|
||||
|
||||
@@ -122,6 +122,52 @@ class MediasiteIE(InfoExtractor):
|
||||
r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE,
|
||||
webpage)]
|
||||
|
||||
def __extract_slides(self, *, stream_id, snum, Stream, duration, images):
|
||||
slide_base_url = Stream['SlideBaseUrl']
|
||||
|
||||
fname_template = Stream['SlideImageFileNameTemplate']
|
||||
if fname_template != 'slide_{0:D4}.jpg':
|
||||
self.report_warning('Unusual slide file name template; report a bug if slide downloading fails')
|
||||
fname_template = re.sub(r'\{0:D([0-9]+)\}', r'{0:0\1}', fname_template)
|
||||
|
||||
fragments = []
|
||||
for i, slide in enumerate(Stream['Slides']):
|
||||
if i == 0:
|
||||
if slide['Time'] > 0:
|
||||
default_slide = images.get('DefaultSlide')
|
||||
if default_slide is None:
|
||||
default_slide = images.get('DefaultStreamImage')
|
||||
if default_slide is not None:
|
||||
default_slide = default_slide['ImageFilename']
|
||||
if default_slide is not None:
|
||||
fragments.append({
|
||||
'path': default_slide,
|
||||
'duration': slide['Time'] / 1000,
|
||||
})
|
||||
|
||||
next_time = try_get(None, [
|
||||
lambda _: Stream['Slides'][i + 1]['Time'],
|
||||
lambda _: duration,
|
||||
lambda _: slide['Time'],
|
||||
], expected_type=(int, float))
|
||||
|
||||
fragments.append({
|
||||
'path': fname_template.format(slide.get('Number', i + 1)),
|
||||
'duration': (next_time - slide['Time']) / 1000
|
||||
})
|
||||
|
||||
return {
|
||||
'format_id': '%s-%u.slides' % (stream_id, snum),
|
||||
'ext': 'mhtml',
|
||||
'url': slide_base_url,
|
||||
'protocol': 'mhtml',
|
||||
'acodec': 'none',
|
||||
'vcodec': 'none',
|
||||
'format_note': 'Slides',
|
||||
'fragments': fragments,
|
||||
'fragment_base_url': slide_base_url,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, data = unsmuggle_url(url, {})
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -198,10 +244,15 @@ class MediasiteIE(InfoExtractor):
|
||||
'ext': mimetype2ext(VideoUrl.get('MimeType')),
|
||||
})
|
||||
|
||||
# TODO: if Stream['HasSlideContent']:
|
||||
# synthesise an MJPEG video stream '%s-%u.slides' % (stream_type, snum)
|
||||
# from Stream['Slides']
|
||||
# this will require writing a custom downloader...
|
||||
if Stream.get('HasSlideContent', False):
|
||||
images = player_options['PlayerLayoutOptions']['Images']
|
||||
stream_formats.append(self.__extract_slides(
|
||||
stream_id=stream_id,
|
||||
snum=snum,
|
||||
Stream=Stream,
|
||||
duration=presentation.get('Duration'),
|
||||
images=images,
|
||||
))
|
||||
|
||||
# disprefer 'secondary' streams
|
||||
if stream_type != 0:
|
||||
|
||||
134
yt_dlp/extractor/mirrativ.py
Normal file
134
yt_dlp/extractor/mirrativ.py
Normal file
@@ -0,0 +1,134 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class MirrativBaseIE(InfoExtractor):
|
||||
def assert_error(self, response):
|
||||
error_message = traverse_obj(response, ('status', 'error'))
|
||||
if error_message:
|
||||
raise ExtractorError('Mirrativ says: %s' % error_message, expected=True)
|
||||
|
||||
|
||||
class MirrativIE(MirrativBaseIE):
|
||||
IE_NAME = 'mirrativ'
|
||||
_VALID_URL = r'https?://(?:www\.)?mirrativ\.com/live/(?P<id>[^/?#&]+)'
|
||||
LIVE_API_URL = 'https://www.mirrativ.com/api/live/live?live_id=%s'
|
||||
|
||||
TESTS = [{
|
||||
'url': 'https://mirrativ.com/live/POxyuG1KmW2982lqlDTuPw',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('https://www.mirrativ.com/live/%s' % video_id, video_id)
|
||||
live_response = self._download_json(self.LIVE_API_URL % video_id, video_id)
|
||||
self.assert_error(live_response)
|
||||
|
||||
hls_url = dict_get(live_response, ('archive_url_hls', 'streaming_url_hls'))
|
||||
is_live = bool(live_response.get('is_live'))
|
||||
was_live = bool(live_response.get('is_archive'))
|
||||
if not hls_url:
|
||||
raise ExtractorError('Neither archive nor live is available.', expected=True)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
hls_url, video_id,
|
||||
ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', live=is_live)
|
||||
rtmp_url = live_response.get('streaming_url_edge')
|
||||
if rtmp_url:
|
||||
keys_to_copy = ('width', 'height', 'vcodec', 'acodec', 'tbr')
|
||||
fmt = {
|
||||
'format_id': 'rtmp',
|
||||
'url': rtmp_url,
|
||||
'protocol': 'rtmp',
|
||||
'ext': 'mp4',
|
||||
}
|
||||
fmt.update({k: traverse_obj(formats, (0, k)) for k in keys_to_copy})
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'<title>\s*(.+?) - Mirrativ\s*</title>', webpage) or live_response.get('title')
|
||||
description = live_response.get('description')
|
||||
thumbnail = live_response.get('image_url')
|
||||
|
||||
duration = try_get(live_response, lambda x: x['ended_at'] - x['started_at'])
|
||||
view_count = live_response.get('total_viewer_num')
|
||||
release_timestamp = live_response.get('started_at')
|
||||
timestamp = live_response.get('created_at')
|
||||
|
||||
owner = live_response.get('owner', {})
|
||||
uploader = owner.get('name')
|
||||
uploader_id = owner.get('user_id')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'is_live': is_live,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'release_timestamp': release_timestamp,
|
||||
'timestamp': timestamp,
|
||||
'was_live': was_live,
|
||||
}
|
||||
|
||||
|
||||
class MirrativUserIE(MirrativBaseIE):
|
||||
IE_NAME = 'mirrativ:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?mirrativ\.com/user/(?P<id>\d+)'
|
||||
LIVE_HISTORY_API_URL = 'https://www.mirrativ.com/api/live/live_history?user_id=%s&page=%d'
|
||||
USER_INFO_API_URL = 'https://www.mirrativ.com/api/user/profile?user_id=%s'
|
||||
|
||||
_TESTS = [{
|
||||
# Live archive is available up to 3 days
|
||||
# see: https://helpfeel.com/mirrativ/%E9%8C%B2%E7%94%BB-5e26d3ad7b59ef0017fb49ac (Japanese)
|
||||
'url': 'https://www.mirrativ.com/user/110943130',
|
||||
'note': 'multiple archives available',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _entries(self, user_id):
|
||||
page = 1
|
||||
while page is not None:
|
||||
api_response = self._download_json(
|
||||
self.LIVE_HISTORY_API_URL % (user_id, page), user_id,
|
||||
note='Downloading page %d' % page)
|
||||
self.assert_error(api_response)
|
||||
lives = api_response.get('lives')
|
||||
if not lives:
|
||||
break
|
||||
for live in lives:
|
||||
if not live.get('is_archive') and not live.get('is_live'):
|
||||
# neither archive nor live is available, so skip it
|
||||
# or the service will ban your IP address for a while
|
||||
continue
|
||||
live_id = live.get('live_id')
|
||||
url = 'https://www.mirrativ.com/live/%s' % live_id
|
||||
yield self.url_result(url, video_id=live_id, video_title=live.get('title'))
|
||||
page = api_response.get('next_page')
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
user_info = self._download_json(
|
||||
self.USER_INFO_API_URL % user_id, user_id,
|
||||
note='Downloading user info', fatal=False)
|
||||
self.assert_error(user_info)
|
||||
|
||||
uploader = user_info.get('name')
|
||||
description = user_info.get('description')
|
||||
|
||||
entries = self._entries(user_id)
|
||||
return self.playlist_result(entries, user_id, uploader, description)
|
||||
@@ -249,6 +249,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
if info:
|
||||
entries.append(info)
|
||||
|
||||
# TODO: should be multi-video
|
||||
return self.playlist_result(
|
||||
entries, playlist_title=title, playlist_description=description)
|
||||
|
||||
|
||||
@@ -110,10 +110,15 @@ class MxplayerIE(InfoExtractor):
|
||||
for frmt in dash_formats:
|
||||
frmt['quality'] = get_quality(quality)
|
||||
formats.extend(dash_formats)
|
||||
dash_formats_h265 = self._extract_mpd_formats(
|
||||
format_url.replace('h264_high', 'h265_main'), video_id, mpd_id='dash-%s' % quality, headers={'Referer': url}, fatal=False)
|
||||
for frmt in dash_formats_h265:
|
||||
frmt['quality'] = get_quality(quality)
|
||||
formats.extend(dash_formats_h265)
|
||||
elif stream_type == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, fatal=False,
|
||||
m3u8_id='hls-%s' % quality, quality=get_quality(quality)))
|
||||
m3u8_id='hls-%s' % quality, quality=get_quality(quality), ext='mp4'))
|
||||
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
RegexNotFoundError,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
@@ -460,7 +461,7 @@ class NBCNewsIE(ThePlatformIE):
|
||||
|
||||
class NBCOlympicsIE(InfoExtractor):
|
||||
IE_NAME = 'nbcolympics'
|
||||
_VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
|
||||
_VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
# Geo-restricted to US
|
||||
@@ -483,13 +484,18 @@ class NBCOlympicsIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), display_id)
|
||||
try:
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), display_id)
|
||||
|
||||
iframe_url = drupal_settings['vod']['iframe_url']
|
||||
theplatform_url = iframe_url.replace(
|
||||
'vplayer.nbcolympics.com', 'player.theplatform.com')
|
||||
iframe_url = drupal_settings['vod']['iframe_url']
|
||||
theplatform_url = iframe_url.replace(
|
||||
'vplayer.nbcolympics.com', 'player.theplatform.com')
|
||||
except RegexNotFoundError:
|
||||
theplatform_url = self._search_regex(
|
||||
r"([\"'])embedUrl\1: *([\"'])(?P<embedUrl>.+)\2",
|
||||
webpage, 'embedding URL', group="embedUrl")
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -502,43 +508,79 @@ class NBCOlympicsIE(InfoExtractor):
|
||||
class NBCOlympicsStreamIE(AdobePassIE):
|
||||
IE_NAME = 'nbcolympics:stream'
|
||||
_VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8',
|
||||
'info_dict': {
|
||||
'id': '203493',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
_TESTS = [
|
||||
{
|
||||
'note': 'Tokenized m3u8 source URL',
|
||||
'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
|
||||
'info_dict': {
|
||||
'id': '2019740',
|
||||
'ext': 'mp4',
|
||||
'title': r"re:Women's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'note': 'Plain m3u8 source URL',
|
||||
'url': 'https://stream.nbcolympics.com/gymnastics-event-finals-mens-floor-pommel-horse-womens-vault-bars',
|
||||
'info_dict': {
|
||||
'id': '2021729',
|
||||
'ext': 'mp4',
|
||||
'title': r're:Event Finals: M Floor, W Vault, M Pommel, W Uneven Bars [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
_DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid')
|
||||
resource = self._search_regex(
|
||||
r"resource\s*=\s*'(.+)';", webpage,
|
||||
'resource').replace("' + pid + '", pid)
|
||||
|
||||
event_config = self._download_json(
|
||||
self._DATA_URL_TEMPLATE % ('event_config', pid),
|
||||
pid)['eventConfig']
|
||||
title = self._live_title(event_config['eventTitle'])
|
||||
f'http://stream.nbcolympics.com/data/event_config_{pid}.json',
|
||||
pid, 'Downloading event config')['eventConfig']
|
||||
|
||||
title = event_config['eventTitle']
|
||||
is_live = {'live': True, 'replay': False}.get(event_config.get('eventStatus'))
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
|
||||
source_url = self._download_json(
|
||||
self._DATA_URL_TEMPLATE % ('live_sources', pid),
|
||||
pid)['videoSources'][0]['sourceUrl']
|
||||
media_token = self._extract_mvpd_auth(
|
||||
url, pid, event_config.get('requestorId', 'NBCOlympics'), resource)
|
||||
formats = self._extract_m3u8_formats(self._download_webpage(
|
||||
'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={
|
||||
'cdn': 'akamai',
|
||||
'mediaToken': base64.b64encode(media_token.encode()),
|
||||
'resource': base64.b64encode(resource.encode()),
|
||||
'url': source_url,
|
||||
}), pid, 'mp4')
|
||||
f'https://api-leap.nbcsports.com/feeds/assets/{pid}?application=NBCOlympics&platform=desktop&format=nbc-player&env=staging',
|
||||
pid, 'Downloading leap config'
|
||||
)['videoSources'][0]['cdnSources']['primary'][0]['sourceUrl']
|
||||
|
||||
if event_config.get('cdnToken'):
|
||||
ap_resource = self._get_mvpd_resource(
|
||||
event_config.get('resourceId', 'NBCOlympics'),
|
||||
re.sub(r'[^\w\d ]+', '', event_config['eventTitle']), pid,
|
||||
event_config.get('ratingId', 'NO VALUE'))
|
||||
media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource)
|
||||
|
||||
source_url = self._download_json(
|
||||
'https://tokens.playmakerservices.com/', pid, 'Retrieving tokenized URL',
|
||||
data=json.dumps({
|
||||
'application': 'NBCSports',
|
||||
'authentication-type': 'adobe-pass',
|
||||
'cdn': 'akamai',
|
||||
'pid': pid,
|
||||
'platform': 'desktop',
|
||||
'requestorId': 'NBCOlympics',
|
||||
'resourceId': base64.b64encode(ap_resource.encode()).decode(),
|
||||
'token': base64.b64encode(media_token.encode()).decode(),
|
||||
'url': source_url,
|
||||
'version': 'v1',
|
||||
}).encode(),
|
||||
)['akamai'][0]['tokenizedUrl']
|
||||
|
||||
formats = self._extract_m3u8_formats(source_url, pid, 'mp4', live=is_live)
|
||||
for f in formats:
|
||||
# -http_seekable requires ffmpeg 4.3+ but it doesnt seem possible to
|
||||
# download with ffmpeg without this option
|
||||
f['_ffmpeg_args'] = ['-seekable', '0', '-http_seekable', '0', '-icy', '0']
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
@@ -546,5 +588,5 @@ class NBCOlympicsStreamIE(AdobePassIE):
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
@@ -2,9 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import time
|
||||
|
||||
from urllib.error import HTTPError
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_quote
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
@@ -78,7 +80,9 @@ class NebulaIE(InfoExtractor):
|
||||
]
|
||||
_NETRC_MACHINE = 'watchnebula'
|
||||
|
||||
def _retrieve_nebula_auth(self, video_id):
|
||||
_nebula_token = None
|
||||
|
||||
def _retrieve_nebula_auth(self):
|
||||
"""
|
||||
Log in to Nebula, and returns a Nebula API token
|
||||
"""
|
||||
@@ -91,7 +95,7 @@ class NebulaIE(InfoExtractor):
|
||||
data = json.dumps({'email': username, 'password': password}).encode('utf8')
|
||||
response = self._download_json(
|
||||
'https://api.watchnebula.com/api/v1/auth/login/',
|
||||
data=data, fatal=False, video_id=video_id,
|
||||
data=data, fatal=False, video_id=None,
|
||||
headers={
|
||||
'content-type': 'application/json',
|
||||
# Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
|
||||
@@ -101,6 +105,19 @@ class NebulaIE(InfoExtractor):
|
||||
errnote='Authentication failed or rejected')
|
||||
if not response or not response.get('key'):
|
||||
self.raise_login_required()
|
||||
|
||||
# save nebula token as cookie
|
||||
self._set_cookie(
|
||||
'nebula.app', 'nebula-auth',
|
||||
compat_urllib_parse_quote(
|
||||
json.dumps({
|
||||
"apiToken": response["key"],
|
||||
"isLoggingIn": False,
|
||||
"isLoggingOut": False,
|
||||
}, separators=(",", ":"))),
|
||||
expire_time=int(time.time()) + 86400 * 365,
|
||||
)
|
||||
|
||||
return response['key']
|
||||
|
||||
def _retrieve_zype_api_key(self, page_url, display_id):
|
||||
@@ -139,8 +156,17 @@ class NebulaIE(InfoExtractor):
|
||||
'Authorization': 'Token {access_token}'.format(access_token=access_token)
|
||||
}, note=note)
|
||||
|
||||
def _fetch_zype_access_token(self, video_id, nebula_token):
|
||||
user_object = self._call_nebula_api('/auth/user/', video_id, nebula_token, note='Retrieving Zype access token')
|
||||
def _fetch_zype_access_token(self, video_id):
|
||||
try:
|
||||
user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token')
|
||||
except ExtractorError as exc:
|
||||
# if 401, attempt credential auth and retry
|
||||
if exc.cause and isinstance(exc.cause, HTTPError) and exc.cause.code == 401:
|
||||
self._nebula_token = self._retrieve_nebula_auth()
|
||||
user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token')
|
||||
else:
|
||||
raise
|
||||
|
||||
access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str)
|
||||
if not access_token:
|
||||
if try_get(user_object, lambda x: x['is_subscribed'], bool):
|
||||
@@ -162,9 +188,21 @@ class NebulaIE(InfoExtractor):
|
||||
if category.get('value'):
|
||||
return category['value'][0]
|
||||
|
||||
def _real_initialize(self):
|
||||
# check cookie jar for valid token
|
||||
nebula_cookies = self._get_cookies('https://nebula.app')
|
||||
nebula_cookie = nebula_cookies.get('nebula-auth')
|
||||
if nebula_cookie:
|
||||
self.to_screen('Authenticating to Nebula with token from cookie jar')
|
||||
nebula_cookie_value = compat_urllib_parse_unquote(nebula_cookie.value)
|
||||
self._nebula_token = self._parse_json(nebula_cookie_value, None).get('apiToken')
|
||||
|
||||
# try to authenticate using credentials if no valid token has been found
|
||||
if not self._nebula_token:
|
||||
self._nebula_token = self._retrieve_nebula_auth()
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
nebula_token = self._retrieve_nebula_auth(display_id)
|
||||
api_key = self._retrieve_zype_api_key(url, display_id)
|
||||
|
||||
response = self._call_zype_api('/videos', {'friendly_title': display_id},
|
||||
@@ -174,7 +212,7 @@ class NebulaIE(InfoExtractor):
|
||||
video_meta = response['response'][0]
|
||||
|
||||
video_id = video_meta['_id']
|
||||
zype_access_token = self._fetch_zype_access_token(display_id, nebula_token=nebula_token)
|
||||
zype_access_token = self._fetch_zype_access_token(display_id)
|
||||
|
||||
channel_title = self._extract_channel_title(video_meta)
|
||||
|
||||
@@ -187,13 +225,12 @@ class NebulaIE(InfoExtractor):
|
||||
'title': video_meta.get('title'),
|
||||
'description': video_meta.get('description'),
|
||||
'timestamp': parse_iso8601(video_meta.get('published_at')),
|
||||
'thumbnails': [
|
||||
{
|
||||
'id': tn.get('name'), # this appears to be null
|
||||
'url': tn['url'],
|
||||
'width': tn.get('width'),
|
||||
'height': tn.get('height'),
|
||||
} for tn in video_meta.get('thumbnails', [])],
|
||||
'thumbnails': [{
|
||||
'id': tn.get('name'), # this appears to be null
|
||||
'url': tn['url'],
|
||||
'width': tn.get('width'),
|
||||
'height': tn.get('height'),
|
||||
} for tn in video_meta.get('thumbnails', [])],
|
||||
'duration': video_meta.get('duration'),
|
||||
'channel': channel_title,
|
||||
'uploader': channel_title, # we chose uploader = channel name
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user