mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-12-08 23:22:49 +01:00)

Compare commits: 2021.02.24 ... 2021.04.11 (157 commits)
Commit SHA1s, as captured from the compare view:

`a0f30f194a`, `b31fdeedfd`, `8fa43c73d8`, `56d868dbb7`, `f4f751af40`, `1988fab7e3`, `9de3ea3126`,
`e01d6aa435`, `f7ad71607d`, `68379de561`, `d9aa233295`, `f37468c41f`, `52a8a1e1b9`, `d818eb7473`,
`f8d4ad9ab0`, `3ffc7c89b0`, `f1823403b0`, `384fb069ec`, `a4ddaf231e`, `7e60c06925`, `d92f5d5a90`,
`9e62f283ff`, `c24ce07a84`, `de6758128e`, `73d4343e39`, `57d104424f`, `02aabd45d0`, `39ed931e53`,
`b28f8d244a`, `73cd218f5a`, `84601bb72b`, `54df8fc5b2`, `5d39972ed0`, `0481374e1d`, `eff635394a`,
`df0c81513e`, `3f6a90eb63`, `b050d210df`, `f4e4be19f0`, `cce889b900`, `a6ae61a4c2`, `b23b9eefd9`,
`a2f0b0c672`, `b704fc1a68`, `a3affbe6a0`, `1418a0437f`, `143db31d48`, `3700c7ef10`, `498f560638`,
`394dcd4486`, `83b20a970d`, `e1feb88fdf`, `389b9dbbcc`, `a7f347d9c9`, `421a459573`, `c224251aad`,
`037cc66ec8`, `9160a0c6a2`, `5c5fae6d2f`, `c1d3a4a8f0`, `adc74b3c6d`, `beb4b92a66`, `cd9b384cc3`,
`4d971a16b8`, `3561530776`, `4690688658`, `fe845284c4`, `2b3bf01c90`, `23c1a66730`, `dd18a58cb1`,
`a94bfd6cfe`, `a515a78dd3`, `e167860ce7`, `75d43ca080`, `5226731e2d`, `dcf64d43e0`, `e3c076970e`,
`7978e172f3`, `605d299f83`, `18c1f04362`, `e4beae703d`, `d034ab669c`, `5aeefbd633`, `597c18665e`,
`10db0d2f57`, `7275535116`, `a1c5d2ca64`, `ca87974543`, `e92caff5d5`, `ea3a012d2a`, `5b8917fb52`,
`8eec0120a2`, `4cf1e5d2f9`, `0a473f2f0f`, `e4edeb6226`, `d488e254d9`, `d7009caa03`, `54759df586`,
`605b684c2d`, `994443d24d`, `c5640c4508`, `1f52a09e2e`, `fc21af505c`, `015f3b3120`, `5ba4a0b69c`,
`0852947fcc`, `99594a11ce`, `2be71994c0`, `26fe8ffed0`, `feee67ae88`, `1caaf92d47`, `d069eca7a3`,
`f3eaa8dd1c`, `9e631877f8`, `36147a63e3`, `57db6a87ef`, `cd7c66cf01`, `2c736b4f61`, `c4a508ab31`,
`7815e55572`, `162e6f0000`, `a8278ababd`, `bd9ed42387`, `5f7514957f`, `3721515bde`, `a5c5623470`,
`c705177da2`, `d6e51845b7`, `da7f321e93`, `097b056c5a`, `f3b737ed19`, `ee1e05581e`, `ec5e77c558`,
`b3b30a4bca`, `5372545ddb`, `5ef7d9bdd8`, `62bff2c170`, `f0884c8b3f`, `277d6ff5f2`, `1cf376f55a`,
`7f7de7f94d`, `86878b6cd9`, `b3d1242534`, `9bd2020476`, `ed9b7e3dd3`, `c552ae8838`, `31a5e037a7`,
`3638226215`, `14fdfea973`, `b45d4e4a8e`, `3e39273418`, `b965087396`, `359d6d8650`, `0e0040519b`,
`127d075955`, `bce8cbb089`, `aae273ded8`
**.gitattributes** (vendored, new file, 1 addition)

@@ -0,0 +1 @@
+Makefile* text whitespace=-tab-in-indent
**.github/ISSUE_TEMPLATE/1_broken_site.md** (vendored, 6 changes)

@@ -21,7 +21,7 @@ assignees: ''
 
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
-- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.02.19. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.03. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
 - Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
@@ -29,7 +29,7 @@ Carefully read and work through this check list in order to prevent the most com
 -->
 
 - [ ] I'm reporting a broken site support
-- [ ] I've verified that I'm running yt-dlp version **2021.02.19**
+- [ ] I've verified that I'm running yt-dlp version **2021.04.03**
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
 - [ ] I've searched the bugtracker for similar issues including closed ones
@@ -44,7 +44,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your com
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] yt-dlp version 2021.02.19
+[debug] yt-dlp version 2021.04.03
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
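For the log section of these templates, the report is produced by re-running the failing command with `-v`, as the template itself instructs. A minimal sketch ("URL" is a placeholder for the page being reported):

```sh
# Re-run the failing command with verbose output and paste the full log
yt-dlp -v "URL"
```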
**.github/ISSUE_TEMPLATE/2_site_support_request.md** (vendored)

@@ -21,7 +21,7 @@ assignees: ''
 
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
-- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.02.19. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.03. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that site you are requesting is not dedicated to copyright infringement, see https://github.com/yt-dlp/yt-dlp. yt-dlp does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
 - Search the bugtracker for similar site support requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
@@ -29,7 +29,7 @@ Carefully read and work through this check list in order to prevent the most com
 -->
 
 - [ ] I'm reporting a new site support request
-- [ ] I've verified that I'm running yt-dlp version **2021.02.19**
+- [ ] I've verified that I'm running yt-dlp version **2021.04.03**
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that none of provided URLs violate any copyrights
 - [ ] I've searched the bugtracker for similar site support requests including closed ones
**.github/ISSUE_TEMPLATE/3_site_feature_request.md** (vendored)

@@ -21,13 +21,13 @@ assignees: ''
 
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
-- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.02.19. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.03. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
 - Search the bugtracker for similar site feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
 - Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space)
 -->
 
 - [ ] I'm reporting a site feature request
-- [ ] I've verified that I'm running yt-dlp version **2021.02.19**
+- [ ] I've verified that I'm running yt-dlp version **2021.04.03**
 - [ ] I've searched the bugtracker for similar site feature requests including closed ones
 
 
**.github/ISSUE_TEMPLATE/4_bug_report.md** (vendored, 6 changes)

@@ -21,7 +21,7 @@ assignees: ''
 
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
-- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.02.19. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.03. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
 - Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
@@ -30,7 +30,7 @@ Carefully read and work through this check list in order to prevent the most com
 -->
 
 - [ ] I'm reporting a broken site support issue
-- [ ] I've verified that I'm running yt-dlp version **2021.02.19**
+- [ ] I've verified that I'm running yt-dlp version **2021.04.03**
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
 - [ ] I've searched the bugtracker for similar bug reports including closed ones
@@ -46,7 +46,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your com
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] yt-dlp version 2021.02.19
+[debug] yt-dlp version 2021.04.03
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
**.github/ISSUE_TEMPLATE/5_feature_request.md** (vendored, 4 changes)

@@ -21,13 +21,13 @@ assignees: ''
 
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
-- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.02.19. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.04.03. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
 - Search the bugtracker for similar feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
 - Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space)
 -->
 
 - [ ] I'm reporting a feature request
-- [ ] I've verified that I'm running yt-dlp version **2021.02.19**
+- [ ] I've verified that I'm running yt-dlp version **2021.04.03**
 - [ ] I've searched the bugtracker for similar feature requests including closed ones
 
 
**.github/workflows/core.yml** (vendored, 2 changes)

@@ -3,7 +3,7 @@ on: [push, pull_request]
 jobs:
   tests:
     name: Core Tests
-    if: "!contains(github.event.head_commit.message, 'ci skip all')"
+    if: "!contains(github.event.head_commit.message, 'ci skip')"
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: true
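With this change, core tests are skipped whenever the commit message contains the substring `ci skip` (the release checklist in Changelog.md uses `:ci skip all`, which also matches). A sketch with a hypothetical commit message:

```sh
# Hypothetical commit; the core-tests job is skipped because the message
# contains the literal substring 'ci skip'
git commit -m "[docs] fix typos :ci skip"
```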
**.github/workflows/download.yml** (vendored, 2 changes)

@@ -3,7 +3,7 @@ on: [push, pull_request]
 jobs:
   tests:
     name: Download Tests
-    if: "!contains(github.event.head_commit.message, 'ci skip dl') && !contains(github.event.head_commit.message, 'ci skip all')"
+    if: "contains(github.event.head_commit.message, 'ci run dl')"
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: true
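Download tests thus flip from opt-out to opt-in, matching the 2021.03.01 changelog entry "[ci] Disable download tests unless specifically invoked". A sketch with a hypothetical commit message that opts in:

```sh
# Hypothetical commit that requests the download-test workflow
git commit -m "[fragment] harden retry logic :ci run dl"
```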
**.gitignore** (vendored, 11 changes)

@@ -8,6 +8,7 @@ dist/
 zip/
 tmp/
 venv/
+completions/
 
 # Misc
 *~
@@ -24,7 +25,9 @@ updates_key.pem
 *.class
 
 # Generated
+AUTHORS
 README.txt
+.mailmap
 *.1
 *.bash-completion
 *.fish
@@ -34,8 +37,9 @@ README.txt
 *.spec
 
 # Binary
-youtube-dl
-youtube-dlc
+/youtube-dl
+/youtube-dlc
+/yt-dlp
 yt-dlp.zip
 *.exe
 
@@ -50,12 +54,15 @@ yt-dlp.zip
 *.m4v
 *.mp3
 *.3gp
+*.webm
 *.wav
 *.ape
 *.mkv
 *.swf
 *.part
+*.part-*
 *.ytdl
+*.dump
 *.frag
 *.frag.urls
 *.aria2
**.readthedocs.yml**

@@ -20,4 +20,3 @@ python:
   version: 3
   install:
     - requirements: docs/requirements.txt
-    - requirements: requirements.txt
**.travis.yml** (deleted)

@@ -1,38 +0,0 @@
-language: python
-python:
-  - "2.6"
-  - "2.7"
-  - "3.2"
-  - "3.3"
-  - "3.4"
-  - "3.5"
-  - "3.6"
-  - "pypy"
-  - "pypy3"
-dist: trusty
-env:
-  - YTDL_TEST_SET=core
-jobs:
-  include:
-    - python: 3.7
-      dist: xenial
-      env: YTDL_TEST_SET=core
-    - python: 3.8
-      dist: xenial
-      env: YTDL_TEST_SET=core
-    - python: 3.8-dev
-      dist: xenial
-      env: YTDL_TEST_SET=core
-    - env: JYTHON=true; YTDL_TEST_SET=core
-    - name: flake8
-      python: 3.8
-      dist: xenial
-      install: pip install flake8
-      script: flake8 .
-  fast_finish: true
-  allow_failures:
-    - env: YTDL_TEST_SET=download
-    - env: JYTHON=true; YTDL_TEST_SET=core
-before_install:
-  - if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
-script: ./devscripts/run_tests.sh
**CONTRIBUTORS** (17 changes)

@@ -1,5 +1,6 @@
 pukkandan (owner)
 shirt-dev (collaborator)
+colethedj (collaborator)
 h-h-h-h
 pauldubois98
 nixxo
@@ -21,5 +22,19 @@ nao20010128nao
 kurumigi
 tsukumi
 bbepis
+animelover1984
 Pccode66
-Ashish
+Ashish0804
+RobinD42
+hseg
+DennyDai
+codeasashu
+teesid
+kevinoconnor7
+damianoamatruda
+2ShedsJackson
+CXwudi
+xtkoba
+llacb47
+hheimbuerger
+B0pol
**Changelog.md** (241 changes)

@@ -6,8 +6,10 @@
 * Run `make doc`
 * Update Changelog.md and CONTRIBUTORS
 * Change "Merged with ytdl" version in Readme.md if needed
+* Add new/fixed extractors in "new features" section of Readme.md
 * Commit to master as `Release <version>`
-* Push to origin/release - build task will now run
+* Push to origin/release using `git push origin master:release`
+    build task will now run
 * Update version.py using devscripts\update-version.py
 * Run `make issuetemplates`
 * Commit to master as `[version] update :ci skip all`
@@ -17,28 +19,188 @@
 -->
 
 
+### 2021.04.11
+* Add option `--convert-thumbnails` (only jpg currently supported)
+* Format selector `mergeall` to download and merge all formats
+* Pass any field to `--exec` using similar syntax to output template
+* Choose downloader for each protocol using `--downloader PROTO:NAME`
+    * Alias `--downloader` for `--external-downloader`
+    * Added `native` as an option for the downloader
+* Merge youtube-dl: Upto [commit/4fb25ff](https://github.com/ytdl-org/youtube-dl/commit/4fb25ff5a3be5206bb72e5c4046715b1529fb2c7) (except vimeo)
+* [DiscoveryPlusIndia] Add DiscoveryPlusIndiaShowIE by [Ashish0804](https://github.com/Ashish0804)
+* [NFHSNetwork] Add extractor by [llacb47](https://github.com/llacb47)
+* [nebula] Add extractor (watchnebula.com) by [hheimbuerger](https://github.com/hheimbuerger)
+* [nitter] Fix extraction of reply tweets and update instance list by [B0pol](https://github.com/B0pol)
+* [nitter] Fix thumbnails by [B0pol](https://github.com/B0pol)
+* [youtube] Fix thumbnail URL
+* [youtube] Parse API parameters from initial webpage by [colethedj](https://github.com/colethedj)
+* [youtube] Extract comments' approximate timestamp by [colethedj](https://github.com/colethedj)
+* [youtube] Fix `_extract_alerts`
+* [bilibili] Fix uploader
+* [utils] Add `datetime_from_str` and `datetime_add_months` by [colethedj](https://github.com/colethedj)
+* Run some `postprocessors` before actual download
+* Improve argument parsing for `-P`, `-o`, `-S`
+* Fix some `m3u8` not obeying `--allow-unplayable-formats`
+* Fix default of `dynamic_mpd`
+* Deprecate `--all-formats`, `--include-ads`, `--hls-prefer-native`, `--hls-prefer-ffmpeg`
+* [documentation] Improvements
+
+
+### 2021.04.03
+* Merge youtube-dl: Upto [commit/654b4f4](https://github.com/ytdl-org/youtube-dl/commit/654b4f4ff2718f38b3182c1188c5d569c14cc70a)
+* Ability to set a specific field in the file's metadata using `--parse-metadata`
+* Ability to select n'th best format like `-f bv*.2`
+* [DiscoveryPlus] Add discoveryplus.in
+* [la7] Add podcasts and podcast playlists by [nixxo](https://github.com/nixxo)
+* [mildom] Update extractor with current proxy by [nao20010128nao](https://github.com/nao20010128nao)
+* [ard:mediathek] Fix video id extraction
+* [generic] Detect Invidious' link element
+* [youtube] Show premium state in `availability` by [colethedj](https://github.com/colethedj)
+* [viewsource] Add extractor to handle `view-source:`
+* [sponskrub] Run before embedding thumbnail
+* [documentation] Improve `--parse-metadata` documentation
+
+
+### 2021.03.24.1
+* Revert [commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf)
+
+### 2021.03.24
+* Merge youtube-dl: Upto 2021.03.25 ([commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf))
+* Parse metadata from multiple fields using `--parse-metadata`
+* Ability to load playlist infojson using `--load-info-json`
+* Write current epoch to infojson when using `--no-clean-infojson`
+* [youtube_live_chat] fix bug when trying to set cookies
+* [niconico] Fix for when logged in by [CXwudi](https://github.com/CXwudi) and [xtkoba](https://github.com/xtkoba)
+* [linuxacadamy] Fix login
+
+
+### 2021.03.21
+* Merge youtube-dl: Upto [commit/7e79ba7](https://github.com/ytdl-org/youtube-dl/commit/7e79ba7dd6e6649dd2ce3a74004b2044f2182881)
+* Option `--no-clean-infojson` to keep private keys in the infojson
+* [aria2c] Support retry/abort unavailable fragments by [damianoamatruda](https://github.com/damianoamatruda)
+* [aria2c] Better default arguments
+* [movefiles] Fix bugs and make more robust
+* [formatSort] Fix `quality` being ignored
+* [splitchapters] Fix for older ffmpeg
+* [sponskrub] Pass proxy to sponskrub
+* Make sure `post_hook` gets the final filename
+* Recursively remove any private keys from infojson
+* Embed video URL metadata inside `mp4` by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan)
+* Merge `webm` formats into `mkv` if thumbnails are to be embedded by [damianoamatruda](https://github.com/damianoamatruda)
+* Use headers and cookies when downloading subtitles by [damianoamatruda](https://github.com/damianoamatruda)
+* Parse resolution in info dictionary by [damianoamatruda](https://github.com/damianoamatruda)
+* More consistent warning messages by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan)
+* [documentation] Add deprecated options and aliases in readme
+* [documentation] Fix some minor mistakes
+
+* [niconico] Partial fix adapted from [animelover1984/youtube-dl@b5eff52](https://github.com/animelover1984/youtube-dl/commit/b5eff52dd9ed5565672ea1694b38c9296db3fade) (login and smile formats still don't work)
+* [niconico] Add user extractor by [animelover1984](https://github.com/animelover1984)
+* [bilibili] Add anthology support by [animelover1984](https://github.com/animelover1984)
+* [amcnetworks] Fix extractor by [2ShedsJackson](https://github.com/2ShedsJackson)
+* [stitcher] Merge from youtube-dl by [nixxo](https://github.com/nixxo)
+* [rcs] Improved extraction by [nixxo](https://github.com/nixxo)
+* [linuxacadamy] Improve regex
+* [youtube] Show if video is `private`, `unlisted` etc in info (`availability`) by [colethedj](https://github.com/colethedj) and [pukkandan](https://github.com/pukkandan)
+* [youtube] bugfix for channel playlist extraction
+* [nbc] Improve metadata extraction by [2ShedsJackson](https://github.com/2ShedsJackson)
+
+
+### 2021.03.15
+* **Split video by chapters**: using option `--split-chapters`
+    * The output file of the split files can be set with `-o`/`-P` using the prefix `chapter:`
+    * Additional keys `section_title`, `section_number`, `section_start`, `section_end` are available in the output template
+* **Parallel fragment downloads** by [shirt](https://github.com/shirt-dev)
+    * Use option `--concurrent-fragments` (`-N`) to set the number of threads (default 1)
+* Merge youtube-dl: Upto [commit/3be0980](https://github.com/ytdl-org/youtube-dl/commit/3be098010f667b14075e3dfad1e74e5e2becc8ea)
+* [zee5] Add Show Extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
+* [rai] fix drm check [nixxo](https://github.com/nixxo)
+* [wimtv] Add extractor by [nixxo](https://github.com/nixxo)
+* [mtv] Add mtv.it and extract series metadata by [nixxo](https://github.com/nixxo)
+* [pluto.tv] Add extractor by [kevinoconnor7](https://github.com/kevinoconnor7)
+* [youtube] Rewrite comment extraction by [colethedj](https://github.com/colethedj)
+* [embedthumbnail] Set mtime correctly
+* Refactor some postprocessor/downloader code by [pukkandan](https://github.com/pukkandan) and [shirt](https://github.com/shirt-dev)
+
+
+### 2021.03.07
+* [youtube] Fix history, mixes, community pages and trending by [pukkandan](https://github.com/pukkandan) and [colethedj](https://github.com/colethedj)
+* [youtube] Fix private feeds/playlists on multi-channel accounts by [colethedj](https://github.com/colethedj)
+* [youtube] Extract alerts from continuation by [colethedj](https://github.com/colethedj)
+* [cbs] Add support for ParamountPlus by [shirt](https://github.com/shirt-dev)
+* [mxplayer] Rewrite extractor with show support by [pukkandan](https://github.com/pukkandan) and [Ashish0804](https://github.com/Ashish0804)
+* [gedi] Improvements from youtube-dl by [nixxo](https://github.com/nixxo)
+* [vimeo] Fix videos with password by [teesid](https://github.com/teesid)
+* [lbry] Support `lbry://` url by [nixxo](https://github.com/nixxo)
+* [bilibili] Change `Accept` header by [pukkandan](https://github.com/pukkandan) and [animelover1984](https://github.com/animelover1984)
+* [trovo] Pass origin header
+* [rai] Check for DRM by [nixxo](https://github.com/nixxo)
+* [downloader] Fix bug for `ffmpeg`/`httpie`
+* [update] Fix updater removing the executable bit on some UNIX distros
+* [update] Fix current build hash for UNIX
+* [documentation] Include wget/curl/aria2c install instructions for Unix by [Ashish0804](https://github.com/Ashish0804)
+* Fix some videos downloading with `m3u8` extension
+* Remove "fixup is ignored" warning when fixup wasn't passed by user
+
+
+### 2021.03.03.2
+* [build] Fix bug
+
+### 2021.03.03
+* [youtube] Use new browse API for continuation page extraction by [colethedj](https://github.com/colethedj) and [pukkandan](https://github.com/pukkandan)
+* Fix HLS playlist downloading by [shirt](https://github.com/shirt-dev)
+* Merge youtube-dl: Upto [2021.03.03](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.03.03)
+* [mtv] Fix extractor
+* [nick] Fix extractor by [DennyDai](https://github.com/DennyDai)
+* [mxplayer] Add new extractor by [codeasashu](https://github.com/codeasashu)
+* [youtube] Throw error when `--extractor-retries` are exhausted
+* Reduce default of `--extractor-retries` to 3
+* Fix packaging bugs by [hseg](https://github.com/hseg)
+
+
+### 2021.03.01
+* Allow specifying path in `--external-downloader`
+* Add option `--sleep-requests` to sleep b/w requests
+* Add option `--extractor-retries` to retry on known extractor errors
+* Extract comments only when needed
+* `--get-comments` doesn't imply `--write-info-json` if `-J`, `-j` or `--print-json` are used
+* Fix `get_executable_path` by [shirt](https://github.com/shirt-dev)
+* [youtube] Retry on more known errors than just HTTP-5xx
+* [youtube] Fix inconsistent `webpage_url`
+* [tennistv] Fix format sorting
+* [bilibiliaudio] Recognize the file as audio-only
+* [hrfensehen] Fix wrong import
+* [viki] Fix viki play pass authentication by [RobinD42](https://github.com/RobinD42)
+* [readthedocs] Improvements by [shirt](https://github.com/shirt-dev)
+* [hls] Fix bug with m3u8 format extraction
+* [hls] Enable `--hls-use-mpegts` by default when downloading live-streams
+* [embedthumbnail] Fix bug with deleting original thumbnail
+* [build] Fix completion paths, zsh pip completion install by [hseg](https://github.com/hseg)
+* [ci] Disable download tests unless specifically invoked
+* Cleanup some code and fix typos
+
+
 ### 2021.02.24
 * Moved project to an organization [yt-dlp](https://github.com/yt-dlp)
 * **Completely changed project name to yt-dlp** by [Pccode66](https://github.com/Pccode66) and [pukkandan](https://github.com/pukkandan)
-* **Merge youtube-dl:** Upto [commit/4460329](https://github.com/ytdl-org/youtube-dl/commit/44603290e5002153f3ebad6230cc73aef42cc2cd) (except tmz, gedi)
+    * Also, `youtube-dlc` config files are no longer loaded
+* Merge youtube-dl: Upto [commit/4460329](https://github.com/ytdl-org/youtube-dl/commit/44603290e5002153f3ebad6230cc73aef42cc2cd) (except tmz, gedi)
 * [Readthedocs](https://yt-dlp.readthedocs.io) support by [shirt](https://github.com/shirt-dev)
 * [youtube] Show if video was a live stream in info (`was_live`)
-* [Zee5] Add new extractor by [Ashish](https://github.com/Ashish) and [pukkandan](https://github.com/pukkandan)
+* [Zee5] Add new extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
 * [jwplatform] Add support for `hyland.com`
 * [tennistv] Fix extractor
 * [hls] Support media initialization by [shirt](https://github.com/shirt-dev)
 * [hls] Added options `--hls-split-discontinuity` to better support media discontinuity by [shirt](https://github.com/shirt-dev)
-* [ffmpeg] Allow passing custom arguments before -i using `--ppa "ffmpeg_i1:ARGS"` synatax
+* [ffmpeg] Allow passing custom arguments before -i using `--ppa "ffmpeg_i1:ARGS"` syntax
 * Fix `--windows-filenames` removing `/` from UNIX paths
 * [hls] Show warning if pycryptodome is not found
 * [documentation] Improvements
     * Fix documentation of `Extractor Options`
-    * Document `all` in format selection (Closes #101)
+    * Document `all` in format selection
     * Document `playable_in_embed` in output templates
 
 
 ### 2021.02.19
-* **Merge youtube-dl:** Upto [commit/cf2dbec](https://github.com/ytdl-org/youtube-dl/commit/cf2dbec6301177a1fddf72862de05fa912d9869d) (except kakao)
+* Merge youtube-dl: Upto [commit/cf2dbec](https://github.com/ytdl-org/youtube-dl/commit/cf2dbec6301177a1fddf72862de05fa912d9869d) (except kakao)
 * [viki] Fix extractor
 * [niconico] Extract `channel` and `channel_id` by [kurumigi](https://github.com/kurumigi)
 * [youtube] Multiple page support for hashtag URLs
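Several of the options introduced in these entries are easiest to read as invocations. A few hedged sketches ("URL" is a placeholder; value spellings such as `jpg` and the `m3u8` protocol key are inferred from the entries, not verified against the release):

```sh
# --convert-thumbnails (2021.04.11): convert the written thumbnail to jpg
yt-dlp --write-thumbnail --convert-thumbnails jpg "URL"

# mergeall format selector (2021.04.11): download and merge all formats
yt-dlp -f mergeall "URL"

# Per-protocol downloader choice (2021.04.11): native downloader for m3u8
yt-dlp --downloader "m3u8:native" "URL"

# n'th-best selection (2021.04.03): second-best video-only format
yt-dlp -f "bv*.2" "URL"

# Split by chapters (2021.03.15), routing chapter files via the chapter: prefix
yt-dlp --split-chapters -o "chapter:%(section_number)s - %(section_title)s.%(ext)s" "URL"

# Parallel fragment downloads (2021.03.15) with 4 threads
yt-dlp -N 4 "URL"
```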
@@ -63,7 +225,7 @@
 
 
 ### 2021.02.15
-* **Merge youtube-dl:** Upto [2021.02.10](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.10) (except archive.org)
+* Merge youtube-dl: Upto [2021.02.10](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.10) (except archive.org)
 * [niconico] Improved extraction and support encrypted/SMILE movies by [kurumigi](https://github.com/kurumigi), [tsukumi](https://github.com/tsukumi), [bbepis](https://github.com/bbepis), [pukkandan](https://github.com/pukkandan)
 * Fix HLS AES-128 with multiple keys in external downloaders by [shirt](https://github.com/shirt-dev)
 * [youtube_live_chat] Fix by using POST API by [siikamiika](https://github.com/siikamiika)
@@ -106,7 +268,7 @@
 
 
 ### 2021.02.04
-* **Merge youtube-dl:** Upto [2021.02.04.1](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.04.1)
+* Merge youtube-dl: Upto [2021.02.04.1](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.04.1)
 * **Date/time formatting in output template:**
     * You can use [`strftime`](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) to format date/time fields. Example: `%(upload_date>%Y-%m-%d)s`
 * **Multiple output templates:**
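The `strftime` formatting noted in the 2021.02.04 entry can be exercised directly in an output template; a sketch with a placeholder URL:

```sh
# Render upload_date as YYYY-MM-DD in the file name
yt-dlp -o "%(title)s [%(upload_date>%Y-%m-%d)s].%(ext)s" "URL"
```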
@@ -160,7 +322,7 @@
 
 
 ### 2021.01.24
-* **Merge youtube-dl:** Upto [2021.01.24](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
+* Merge youtube-dl: Upto [2021.01.24](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
 * Plugin support ([documentation](https://github.com/yt-dlp/yt-dlp#plugins))
 * **Multiple paths**: New option `-P`/`--paths` to give different paths for different types of files
     * The syntax is `-P "type:path" -P "type:path"` ([documentation](https://github.com/yt-dlp/yt-dlp#:~:text=-P,%20--paths%20TYPE:PATH))
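The `-P "type:path"` syntax from the 2021.01.24 entry routes each class of file to its own directory; a sketch (the `home` and `temp` type names are assumptions based on the linked documentation):

```sh
# Keep finished files in ~/Videos while intermediates go to /tmp/yt-dlp
yt-dlp -P "home:~/Videos" -P "temp:/tmp/yt-dlp" "URL"
```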
@@ -189,7 +351,7 @@
 
 
 ### 2021.01.16
-* **Merge youtube-dl:** Upto [2021.01.16](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
+* Merge youtube-dl: Upto [2021.01.16](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
 * **Configuration files:**
     * Portable configuration file: `./yt-dlp.conf`
     * Allow the configuration files to be named `yt-dlp` instead of `youtube-dlc`. See [this](https://github.com/yt-dlp/yt-dlp#configuration) for details
@@ -219,7 +381,7 @@
 * [archive.org] Fix extractor and add support for audio and playlists by [wporr](https://github.com/wporr)
 * [Animelab] Added by [mariuszskon](https://github.com/mariuszskon)
 * [youtube:search] Fix view_count by [ohnonot](https://github.com/ohnonot)
-* [youtube] Show if video is embeddable in info
+* [youtube] Show if video is embeddable in info (`playable_in_embed`)
 * Update version badge automatically in README
 * Enable `test_youtube_search_matching`
 * Create `to_screen` and similar functions in postprocessor/common
@@ -235,9 +397,8 @@
 
 
 ### 2021.01.08
-* **Merge youtube-dl:** Upto [2021.01.08](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.08)
-    * Extractor stitcher ([1](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc), [2](https://github.com/ytdl-org/youtube-dl/commit/a563c97c5cddf55f8989ed7ea8314ef78e30107f)) have not been merged
-* Moved changelog to seperate file
+* Merge youtube-dl: Upto [2021.01.08](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.08) except stitcher ([1](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc), [2](https://github.com/ytdl-org/youtube-dl/commit/a563c97c5cddf55f8989ed7ea8314ef78e30107f))
+* Moved changelog to separate file
 
 
 ### 2021.01.07-1
@@ -275,8 +436,8 @@
 * Changed video format sorting to show video only files and video+audio files together.
 * Added `--video-multistreams`, `--no-video-multistreams`, `--audio-multistreams`, `--no-audio-multistreams`
 * Added `b`,`w`,`v`,`a` as alias for `best`, `worst`, `video` and `audio` respectively
-* **Shortcut Options:** Added `--write-link`, `--write-url-link`, `--write-webloc-link`, `--write-desktop-link` by [h-h-h-h](https://github.com/h-h-h-h) - See [Internet Shortcut Options](README.md#internet-shortcut-options) for details
-* **Sponskrub integration:** Added `--sponskrub`, `--sponskrub-cut`, `--sponskrub-force`, `--sponskrub-location`, `--sponskrub-args` - See [SponSkrub Options](README.md#sponskrub-options-sponsorblock) for details
+* Shortcut Options: Added `--write-link`, `--write-url-link`, `--write-webloc-link`, `--write-desktop-link` by [h-h-h-h](https://github.com/h-h-h-h) - See [Internet Shortcut Options](README.md#internet-shortcut-options) for details
+* **Sponskrub integration:** Added `--sponskrub`, `--sponskrub-cut`, `--sponskrub-force`, `--sponskrub-location`, `--sponskrub-args` - See [SponSkrub Options](README.md#sponskrub-sponsorblock-options) for details
 * Added `--force-download-archive` (`--force-write-archive`) by [h-h-h-h](https://github.com/h-h-h-h)
 * Added `--list-formats-as-table`, `--list-formats-old`
 * **Negative Options:** Makes it possible to negate most boolean options by adding a `no-` to the switch. Usefull when you want to reverse an option that is defined in a config file
@@ -285,36 +446,38 @@
 * Relaxed validation for format filters so that any arbitrary field can be used
 * Fix for embedding thumbnail in mp3 by [pauldubois98](https://github.com/pauldubois98) ([ytdl-org/youtube-dl#21569](https://github.com/ytdl-org/youtube-dl/pull/21569))
 * Make Twitch Video ID output from Playlist and VOD extractor same. This is only a temporary fix
-* **Merge youtube-dl:** Upto [2021.01.03](https://github.com/ytdl-org/youtube-dl/commit/8e953dcbb10a1a42f4e12e4e132657cb0100a1f8) - See [blackjack4494/yt-dlc#280](https://github.com/blackjack4494/yt-dlc/pull/280) for details
+* Merge youtube-dl: Upto [2021.01.03](https://github.com/ytdl-org/youtube-dl/commit/8e953dcbb10a1a42f4e12e4e132657cb0100a1f8) - See [blackjack4494/yt-dlc#280](https://github.com/blackjack4494/yt-dlc/pull/280) for details
 * Extractors [tiktok](https://github.com/ytdl-org/youtube-dl/commit/fb626c05867deab04425bad0c0b16b55473841a2) and [hotstar](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc) have not been merged
 * Cleaned up the fork for public use
 
 
+**PS**: All uncredited changes above this point are authored by [pukkandan](https://github.com/pukkandan)
 
 ### Unreleased changes in [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc)
-* Updated to youtube-dl release 2020.11.26
-* [youtube]
+* Updated to youtube-dl release 2020.11.26 by [pukkandan](https://github.com/pukkandan)
+* Youtube improvements by [pukkandan](https://github.com/pukkandan)
     * Implemented all Youtube Feeds (ytfav, ytwatchlater, ytsubs, ythistory, ytrec) and SearchURL
-    * Fix ytsearch not returning results sometimes due to promoted content
-    * Temporary fix for automatic captions - disable json3
     * Fix some improper Youtube URLs
     * Redirect channel home to /video
    * Print youtube's warning message
-    * Multiple pages are handled better for feeds
+    * Handle Multiple pages for feeds better
+* [youtube] Fix ytsearch not returning results sometimes due to promoted content by [colethedj](https://github.com/colethedj)
+* [youtube] Temporary fix for automatic captions - disable json3 by [blackjack4494](https://github.com/blackjack4494)
 * Add --break-on-existing by [gergesh](https://github.com/gergesh)
-* Pre-check video IDs in the archive before downloading
-* [bitwave.tv] New extractor
-* [Gedi] Add extractor
-* [Rcs] Add new extractor
-* [skyit] Add support for multiple Sky Italia website and removed old skyitalia extractor
-* [france.tv] Fix thumbnail URL
-* [ina] support mobile links
-* [instagram] Fix extractor
-* [itv] BTCC new pages' URL update (articles instead of races)
-* [SouthparkDe] Support for English URLs
-* [spreaker] fix SpreakerShowIE test URL
-* [Vlive] Fix playlist handling when downloading a channel
-* [generic] Detect embedded bitchute videos
-* [generic] Extract embedded youtube and twitter videos
-* [ffmpeg] Ensure all streams are copied
-* Fix for os.rename error when embedding thumbnail to video in a different drive
-* make_win.bat: don't use UPX to pack vcruntime140.dll
+* Pre-check video IDs in the archive before downloading by [pukkandan](https://github.com/pukkandan)
+* [bitwave.tv] New extractor by [lorpus](https://github.com/lorpus)
+* [Gedi] Add extractor by [nixxo](https://github.com/nixxo)
+* [Rcs] Add new extractor by [nixxo](https://github.com/nixxo)
+* [skyit] New skyitalia extractor by [nixxo](https://github.com/nixxo)
+* [france.tv] Fix thumbnail URL by [renalid](https://github.com/renalid)
+* [ina] support mobile links by [B0pol](https://github.com/B0pol)
+* [instagram] Fix thumbnail extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [SouthparkDe] Support for English URLs by [xypwn](https://github.com/xypwn)
+* [spreaker] fix SpreakerShowIE test URL by [pukkandan](https://github.com/pukkandan)
+* [Vlive] Fix playlist handling when downloading a channel by [kyuyeunk](https://github.com/kyuyeunk)
+* [tmz] Fix extractor by [diegorodriguezv](https://github.com/diegorodriguezv)
+* [generic] Detect embedded bitchute videos by [pukkandan](https://github.com/pukkandan)
+* [generic] Extract embedded youtube and twitter videos by [diegorodriguezv](https://github.com/diegorodriguezv)
+* [ffmpeg] Ensure all streams are copied by [pukkandan](https://github.com/pukkandan)
+* [embedthumbnail] Fix for os.rename error by [pukkandan](https://github.com/pukkandan)
+* make_win.bat: don't use UPX to pack vcruntime140.dll by [jbruchon](https://github.com/jbruchon)
**MANIFEST.in** (12 changes)

@@ -1,9 +1,9 @@
-include README.md
-include LICENSE
 include AUTHORS
-include ChangeLog
-include yt-dlp.bash-completion
-include yt-dlp.fish
+include Changelog.md
+include LICENSE
+include README.md
+include completions/*/*
+include supportedsites.md
 include yt-dlp.1
-recursive-include docs Makefile conf.py *.rst
+recursive-include devscripts *
 recursive-include test *
**Makefile** (91 changes)

@@ -1,12 +1,28 @@
-all: yt-dlp doc man
+all: yt-dlp doc pypi-files
+clean: clean-test clean-dist clean-cache
+completions: completion-bash completion-fish completion-zsh
 doc: README.md CONTRIBUTING.md issuetemplates supportedsites
-man: README.txt yt-dlp.1 yt-dlp.bash-completion yt-dlp.zsh yt-dlp.fish
+ot: offlinetest
+tar: yt-dlp.tar.gz
+
+# Keep this list in sync with MANIFEST.in
+# intended use: when building a source distribution,
+# make pypi-files && python setup.py sdist
+pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites completions yt-dlp.1 devscripts/* test/*
+
+.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites
 
-clean:
-	rm -rf yt-dlp.1.temp.md yt-dlp.1 yt-dlp.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz yt-dlp.zsh yt-dlp.fish yt_dlp/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png *.spec *.frag *.frag.urls *.frag.aria2 CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe
-	find . -name "*.pyc" -delete
-	find . -name "*.class" -delete
+clean-test:
+	rm -rf *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png *.frag *.frag.urls *.frag.aria2
+clean-dist:
+	rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap
+clean-cache:
+	find . -name "*.pyc" -o -name "*.class" -delete
+
+completion-bash: completions/bash/yt-dlp
+completion-fish: completions/fish/yt-dlp.fish
+completion-zsh: completions/zsh/_yt-dlp
+lazy-extractors: yt_dlp/extractor/lazy_extractors.py
 
 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin
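With the reorganized targets, common invocations look like the sketch below; the sdist line is quoted from the Makefile comment itself, the others assume only the targets defined above:

```sh
make completions    # build bash, zsh and fish completions into completions/
make clean-dist     # remove built artifacts (clean-test and clean-cache handle the rest)
make pypi-files && python setup.py sdist   # prepare and build a source distribution
```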
@@ -21,17 +37,12 @@ SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then ech
 # set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2
 MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)
 
-install: yt-dlp yt-dlp.1 yt-dlp.bash-completion yt-dlp.zsh yt-dlp.fish
-	install -d $(DESTDIR)$(BINDIR)
-	install -m 755 yt-dlp $(DESTDIR)$(BINDIR)
-	install -d $(DESTDIR)$(MANDIR)/man1
-	install -m 644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1
-	install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
-	install -m 644 yt-dlp.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/yt-dlp
-	install -d $(DESTDIR)$(SHAREDIR)/zsh/site-functions
-	install -m 644 yt-dlp.zsh $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp
-	install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions
-	install -m 644 yt-dlp.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/yt-dlp.fish
+install: yt-dlp yt-dlp.1 completions
+	install -Dm755 yt-dlp $(DESTDIR)$(BINDIR)
+	install -Dm644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1
+	install -Dm644 completions/bash/yt-dlp $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp
+	install -Dm644 completions/zsh/_yt-dlp $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp
+	install -Dm644 completions/fish/yt-dlp.fish $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish
 
 codetest:
 	flake8 .
@@ -41,8 +52,6 @@ test:
     nosetests --verbose test
     $(MAKE) codetest

-ot: offlinetest
-
 # Keep this list in sync with devscripts/run_tests.sh
 offlinetest: codetest
     $(PYTHON) -m nose --verbose test \
@@ -57,12 +66,6 @@ offlinetest: codetest
        --exclude test_youtube_signature.py \
        --exclude test_post_hooks.py

-tar: yt-dlp.tar.gz
-
-.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion ot offlinetest codetest supportedsites
-
-pypi-files: yt-dlp.bash-completion README.txt yt-dlp.1 yt-dlp.fish
-
 yt-dlp: yt_dlp/*.py yt_dlp/*/*.py
     mkdir -p zip
     for d in yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor ; do \
@@ -92,7 +95,7 @@ issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_
     $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md .github/ISSUE_TEMPLATE/5_feature_request.md

 supportedsites:
-    $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md
+    $(PYTHON) devscripts/make_supportedsites.py supportedsites.md

 README.txt: README.md
     pandoc -f $(MARKDOWN) -t plain README.md -o README.txt
@@ -102,29 +105,24 @@ yt-dlp.1: README.md
     pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1
     rm -f yt-dlp.1.temp.md

-yt-dlp.bash-completion: yt_dlp/*.py yt_dlp/*/*.py devscripts/bash-completion.in
+completions/bash/yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/bash-completion.in
+    mkdir -p completions/bash
     $(PYTHON) devscripts/bash-completion.py

-bash-completion: yt-dlp.bash-completion
-
-yt-dlp.zsh: yt_dlp/*.py yt_dlp/*/*.py devscripts/zsh-completion.in
+completions/zsh/_yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/zsh-completion.in
+    mkdir -p completions/zsh
     $(PYTHON) devscripts/zsh-completion.py

-zsh-completion: yt-dlp.zsh
-
-yt-dlp.fish: yt_dlp/*.py yt_dlp/*/*.py devscripts/fish-completion.in
+completions/fish/yt-dlp.fish: yt_dlp/*.py yt_dlp/*/*.py devscripts/fish-completion.in
+    mkdir -p completions/fish
     $(PYTHON) devscripts/fish-completion.py

-fish-completion: yt-dlp.fish
-
-lazy-extractors: yt_dlp/extractor/lazy_extractors.py
-
 _EXTRACTOR_FILES = $(shell find yt_dlp/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py')
 yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
     $(PYTHON) devscripts/make_lazy_extractors.py $@

-yt-dlp.tar.gz: yt-dlp README.md README.txt yt-dlp.1 yt-dlp.bash-completion yt-dlp.zsh yt-dlp.fish ChangeLog AUTHORS
-    @tar -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
+yt-dlp.tar.gz: README.md yt-dlp.1 completions Changelog.md AUTHORS
+    @tar -czf $(DESTDIR)/yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
        --exclude '*.DS_Store' \
        --exclude '*.kate-swp' \
        --exclude '*.pyc' \
@@ -134,8 +132,13 @@ yt-dlp.tar.gz: yt-dlp README.md README.txt yt-dlp.1 yt-dlp.bash-completion yt-dl
        --exclude '.git' \
        --exclude 'docs/_build' \
        -- \
-       bin devscripts test yt_dlp docs \
-       ChangeLog AUTHORS LICENSE README.md README.txt \
-       Makefile MANIFEST.in yt-dlp.1 yt-dlp.bash-completion \
-       yt-dlp.zsh yt-dlp.fish setup.py setup.cfg \
-       yt-dlp
+       devscripts test \
+       Changelog.md AUTHORS LICENSE README.md supportedsites.md \
+       Makefile MANIFEST.in yt-dlp.1 completions \
+       setup.py setup.cfg yt-dlp
+
+AUTHORS: .mailmap
+    git shortlog -s -n | cut -f2 | sort > AUTHORS
+
+.mailmap:
+    git shortlog -s -e -n | awk '!(out[$$NF]++) { $$1="";sub(/^[ \t]+/,""); print}' > .mailmap
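The new `AUTHORS` and `.mailmap` rules derive the contributor list from git history instead of shipping a static file. Outside of make (where `$$` unescapes to `$`), the two recipes correspond to roughly:

```
# AUTHORS: one contributor name per line, deduplicated by shortlog, sorted
git shortlog -s -n | cut -f2 | sort > AUTHORS

# .mailmap: keep the first name seen for each e-mail address
git shortlog -s -e -n | awk '!(out[$NF]++) { $1=""; sub(/^[ \t]+/,""); print }' > .mailmap
```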
README.md (359 changed lines)
@@ -1,39 +1,44 @@
+<div align="center">

 # YT-DLP
+A command-line program to download videos from YouTube and many other [video platforms](supportedsites.md)

-[![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Release&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest)
-[![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE)
+<!-- GHA does not have for-the-badge style
 [![CI Status](https://github.com/yt-dlp/yt-dlp/workflows/Core%20Tests/badge.svg?branch=master&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions)
-[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&label=discord&logo=discord&style=for-the-badge)](https://discord.gg/S75JaBna)
+-->
+[![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Release&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest)
+[![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE)
+[![Doc Status](https://readthedocs.org/projects/yt-dlp/badge/?version=latest)](https://yt-dlp.readthedocs.io)
+[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&label=discord&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r)

 [![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits)
 [![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits)
 [![Downloads](https://img.shields.io/github/downloads/yt-dlp/yt-dlp/total.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest)
 [![PyPi](https://img.shields.io/pypi/v/yt-dlp?style=for-the-badge)](https://pypi.org/project/yt-dlp)
-[![Doc Status](https://readthedocs.org/projects/yt-dlp/badge/?version=latest&style=for-the-badge)](https://yt-dlp.readthedocs.io)

-A command-line program to download videos from youtube.com many other [video platforms](docs/supportedsites.md)
+</div>

-This is a fork of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) which is inturn a fork of [youtube-dl](https://github.com/ytdl-org/youtube-dl)
+yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). The main focus of this project is adding new features and patches while also keeping up to date with the original project

 * [NEW FEATURES](#new-features)
 * [INSTALLATION](#installation)
+    * [Dependencies](#dependencies)
     * [Update](#update)
     * [Compile](#compile)
-* [DESCRIPTION](#description)
-* [OPTIONS](#options)
+* [USAGE AND OPTIONS](#usage-and-options)
+    * [General Options](#general-options)
     * [Network Options](#network-options)
-    * [Geo Restriction](#geo-restriction)
+    * [Geo-restriction](#geo-restriction)
     * [Video Selection](#video-selection)
     * [Download Options](#download-options)
     * [Filesystem Options](#filesystem-options)
-    * [Thumbnail images](#thumbnail-images)
+    * [Thumbnail Options](#thumbnail-options)
     * [Internet Shortcut Options](#internet-shortcut-options)
     * [Verbosity and Simulation Options](#verbosity-and-simulation-options)
     * [Workarounds](#workarounds)
     * [Video Format Options](#video-format-options)
     * [Subtitle Options](#subtitle-options)
     * [Authentication Options](#authentication-options)
-    * [Adobe Pass Options](#adobe-pass-options)
     * [Post-processing Options](#post-processing-options)
     * [SponSkrub (SponsorBlock) Options](#sponskrub-sponsorblock-options)
     * [Extractor Options](#extractor-options)
@@ -46,8 +51,12 @@ This is a fork of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) which i
     * [Filtering Formats](#filtering-formats)
     * [Sorting Formats](#sorting-formats)
     * [Format Selection examples](#format-selection-examples)
+* [MODIFYING METADATA](#modifying-metadata)
+    * [Modifying metadata examples](#modifying-metadata-examples)
 * [PLUGINS](#plugins)
+* [DEPRECATED OPTIONS](#deprecated-options)
 * [MORE](#more)
+</div>


 # NEW FEATURES
@@ -57,7 +66,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/

 * **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will now be preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))

-* **Merged with youtube-dl v2021.02.10**: You get all the latest features and patches of [youtube-dl](https://github.com/ytdl-org/youtube-dl) in addition to all the features of [youtube-dlc](https://github.com/blackjack4494/yt-dlc)
+* **Merged with youtube-dl v2021.04.07**: You get all the latest features and patches of [youtube-dl](https://github.com/ytdl-org/youtube-dl) in addition to all the features of [youtube-dlc](https://github.com/blackjack4494/yt-dlc)

 * **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--get-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, Playlist infojson etc. Note that the NicoNico improvements are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.

@@ -66,17 +75,17 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/
 * Youtube search (`ytsearch:`, `ytsearchdate:`) along with Search URLs works correctly
 * Redirect channel's home URL automatically to `/video` to preserve the old behaviour

-* **Aria2c with HLS/DASH**: You can use aria2c as the external downloader for DASH(mpd) and HLS(m3u8) formats. No more slow ffmpeg/native downloads
+* **Split video by chapters**: Videos can be split into multiple files based on chapters using `--split-chapters`

-* **New extractors**: AnimeLab, Philo MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5
+* **Multi-threaded fragment downloads**: Download multiple fragments of m3u8/mpd videos in parallel. Use `--concurrent-fragments` (`-N`) option to set the number of threads used

-* **Fixed extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, tiktok, akamai, ina, rumble, tennistv
+* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats

-* **Plugin support**: Extractors can be loaded from an external file. See [plugins](#plugins) for details
+* **New extractors**: AnimeLab, Philo MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula

-* **Multiple paths and output templates**: You can give different [output templates](#output-template) and download paths for different types of files. You can also set a temporary path where intermediary files are downloaded to. See [`--paths`](https://github.com/yt-dlp/yt-dlp/#:~:text=-P,%20--paths%20TYPE:PATH) for details
+* **Fixed extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter

-<!-- Relative link doesn't work for "#:~:text=" -->
+* **Multiple paths and output templates**: You can give different [output templates](#output-template) and download paths for different types of files. You can also set a temporary path where intermediary files are downloaded to using `--paths` (`-P`)

 * **Portable Configuration**: Configuration files are automatically loaded from the home and root directories. See [configuration](#configuration) for details

@@ -84,6 +93,8 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/

 * **Improvements**: Multiple `--postprocessor-args` and `--external-downloader-args`, Date/time formatting in `-o`, faster archive checking, more [format selection options](#format-selection) etc

+* **Plugin extractors**: Extractors can be loaded from an external file. See [plugins](#plugins) for details
+
 * **Self-updater**: The releases can be updated using `yt-dlp -U`


@@ -92,10 +103,11 @@ See [changelog](Changelog.md) or [commits](https://github.com/yt-dlp/yt-dlp/comm

 **PS**: Some of these changes are already in youtube-dlc, but are still unreleased. See [this](Changelog.md#unreleased-changes-in-blackjack4494yt-dlc) for details

-If you are coming from [youtube-dl](https://github.com/ytdl-org/youtube-dl), the amount of changes are very large. Compare [options](#options) and [supported sites](docs/supportedsites.md) with youtube-dl's to get an idea of the massive number of features/patches [youtube-dlc](https://github.com/blackjack4494/yt-dlc) has accumulated.
+If you are coming from [youtube-dl](https://github.com/ytdl-org/youtube-dl), the number of changes is very large. Compare [options](#options) and [supported sites](supportedsites.md) with youtube-dl's to get an idea of the massive number of features/patches [youtube-dlc](https://github.com/blackjack4494/yt-dlc) has accumulated.


 # INSTALLATION
+yt-dlp is not platform specific, so it should work on your Unix box, on Windows or on macOS

 You can install yt-dlp using one of the following methods:
 * Download the binary from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) (recommended method)
@@ -103,8 +115,31 @@ You can install yt-dlp using one of the following methods:
 * Use pip+git: `python -m pip install --upgrade git+https://github.com/yt-dlp/yt-dlp.git@release`
 * Install master branch: `python -m pip install --upgrade git+https://github.com/yt-dlp/yt-dlp`

+UNIX users (Linux, macOS, BSD) can also install the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) in one of the following ways:
+
+```
+sudo curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp
+sudo chmod a+rx /usr/local/bin/yt-dlp
+```
+
+```
+sudo wget https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -O /usr/local/bin/yt-dlp
+sudo chmod a+rx /usr/local/bin/yt-dlp
+```
+
+```
+sudo aria2c https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp
+sudo chmod a+rx /usr/local/bin/yt-dlp
+```
+
+### DEPENDENCIES
+
+Python versions 2.6, 2.7, or 3.2+ are currently supported. However, 3.2+ is strongly recommended and python2 support will be deprecated in the future.
+
+Although there are no required dependencies, `ffmpeg` and `ffprobe` are highly recommended. Other optional dependencies are `sponskrub`, `AtomicParsley`, `mutagen`, `pycryptodome` and any of the supported external downloaders. Note that the Windows releases are already built with the Python interpreter, mutagen and pycryptodome included.
+
 ### UPDATE
-Starting from version `2021.02.09`, you can use `yt-dlp -U` to update if you are using the provided release.
+You can use `yt-dlp -U` to update if you are using the provided release.
 If you are using `pip`, simply re-run the same command that was used to install the program.

 ### COMPILE
@@ -122,17 +157,14 @@ You can also build the executable without any version info or metadata by using:

 **For Unix**:
 You will need the required build tools: `python`, `make` (GNU), `pandoc`, `zip`, `nosetests`
-Then simply run `make`. You can also run `make youtube_dlc` instead to compile only the binary without updating any of the additional files
+Then simply run `make`. You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files

 **Note**: In either platform, `devscripts\update-version.py` can be used to automatically update the version number

-# DESCRIPTION
-**yt-dlp** is a command-line program to download videos from youtube.com many other [video platforms](docs/supportedsites.md). It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
+# USAGE AND OPTIONS

     yt-dlp [OPTIONS] [--] URL [URL...]


-# OPTIONS
 `Ctrl+F` is your friend :D
 <!-- Autogenerated -->

@@ -177,7 +209,7 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
         only list them
 --no-flat-playlist Extract the videos of a playlist
 --mark-watched Mark videos watched (YouTube only)
---no-mark-watched Do not mark videos watched
+--no-mark-watched Do not mark videos watched (default)
 --no-colors Do not emit color codes in output

 ## Network Options:
@@ -191,7 +223,7 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
 -4, --force-ipv4 Make all connections via IPv4
 -6, --force-ipv6 Make all connections via IPv6

-## Geo Restriction:
+## Geo-restriction:
 --geo-verification-proxy URL Use this proxy to verify the IP address for
         some geo-restricted sites. The default
         proxy specified by --proxy (or none, if the
@@ -245,7 +277,7 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
         "OUTPUT TEMPLATE" for a list of available
         keys) to match if the key is present, !key
         to check if the key is not present,
-        key>NUMBER (like "comment_count > 12", also
+        key>NUMBER (like "view_count > 12", also
         works with >=, <, <=, !=, =) to compare
         against a number, key = 'LITERAL' (like
         "uploader = 'Mike Smith'", also works with
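The comparison syntax being adjusted above belongs to yt-dlp's `--match-filter` option (the option name itself sits just outside this hunk). A hedged illustration of how the operators combine, with the URL, count and uploader as placeholder values:

```
# Keep only non-live videos with more than 12 views by a specific uploader
yt-dlp --match-filter "view_count > 12 & uploader = 'Mike Smith' & !is_live" URL
```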
@@ -275,11 +307,11 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
 --break-on-reject Stop the download process when encountering
         a file that has been filtered out
 --no-download-archive Do not use archive file (default)
---include-ads Download advertisements as well
-        (experimental)
---no-include-ads Do not download advertisements (default)

 ## Download Options:
+-N, --concurrent-fragments N Number of fragments of a dash/hlsnative
+        video that should be downloaded concurrently
+        (default is 1)
 -r, --limit-rate RATE Maximum download rate in bytes per second
         (e.g. 50K or 4.2M)
 -R, --retries RETRIES Number of retries (default is 10), or
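The new `-N` flag is the multi-threaded fragment download feature advertised earlier in this README diff; for example (the URL is a placeholder and 4 is an arbitrary thread count):

```
# Download up to 4 fragments of a DASH/HLS video concurrently
yt-dlp -N 4 URL
```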
@@ -313,17 +345,28 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
 --playlist-random Download playlist videos in random order
 --xattr-set-filesize Set file xattribute ytdl.filesize with
         expected file size
---hls-prefer-native Use the native HLS downloader instead of
-        ffmpeg
---hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
-        downloader
---hls-use-mpegts Use the mpegts container for HLS videos,
-        allowing to play the video while
-        downloading (some players may not be able
-        to play it)
---external-downloader NAME Use the specified external downloader.
-        Currently supports aria2c, avconv, axel,
-        curl, ffmpeg, httpie, wget
+--hls-use-mpegts Use the mpegts container for HLS videos;
+        allowing some players to play the video
+        while downloading, and reducing the chance
+        of file corruption if download is
+        interrupted. This is enabled by default for
+        live streams
+--no-hls-use-mpegts Do not use the mpegts container for HLS
+        videos. This is default when not
+        downloading live streams
+--downloader [PROTO:]NAME Name or path of the external downloader to
+        use (optionally) prefixed by the protocols
+        (http, ftp, m3u8, dash, rtsp, rtmp, mms) to
+        use it for. Currently supports native,
+        aria2c, avconv, axel, curl, ffmpeg, httpie,
+        wget (Recommended: aria2c). You can use
+        this option multiple times to set different
+        downloaders for different protocols. For
+        example, --downloader aria2c --downloader
+        "dash,m3u8:native" will use aria2c for
+        http/ftp downloads, and the native
+        downloader for dash/m3u8 downloads
+        (Alias: --external-downloader)
 --downloader-args NAME:ARGS Give these arguments to the external
         downloader. Specify the downloader name and
         the arguments separated by a colon ":". You
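The protocol-prefixed form is what replaces the removed `--hls-prefer-native`/`--hls-prefer-ffmpeg` pair; the help text's own example, spelled out (URL is a placeholder):

```
# aria2c for plain http/ftp downloads, but the native downloader
# for DASH (mpd) and HLS (m3u8) fragments
yt-dlp --downloader aria2c --downloader "dash,m3u8:native" URL
```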
@@ -335,7 +378,7 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
         stdin), one URL per line. Lines starting
         with '#', ';' or ']' are considered as
         comments and ignored
--P, --paths TYPE:PATH The paths where the files should be
+-P, --paths TYPES:PATH The paths where the files should be
         downloaded. Specify the type of file and
         the path separated by a colon ":". All the
         same types as --output are supported.
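A sketch of how `-P` composes, assuming the `home`/`temp` path types referenced elsewhere in this README and the `thumbnail` type inherited from `--output` (all paths are arbitrary example values):

```
# Finished files in ~/Videos, intermediates in /tmp/yt,
# thumbnails in their own subdirectory
yt-dlp -P "home:~/Videos" -P "temp:/tmp/yt" -P "thumbnail:~/Videos/thumbs" URL
```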
@@ -346,7 +389,7 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
         home path after download is finished. This
         option is ignored if --output is an
         absolute path
--o, --output [TYPE:]TEMPLATE Output filename template, see "OUTPUT
+-o, --output [TYPES:]TEMPLATE Output filename template; see "OUTPUT
         TEMPLATE" for details
 --output-na-placeholder TEXT Placeholder value for unavailable meta
         fields in output filename template
@@ -396,8 +439,15 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
         --write-description etc. (default)
 --no-write-playlist-metafiles Do not write playlist metadata when using
         --write-info-json, --write-description etc.
+--clean-infojson Remove some private fields such as
+        filenames from the infojson. Note that it
+        could still contain some personal
+        information (default)
+--no-clean-infojson Write all fields to the infojson
 --get-comments Retrieve video comments to be placed in the
-        .info.json file
+        .info.json file. The comments are fetched
+        even without this option if the extraction
+        is known to be quick
 --load-info-json FILE JSON file containing the video information
         (created with the "--write-info-json"
         option)
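Taken together, the new infojson switches default to scrubbing private fields while still allowing comments to be embedded; for instance (URL is a placeholder):

```
# Write .info.json with private fields removed (the default behaviour)
# and request comments where the extractor supports it
yt-dlp --write-info-json --clean-infojson --get-comments URL
```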
@@ -415,7 +465,7 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
 --no-cache-dir Disable filesystem caching
 --rm-cache-dir Delete all filesystem cache files

-## Thumbnail Images:
+## Thumbnail Options:
 --write-thumbnail Write thumbnail image to disk
 --no-write-thumbnail Do not write thumbnail image to disk
         (default)
@@ -437,7 +487,8 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
 --no-warnings Ignore warnings
 -s, --simulate Do not download the video and do not write
         anything to disk
---skip-download Do not download the video
+--skip-download Do not download the video but write all
+        related files (Alias: --no-download)
 -g, --get-url Simulate, quiet but print URL
 -e, --get-title Simulate, quiet but print title
 --get-id Simulate, quiet but print id
@@ -474,7 +525,7 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
 --encoding ENCODING Force the specified encoding (experimental)
 --no-check-certificate Suppress HTTPS certificate validation
 --prefer-insecure Use an unencrypted connection to retrieve
-        information about the video. (Currently
+        information about the video (Currently
         supported only for YouTube)
 --user-agent UA Specify a custom user agent
 --referer URL Specify a custom referer, use if the video
@@ -485,17 +536,16 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
 --bidi-workaround Work around terminals that lack
         bidirectional text support. Requires bidiv
         or fribidi executable in PATH
+--sleep-requests SECONDS Number of seconds to sleep between requests
+        during data extraction
 --sleep-interval SECONDS Number of seconds to sleep before each
-        download when used alone or a lower bound
-        of a range for randomized sleep before each
-        download (minimum possible number of
-        seconds to sleep) when used along with
-        --max-sleep-interval
---max-sleep-interval SECONDS Upper bound of a range for randomized sleep
-        before each download (maximum possible
-        number of seconds to sleep). Must only be
-        used along with --min-sleep-interval
---sleep-subtitles SECONDS Enforce sleep interval on subtitles as well
+        download. This is the minimum time to sleep
+        when used along with --max-sleep-interval
+        (Alias: --min-sleep-interval)
+--max-sleep-interval SECONDS Maximum number of seconds to sleep. Can
+        only be used along with --min-sleep-interval
+--sleep-subtitles SECONDS Number of seconds to sleep before each
+        subtitle download

 ## Video Format Options:
 -f, --format FORMAT Video format code, see "FORMAT SELECTION"
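The reworked sleep options in the hunk above separate extractor-request throttling from download throttling; a sketch (the values and URL are placeholders):

```
# 2s between extractor requests, plus a randomized 5-15s pause
# before each download
yt-dlp --sleep-requests 2 --sleep-interval 5 --max-sleep-interval 15 URL
```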
@@ -516,7 +566,6 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
         into a single file
 --no-audio-multistreams Only one audio stream is downloaded for
         each output file (default)
---all-formats Download all available video formats
 --prefer-free-formats Prefer video formats with free containers
         over non-free ones of same quality. Use
         with "-S ext" to strictly prefer free
@@ -543,16 +592,16 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
 --write-subs Write subtitle file
 --no-write-subs Do not write subtitle file (default)
 --write-auto-subs Write automatically generated subtitle file
-        (YouTube only)
---no-write-auto-subs Do not write automatically generated
-        subtitle file (default)
+        (Alias: --write-automatic-subs)
+--no-write-auto-subs Do not write auto-generated subtitles
+        (default) (Alias: --no-write-automatic-subs)
 --all-subs Download all the available subtitles of the
         video
 --list-subs List all available subtitles for the video
 --sub-format FORMAT Subtitle format, accepts formats
         preference, for example: "srt" or
         "ass/srt/best"
---sub-lang LANGS Languages of the subtitles to download
+--sub-langs LANGS Languages of the subtitles to download
         (optional) separated by commas, use --list-
         subs for available language tags

@@ -563,8 +612,6 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
 -2, --twofactor TWOFACTOR Two-factor authentication code
 -n, --netrc Use .netrc authentication data
 --video-password PASSWORD Video password (vimeo, youku)
-
-## Adobe Pass Options:
 --ap-mso MSO Adobe Pass multiple-system operator (TV
         provider) identifier, use --ap-list-mso for
         a list of available MSOs
@@ -601,23 +648,24 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
         Specify the postprocessor/executable name
         and the arguments separated by a colon ":"
         to give the argument to the specified
-        postprocessor/executable. Supported
-        postprocessors are: SponSkrub,
-        ExtractAudio, VideoRemuxer, VideoConvertor,
-        EmbedSubtitle, Metadata, Merger,
-        FixupStretched, FixupM4a, FixupM3u8,
-        SubtitlesConvertor and EmbedThumbnail. The
-        supported executables are: SponSkrub,
-        FFmpeg, FFprobe, and AtomicParsley. You can
+        postprocessor/executable. Supported PP are:
+        Merger, ExtractAudio, SplitChapters,
+        Metadata, EmbedSubtitle, EmbedThumbnail,
+        SubtitlesConvertor, ThumbnailsConvertor,
+        VideoRemuxer, VideoConvertor, SponSkrub,
+        FixupStretched, FixupM4a and FixupM3u8. The
+        supported executables are: AtomicParsley,
+        FFmpeg, FFprobe, and SponSkrub. You can
         also specify "PP+EXE:ARGS" to give the
         arguments to the specified executable only
         when being used by the specified
         postprocessor. Additionally, for
-        ffmpeg/ffprobe, a number can be appended to
-        the exe name seperated by "_i" to pass the
-        argument before the specified input file.
-        Eg: --ppa "Merger+ffmpeg_i1:-v quiet". You
-        can use this option multiple times to give
+        ffmpeg/ffprobe, "_i"/"_o" can be appended
+        to the prefix optionally followed by a
+        number to pass the argument before the
+        specified input/output file. Eg: --ppa
+        "Merger+ffmpeg_i1:-v quiet". You can use
+        this option multiple times to give
         different arguments to different
         postprocessors. (Alias: --ppa)
 -k, --keep-video Keep the intermediate video file on disk
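The `_i`/`_o` convention described above places arguments relative to ffmpeg's input or output files; the help text's own example, spelled out (URL is a placeholder):

```
# Pass "-v quiet" before the first input file, and only when
# ffmpeg is run by the Merger postprocessor
yt-dlp --ppa "Merger+ffmpeg_i1:-v quiet" URL
```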
@@ -633,20 +681,9 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
 --no-embed-thumbnail Do not embed thumbnail (default)
 --add-metadata Write metadata to the video file
 --no-add-metadata Do not write metadata (default)
---parse-metadata FIELD:FORMAT Parse additional metadata like title/artist
-        from other fields. Give field name to
-        extract data from, and format of the field
-        seperated by a ":". Either regular
-        expression with named capture groups or a
-        similar syntax to the output template can
-        also be used. The parsed parameters replace
-        any existing values and can be use in
-        output templateThis option can be used
-        multiple times. Example: --parse-metadata
-        "title:%(artist)s - %(title)s" matches a
-        title like "Coldplay - Paradise". Example
-        (regex): --parse-metadata
-        "description:Artist - (?P<artist>.+?)"
+--parse-metadata FROM:TO Parse additional metadata like title/artist
+        from other fields; see "MODIFYING METADATA"
+        for details
 --xattrs Write metadata to the video file's xattrs
         (using dublin core and xdg standards)
 --fixup POLICY Automatically correct known faults of the
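The worked examples dropped from the help text above remain the clearest summary of `--parse-metadata`; repeated here for reference (URL is a placeholder):

```
# Interpret titles like "Coldplay - Paradise" as artist - title
yt-dlp --parse-metadata "title:%(artist)s - %(title)s" URL

# The same idea with a regular expression and a named capture group
yt-dlp --parse-metadata "description:Artist - (?P<artist>.+?)" URL
```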
@@ -658,11 +695,26 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
         path to the binary or its containing
         directory
 --exec CMD Execute a command on the file after
-        downloading and post-processing, similar to
-        find's -exec syntax. Example: --exec 'adb
-        push {} /sdcard/Music/ && rm {}'
---convert-subs FORMAT Convert the subtitles to other format
+        downloading and post-processing. Similar
+        syntax to the output template can be used
+        to pass any field as arguments to the
+        command. An additional field "filepath"
+        that contains the final path of the
+        downloaded file is also available. If no
+        fields are passed, "%(filepath)s" is
+        appended to the end of the command
+--convert-subs FORMAT Convert the subtitles to another format
         (currently supported: srt|ass|vtt|lrc)
+        (Alias: --convert-subtitles)
+--convert-thumbnails FORMAT Convert the thumbnails to another format
+        (currently supported: jpg)
+--split-chapters Split video into multiple files based on
+        internal chapters. The "chapter:" prefix
+        can be used with "--paths" and "--output"
+        to set the output filename for the split
+        files. See "OUTPUT TEMPLATE" for details
+--no-split-chapters Do not split video based on chapters
+        (default)

 ## SponSkrub (SponsorBlock) Options:
 [SponSkrub](https://github.com/yt-dlp/SponSkrub) is a utility to
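The new `--split-chapters` and the reworked `--exec` compose naturally with the `chapter:` template fields defined later in this diff; a sketch (URL is a placeholder):

```
# One file per chapter, named after the chapter; echo each final path
# (--exec appends %(filepath)s when no field is given)
yt-dlp --split-chapters -o "chapter:%(section_number)s - %(section_title)s.%(ext)s" --exec echo URL
```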
@@ -686,6 +738,8 @@ Then simply run `make`. You can also run `make youtube_dlc` instead to compile o
         directory

 ## Extractor Options:
+--extractor-retries RETRIES Number of retries for known extractor
+        errors (default is 3), or "infinite"
 --allow-dynamic-mpd Process dynamic DASH manifests (default)
         (Alias: --no-ignore-dynamic-mpd)
 --ignore-dynamic-mpd Do not process dynamic DASH manifests
@@ -776,9 +830,9 @@ The `-o` option is used to indicate a template for the output file names while `

 **tl;dr:** [navigate me to examples](#output-template-examples).

-The basic usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Date/time fields can also be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it inside the parantheses seperated from the field name using a `>`. For example, `%(duration>%H-%M-%S)s`.
+The basic usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is not recommended). However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Date/time fields can also be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it inside the parentheses separated from the field name using a `>`. For example, `%(duration>%H-%M-%S)s`.

-Additionally, you can set different output templates for the various metadata files seperately from the general output template by specifying the type of file followed by the template seperated by a colon ":". The different filetypes supported are `subtitle|thumbnail|description|annotation|infojson|pl_description|pl_infojson`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video.
+Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different filetypes supported are `subtitle`, `thumbnail`, `description`, `annotation`, `infojson`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video.

 The available fields are:

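Both refinements in the hunk above are easiest to see side by side (URL is a placeholder):

```
# strftime formatting inside a field: duration rendered as HH-MM-SS
yt-dlp -o "%(title)s [%(duration>%H-%M-%S)s].%(ext)s" URL

# A separate template for one metadata filetype only
yt-dlp -o "%(title)s.%(ext)s" -o "infojson:meta/%(title)s.%(ext)s" URL
```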
@@ -787,6 +841,7 @@ The available fields are:
|
|||||||
- `url` (string): Video URL
|
- `url` (string): Video URL
|
||||||
- `ext` (string): Video filename extension
|
- `ext` (string): Video filename extension
|
||||||
- `alt_title` (string): A secondary title of the video
|
- `alt_title` (string): A secondary title of the video
|
||||||
|
- `description` (string): The description of the video
|
||||||
- `display_id` (string): An alternative identifier for the video
|
- `display_id` (string): An alternative identifier for the video
|
||||||
- `uploader` (string): Full name of the video uploader
|
- `uploader` (string): Full name of the video uploader
|
||||||
- `license` (string): License name the video is licensed under
|
- `license` (string): License name the video is licensed under
|
||||||
@@ -805,11 +860,12 @@ The available fields are:
|
|||||||
- `dislike_count` (numeric): Number of negative ratings of the video
|
- `dislike_count` (numeric): Number of negative ratings of the video
|
||||||
- `repost_count` (numeric): Number of reposts of the video
|
- `repost_count` (numeric): Number of reposts of the video
|
||||||
- `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage
|
- `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage
|
||||||
- `comment_count` (numeric): Number of comments on the video
|
- `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
|
||||||
- `age_limit` (numeric): Age restriction for the video (years)
|
- `age_limit` (numeric): Age restriction for the video (years)
|
||||||
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
|
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
|
||||||
- `was_live` (boolean): Whether this video was originally a live stream
|
- `was_live` (boolean): Whether this video was originally a live stream
|
||||||
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
|
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
|
||||||
|
- `availability` (string): Whether the video is 'private', 'premium_only', 'subscriber_only', 'needs_auth', 'unlisted' or 'public'
|
||||||
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
|
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
|
||||||
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
|
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
|
||||||
- `format` (string): A human-readable description of the format
|
- `format` (string): A human-readable description of the format
|
||||||
@@ -869,6 +925,13 @@ Available for the media that is a track or a part of a music album:
|
|||||||
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
||||||
- `release_year` (numeric): Year (YYYY) when the album was released
|
- `release_year` (numeric): Year (YYYY) when the album was released
|
||||||
|
|
||||||
|
Available for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
|
||||||
|
|
||||||
|
- `section_title` (string): Title of the chapter
|
||||||
|
- `section_number` (numeric): Number of the chapter within the file
|
||||||
|
- `section_start` (numeric): Start time of the chapter in seconds
|
||||||
|
- `section_end` (numeric): End time of the chapter in seconds
|
||||||
|
|
||||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
|
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
|
||||||
|
|
||||||
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKcj`, this will result in a `yt-dlp test video-BaW_jenozKcj.mp4` file created in the current directory.
|
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKcj`, this will result in a `yt-dlp test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||||
@@ -901,7 +964,7 @@ youtube-dl_test_video_.mp4 # A simple file name
|
|||||||
# Download YouTube playlist videos in separate directory indexed by video order in a playlist
|
# Download YouTube playlist videos in separate directory indexed by video order in a playlist
|
||||||
$ yt-dlp -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
|
$ yt-dlp -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
|
||||||
|
|
||||||
# Download YouTube playlist videos in seperate directories according to their uploaded year
|
# Download YouTube playlist videos in separate directories according to their uploaded year
|
||||||
$ yt-dlp -o '%(upload_date>%Y)s/%(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
|
$ yt-dlp -o '%(upload_date>%Y)s/%(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
|
||||||
|
|
||||||
# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
|
# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
|
||||||
@@ -922,7 +985,7 @@ $ yt-dlp -o - BaW_jenozKc
|
|||||||
By default, yt-dlp tries to download the best available quality if you **don't** pass any options.
|
By default, yt-dlp tries to download the best available quality if you **don't** pass any options.
|
||||||
This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, if multiple audiostreams is enabled (`--audio-multistreams`), the default format changes to `-f bestvideo+bestaudio/best`. Similarly, if ffmpeg is unavailable, or if you use yt-dlp to stream to `stdout` (`-o -`), the default becomes `-f best/bestvideo+bestaudio`.
|
This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, if multiple audiostreams is enabled (`--audio-multistreams`), the default format changes to `-f bestvideo+bestaudio/best`. Similarly, if ffmpeg is unavailable, or if you use yt-dlp to stream to `stdout` (`-o -`), the default becomes `-f best/bestvideo+bestaudio`.
|
||||||
|
|
||||||
The general syntax for format selection is `--f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
|
The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
|
||||||
|
|
||||||
**tl;dr:** [navigate me to examples](#format-selection-examples).
|
**tl;dr:** [navigate me to examples](#format-selection-examples).
|
||||||
|
|
||||||
@@ -933,8 +996,9 @@ You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`,
|
|||||||
You can also use special names to select particular edge case formats:
|
You can also use special names to select particular edge case formats:
|
||||||
|
|
||||||
- `all`: Select all formats
|
- `all`: Select all formats
|
||||||
- `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio.
|
- `mergeall`: Select and merge all formats (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
|
||||||
- `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio.
|
- `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio
|
||||||
|
- `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio
|
||||||
- `b`, `best`: Select the best quality format that contains both video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
|
- `b`, `best`: Select the best quality format that contains both video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
|
||||||
- `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]`
|
- `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]`
|
||||||
- `bv`, `bestvideo`: Select the best quality video-only format. Equivalent to `best*[acodec=none]`
|
- `bv`, `bestvideo`: Select the best quality video-only format. Equivalent to `best*[acodec=none]`
|
||||||
@@ -946,7 +1010,9 @@ You can also use special names to select particular edge case formats:
 - `ba*`, `bestaudio*`: Select the best quality format that contains audio. It may also contain video. Equivalent to `best*[acodec!=none]`
 - `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]`
 
-For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recomended to never actually use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details.
+For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details.
 
+You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.
 
 If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred; for example, `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
 
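The `.n` suffix composes with the rest of the selector grammar. As a minimal sketch, the same request can be made through yt-dlp's embedding API (the video URL is only a placeholder):

```python
import yt_dlp

# 'b.2/b' asks for the 2nd best combined format and falls back to the
# overall best, using the slash-separated preference order described above.
ydl_opts = {'format': 'b.2/b'}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```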
@@ -1017,7 +1083,7 @@ You can change the criteria for being considered the `best` by using `-S` (`--format-sort`)
 - `br`: Equivalent to using `tbr,vbr,abr`
 - `asr`: Audio sample rate in Hz
 
-Note that any other **numerical** field made available by the extractor can also be used. All fields, unless specified otherwise, are sorted in decending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers format with the smallest resolution. Additionally, you can suffix a prefered value for the fields, seperated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two prefered values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB.
+Note that any other **numerical** field made available by the extractor can also be used. All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p, and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and the audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided ones by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB.
 
 The fields `hasvid`, `ie_pref`, `lang` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--force-format-sort`. Apart from these, the default order used is: `quality,res,fps,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. Note that the extractors may override this default order, but they cannot override the user-provided order.
 
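For embedders, the same sorting can be requested programmatically; a sketch, assuming the `-S` string maps onto the `format_sort` parameter of `YoutubeDL` (the URL is a placeholder):

```python
import yt_dlp

# Prefer the smallest resolution that is at least 480p, then better codec,
# then higher bitrate -- the API counterpart of `-S '+res:480,codec,br'`.
ydl_opts = {'format_sort': ['+res:480', 'codec', 'br']}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```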
@@ -1046,10 +1112,17 @@ $ yt-dlp
 # by default, bestvideo and bestaudio will have the same file name.
 $ yt-dlp -f 'bv,ba' -o '%(title)s.f%(format_id)s.%(ext)s'
 
+# Download and merge the best format that has a video stream,
+# and all audio-only formats into one file
+$ yt-dlp -f 'bv*+mergeall[vcodec=none]' --audio-multistreams
+
+# Download and merge the best format that has a video stream,
+# and the best 2 audio-only formats into one file
+$ yt-dlp -f 'bv*+ba+ba.2' --audio-multistreams
 
 # The following examples show the old method (without -S) of format selection
-# and how to use -S to achieve a similar but better result
+# and how to use -S to achieve a similar but (generally) better result
 
 # Download the worst video available (old method)
 $ yt-dlp -f 'wv*+wa/w'
@@ -1142,11 +1215,77 @@ $ yt-dlp -S 'res:720,fps'
 $ yt-dlp -S '+res:480,codec,br'
 ```
 
+
+# MODIFYING METADATA
+
+The metadata obtained by the extractors can be modified by using `--parse-metadata FROM:TO`. The general syntax is to give the name of a field or a template (with similar syntax to [output template](#output-template)) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.
+
+Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--add-metadata`.
+
+You can also use this to change only the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to the `meta_description` field will be added to the `description` field in the file. You can use this to set a different "description" and "synopsis", for example.
+
+## Modifying metadata examples
+
+Note that on Windows you may need to use double quotes instead of single.
+
+```bash
+# Interpret the title as "Artist - Title"
+$ yt-dlp --parse-metadata 'title:%(artist)s - %(title)s'
+
+# Regex example
+$ yt-dlp --parse-metadata 'description:Artist - (?P<artist>.+)'
+
+# Set title as "Series name S01E05"
+$ yt-dlp --parse-metadata '%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s'
+
+# Set "comment" field in video metadata using description instead of webpage_url
+$ yt-dlp --parse-metadata 'description:(?s)(?P<meta_comment>.+)' --add-metadata
+```
 
 # PLUGINS
 
 Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`. Currently only `extractor` plugins are supported. Support for `downloader` and `postprocessor` plugins may be added in the future. See [ytdlp_plugins](ytdlp_plugins) for an example.
 
 **Note**: `<root-dir>` is the directory of the binary (`<root-dir>/yt-dlp`), or the root directory of the module if you are running directly from source-code (`<root dir>/yt_dlp/__main__.py`)
 
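To illustrate the layout described above, a minimal extractor plugin could look like the following sketch; the class name, URL pattern, and extracted fields are invented for the example:

```python
# <root-dir>/ytdlp_plugins/extractor/__init__.py
from yt_dlp.extractor.common import InfoExtractor


class SamplePluginIE(InfoExtractor):
    # Hypothetical site; only URLs matching _VALID_URL are routed here
    _VALID_URL = r'https?://example\.com/video/(?P<id>\d+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': self._og_search_video_url(webpage),
        }
```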
+# DEPRECATED OPTIONS
+
+These are all the deprecated options and the current alternative to achieve the same effect
+
+    --cn-verification-proxy URL      --geo-verification-proxy URL
+    --id                             -o "%(id)s.%(ext)s"
+    -A, --auto-number                -o "%(autonumber)s-%(id)s.%(ext)s"
+    -t, --title                      -o "%(title)s-%(id)s.%(ext)s"
+    -l, --literal                    -o accepts literal names
+    --all-formats                    -f all
+    --autonumber-size NUMBER         Use string formatting. Eg: %(autonumber)03d
+    --metadata-from-title FORMAT     --parse-metadata "%(title)s:FORMAT"
+    --prefer-avconv                  avconv is no longer officially supported (Alias: --no-prefer-ffmpeg)
+    --prefer-ffmpeg                  Default (Alias: --no-prefer-avconv)
+    --hls-prefer-native              --downloader "m3u8:native"
+    --hls-prefer-ffmpeg              --downloader "m3u8:ffmpeg"
+    --avconv-location                avconv is no longer officially supported
+    -C, --call-home                  Not implemented
+    --no-call-home                   Default
+    --include-ads                    Not implemented
+    --no-include-ads                 Default
+    --write-srt                      --write-subs
+    --no-write-srt                   --no-write-subs
+    --srt-lang LANGS                 --sub-langs LANGS
+    --prefer-unsecure                --prefer-insecure
+    --rate-limit RATE                --limit-rate RATE
+    --force-write-download-archive   --force-write-archive
+    --dump-intermediate-pages        --dump-pages
+    --dump-headers                   --print-traffic
+    --youtube-print-sig-code         No longer supported
+    --trim-file-names LENGTH         --trim-filenames LENGTH
+    --yes-overwrites                 --force-overwrites
+    --load-info                      --load-info-json
+    --split-tracks                   --split-chapters
+    --no-split-tracks                --no-split-chapters
+    --sponskrub-args ARGS            --ppa "sponskrub:ARGS"
+    --test                           Only used for testing extractors
+
+
 # MORE
 For FAQ, Developer Instructions etc., see the [original README](https://github.com/ytdl-org/youtube-dl#faq)
devscripts/bash-completion.py
@@ -8,7 +8,7 @@ import sys
 sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
 import yt_dlp
 
-BASH_COMPLETION_FILE = "yt-dlp.bash-completion"
+BASH_COMPLETION_FILE = "completions/bash/yt-dlp"
 BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
 
devscripts/fish-completion.py
@@ -10,7 +10,7 @@ sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
 import yt_dlp
 from yt_dlp.utils import shell_quote
 
-FISH_COMPLETION_FILE = 'yt-dlp.fish'
+FISH_COMPLETION_FILE = 'completions/fish/yt-dlp.fish'
 FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
 
 EXTRA_ARGS = {
devscripts/release.sh
@@ -61,7 +61,7 @@ if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
 if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
 if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
 
-read -p "Is ChangeLog up to date? (y/n) " -n 1
+read -p "Is Changelog up to date? (y/n) " -n 1
 if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
 
 /bin/echo -e "\n### First of all, testing..."
@@ -75,12 +75,12 @@ fi
 /bin/echo -e "\n### Changing version in version.py..."
 sed -i "s/__version__ = '.*'/__version__ = '$version'/" yt_dlp/version.py
 
-/bin/echo -e "\n### Changing version in ChangeLog..."
-sed -i "s/<unreleased>/$version/" ChangeLog
+/bin/echo -e "\n### Changing version in Changelog..."
+sed -i "s/<unreleased>/$version/" Changelog.md
 
 /bin/echo -e "\n### Committing documentation, templates and yt_dlp/version.py..."
 make README.md CONTRIBUTING.md issuetemplates supportedsites
-git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md .github/ISSUE_TEMPLATE/6_question.md docs/supportedsites.md yt_dlp/version.py ChangeLog
+git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md .github/ISSUE_TEMPLATE/6_question.md docs/supportedsites.md yt_dlp/version.py Changelog.md
 git commit $gpg_sign_commits -m "release $version"
 
 /bin/echo -e "\n### Now tagging, signing and pushing..."
@@ -111,7 +111,7 @@ RELEASE_FILES="yt-dlp yt-dlp.exe yt-dlp-$version.tar.gz"
 for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
 
 ROOT=$(pwd)
-python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version"
+python devscripts/create-github-release.py Changelog.md $version "$ROOT/build/$version"
 
 ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
 
devscripts/zsh-completion.py
@@ -8,7 +8,7 @@ import sys
 sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
 import yt_dlp
 
-ZSH_COMPLETION_FILE = "yt-dlp.zsh"
+ZSH_COMPLETION_FILE = "completions/zsh/_yt-dlp"
 ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"
 
 
docs/Changelog.md (new file)
@@ -0,0 +1,5 @@
+---
+orphan: true
+---
+```{include} ../Changelog.md
+```
docs/LICENSE.md (new file)
@@ -0,0 +1,6 @@
+---
+orphan: true
+---
+# LICENSE
+```{include} ../LICENSE
+```
docs/README.md (new file)
@@ -0,0 +1,2 @@
+```{include} ../README.md
+```
docs/conf.py
@@ -7,26 +7,21 @@ import os
 
 # Allows to import yt-dlp
 sys.path.insert(0, os.path.abspath('..'))
-from recommonmark.transform import AutoStructify
 
 # -- General configuration ------------------------------------------------
 
-# The suffix of source filenames.
-source_suffix = ['.rst', '.md']
 
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    'sphinx.ext.autodoc',
-    'recommonmark',
+    'myst_parser',
 ]
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 
 # The master toctree document.
-master_doc = 'index'
+master_doc = 'README'
 
 # General information about the project.
 project = u'yt-dlp'
@@ -64,12 +59,10 @@ highlight_language = 'none'
 # so a file named "default.css" will overwrite the builtin "default.css".
 # html_static_path = ['_static']
 
+# Enable heading anchors
+myst_heading_anchors = 4
 
-def setup(app):
-    app.add_config_value('recommonmark_config', {
-        'enable_math': False,
-        'enable_inline_math': False,
-        'enable_eval_rst': True,
-        'enable_auto_toc_tree': True,
-    }, True)
-    app.add_transform(AutoStructify)
+# Suppress heading warnings
+suppress_warnings = [
+    'myst.header',
+]
(deleted file)
@@ -1 +0,0 @@
-../README.md
docs/requirements.txt
@@ -1,2 +1 @@
-recommonmark>=0.6.0
-m2r2
+myst-parser
(one file diff suppressed because it is too large)
docs/ytdlp_plugins.md (new file)
@@ -0,0 +1,6 @@
+---
+orphan: true
+---
+# ytdlp_plugins
+
+See [https://github.com/yt-dlp/yt-dlp/tree/master/ytdlp_plugins](https://github.com/yt-dlp/yt-dlp/tree/master/ytdlp_plugins).
setup.py
@@ -27,8 +27,9 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
     print("inv")
 else:
     files_spec = [
-        ('etc/bash_completion.d', ['yt-dlp.bash-completion']),
-        ('etc/fish/completions', ['yt-dlp.fish']),
+        ('share/bash-completion/completions', ['completions/bash/yt-dlp']),
+        ('share/zsh/site-functions', ['completions/zsh/_yt-dlp']),
+        ('share/fish/vendor_completions.d', ['completions/fish/yt-dlp.fish']),
         ('share/doc/yt_dlp', ['README.txt']),
         ('share/man/man1', ['yt-dlp.1'])
     ]
@@ -38,7 +39,7 @@ else:
     resfiles = []
     for fn in files:
         if not os.path.exists(fn):
-            warnings.warn('Skipping file %s since it is not present. Type make to build all automatically generated files.' % fn)
+            warnings.warn('Skipping file %s since it is not present. Try running `make pypi-files` first.' % fn)
         else:
             resfiles.append(fn)
     data_files.append((dirname, resfiles))
supportedsites.md (new file; diff suppressed because it is too large)
test/test_all_urls.py
@@ -37,7 +37,6 @@ class TestAllURLsMatching(unittest.TestCase):
         assertPlaylist('PL63F0C78739B09958')
         assertTab('https://www.youtube.com/AsapSCIENCE')
         assertTab('https://www.youtube.com/embedded')
-        assertTab('https://www.youtube.com/feed')  # Own channel's home page
         assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
         assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
         assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
test/test_postprocessors.py
@@ -14,10 +14,10 @@ from yt_dlp.postprocessor import MetadataFromFieldPP, MetadataFromTitlePP
 class TestMetadataFromField(unittest.TestCase):
     def test_format_to_regex(self):
         pp = MetadataFromFieldPP(None, ['title:%(title)s - %(artist)s'])
-        self.assertEqual(pp._data[0]['regex'], r'(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)')
+        self.assertEqual(pp._data[0]['regex'], r'(?P<title>.+)\ \-\ (?P<artist>.+)')
 
 
 class TestMetadataFromTitle(unittest.TestCase):
     def test_format_to_regex(self):
         pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
-        self.assertEqual(pp._titleregex, r'(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)')
+        self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
test/test_utils.py
@@ -23,6 +23,7 @@ from yt_dlp.utils import (
     clean_html,
     clean_podcast_url,
     date_from_str,
+    datetime_from_str,
     DateRange,
     detect_exe_version,
     determine_ext,
@@ -311,8 +312,18 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
         self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week'))
         self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week'))
-        self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year'))
-        self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month'))
+        self.assertEqual(date_from_str('20200229+365day'), date_from_str('20200229+1year'))
+        self.assertEqual(date_from_str('20210131+28day'), date_from_str('20210131+1month'))
+
+    def test_datetime_from_str(self):
+        self.assertEqual(datetime_from_str('yesterday', precision='day'), datetime_from_str('now-1day', precision='auto'))
+        self.assertEqual(datetime_from_str('now+7day', precision='day'), datetime_from_str('now+1week', precision='auto'))
+        self.assertEqual(datetime_from_str('now+14day', precision='day'), datetime_from_str('now+2week', precision='auto'))
+        self.assertEqual(datetime_from_str('20200229+365day', precision='day'), datetime_from_str('20200229+1year', precision='auto'))
+        self.assertEqual(datetime_from_str('20210131+28day', precision='day'), datetime_from_str('20210131+1month', precision='auto'))
+        self.assertEqual(datetime_from_str('20210131+59day', precision='day'), datetime_from_str('20210131+2month', precision='auto'))
+        self.assertEqual(datetime_from_str('now+1day', precision='hour'), datetime_from_str('now+24hours', precision='auto'))
+        self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto'))
 
     def test_daterange(self):
         _20century = DateRange("19000101", "20000101")
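As a sketch of the API these tests pin down: `date_from_str` and the new `datetime_from_str` accept `<base or 'now'/'today'>[+-]<amount><unit>` strings, and `datetime_from_str` additionally takes a `precision` to which the result is truncated (signatures assumed from the tests above):

```python
from yt_dlp.utils import date_from_str, datetime_from_str

assert date_from_str('now+7day') == date_from_str('now+1week')

# 'hour' precision truncates minutes and seconds, per the tests above
print(datetime_from_str('now+24hours', precision='hour'))
```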
yt_dlp/YoutubeDL.py
@@ -60,12 +60,14 @@ from .utils import (
     encode_compat_str,
     encodeFilename,
     error_to_compat_str,
+    EntryNotInPlaylist,
     ExistingVideoReached,
     expand_path,
     ExtractorError,
     float_or_none,
     format_bytes,
     format_field,
+    FORMAT_RE,
     formatSeconds,
     GeoRestrictedError,
     int_or_none,
@@ -109,9 +111,17 @@ from .utils import (
     process_communicate_or_kill,
 )
 from .cache import Cache
-from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
+from .extractor import (
+    gen_extractor_classes,
+    get_info_extractor,
+    _LAZY_LOADER,
+    _PLUGIN_CLASSES
+)
 from .extractor.openload import PhantomJSwrapper
-from .downloader import get_suitable_downloader
+from .downloader import (
+    get_suitable_downloader,
+    shorten_protocol_name
+)
 from .downloader.rtmp import rtmpdump_version
 from .postprocessor import (
     FFmpegFixupM3u8PP,
@@ -216,6 +226,7 @@ class YoutubeDL(object):
     logtostderr:       Log messages to stderr instead of stdout.
     writedescription: Write the video description to a .description file
     writeinfojson:     Write the video description to a .info.json file
+    clean_infojson:    Remove private fields from the infojson
     writecomments:     Extract video comments. This will not be written to disk
                        unless writeinfojson is also given
     writeannotations:  Write the video annotations to a .annotations.xml file
@@ -280,10 +291,9 @@ class YoutubeDL(object):
     postprocessors:    A list of dictionaries, each with an entry
                        * key:  The name of the postprocessor. See
                                yt_dlp/postprocessor/__init__.py for a list.
-                       * _after_move: Optional. If True, run this post_processor
-                               after 'MoveFilesAfterDownload'
-                               as well as any further keyword arguments for the
-                               postprocessor.
+                       * when: When to run the postprocessor. Can be one of
+                               pre_process|before_dl|post_process|after_move.
+                               Assumed to be 'post_process' if not given
     post_hooks:        A list of functions that get called as the final step
                        for each video file, after all postprocessors have been
                        called. The filename will be passed as the only argument.
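A sketch of how an embedder would use the new `when` key (`FFmpegMetadata` is one of the postprocessor names listed in yt_dlp/postprocessor/__init__.py; the URL is a placeholder):

```python
import yt_dlp

ydl_opts = {
    'postprocessors': [{
        'key': 'FFmpegMetadata',   # which postprocessor to run
        'when': 'post_process',    # pre_process | before_dl | post_process | after_move
    }],
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```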
@@ -324,6 +334,8 @@ class YoutubeDL(object):
     source_address:    Client-side IP address to bind to.
     call_home:         Boolean, true iff we are allowed to contact the
                        yt-dlp servers for debugging. (BROKEN)
+    sleep_interval_requests: Number of seconds to sleep between requests
+                       during extraction
     sleep_interval:    Number of seconds to sleep before each download when
                        used alone or a lower bound of a range for randomized
                        sleep before each download (minimum possible number
@@ -334,6 +346,7 @@ class YoutubeDL(object):
                        Must only be used along with sleep_interval.
                        Actual sleep time will be a random float from range
                        [sleep_interval; max_sleep_interval].
+    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
     listformats:       Print an overview of available video formats and exit.
     list_thumbnails:   Print a table of all thumbnails and exit.
     match_filter:      A function that gets called with the info_dict of
@@ -353,9 +366,13 @@ class YoutubeDL(object):
                        geo_bypass_country
 
     The following options determine which downloader is picked:
-    external_downloader: Executable of the external downloader to call.
-                       None or unset for standard (built-in) downloader.
-    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
+    external_downloader: A dictionary of protocol keys and the executable of the
+                       external downloader to use for it. The allowed protocols
+                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
+                       Set the value to 'native' to use the native downloader
+    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
+                       or {'m3u8': 'ffmpeg'} instead.
+                       Use the native HLS downloader instead of ffmpeg/avconv
                        if True, otherwise use ffmpeg/avconv if False, otherwise
                        use downloader suggested by extractor if None.
 
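With the dictionary form, each protocol can be routed to a different downloader. A sketch based on the docstring above (it assumes aria2c is installed and on PATH; the URL is a placeholder):

```python
import yt_dlp

ydl_opts = {
    # HLS goes through the native downloader, everything else through aria2c
    'external_downloader': {'m3u8': 'native', 'default': 'aria2c'},
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://example.com/stream.m3u8'])
```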
@@ -378,6 +395,7 @@ class YoutubeDL(object):
                        Use 'default' as the name for arguments to be passed to all PP
 
     The following options are used by the extractors:
+    extractor_retries: Number of times to retry for known errors
     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
     hls_split_discontinuity: Split HLS playlists to different formats at
                        discontinuities such as ad breaks (default: False)
@@ -404,8 +422,9 @@ class YoutubeDL(object):
 
     params = None
     _ies = []
-    _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
+    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
     __prepare_filename_warned = False
+    _first_webpage_request = True
     _download_retcode = None
     _num_downloads = None
     _playlist_level = 0
@@ -418,8 +437,9 @@ class YoutubeDL(object):
             params = {}
         self._ies = []
         self._ies_instances = {}
-        self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
+        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
         self.__prepare_filename_warned = False
+        self._first_webpage_request = True
         self._post_hooks = []
         self._progress_hooks = []
         self._download_retcode = 0
@@ -530,7 +550,7 @@ class YoutubeDL(object):
                 when = pp_def['when']
                 del pp_def['when']
             else:
-                when = 'normal'
+                when = 'post_process'
             pp = pp_class(self, **compat_kwargs(pp_def))
             self.add_post_processor(pp, when=when)
 
@@ -584,7 +604,7 @@ class YoutubeDL(object):
         for ie in gen_extractor_classes():
             self.add_info_extractor(ie)
 
-    def add_post_processor(self, pp, when='normal'):
+    def add_post_processor(self, pp, when='post_process'):
         """Add a PostProcessor object to the end of the chain."""
         self._pps[when].append(pp)
         pp.set_downloader(self)
@@ -764,20 +784,26 @@ class YoutubeDL(object):
                 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
         return outtmpl_dict
 
-    def _prepare_filename(self, info_dict, tmpl_type='default'):
-        try:
-            template_dict = dict(info_dict)
+    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
+        """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
+        template_dict = dict(info_dict)
 
+        # duration_string
         template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
             formatSeconds(info_dict['duration'], '-')
             if info_dict.get('duration', None) is not None
             else None)
 
+        # epoch
         template_dict['epoch'] = int(time.time())
 
+        # autonumber
         autonumber_size = self.params.get('autonumber_size')
         if autonumber_size is None:
             autonumber_size = 5
         template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 
+        # resolution if not defined
         if template_dict.get('resolution') is None:
             if template_dict.get('width') and template_dict.get('height'):
                 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
@@ -786,19 +812,14 @@ class YoutubeDL(object):
         elif template_dict.get('width'):
             template_dict['resolution'] = '%dx?' % template_dict['width']
 
-        sanitize = lambda k, v: sanitize_filename(
-            compat_str(v),
-            restricted=self.params.get('restrictfilenames'),
-            is_id=(k == 'id' or k.endswith('_id')))
+        if sanitize is None:
+            sanitize = lambda k, v: v
         template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                              for k, v in template_dict.items()
                              if v is not None and not isinstance(v, (list, tuple, dict)))
         na = self.params.get('outtmpl_na_placeholder', 'NA')
         template_dict = collections.defaultdict(lambda: na, template_dict)
 
-        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
-        force_ext = OUTTMPL_TYPES.get(tmpl_type)
-
         # For fields playlist_index and autonumber convert all occurrences
         # of %(field)s to %(field)0Nd for backward compatibility
         field_size_compat_map = {
@@ -813,20 +834,6 @@ class YoutubeDL(object):
             r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
             outtmpl)
 
-        # As of [1] format syntax is:
-        #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
-        # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
-        FORMAT_RE = r'''(?x)
-            (?<!%)
-            %
-            \({0}\)  # mapping key
-            (?:[#0\-+ ]+)?  # conversion flags (optional)
-            (?:\d+)?  # minimum field width (optional)
-            (?:\.\d+)?  # precision (optional)
-            [hlL]?  # length modifier (optional)
-            (?P<type>[diouxXeEfFgGcrs%])  # conversion type
-            '''
-
         numeric_fields = list(self._NUMERIC_FIELDS)
 
         # Format date
@@ -854,6 +861,17 @@ class YoutubeDL(object):
                 FORMAT_RE.format(re.escape(numeric_field)),
                 r'%({0})s'.format(numeric_field), outtmpl)
 
+        return outtmpl, template_dict
+
+    def _prepare_filename(self, info_dict, tmpl_type='default'):
+        try:
+            sanitize = lambda k, v: sanitize_filename(
+                compat_str(v),
+                restricted=self.params.get('restrictfilenames'),
+                is_id=(k == 'id' or k.endswith('_id')))
+            outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
+            outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
+
             # expand_path translates '%%' into '%' and '$$' into '$'
             # correspondingly that is not what we want since we need to keep
             # '%%' intact for template dict substitution step. Working around
@@ -867,6 +885,7 @@ class YoutubeDL(object):
             # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict
 
+            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if force_ext is not None:
                 filename = replace_extension(filename, force_ext, template_dict.get('ext'))
 
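Factoring the template expansion out into `prepare_outtmpl` makes it usable on its own; a minimal sketch of the new method's contract (the info_dict values are invented):

```python
from yt_dlp import YoutubeDL

ydl = YoutubeDL({})
info = {'id': 'xyz', 'title': 'Some Video', 'ext': 'mp4'}
outtmpl, tmpl_dict = ydl.prepare_outtmpl('%(title)s [%(id)s].%(ext)s', info)
print(outtmpl % tmpl_dict)  # -> Some Video [xyz].mp4
```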
@@ -1165,57 +1184,24 @@ class YoutubeDL(object):
         else:
             raise Exception('Invalid result type: %s' % result_type)
 
+    def _ensure_dir_exists(self, path):
+        return make_dir(path, self.report_error)
+
     def __process_playlist(self, ie_result, download):
         # We process each entry in the playlist
         playlist = ie_result.get('title') or ie_result.get('id')
         self.to_screen('[download] Downloading playlist: %s' % playlist)
 
-        if self.params.get('allow_playlist_files', True):
-            ie_copy = {
-                'playlist': playlist,
-                'playlist_id': ie_result.get('id'),
-                'playlist_title': ie_result.get('title'),
-                'playlist_uploader': ie_result.get('uploader'),
-                'playlist_uploader_id': ie_result.get('uploader_id'),
-                'playlist_index': 0
-            }
-            ie_copy.update(dict(ie_result))
-
-            def ensure_dir_exists(path):
-                return make_dir(path, self.report_error)
-
-            if self.params.get('writeinfojson', False):
-                infofn = self.prepare_filename(ie_copy, 'pl_infojson')
-                if not ensure_dir_exists(encodeFilename(infofn)):
-                    return
-                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
-                    self.to_screen('[info] Playlist metadata is already present')
-                else:
-                    playlist_info = dict(ie_result)
-                    # playlist_info['entries'] = list(playlist_info['entries'])  # Entries is a generator which shouldnot be resolved here
-                    del playlist_info['entries']
-                    self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
-                    try:
-                        write_json_file(self.filter_requested_info(playlist_info), infofn)
-                    except (OSError, IOError):
-                        self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
-
-            if self.params.get('writedescription', False):
-                descfn = self.prepare_filename(ie_copy, 'pl_description')
-                if not ensure_dir_exists(encodeFilename(descfn)):
-                    return
-                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
-                    self.to_screen('[info] Playlist description is already present')
-                elif ie_result.get('description') is None:
-                    self.report_warning('There\'s no playlist description to write.')
-                else:
-                    try:
-                        self.to_screen('[info] Writing playlist description to: ' + descfn)
-                        with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
-                            descfile.write(ie_result['description'])
-                    except (OSError, IOError):
-                        self.report_error('Cannot write playlist description file ' + descfn)
-                        return
+        if 'entries' not in ie_result:
+            raise EntryNotInPlaylist()
+        incomplete_entries = bool(ie_result.get('requested_entries'))
+        if incomplete_entries:
+            def fill_missing_entries(entries, indexes):
+                ret = [None] * max(*indexes)
+                for i, entry in zip(indexes, entries):
+                    ret[i - 1] = entry
+                return ret
+            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
 
         playlist_results = []
 
@@ -1242,25 +1228,20 @@ class YoutubeDL(object):
 
         def make_playlistitems_entries(list_ie_entries):
             num_entries = len(list_ie_entries)
-            return [
-                list_ie_entries[i - 1] for i in playlistitems
-                if -num_entries <= i - 1 < num_entries]
-
-        def report_download(num_entries):
-            self.to_screen(
-                '[%s] playlist %s: Downloading %d videos' %
-                (ie_result['extractor'], playlist, num_entries))
+            for i in playlistitems:
+                if -num_entries < i <= num_entries:
+                    yield list_ie_entries[i - 1]
+                elif incomplete_entries:
+                    raise EntryNotInPlaylist()
 
         if isinstance(ie_entries, list):
             n_all_entries = len(ie_entries)
             if playlistitems:
-                entries = make_playlistitems_entries(ie_entries)
+                entries = list(make_playlistitems_entries(ie_entries))
             else:
                 entries = ie_entries[playliststart:playlistend]
             n_entries = len(entries)
-            self.to_screen(
-                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
-                (ie_result['extractor'], playlist, n_all_entries, n_entries))
+            msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
         elif isinstance(ie_entries, PagedList):
             if playlistitems:
                 entries = []
@@ -1272,25 +1253,73 @@ class YoutubeDL(object):
                 entries = ie_entries.getslice(
                     playliststart, playlistend)
             n_entries = len(entries)
-            report_download(n_entries)
+            msg = 'Downloading %d videos' % n_entries
         else:  # iterable
             if playlistitems:
-                entries = make_playlistitems_entries(list(itertools.islice(
-                    ie_entries, 0, max(playlistitems))))
+                entries = list(make_playlistitems_entries(list(itertools.islice(
+                    ie_entries, 0, max(playlistitems)))))
             else:
                 entries = list(itertools.islice(
                     ie_entries, playliststart, playlistend))
             n_entries = len(entries)
-            report_download(n_entries)
+            msg = 'Downloading %d videos' % n_entries
+
+        if any((entry is None for entry in entries)):
+            raise EntryNotInPlaylist()
+        if not playlistitems and (playliststart or playlistend):
+            playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
+        ie_result['entries'] = entries
+        ie_result['requested_entries'] = playlistitems
+
+        if self.params.get('allow_playlist_files', True):
+            ie_copy = {
+                'playlist': playlist,
+                'playlist_id': ie_result.get('id'),
+                'playlist_title': ie_result.get('title'),
+                'playlist_uploader': ie_result.get('uploader'),
+                'playlist_uploader_id': ie_result.get('uploader_id'),
+                'playlist_index': 0
+            }
+            ie_copy.update(dict(ie_result))
+
+            if self.params.get('writeinfojson', False):
+                infofn = self.prepare_filename(ie_copy, 'pl_infojson')
+                if not self._ensure_dir_exists(encodeFilename(infofn)):
+                    return
+                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
+                    self.to_screen('[info] Playlist metadata is already present')
+                else:
+                    self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
+                    try:
+                        write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
+                    except (OSError, IOError):
+                        self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
+
+            if self.params.get('writedescription', False):
+                descfn = self.prepare_filename(ie_copy, 'pl_description')
+                if not self._ensure_dir_exists(encodeFilename(descfn)):
+                    return
+                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
+                    self.to_screen('[info] Playlist description is already present')
+                elif ie_result.get('description') is None:
+                    self.report_warning('There\'s no playlist description to write.')
+                else:
+                    try:
+                        self.to_screen('[info] Writing playlist description to: ' + descfn)
+                        with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+                            descfile.write(ie_result['description'])
+                    except (OSError, IOError):
+                        self.report_error('Cannot write playlist description file ' + descfn)
+                        return
 
         if self.params.get('playlistreverse', False):
             entries = entries[::-1]
 
         if self.params.get('playlistrandom', False):
             random.shuffle(entries)
 
         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
 
+        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
         for i, entry in enumerate(entries, 1):
             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
             # This __x_forwarded_for_ip thing is a bit ugly but requires
@@ -1304,7 +1333,7 @@ class YoutubeDL(object):
                 'playlist_title': ie_result.get('title'),
                 'playlist_uploader': ie_result.get('uploader'),
                 'playlist_uploader_id': ie_result.get('uploader_id'),
-                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
+                'playlist_index': playlistitems[i - 1] if playlistitems else i,
                 'extractor': ie_result['extractor'],
                 'webpage_url': ie_result['webpage_url'],
                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
@@ -1523,76 +1552,6 @@ class YoutubeDL(object):
|
|||||||
selectors.append(current_selector)
|
selectors.append(current_selector)
|
||||||
return selectors
|
return selectors
|
||||||
|
|
||||||
def _build_selector_function(selector):
|
|
||||||
if isinstance(selector, list): # ,
|
|
||||||
fs = [_build_selector_function(s) for s in selector]
|
|
||||||
|
|
||||||
def selector_function(ctx):
|
|
||||||
for f in fs:
|
|
||||||
for format in f(ctx):
|
|
||||||
yield format
|
|
||||||
return selector_function
|
|
||||||
|
|
||||||
elif selector.type == GROUP: # ()
|
|
||||||
selector_function = _build_selector_function(selector.selector)
|
|
||||||
|
|
||||||
elif selector.type == PICKFIRST: # /
|
|
||||||
fs = [_build_selector_function(s) for s in selector.selector]
|
|
||||||
|
|
||||||
def selector_function(ctx):
|
|
||||||
for f in fs:
|
|
||||||
picked_formats = list(f(ctx))
|
|
||||||
if picked_formats:
|
|
||||||
return picked_formats
|
|
||||||
return []
|
|
||||||
|
|
||||||
elif selector.type == SINGLE: # atom
|
|
||||||
format_spec = selector.selector if selector.selector is not None else 'best'
|
|
||||||
|
|
||||||
if format_spec == 'all':
|
|
||||||
def selector_function(ctx):
|
|
||||||
formats = list(ctx['formats'])
|
|
||||||
if formats:
|
|
||||||
for f in formats:
|
|
||||||
yield f
|
|
||||||
|
|
||||||
else:
|
|
||||||
format_fallback = False
|
|
||||||
format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
|
|
||||||
if format_spec_obj is not None:
|
|
||||||
format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
|
|
||||||
format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
|
|
||||||
not_format_type = 'v' if format_type == 'a' else 'a'
|
|
||||||
format_modified = format_spec_obj.group(3) is not None
|
|
||||||
|
|
||||||
format_fallback = not format_type and not format_modified # for b, w
|
|
||||||
filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
|
|
||||||
if format_type and format_modified # bv*, ba*, wv*, wa*
|
|
||||||
else (lambda f: f.get(not_format_type + 'codec') == 'none')
|
|
||||||
if format_type # bv, ba, wv, wa
|
|
||||||
else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
|
|
||||||
if not format_modified # b, w
|
|
||||||
else None) # b*, w*
|
|
||||||
else:
|
|
||||||
format_idx = -1
|
|
||||||
filter_f = ((lambda f: f.get('ext') == format_spec)
|
|
||||||
if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
|
|
||||||
else (lambda f: f.get('format_id') == format_spec)) # id
|
|
||||||
|
|
||||||
def selector_function(ctx):
|
|
||||||
formats = list(ctx['formats'])
|
|
||||||
if not formats:
|
|
||||||
return
|
|
||||||
matches = list(filter(filter_f, formats)) if filter_f is not None else formats
|
|
||||||
if matches:
|
|
||||||
yield matches[format_idx]
|
|
||||||
elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
|
|
||||||
# for extractors with incomplete formats (audio only (soundcloud)
|
|
||||||
# or video only (imgur)) best/worst will fallback to
|
|
||||||
# best/worst {video,audio}-only format
|
|
||||||
yield formats[format_idx]
|
|
||||||
|
|
||||||
elif selector.type == MERGE: # +
|
|
||||||
def _merge(formats_pair):
|
def _merge(formats_pair):
|
||||||
format_1, format_2 = formats_pair
|
format_1, format_2 = formats_pair
|
||||||
|
|
||||||
@@ -1638,7 +1597,7 @@ class YoutubeDL(object):
|
|||||||
new_dict.update({
|
new_dict.update({
|
||||||
'width': the_only_video.get('width'),
|
'width': the_only_video.get('width'),
|
||||||
'height': the_only_video.get('height'),
|
'height': the_only_video.get('height'),
|
||||||
'resolution': the_only_video.get('resolution'),
|
'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
|
||||||
'fps': the_only_video.get('fps'),
|
'fps': the_only_video.get('fps'),
|
||||||
'vcodec': the_only_video.get('vcodec'),
|
'vcodec': the_only_video.get('vcodec'),
|
||||||
'vbr': the_only_video.get('vbr'),
|
'vbr': the_only_video.get('vbr'),
|
||||||
@@ -1653,6 +1612,93 @@ class YoutubeDL(object):

             return new_dict

+        def _build_selector_function(selector):
+            if isinstance(selector, list):  # ,
+                fs = [_build_selector_function(s) for s in selector]
+
+                def selector_function(ctx):
+                    for f in fs:
+                        for format in f(ctx):
+                            yield format
+                return selector_function
+
+            elif selector.type == GROUP:  # ()
+                selector_function = _build_selector_function(selector.selector)
+
+            elif selector.type == PICKFIRST:  # /
+                fs = [_build_selector_function(s) for s in selector.selector]
+
+                def selector_function(ctx):
+                    for f in fs:
+                        picked_formats = list(f(ctx))
+                        if picked_formats:
+                            return picked_formats
+                    return []
+
+            elif selector.type == SINGLE:  # atom
+                format_spec = (selector.selector or 'best').lower()
+
+                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
+                if format_spec == 'all':
+                    def selector_function(ctx):
+                        formats = list(ctx['formats'])
+                        if formats:
+                            for f in formats:
+                                yield f
+                elif format_spec == 'mergeall':
+                    def selector_function(ctx):
+                        formats = list(ctx['formats'])
+                        if not formats:
+                            return
+                        merged_format = formats[0]
+                        for f in formats[1:]:
+                            merged_format = _merge((merged_format, f))
+                        yield merged_format
+
+                else:
+                    format_fallback = False
+                    mobj = re.match(
+                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
+                        format_spec)
+                    if mobj is not None:
+                        format_idx = int_or_none(mobj.group('n'), default=1)
+                        format_idx = format_idx - 1 if mobj.group('bw')[0] == 'w' else -format_idx
+                        format_type = (mobj.group('type') or [None])[0]
+                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
+                        format_modified = mobj.group('mod') is not None
+
+                        format_fallback = not format_type and not format_modified  # for b, w
+                        filter_f = (
+                            (lambda f: f.get('%scodec' % format_type) != 'none')
+                            if format_type and format_modified  # bv*, ba*, wv*, wa*
+                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
+                            if format_type  # bv, ba, wv, wa
+                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
+                            if not format_modified  # b, w
+                            else None)  # b*, w*
+                    else:
+                        format_idx = -1
+                        filter_f = ((lambda f: f.get('ext') == format_spec)
+                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
+                                    else (lambda f: f.get('format_id') == format_spec))  # id
+
+                    def selector_function(ctx):
+                        formats = list(ctx['formats'])
+                        if not formats:
+                            return
+                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
+                        n = len(matches)
+                        if -n <= format_idx < n:
+                            yield matches[format_idx]
+                        elif format_fallback and ctx['incomplete_formats']:
+                            # for extractors with incomplete formats (audio only (soundcloud)
+                            # or video only (imgur)) best/worst will fallback to
+                            # best/worst {video,audio}-only format
+                            n = len(formats)
+                            if -n <= format_idx < n:
+                                yield formats[format_idx]
+
+        elif selector.type == MERGE:  # +
             selector_1, selector_2 = map(_build_selector_function, selector.selector)

             def selector_function(ctx):
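Note: the rewritten atom grammar above adds an optional `.N` index to the `b`/`w` selectors (e.g. `b.2` for the second-best format). As a minimal standalone sketch (not the actual yt-dlp entry point), the regex from this hunk decodes a spec as follows:

import re

def parse_format_spec(format_spec):
    # Same pattern as in the hunk above
    mobj = re.match(
        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
        format_spec)
    if mobj is None:
        return None
    n = int(mobj.group('n') or 1)
    # 'worst' counts from the start (0-based), 'best' from the end (negative index)
    format_idx = n - 1 if mobj.group('bw')[0] == 'w' else -n
    return format_idx, (mobj.group('type') or [None])[0], mobj.group('mod') is not None

print(parse_format_spec('b'))    # (-1, None, False) -> best combined format
print(parse_format_spec('b.2'))  # (-2, None, False) -> second-best
print(parse_format_spec('wv*'))  # (0, 'v', True)    -> worst format that contains video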
@@ -1788,12 +1834,16 @@ class YoutubeDL(object):
         if 'display_id' not in info_dict and 'id' in info_dict:
             info_dict['display_id'] = info_dict['id']

-        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
+        for ts_key, date_key in (
+                ('timestamp', 'upload_date'),
+                ('release_timestamp', 'release_date'),
+        ):
+            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                 # see http://bugs.python.org/issue1646728)
                 try:
-                    upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
-                    info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                 except (ValueError, OverflowError, OSError):
                     pass
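Note: the loop above simply reuses the existing `upload_date` derivation for the new `release_timestamp`/`release_date` pair. A self-contained illustration of the conversion, with arbitrarily chosen timestamps:

import datetime

info = {'timestamp': 1617148800, 'release_timestamp': 1614556800}
for ts_key, date_key in (('timestamp', 'upload_date'),
                         ('release_timestamp', 'release_date')):
    if info.get(date_key) is None and info.get(ts_key) is not None:
        try:
            # Out-of-range values (e.g. negative timestamps on Windows) raise here
            info[date_key] = datetime.datetime.utcfromtimestamp(info[ts_key]).strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

print(info['upload_date'], info['release_date'])  # 20210331 20210301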
@@ -2036,7 +2086,8 @@ class YoutubeDL(object):
             self.to_stdout(formatSeconds(info_dict['duration']))
         print_mandatory('format')
         if self.params.get('forcejson', False):
-            self.to_stdout(json.dumps(info_dict))
+            self.post_extract(info_dict)
+            self.to_stdout(json.dumps(info_dict, default=repr))

     def process_info(self, info_dict):
         """Process a single resolved IE result."""
@@ -2059,14 +2110,15 @@ class YoutubeDL(object):
         if self._match_entry(info_dict, incomplete=False) is not None:
             return

+        self.post_extract(info_dict)
         self._num_downloads += 1

-        info_dict = self.pre_process(info_dict)
+        info_dict, _ = self.pre_process(info_dict)

+        # info_dict['_filename'] needs to be set for backward compatibility
         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
         temp_filename = self.prepare_filename(info_dict, 'temp')
         files_to_move = {}
-        skip_dl = self.params.get('skip_download', False)

         # Forced printings
         self.__forced_printings(info_dict, full_filename, incomplete=False)
@@ -2081,17 +2133,14 @@ class YoutubeDL(object):
         if full_filename is None:
             return

-        def ensure_dir_exists(path):
-            return make_dir(path, self.report_error)
-
-        if not ensure_dir_exists(encodeFilename(full_filename)):
+        if not self._ensure_dir_exists(encodeFilename(full_filename)):
             return
-        if not ensure_dir_exists(encodeFilename(temp_filename)):
+        if not self._ensure_dir_exists(encodeFilename(temp_filename)):
             return

         if self.params.get('writedescription', False):
             descfn = self.prepare_filename(info_dict, 'description')
-            if not ensure_dir_exists(encodeFilename(descfn)):
+            if not self._ensure_dir_exists(encodeFilename(descfn)):
                 return
             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                 self.to_screen('[info] Video description is already present')
@@ -2108,7 +2157,7 @@ class YoutubeDL(object):

         if self.params.get('writeannotations', False):
             annofn = self.prepare_filename(info_dict, 'annotation')
-            if not ensure_dir_exists(encodeFilename(annofn)):
+            if not self._ensure_dir_exists(encodeFilename(annofn)):
                 return
             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                 self.to_screen('[info] Video annotations are already present')
@@ -2131,7 +2180,10 @@ class YoutubeDL(object):
                 fd.add_progress_hook(ph)
             if self.params.get('verbose'):
                 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
-            return fd.download(name, info, subtitle)
+            new_info = dict(info)
+            if new_info.get('http_headers') is None:
+                new_info['http_headers'] = self._calc_headers(new_info)
+            return fd.download(name, new_info, subtitle)

         subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                        self.params.get('writeautomaticsub')])
@@ -2143,13 +2195,12 @@ class YoutubeDL(object):
             # ie = self.get_info_extractor(info_dict['extractor_key'])
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
-                sub_fn = self.prepare_filename(info_dict, 'subtitle')
-                sub_filename = subtitles_filename(
-                    temp_filename if not skip_dl else sub_fn,
-                    sub_lang, sub_format, info_dict.get('ext'))
-                sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
+                sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
+                sub_filename_final = subtitles_filename(
+                    self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
+                    sub_info['filepath'] = sub_filename
                     files_to_move[sub_filename] = sub_filename_final
                 else:
                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
@@ -2159,71 +2210,41 @@ class YoutubeDL(object):
                         # See https://github.com/ytdl-org/youtube-dl/issues/10268
                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                             subfile.write(sub_info['data'])
+                        sub_info['filepath'] = sub_filename
                         files_to_move[sub_filename] = sub_filename_final
                     except (OSError, IOError):
                         self.report_error('Cannot write subtitles file ' + sub_filename)
                         return
                 else:
                     try:
-                        dl(sub_filename, sub_info, subtitle=True)
-                        '''
-                        if self.params.get('sleep_interval_subtitles', False):
-                            dl(sub_filename, sub_info)
-                        else:
-                            sub_data = ie._request_webpage(
-                                sub_info['url'], info_dict['id'], note=False).read()
-                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
-                                subfile.write(sub_data)
-                        '''
+                        dl(sub_filename, sub_info.copy(), subtitle=True)
+                        sub_info['filepath'] = sub_filename
                         files_to_move[sub_filename] = sub_filename_final
                     except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                         self.report_warning('Unable to download subtitle for "%s": %s' %
                                             (sub_lang, error_to_compat_str(err)))
                         continue

-        if skip_dl:
-            if self.params.get('convertsubtitles', False):
-                # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
-                filename_real_ext = os.path.splitext(full_filename)[1][1:]
-                filename_wo_ext = (
-                    os.path.splitext(full_filename)[0]
-                    if filename_real_ext == info_dict['ext']
-                    else full_filename)
-                afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
-                # if subconv.available:
-                #     info_dict['__postprocessors'].append(subconv)
-                if os.path.exists(encodeFilename(afilename)):
-                    self.to_screen(
-                        '[download] %s has already been downloaded and '
-                        'converted' % afilename)
-                else:
-                    try:
-                        self.post_process(full_filename, info_dict, files_to_move)
-                    except PostProcessingError as err:
-                        self.report_error('Postprocessing: %s' % str(err))
-                        return
-
         if self.params.get('writeinfojson', False):
             infofn = self.prepare_filename(info_dict, 'infojson')
-            if not ensure_dir_exists(encodeFilename(infofn)):
+            if not self._ensure_dir_exists(encodeFilename(infofn)):
                 return
             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                 self.to_screen('[info] Video metadata is already present')
             else:
                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
                 try:
-                    write_json_file(self.filter_requested_info(info_dict), infofn)
+                    write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
                 except (OSError, IOError):
                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
                     return
             info_dict['__infojson_filename'] = infofn

-        thumbfn = self.prepare_filename(info_dict, 'thumbnail')
-        thumb_fn_temp = temp_filename if not skip_dl else thumbfn
-        for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
-            thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
-            thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
-            files_to_move[thumb_filename_temp] = info_dict['__thumbnail_filename'] = thumb_filename
+        for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
+            thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
+            thumb_filename = replace_extension(
+                self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
+            files_to_move[thumb_filename_temp] = thumb_filename

         # Write internet shortcut files
         url_link = webloc_link = desktop_link = False
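Note: a recurring pattern in this hunk is that side files (subtitles, info JSON, thumbnails) are first written next to the temporary download and queued in `files_to_move`, to be relocated only after post-processing. A toy sketch of the idea (file names hypothetical, not from the diff):

import os
import shutil

files_to_move = {}  # temporary path -> final path

def write_sidecar(temp_name, final_name, data):
    # Write beside the temp download first; the move happens later
    with open(temp_name, 'w') as f:
        f.write(data)
    files_to_move[temp_name] = final_name

write_sidecar('video.tmp.en.vtt', 'video.en.vtt', 'WEBVTT\n')
for old, new in files_to_move.items():  # after all post-processors have run
    shutil.move(old, new)
print(os.path.exists('video.en.vtt'))  # True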
@@ -2274,9 +2295,20 @@ class YoutubeDL(object):
             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
                 return

-        # Download
+        try:
+            info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
+        except PostProcessingError as err:
+            self.report_error('Preprocessing: %s' % str(err))
+            return
+
         must_record_download_archive = False
-        if not skip_dl:
+        if self.params.get('skip_download', False):
+            info_dict['filepath'] = temp_filename
+            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
+            info_dict['__files_to_move'] = files_to_move
+            info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
+        else:
+            # Download
             try:

                 def existing_file(*filepaths):
@@ -2336,10 +2368,17 @@ class YoutubeDL(object):

                 requested_formats = info_dict['requested_formats']
                 old_ext = info_dict['ext']
-                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
-                    info_dict['ext'] = 'mkv'
-                    self.report_warning(
-                        'Requested formats are incompatible for merge and will be merged into mkv.')
+                if self.params.get('merge_output_format') is None:
+                    if not compatible_formats(requested_formats):
+                        info_dict['ext'] = 'mkv'
+                        self.report_warning(
+                            'Requested formats are incompatible for merge and will be merged into mkv.')
+                    if (info_dict['ext'] == 'webm'
+                            and self.params.get('writethumbnail', False)
+                            and info_dict.get('thumbnails')):
+                        info_dict['ext'] = 'mkv'
+                        self.report_warning(
+                            'webm doesn\'t support embedding a thumbnail, mkv will be used.')

                 def correct_ext(filename):
                     filename_real_ext = os.path.splitext(filename)[1][1:]
@@ -2361,7 +2400,7 @@ class YoutubeDL(object):
                     fname = prepend_extension(
                         self.prepare_filename(new_info, 'temp'),
                         'f%s' % f['format_id'], new_info['ext'])
-                    if not ensure_dir_exists(fname):
+                    if not self._ensure_dir_exists(fname):
                         return
                     downloaded.append(fname)
                     partial_success, real_download = dl(fname, new_info)
@@ -2438,9 +2477,8 @@ class YoutubeDL(object):
                 else:
                     assert fixup_policy in ('ignore', 'never')

-            if (info_dict.get('protocol') == 'm3u8_native'
-                    or info_dict.get('protocol') == 'm3u8'
-                    and self.params.get('hls_prefer_native')):
+            if ('protocol' in info_dict
+                    and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
                 if fixup_policy == 'warn':
                     self.report_warning('%s: malformed AAC bitstream detected.' % (
                         info_dict['id']))
@@ -2456,13 +2494,13 @@ class YoutubeDL(object):
                 assert fixup_policy in ('ignore', 'never')

             try:
-                self.post_process(dl_filename, info_dict, files_to_move)
+                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
             except PostProcessingError as err:
                 self.report_error('Postprocessing: %s' % str(err))
                 return
             try:
                 for ph in self._post_hooks:
-                    ph(full_filename)
+                    ph(info_dict['filepath'])
             except Exception as err:
                 self.report_error('post hooks: %s' % str(err))
                 return
@@ -2501,7 +2539,8 @@ class YoutubeDL(object):
                 raise
             else:
                 if self.params.get('dump_single_json', False):
-                    self.to_stdout(json.dumps(res))
+                    self.post_extract(res)
+                    self.to_stdout(json.dumps(res, default=repr))

         return self._download_retcode
@@ -2510,10 +2549,10 @@ class YoutubeDL(object):
                 [info_filename], mode='r',
                 openhook=fileinput.hook_encoded('utf-8'))) as f:
             # FileInput doesn't have a read method, we can't call json.load
-            info = self.filter_requested_info(json.loads('\n'.join(f)))
+            info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
         try:
             self.process_ie_result(info, download=True)
-        except DownloadError:
+        except (DownloadError, EntryNotInPlaylist):
             webpage_url = info.get('webpage_url')
             if webpage_url is not None:
                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
@@ -2523,21 +2562,32 @@ class YoutubeDL(object):
             return self._download_retcode

     @staticmethod
-    def filter_requested_info(info_dict):
-        fields_to_remove = ('requested_formats', 'requested_subtitles')
-        return dict(
-            (k, v) for k, v in info_dict.items()
-            if (k[0] != '_' or k == '_type') and k not in fields_to_remove)
+    def filter_requested_info(info_dict, actually_filter=True):
+        if not actually_filter:
+            info_dict['epoch'] = int(time.time())
+            return info_dict
+        exceptions = {
+            'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
+            'keep': ['_type'],
+        }
+        keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
+        filter_fn = lambda obj: (
+            list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
+            else obj if not isinstance(obj, dict)
+            else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))
+        return filter_fn(info_dict)

-    def run_pp(self, pp, infodict, files_to_move={}):
+    def run_pp(self, pp, infodict):
         files_to_delete = []
+        if '__files_to_move' not in infodict:
+            infodict['__files_to_move'] = {}
         files_to_delete, infodict = pp.run(infodict)
         if not files_to_delete:
-            return files_to_move, infodict
+            return infodict

         if self.params.get('keepvideo', False):
             for f in files_to_delete:
-                files_to_move.setdefault(f, '')
+                infodict['__files_to_move'].setdefault(f, '')
         else:
             for old_filename in set(files_to_delete):
                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
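Note: `filter_requested_info` is now recursive, so internal keys are stripped from nested playlist entries and format dicts as well. The filter from the hunk above can be exercised on its own:

exceptions = {
    'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
    'keep': ['_type'],
}
keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
filter_fn = lambda obj: (
    list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
    else obj if not isinstance(obj, dict)
    else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))

info = {'_type': 'video', 'id': 'x', '__files_to_move': {},
        'formats': [{'url': 'u', 'filepath': '/tmp/x'}]}
# '_type' survives via the keep list; other '_'-prefixed and removed keys are dropped, recursively
print(filter_fn(info))  # {'_type': 'video', 'id': 'x', 'formats': [{'url': 'u'}]}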
@@ -2545,27 +2595,48 @@ class YoutubeDL(object):
                     os.remove(encodeFilename(old_filename))
                 except (IOError, OSError):
                     self.report_warning('Unable to remove downloaded original file')
-                if old_filename in files_to_move:
-                    del files_to_move[old_filename]
-        return files_to_move, infodict
+                if old_filename in infodict['__files_to_move']:
+                    del infodict['__files_to_move'][old_filename]
+        return infodict

-    def pre_process(self, ie_info):
+    @staticmethod
+    def post_extract(info_dict):
+        def actual_post_extract(info_dict):
+            if info_dict.get('_type') in ('playlist', 'multi_video'):
+                for video_dict in info_dict.get('entries', {}):
+                    actual_post_extract(video_dict or {})
+                return
+
+            if '__post_extractor' not in info_dict:
+                return
+            post_extractor = info_dict['__post_extractor']
+            if post_extractor:
+                info_dict.update(post_extractor().items())
+            del info_dict['__post_extractor']
+            return
+
+        actual_post_extract(info_dict or {})
+
+    def pre_process(self, ie_info, key='pre_process', files_to_move=None):
         info = dict(ie_info)
-        for pp in self._pps['beforedl']:
-            info = self.run_pp(pp, info)[1]
-        return info
+        info['__files_to_move'] = files_to_move or {}
+        for pp in self._pps[key]:
+            info = self.run_pp(pp, info)
+        return info, info.pop('__files_to_move', None)

-    def post_process(self, filename, ie_info, files_to_move={}):
+    def post_process(self, filename, ie_info, files_to_move=None):
         """Run all the postprocessors on the given file."""
         info = dict(ie_info)
         info['filepath'] = filename
-        info['__files_to_move'] = {}
+        info['__files_to_move'] = files_to_move or {}

-        for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
-            files_to_move, info = self.run_pp(pp, info, files_to_move)
-        info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info)[1]
-        for pp in self._pps['aftermove']:
-            info = self.run_pp(pp, info, {})[1]
+        for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
+            info = self.run_pp(pp, info)
+        info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
+        del info['__files_to_move']
+        for pp in self._pps['after_move']:
+            info = self.run_pp(pp, info)
+        return info

     def _make_archive_id(self, info_dict):
         video_id = info_dict.get('id')
@@ -2614,12 +2685,11 @@ class YoutubeDL(object):
             return 'audio only'
         if format.get('resolution') is not None:
             return format['resolution']
-        if format.get('height') is not None:
-            if format.get('width') is not None:
-                res = '%sx%s' % (format['width'], format['height'])
-            else:
-                res = '%sp' % format['height']
-        elif format.get('width') is not None:
+        if format.get('width') and format.get('height'):
+            res = '%dx%d' % (format['width'], format['height'])
+        elif format.get('height'):
+            res = '%sp' % format['height']
+        elif format.get('width'):
             res = '%dx?' % format['width']
         else:
             res = default
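Note: the flattened `format_resolution` branching above can be checked in isolation (a condensed copy for illustration; the real method also returns early for audio-only formats and explicit `resolution` fields):

def format_resolution(format, default='unknown'):
    # Full WxH when both dimensions are known, '<h>p' for height only, '<w>x?' for width only
    if format.get('width') and format.get('height'):
        return '%dx%d' % (format['width'], format['height'])
    elif format.get('height'):
        return '%sp' % format['height']
    elif format.get('width'):
        return '%dx?' % format['width']
    return default

print(format_resolution({'width': 1280, 'height': 720}))  # 1280x720
print(format_resolution({'height': 720}))                 # 720p
print(format_resolution({'width': 1280}))                 # 1280x?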
@@ -2705,7 +2775,7 @@ class YoutubeDL(object):
                 '|',
                 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                 format_field(f, 'tbr', '%4dk'),
-                f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n").replace('niconico_', ''),
+                shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                 '|',
                 format_field(f, 'vcodec', default='unknown').replace('none', ''),
                 format_field(f, 'vbr', '%4dk'),
@@ -2933,7 +3003,7 @@ class YoutubeDL(object):
             thumb_ext = determine_ext(t['url'], 'jpg')
             suffix = '%s.' % t['id'] if multiple else ''
             thumb_display_id = '%s ' % t['id'] if multiple else ''
-            t['filename'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
+            t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                 ret.append(suffix + thumb_ext)
@@ -169,25 +169,35 @@ def _real_main(argv=None):
             parser.error('max sleep interval must be greater than or equal to min sleep interval')
     else:
         opts.max_sleep_interval = opts.sleep_interval
+    if opts.sleep_interval_subtitles is not None:
+        if opts.sleep_interval_subtitles < 0:
+            parser.error('subtitles sleep interval must be positive or 0')
+    if opts.sleep_interval_requests is not None:
+        if opts.sleep_interval_requests < 0:
+            parser.error('requests sleep interval must be positive or 0')
     if opts.ap_mso and opts.ap_mso not in MSO_INFO:
         parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
     if opts.overwrites:
         # --yes-overwrites implies --no-continue
         opts.continue_dl = False
+    if opts.concurrent_fragment_downloads <= 0:
+        raise ValueError('Concurrent fragments must be positive')

-    def parse_retries(retries):
+    def parse_retries(retries, name=''):
         if retries in ('inf', 'infinite'):
             parsed_retries = float('inf')
         else:
             try:
                 parsed_retries = int(retries)
             except (TypeError, ValueError):
-                parser.error('invalid retry count specified')
+                parser.error('invalid %sretry count specified' % name)
         return parsed_retries
     if opts.retries is not None:
         opts.retries = parse_retries(opts.retries)
     if opts.fragment_retries is not None:
-        opts.fragment_retries = parse_retries(opts.fragment_retries)
+        opts.fragment_retries = parse_retries(opts.fragment_retries, 'fragment ')
+    if opts.extractor_retries is not None:
+        opts.extractor_retries = parse_retries(opts.extractor_retries, 'extractor ')
     if opts.buffersize is not None:
         numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
         if numeric_buffersize is None:
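Note: `parse_retries` is a closure over `parser` in the real code; a standalone equivalent with the new `name` parameter (a plain exception stands in for `parser.error` in this sketch):

def parse_retries(retries, name=''):
    if retries in ('inf', 'infinite'):
        return float('inf')  # retry forever
    try:
        return int(retries)
    except (TypeError, ValueError):
        raise SystemExit('invalid %sretry count specified' % name)

print(parse_retries('inf'))              # inf
print(parse_retries('10', 'fragment '))  # 10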
@@ -218,8 +228,11 @@ def _real_main(argv=None):
         if not re.match(remux_regex, opts.remuxvideo):
             parser.error('invalid video remux format specified')
     if opts.convertsubtitles is not None:
-        if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
+        if opts.convertsubtitles not in ('srt', 'vtt', 'ass', 'lrc'):
             parser.error('invalid subtitle format specified')
+    if opts.convertthumbnails is not None:
+        if opts.convertthumbnails not in ('jpg', ):
+            parser.error('invalid thumbnail format specified')

     if opts.date is not None:
         date = DateRange.day(opts.date)
@@ -262,11 +275,21 @@ def _real_main(argv=None):
     any_printing = opts.print_json
     download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive

+    # If JSON is not printed anywhere, but comments are requested, save it to file
+    printing_json = opts.dumpjson or opts.print_json or opts.dump_single_json
+    if opts.getcomments and not printing_json:
+        opts.writeinfojson = True
+
     def report_conflict(arg1, arg2):
         write_string('WARNING: %s is ignored since %s was given\n' % (arg2, arg1), out=sys.stderr)

     if opts.remuxvideo and opts.recodevideo:
         report_conflict('--recode-video', '--remux-video')
         opts.remuxvideo = False
+    if opts.sponskrub_cut and opts.split_chapters and opts.sponskrub is not False:
+        report_conflict('--split-chapter', '--sponskrub-cut')
+        opts.sponskrub_cut = False

     if opts.allow_unplayable_formats:
         if opts.extractaudio:
             report_conflict('--allow-unplayable-formats', '--extract-audio')
@@ -302,7 +325,22 @@ def _real_main(argv=None):
         postprocessors.append({
             'key': 'MetadataFromField',
             'formats': opts.metafromfield,
-            'when': 'beforedl'
+            # Run this immediately after extraction is complete
+            'when': 'pre_process'
+        })
+    if opts.convertsubtitles:
+        postprocessors.append({
+            'key': 'FFmpegSubtitlesConvertor',
+            'format': opts.convertsubtitles,
+            # Run this before the actual video download
+            'when': 'before_dl'
+        })
+    if opts.convertthumbnails:
+        postprocessors.append({
+            'key': 'FFmpegThumbnailsConvertor',
+            'format': opts.convertthumbnails,
+            # Run this before the actual video download
+            'when': 'before_dl'
         })
     if opts.extractaudio:
         postprocessors.append({
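Note: the `when` key selects the stage at which a postprocessor runs; after this renaming the stages are `pre_process` (right after extraction), `before_dl` (before the video download), the default `post_process`, and `after_move` (after files reach their final location). A sketch of declarations using them (the `formats`/`exec_cmd` values are placeholders, not from the diff):

postprocessors = [
    # Run immediately after extraction is complete
    {'key': 'MetadataFromField', 'formats': ['%(title)s'], 'when': 'pre_process'},
    # Run before the actual video download
    {'key': 'FFmpegSubtitlesConvertor', 'format': 'srt', 'when': 'before_dl'},
    # Run only after the files have been moved to their final locations
    {'key': 'ExecAfterDownload', 'exec_cmd': 'echo {}', 'when': 'after_move'},
]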
@@ -331,15 +369,11 @@ def _real_main(argv=None):
     # so metadata can be added here.
     if opts.addmetadata:
         postprocessors.append({'key': 'FFmpegMetadata'})
-    if opts.convertsubtitles:
-        postprocessors.append({
-            'key': 'FFmpegSubtitlesConvertor',
-            'format': opts.convertsubtitles,
-        })
     if opts.embedsubtitles:
         already_have_subtitle = opts.writesubtitles
         postprocessors.append({
             'key': 'FFmpegEmbedSubtitle',
+            # already_have_subtitle = True prevents the file from being deleted after embedding
             'already_have_subtitle': already_have_subtitle
         })
         if not already_have_subtitle:
@@ -348,19 +382,9 @@ def _real_main(argv=None):
         # this was the old behaviour if only --all-sub was given.
         if opts.allsubtitles and not opts.writeautomaticsub:
             opts.writesubtitles = True
-    if opts.embedthumbnail:
-        already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
-        postprocessors.append({
-            'key': 'EmbedThumbnail',
-            'already_have_thumbnail': already_have_thumbnail
-        })
-        if not already_have_thumbnail:
-            opts.writethumbnail = True
-    # XAttrMetadataPP should be run after post-processors that may change file
-    # contents
-    if opts.xattrs:
-        postprocessors.append({'key': 'XAttrMetadata'})
-    # This should be below all ffmpeg PP because it may cut parts out from the video
+    # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment
+    # but must be below EmbedSubtitle and FFmpegMetadata
+    # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29
     # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found
     if opts.sponskrub is not False:
         postprocessors.append({
@@ -371,12 +395,27 @@ def _real_main(argv=None):
             'force': opts.sponskrub_force,
             'ignoreerror': opts.sponskrub is None,
         })
+    if opts.embedthumbnail:
+        already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
+        postprocessors.append({
+            'key': 'EmbedThumbnail',
+            # already_have_thumbnail = True prevents the file from being deleted after embedding
+            'already_have_thumbnail': already_have_thumbnail
+        })
+        if not already_have_thumbnail:
+            opts.writethumbnail = True
+    if opts.split_chapters:
+        postprocessors.append({'key': 'FFmpegSplitChapters'})
+    # XAttrMetadataPP should be run after post-processors that may change file contents
+    if opts.xattrs:
+        postprocessors.append({'key': 'XAttrMetadata'})
     # ExecAfterDownload must be the last PP
     if opts.exec_cmd:
         postprocessors.append({
             'key': 'ExecAfterDownload',
             'exec_cmd': opts.exec_cmd,
-            'when': 'aftermove'
+            # Run this only after the files have been moved to their final locations
+            'when': 'after_move'
         })

     def report_args_compat(arg, name):
@@ -402,7 +441,6 @@ def _real_main(argv=None):
         else match_filter_func(opts.match_filter))

     ydl_opts = {
-        'convertsubtitles': opts.convertsubtitles,
         'usenetrc': opts.usenetrc,
         'username': opts.username,
         'password': opts.password,
@@ -447,8 +485,10 @@ def _real_main(argv=None):
         'overwrites': opts.overwrites,
         'retries': opts.retries,
         'fragment_retries': opts.fragment_retries,
+        'extractor_retries': opts.extractor_retries,
         'skip_unavailable_fragments': opts.skip_unavailable_fragments,
         'keep_fragments': opts.keep_fragments,
+        'concurrent_fragment_downloads': opts.concurrent_fragment_downloads,
         'buffersize': opts.buffersize,
         'noresizebuffer': opts.noresizebuffer,
         'http_chunk_size': opts.http_chunk_size,
@@ -466,8 +506,9 @@ def _real_main(argv=None):
         'updatetime': opts.updatetime,
         'writedescription': opts.writedescription,
         'writeannotations': opts.writeannotations,
-        'writeinfojson': opts.writeinfojson or opts.getcomments,
+        'writeinfojson': opts.writeinfojson,
         'allow_playlist_files': opts.allow_playlist_files,
+        'clean_infojson': opts.clean_infojson,
         'getcomments': opts.getcomments,
         'writethumbnail': opts.writethumbnail,
         'write_all_thumbnails': opts.write_all_thumbnails,
@@ -524,6 +565,7 @@ def _real_main(argv=None):
         'fixup': opts.fixup,
         'source_address': opts.source_address,
         'call_home': opts.call_home,
+        'sleep_interval_requests': opts.sleep_interval_requests,
         'sleep_interval': opts.sleep_interval,
         'max_sleep_interval': opts.max_sleep_interval,
         'sleep_interval_subtitles': opts.sleep_interval_subtitles,
@@ -541,7 +583,6 @@ def _real_main(argv=None):
         'postprocessor_args': opts.postprocessor_args,
         'cn_verification_proxy': opts.cn_verification_proxy,
         'geo_verification_proxy': opts.geo_verification_proxy,
-        'config_location': opts.config_location,
         'geo_bypass': opts.geo_bypass,
         'geo_bypass_country': opts.geo_bypass_country,
         'geo_bypass_ip_block': opts.geo_bypass_ip_block,
@@ -78,6 +78,15 @@ try:
 except ImportError:  # Python 2
     import Cookie as compat_cookies

+if sys.version_info[0] == 2:
+    class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
+        def load(self, rawdata):
+            if isinstance(rawdata, compat_str):
+                rawdata = str(rawdata)
+            return super(compat_cookies_SimpleCookie, self).load(rawdata)
+else:
+    compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
+
 try:
     import html.entities as compat_html_entities
 except ImportError:  # Python 2
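Note: the shim above only changes behaviour on Python 2, where `SimpleCookie.load` fails on `unicode` input. On Python 3 the alias is the stdlib class unchanged, e.g.:

from http import cookies  # what compat_cookies resolves to on Python 3

c = cookies.SimpleCookie()
c.load('PREF=f1=50000000; path=/; domain=.example.com')
print(c['PREF'].value)      # f1=50000000
print(c['PREF']['domain'])  # .example.com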
@@ -3020,6 +3029,7 @@ __all__ = [
     'compat_cookiejar',
     'compat_cookiejar_Cookie',
     'compat_cookies',
+    'compat_cookies_SimpleCookie',
     'compat_ctypes_WINFUNCTYPE',
     'compat_etree_Element',
     'compat_etree_fromstring',
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals

+from ..compat import compat_str
 from ..utils import (
     determine_protocol,
 )
@@ -42,6 +43,23 @@ PROTOCOL_MAP = {
 }


+def shorten_protocol_name(proto, simplify=False):
+    short_protocol_names = {
+        'm3u8_native': 'm3u8_n',
+        'http_dash_segments': 'dash',
+        'niconico_dmc': 'dmc',
+    }
+    if simplify:
+        short_protocol_names.update({
+            'https': 'http',
+            'ftps': 'ftp',
+            'm3u8_native': 'm3u8',
+            'm3u8_frag_urls': 'm3u8',
+            'dash_frag_urls': 'dash',
+        })
+    return short_protocol_names.get(proto, proto)
+
+
 def get_suitable_downloader(info_dict, params={}, default=HttpFD):
     """Get the downloader class that can handle the info dict."""
     protocol = determine_protocol(info_dict)
|
|||||||
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
||||||
# return FFmpegFD
|
# return FFmpegFD
|
||||||
|
|
||||||
external_downloader = params.get('external_downloader')
|
downloaders = params.get('external_downloader')
|
||||||
if external_downloader is not None:
|
external_downloader = (
|
||||||
|
downloaders if isinstance(downloaders, compat_str) or downloaders is None
|
||||||
|
else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default')))
|
||||||
|
if external_downloader and external_downloader.lower() == 'native':
|
||||||
|
external_downloader = 'native'
|
||||||
|
|
||||||
|
if external_downloader not in (None, 'native'):
|
||||||
ed = get_external_downloader(external_downloader)
|
ed = get_external_downloader(external_downloader)
|
||||||
if ed.can_download(info_dict):
|
if ed.can_download(info_dict, external_downloader):
|
||||||
return ed
|
return ed
|
||||||
|
|
||||||
if protocol.startswith('m3u8'):
|
if protocol.startswith('m3u8'):
|
||||||
if info_dict.get('is_live'):
|
if info_dict.get('is_live'):
|
||||||
return FFmpegFD
|
return FFmpegFD
|
||||||
|
elif external_downloader == 'native':
|
||||||
|
return HlsFD
|
||||||
elif _get_real_downloader(info_dict, 'frag_urls', params, None):
|
elif _get_real_downloader(info_dict, 'frag_urls', params, None):
|
||||||
return HlsFD
|
return HlsFD
|
||||||
elif params.get('hls_prefer_native') is True:
|
elif params.get('hls_prefer_native') is True:
|
||||||
@@ -70,6 +96,7 @@ def get_suitable_downloader(info_dict, params={}, default=HttpFD):
|
|||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'get_suitable_downloader',
|
|
||||||
'FileDownloader',
|
'FileDownloader',
|
||||||
|
'get_suitable_downloader',
|
||||||
|
'shorten_protocol_name',
|
||||||
]
|
]
|
||||||
|
|||||||
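Note: `external_downloader` may now be either a plain string (the old behaviour) or a dict keyed by simplified protocol name with a `'default'` fallback. A sketch of just the resolution step, with a reduced stand-in for `shorten_protocol_name` (the full helper is in the hunk above):

def shorten_protocol_name(proto, simplify=False):
    # Reduced stand-in covering only the protocols used below
    table = {'http_dash_segments': 'dash', 'https': 'http', 'm3u8_native': 'm3u8'}
    return table.get(proto, proto) if simplify else proto

def resolve_downloader(downloaders, protocol):
    if downloaders is None or isinstance(downloaders, str):
        return downloaders  # old single-downloader behaviour
    return downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default'))

print(resolve_downloader('aria2c', 'https'))                                              # aria2c
print(resolve_downloader({'dash': 'aria2c', 'default': 'native'}, 'http_dash_segments'))  # aria2c
print(resolve_downloader({'dash': 'aria2c', 'default': 'native'}, 'https'))               # native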
@@ -326,6 +326,12 @@ class FileDownloader(object):
|
|||||||
"""Report it was impossible to resume download."""
|
"""Report it was impossible to resume download."""
|
||||||
self.to_screen('[download] Unable to resume')
|
self.to_screen('[download] Unable to resume')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def supports_manifest(manifest):
|
||||||
|
""" Whether the downloader can download the fragments from the manifest.
|
||||||
|
Redefine in subclasses if needed. """
|
||||||
|
pass
|
||||||
|
|
||||||
def download(self, filename, info_dict, subtitle=False):
|
def download(self, filename, info_dict, subtitle=False):
|
||||||
"""Download to a filename using the info from info_dict
|
"""Download to a filename using the info from info_dict
|
||||||
Return True on success and False otherwise
|
Return True on success and False otherwise
|
||||||
|
|||||||
@@ -1,18 +1,26 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
try:
|
||||||
|
import concurrent.futures
|
||||||
|
can_threaded_download = True
|
||||||
|
except ImportError:
|
||||||
|
can_threaded_download = False
|
||||||
|
|
||||||
from ..downloader import _get_real_downloader
|
from ..downloader import _get_real_downloader
|
||||||
from .fragment import FragmentFD
|
from .fragment import FragmentFD
|
||||||
|
|
||||||
from ..compat import compat_urllib_error
|
from ..compat import compat_urllib_error
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
DownloadError,
|
DownloadError,
|
||||||
|
sanitize_open,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DashSegmentsFD(FragmentFD):
|
class DashSegmentsFD(FragmentFD):
|
||||||
"""
|
"""
|
||||||
Download segments in a DASH manifest
|
Download segments in a DASH manifest. External downloaders can take over
|
||||||
|
the fragment downloads by supporting the 'dash_frag_urls' protocol
|
||||||
"""
|
"""
|
||||||
|
|
||||||
FD_NAME = 'dashsegments'
|
FD_NAME = 'dashsegments'
|
||||||
@@ -22,7 +30,7 @@ class DashSegmentsFD(FragmentFD):
|
|||||||
fragments = info_dict['fragments'][:1] if self.params.get(
|
fragments = info_dict['fragments'][:1] if self.params.get(
|
||||||
'test', False) else info_dict['fragments']
|
'test', False) else info_dict['fragments']
|
||||||
|
|
||||||
real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
|
real_downloader = _get_real_downloader(info_dict, 'dash_frag_urls', self.params, None)
|
||||||
|
|
||||||
ctx = {
|
ctx = {
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
@@ -37,7 +45,7 @@ class DashSegmentsFD(FragmentFD):
|
|||||||
fragment_retries = self.params.get('fragment_retries', 0)
|
fragment_retries = self.params.get('fragment_retries', 0)
|
||||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||||
|
|
||||||
fragment_urls = []
|
fragments_to_download = []
|
||||||
frag_index = 0
|
frag_index = 0
|
||||||
for i, fragment in enumerate(fragments):
|
for i, fragment in enumerate(fragments):
|
||||||
frag_index += 1
|
frag_index += 1
|
||||||
@@ -48,9 +56,31 @@ class DashSegmentsFD(FragmentFD):
|
|||||||
assert fragment_base_url
|
assert fragment_base_url
|
||||||
fragment_url = urljoin(fragment_base_url, fragment['path'])
|
fragment_url = urljoin(fragment_base_url, fragment['path'])
|
||||||
|
|
||||||
|
fragments_to_download.append({
|
||||||
|
'frag_index': frag_index,
|
||||||
|
'index': i,
|
||||||
|
'url': fragment_url,
|
||||||
|
})
|
||||||
|
|
||||||
if real_downloader:
|
if real_downloader:
|
||||||
fragment_urls.append(fragment_url)
|
self.to_screen(
|
||||||
continue
|
'[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
|
||||||
|
info_copy = info_dict.copy()
|
||||||
|
info_copy['fragments'] = fragments_to_download
|
||||||
|
fd = real_downloader(self.ydl, self.params)
|
||||||
|
# TODO: Make progress updates work without hooking twice
|
||||||
|
# for ph in self._progress_hooks:
|
||||||
|
# fd.add_progress_hook(ph)
|
||||||
|
success = fd.real_download(filename, info_copy)
|
||||||
|
if not success:
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
def download_fragment(fragment):
|
||||||
|
i = fragment['index']
|
||||||
|
yt_dlp/downloader/dash.py

+            frag_index = fragment['frag_index']
+            fragment_url = fragment['url']
+
+            ctx['fragment_index'] = frag_index
+
             # In DASH, the first segment contains necessary headers to
             # generate a valid MP4 file, so always abort for the first segment
@@ -60,8 +90,7 @@ class DashSegmentsFD(FragmentFD):
                 try:
                     success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
                     if not success:
-                        return False
-                    self._append_fragment(ctx, frag_content)
+                        return False, frag_index
                     break
                 except compat_urllib_error.HTTPError as err:
                     # YouTube may often return 404 HTTP error for a fragment causing the
@@ -77,27 +106,73 @@ class DashSegmentsFD(FragmentFD):
                     # Don't retry fragment if error occurred during HTTP downloading
                     # itself since it has own retry settings
                     if not fatal:
-                        self.report_skip_fragment(frag_index)
                         break
                     raise

             if count > fragment_retries:
                 if not fatal:
-                    self.report_skip_fragment(frag_index)
-                    continue
-                self.report_error('giving up after %s fragment retries' % fragment_retries)
-                return False
+                    return False, frag_index
+                self.report_error('Giving up after %s fragment retries' % fragment_retries)
+                return False, frag_index

-        if real_downloader:
-            info_copy = info_dict.copy()
-            info_copy['url_list'] = fragment_urls
-            fd = real_downloader(self.ydl, self.params)
-            # TODO: Make progress updates work without hooking twice
-            # for ph in self._progress_hooks:
-            #     fd.add_progress_hook(ph)
-            success = fd.real_download(filename, info_copy)
-            if not success:
-                return False
-        else:
+            return frag_content, frag_index
+
+        def append_fragment(frag_content, frag_index):
+            if frag_content:
+                fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
+                try:
+                    file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
+                    ctx['fragment_filename_sanitized'] = frag_sanitized
+                    file.close()
+                    self._append_fragment(ctx, frag_content)
+                    return True
+                except FileNotFoundError:
+                    if skip_unavailable_fragments:
+                        self.report_skip_fragment(frag_index)
+                        return True
+                    else:
+                        self.report_error(
+                            'fragment %s not found, unable to continue' % frag_index)
+                        return False
+            else:
+                if skip_unavailable_fragments:
+                    self.report_skip_fragment(frag_index)
+                    return True
+                else:
+                    self.report_error(
+                        'fragment %s not found, unable to continue' % frag_index)
+                    return False
+
+        max_workers = self.params.get('concurrent_fragment_downloads', 1)
+        if can_threaded_download and max_workers > 1:
+            self.report_warning('The download speed shown is only of one thread. This is a known issue')
+            with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
+                futures = [pool.submit(download_fragment, fragment) for fragment in fragments_to_download]
+                # timeout must be 0 to return instantly
+                done, not_done = concurrent.futures.wait(futures, timeout=0)
+                try:
+                    while not_done:
+                        # Check every 1 second for KeyboardInterrupt
+                        freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
+                        done |= freshly_done
+                except KeyboardInterrupt:
+                    for future in not_done:
+                        future.cancel()
+                    # timeout must be none to cancel
+                    concurrent.futures.wait(not_done, timeout=None)
+                    raise KeyboardInterrupt
+            results = [future.result() for future in futures]
+
+            for frag_content, frag_index in results:
+                result = append_fragment(frag_content, frag_index)
+                if not result:
+                    return False
+        else:
+            for fragment in fragments_to_download:
+                frag_content, frag_index = download_fragment(fragment)
+                result = append_fragment(frag_content, frag_index)
+                if not result:
+                    return False

         self._finish_frag_download(ctx)
         return True
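The threaded branch added above deliberately avoids blocking on the executor: it polls concurrent.futures.wait with a one-second timeout so the main thread stays responsive to Ctrl+C while worker threads fetch fragments. A minimal self-contained sketch of the same pattern (the work function and item list here are placeholders, not part of yt-dlp):

    import concurrent.futures

    def run_all(work, items, max_workers=4):
        # Submit everything up front, then poll so the main thread
        # stays responsive to KeyboardInterrupt.
        with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
            futures = [pool.submit(work, item) for item in items]
            done, not_done = concurrent.futures.wait(futures, timeout=0)
            try:
                while not_done:
                    # Wake up every second so KeyboardInterrupt can propagate
                    freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
                    done |= freshly_done
            except KeyboardInterrupt:
                for future in not_done:
                    future.cancel()  # cancel anything not yet started
                concurrent.futures.wait(not_done, timeout=None)
                raise
        return [future.result() for future in futures]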
yt_dlp/downloader/external.py

@@ -24,7 +24,6 @@ from ..utils import (
     cli_bool_option,
     cli_configuration_args,
     encodeFilename,
-    error_to_compat_str,
     encodeArgument,
     handle_youtubedl_headers,
     check_executable,
@@ -82,19 +81,23 @@ class ExternalFD(FileDownloader):

     @property
     def exe(self):
-        return self.params.get('external_downloader')
+        return self.get_basename()

     @classmethod
-    def available(cls):
-        return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])
+    def available(cls, path=None):
+        path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
+        if path:
+            cls.exe = path
+            return path
+        return False

     @classmethod
     def supports(cls, info_dict):
         return info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS

     @classmethod
-    def can_download(cls, info_dict):
-        return cls.available() and cls.supports(info_dict)
+    def can_download(cls, info_dict, path=None):
+        return cls.available(path) and cls.supports(info_dict)

     def _option(self, command_option, param):
         return cli_option(self.params, command_option, param)
@@ -108,7 +111,8 @@ class ExternalFD(FileDownloader):
     def _configuration_args(self, *args, **kwargs):
         return cli_configuration_args(
             self.params.get('external_downloader_args'),
-            self.get_basename(), *args, **kwargs)
+            [self.get_basename(), 'default'],
+            *args, **kwargs)

     def _call_downloader(self, tmpfilename, info_dict):
         """ Either overwrite this or implement _make_cmd """
@@ -116,24 +120,43 @@ class ExternalFD(FileDownloader):

         self._debug_cmd(cmd)

-        p = subprocess.Popen(
-            cmd, stderr=subprocess.PIPE)
-        _, stderr = process_communicate_or_kill(p)
-        if p.returncode != 0:
-            self.to_stderr(stderr.decode('utf-8', 'replace'))
+        if 'fragments' in info_dict:
+            fragment_retries = self.params.get('fragment_retries', 0)
+            skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+            count = 0
+            while count <= fragment_retries:
+                p = subprocess.Popen(
+                    cmd, stderr=subprocess.PIPE)
+                _, stderr = process_communicate_or_kill(p)
+                if p.returncode == 0:
+                    break
+                # TODO: Decide whether to retry based on error code
+                # https://aria2.github.io/manual/en/html/aria2c.html#exit-status
+                self.to_stderr(stderr.decode('utf-8', 'replace'))
+                count += 1
+                if count <= fragment_retries:
+                    self.to_screen(
+                        '[%s] Got error. Retrying fragments (attempt %d of %s)...'
+                        % (self.get_basename(), count, self.format_retries(fragment_retries)))
+            if count > fragment_retries:
+                if not skip_unavailable_fragments:
+                    self.report_error('Giving up after %s fragment retries' % fragment_retries)
+                    return -1

-        if 'url_list' in info_dict:
-            file_list = []
-            for [i, url] in enumerate(info_dict['url_list']):
-                tmpsegmentname = '%s_%s.frag' % (tmpfilename, i)
-                file_list.append(tmpsegmentname)
-            key_list = info_dict.get('key_list')
-            decrypt_info = None
             dest, _ = sanitize_open(tmpfilename, 'wb')
-            for i, file in enumerate(file_list):
-                src, _ = sanitize_open(file, 'rb')
-                if key_list:
-                    decrypt_info = next((x for x in key_list if x['INDEX'] == i), decrypt_info)
+            for frag_index, fragment in enumerate(info_dict['fragments']):
+                fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
+                try:
+                    src, _ = sanitize_open(fragment_filename, 'rb')
+                except IOError:
+                    if skip_unavailable_fragments and frag_index > 1:
+                        self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index))
+                        continue
+                    self.report_error('Unable to open fragment %d' % frag_index)
+                    return -1
+                decrypt_info = fragment.get('decrypt_info')
+                if decrypt_info:
                     if decrypt_info['METHOD'] == 'AES-128':
                         iv = decrypt_info.get('IV')
                         decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
@@ -149,19 +172,16 @@ class ExternalFD(FileDownloader):
                 fragment_data = src.read()
                 dest.write(fragment_data)
                 src.close()
-        dest.close()
-        if not self.params.get('keep_fragments', False):
-            for file_path in file_list:
-                try:
-                    os.remove(file_path)
-                except OSError as ose:
-                    self.report_error("Unable to delete file %s; %s" % (file_path, error_to_compat_str(ose)))
-            try:
-                file_path = '%s.frag.urls' % tmpfilename
-                os.remove(file_path)
-            except OSError as ose:
-                self.report_error("Unable to delete file %s; %s" % (file_path, error_to_compat_str(ose)))
+                if not self.params.get('keep_fragments', False):
+                    os.remove(encodeFilename(fragment_filename))
+            dest.close()
+            os.remove(encodeFilename('%s.frag.urls' % tmpfilename))
+        else:
+            p = subprocess.Popen(
+                cmd, stderr=subprocess.PIPE)
+            _, stderr = process_communicate_or_kill(p)
+            if p.returncode != 0:
+                self.to_stderr(stderr.decode('utf-8', 'replace'))

         return p.returncode

     def _prepare_url(self, info_dict, url):
@@ -243,17 +263,24 @@ class WgetFD(ExternalFD):

 class Aria2cFD(ExternalFD):
     AVAILABLE_OPT = '-v'
-    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'frag_urls')
+    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls')
+
+    @staticmethod
+    def supports_manifest(manifest):
+        UNSUPPORTED_FEATURES = [
+            r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [1]
+            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+        ]
+        check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
+        return all(check_results)

     def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [self.exe, '-c']
-        dn = os.path.dirname(tmpfilename)
-        if 'url_list' not in info_dict:
-            cmd += ['--out', os.path.basename(tmpfilename)]
-        verbose_level_args = ['--console-log-level=warn', '--summary-interval=0']
-        cmd += self._configuration_args(['--file-allocation=none', '-x16', '-j16', '-s16'] + verbose_level_args)
-        if dn:
-            cmd += ['--dir', dn]
+        cmd = [self.exe, '-c',
+               '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
+               '--file-allocation=none', '-x16', '-j16', '-s16']
+        if 'fragments' in info_dict:
+            cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
         if info_dict.get('http_headers') is not None:
             for key, val in info_dict['http_headers'].items():
                 cmd += ['--header', '%s: %s' % (key, val)]
@@ -261,19 +288,25 @@ class Aria2cFD(ExternalFD):
         cmd += self._option('--all-proxy', 'proxy')
         cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
         cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
+        cmd += self._configuration_args()
+
+        dn = os.path.dirname(tmpfilename)
+        if dn:
+            cmd += ['--dir', dn]
+        if 'fragments' not in info_dict:
+            cmd += ['--out', os.path.basename(tmpfilename)]
         cmd += ['--auto-file-renaming=false']
-        if 'url_list' in info_dict:
-            cmd += verbose_level_args
-            cmd += ['--uri-selector', 'inorder', '--download-result=hide']
+        if 'fragments' in info_dict:
+            cmd += ['--file-allocation=none', '--uri-selector=inorder']
             url_list_file = '%s.frag.urls' % tmpfilename
             url_list = []
-            for [i, url] in enumerate(info_dict['url_list']):
-                tmpsegmentname = '%s_%s.frag' % (os.path.basename(tmpfilename), i)
-                url_list.append('%s\n\tout=%s' % (url, tmpsegmentname))
+            for frag_index, fragment in enumerate(info_dict['fragments']):
+                fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
+                url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename))
             stream, _ = sanitize_open(url_list_file, 'wb')
             stream.write('\n'.join(url_list).encode('utf-8'))
             stream.close()

             cmd += ['-i', url_list_file]
         else:
             cmd += ['--', info_dict['url']]
@@ -281,9 +314,11 @@ class Aria2cFD(ExternalFD):


 class HttpieFD(ExternalFD):
+    AVAILABLE_OPT = '--version'
+
     @classmethod
-    def available(cls):
-        return check_executable('http', ['--version'])
+    def available(cls, path=None):
+        return ExternalFD.available(cls, path or 'http')

     def _make_cmd(self, tmpfilename, info_dict):
         cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
@@ -298,7 +333,8 @@ class FFmpegFD(ExternalFD):
     SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')

     @classmethod
-    def available(cls):
+    def available(cls, path=None):
+        # TODO: Fix path for ffmpeg
         return FFmpegPostProcessor().available

     def _call_downloader(self, tmpfilename, info_dict):
@@ -398,7 +434,10 @@ class FFmpegFD(ExternalFD):
             args += ['-fs', compat_str(self._TEST_FILE_SIZE)]

         if protocol in ('m3u8', 'm3u8_native'):
-            if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
+            use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts')
+            if use_mpegts is None:
+                use_mpegts = info_dict.get('is_live')
+            if use_mpegts:
                 args += ['-f', 'mpegts']
             else:
                 args += ['-f', 'mp4']
@@ -452,4 +491,4 @@ def get_external_downloader(external_downloader):
     downloader . """
     # Drop .exe extension on Windows
     bn = os.path.splitext(os.path.basename(external_downloader))[0]
-    return _BY_NAME[bn]
+    return _BY_NAME.get(bn)
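Aria2cFD now feeds fragment URLs to aria2c through its -i input file; aria2's input-file format lets each URI line be followed by a tab-indented per-download option such as out=, which is how every fragment gets a deterministic <tmpfilename>-Frag<N> name. A rough sketch of building such a file (the file names and URLs are illustrative, not yt-dlp's actual values):

    fragments = [{'url': 'https://example.com/seg%d.ts' % i} for i in range(3)]
    lines = ['%s\n\tout=video.part-Frag%d' % (fragment['url'], frag_index)
             for frag_index, fragment in enumerate(fragments)]
    with open('video.part.frag.urls', 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))
    # then, roughly: aria2c -c --uri-selector=inorder -i video.part.frag.urls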
yt_dlp/downloader/hls.py

@@ -7,6 +7,11 @@ try:
     can_decrypt_frag = True
 except ImportError:
     can_decrypt_frag = False
+try:
+    import concurrent.futures
+    can_threaded_download = True
+except ImportError:
+    can_threaded_download = False

 from ..downloader import _get_real_downloader
 from .fragment import FragmentFD
@@ -19,12 +24,17 @@ from ..compat import (
 )
 from ..utils import (
     parse_m3u8_attributes,
+    sanitize_open,
     update_url_query,
 )


 class HlsFD(FragmentFD):
-    """ A limited implementation that does not require ffmpeg """
+    """
+    Download segments in a m3u8 manifest. External downloaders can take over
+    the fragment downloads by supporting the 'm3u8_frag_urls' protocol and
+    re-defining 'supports_manifest' function
+    """

     FD_NAME = 'hlsnative'

@@ -53,12 +63,15 @@ class HlsFD(FragmentFD):
             UNSUPPORTED_FEATURES += [
                 r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
             ]
-        check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
-        is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
-        check_results.append(with_crypto or not is_aes128_enc)
-        check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
-        check_results.append(not info_dict.get('is_live'))
-        return all(check_results)
+
+        def check_results():
+            yield not info_dict.get('is_live')
+            is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
+            yield with_crypto or not is_aes128_enc
+            yield not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)
+            for feature in UNSUPPORTED_FEATURES:
+                yield not re.search(feature, manifest)
+        return all(check_results())

     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
@@ -70,20 +83,24 @@ class HlsFD(FragmentFD):

         if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')):
             if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
-                self.report_error('pycryptodome not found. Please install it.')
+                self.report_error('pycryptodome not found. Please install')
                 return False
             if self.can_download(s, info_dict, with_crypto=True):
-                self.report_warning('pycryptodome is needed to download this file with hlsnative')
-            self.report_warning(
-                'hlsnative has detected features it does not support, '
-                'extraction will be delegated to ffmpeg')
+                self.report_warning('pycryptodome is needed to download this file natively')
             fd = FFmpegFD(self.ydl, self.params)
+            self.report_warning(
+                '%s detected unsupported features; extraction will be delegated to %s' % (self.FD_NAME, fd.get_basename()))
             # TODO: Make progress updates work without hooking twice
             # for ph in self._progress_hooks:
             #     fd.add_progress_hook(ph)
             return fd.real_download(filename, info_dict)

-        real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
+        real_downloader = _get_real_downloader(info_dict, 'm3u8_frag_urls', self.params, None)
+        if real_downloader and not real_downloader.supports_manifest(s):
+            real_downloader = None
+        if real_downloader:
+            self.to_screen(
+                '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))

         def is_ad_fragment_start(s):
             return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
@@ -93,7 +110,7 @@ class HlsFD(FragmentFD):
             return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
                     or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))

-        fragment_urls = []
+        fragments = []

         media_frags = 0
         ad_frags = 0
@@ -136,14 +153,12 @@ class HlsFD(FragmentFD):
         i = 0
         media_sequence = 0
         decrypt_info = {'METHOD': 'NONE'}
-        key_list = []
         byte_range = {}
         discontinuity_count = 0
         frag_index = 0
         ad_frag_next = False
         for line in s.splitlines():
             line = line.strip()
-            download_frag = False
             if line:
                 if not line.startswith('#'):
                     if format_index and discontinuity_count != format_index:
@@ -160,17 +175,20 @@ class HlsFD(FragmentFD):
                     if extra_query:
                         frag_url = update_url_query(frag_url, extra_query)

-                    if real_downloader:
-                        fragment_urls.append(frag_url)
-                        continue
-                    download_frag = True
+                    fragments.append({
+                        'frag_index': frag_index,
+                        'url': frag_url,
+                        'decrypt_info': decrypt_info,
+                        'byte_range': byte_range,
+                        'media_sequence': media_sequence,
+                    })

                 elif line.startswith('#EXT-X-MAP'):
                     if format_index and discontinuity_count != format_index:
                         continue
                     if frag_index > 0:
                         self.report_error(
-                            'initialization fragment found after media fragments, unable to download')
+                            'Initialization fragment found after media fragments, unable to download')
                         return False
                     frag_index += 1
                     map_info = parse_m3u8_attributes(line[11:])
@@ -180,9 +198,14 @@ class HlsFD(FragmentFD):
                                 else compat_urlparse.urljoin(man_url, map_info.get('URI')))
                     if extra_query:
                         frag_url = update_url_query(frag_url, extra_query)
-                    if real_downloader:
-                        fragment_urls.append(frag_url)
-                        continue
+
+                    fragments.append({
+                        'frag_index': frag_index,
+                        'url': frag_url,
+                        'decrypt_info': decrypt_info,
+                        'byte_range': byte_range,
+                        'media_sequence': media_sequence
+                    })

                     if map_info.get('BYTERANGE'):
                         splitted_byte_range = map_info.get('BYTERANGE').split('@')
@@ -191,7 +214,6 @@ class HlsFD(FragmentFD):
                             'start': sub_range_start,
                             'end': sub_range_start + int(splitted_byte_range[0]),
                         }
-                        download_frag = True

                 elif line.startswith('#EXT-X-KEY'):
                     decrypt_url = decrypt_info.get('URI')
@@ -206,9 +228,6 @@ class HlsFD(FragmentFD):
                         decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
                     if decrypt_url != decrypt_info['URI']:
                         decrypt_info['KEY'] = None
-                        key_data = decrypt_info.copy()
-                        key_data['INDEX'] = frag_index
-                        key_list.append(key_data)

                 elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                     media_sequence = int(line[22:])
@@ -225,8 +244,33 @@ class HlsFD(FragmentFD):
                     ad_frag_next = False
                 elif line.startswith('#EXT-X-DISCONTINUITY'):
                     discontinuity_count += 1
+            i += 1
+            media_sequence += 1
+
+        # We only download the first fragment during the test
+        if test:
+            fragments = [fragments[0] if fragments else None]
+
+        if real_downloader:
+            info_copy = info_dict.copy()
+            info_copy['fragments'] = fragments
+            fd = real_downloader(self.ydl, self.params)
+            # TODO: Make progress updates work without hooking twice
+            # for ph in self._progress_hooks:
+            #     fd.add_progress_hook(ph)
+            success = fd.real_download(filename, info_copy)
+            if not success:
+                return False
+        else:
+            def download_fragment(fragment):
+                frag_index = fragment['frag_index']
+                frag_url = fragment['url']
+                decrypt_info = fragment['decrypt_info']
+                byte_range = fragment['byte_range']
+                media_sequence = fragment['media_sequence']
+
+                ctx['fragment_index'] = frag_index

-            if download_frag:
                 count = 0
                 headers = info_dict.get('http_headers', {})
                 if byte_range:
@@ -236,7 +280,7 @@ class HlsFD(FragmentFD):
                         success, frag_content = self._download_fragment(
                             ctx, frag_url, info_dict, headers)
                         if not success:
-                            return False
+                            return False, frag_index
                         break
                     except compat_urllib_error.HTTPError as err:
                         # Unavailable (possibly temporary) fragments may be served.
@@ -247,14 +291,8 @@ class HlsFD(FragmentFD):
                         if count <= fragment_retries:
                             self.report_retry_fragment(err, frag_index, count, fragment_retries)
                 if count > fragment_retries:
-                    if skip_unavailable_fragments:
-                        i += 1
-                        media_sequence += 1
-                        self.report_skip_fragment(frag_index)
-                        continue
-                    self.report_error(
-                        'giving up after %s fragment retries' % fragment_retries)
-                    return False
+                    self.report_error('Giving up after %s fragment retries' % fragment_retries)
+                    return False, frag_index

                 if decrypt_info['METHOD'] == 'AES-128':
                     iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
@@ -266,24 +304,65 @@ class HlsFD(FragmentFD):
                     if not test:
                         frag_content = AES.new(
                             decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
-                self._append_fragment(ctx, frag_content)
-                # We only download the first fragment during the test
-                if test:
-                    break
-            i += 1
-            media_sequence += 1

-        if real_downloader:
-            info_copy = info_dict.copy()
-            info_copy['url_list'] = fragment_urls
-            info_copy['key_list'] = key_list
-            fd = real_downloader(self.ydl, self.params)
-            # TODO: Make progress updates work without hooking twice
-            # for ph in self._progress_hooks:
-            #     fd.add_progress_hook(ph)
-            success = fd.real_download(filename, info_copy)
-            if not success:
-                return False
-        else:
+                return frag_content, frag_index
+
+            def append_fragment(frag_content, frag_index):
+                if frag_content:
+                    fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
+                    try:
+                        file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
+                        ctx['fragment_filename_sanitized'] = frag_sanitized
+                        file.close()
+                        self._append_fragment(ctx, frag_content)
+                        return True
+                    except FileNotFoundError:
+                        if skip_unavailable_fragments:
+                            self.report_skip_fragment(frag_index)
+                            return True
+                        else:
+                            self.report_error(
+                                'fragment %s not found, unable to continue' % frag_index)
+                            return False
+                else:
+                    if skip_unavailable_fragments:
+                        self.report_skip_fragment(frag_index)
+                        return True
+                    else:
+                        self.report_error(
+                            'fragment %s not found, unable to continue' % frag_index)
+                        return False
+
+            max_workers = self.params.get('concurrent_fragment_downloads', 1)
+            if can_threaded_download and max_workers > 1:
+                self.report_warning('The download speed shown is only of one thread. This is a known issue')
+                with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
+                    futures = [pool.submit(download_fragment, fragment) for fragment in fragments]
+                    # timeout must be 0 to return instantly
+                    done, not_done = concurrent.futures.wait(futures, timeout=0)
+                    try:
+                        while not_done:
+                            # Check every 1 second for KeyboardInterrupt
+                            freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
+                            done |= freshly_done
+                    except KeyboardInterrupt:
+                        for future in not_done:
+                            future.cancel()
+                        # timeout must be none to cancel
+                        concurrent.futures.wait(not_done, timeout=None)
+                        raise KeyboardInterrupt
+                results = [future.result() for future in futures]
+
+                for frag_content, frag_index in results:
+                    result = append_fragment(frag_content, frag_index)
+                    if not result:
+                        return False
+            else:
+                for fragment in fragments:
+                    frag_content, frag_index = download_fragment(fragment)
+                    result = append_fragment(frag_content, frag_index)
+                    if not result:
+                        return False

         self._finish_frag_download(ctx)
         return True
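In the AES-128 path above, compat_struct_pack('>8xq', media_sequence) supplies the default IV whenever the #EXT-X-KEY tag omits one; per the HLS spec, the default IV is the fragment's media sequence number as a big-endian 128-bit value. A quick standard-library illustration of that format string:

    import struct

    media_sequence = 42
    # '>8xq' = 8 zero pad bytes + big-endian signed 64-bit int -> 16-byte IV
    iv = struct.pack('>8xq', media_sequence)
    assert iv == media_sequence.to_bytes(16, 'big')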
yt_dlp/downloader/rtmp.py

@@ -117,7 +117,7 @@ class RtmpFD(FileDownloader):

         # Check for rtmpdump first
         if not check_executable('rtmpdump', ['-h']):
-            self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
+            self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install')
             return False

         # Download using rtmpdump. rtmpdump returns exit code 2 when
yt_dlp/downloader/rtsp.py

@@ -24,7 +24,7 @@ class RtspFD(FileDownloader):
             args = [
                 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
         else:
-            self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
+            self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install one')
             return False

         self._debug_cmd(args)
yt_dlp/downloader/youtube_live_chat.py

@@ -79,8 +79,7 @@ class YoutubeLiveChatReplayFD(FragmentFD):

         self._prepare_and_start_frag_download(ctx)

-        success, raw_fragment = dl_fragment(
-            'https://www.youtube.com/watch?v={}'.format(video_id))
+        success, raw_fragment = dl_fragment(info_dict['url'])
         if not success:
             return False
         try:
yt_dlp/extractor/amcnetworks.py

@@ -65,15 +65,35 @@ class AMCNetworksIE(ThePlatformIE):
     def _real_extract(self, url):
         site, display_id = re.match(self._VALID_URL, url).groups()
         requestor_id = self._REQUESTOR_ID_MAP[site]
-        properties = self._download_json(
-            'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' % (requestor_id.lower(), display_id),
-            display_id)['data']['properties']
+        page_data = self._download_json(
+            'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s'
+            % (requestor_id.lower(), display_id), display_id)['data']
+        properties = page_data.get('properties') or {}
         query = {
             'mbr': 'true',
             'manifest': 'm3u',
         }
+
+        video_player_count = 0
+        try:
+            for v in page_data['children']:
+                if v.get('type') == 'video-player':
+                    releasePid = v['properties']['currentVideo']['meta']['releasePid']
+                    tp_path = 'M_UwQC/' + releasePid
+                    media_url = 'https://link.theplatform.com/s/' + tp_path
+                    video_player_count += 1
+        except KeyError:
+            pass
+        if video_player_count > 1:
+            self.report_warning(
+                'The JSON data has %d video players. Only one will be extracted' % video_player_count)
+
+        # Fall back to videoPid if releasePid not found.
+        # TODO: Fall back to videoPid if releasePid manifest uses DRM.
+        if not video_player_count:
-        tp_path = 'M_UwQC/media/' + properties['videoPid']
-        media_url = 'https://link.theplatform.com/s/' + tp_path
+            tp_path = 'M_UwQC/media/' + properties['videoPid']
+            media_url = 'https://link.theplatform.com/s/' + tp_path

         theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
         info = self._parse_theplatform_metadata(theplatform_metadata)
         video_id = theplatform_metadata['pid']
@@ -90,30 +110,41 @@ class AMCNetworksIE(ThePlatformIE):
         formats, subtitles = self._extract_theplatform_smil(
             media_url, video_id)
         self._sort_formats(formats)
+
+        thumbnails = []
+        thumbnail_urls = [properties.get('imageDesktop')]
+        if 'thumbnail' in info:
+            thumbnail_urls.append(info.pop('thumbnail'))
+        for thumbnail_url in thumbnail_urls:
+            if not thumbnail_url:
+                continue
+            mobj = re.search(r'(\d+)x(\d+)', thumbnail_url)
+            thumbnails.append({
+                'url': thumbnail_url,
+                'width': int(mobj.group(1)) if mobj else None,
+                'height': int(mobj.group(2)) if mobj else None,
+            })
+
         info.update({
+            'age_limit': parse_age_limit(rating),
+            'formats': formats,
             'id': video_id,
             'subtitles': subtitles,
-            'formats': formats,
-            'age_limit': parse_age_limit(parse_age_limit(rating)),
+            'thumbnails': thumbnails,
         })
         ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
         if ns_keys:
             ns = list(ns_keys)[0]
-            series = theplatform_metadata.get(ns + '$show')
-            season_number = int_or_none(
-                theplatform_metadata.get(ns + '$season'))
-            episode = theplatform_metadata.get(ns + '$episodeTitle')
+            episode = theplatform_metadata.get(ns + '$episodeTitle') or None
             episode_number = int_or_none(
                 theplatform_metadata.get(ns + '$episode'))
-            if season_number:
-                title = 'Season %d - %s' % (season_number, title)
-            if series:
-                title = '%s - %s' % (series, title)
+            season_number = int_or_none(
+                theplatform_metadata.get(ns + '$season'))
+            series = theplatform_metadata.get(ns + '$show') or None
             info.update({
-                'title': title,
-                'series': series,
-                'season_number': season_number,
                 'episode': episode,
                 'episode_number': episode_number,
+                'season_number': season_number,
+                'series': series,
             })
         return info
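The thumbnail handling added above infers image dimensions from the URL itself, since AMC's image URLs embed a WIDTHxHEIGHT token. A standalone sketch of that parsing (the sample URL is made up):

    import re

    def thumb_entry(thumbnail_url):
        # Extract dimensions like '640x360' when present; fall back to None
        mobj = re.search(r'(\d+)x(\d+)', thumbnail_url)
        return {
            'url': thumbnail_url,
            'width': int(mobj.group(1)) if mobj else None,
            'height': int(mobj.group(2)) if mobj else None,
        }

    print(thumb_entry('https://example.com/images/640x360/poster.jpg'))
    # -> {'url': '...', 'width': 640, 'height': 360}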
yt_dlp/extractor/applepodcasts.py

@@ -42,6 +42,7 @@ class ApplePodcastsIE(InfoExtractor):
         ember_data = self._parse_json(self._search_regex(
             r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
             webpage, 'ember data'), episode_id)
+        ember_data = ember_data.get(episode_id) or ember_data
         episode = ember_data['data']['attributes']
         description = episode.get('description') or {}
yt_dlp/extractor/ard.py

@@ -272,7 +272,8 @@ class ARDMediathekIE(ARDMediathekBaseIE):
         else:  # request JSON file
             if not document_id:
                 video_id = self._search_regex(
-                    r'/play/(?:config|media)/(\d+)', webpage, 'media id')
+                    (r'/play/(?:config|media|sola)/(\d+)', r'contentId["\']\s*:\s*(\d+)'),
+                    webpage, 'media id', default=None)
             info = self._extract_media_info(
                 'http://www.ardmediathek.de/play/media/%s' % video_id,
                 webpage, video_id)
yt_dlp/extractor/arnes.py (new file, 101 lines)

@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+)
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
+    remove_start,
+)
+
+
+class ArnesIE(InfoExtractor):
+    IE_NAME = 'video.arnes.si'
+    IE_DESC = 'Arnes Video'
+    _VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
+    _TESTS = [{
+        'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
+        'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
+        'info_dict': {
+            'id': 'a1qrWTOQfVoU',
+            'ext': 'mp4',
+            'title': 'Linearna neodvisnost, definicija',
+            'description': 'Linearna neodvisnost, definicija',
+            'license': 'PRIVATE',
+            'creator': 'Polona Oblak',
+            'timestamp': 1585063725,
+            'upload_date': '20200324',
+            'channel': 'Polona Oblak',
+            'channel_id': 'q6pc04hw24cj',
+            'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
+            'duration': 596.75,
+            'view_count': int,
+            'tags': ['linearna_algebra'],
+            'start_time': 10,
+        }
+    }, {
+        'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
+        'only_matching': True,
+    }]
+    _BASE_URL = 'https://video.arnes.si'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video = self._download_json(
+            self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
+        title = video['title']
+
+        formats = []
+        for media in (video.get('media') or []):
+            media_url = media.get('url')
+            if not media_url:
+                continue
+            formats.append({
+                'url': self._BASE_URL + media_url,
+                'format_id': remove_start(media.get('format'), 'FORMAT_'),
+                'format_note': media.get('formatTranslation'),
+                'width': int_or_none(media.get('width')),
+                'height': int_or_none(media.get('height')),
+            })
+        self._sort_formats(formats)
+
+        channel = video.get('channel') or {}
+        channel_id = channel.get('url')
+        thumbnail = video.get('thumbnailUrl')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': self._BASE_URL + thumbnail,
+            'description': video.get('description'),
+            'license': video.get('license'),
+            'creator': video.get('author'),
+            'timestamp': parse_iso8601(video.get('creationTime')),
+            'channel': channel.get('name'),
+            'channel_id': channel_id,
+            'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
+            'duration': float_or_none(video.get('duration'), 1000),
+            'view_count': int_or_none(video.get('views')),
+            'tags': video.get('hashtags'),
+            'start_time': int_or_none(compat_parse_qs(
+                compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
+        }
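ArnesIE carries the ?t= offset from the watch-page URL through to start_time. The compat wrappers map directly onto the standard library, so the equivalent plain-Python lookup is:

    from urllib.parse import parse_qs, urlparse

    url = 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10'
    start_time = parse_qs(urlparse(url).query).get('t', [None])[0]
    print(start_time)  # '10'; int_or_none() then turns it into 10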
yt_dlp/extractor/bandaichannel.py (new file, 37 lines)

@@ -0,0 +1,37 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .brightcove import BrightcoveNewIE
+from ..utils import extract_attributes
+
+
+class BandaiChannelIE(BrightcoveNewIE):
+    IE_NAME = 'bandaichannel'
+    _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'
+    _TESTS = [{
+        'url': 'https://www.b-ch.com/titles/514/001',
+        'md5': 'a0f2d787baa5729bed71108257f613a4',
+        'info_dict': {
+            'id': '6128044564001',
+            'ext': 'mp4',
+            'title': 'メタルファイターMIKU 第1話',
+            'timestamp': 1580354056,
+            'uploader_id': '5797077852001',
+            'upload_date': '20200130',
+            'duration': 1387.733,
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        attrs = extract_attributes(self._search_regex(
+            r'(<video-js[^>]+\bid="bcplayer"[^>]*>)', webpage, 'player'))
+        bc = self._download_json(
+            'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'],
+            video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc']
+        return self._parse_brightcove_metadata(bc, bc['id'])
yt_dlp/extractor/bandcamp.py

@@ -49,6 +49,7 @@ class BandcampIE(InfoExtractor):
             'uploader': 'Ben Prunty',
             'timestamp': 1396508491,
             'upload_date': '20140403',
+            'release_timestamp': 1396483200,
             'release_date': '20140403',
             'duration': 260.877,
             'track': 'Lanius (Battle)',
@@ -69,6 +70,7 @@ class BandcampIE(InfoExtractor):
             'uploader': 'Mastodon',
             'timestamp': 1322005399,
             'upload_date': '20111122',
+            'release_timestamp': 1076112000,
             'release_date': '20040207',
             'duration': 120.79,
             'track': 'Hail to Fire',
@@ -197,7 +199,7 @@ class BandcampIE(InfoExtractor):
             'thumbnail': thumbnail,
             'uploader': artist,
             'timestamp': timestamp,
-            'release_date': unified_strdate(tralbum.get('album_release_date')),
+            'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
             'duration': duration,
             'track': track,
             'track_number': track_number,
yt_dlp/extractor/bbc.py

@@ -1,14 +1,24 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import functools
 import itertools
+import json
 import re

 from .common import InfoExtractor
+from ..compat import (
+    compat_etree_Element,
+    compat_HTTPError,
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+    compat_urlparse,
+)
 from ..utils import (
+    ExtractorError,
+    OnDemandPagedList,
     clean_html,
     dict_get,
-    ExtractorError,
     float_or_none,
     get_element_by_class,
     int_or_none,
@@ -21,11 +31,6 @@ from ..utils import (
     urlencode_postdata,
     urljoin,
 )
-from ..compat import (
-    compat_etree_Element,
-    compat_HTTPError,
-    compat_urlparse,
-)


 class BBCCoUkIE(InfoExtractor):
@@ -793,11 +798,25 @@ class BBCIE(BBCCoUkIE):
             'description': 'Learn English words and phrases from this story',
         },
         'add_ie': [BBCCoUkIE.ie_key()],
+    }, {
+        # BBC Reel
+        'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
+        'info_dict': {
+            'id': 'p07c6sb9',
+            'ext': 'mp4',
+            'title': 'How positive thinking is harming your happiness',
+            'alt_title': 'The downsides of positive thinking',
+            'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
+            'duration': 235,
+            'thumbnail': r're:https?://.+/p07c9dsr.jpg',
+            'upload_date': '20190604',
+            'categories': ['Psychology'],
+        },
     }]

     @classmethod
     def suitable(cls, url):
-        EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
+        EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
         return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
                 else super(BBCIE, cls).suitable(url))

@@ -929,7 +948,7 @@ class BBCIE(BBCCoUkIE):
                 else:
                     entry['title'] = info['title']
                     entry['formats'].extend(info['formats'])
-            except Exception as e:
+            except ExtractorError as e:
                 # Some playlist URL may fail with 500, at the same time
                 # the other one may work fine (e.g.
                 # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
@@ -980,6 +999,37 @@ class BBCIE(BBCCoUkIE):
                 'subtitles': subtitles,
             }

+        # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
+        initial_data = self._parse_json(self._html_search_regex(
+            r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
+            webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
+        if initial_data:
+            init_data = try_get(
+                initial_data, lambda x: x['initData']['items'][0], dict) or {}
+            smp_data = init_data.get('smpData') or {}
+            clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
+            version_id = clip_data.get('versionID')
+            if version_id:
+                title = smp_data['title']
+                formats, subtitles = self._download_media_selector(version_id)
+                self._sort_formats(formats)
+                image_url = smp_data.get('holdingImageURL')
+                display_date = init_data.get('displayDate')
+                topic_title = init_data.get('topicTitle')
+
+                return {
+                    'id': version_id,
+                    'title': title,
+                    'formats': formats,
+                    'alt_title': init_data.get('shortTitle'),
+                    'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
+                    'description': smp_data.get('summary') or init_data.get('shortSummary'),
+                    'upload_date': display_date.replace('-', '') if display_date else None,
+                    'subtitles': subtitles,
+                    'duration': int_or_none(clip_data.get('duration')),
+                    'categories': [topic_title] if topic_title else None,
+                }
+
         # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
         # There are several setPayload calls may be present but the video
         # seems to be always related to the first one
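BBC Reel pages embed the clip metadata as HTML-escaped JSON inside a data-json attribute, which the new block above pulls out with a back-referenced quote regex before parsing. A stripped-down version of the idea (the sample markup is fabricated; inside yt-dlp, _html_search_regex additionally handles the unescaping):

    import html
    import json
    import re

    webpage = ('<script type="application/json" id="initial-data" '
               'data-json="{&quot;initData&quot;: {&quot;items&quot;: []}}"></script>')
    mobj = re.search(
        r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
        webpage)
    initial_data = json.loads(html.unescape(mobj.group('json')))
    print(initial_data)  # {'initData': {'items': []}}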
@@ -1041,7 +1091,7 @@ class BBCIE(BBCCoUkIE):
         thumbnail = None
         image_url = current_programme.get('image_url')
         if image_url:
-            thumbnail = image_url.replace('{recipe}', '1920x1920')
+            thumbnail = image_url.replace('{recipe}', 'raw')
         return {
             'id': programme_id,
             'title': title,
@@ -1293,21 +1343,149 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
             playlist_id, title, description)


-class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
-    IE_NAME = 'bbc.co.uk:iplayer:playlist'
-    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
-    _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
-    _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
+class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
+    _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
+
+    @staticmethod
+    def _get_default(episode, key, default_key='default'):
+        return try_get(episode, lambda x: x[key][default_key])
+
+    def _get_description(self, data):
+        synopsis = data.get(self._DESCRIPTION_KEY) or {}
+        return dict_get(synopsis, ('large', 'medium', 'small'))
+
+    def _fetch_page(self, programme_id, per_page, series_id, page):
+        elements = self._get_elements(self._call_api(
+            programme_id, per_page, page + 1, series_id))
+        for element in elements:
+            episode = self._get_episode(element)
+            episode_id = episode.get('id')
+            if not episode_id:
+                continue
+            thumbnail = None
+            image = self._get_episode_image(episode)
+            if image:
+                thumbnail = image.replace('{recipe}', 'raw')
+            category = self._get_default(episode, 'labels', 'category')
+            yield {
+                '_type': 'url',
+                'id': episode_id,
+                'title': self._get_episode_field(episode, 'subtitle'),
+                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
+                'thumbnail': thumbnail,
+                'description': self._get_description(episode),
+                'categories': [category] if category else None,
+                'series': self._get_episode_field(episode, 'title'),
+                'ie_key': BBCCoUkIE.ie_key(),
+            }
+
+    def _real_extract(self, url):
+        pid = self._match_id(url)
+        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        series_id = qs.get('seriesId', [None])[0]
+        page = qs.get('page', [None])[0]
+        per_page = 36 if page else self._PAGE_SIZE
+        fetch_page = functools.partial(self._fetch_page, pid, per_page, series_id)
+        entries = fetch_page(int(page) - 1) if page else OnDemandPagedList(fetch_page, self._PAGE_SIZE)
+        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
+        return self.playlist_result(
+            entries, pid, self._get_playlist_title(playlist_data),
+            self._get_description(playlist_data))
+
+
+class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
+    IE_NAME = 'bbc.co.uk:iplayer:episodes'
+    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
     _TESTS = [{
         'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
         'info_dict': {
             'id': 'b05rcz9v',
             'title': 'The Disappearance',
-            'description': 'French thriller serial about a missing teenager.',
+            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
         },
-        'playlist_mincount': 6,
-        'skip': 'This programme is not currently available on BBC iPlayer',
+        'playlist_mincount': 8,
     }, {
+        # all seasons
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
+        'info_dict': {
+            'id': 'b094m5t9',
+            'title': 'Doctor Foster',
+            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
+        },
+        'playlist_mincount': 10,
+    }, {
+        # explicit season
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
+        'info_dict': {
+            'id': 'b094m5t9',
+            'title': 'Doctor Foster',
+            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
+        },
+        'playlist_mincount': 5,
+    }, {
+        # all pages
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
+        'info_dict': {
+            'id': 'm0004c4v',
+            'title': 'Beechgrove',
+            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
+        },
+        'playlist_mincount': 37,
+    }, {
+        # explicit page
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
+        'info_dict': {
+            'id': 'm0004c4v',
+            'title': 'Beechgrove',
+            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
+        },
+        'playlist_mincount': 1,
+    }]
+    _PAGE_SIZE = 100
+    _DESCRIPTION_KEY = 'synopsis'
+
+    def _get_episode_image(self, episode):
+        return self._get_default(episode, 'image')
+
+    def _get_episode_field(self, episode, field):
+        return self._get_default(episode, field)
+
+    @staticmethod
+    def _get_elements(data):
+        return data['entities']['results']
+
+    @staticmethod
+    def _get_episode(element):
+        return element.get('episode') or {}
+
+    def _call_api(self, pid, per_page, page=1, series_id=None):
+        variables = {
+            'id': pid,
+            'page': page,
+            'perPage': per_page,
+        }
+        if series_id:
+            variables['sliceId'] = series_id
+        return self._download_json(
+            'https://graph.ibl.api.bbc.co.uk/', pid, headers={
+                'Content-Type': 'application/json'
+            }, data=json.dumps({
+                'id': '5692d93d5aac8d796a0305e895e61551',
+                'variables': variables,
+            }).encode('utf-8'))['data']['programme']
+
+    @staticmethod
+    def _get_playlist_data(data):
+        return data
+
+    def _get_playlist_title(self, data):
+        return self._get_default(data, 'title')
+
+
+class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
+    IE_NAME = 'bbc.co.uk:iplayer:group'
+    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
+    _TESTS = [{
         # Available for over a year unlike 30 days for most other programmes
         'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
         'info_dict': {
@@ -1316,14 +1494,56 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
             'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
         },
         'playlist_mincount': 10,
+    }, {
+        # all pages
+        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
+        'info_dict': {
+            'id': 'p081d7j7',
+            'title': 'Music in Scotland',
+            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
+        },
+        'playlist_mincount': 47,
+    }, {
+        # explicit page
+        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
+        'info_dict': {
+            'id': 'p081d7j7',
+            'title': 'Music in Scotland',
+            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
+        },
+        'playlist_mincount': 11,
     }]
+    _PAGE_SIZE = 200
+    _DESCRIPTION_KEY = 'synopses'

-    def _extract_title_and_description(self, webpage):
-        title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
-        description = self._search_regex(
-            r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
-            webpage, 'description', fatal=False, group='value')
-        return title, description
+    def _get_episode_image(self, episode):
+        return self._get_default(episode, 'images', 'standard')
+
+    def _get_episode_field(self, episode, field):
+        return episode.get(field)
+
+    @staticmethod
+    def _get_elements(data):
+        return data['elements']
+
+    @staticmethod
+    def _get_episode(element):
+        return element
+
+    def _call_api(self, pid, per_page, page=1, series_id=None):
+        return self._download_json(
+            'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
|
||||||
|
pid, query={
|
||||||
|
'page': page,
|
||||||
|
'per_page': per_page,
|
||||||
|
})['group_episodes']
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_playlist_data(data):
|
||||||
|
return data['group']
|
||||||
|
|
||||||
|
def _get_playlist_title(self, data):
|
||||||
|
return data.get('title')
|
||||||
|
|
||||||
|
|
||||||
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||||
|
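The new `_real_extract` above pages through iPlayer results lazily: `functools.partial` pins the programme id and page size, and `OnDemandPagedList` only invokes the page function for pages a consumer actually touches. A minimal standalone sketch of that pattern, assuming yt-dlp's `OnDemandPagedList`/`getslice` API; the in-memory `FAKE_RESULTS` list and `fetch_page` helper are illustrative stand-ins for the real API call:

    # Sketch only: FAKE_RESULTS stands in for the remote BBC API.
    import functools

    from yt_dlp.utils import OnDemandPagedList

    FAKE_RESULTS = ['episode-%d' % i for i in range(250)]
    PAGE_SIZE = 100

    def fetch_page(per_page, page):
        # `page` is 0-based here, like the _fetch_page helper in the diff
        start = page * per_page
        print('fetching page %d' % (page + 1))
        for entry in FAKE_RESULTS[start:start + per_page]:
            yield entry

    entries = OnDemandPagedList(functools.partial(fetch_page, PAGE_SIZE), PAGE_SIZE)
    print(entries.getslice(0, 5))  # only the first page is ever fetched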
yt_dlp/extractor/bilibili.py
@@ -7,6 +7,7 @@ import re

 from .common import InfoExtractor, SearchInfoExtractor
 from ..compat import (
+    compat_str,
     compat_parse_qs,
     compat_urlparse,
 )
@@ -15,6 +16,7 @@ from ..utils import (
     int_or_none,
     float_or_none,
     parse_iso8601,
+    try_get,
     smuggle_url,
     str_or_none,
     strip_jsonp,
@@ -113,6 +115,14 @@ class BiliBiliIE(InfoExtractor):
         # new BV video id format
         'url': 'https://www.bilibili.com/video/BV1JE411F741',
         'only_matching': True,
+    }, {
+        # Anthology
+        'url': 'https://www.bilibili.com/video/BV1bK411W797',
+        'info_dict': {
+            'id': 'BV1bK411W797',
+            'title': '物语中的人物是如何吐槽自己的OP的'
+        },
+        'playlist_count': 17,
     }]

     _APP_KEY = 'iVGUTjsxvpLeuDCf'
@@ -139,9 +149,20 @@ class BiliBiliIE(InfoExtractor):
         page_id = mobj.group('page')
         webpage = self._download_webpage(url, video_id)

+        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
+        # If the video has no page argument, check to see if it's an anthology
+        if page_id is None:
+            if not self._downloader.params.get('noplaylist'):
+                r = self._extract_anthology_entries(bv_id, video_id, webpage)
+                if r is not None:
+                    self.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id)
+                    return r
+            else:
+                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
         if 'anime/' not in url:
             cid = self._search_regex(
-                r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid',
+                r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + compat_str(page_id), webpage, 'cid',
                 default=None
             ) or self._search_regex(
                 r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
@@ -170,6 +191,7 @@ class BiliBiliIE(InfoExtractor):
             cid = js['result']['cid']

         headers = {
+            'Accept': 'application/json',
             'Referer': url
         }
         headers.update(self.geo_verification_headers())
@@ -223,7 +245,18 @@ class BiliBiliIE(InfoExtractor):
         title = self._html_search_regex(
             (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
              r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
-            group='title') + ('_p' + str(page_id) if page_id is not None else '')
+            group='title')
+
+        # Get part title for anthologies
+        if page_id is not None:
+            # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video
+            part_title = try_get(
+                self._download_json(
+                    "https://api.bilibili.com/x/player/pagelist?bvid=%s&jsonp=jsonp" % bv_id,
+                    video_id, note='Extracting videos in anthology'),
+                lambda x: x['data'][int(page_id) - 1]['part'])
+            title = part_title or title
+
         description = self._html_search_meta('description', webpage)
         timestamp = unified_timestamp(self._html_search_regex(
             r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
@@ -233,7 +266,7 @@ class BiliBiliIE(InfoExtractor):

         # TODO 'view_count' requires deobfuscating Javascript
         info = {
-            'id': str(video_id) if page_id is None else '%s_p%s' % (video_id, page_id),
+            'id': compat_str(video_id) if page_id is None else '%s_p%s' % (video_id, page_id),
             'cid': cid,
             'title': title,
             'description': description,
@@ -243,7 +276,7 @@ class BiliBiliIE(InfoExtractor):
         }

         uploader_mobj = re.search(
-            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
+            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
             webpage)
         if uploader_mobj:
             info.update({
@@ -255,10 +288,6 @@ class BiliBiliIE(InfoExtractor):
             info['uploader'] = self._html_search_meta(
                 'author', webpage, 'uploader', default=None)

-        comments = None
-        if self._downloader.params.get('getcomments', False):
-            comments = self._get_all_comment_pages(video_id)
-
         raw_danmaku = self._get_raw_danmaku(video_id, cid)

         raw_tags = self._get_tags(video_id)
@@ -266,11 +295,18 @@ class BiliBiliIE(InfoExtractor):

         top_level_info = {
             'raw_danmaku': raw_danmaku,
-            'comments': comments,
-            'comment_count': len(comments) if comments is not None else None,
             'tags': tags,
             'raw_tags': raw_tags,
         }
+        if self._downloader.params.get('getcomments', False):
+            def get_comments():
+                comments = self._get_all_comment_pages(video_id)
+                return {
+                    'comments': comments,
+                    'comment_count': len(comments)
+                }
+
+            top_level_info['__post_extractor'] = get_comments
+
         '''
         # Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3
@@ -296,7 +332,7 @@ class BiliBiliIE(InfoExtractor):

         global_info = {
             '_type': 'multi_video',
-            'id': video_id,
+            'id': compat_str(video_id),
             'bv_id': bv_id,
             'title': title,
             'description': description,
@@ -308,6 +344,20 @@ class BiliBiliIE(InfoExtractor):

         return global_info

+    def _extract_anthology_entries(self, bv_id, video_id, webpage):
+        title = self._html_search_regex(
+            (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
+             r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
+            group='title')
+        json_data = self._download_json(
+            "https://api.bilibili.com/x/player/pagelist?bvid=%s&jsonp=jsonp" % bv_id,
+            video_id, note='Extracting videos in anthology')
+
+        if len(json_data['data']) > 1:
+            return self.playlist_from_matches(
+                json_data['data'], bv_id, title, ie=BiliBiliIE.ie_key(),
+                getter=lambda entry: 'https://www.bilibili.com/video/%s?p=%d' % (bv_id, entry['page']))
+
     def _get_video_id_set(self, id, is_bv):
         query = {'bvid': id} if is_bv else {'aid': id}
         response = self._download_json(
@@ -502,7 +552,7 @@ class BiliBiliSearchIE(SearchInfoExtractor):

         videos = data['result']
         for video in videos:
-            e = self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
+            e = self.url_result(video['arcurl'], 'BiliBili', compat_str(video['aid']))
             entries.append(e)

         if(len(entries) >= n or len(videos) >= BiliBiliSearchIE.MAX_NUMBER_OF_RESULTS):
@@ -555,6 +605,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
         formats = [{
             'url': play_data['cdns'][0],
             'filesize': int_or_none(play_data.get('size')),
+            'vcodec': 'none'
         }]

         song = self._call_api('song/info', au_id)
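The bilibili change above also moves comment fetching behind the `__post_extractor` hook, so the slow comment API is only queried when comment extraction is requested, and only after the rest of extraction has succeeded. A framework-free sketch of the pattern (the names and fake data here are illustrative, not from the diff):

    def build_info(video_id, want_comments):
        info = {'id': video_id, 'title': 'example'}
        if want_comments:
            def get_comments():
                # stands in for self._get_all_comment_pages(video_id)
                comments = ['first!', 'nice video']
                return {'comments': comments, 'comment_count': len(comments)}
            info['__post_extractor'] = get_comments
        return info

    info = build_info('BV1bK411W797', want_comments=True)
    info.update(info.pop('__post_extractor')())  # roughly what the core does later
    print(info['comment_count'])  # -> 2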
yt_dlp/extractor/cbs.py
@@ -27,10 +27,10 @@ class CBSBaseIE(ThePlatformFeedIE):


 class CBSIE(CBSBaseIE):
-    _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
+    _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'

     _TESTS = [{
-        'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
+        'url': 'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
         'info_dict': {
             'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
             'ext': 'mp4',
@@ -52,16 +52,19 @@ class CBSIE(CBSBaseIE):
     }, {
         'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
         'only_matching': True,
+    }, {
+        'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
+        'only_matching': True,
     }]

     def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
         items_data = self._download_xml(
-            'http://can.cbs.com/thunder/player/videoPlayerService.php',
+            'https://can.cbs.com/thunder/player/videoPlayerService.php',
             content_id, query={'partner': site, 'contentId': content_id})
         video_data = xpath_element(items_data, './/item')
         title = xpath_text(video_data, 'videoTitle', 'title') or xpath_text(video_data, 'videotitle', 'title')
         tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
-        tp_release_url = 'http://link.theplatform.com/s/' + tp_path
+        tp_release_url = 'https://link.theplatform.com/s/' + tp_path

         asset_types = []
         subtitles = {}
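A quick standalone check (illustrative, not part of the commit) that the widened `_VALID_URL` above accepts both hosts and still captures the same id group:

    import re

    _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'

    for url in (
        'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
        'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
    ):
        print(re.match(_VALID_URL, url).group('id'))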
yt_dlp/extractor/cbssports.py
@@ -1,38 +1,113 @@
 from __future__ import unicode_literals

-from .cbs import CBSBaseIE
+import re
+
+# from .cbs import CBSBaseIE
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    try_get,
+)


-class CBSSportsIE(CBSBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P<id>[^/?#&]+)'
+# class CBSSportsEmbedIE(CBSBaseIE):
+class CBSSportsEmbedIE(InfoExtractor):
+    IE_NAME = 'cbssports:embed'
+    _VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
+        (?:
+            ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
+            pcid%3D(?P<pcid>\d+)
+        )'''
     _TESTS = [{
-        'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/',
-        'info_dict': {
-            'id': '1214315075735',
-            'ext': 'mp4',
-            'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder',
-            'description': 'md5:df6f48622612c2d6bd2e295ddef58def',
-            'timestamp': 1524111457,
-            'upload_date': '20180419',
-            'uploader': 'CBSI-NEW',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        }
+        'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
+        'only_matching': True,
     }, {
-        'url': 'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/',
+        'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
         'only_matching': True,
     }]

-    def _extract_video_info(self, filter_query, video_id):
-        return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
+    # def _extract_video_info(self, filter_query, video_id):
+    #     return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
+
+    def _real_extract(self, url):
+        uuid, pcid = re.match(self._VALID_URL, url).groups()
+        query = {'id': uuid} if uuid else {'pcid': pcid}
+        video = self._download_json(
+            'https://www.cbssports.com/api/content/video/',
+            uuid or pcid, query=query)[0]
+        video_id = video['id']
+        title = video['title']
+        metadata = video.get('metaData') or {}
+        # return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
+        # return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)
+
+        formats = self._extract_m3u8_formats(
+            metadata['files'][0]['url'], video_id, 'mp4',
+            'm3u8_native', m3u8_id='hls', fatal=False)
+        self._sort_formats(formats)
+
+        image = video.get('image')
+        thumbnails = None
+        if image:
+            image_path = image.get('path')
+            if image_path:
+                thumbnails = [{
+                    'url': image_path,
+                    'width': int_or_none(image.get('width')),
+                    'height': int_or_none(image.get('height')),
+                    'filesize': int_or_none(image.get('size')),
+                }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'description': video.get('description'),
+            'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
+            'duration': int_or_none(metadata.get('duration')),
+        }
+
+
+class CBSSportsBaseIE(InfoExtractor):
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        video_id = self._search_regex(
-            [r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'],
-            webpage, 'video id')
-        return self._extract_video_info('byId=%s' % video_id, video_id)
+        iframe_url = self._search_regex(
+            r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"',
+            webpage, 'embed url')
+        return self.url_result(iframe_url, CBSSportsEmbedIE.ie_key())
+
+
+class CBSSportsIE(CBSSportsBaseIE):
+    IE_NAME = 'cbssports'
+    _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
+        'info_dict': {
+            'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
+            'ext': 'mp4',
+            'title': 'Cover 3: Stanford Spring Gleaning',
+            'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
+            'timestamp': 1617218398,
+            'upload_date': '20210331',
+            'duration': 502,
+        },
+    }]
+
+
+class TwentyFourSevenSportsIE(CBSSportsBaseIE):
+    IE_NAME = '247sports'
+    _VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
+        'info_dict': {
+            'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
+            'ext': 'mp4',
+            'title': '2021 QB Jake Garcia senior highlights through five games',
+            'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
+            'timestamp': 1607114223,
+            'upload_date': '20201204',
+            'duration': 208,
+        },
+    }]
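The restructuring above splits extraction in two: the page extractors (`CBSSportsIE`, `TwentyFourSevenSportsIE`) only locate the player iframe and hand the embed URL to `CBSSportsEmbedIE` via `url_result`. A simplified, framework-free sketch of the locating step, reusing the same regex as the diff (the sample HTML is invented):

    import re

    def find_embed_url(webpage):
        m = re.search(
            r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"',
            webpage)
        return m.group(1) if m else None

    html = '<iframe data-src="https://www.cbssports.com/player/embed/?args=ids%3Db56c03a6"></iframe>'
    print(find_embed_url(html))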
yt_dlp/extractor/common.py
@@ -17,7 +17,7 @@ import math

 from ..compat import (
     compat_cookiejar_Cookie,
-    compat_cookies,
+    compat_cookies_SimpleCookie,
     compat_etree_Element,
     compat_etree_fromstring,
     compat_getpass,
@@ -231,8 +231,9 @@ class InfoExtractor(object):
     uploader:       Full name of the video uploader.
     license:        License name the video is licensed under.
     creator:        The creator of the video.
+    release_timestamp: UNIX timestamp of the moment the video was released.
     release_date:   The date (YYYYMMDD) when the video was released.
-    timestamp:      UNIX timestamp of the moment the video became available.
+    timestamp:      UNIX timestamp of the moment the video was uploaded
     upload_date:    Video upload date (YYYYMMDD).
                     If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
@@ -251,8 +252,8 @@ class InfoExtractor(object):
                         * "data": The subtitles file contents
                         * "url": A URL pointing to the subtitles file
                     "ext" will be calculated from URL if missing
-    automatic_captions: Like 'subtitles', used by the YoutubeIE for
-                    automatically generated captions
+    automatic_captions: Like 'subtitles'; contains automatically generated
+                    captions instead of normal subtitles
     duration:       Length of the video in seconds, as an integer or float.
     view_count:     How many users have watched the video on the platform.
     like_count:     Number of positive ratings of the video
@@ -264,6 +265,7 @@ class InfoExtractor(object):
                     properties (all but one of text or html optional):
                         * "author" - human-readable name of the comment author
                         * "author_id" - user ID of the comment author
+                        * "author_thumbnail" - The thumbnail of the comment author
                         * "id" - Comment ID
                         * "html" - Comment as HTML
                         * "text" - Plain text of the comment
@@ -271,6 +273,12 @@ class InfoExtractor(object):
                         * "parent" - ID of the comment this one is replying to.
                                      Set to "root" to indicate that this is a
                                      comment to the original video.
+                        * "like_count" - Number of positive ratings of the comment
+                        * "dislike_count" - Number of negative ratings of the comment
+                        * "is_favorited" - Whether the comment is marked as
+                                           favorite by the video uploader
+                        * "author_is_uploader" - Whether the comment is made by
+                                                 the video uploader
     age_limit:      Age restriction for the video, as an integer (years)
     webpage_url:    The URL to the video webpage, if given to yt-dlp it
                     should allow to get the same result again. (It will be set
@@ -293,7 +301,19 @@ class InfoExtractor(object):
     playable_in_embed: Whether this video is allowed to play in embedded
                     players on other sites. Can be True (=always allowed),
                     False (=never allowed), None (=unknown), or a string
-                    specifying the criteria for embedability (Eg: 'whitelist').
+                    specifying the criteria for embedability (Eg: 'whitelist')
+    availability:   Under what condition the video is available. One of
+                    'private', 'premium_only', 'subscriber_only', 'needs_auth',
+                    'unlisted' or 'public'. Use 'InfoExtractor._availability'
+                    to set it
+    __post_extractor: A function to be called just before the metadata is
+                    written to either disk, logger or console. The function
+                    must return a dict which will be added to the info_dict.
+                    This is usefull for additional information that is
+                    time-consuming to extract. Note that the fields thus
+                    extracted will not be available to output template and
+                    match_filter. So, only "comments" and "comment_count" are
+                    currently allowed to be extracted via this method.

     The following fields should only be used when the video belongs to some logical
     chapter or section:
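To make the two new info_dict fields documented above concrete, here is a hedged sketch of what an extractor might return; the field values are invented for illustration:

    def example_info_dict():
        def slow_metadata():
            # expensive work deferred until just before metadata is written
            return {'comments': [], 'comment_count': 0}

        return {
            'id': 'xyz',
            'title': 'Example',
            'availability': 'public',           # one of the six documented values
            'release_timestamp': 1617218398,    # new field documented above
            '__post_extractor': slow_metadata,  # only comments/comment_count allowed
        }

    print(example_info_dict()['availability'])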
@@ -606,6 +626,14 @@ class InfoExtractor(object):

         See _download_webpage docstring for arguments specification.
         """
+        if not self._downloader._first_webpage_request:
+            sleep_interval = float_or_none(self._downloader.params.get('sleep_interval_requests')) or 0
+            if sleep_interval > 0:
+                self.to_screen('Sleeping %s seconds ...' % sleep_interval)
+                time.sleep(sleep_interval)
+        else:
+            self._downloader._first_webpage_request = False
+
         if note is None:
             self.report_download_webpage(video_id)
         elif note is not False:
@@ -1280,6 +1308,7 @@ class InfoExtractor(object):

         def extract_video_object(e):
             assert e['@type'] == 'VideoObject'
+            author = e.get('author')
             info.update({
                 'url': url_or_none(e.get('contentUrl')),
                 'title': unescapeHTML(e.get('name')),
@@ -1287,7 +1316,11 @@ class InfoExtractor(object):
                 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
                 'duration': parse_duration(e.get('duration')),
                 'timestamp': unified_timestamp(e.get('uploadDate')),
-                'uploader': str_or_none(e.get('author')),
+                # author can be an instance of 'Organization' or 'Person' types.
+                # both types can have 'name' property(inherited from 'Thing' type). [1]
+                # however some websites are using 'Text' type instead.
+                # 1. https://schema.org/VideoObject
+                'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
                 'filesize': float_or_none(e.get('contentSize')),
                 'tbr': int_or_none(e.get('bitrate')),
                 'width': int_or_none(e.get('width')),
@@ -1370,7 +1403,7 @@ class InfoExtractor(object):
         return self._hidden_inputs(form)

     class FormatSort:
-        regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<seperator>[~:])(?P<limit>.*?))?)? *$'
+        regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'

         default = ('hidden', 'hasvid', 'ie_pref', 'lang', 'quality',
                    'res', 'fps', 'codec:vp9.2', 'size', 'br', 'asr',
@@ -1393,8 +1426,8 @@ class InfoExtractor(object):
             'ie_pref': {'priority': True, 'type': 'extractor'},
             'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
             'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
-            'lang': {'priority': True, 'convert': 'ignore', 'type': 'extractor', 'field': 'language_preference'},
-            'quality': {'convert': 'float_none', 'type': 'extractor'},
+            'lang': {'priority': True, 'convert': 'ignore', 'field': 'language_preference'},
+            'quality': {'convert': 'float_none'},
             'filesize': {'convert': 'bytes'},
             'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
             'id': {'convert': 'string', 'field': 'format_id'},
@@ -1405,7 +1438,7 @@ class InfoExtractor(object):
             'vbr': {'convert': 'float_none'},
             'abr': {'convert': 'float_none'},
             'asr': {'convert': 'float_none'},
-            'source': {'convert': 'ignore', 'type': 'extractor', 'field': 'source_preference'},
+            'source': {'convert': 'ignore', 'field': 'source_preference'},

             'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
             'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
@@ -1530,7 +1563,7 @@ class InfoExtractor(object):
             if self._get_field_setting(field, 'type') == 'alias':
                 field = self._get_field_setting(field, 'field')
             reverse = match.group('reverse') is not None
-            closest = match.group('seperator') == '~'
+            closest = match.group('separator') == '~'
             limit_text = match.group('limit')

             has_limit = limit_text is not None
@@ -1547,6 +1580,7 @@ class InfoExtractor(object):
                 else None)

         def print_verbose_info(self, to_screen):
+            if self._sort_user:
                 to_screen('[debug] Sort order given by user: %s' % ','.join(self._sort_user))
             if self._sort_extractor:
                 to_screen('[debug] Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
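The JSON-LD change above normalizes `author`, which schema.org allows to be a Person/Organization object or plain text. The same logic isolated, using the built-in `str` where the diff uses `compat_str`:

    # `author` may be a dict ({'@type': 'Person', 'name': ...}) or a bare string.
    def uploader_from_author(author):
        if isinstance(author, dict):
            return author.get('name')
        return author if isinstance(author, str) else None

    print(uploader_from_author({'@type': 'Person', 'name': 'Jane'}))  # Jane
    print(uploader_from_author('Jane'))                               # Jane
    print(uploader_from_author(42))                                   # None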
@@ -1833,8 +1867,9 @@ class InfoExtractor(object):

     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
                               entry_protocol='m3u8', preference=None, quality=None,
-                              m3u8_id=None, live=False, note=None, errnote=None,
-                              fatal=True, data=None, headers={}, query={}):
+                              m3u8_id=None, note=None, errnote=None,
+                              fatal=True, live=False, data=None, headers={},
+                              query={}):
         res = self._download_webpage_handle(
             m3u8_url, video_id,
             note=note or 'Downloading m3u8 information',
@@ -1860,7 +1895,8 @@ class InfoExtractor(object):
         if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
             return []

-        if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc):  # Apple FairPlay
+        if (not self._downloader.params.get('allow_unplayable_formats')
+                and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)):  # Apple FairPlay
             return []

         formats = []
@@ -1888,13 +1924,16 @@ class InfoExtractor(object):
         # media playlist and MUST NOT appear in master playlist thus we can
         # clearly detect media playlist with this criterion.

-        def _extract_m3u8_playlist_formats(format_url, m3u8_doc=None):
+        def _extract_m3u8_playlist_formats(format_url=None, m3u8_doc=None, video_id=None,
+                                           fatal=True, data=None, headers={}):
             if not m3u8_doc:
+                if not format_url:
+                    return []
                 res = self._download_webpage_handle(
                     format_url, video_id,
                     note=False,
-                    errnote=errnote or 'Failed to download m3u8 playlist information',
-                    fatal=fatal, data=data, headers=headers, query=query)
+                    errnote='Failed to download m3u8 playlist information',
+                    fatal=fatal, data=data, headers=headers)

                 if res is False:
                     return []
@@ -1928,7 +1967,7 @@ class InfoExtractor(object):

         if '#EXT-X-TARGETDURATION' in m3u8_doc:  # media playlist, return as is

-            playlist_formats = _extract_m3u8_playlist_formats(m3u8_doc, True)
+            playlist_formats = _extract_m3u8_playlist_formats(m3u8_doc=m3u8_doc)

             for format in playlist_formats:
                 format_id = []
@@ -1966,7 +2005,8 @@ class InfoExtractor(object):
             if media_url:
                 manifest_url = format_url(media_url)
                 format_id = []
-                playlist_formats = _extract_m3u8_playlist_formats(manifest_url)
+                playlist_formats = _extract_m3u8_playlist_formats(manifest_url, video_id=video_id,
+                                                                  fatal=fatal, data=data, headers=headers)

                 for format in playlist_formats:
                     format_index = format.get('index')
@@ -2027,13 +2067,14 @@ class InfoExtractor(object):
                     or last_stream_inf.get('BANDWIDTH'), scale=1000)
                 manifest_url = format_url(line.strip())

-                playlist_formats = _extract_m3u8_playlist_formats(manifest_url)
+                playlist_formats = _extract_m3u8_playlist_formats(manifest_url, video_id=video_id,
+                                                                  fatal=fatal, data=data, headers=headers)

-                for format in playlist_formats:
+                for frmt in playlist_formats:
                     format_id = []
                     if m3u8_id:
                         format_id.append(m3u8_id)
-                    format_index = format.get('index')
+                    format_index = frmt.get('index')
                     stream_name = build_stream_name()
                     # Bandwidth of live streams may differ over time thus making
                     # format_id unpredictable. So it's better to keep provided
@@ -2088,6 +2129,8 @@ class InfoExtractor(object):
                         # TODO: update acodec for audio only formats with
                         # the same GROUP-ID
                         f['acodec'] = 'none'
+                    if not f.get('ext'):
+                        f['ext'] = 'm4a' if f.get('vcodec') == 'none' else 'mp4'
                     formats.append(f)

             # for DailyMotion
@@ -2370,7 +2413,7 @@ class InfoExtractor(object):
        http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
         2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
         """
-        if not self._downloader.params.get('dynamic_mpd'):
+        if not self._downloader.params.get('dynamic_mpd', True):
             if mpd_doc.get('type') == 'dynamic':
                 return []

@@ -3182,10 +3225,10 @@ class InfoExtractor(object):
             self._downloader.cookiejar.set_cookie(cookie)

     def _get_cookies(self, url):
-        """ Return a compat_cookies.SimpleCookie with the cookies for the url """
+        """ Return a compat_cookies_SimpleCookie with the cookies for the url """
         req = sanitized_Request(url)
         self._downloader.cookiejar.add_cookie_header(req)
-        return compat_cookies.SimpleCookie(req.get_header('Cookie'))
+        return compat_cookies_SimpleCookie(req.get_header('Cookie'))

     def _apply_first_set_cookie_header(self, url_handle, cookie):
         """
@@ -3300,6 +3343,20 @@ class InfoExtractor(object):
     def _generic_title(self, url):
         return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])

+    @staticmethod
+    def _availability(is_private, needs_premium, needs_subscription, needs_auth, is_unlisted):
+        all_known = all(map(
+            lambda x: x is not None,
+            (is_private, needs_premium, needs_subscription, needs_auth, is_unlisted)))
+        return (
+            'private' if is_private
+            else 'premium_only' if needs_premium
+            else 'subscriber_only' if needs_subscription
+            else 'needs_auth' if needs_auth
+            else 'unlisted' if is_unlisted
+            else 'public' if all_known
+            else None)
+

 class SearchInfoExtractor(InfoExtractor):
     """
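The `_availability` helper added at the end of the hunk above is self-contained enough to reproduce standalone; note that 'public' is only reported when every flag is actually known (none is None):

    def availability(is_private, needs_premium, needs_subscription, needs_auth, is_unlisted):
        flags = (is_private, needs_premium, needs_subscription, needs_auth, is_unlisted)
        all_known = all(x is not None for x in flags)
        return (
            'private' if is_private
            else 'premium_only' if needs_premium
            else 'subscriber_only' if needs_subscription
            else 'needs_auth' if needs_auth
            else 'unlisted' if is_unlisted
            else 'public' if all_known
            else None)

    print(availability(False, False, False, False, False))  # public
    print(availability(False, None, False, False, False))   # None: not all flags known
    print(availability(True, None, None, None, None))       # private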
yt_dlp/extractor/commonprotocols.py
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
 from ..compat import (
     compat_urlparse,
@@ -58,3 +60,16 @@ class MmsIE(InfoExtractor):
             'title': title,
             'url': url,
         }
+
+
+class ViewSourceIE(InfoExtractor):
+    IE_DESC = False
+    _VALID_URL = r'view-source:(?P<url>.+)'
+
+    _TEST = {
+        'url': 'view-source:https://www.youtube.com/watch?v=BaW_jenozKc',
+        'only_matching': True
+    }
+
+    def _real_extract(self, url):
+        return self.url_result(re.match(self._VALID_URL, url).group('url'))
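`ViewSourceIE` above exists so URLs copied from a browser's `view-source:` tab still work; it just peels off the prefix and re-dispatches the inner URL. The regex in isolation:

    import re

    m = re.match(r'view-source:(?P<url>.+)',
                 'view-source:https://www.youtube.com/watch?v=BaW_jenozKc')
    print(m.group('url'))  # https://www.youtube.com/watch?v=BaW_jenozKc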
yt_dlp/extractor/curiositystream.py
@@ -25,12 +25,12 @@ class CuriosityStreamBaseIE(InfoExtractor):
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, error), expected=True)

-    def _call_api(self, path, video_id):
+    def _call_api(self, path, video_id, query=None):
         headers = {}
         if self._auth_token:
             headers['X-Auth-Token'] = self._auth_token
         result = self._download_json(
-            self._API_BASE_URL + path, video_id, headers=headers)
+            self._API_BASE_URL + path, video_id, headers=headers, query=query)
         self._handle_errors(result)
         return result['data']

@@ -52,27 +52,38 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
     _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
     _TEST = {
         'url': 'https://app.curiositystream.com/video/2',
-        'md5': '262bb2f257ff301115f1973540de8983',
         'info_dict': {
             'id': '2',
             'ext': 'mp4',
             'title': 'How Did You Develop The Internet?',
             'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
-        }
+        },
+        'params': {
+            'format': 'bestvideo',
+            # m3u8 download
+            'skip_download': True,
+        },
     }

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        media = self._call_api('media/' + video_id, video_id)
-        title = media['title']

         formats = []
+        for encoding_format in ('m3u8', 'mpd'):
+            media = self._call_api('media/' + video_id, video_id, query={
+                'encodingsNew': 'true',
+                'encodingsFormat': encoding_format,
+            })
             for encoding in media.get('encodings', []):
-                m3u8_url = encoding.get('master_playlist_url')
-                if m3u8_url:
+                playlist_url = encoding.get('master_playlist_url')
+                if encoding_format == 'm3u8':
+                    # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
                     formats.extend(self._extract_m3u8_formats(
-                        m3u8_url, video_id, 'mp4', 'm3u8_native',
+                        playlist_url, video_id, 'mp4',
                         m3u8_id='hls', fatal=False))
+                elif encoding_format == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        playlist_url, video_id, mpd_id='dash', fatal=False))
                 encoding_url = encoding.get('url')
                 file_url = encoding.get('file_url')
                 if not encoding_url and not file_url:
@@ -108,6 +119,8 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
             formats.append(fmt)
         self._sort_formats(formats)

+        title = media['title']
+
         subtitles = {}
         for closed_caption in media.get('closed_captions', []):
             sub_url = closed_caption.get('file')
@@ -140,7 +153,7 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
             'title': 'Curious Minds: The Internet',
             'description': 'How is the internet shaping our lives in the 21st Century?',
         },
-        'playlist_mincount': 17,
+        'playlist_mincount': 16,
     }, {
         'url': 'https://curiositystream.com/series/2',
         'only_matching': True,
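The CuriosityStream change above requests the media object once per manifest flavour (`encodingsFormat=m3u8`, then `mpd`) instead of relying on a single response. The request pattern isolated, with `call_api` as a stand-in for `_call_api` and an invented fake API for demonstration:

    def collect_playlist_urls(call_api, video_id):
        playlists = []
        for encoding_format in ('m3u8', 'mpd'):
            media = call_api('media/' + video_id, video_id, query={
                'encodingsNew': 'true',
                'encodingsFormat': encoding_format,
            })
            for encoding in media.get('encodings') or []:
                playlists.append((encoding_format, encoding.get('master_playlist_url')))
        return playlists

    fake = lambda path, vid, query: {
        'encodings': [{'master_playlist_url': 'https://example.invalid/%s' % query['encodingsFormat']}]}
    print(collect_playlist_urls(fake, '2'))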
yt_dlp/extractor/discoveryplusindia.py (new file, 100 lines)
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from ..compat import compat_str
+from ..utils import try_get
+from .common import InfoExtractor
+from .dplay import DPlayIE
+
+
+class DiscoveryPlusIndiaIE(DPlayIE):
+    _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' + DPlayIE._PATH_REGEX
+    _TESTS = [{
+        'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE',
+        'info_dict': {
+            'id': '27104',
+            'ext': 'mp4',
+            'display_id': 'how-do-they-do-it/fugu-and-more',
+            'title': 'Fugu and More',
+            'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.',
+            'duration': 1319,
+            'timestamp': 1582309800,
+            'upload_date': '20200221',
+            'series': 'How Do They Do It?',
+            'season_number': 8,
+            'episode_number': 2,
+            'creator': 'Discovery Channel',
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        },
+        'skip': 'Cookies (not necessarily logged in) are needed'
+    }]
+
+    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+        headers['x-disco-params'] = 'realm=%s' % realm
+        headers['x-disco-client'] = 'WEB:UNKNOWN:dplus-india:17.0.0'
+
+    def _download_video_playback_info(self, disco_base, video_id, headers):
+        return self._download_json(
+            disco_base + 'playback/v3/videoPlaybackInfo',
+            video_id, headers=headers, data=json.dumps({
+                'deviceInfo': {
+                    'adBlocker': False,
+                },
+                'videoId': video_id,
+            }).encode('utf-8'))['data']['attributes']['streaming']
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        return self._get_disco_api_info(
+            url, display_id, 'ap2-prod-direct.discoveryplus.in', 'dplusindia', 'in')
+
+
+class DiscoveryPlusIndiaShowIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P<show_name>[^/]+)/?(?:[?#]|$)'
+    _TESTS = [{
+        'url': 'https://www.discoveryplus.in/show/how-do-they-do-it',
+        'playlist_mincount': 140,
+        'info_dict': {
+            'id': 'how-do-they-do-it',
+        },
+    }
+    ]
+
+    def _entries(self, show_name):
+        headers = {
+            'x-disco-client': 'WEB:UNKNOWN:dplus-india:prod',
+            'x-disco-params': 'realm=dplusindia',
+            'referer': 'https://www.discoveryplus.in/',
+        }
+        show_url = 'https://ap2-prod-direct.discoveryplus.in/cms/routes/show/{}?include=default'.format(show_name)
+        show_json = self._download_json(show_url,
+                                        video_id=show_name,
+                                        headers=headers)['included'][4]['attributes']['component']
+        show_id = show_json['mandatoryParams'].split('=')[-1]
+        season_url = 'https://ap2-prod-direct.discoveryplus.in/content/videos?sort=episodeNumber&filter[seasonNumber]={}&filter[show.id]={}&page[size]=100&page[number]={}'
+        for season in show_json['filters'][0]['options']:
+            season_id = season['id']
+            total_pages, page_num = 1, 0
+            while page_num < total_pages:
+                season_json = self._download_json(season_url.format(season_id, show_id, compat_str(page_num + 1)),
+                                                  video_id=show_id, headers=headers,
+                                                  note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
+                if page_num == 0:
+                    total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1
+                episodes_json = season_json['data']
+                for episode in episodes_json:
+                    video_id = episode['attributes']['path']
+                    yield self.url_result(
+                        'https://discoveryplus.in/videos/%s' % video_id,
+                        ie=DiscoveryPlusIndiaIE.ie_key(), video_id=video_id)
+                page_num += 1
+
+    def _real_extract(self, url):
+        show_name = re.match(self._VALID_URL, url).group('show_name')
+        return self.playlist_result(self._entries(show_name), playlist_id=show_name)
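`DiscoveryPlusIndiaShowIE._entries` above follows a "learn totalPages from the first page" loop. The same loop isolated with a fake downloader (the data is invented for illustration):

    def iter_pages(download_page):
        total_pages, page_num = 1, 0
        while page_num < total_pages:
            page = download_page(page_num + 1)  # API pages are 1-based
            if page_num == 0:
                total_pages = page.get('meta', {}).get('totalPages') or 1
            for item in page['data']:
                yield item
            page_num += 1

    fake_api = {1: {'meta': {'totalPages': 2}, 'data': ['a', 'b']},
                2: {'data': ['c']}}
    print(list(iter_pages(lambda n: fake_api[n])))  # ['a', 'b', 'c']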
yt_dlp/extractor/dreisat.py
@@ -1,193 +1,43 @@
 from __future__ import unicode_literals

-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    unified_strdate,
-    xpath_text,
-    determine_ext,
-    float_or_none,
-    ExtractorError,
-)
+from .zdf import ZDFIE


-class DreiSatIE(InfoExtractor):
+class DreiSatIE(ZDFIE):
     IE_NAME = '3sat'
-    _GEO_COUNTRIES = ['DE']
-    _VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
-    _TESTS = [
-        {
-            'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
-            'md5': 'be37228896d30a88f315b638900a026e',
-            'info_dict': {
-                'id': '45918',
+    _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
+    _TESTS = [{
+        # Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
+        'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
+        'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
+        'info_dict': {
+            'id': '141007_ab18_10wochensommer_film',
+            'ext': 'mp4',
+            'title': 'Ab 18! - 10 Wochen Sommer',
+            'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
+            'duration': 2660,
+            'timestamp': 1608604200,
+            'upload_date': '20201222',
+        },
+    }, {
+        'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
+        'info_dict': {
+            'id': '140913_sendung_schweizweit',
             'ext': 'mp4',
             'title': 'Waidmannsheil',
             'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
-            'uploader': 'SCHWEIZWEIT',
-            'uploader_id': '100000210',
+            'timestamp': 1410623100,
             'upload_date': '20140913'
         },
         'params': {
-            'skip_download': True,  # m3u8 downloads
+            'skip_download': True,
         }
-        },
-        {
-            'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
-            'only_matching': True,
-        },
-    ]
+    }, {
+        # Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
+        'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
+        'only_matching': True,
+    }, {
+        # Same as https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
+        'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html',
+        'only_matching': True,
+    }]

-    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
-        param_groups = {}
-        for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
-            group_id = param_group.get(self._xpath_ns(
-                'id', 'http://www.w3.org/XML/1998/namespace'))
-            params = {}
-            for param in param_group:
-                params[param.get('name')] = param.get('value')
-            param_groups[group_id] = params
-
-        formats = []
-        for video in smil.findall(self._xpath_ns('.//video', namespace)):
-            src = video.get('src')
-            if not src:
-                continue
-            bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
-            group_id = video.get('paramGroup')
-            param_group = param_groups[group_id]
-            for proto in param_group['protocols'].split(','):
-                formats.append({
-                    'url': '%s://%s' % (proto, param_group['host']),
-                    'app': param_group['app'],
-                    'play_path': src,
-                    'ext': 'flv',
-                    'format_id': '%s-%d' % (proto, bitrate),
-                    'tbr': bitrate,
-                })
-        self._sort_formats(formats)
-        return formats
-
-    def extract_from_xml_url(self, video_id, xml_url):
-        doc = self._download_xml(
-            xml_url, video_id,
-            note='Downloading video info',
-            errnote='Failed to download video info')
-
-        status_code = xpath_text(doc, './status/statuscode')
-        if status_code and status_code != 'ok':
-            if status_code == 'notVisibleAnymore':
-                message = 'Video %s is not available' % video_id
-            else:
-                message = '%s returned error: %s' % (self.IE_NAME, status_code)
-            raise ExtractorError(message, expected=True)
-
-        title = xpath_text(doc, './/information/title', 'title', True)
-
-        urls = []
-        formats = []
-        for fnode in doc.findall('.//formitaeten/formitaet'):
-            video_url = xpath_text(fnode, 'url')
-            if not video_url or video_url in urls:
-                continue
-            urls.append(video_url)
-
-            is_available = 'http://www.metafilegenerator' not in video_url
-            geoloced = 'static_geoloced_online' in video_url
-            if not is_available or geoloced:
-                continue
-
-            format_id = fnode.attrib['basetype']
-            format_m = re.match(r'''(?x)
-                (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
-                (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
-            ''', format_id)
-
-            ext = determine_ext(video_url, None) or format_m.group('container')
-
-            if ext == 'meta':
-                continue
-            elif ext == 'smil':
-                formats.extend(self._extract_smil_formats(
-                    video_url, video_id, fatal=False))
-            elif ext == 'm3u8':
-                # the certificates are misconfigured (see
-                # https://github.com/ytdl-org/youtube-dl/issues/8665)
-                if video_url.startswith('https://'):
-                    continue
-                formats.extend(self._extract_m3u8_formats(
-                    video_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id=format_id, fatal=False))
-            elif ext == 'f4m':
-                formats.extend(self._extract_f4m_formats(
-                    video_url, video_id, f4m_id=format_id, fatal=False))
-            else:
-                quality = xpath_text(fnode, './quality')
-                if quality:
-                    format_id += '-' + quality
-
-                abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
-                vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
-
-                tbr = int_or_none(self._search_regex(
-                    r'_(\d+)k', video_url, 'bitrate', None))
-                if tbr and vbr and not abr:
-                    abr = tbr - vbr
-
-                formats.append({
-                    'format_id': format_id,
-                    'url': video_url,
-                    'ext': ext,
-                    'acodec': format_m.group('acodec'),
-                    'vcodec': format_m.group('vcodec'),
-                    'abr': abr,
-                    'vbr': vbr,
-                    'tbr': tbr,
-                    'width': int_or_none(xpath_text(fnode, './width')),
-                    'height': int_or_none(xpath_text(fnode, './height')),
-                    'filesize': int_or_none(xpath_text(fnode, './filesize')),
-                    'protocol': format_m.group('proto').lower(),
|
|
||||||
})
|
|
||||||
|
|
||||||
geolocation = xpath_text(doc, './/details/geolocation')
|
|
||||||
if not formats and geolocation and geolocation != 'none':
|
|
||||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnails = []
|
|
||||||
for node in doc.findall('.//teaserimages/teaserimage'):
|
|
||||||
thumbnail_url = node.text
|
|
||||||
if not thumbnail_url:
|
|
||||||
continue
|
|
||||||
thumbnail = {
|
|
||||||
'url': thumbnail_url,
|
|
||||||
}
|
|
||||||
thumbnail_key = node.get('key')
|
|
||||||
if thumbnail_key:
|
|
||||||
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
|
|
||||||
if m:
|
|
||||||
thumbnail['width'] = int(m.group(1))
|
|
||||||
thumbnail['height'] = int(m.group(2))
|
|
||||||
thumbnails.append(thumbnail)
|
|
||||||
|
|
||||||
upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': xpath_text(doc, './/information/detail'),
|
|
||||||
'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'uploader': xpath_text(doc, './/details/originChannelTitle'),
|
|
||||||
'uploader_id': xpath_text(doc, './/details/originChannelId'),
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
|
|
||||||
return self.extract_from_xml_url(video_id, details_url)
|
|
||||||
|
|||||||
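
Note: the whole bespoke XML-service extractor above is dropped because 3sat pages are served by the same backend as zdf.de (the test comments literally point at equivalent zdf.de URLs). A minimal sketch of what the file reduces to, assuming the `from .zdf import ZDFIE` import that sits above the visible part of the hunk:

from .zdf import ZDFIE  # assumed import; not shown in the visible hunk


class DreiSatIE(ZDFIE):
    IE_NAME = '3sat'
    _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
    # _real_extract and all format handling are inherited from ZDFIE
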
yt_dlp/extractor/extractors.py
@@ -80,6 +80,7 @@ from .arte import (
     ArteTVEmbedIE,
     ArteTVPlaylistIE,
 )
+from .arnes import ArnesIE
 from .asiancrush import (
     AsianCrushIE,
     AsianCrushPlaylistIE,
@@ -103,11 +104,13 @@ from .awaan import (
 )
 from .azmedien import AZMedienIE
 from .baidu import BaiduVideoIE
+from .bandaichannel import BandaiChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
 from .bbc import (
     BBCCoUkIE,
     BBCCoUkArticleIE,
-    BBCCoUkIPlayerPlaylistIE,
+    BBCCoUkIPlayerEpisodesIE,
+    BBCCoUkIPlayerGroupIE,
     BBCCoUkPlaylistIE,
     BBCIE,
 )
@@ -207,7 +210,11 @@ from .cbsnews import (
     CBSNewsIE,
     CBSNewsLiveVideoIE,
 )
-from .cbssports import CBSSportsIE
+from .cbssports import (
+    CBSSportsEmbedIE,
+    CBSSportsIE,
+    TwentyFourSevenSportsIE,
+)
 from .ccc import (
     CCCIE,
     CCCPlaylistIE,
@@ -262,6 +269,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .commonprotocols import (
     MmsIE,
     RtmpIE,
+    ViewSourceIE,
 )
 from .condenast import CondeNastIE
 from .contv import CONtvIE
@@ -305,6 +313,10 @@ from .democracynow import DemocracynowIE
 from .dfb import DFBIE
 from .dhm import DHMIE
 from .digg import DiggIE
+from .discoveryplusindia import (
+    DiscoveryPlusIndiaIE,
+    DiscoveryPlusIndiaShowIE,
+)
 from .dotsub import DotsubIE
 from .douyutv import (
     DouyuShowIE,
@@ -449,10 +461,7 @@ from .gamestar import GameStarIE
 from .gaskrank import GaskrankIE
 from .gazeta import GazetaIE
 from .gdcvault import GDCVaultIE
-from .gedi import (
-    GediIE,
-    GediEmbedsIE,
-)
+from .gedidigital import GediDigitalIE
 from .generic import GenericIE
 from .gfycat import GfycatIE
 from .giantbomb import GiantBombIE
@@ -583,7 +592,11 @@ from .kuwo import (
     KuwoCategoryIE,
     KuwoMvIE,
 )
-from .la7 import LA7IE
+from .la7 import (
+    LA7IE,
+    LA7PodcastEpisodeIE,
+    LA7PodcastIE,
+)
 from .laola1tv import (
     Laola1TvEmbedIE,
     Laola1TvIE,
@@ -624,7 +637,11 @@ from .limelight import (
     LimelightChannelIE,
     LimelightChannelListIE,
 )
-from .line import LineTVIE
+from .line import (
+    LineTVIE,
+    LineLiveIE,
+    LineLiveChannelIE,
+)
 from .linkedin import (
     LinkedInLearningIE,
     LinkedInLearningCourseIE,
@@ -662,6 +679,7 @@ from .mangomolo import (
     MangomoloLiveIE,
 )
 from .manyvids import ManyVidsIE
+from .maoritv import MaoriTVIE
 from .markiza import (
     MarkizaIE,
     MarkizaPageIE,
@@ -710,7 +728,10 @@ from .mixcloud import (
     MixcloudUserIE,
     MixcloudPlaylistIE,
 )
-from .mlb import MLBIE
+from .mlb import (
+    MLBIE,
+    MLBVideoIE,
+)
 from .mnet import MnetIE
 from .moevideo import MoeVideoIE
 from .mofosex import (
@@ -734,9 +755,12 @@ from .mtv import (
     MTVServicesEmbeddedIE,
     MTVDEIE,
     MTVJapanIE,
+    MTVItaliaIE,
+    MTVItaliaProgrammaIE,
 )
 from .muenchentv import MuenchenTVIE
 from .mwave import MwaveIE, MwaveMeetGreetIE
+from .mxplayer import MxplayerIE
 from .mychannels import MyChannelsIE
 from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
@@ -779,8 +803,9 @@ from .ndr import (
     NJoyEmbedIE,
 )
 from .ndtv import NDTVIE
-from .netzkino import NetzkinoIE
+from .nebula import NebulaIE
 from .nerdcubed import NerdCubedFeedIE
+from .netzkino import NetzkinoIE
 from .neteasemusic import (
     NetEaseMusicIE,
     NetEaseMusicAlbumIE,
@@ -805,6 +830,7 @@ from .nexx import (
     NexxIE,
     NexxEmbedIE,
 )
+from .nfhsnetwork import NFHSNetworkIE
 from .nfl import (
     NFLIE,
     NFLArticleIE,
@@ -821,7 +847,7 @@ from .nick import (
     NickNightIE,
     NickRuIE,
 )
-from .niconico import NiconicoIE, NiconicoPlaylistIE
+from .niconico import NiconicoIE, NiconicoPlaylistIE, NiconicoUserIE
 from .ninecninemedia import NineCNineMediaIE
 from .ninegag import NineGagIE
 from .ninenow import NineNowIE
@@ -916,6 +942,11 @@ from .packtpub import (
     PacktPubIE,
     PacktPubCourseIE,
 )
+from .palcomp3 import (
+    PalcoMP3IE,
+    PalcoMP3ArtistIE,
+    PalcoMP3VideoIE,
+)
 from .pandoratv import PandoraTVIE
 from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
@@ -952,6 +983,7 @@ from .plays import PlaysTVIE
 from .playtvak import PlaytvakIE
 from .playvid import PlayvidIE
 from .playwire import PlaywireIE
+from .plutotv import PlutoTVIE
 from .pluralsight import (
     PluralsightIE,
     PluralsightCourseIE,
@@ -1177,7 +1209,10 @@ from .spike import (
     BellatorIE,
     ParamountNetworkIE,
 )
-from .stitcher import StitcherIE
+from .stitcher import (
+    StitcherIE,
+    StitcherShowIE,
+)
 from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
 from .sportdeutschland import SportDeutschlandIE
@@ -1558,6 +1593,7 @@ from .weibo import (
     WeiboMobileIE
 )
 from .weiqitv import WeiqiTVIE
+from .wimtv import WimTVIE
 from .wistia import (
     WistiaIE,
     WistiaPlaylistIE,
@@ -1664,8 +1700,14 @@ from .zattoo import (
     ZattooLiveIE,
 )
 from .zdf import ZDFIE, ZDFChannelIE
+from .zee5 import (
+    Zee5IE,
+    Zee5SeriesIE,
+)
 from .zhihu import ZhihuIE
-from .zingmp3 import ZingMp3IE
-from .zee5 import Zee5IE
+from .zingmp3 import (
+    ZingMp3IE,
+    ZingMp3AlbumIE,
+)
 from .zoom import ZoomIE
 from .zype import ZypeIE
yt_dlp/extractor/francetv.py
@@ -401,7 +401,7 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
             (r'player\.load[^;]+src:\s*["\']([^"\']+)',
              r'id-video=([^@]+@[^"]+)',
              r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
-             r'data-id="([^"]+)"'),
+             r'data-id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
             webpage, 'video id')

         return self._make_url_result(video_id)
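
Note: the tightened pattern only accepts UUID-shaped ids, so unrelated `data-id` attributes elsewhere on the page no longer match. A quick standalone check (the markup below is made up for illustration):

import re

_UUID_ID = r'data-id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'

assert re.search(_UUID_ID, '<div data-id="162311c3-e776-11ea-b5fd-000d3a23d482">')
assert not re.search(_UUID_ID, '<div data-id="page-top">')  # rejected by the new pattern
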
yt_dlp/extractor/fujitv.py
@@ -17,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         formats = self._extract_m3u8_formats(
-            self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id)
+            self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id, 'mp4')
         for f in formats:
             wh = self._BITRATE_MAP.get(f.get('tbr'))
             if wh:
yt_dlp/extractor/gedi.py (deleted)
@@ -1,266 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
-    base_url,
-    url_basename,
-    urljoin,
-)
-
-
-class GediBaseIE(InfoExtractor):
-    @staticmethod
-    def _clean_audio_fmts(formats):
-        unique_formats = []
-        for f in formats:
-            if 'acodec' in f:
-                unique_formats.append(f)
-        formats[:] = unique_formats
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-        player_data = re.findall(
-            r'PlayerFactory\.setParam\(\'(?P<type>.+?)\',\s*\'(?P<name>.+?)\',\s*\'(?P<val>.+?)\'\);',
-            webpage)
-
-        formats = []
-        audio_fmts = []
-        hls_fmts = []
-        http_fmts = []
-        title = ''
-        thumb = ''
-
-        fmt_reg = r'(?P<t>video|audio)-(?P<p>rrtv|hls)-(?P<h>[\w\d]+)(?:-(?P<br>[\w\d]+))?$'
-        br_reg = r'video-rrtv-(?P<br>\d+)-'
-
-        for t, n, v in player_data:
-            if t == 'format':
-                m = re.match(fmt_reg, n)
-                if m:
-                    # audio formats
-                    if m.group('t') == 'audio':
-                        if m.group('p') == 'hls':
-                            audio_fmts.extend(self._extract_m3u8_formats(
-                                v, video_id, 'm4a', m3u8_id='hls', fatal=False))
-                        elif m.group('p') == 'rrtv':
-                            audio_fmts.append({
-                                'format_id': 'mp3',
-                                'url': v,
-                                'tbr': 128,
-                                'ext': 'mp3',
-                                'vcodec': 'none',
-                                'acodec': 'mp3',
-                            })
-
-                    # video formats
-                    elif m.group('t') == 'video':
-                        # hls manifest video
-                        if m.group('p') == 'hls':
-                            hls_fmts.extend(self._extract_m3u8_formats(
-                                v, video_id, 'mp4', m3u8_id='hls', fatal=False))
-                        # direct mp4 video
-                        elif m.group('p') == 'rrtv':
-                            if not m.group('br'):
-                                mm = re.search(br_reg, v)
-                            http_fmts.append({
-                                'format_id': 'https-' + m.group('h'),
-                                'protocol': 'https',
-                                'url': v,
-                                'tbr': int(m.group('br')) if m.group('br') else
-                                (int(mm.group('br')) if mm.group('br') else 0),
-                                'height': int(m.group('h'))
-                            })
-
-            elif t == 'param':
-                if n == 'videotitle':
-                    title = v
-                if n == 'image_full_play':
-                    thumb = v
-
-        title = self._og_search_title(webpage) if title == '' else title
-
-        # clean weird char
-        title = compat_str(title).encode('utf8', 'replace').replace(b'\xc3\x82', b'').decode('utf8', 'replace')
-
-        if audio_fmts:
-            self._clean_audio_fmts(audio_fmts)
-            self._sort_formats(audio_fmts)
-        if hls_fmts:
-            self._sort_formats(hls_fmts)
-        if http_fmts:
-            self._sort_formats(http_fmts)
-
-        formats.extend(audio_fmts)
-        formats.extend(hls_fmts)
-        formats.extend(http_fmts)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': self._html_search_meta('twitter:description', webpage),
-            'thumbnail': thumb,
-            'formats': formats,
-        }
-
-
-class GediIE(GediBaseIE):
-    _VALID_URL = r'''(?x)https?://video\.
-        (?:
-            (?:espresso\.)?repubblica
-            |lastampa
-            |huffingtonpost
-            |ilsecoloxix
-            |iltirreno
-            |messaggeroveneto
-            |ilpiccolo
-            |gazzettadimantova
-            |mattinopadova
-            |laprovinciapavese
-            |tribunatreviso
-            |nuovavenezia
-            |gazzettadimodena
-            |lanuovaferrara
-            |corrierealpi
-            |lasentinella
-        )
-        (?:\.gelocal)?\.it/(?!embed/).+?/(?P<id>[\d/]+)(?:\?|\&|$)'''
-    _TESTS = [{
-        'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
-        'md5': '84658d7fb9e55a6e57ecc77b73137494',
-        'info_dict': {
-            'id': '121559/121683',
-            'ext': 'mp4',
-            'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
-            'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
-            'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
-        },
-    }, {
-        'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963',
-        'md5': 'e763b94b7920799a0e0e23ffefa2d157',
-        'info_dict': {
-            'id': '367415/367963',
-            'ext': 'mp4',
-            'title': 'Record della pista a Spa Francorchamps, la Pagani Huayra Roadster BC stupisce',
-            'description': 'md5:5deb503cefe734a3eb3f07ed74303920',
-            'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
-        },
-    }, {
-        'url': 'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267',
-        'md5': 'e48108e97b1af137d22a8469f2019057',
-        'info_dict': {
-            'id': '66184/66267',
-            'ext': 'mp4',
-            'title': 'Cassani e i brividi azzurri ai Mondiali di Imola: \\"Qui mi sono innamorato del ciclismo da ragazzino, incredibile tornarci da ct\\"',
-            'description': 'md5:fc9c50894f70a2469bb9b54d3d0a3d3b',
-            'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
-        },
-    }, {
-        'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723',
-        'md5': 'a6e39f3bdc1842bbd92abbbbef230817',
-        'info_dict': {
-            'id': '141059/142723',
-            'ext': 'mp4',
-            'title': 'Dentro la notizia - Ferrari, cosa succede a Maranello',
-            'description': 'md5:9907d65b53765681fa3a0b3122617c1f',
-            'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
-        },
-    }]
-
-
-class GediEmbedsIE(GediBaseIE):
-    _VALID_URL = r'''(?x)https?://video\.
-        (?:
-            (?:espresso\.)?repubblica
-            |lastampa
-            |huffingtonpost
-            |ilsecoloxix
-            |iltirreno
-            |messaggeroveneto
-            |ilpiccolo
-            |gazzettadimantova
-            |mattinopadova
-            |laprovinciapavese
-            |tribunatreviso
-            |nuovavenezia
-            |gazzettadimodena
-            |lanuovaferrara
-            |corrierealpi
-            |lasentinella
-        )
-        (?:\.gelocal)?\.it/embed/.+?/(?P<id>[\d/]+)(?:\?|\&|$)'''
-    _TESTS = [{
-        'url': 'https://video.huffingtonpost.it/embed/politica/cotticelli-non-so-cosa-mi-sia-successo-sto-cercando-di-capire-se-ho-avuto-un-malore/29312/29276?responsive=true&el=video971040871621586700',
-        'md5': 'f4ac23cadfea7fef89bea536583fa7ed',
-        'info_dict': {
-            'id': '29312/29276',
-            'ext': 'mp4',
-            'title': 'Cotticelli: \\"Non so cosa mi sia successo. Sto cercando di capire se ho avuto un malore\\"',
-            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
-            'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
-        },
-    }, {
-        'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
-        'md5': '0391c2c83c6506581003aaf0255889c0',
-        'info_dict': {
-            'id': '14772/14870',
-            'ext': 'mp4',
-            'title': 'Festival EMERGENCY, Villa: «La buona informazione aiuta la salute» (14772-14870)',
-            'description': 'md5:2bce954d278248f3c950be355b7c2226',
-            'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-social-play\.jpg$',
-        },
-    }]
-
-    @staticmethod
-    def _sanitize_urls(urls):
-        # add protocol if missing
-        for i, e in enumerate(urls):
-            if e.startswith('//'):
-                urls[i] = 'https:%s' % e
-        # clean iframes urls
-        for i, e in enumerate(urls):
-            urls[i] = urljoin(base_url(e), url_basename(e))
-        return urls
-
-    @staticmethod
-    def _extract_urls(webpage):
-        entries = [
-            mobj.group('url')
-            for mobj in re.finditer(r'''(?x)
-            (?:
-                data-frame-src=|
-                <iframe[^\n]+src=
-            )
-            (["'])
-            (?P<url>https?://video\.
-                (?:
-                    (?:espresso\.)?repubblica
-                    |lastampa
-                    |huffingtonpost
-                    |ilsecoloxix
-                    |iltirreno
-                    |messaggeroveneto
-                    |ilpiccolo
-                    |gazzettadimantova
-                    |mattinopadova
-                    |laprovinciapavese
-                    |tribunatreviso
-                    |nuovavenezia
-                    |gazzettadimodena
-                    |lanuovaferrara
-                    |corrierealpi
-                    |lasentinella
-                )
-                (?:\.gelocal)?\.it/embed/.+?)
-            \1''', webpage)]
-        return GediEmbedsIE._sanitize_urls(entries)
-
-    @staticmethod
-    def _extract_url(webpage):
-        urls = GediEmbedsIE._extract_urls(webpage)
-        return urls[0] if urls else None
yt_dlp/extractor/gedidigital.py (new file, 210 lines)
@@ -0,0 +1,210 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    base_url,
+    determine_ext,
+    int_or_none,
+    url_basename,
+    urljoin,
+)
+
+
+class GediDigitalIE(InfoExtractor):
+    _VALID_URL = r'''(?x)(?P<url>(?:https?:)//video\.
+        (?:
+            (?:
+                (?:espresso\.)?repubblica
+                |lastampa
+                |ilsecoloxix
+                |huffingtonpost
+            )|
+            (?:
+                iltirreno
+                |messaggeroveneto
+                |ilpiccolo
+                |gazzettadimantova
+                |mattinopadova
+                |laprovinciapavese
+                |tribunatreviso
+                |nuovavenezia
+                |gazzettadimodena
+                |lanuovaferrara
+                |corrierealpi
+                |lasentinella
+            )\.gelocal
+        )\.it(?:/[^/]+){2,4}/(?P<id>\d+))(?:$|[?&].*)'''
+    _TESTS = [{
+        'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
+        'md5': '84658d7fb9e55a6e57ecc77b73137494',
+        'info_dict': {
+            'id': '121683',
+            'ext': 'mp4',
+            'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
+            'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
+            'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-full-.+?\.jpg$',
+            'duration': 125,
+        },
+    }, {
+        'url': 'https://video.huffingtonpost.it/embed/politica/cotticelli-non-so-cosa-mi-sia-successo-sto-cercando-di-capire-se-ho-avuto-un-malore/29312/29276?responsive=true&el=video971040871621586700',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.messaggeroveneto.gelocal.it/locale/maria-giovanna-elmi-covid-vaccino/138155/139268',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.ilpiccolo.gelocal.it/dossier/big-john/dinosauro-big-john-al-via-le-visite-guidate-a-trieste/135226/135751',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.gazzettadimantova.gelocal.it/locale/dal-ponte-visconteo-di-valeggio-l-and-8217sos-dei-ristoratori-aprire-anche-a-cena/137310/137818',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.mattinopadova.gelocal.it/dossier/coronavirus-in-veneto/covid-a-vo-un-anno-dopo-un-cuore-tricolore-per-non-dimenticare/138402/138964',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.laprovinciapavese.gelocal.it/locale/mede-zona-rossa-via-alle-vaccinazioni-per-gli-over-80/137545/138120',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.tribunatreviso.gelocal.it/dossier/coronavirus-in-veneto/ecco-le-prima-vaccinazioni-di-massa-nella-marca/134485/135024',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.nuovavenezia.gelocal.it/locale/camion-troppo-alto-per-il-ponte-ferroviario-perde-il-carico/135734/136266',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.gazzettadimodena.gelocal.it/locale/modena-scoperta-la-proteina-che-predice-il-livello-di-gravita-del-covid/139109/139796',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.lanuovaferrara.gelocal.it/locale/due-bombole-di-gpl-aperte-e-abbandonate-i-vigili-bruciano-il-gas/134391/134957',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.corrierealpi.gelocal.it/dossier/cortina-2021-i-mondiali-di-sci-alpino/mondiali-di-sci-il-timelapse-sulla-splendida-olympia/133760/134331',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.lasentinella.gelocal.it/locale/vestigne-centra-un-auto-e-si-ribalta/138931/139466',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.espresso.repubblica.it/tutti-i-video/01-ted-villa/14772',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _sanitize_urls(urls):
+        # add protocol if missing
+        for i, e in enumerate(urls):
+            if e.startswith('//'):
+                urls[i] = 'https:%s' % e
+        # clean iframes urls
+        for i, e in enumerate(urls):
+            urls[i] = urljoin(base_url(e), url_basename(e))
+        return urls
+
+    @staticmethod
+    def _extract_urls(webpage):
+        entries = [
+            mobj.group('eurl')
+            for mobj in re.finditer(r'''(?x)
+            (?:
+                data-frame-src=|
+                <iframe[^\n]+src=
+            )
+            (["'])(?P<eurl>%s)\1''' % GediDigitalIE._VALID_URL, webpage)]
+        return GediDigitalIE._sanitize_urls(entries)
+
+    @staticmethod
+    def _extract_url(webpage):
+        urls = GediDigitalIE._extract_urls(webpage)
+        return urls[0] if urls else None
+
+    @staticmethod
+    def _clean_formats(formats):
+        format_urls = set()
+        clean_formats = []
+        for f in formats:
+            if f['url'] not in format_urls:
+                if f.get('audio_ext') != 'none' and not f.get('acodec'):
+                    continue
+                format_urls.add(f['url'])
+                clean_formats.append(f)
+        formats[:] = clean_formats
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        url = re.match(self._VALID_URL, url).group('url')
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_meta(
+            ['twitter:title', 'og:title'], webpage, fatal=True)
+        player_data = re.findall(
+            r"PlayerFactory\.setParam\('(?P<type>format|param)',\s*'(?P<name>[^']+)',\s*'(?P<val>[^']+)'\);",
+            webpage)
+
+        formats = []
+        duration = thumb = None
+        for t, n, v in player_data:
+            if t == 'format':
+                if n in ('video-hds-vod-ec', 'video-hls-vod-ec', 'video-viralize', 'video-youtube-pfp'):
+                    continue
+                elif n.endswith('-vod-ak'):
+                    formats.extend(self._extract_akamai_formats(
+                        v, video_id, {'http': 'media.gedidigital.it'}))
+                else:
+                    ext = determine_ext(v)
+                    if ext == 'm3u8':
+                        formats.extend(self._extract_m3u8_formats(
+                            v, video_id, 'mp4', 'm3u8_native', m3u8_id=n, fatal=False))
+                        continue
+                    f = {
+                        'format_id': n,
+                        'url': v,
+                    }
+                    if ext == 'mp3':
+                        abr = int_or_none(self._search_regex(
+                            r'-mp3-audio-(\d+)', v, 'abr', default=None))
+                        f.update({
+                            'abr': abr,
+                            'tbr': abr,
+                            'acodec': ext,
+                            'vcodec': 'none'
+                        })
+                    else:
+                        mobj = re.match(r'^video-rrtv-(\d+)(?:-(\d+))?$', n)
+                        if mobj:
+                            f.update({
+                                'height': int(mobj.group(1)),
+                                'vbr': int_or_none(mobj.group(2)),
+                            })
+                        if not f.get('vbr'):
+                            f['vbr'] = int_or_none(self._search_regex(
+                                r'-video-rrtv-(\d+)', v, 'abr', default=None))
+                    formats.append(f)
+            elif t == 'param':
+                if n in ['image_full', 'image']:
+                    thumb = v
+                elif n == 'videoDuration':
+                    duration = int_or_none(v)
+
+        self._clean_formats(formats)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': self._html_search_meta(
+                ['twitter:description', 'og:description', 'description'], webpage),
+            'thumbnail': thumb or self._og_search_thumbnail(webpage),
+            'formats': formats,
+            'duration': duration,
+        }
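
Note: the new `_clean_formats` dedupes by URL and drops entries that should carry audio but report no codec. A standalone rendering of that logic under made-up format dicts:

def clean_formats(formats):
    seen, cleaned = set(), []
    for f in formats:
        if f['url'] in seen:  # duplicate URL: skip silently
            continue
        if f.get('audio_ext') != 'none' and not f.get('acodec'):
            continue  # claims an audio track but has no codec info
        seen.add(f['url'])
        cleaned.append(f)
    formats[:] = cleaned  # mutate in place, like the original

fmts = [
    {'url': 'https://example.invalid/v.mp4', 'acodec': 'aac'},
    {'url': 'https://example.invalid/v.mp4', 'acodec': 'aac'},  # dupe
    {'url': 'https://example.invalid/a.mp4'},                   # no acodec
]
clean_formats(fmts)
assert len(fmts) == 1

Note also that `_extract_urls` now interpolates `_VALID_URL` into the iframe pattern, instead of duplicating the long host alternation that gedi.py carried twice.
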
yt_dlp/extractor/generic.py
@@ -127,13 +127,14 @@ from .expressen import ExpressenIE
 from .zype import ZypeIE
 from .odnoklassniki import OdnoklassnikiIE
 from .kinja import KinjaEmbedIE
-from .gedi import GediEmbedsIE
+from .gedidigital import GediDigitalIE
 from .rcs import RCSEmbedsIE
 from .bitchute import BitChuteIE
 from .rumble import RumbleEmbedIE
 from .arcpublishing import ArcPublishingIE
 from .medialaan import MedialaanIE
 from .simplecast import SimplecastIE
+from .wimtv import WimTVIE


 class GenericIE(InfoExtractor):
@@ -2250,6 +2251,15 @@ class GenericIE(InfoExtractor):
         },
         'playlist_mincount': 52,
     },
+    {
+        # WimTv embed player
+        'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/',
+        'info_dict': {
+            'id': 'wearefmi-pt-2-2021',
+            'title': '#WEAREFMI – PT.2 – 2021 – MsMotorTV',
+        },
+        'playlist_count': 1,
+    },
 ]

     def report_following_redirect(self, new_url):
@@ -2649,6 +2659,15 @@ class GenericIE(InfoExtractor):
         if vid_me_embed_url is not None:
             return self.url_result(vid_me_embed_url, 'Vidme')

+        # Invidious Instances
+        # https://github.com/yt-dlp/yt-dlp/issues/195
+        # https://github.com/iv-org/invidious/pull/1730
+        youtube_url = self._search_regex(
+            r'<link rel="alternate" href="(https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
+            webpage, 'youtube link', default=None)
+        if youtube_url:
+            return self.url_result(youtube_url, YoutubeIE.ie_key())
+
         # Look for YouTube embeds
         youtube_urls = YoutubeIE._extract_urls(webpage)
         if youtube_urls:
@@ -2955,7 +2974,7 @@ class GenericIE(InfoExtractor):
             webpage)
         if not mobj:
             mobj = re.search(
-                r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
+                r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
                 webpage)
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'MLB')
@@ -3339,17 +3358,22 @@ class GenericIE(InfoExtractor):
            return self.playlist_from_matches(
                zype_urls, video_id, video_title, ie=ZypeIE.ie_key())

-        # Look for RCS media group embeds
-        gedi_urls = GediEmbedsIE._extract_urls(webpage)
+        gedi_urls = GediDigitalIE._extract_urls(webpage)
         if gedi_urls:
             return self.playlist_from_matches(
-                gedi_urls, video_id, video_title, ie=GediEmbedsIE.ie_key())
+                gedi_urls, video_id, video_title, ie=GediDigitalIE.ie_key())

+        # Look for RCS media group embeds
         rcs_urls = RCSEmbedsIE._extract_urls(webpage)
         if rcs_urls:
             return self.playlist_from_matches(
                 rcs_urls, video_id, video_title, ie=RCSEmbedsIE.ie_key())

+        wimtv_urls = WimTVIE._extract_urls(webpage)
+        if wimtv_urls:
+            return self.playlist_from_matches(
+                wimtv_urls, video_id, video_title, ie=WimTVIE.ie_key())
+
         bitchute_urls = BitChuteIE._extract_urls(webpage)
         if bitchute_urls:
             return self.playlist_from_matches(
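
Note: the Invidious branch works because Invidious instances run under arbitrary hostnames yet still emit a `<link rel="alternate">` tag pointing at the upstream YouTube watch page, so detecting that tag catches every instance without maintaining a host list. A standalone sketch of the match (sample markup invented for illustration):

import re

webpage = '<link rel="alternate" href="https://www.youtube.com/watch?v=BaW_jenozKc">'
m = re.search(
    r'<link rel="alternate" href="(https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
    webpage)
assert m and m.group(1) == 'https://www.youtube.com/watch?v=BaW_jenozKc'
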
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import json
 import re

-from yt_dlp.utils import int_or_none, unified_timestamp, unescapeHTML
+from ..utils import int_or_none, unified_timestamp, unescapeHTML
 from .common import InfoExtractor
yt_dlp/extractor/instagram.py
@@ -12,6 +12,7 @@ from ..compat import (
 )
 from ..utils import (
     ExtractorError,
+    float_or_none,
     get_element_by_attribute,
     int_or_none,
     lowercase_escape,
@@ -32,6 +33,7 @@ class InstagramIE(InfoExtractor):
             'title': 'Video by naomipq',
             'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
             'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 0,
             'timestamp': 1371748545,
             'upload_date': '20130620',
             'uploader_id': 'naomipq',
@@ -48,6 +50,7 @@ class InstagramIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Video by britneyspears',
             'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 0,
             'timestamp': 1453760977,
             'upload_date': '20160125',
             'uploader_id': 'britneyspears',
@@ -86,6 +89,24 @@ class InstagramIE(InfoExtractor):
             'title': 'Post by instagram',
             'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
         },
+    }, {
+        # IGTV
+        'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
+        'info_dict': {
+            'id': 'BkfuX9UB-eK',
+            'ext': 'mp4',
+            'title': 'Fingerboarding Tricks with @cass.fb',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 53.83,
+            'timestamp': 1530032919,
+            'upload_date': '20180626',
+            'uploader_id': 'instagram',
+            'uploader': 'Instagram',
+            'like_count': int,
+            'comment_count': int,
+            'comments': list,
+            'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
+        }
     }, {
         'url': 'https://instagram.com/p/-Cmh1cukG2/',
         'only_matching': True,
@@ -159,7 +180,9 @@ class InstagramIE(InfoExtractor):
         description = try_get(
             media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
             compat_str) or media.get('caption')
+        title = media.get('title')
         thumbnail = media.get('display_src') or media.get('display_url')
+        duration = float_or_none(media.get('video_duration'))
         timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
         uploader = media.get('owner', {}).get('full_name')
         uploader_id = media.get('owner', {}).get('username')
@@ -200,9 +223,10 @@ class InstagramIE(InfoExtractor):
                     continue
                 entries.append({
                     'id': node.get('shortcode') or node['id'],
-                    'title': 'Video %d' % edge_num,
+                    'title': node.get('title') or 'Video %d' % edge_num,
                     'url': node_video_url,
                     'thumbnail': node.get('display_url'),
+                    'duration': float_or_none(node.get('video_duration')),
                     'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
                     'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
                     'view_count': int_or_none(node.get('video_view_count')),
@@ -239,8 +263,9 @@ class InstagramIE(InfoExtractor):
             'id': video_id,
             'formats': formats,
             'ext': 'mp4',
-            'title': 'Video by %s' % uploader_id,
+            'title': title or 'Video by %s' % uploader_id,
             'description': description,
+            'duration': duration,
             'thumbnail': thumbnail,
             'timestamp': timestamp,
             'uploader_id': uploader_id,
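
Note: the new `duration` fields lean on `float_or_none` from `yt_dlp.utils`, which yields `None` for a missing or malformed value instead of raising, so the often-absent GraphQL `video_duration` key can be passed straight through:

from yt_dlp.utils import float_or_none

assert float_or_none('53.83') == 53.83  # the IGTV test value
assert float_or_none(None) is None      # key missing from the media dict
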
yt_dlp/extractor/ivi.py
@@ -146,7 +146,7 @@ class IviIE(InfoExtractor):
                     expected=True)
             elif not pycryptodomex_found:
                 raise ExtractorError(
-                    'pycryptodomex not found. Please install it.',
+                    'pycryptodomex not found. Please install',
                     expected=True)
             elif message:
                 extractor_msg += ': ' + message
yt_dlp/extractor/jamendo.py
@@ -29,34 +29,51 @@ class JamendoIE(InfoExtractor):
             'id': '196219',
             'display_id': 'stories-from-emona-i',
             'ext': 'flac',
-            'title': 'Maya Filipič - Stories from Emona I',
-            'artist': 'Maya Filipič',
+            # 'title': 'Maya Filipič - Stories from Emona I',
+            'title': 'Stories from Emona I',
+            # 'artist': 'Maya Filipič',
             'track': 'Stories from Emona I',
             'duration': 210,
             'thumbnail': r're:^https?://.*\.jpg',
             'timestamp': 1217438117,
             'upload_date': '20080730',
+            'license': 'by-nc-nd',
+            'view_count': int,
+            'like_count': int,
+            'average_rating': int,
+            'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
         }
     }, {
         'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
         'only_matching': True,
     }]

+    def _call_api(self, resource, resource_id):
+        path = '/api/%ss' % resource
+        rand = compat_str(random.random())
+        return self._download_json(
+            'https://www.jamendo.com' + path, resource_id, query={
+                'id[]': resource_id,
+            }, headers={
+                'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
+            })[0]
+
     def _real_extract(self, url):
         track_id, display_id = self._VALID_URL_RE.match(url).groups()
-        webpage = self._download_webpage(
-            'https://www.jamendo.com/track/' + track_id, track_id)
-        models = self._parse_json(self._html_search_regex(
-            r"data-bundled-models='([^']+)",
-            webpage, 'bundled models'), track_id)
-        track = models['track']['models'][0]
+        # webpage = self._download_webpage(
+        #     'https://www.jamendo.com/track/' + track_id, track_id)
+        # models = self._parse_json(self._html_search_regex(
+        #     r"data-bundled-models='([^']+)",
+        #     webpage, 'bundled models'), track_id)
+        # track = models['track']['models'][0]
+        track = self._call_api('track', track_id)
         title = track_name = track['name']
-        get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
-        artist = get_model('artist')
-        artist_name = artist.get('name')
-        if artist_name:
-            title = '%s - %s' % (artist_name, title)
-        album = get_model('album')
+        # get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
+        # artist = get_model('artist')
+        # artist_name = artist.get('name')
+        # if artist_name:
+        #     title = '%s - %s' % (artist_name, title)
+        # album = get_model('album')

         formats = [{
             'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
@@ -74,7 +91,7 @@ class JamendoIE(InfoExtractor):

         urls = []
         thumbnails = []
-        for _, covers in track.get('cover', {}).items():
+        for covers in (track.get('cover') or {}).values():
             for cover_id, cover_url in covers.items():
                 if not cover_url or cover_url in urls:
                     continue
@@ -88,13 +105,14 @@ class JamendoIE(InfoExtractor):
             })

         tags = []
-        for tag in track.get('tags', []):
+        for tag in (track.get('tags') or []):
             tag_name = tag.get('name')
             if not tag_name:
                 continue
             tags.append(tag_name)

         stats = track.get('stats') or {}
+        license = track.get('licenseCC') or []

         return {
             'id': track_id,
@@ -103,11 +121,11 @@ class JamendoIE(InfoExtractor):
             'title': title,
             'description': track.get('description'),
             'duration': int_or_none(track.get('duration')),
-            'artist': artist_name,
+            # 'artist': artist_name,
             'track': track_name,
-            'album': album.get('name'),
+            # 'album': album.get('name'),
             'formats': formats,
-            'license': '-'.join(track.get('licenseCC', [])) or None,
+            'license': '-'.join(license) if license else None,
             'timestamp': int_or_none(track.get('dateCreated')),
             'view_count': int_or_none(stats.get('listenedAll')),
             'like_count': int_or_none(stats.get('favorited')),
@@ -116,9 +134,9 @@ class JamendoIE(InfoExtractor):
     }


-class JamendoAlbumIE(InfoExtractor):
+class JamendoAlbumIE(JamendoIE):
     _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
         'info_dict': {
             'id': '121486',
@@ -151,17 +169,7 @@ class JamendoAlbumIE(InfoExtractor):
         'params': {
             'playlistend': 2
         }
-    }
-
-    def _call_api(self, resource, resource_id):
-        path = '/api/%ss' % resource
-        rand = compat_str(random.random())
-        return self._download_json(
-            'https://www.jamendo.com' + path, resource_id, query={
-                'id[]': resource_id,
-            }, headers={
-                'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
-            })[0]
+    }]

     def _real_extract(self, url):
         album_id = self._match_id(url)
@@ -169,7 +177,7 @@ class JamendoAlbumIE(InfoExtractor):
         album_name = album.get('name')

         entries = []
-        for track in album.get('tracks', []):
+        for track in (album.get('tracks') or []):
             track_id = track.get('id')
             if not track_id:
                 continue
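
Note: the switch from scraping `data-bundled-models` to `_call_api` (shared with the album extractor via the new `JamendoAlbumIE(JamendoIE)` inheritance) hinges on the `X-Jam-Call` header, which is just a SHA-1 over the API path plus a random nonce. A standalone sketch of the header construction, with illustrative values (`compat_str` is plain `str` on Python 3):

import hashlib
import random

path = '/api/tracks'            # '/api/%ss' % 'track'
rand = str(random.random())     # fresh nonce per call
token = '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
headers = {'X-Jam-Call': token}
# the request itself is then a plain GET with query id[]=<track_id>
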
@@ -1,9 +1,15 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
parse_duration,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -57,3 +63,141 @@ class LA7IE(InfoExtractor):
|
|||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
         'thumbnail': self._og_search_thumbnail(webpage, default=None),
         'ie_key': 'Kaltura',
     }


+class LA7PodcastEpisodeIE(InfoExtractor):
+    IE_NAME = 'la7.it:pod:episode'
+    _VALID_URL = r'''(?x)(https?://)?
+        (?:www\.)?la7\.it/[^/]+/podcast/([^/]+-)?(?P<id>\d+)'''
+
+    _TESTS = [{
+        'url': 'https://www.la7.it/voicetown/podcast/la-carezza-delle-memoria-di-carlo-verdone-23-03-2021-371497',
+        'md5': '7737d4d79b3c1a34b3de3e16297119ed',
+        'info_dict': {
+            'id': '371497',
+            'ext': 'mp3',
+            'title': '"La carezza delle memoria" di Carlo Verdone',
+            'description': 'md5:5abf07c3c551a687db80af3f9ceb7d52',
+            'thumbnail': 'https://www.la7.it/sites/default/files/podcast/371497.jpg',
+            'upload_date': '20210323',
+        },
+    }, {
+        # embed url
+        'url': 'https://www.la7.it/embed/podcast/371497',
+        'only_matching': True,
+    }, {
+        # date already in the title
+        'url': 'https://www.la7.it/propagandalive/podcast/lintervista-di-diego-bianchi-ad-annalisa-cuzzocrea-puntata-del-1932021-20-03-2021-371130',
+        'only_matching': True,
+    }, {
+        # title same as show_title
+        'url': 'https://www.la7.it/otto-e-mezzo/podcast/otto-e-mezzo-26-03-2021-372340',
+        'only_matching': True,
+    }]
+
+    def _extract_info(self, webpage, video_id=None, ppn=None):
+        if not video_id:
+            video_id = self._search_regex(
+                r'data-nid=([\'"])(?P<vid>\d+)\1',
+                webpage, 'video_id', group='vid')
+
+        media_url = self._search_regex(
+            (r'src:\s*([\'"])(?P<url>.+?mp3.+?)\1',
+             r'data-podcast=([\'"])(?P<url>.+?mp3.+?)\1'),
+            webpage, 'media_url', group='url')
+        ext = determine_ext(media_url)
+        formats = [{
+            'url': media_url,
+            'format_id': ext,
+            'ext': ext,
+        }]
+        self._sort_formats(formats)
+
+        title = self._html_search_regex(
+            (r'<div class="title">(?P<title>.+?)</',
+             r'<title>(?P<title>[^<]+)</title>',
+             r'title:\s*([\'"])(?P<title>.+?)\1'),
+            webpage, 'title', group='title')
+
+        description = (
+            self._html_search_regex(
+                (r'<div class="description">(.+?)</div>',
+                 r'<div class="description-mobile">(.+?)</div>',
+                 r'<div class="box-txt">([^<]+?)</div>',
+                 r'<div class="field-content"><p>(.+?)</p></div>'),
+                webpage, 'description', default=None)
+            or self._html_search_meta('description', webpage))
+
+        thumb = self._html_search_regex(
+            (r'<div class="podcast-image"><img src="(.+?)"></div>',
+             r'<div class="container-embed"[^<]+url\((.+?)\);">',
+             r'<div class="field-content"><img src="(.+?)"'),
+            webpage, 'thumbnail', fatal=False, default=None)
+
+        duration = parse_duration(self._html_search_regex(
+            r'<span class="(?:durata|duration)">([\d:]+)</span>',
+            webpage, 'duration', fatal=False, default=None))
+
+        date = self._html_search_regex(
+            r'class="data">\s*(?:<span>)?([\d\.]+)\s*</',
+            webpage, 'date', default=None)
+
+        date_alt = self._search_regex(
+            r'(\d+[\./]\d+[\./]\d+)', title, 'date_alt', default=None)
+        ppn = ppn or self._search_regex(
+            r'ppN:\s*([\'"])(?P<ppn>.+?)\1',
+            webpage, 'ppn', group='ppn', default=None)
+        # if the date is not in the title
+        # and title is the same as the show_title
+        # add the date to the title
+        if date and not date_alt and ppn and ppn.lower() == title.lower():
+            title += ' del %s' % date
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': float_or_none(duration),
+            'formats': formats,
+            'thumbnail': thumb,
+            'upload_date': unified_strdate(date),
+        }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        return self._extract_info(webpage, video_id)
+
+
+class LA7PodcastIE(LA7PodcastEpisodeIE):
+    IE_NAME = 'la7.it:podcast'
+    _VALID_URL = r'(https?://)?(www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])'
+
+    _TESTS = [{
+        'url': 'https://www.la7.it/propagandalive/podcast',
+        'info_dict': {
+            'id': 'propagandalive',
+            'title': "Propaganda Live",
+        },
+        'playlist_count': 10,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        title = (
+            self._html_search_regex(
+                r'<h1.*?>(.+?)</h1>', webpage, 'title', fatal=False, default=None)
+            or self._og_search_title(webpage))
+        ppn = self._search_regex(
+            r'window\.ppN\s*=\s*([\'"])(?P<ppn>.+?)\1',
+            webpage, 'ppn', group='ppn', default=None)
+
+        entries = []
+        for episode in re.finditer(
+                r'<div class="container-podcast-property">([\s\S]+?)(?:</div>\s*){3}',
+                webpage):
+            entries.append(self._extract_info(episode.group(1), ppn=ppn))
+
+        return self.playlist_result(entries, playlist_id, title)
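The date heuristic in _extract_info above only appends the page date when the title carries no date of its own and is identical to the show name (ppn). A minimal standalone sketch of that check, with made-up sample values:

import re

def add_date_if_missing(title, ppn, date):
    # a date already embedded in the title, e.g. '... 26.03.2021 ...'
    date_alt = re.search(r'(\d+[\./]\d+[\./]\d+)', title)
    if date and not date_alt and ppn and ppn.lower() == title.lower():
        title += ' del %s' % date  # same suffix style the extractor uses
    return title

print(add_date_if_missing('Otto e mezzo', 'Otto e Mezzo', '26.03.2021'))
# -> 'Otto e mezzo del 26.03.2021'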
yt_dlp/extractor/lbry.py
@@ -6,8 +6,10 @@ import json

 from .common import InfoExtractor
 from ..compat import (
+    compat_parse_qs,
     compat_str,
     compat_urllib_parse_unquote,
+    compat_urllib_parse_urlparse,
 )
 from ..utils import (
     determine_ext,
@@ -21,9 +23,9 @@ from ..utils import (


 class LBRYBaseIE(InfoExtractor):
-    _BASE_URL_REGEX = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/'
+    _BASE_URL_REGEX = r'(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
     _CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
-    _OPT_CLAIM_ID = '[^:/?#&]+(?::%s)?' % _CLAIM_ID_REGEX
+    _OPT_CLAIM_ID = '[^:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
     _SUPPORTED_STREAM_TYPES = ['video', 'audio']

     def _call_api_proxy(self, method, display_id, params, resource):
@@ -41,7 +43,9 @@ class LBRYBaseIE(InfoExtractor):
             'resolve', display_id, {'urls': url}, resource)[url]

     def _permanent_url(self, url, claim_name, claim_id):
-        return urljoin(url, '/%s:%s' % (claim_name, claim_id))
+        return urljoin(
+            url.replace('lbry://', 'https://lbry.tv/'),
+            '/%s:%s' % (claim_name, claim_id))

     def _parse_stream(self, stream, url):
         stream_value = stream.get('value') or {}
@@ -60,6 +64,7 @@ class LBRYBaseIE(InfoExtractor):
             'description': stream_value.get('description'),
             'license': stream_value.get('license'),
             'timestamp': int_or_none(stream.get('timestamp')),
+            'release_timestamp': int_or_none(stream_value.get('release_time')),
             'tags': stream_value.get('tags'),
             'duration': int_or_none(media.get('duration')),
             'channel': try_get(signing_channel, lambda x: x['value']['title']),
@@ -92,6 +97,8 @@ class LBRYIE(LBRYBaseIE):
             'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
             'timestamp': 1595694354,
             'upload_date': '20200725',
+            'release_timestamp': 1595340697,
+            'release_date': '20200721',
             'width': 1280,
             'height': 720,
         }
@@ -106,6 +113,8 @@ class LBRYIE(LBRYBaseIE):
             'description': 'md5:661ac4f1db09f31728931d7b88807a61',
             'timestamp': 1591312601,
             'upload_date': '20200604',
+            'release_timestamp': 1591312421,
+            'release_date': '20200604',
             'tags': list,
             'duration': 2570,
             'channel': 'The LBRY Foundation',
@@ -137,6 +146,9 @@ class LBRYIE(LBRYBaseIE):
     }, {
         'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
         'only_matching': True,
+    }, {
+        'url': 'lbry://@lbry#3f/odysee#7',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
@@ -166,7 +178,7 @@ class LBRYIE(LBRYBaseIE):

 class LBRYChannelIE(LBRYBaseIE):
     IE_NAME = 'lbry:channel'
-    _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?#&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
+    _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
     _TESTS = [{
         'url': 'https://lbry.tv/@LBRYFoundation:0',
         'info_dict': {
@@ -178,20 +190,24 @@ class LBRYChannelIE(LBRYBaseIE):
     }, {
         'url': 'https://lbry.tv/@LBRYFoundation',
         'only_matching': True,
+    }, {
+        'url': 'lbry://@lbry#3f',
+        'only_matching': True,
     }]
     _PAGE_SIZE = 50

-    def _fetch_page(self, claim_id, url, page):
+    def _fetch_page(self, claim_id, url, params, page):
         page += 1
-        result = self._call_api_proxy(
-            'claim_search', claim_id, {
-                'channel_ids': [claim_id],
-                'claim_type': 'stream',
-                'no_totals': True,
-                'page': page,
-                'page_size': self._PAGE_SIZE,
-                'stream_types': self._SUPPORTED_STREAM_TYPES,
-            }, 'page %d' % page)
+        page_params = {
+            'channel_ids': [claim_id],
+            'claim_type': 'stream',
+            'no_totals': True,
+            'page': page,
+            'page_size': self._PAGE_SIZE,
+        }
+        page_params.update(params)
+        result = self._call_api_proxy(
+            'claim_search', claim_id, page_params, 'page %d' % page)
         for item in (result.get('items') or []):
             stream_claim_name = item.get('name')
             stream_claim_id = item.get('claim_id')
@@ -212,8 +228,31 @@ class LBRYChannelIE(LBRYBaseIE):
         result = self._resolve_url(
             'lbry://' + display_id, display_id, 'channel')
         claim_id = result['claim_id']
+        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        content = qs.get('content', [None])[0]
+        params = {
+            'fee_amount': qs.get('fee_amount', ['>=0'])[0],
+            'order_by': {
+                'new': ['release_time'],
+                'top': ['effective_amount'],
+                'trending': ['trending_group', 'trending_mixed'],
+            }[qs.get('order', ['new'])[0]],
+            'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
+        }
+        duration = qs.get('duration', [None])[0]
+        if duration:
+            params['duration'] = {
+                'long': '>=1200',
+                'short': '<=240',
+            }[duration]
+        language = qs.get('language', ['all'])[0]
+        if language != 'all':
+            languages = [language]
+            if language == 'en':
+                languages.append('none')
+            params['any_languages'] = languages
         entries = OnDemandPagedList(
-            functools.partial(self._fetch_page, claim_id, url),
+            functools.partial(self._fetch_page, claim_id, url, params),
             self._PAGE_SIZE)
         result_value = result.get('value') or {}
         return self.playlist_result(
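The channel change above maps page URL query parameters onto claim_search API parameters. A standalone sketch of that mapping with the standard library in place of the compat shims (the sample URL is made up):

from urllib.parse import parse_qs, urlparse

SUPPORTED_STREAM_TYPES = ['video', 'audio']

def channel_params(url):
    qs = parse_qs(urlparse(url).query)
    content = qs.get('content', [None])[0]
    return {
        'fee_amount': qs.get('fee_amount', ['>=0'])[0],
        'order_by': {
            'new': ['release_time'],
            'top': ['effective_amount'],
            'trending': ['trending_group', 'trending_mixed'],
        }[qs.get('order', ['new'])[0]],
        'stream_types': [content] if content in ['audio', 'video'] else SUPPORTED_STREAM_TYPES,
    }

print(channel_params('https://odysee.com/@lbry?order=top&content=video'))
# -> {'fee_amount': '>=0', 'order_by': ['effective_amount'], 'stream_types': ['video']}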
yt_dlp/extractor/line.py
@@ -4,7 +4,13 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    js_to_json,
+    str_or_none,
+)


 class LineTVIE(InfoExtractor):
@@ -88,3 +94,137 @@ class LineTVIE(InfoExtractor):
                 for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
             'view_count': video_info.get('meta', {}).get('count'),
         }
+
+
+class LineLiveBaseIE(InfoExtractor):
+    _API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'
+
+    def _parse_broadcast_item(self, item):
+        broadcast_id = compat_str(item['id'])
+        title = item['title']
+        is_live = item.get('isBroadcastingNow')
+
+        thumbnails = []
+        for thumbnail_id, thumbnail_url in (item.get('thumbnailURLs') or {}).items():
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'id': thumbnail_id,
+                'url': thumbnail_url,
+            })
+
+        channel = item.get('channel') or {}
+        channel_id = str_or_none(channel.get('id'))
+
+        return {
+            'id': broadcast_id,
+            'title': self._live_title(title) if is_live else title,
+            'thumbnails': thumbnails,
+            'timestamp': int_or_none(item.get('createdAt')),
+            'channel': channel.get('name'),
+            'channel_id': channel_id,
+            'channel_url': 'https://live.line.me/channels/' + channel_id if channel_id else None,
+            'duration': int_or_none(item.get('archiveDuration')),
+            'view_count': int_or_none(item.get('viewerCount')),
+            'comment_count': int_or_none(item.get('chatCount')),
+            'is_live': is_live,
+        }
+
+
+class LineLiveIE(LineLiveBaseIE):
+    _VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
+        'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
+        'info_dict': {
+            'id': '16331360',
+            'title': '振りコピ講座😙😙😙',
+            'ext': 'mp4',
+            'timestamp': 1617095132,
+            'upload_date': '20210330',
+            'channel': '白川ゆめか',
+            'channel_id': '4867368',
+            'view_count': int,
+            'comment_count': int,
+            'is_live': False,
+        }
+    }, {
+        # archiveStatus == 'DELETED'
+        'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
+        broadcast = self._download_json(
+            self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
+            broadcast_id)
+        item = broadcast['item']
+        info = self._parse_broadcast_item(item)
+        protocol = 'm3u8' if info['is_live'] else 'm3u8_native'
+        formats = []
+        for k, v in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items():
+            if not v:
+                continue
+            if k == 'abr':
+                formats.extend(self._extract_m3u8_formats(
+                    v, broadcast_id, 'mp4', protocol,
+                    m3u8_id='hls', fatal=False))
+                continue
+            f = {
+                'ext': 'mp4',
+                'format_id': 'hls-' + k,
+                'protocol': protocol,
+                'url': v,
+            }
+            if not k.isdigit():
+                f['vcodec'] = 'none'
+            formats.append(f)
+        if not formats:
+            archive_status = item.get('archiveStatus')
+            if archive_status != 'ARCHIVED':
+                raise ExtractorError('this video has been ' + archive_status.lower(), expected=True)
+        self._sort_formats(formats)
+        info['formats'] = formats
+        return info
+
+
+class LineLiveChannelIE(LineLiveBaseIE):
+    _VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
+    _TEST = {
+        'url': 'https://live.line.me/channels/5893542',
+        'info_dict': {
+            'id': '5893542',
+            'title': 'いくらちゃん',
+            'description': 'md5:c3a4af801f43b2fac0b02294976580be',
+        },
+        'playlist_mincount': 29
+    }
+
+    def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
+        while True:
+            for row in (archived_broadcasts.get('rows') or []):
+                share_url = str_or_none(row.get('shareURL'))
+                if not share_url:
+                    continue
+                info = self._parse_broadcast_item(row)
+                info.update({
+                    '_type': 'url',
+                    'url': share_url,
+                    'ie_key': LineLiveIE.ie_key(),
+                })
+                yield info
+            if not archived_broadcasts.get('hasNextPage'):
+                return
+            archived_broadcasts = self._download_json(
+                self._API_BASE_URL + channel_id + '/archived_broadcasts',
+                channel_id, query={
+                    'lastId': info['id'],
+                })
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
+        return self.playlist_result(
+            self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
+            channel_id, channel.get('title'), channel.get('information'))
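The channel extractor above pages through /archived_broadcasts with a lastId cursor. A minimal runnable sketch of the same generator pattern, with an in-memory fake API in place of _download_json:

PAGES = {
    None: {'rows': [{'id': 1}, {'id': 2}], 'hasNextPage': True},
    2: {'rows': [{'id': 3}], 'hasNextPage': False},
}

def entries():
    page = PAGES[None]
    while True:
        for row in page.get('rows') or []:
            yield row
        if not page.get('hasNextPage'):
            return
        # request the next page starting after the last item we yielded
        page = PAGES[row['id']]

print([r['id'] for r in entries()])  # -> [1, 2, 3]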
yt_dlp/extractor/linuxacademy.py
@@ -38,8 +38,8 @@ class LinuxAcademyIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'What Is Data Science',
             'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
-            'timestamp': 1607387907,
-            'upload_date': '20201208',
+            'timestamp': int,  # The timestamp and upload date changes
+            'upload_date': r're:\d+',
             'duration': 304,
         },
         'params': {
@@ -59,6 +59,16 @@ class LinuxAcademyIE(InfoExtractor):
         },
         'playlist_count': 41,
         'skip': 'Requires Linux Academy account credentials',
+    }, {
+        'url': 'https://linuxacademy.com/cp/modules/view/id/39',
+        'info_dict': {
+            'id': '39',
+            'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep (legacy)',
+            'description': 'md5:0f1d3369e90c3fb14a79813b863c902f',
+            'duration': 89280,
+        },
+        'playlist_count': 73,
+        'skip': 'Requires Linux Academy account credentials',
     }]

     _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
@@ -102,7 +112,7 @@ class LinuxAcademyIE(InfoExtractor):
             'client_id': self._CLIENT_ID,
             'redirect_uri': self._ORIGIN_URL,
             'tenant': 'lacausers',
-            'connection': 'Username-Password-Authentication',
+            'connection': 'Username-Password-ACG-Proxy',
             'username': username,
             'password': password,
             'sso': 'true',
@@ -162,7 +172,7 @@ class LinuxAcademyIE(InfoExtractor):
         if course_id:
             module = self._parse_json(
                 self._search_regex(
-                    r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
+                    r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'),
                 item_id)
             entries = []
             chapter_number = None
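Why the module regex above was tightened: the old non-greedy pattern stops at the first '};', even when it occurs inside a JSON string, while the new one skips over quoted strings whole. A quick check with a made-up page snippet:

import re

webpage = 'window.module = {"code": "if (x) {return}; done", "id": 39};'
old = re.search(r'window\.module\s*=\s*({.+?})\s*;', webpage).group(1)
new = re.search(r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage).group(1)
print(old)  # '{"code": "if (x) {return}' -- truncated mid-string
print(new)  # the full object, up to the real closing brace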
31
yt_dlp/extractor/maoritv.py
Normal file
@@ -0,0 +1,31 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MaoriTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?maoritelevision\.com/shows/(?:[^/]+/)+(?P<id>[^/?&#]+)'
+    _TEST = {
+        'url': 'https://www.maoritelevision.com/shows/korero-mai/S01E054/korero-mai-series-1-episode-54',
+        'md5': '5ade8ef53851b6a132c051b1cd858899',
+        'info_dict': {
+            'id': '4774724855001',
+            'ext': 'mp4',
+            'title': 'Kōrero Mai, Series 1 Episode 54',
+            'upload_date': '20160226',
+            'timestamp': 1456455018,
+            'description': 'md5:59bde32fd066d637a1a55794c56d8dcb',
+            'uploader_id': '1614493167001',
+        },
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        brightcove_id = self._search_regex(
+            r'data-main-video-id=["\'](\d+)', webpage, 'brightcove id')
+        return self.url_result(
+            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+            'BrightcoveNew', brightcove_id)
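MaoriTVIE above is a pure delegation extractor: it scrapes the Brightcove video id out of the page and hands off via url_result. A sketch of the player URL it builds, using the id from the test above:

BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'
print(BRIGHTCOVE_URL_TEMPLATE % '4774724855001')
# -> http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=4774724855001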
yt_dlp/extractor/mildom.py
@@ -5,6 +5,7 @@ from datetime import datetime
 import itertools
 import json
 import base64
+import re

 from .common import InfoExtractor
 from ..utils import (
@@ -68,7 +69,7 @@ class MildomBaseIE(InfoExtractor):
                 self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
             except ExtractorError:
                 self._DISPATCHER_CONFIG = self._download_json(
-                    'https://bookish-octo-barnacle.vercel.app/api/dispatcher_config', 'initialization',
+                    'https://bookish-octo-barnacle.vercel.app/api/mildom/dispatcher_config', 'initialization',
                     note='Downloading dispatcher_config fallback')
         return self._DISPATCHER_CONFIG

@@ -110,6 +111,7 @@ class MildomIE(MildomBaseIE):
         enterstudio = self._call_api(
             'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
             note='Downloading live metadata', query={'user_id': video_id})
+        result_video_id = enterstudio.get('log_id', video_id)

         title = try_get(
             enterstudio, (
@@ -128,7 +130,7 @@ class MildomIE(MildomBaseIE):
             ), compat_str)

         servers = self._call_api(
-            'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', video_id,
+            'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
             note='Downloading live server list', query={
                 'user_id': video_id,
                 'live_server_type': 'hls',
@@ -139,7 +141,7 @@ class MildomIE(MildomBaseIE):
             'is_lhls': '0',
         })
         m3u8_url = update_url_query(servers['stream_server'] + '/%s_master.m3u8' % video_id, stream_query)
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', headers={
+        formats = self._extract_m3u8_formats(m3u8_url, result_video_id, 'mp4', headers={
             'Referer': 'https://www.mildom.com/',
             'Origin': 'https://www.mildom.com',
         }, note='Downloading m3u8 information')
@@ -150,13 +152,13 @@ class MildomIE(MildomBaseIE):
             parsed = parsed._replace(
                 netloc='bookish-octo-barnacle.vercel.app',
                 query=compat_urllib_parse_urlencode(stream_query, True),
-                path='/api' + parsed.path)
+                path='/api/mildom' + parsed.path)
             fmt['url'] = compat_urlparse.urlunparse(parsed)

         self._sort_formats(formats)

         return {
-            'id': video_id,
+            'id': result_video_id,
             'title': title,
             'description': description,
             'uploader': uploader,
@@ -172,9 +174,8 @@ class MildomVodIE(MildomBaseIE):
     _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+)'

     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        m = self._VALID_URL_RE.match(url)
-        user_id = m.group('user_id')
+        m = re.match(self._VALID_URL, url)
+        user_id, video_id = m.group('user_id'), m.group('id')
         url = 'https://www.mildom.com/playback/%s/%s' % (user_id, video_id)

         webpage = self._download_webpage(url, video_id)
@@ -230,7 +231,7 @@ class MildomVodIE(MildomBaseIE):
             parsed = parsed._replace(
                 netloc='bookish-octo-barnacle.vercel.app',
                 query=compat_urllib_parse_urlencode(stream_query, True),
-                path='/api/vod2/proxy')
+                path='/api/mildom/vod2/proxy')
             fmt['url'] = compat_urlparse.urlunparse(parsed)

         self._sort_formats(formats)
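The Mildom changes above route media URLs through a proxy by rewriting the host and prefixing the path, using the ParseResult._replace idiom. A minimal sketch with the standard library; hosts and path prefix are the ones in the diff, the source URL is made up:

from urllib.parse import urlparse, urlunparse

src = 'https://example-cdn.mildom.com/vod2/proxy/clip.m3u8?token=abc'  # made-up URL
parsed = urlparse(src)
parsed = parsed._replace(
    netloc='bookish-octo-barnacle.vercel.app',
    path='/api/mildom' + parsed.path)
print(urlunparse(parsed))
# -> https://bookish-octo-barnacle.vercel.app/api/mildom/vod2/proxy/clip.m3u8?token=abc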
yt_dlp/extractor/mlb.py
@@ -1,15 +1,91 @@
 from __future__ import unicode_literals

-from .nhl import NHLBaseIE
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    try_get,
+)


-class MLBIE(NHLBaseIE):
+class MLBBaseIE(InfoExtractor):
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        video = self._download_video_data(display_id)
+        video_id = video['id']
+        title = video['title']
+        feed = self._get_feed(video)
+
+        formats = []
+        for playback in (feed.get('playbacks') or []):
+            playback_url = playback.get('url')
+            if not playback_url:
+                continue
+            name = playback.get('name')
+            ext = determine_ext(playback_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    playback_url, video_id, 'mp4',
+                    'm3u8_native', m3u8_id=name, fatal=False))
+            else:
+                f = {
+                    'format_id': name,
+                    'url': playback_url,
+                }
+                mobj = re.search(r'_(\d+)K_(\d+)X(\d+)', name)
+                if mobj:
+                    f.update({
+                        'height': int(mobj.group(3)),
+                        'tbr': int(mobj.group(1)),
+                        'width': int(mobj.group(2)),
+                    })
+                mobj = re.search(r'_(\d+)x(\d+)_(\d+)_(\d+)K\.mp4', playback_url)
+                if mobj:
+                    f.update({
+                        'fps': int(mobj.group(3)),
+                        'height': int(mobj.group(2)),
+                        'tbr': int(mobj.group(4)),
+                        'width': int(mobj.group(1)),
+                    })
+                formats.append(f)
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
+            src = cut.get('src')
+            if not src:
+                continue
+            thumbnails.append({
+                'height': int_or_none(cut.get('height')),
+                'url': src,
+                'width': int_or_none(cut.get('width')),
+            })
+
+        language = (video.get('language') or 'EN').lower()
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': video.get('description'),
+            'duration': parse_duration(feed.get('duration')),
+            'thumbnails': thumbnails,
+            'timestamp': parse_iso8601(video.get(self._TIMESTAMP_KEY)),
+            'subtitles': self._extract_mlb_subtitles(feed, language),
+        }
+
+
+class MLBIE(MLBBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
-                        (?:[\da-z_-]+\.)*(?P<site>mlb)\.com/
+                        (?:[\da-z_-]+\.)*mlb\.com/
                         (?:
                             (?:
-                                (?:[^/]+/)*c-|
+                                (?:[^/]+/)*video/[^/]+/c-|
                                 (?:
                                     shared/video/embed/(?:embed|m-internal-embed)\.html|
                                     (?:[^/]+/)+(?:play|index)\.jsp|
@@ -18,7 +94,6 @@ class MLBIE(NHLBaseIE):
                             (?P<id>\d+)
                         )
                     '''
-    _CONTENT_DOMAIN = 'content.mlb.com'
     _TESTS = [
         {
             'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
@@ -76,18 +151,6 @@ class MLBIE(NHLBaseIE):
                 'thumbnail': r're:^https?://.*\.jpg$',
             },
         },
-        {
-            'url': 'https://www.mlb.com/news/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer/c-118550098',
-            'md5': 'e09e37b552351fddbf4d9e699c924d68',
-            'info_dict': {
-                'id': '75609783',
-                'ext': 'mp4',
-                'title': 'Must C: Pillar climbs for catch',
-                'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
-                'timestamp': 1429139220,
-                'upload_date': '20150415',
-            }
-        },
         {
             'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
             'only_matching': True,
@@ -113,8 +176,92 @@ class MLBIE(NHLBaseIE):
             'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
             'only_matching': True,
         },
-        {
-            'url': 'https://www.mlb.com/cut4/carlos-gomez-borrowed-sunglasses-from-an-as-fan/c-278912842',
-            'only_matching': True,
-        }
     ]
+    _TIMESTAMP_KEY = 'date'
+
+    @staticmethod
+    def _get_feed(video):
+        return video
+
+    @staticmethod
+    def _extract_mlb_subtitles(feed, language):
+        subtitles = {}
+        for keyword in (feed.get('keywordsAll') or []):
+            keyword_type = keyword.get('type')
+            if keyword_type and keyword_type.startswith('closed_captions_location_'):
+                cc_location = keyword.get('value')
+                if cc_location:
+                    subtitles.setdefault(language, []).append({
+                        'url': cc_location,
+                    })
+        return subtitles
+
+    def _download_video_data(self, display_id):
+        return self._download_json(
+            'http://content.mlb.com/mlb/item/id/v1/%s/details/web-v1.json' % display_id,
+            display_id)
+
+
+class MLBVideoIE(MLBBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?mlb\.com/(?:[^/]+/)*video/(?P<id>[^/?&#]+)'
+    _TEST = {
+        'url': 'https://www.mlb.com/mariners/video/ackley-s-spectacular-catch-c34698933',
+        'md5': '632358dacfceec06bad823b83d21df2d',
+        'info_dict': {
+            'id': 'c04a8863-f569-42e6-9f87-992393657614',
+            'ext': 'mp4',
+            'title': "Ackley's spectacular catch",
+            'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
+            'duration': 66,
+            'timestamp': 1405995000,
+            'upload_date': '20140722',
+            'thumbnail': r're:^https?://.+',
+        },
+    }
+    _TIMESTAMP_KEY = 'timestamp'
+
+    @classmethod
+    def suitable(cls, url):
+        return False if MLBIE.suitable(url) else super(MLBVideoIE, cls).suitable(url)
+
+    @staticmethod
+    def _get_feed(video):
+        return video['feeds'][0]
+
+    @staticmethod
+    def _extract_mlb_subtitles(feed, language):
+        subtitles = {}
+        for cc_location in (feed.get('closedCaptions') or []):
+            subtitles.setdefault(language, []).append({
+                'url': cc_location,
+            })
+
+    def _download_video_data(self, display_id):
+        # https://www.mlb.com/data-service/en/videos/[SLUG]
+        return self._download_json(
+            'https://fastball-gateway.mlb.com/graphql',
+            display_id, query={
+                'query': '''{
+  mediaPlayback(ids: "%s") {
+    description
+    feeds(types: CMS) {
+      closedCaptions
+      duration
+      image {
+        cuts {
+          width
+          height
+          src
+        }
+      }
+      playbacks {
+        name
+        url
+      }
+    }
+    id
+    timestamp
+    title
+  }
+}''' % display_id,
+            })['data']['mediaPlayback'][0]
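MLBBaseIE above recovers bitrate and resolution from playback names with a bare regex. A quick check of that pattern (the sample name is made up, in the shape the code expects):

import re

mobj = re.search(r'_(\d+)K_(\d+)X(\d+)', 'FLASH_1200K_640X360')
print({'tbr': int(mobj.group(1)),
       'width': int(mobj.group(2)),
       'height': int(mobj.group(3))})
# -> {'tbr': 1200, 'width': 640, 'height': 360}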
yt_dlp/extractor/mtv.py
@@ -7,7 +7,6 @@ from .common import InfoExtractor
 from ..compat import (
     compat_str,
     compat_xpath,
-    compat_urlparse,
 )
 from ..utils import (
     ExtractorError,
@@ -15,6 +14,7 @@ from ..utils import (
     fix_xml_ampersands,
     float_or_none,
     HEADRequest,
+    int_or_none,
     RegexNotFoundError,
     sanitized_Request,
     strip_or_none,
@@ -23,7 +23,6 @@ from ..utils import (
     unescapeHTML,
     update_url_query,
     url_basename,
-    get_domain,
     xpath_text,
 )

@@ -45,7 +44,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         # Remove the templates, like &device={device}
         return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)

-    def _get_feed_url(self, uri, url=None):
+    def _get_feed_url(self, uri):
         return self._FEED_URL

     def _get_thumbnail_url(self, uri, itemdoc):
@@ -178,6 +177,22 @@ class MTVServicesInfoExtractor(InfoExtractor):
             raise ExtractorError('Could not find video title')
         title = title.strip()

+        series = find_xpath_attr(
+            itemdoc, './/{http://search.yahoo.com/mrss/}category',
+            'scheme', 'urn:mtvn:franchise')
+        season = find_xpath_attr(
+            itemdoc, './/{http://search.yahoo.com/mrss/}category',
+            'scheme', 'urn:mtvn:seasonN')
+        episode = find_xpath_attr(
+            itemdoc, './/{http://search.yahoo.com/mrss/}category',
+            'scheme', 'urn:mtvn:episodeN')
+        series = series.text if series is not None else None
+        season = season.text if season is not None else None
+        episode = episode.text if episode is not None else None
+        if season and episode:
+            # episode number includes season, so remove it
+            episode = re.sub(r'^%s' % season, '', episode)
+
         # This a short id that's used in the webpage urls
         mtvn_id = None
         mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
@@ -203,6 +218,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
             'description': description,
             'duration': float_or_none(content_el.attrib.get('duration')),
             'timestamp': timestamp,
+            'series': series,
+            'season_number': int_or_none(season),
+            'episode_number': int_or_none(episode),
         }

     def _get_feed_query(self, uri):
@@ -211,9 +229,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
             data['lang'] = self._LANG
         return data

-    def _get_videos_info(self, uri, use_hls=True, url=None):
+    def _get_videos_info(self, uri, use_hls=True):
         video_id = self._id_from_uri(uri)
-        feed_url = self._get_feed_url(uri, url)
+        feed_url = self._get_feed_url(uri)
         info_url = update_url_query(feed_url, self._get_feed_query(uri))
         return self._get_videos_info_from_url(info_url, video_id, use_hls)

@@ -257,43 +275,11 @@ class MTVServicesInfoExtractor(InfoExtractor):

     @staticmethod
     def _extract_child_with_type(parent, t):
-        return next(c for c in parent['children'] if c.get('type') == t)
+        for c in parent['children']:
+            if c.get('type') == t:
+                return c

-    def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
-        if url == '':
-            return
-        domain = get_domain(url)
-        if domain is None:
-            raise ExtractorError(
-                '[%s] could not get domain' % self.IE_NAME,
-                expected=True)
-        url = url.replace("https://", "http://")
-        enc_url = compat_urlparse.quote(url, safe='')
-        _TRIFORCE_V8_TEMPLATE = 'https://%s/feeds/triforce/manifest/v8?url=%s'
-        triforce_manifest_url = _TRIFORCE_V8_TEMPLATE % (domain, enc_url)
-
-        manifest = self._download_json(triforce_manifest_url, video_id, fatal=False)
-        if manifest:
-            if manifest.get('manifest').get('type') == 'redirect':
-                self.to_screen('Found a redirect. Downloading manifest from new location')
-                new_loc = manifest.get('manifest').get('newLocation')
-                new_loc = new_loc.replace("https://", "http://")
-                enc_new_loc = compat_urlparse.quote(new_loc, safe='')
-                triforce_manifest_new_loc = _TRIFORCE_V8_TEMPLATE % (domain, enc_new_loc)
-                manifest = self._download_json(triforce_manifest_new_loc, video_id, fatal=False)
-
-        item_id = try_get(manifest, lambda x: x['manifest']['reporting']['itemId'], compat_str)
-        if not item_id:
-            self.to_screen('No id found!')
-            return
-
-        # 'episode' can be anything. 'content' is used often as well
-        _MGID_TEMPLATE = 'mgid:arc:episode:%s:%s'
-        mgid = _MGID_TEMPLATE % (domain, item_id)
-
-        return mgid
-
-    def _extract_mgid(self, webpage, url, title=None, data_zone=None):
+    def _extract_mgid(self, webpage):
         try:
             # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
             # or http://media.mtvnservices.com/{mgid}
@@ -304,21 +290,6 @@ class MTVServicesInfoExtractor(InfoExtractor):
         except RegexNotFoundError:
             mgid = None

-        if not title:
-            title = url_basename(url)
-
-        try:
-            window_data = self._parse_json(self._search_regex(
-                r'(?s)window.__DATA__ = (?P<json>{.+});', webpage,
-                'JSON Window Data', default=None, fatal=False, group='json'), title, fatal=False)
-            main_container = None
-            for i in range(len(window_data['children'])):
-                if window_data['children'][i]['type'] == 'MainContainer':
-                    main_container = window_data['children'][i]
-            mgid = main_container['children'][0]['props']['media']['video']['config']['uri']
-        except (KeyError, IndexError, TypeError):
-            pass
-
         if mgid is None or ':' not in mgid:
             mgid = self._search_regex(
                 [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
@@ -331,16 +302,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
                 r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)

         if not mgid:
-            mgid = self._extract_new_triforce_mgid(webpage, url)
-
-        if not mgid:
-            mgid = self._extract_triforce_mgid(webpage, data_zone)
+            mgid = self._extract_triforce_mgid(webpage)

         if not mgid:
             data = self._parse_json(self._search_regex(
                 r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
             main_container = self._extract_child_with_type(data, 'MainContainer')
-            video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
+            ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
+            video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
             mgid = video_player['props']['media']['video']['config']['uri']

         return mgid
@@ -348,8 +317,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
     def _real_extract(self, url):
         title = url_basename(url)
         webpage = self._download_webpage(url, title)
-        mgid = self._extract_mgid(webpage, url, title=title)
-        videos_info = self._get_videos_info(mgid, url=url)
+        mgid = self._extract_mgid(webpage)
+        videos_info = self._get_videos_info(mgid)
         return videos_info


@@ -374,7 +343,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
     @staticmethod
     def _extract_url(webpage):
         mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media.mtvnservices.com/embed/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1', webpage)
         if mobj:
             return mobj.group('url')

@@ -537,3 +506,152 @@ class MTVDEIE(MTVServicesInfoExtractor):
             'arcEp': 'mtv.de',
             'mgid': uri,
         }
+
+
+class MTVItaliaIE(MTVServicesInfoExtractor):
+    IE_NAME = 'mtv.it'
+    _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:episodi|video|musica)/(?P<id>[0-9a-z]+)'
+    _TESTS = [{
+        'url': 'http://www.mtv.it/episodi/24bqab/mario-una-serie-di-maccio-capatonda-cavoli-amario-episodio-completo-S1-E1',
+        'info_dict': {
+            'id': '0f0fc78e-45fc-4cce-8f24-971c25477530',
+            'ext': 'mp4',
+            'title': 'Cavoli amario (episodio completo)',
+            'description': 'md5:4962bccea8fed5b7c03b295ae1340660',
+            'series': 'Mario - Una Serie Di Maccio Capatonda',
+            'season_number': 1,
+            'episode_number': 1,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+    _GEO_COUNTRIES = ['IT']
+    _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+
+    def _get_feed_query(self, uri):
+        return {
+            'arcEp': 'mtv.it',
+            'mgid': uri,
+        }
+
+
+class MTVItaliaProgrammaIE(MTVItaliaIE):
+    IE_NAME = 'mtv.it:programma'
+    _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:programmi|playlist)/(?P<id>[0-9a-z]+)'
+    _TESTS = [{
+        # program page: general
+        'url': 'http://www.mtv.it/programmi/s2rppv/mario-una-serie-di-maccio-capatonda',
+        'info_dict': {
+            'id': 'a6f155bc-8220-4640-aa43-9b95f64ffa3d',
+            'title': 'Mario - Una Serie Di Maccio Capatonda',
+            'description': 'md5:72fbffe1f77ccf4e90757dd4e3216153',
+        },
+        'playlist_count': 2,
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # program page: specific season
+        'url': 'http://www.mtv.it/programmi/d9ncjf/mario-una-serie-di-maccio-capatonda-S2',
+        'info_dict': {
+            'id': '4deeb5d8-f272-490c-bde2-ff8d261c6dd1',
+            'title': 'Mario - Una Serie Di Maccio Capatonda - Stagione 2',
+        },
+        'playlist_count': 34,
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # playlist page + redirect
+        'url': 'http://www.mtv.it/playlist/sexy-videos/ilctal',
+        'info_dict': {
+            'id': 'dee8f9ee-756d-493b-bf37-16d1d2783359',
+            'title': 'Sexy Videos',
+        },
+        'playlist_mincount': 145,
+        'params': {
+            'skip_download': True,
+        },
+    }]
+    _GEO_COUNTRIES = ['IT']
+    _FEED_URL = 'http://www.mtv.it/feeds/triforce/manifest/v8'
+
+    def _get_entries(self, title, url):
+        while True:
+            pg = self._search_regex(r'/(\d+)$', url, 'entries', '1')
+            entries = self._download_json(url, title, 'page %s' % pg)
+            url = try_get(
+                entries, lambda x: x['result']['nextPageURL'], compat_str)
+            entries = try_get(
+                entries, (
+                    lambda x: x['result']['data']['items'],
+                    lambda x: x['result']['data']['seasons']),
+                list)
+            for entry in entries or []:
+                if entry.get('canonicalURL'):
+                    yield self.url_result(entry['canonicalURL'])
+            if not url:
+                break
+
+    def _real_extract(self, url):
+        query = {'url': url}
+        info_url = update_url_query(self._FEED_URL, query)
+        video_id = self._match_id(url)
+        info = self._download_json(info_url, video_id).get('manifest')
+
+        redirect = try_get(
+            info, lambda x: x['newLocation']['url'], compat_str)
+        if redirect:
+            return self.url_result(redirect)
+
+        title = info.get('title')
+        video_id = try_get(
+            info, lambda x: x['reporting']['itemId'], compat_str)
+        parent_id = try_get(
+            info, lambda x: x['reporting']['parentId'], compat_str)
+
+        playlist_url = current_url = None
+        for z in (info.get('zones') or {}).values():
+            if z.get('moduleName') in ('INTL_M304', 'INTL_M209'):
+                info_url = z.get('feed')
+            if z.get('moduleName') in ('INTL_M308', 'INTL_M317'):
+                playlist_url = playlist_url or z.get('feed')
+            if z.get('moduleName') in ('INTL_M300',):
+                current_url = current_url or z.get('feed')
+
+        if not info_url:
+            raise ExtractorError('No info found')
+
+        if video_id == parent_id:
+            video_id = self._search_regex(
+                r'([^\/]+)/[^\/]+$', info_url, 'video_id')
+
+        info = self._download_json(info_url, video_id, 'Show infos')
+        info = try_get(info, lambda x: x['result']['data'], dict)
+        title = title or try_get(
+            info, (
+                lambda x: x['title'],
+                lambda x: x['headline']),
+            compat_str)
+        description = try_get(info, lambda x: x['content'], compat_str)
+
+        if current_url:
+            season = try_get(
+                self._download_json(playlist_url, video_id, 'Seasons info'),
+                lambda x: x['result']['data'], dict)
+            current = try_get(
+                season, lambda x: x['currentSeason'], compat_str)
+            seasons = try_get(
+                season, lambda x: x['seasons'], list) or []
+
+            if current in [s.get('eTitle') for s in seasons]:
+                playlist_url = current_url
+
+        title = re.sub(
+            r'[-|]\s*(?:mtv\s*italia|programma|playlist)',
+            '', title, flags=re.IGNORECASE).strip()
+
+        return self.playlist_result(
+            self._get_entries(title, playlist_url),
+            video_id, title, description)
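Why _extract_child_with_type was rewritten above: next() over an empty generator raises StopIteration, while the loop form returns None, which is what lets `ab_testing or main_container` fall back when a page has no ABTesting child. A minimal demo with made-up data:

def child_with_type(parent, t):
    for c in parent['children']:
        if c.get('type') == t:
            return c  # implicitly returns None when nothing matches

container = {'children': [{'type': 'VideoPlayer'}]}
ab_testing = child_with_type(container, 'ABTesting')  # None, no exception
player = child_with_type(ab_testing or container, 'VideoPlayer')
print(player)  # -> {'type': 'VideoPlayer'}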
127
yt_dlp/extractor/mxplayer.py
Normal file
@@ -0,0 +1,127 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    js_to_json,
+    qualities,
+    try_get,
+    url_or_none,
+    urljoin,
+)
+
+
+class MxplayerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?mxplayer\.in/(?:show|movie)/(?:(?P<display_id>[-/a-z0-9]+)-)?(?P<id>[a-z0-9]+)'
+    _TESTS = [{
+        'url': 'https://www.mxplayer.in/movie/watch-knock-knock-hindi-dubbed-movie-online-b9fa28df3bfb8758874735bbd7d2655a?watch=true',
+        'info_dict': {
+            'id': 'b9fa28df3bfb8758874735bbd7d2655a',
+            'ext': 'mp4',
+            'title': 'Knock Knock (Hindi Dubbed)',
+            'description': 'md5:b195ba93ff1987309cfa58e2839d2a5b'
+        },
+        'params': {
+            'skip_download': True,
+            'format': 'bestvideo'
+        }
+    }, {
+        'url': 'https://www.mxplayer.in/show/watch-shaitaan/season-1/the-infamous-taxi-gang-of-meerut-online-45055d5bcff169ad48f2ad7552a83d6c',
+        'info_dict': {
+            'id': '45055d5bcff169ad48f2ad7552a83d6c',
+            'ext': 'm3u8',
+            'title': 'The infamous taxi gang of Meerut',
+            'description': 'md5:033a0a7e3fd147be4fb7e07a01a3dc28',
+            'season': 'Season 1',
+            'series': 'Shaitaan'
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        'url': 'https://www.mxplayer.in/show/watch-aashram/chapter-1/duh-swapna-online-d445579792b0135598ba1bc9088a84cb',
+        'info_dict': {
+            'id': 'd445579792b0135598ba1bc9088a84cb',
+            'ext': 'mp4',
+            'title': 'Duh Swapna',
+            'description': 'md5:35ff39c4bdac403c53be1e16a04192d8',
+            'season': 'Chapter 1',
+            'series': 'Aashram'
+        },
+        'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
+        'params': {
+            'skip_download': True,
+            'format': 'bestvideo'
+        }
+    }]
+
+    def _get_stream_urls(self, video_dict):
+        stream_provider_dict = try_get(
+            video_dict,
+            lambda x: x['stream'][x['stream']['provider']])
+        if not stream_provider_dict:
+            raise ExtractorError('No stream provider found', expected=True)
+
+        for stream_name, stream in stream_provider_dict.items():
+            if stream_name in ('hls', 'dash', 'hlsUrl', 'dashUrl'):
+                stream_type = stream_name.replace('Url', '')
+                if isinstance(stream, dict):
+                    for quality, stream_url in stream.items():
+                        if stream_url:
+                            yield stream_type, quality, stream_url
+                else:
+                    yield stream_type, 'base', stream
+
+    def _real_extract(self, url):
+        display_id, video_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, video_id)
+
+        source = self._parse_json(
+            js_to_json(self._html_search_regex(
+                r'(?s)<script>window\.state\s*[:=]\s(\{.+\})\n(\w+).*(</script>).*',
+                webpage, 'WindowState')),
+            video_id)
+        if not source:
+            raise ExtractorError('Cannot find source', expected=True)
+
+        config_dict = source['config']
+        video_dict = source['entities'][video_id]
+
+        thumbnails = []
+        for i in video_dict.get('imageInfo') or []:
+            thumbnails.append({
+                'url': urljoin(config_dict['imageBaseUrl'], i['url']),
+                'width': i['width'],
+                'height': i['height'],
+            })
+
+        formats = []
+        get_quality = qualities(['main', 'base', 'high'])
+        for stream_type, quality, stream_url in self._get_stream_urls(video_dict):
+            format_url = url_or_none(urljoin(config_dict['videoCdnBaseUrl'], stream_url))
+            if not format_url:
+                continue
+            if stream_type == 'dash':
+                dash_formats = self._extract_mpd_formats(
+                    format_url, video_id, mpd_id='dash-%s' % quality, headers={'Referer': url})
+                for frmt in dash_formats:
+                    frmt['quality'] = get_quality(quality)
+                formats.extend(dash_formats)
+            elif stream_type == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, fatal=False,
+                    m3u8_id='hls-%s' % quality, quality=get_quality(quality)))
+
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'display_id': display_id.replace('/', '-'),
+            'title': video_dict['title'] or self._og_search_title(webpage),
+            'formats': formats,
+            'description': video_dict.get('description'),
+            'season': try_get(video_dict, lambda x: x['container']['title']),
+            'series': try_get(video_dict, lambda x: x['container']['container']['title']),
+            'thumbnails': thumbnails,
+        }
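MxplayerIE above ranks streams with the qualities() helper from yt_dlp.utils, which returns a function mapping a quality name to its index in the preference list (higher wins, unknown names get -1). Quick check:

from yt_dlp.utils import qualities

get_quality = qualities(['main', 'base', 'high'])
print(get_quality('high'), get_quality('main'), get_quality('unknown'))
# -> 2 0 -1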
yt_dlp/extractor/nbc.py
@@ -10,6 +10,7 @@ from .adobepass import AdobePassIE
 from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     int_or_none,
+    parse_age_limit,
     parse_duration,
     smuggle_url,
     try_get,
@@ -18,7 +19,7 @@ from ..utils import (
 )


-class NBCIE(AdobePassIE):
+class NBCIE(ThePlatformIE):
    _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'

     _TESTS = [
@@ -132,7 +133,9 @@ class NBCIE(AdobePassIE):
             'manifest': 'm3u',
         }
         video_id = video_data['mpxGuid']
-        title = video_data['secondaryTitle']
+        tp_path = 'NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id)
+        tpm = self._download_theplatform_metadata(tp_path, video_id)
+        title = tpm.get('title') or video_data.get('secondaryTitle')
         if video_data.get('locked'):
             resource = self._get_mvpd_resource(
                 video_data.get('resourceId') or 'nbcentertainment',
@@ -142,18 +145,40 @@ class NBCIE(AdobePassIE):
         theplatform_url = smuggle_url(update_url_query(
             'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id),
             query), {'force_smil_url': True})
+
+        # Empty string or 0 can be valid values for these. So the check must be `is None`
+        description = video_data.get('description')
+        if description is None:
+            description = tpm.get('description')
+        episode_number = int_or_none(video_data.get('episodeNumber'))
+        if episode_number is None:
+            episode_number = int_or_none(tpm.get('nbcu$airOrder'))
+        rating = video_data.get('rating')
+        if rating is None:
+            rating = try_get(tpm, lambda x: x['ratings'][0]['rating'])
+        season_number = int_or_none(video_data.get('seasonNumber'))
+        if season_number is None:
+            season_number = int_or_none(tpm.get('nbcu$seasonNumber'))
+        series = video_data.get('seriesShortTitle')
+        if series is None:
+            series = tpm.get('nbcu$seriesShortTitle')
+        tags = video_data.get('keywords')
+        if tags is None or len(tags) == 0:
+            tags = tpm.get('keywords')
+
         return {
             '_type': 'url_transparent',
+            'age_limit': parse_age_limit(rating),
+            'description': description,
+            'episode': title,
+            'episode_number': episode_number,
             'id': video_id,
+            'ie_key': 'ThePlatform',
+            'season_number': season_number,
+            'series': series,
+            'tags': tags,
             'title': title,
             'url': theplatform_url,
-            'description': video_data.get('description'),
-            'tags': video_data.get('keywords'),
-            'season_number': int_or_none(video_data.get('seasonNumber')),
-            'episode_number': int_or_none(video_data.get('episodeNumber')),
-            'episode': title,
-            'series': video_data.get('seriesShortTitle'),
-            'ie_key': 'ThePlatform',
         }
|||||||
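The `is None` checks in the hunk above are deliberate: an `or`-chain would throw away falsy-but-valid site data. A small illustration with made-up values:

# Hypothetical values: an episode number of 0 from the site, 42 from metadata.
site_value, metadata_value = 0, 42

wrong = site_value or metadata_value  # 42 - the valid 0 is silently discarded
right = site_value if site_value is not None else metadata_value  # 0

assert wrong == 42 and right == 0
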
yt_dlp/extractor/nebula.py (new file, 197 lines)
@@ -0,0 +1,197 @@
# coding: utf-8
from __future__ import unicode_literals

import json

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    parse_iso8601,
    try_get,
    urljoin,
)


class NebulaIE(InfoExtractor):

    _VALID_URL = r'https?://(?:www\.)?watchnebula\.com/videos/(?P<id>[-\w]+)'
    _TESTS = [
        {
            'url': 'https://watchnebula.com/videos/that-time-disney-remade-beauty-and-the-beast',
            'md5': 'fe79c4df8b3aa2fea98a93d027465c7e',
            'info_dict': {
                'id': '5c271b40b13fd613090034fd',
                'ext': 'mp4',
                'title': 'That Time Disney Remade Beauty and the Beast',
                'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
                'upload_date': '20180731',
                'timestamp': 1533009600,
                'channel': 'Lindsay Ellis',
                'uploader': 'Lindsay Ellis',
            },
            'params': {
                'usenetrc': True,
            },
            'skip': 'All Nebula content requires authentication',
        },
        {
            'url': 'https://watchnebula.com/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'md5': '6d4edd14ce65720fa63aba5c583fb328',
            'info_dict': {
                'id': '5e7e78171aaf320001fbd6be',
                'ext': 'mp4',
                'title': 'Landing Craft - How The Allies Got Ashore',
                'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
                'upload_date': '20200327',
                'timestamp': 1585348140,
                'channel': 'The Logistics of D-Day',
                'uploader': 'The Logistics of D-Day',
            },
            'params': {
                'usenetrc': True,
            },
            'skip': 'All Nebula content requires authentication',
        },
        {
            'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
            'md5': '8c7d272910eea320f6f8e6d3084eecf5',
            'info_dict': {
                'id': '5e779ebdd157bc0001d1c75a',
                'ext': 'mp4',
                'title': 'Episode 1: The Draw',
                'description': r'contains:There’s free money on offer… if the players can all work together.',
                'upload_date': '20200323',
                'timestamp': 1584980400,
                'channel': 'Tom Scott Presents: Money',
                'uploader': 'Tom Scott Presents: Money',
            },
            'params': {
                'usenetrc': True,
            },
            'skip': 'All Nebula content requires authentication',
        },
    ]
    _NETRC_MACHINE = 'watchnebula'

    def _retrieve_nebula_auth(self, video_id):
        """
        Log in to Nebula and return a Nebula API token
        """
        username, password = self._get_login_info()
        if not (username and password):
            self.raise_login_required()

        self.report_login()
        data = json.dumps({'email': username, 'password': password}).encode('utf8')
        response = self._download_json(
            'https://api.watchnebula.com/api/v1/auth/login/',
            data=data, fatal=False, video_id=video_id,
            headers={
                'content-type': 'application/json',
                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
                'cookie': ''
            },
            note='Authenticating to Nebula with supplied credentials',
            errnote='Authentication failed or rejected')
        if not response or not response.get('key'):
            self.raise_login_required()
        return response['key']

    def _retrieve_zype_api_key(self, page_url, display_id):
        """
        Retrieve the Zype API key
        """
        # Find the js that has the API key from the webpage and download it
        webpage = self._download_webpage(page_url, video_id=display_id)
        main_script_relpath = self._search_regex(
            r'<script[^>]*src="(?P<script_relpath>[^"]*main.[0-9a-f]*.chunk.js)"[^>]*>', webpage,
            group='script_relpath', name='script relative path', fatal=True)
        main_script_abspath = urljoin(page_url, main_script_relpath)
        main_script = self._download_webpage(main_script_abspath, video_id=display_id,
                                             note='Retrieving Zype API key')

        api_key = self._search_regex(
            r'REACT_APP_ZYPE_API_KEY\s*:\s*"(?P<api_key>[\w-]*)"', main_script,
            group='api_key', name='API key', fatal=True)

        return api_key

    def _call_zype_api(self, path, params, video_id, api_key, note):
        """
        A helper for making calls to the Zype API.
        """
        query = {'api_key': api_key, 'per_page': 1}
        query.update(params)
        return self._download_json('https://api.zype.com' + path, video_id, query=query, note=note)

    def _call_nebula_api(self, path, video_id, access_token, note):
        """
        A helper for making calls to the Nebula API.
        """
        return self._download_json('https://api.watchnebula.com/api/v1' + path, video_id, headers={
            'Authorization': 'Token {access_token}'.format(access_token=access_token)
        }, note=note)

    def _fetch_zype_access_token(self, video_id, nebula_token):
        user_object = self._call_nebula_api('/auth/user/', video_id, nebula_token, note='Retrieving Zype access token')
        access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str)
        if not access_token:
            if try_get(user_object, lambda x: x['is_subscribed'], bool):
                # TODO: Reimplement the same Zype token polling the Nebula frontend implements
                # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
                raise ExtractorError(
                    'Unable to extract Zype access token from Nebula API authentication endpoint. '
                    'Open an arbitrary video in a browser with this account to generate a token',
                    expected=True)
            raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
        return access_token

    def _extract_channel_title(self, video_meta):
        # TODO: Implement the API calls giving us the channel list,
        # so that we can do the title lookup and then figure out the channel URL
        categories = video_meta.get('categories', []) if video_meta else []
        # the channel name is the value of the first category
        for category in categories:
            if category.get('value'):
                return category['value'][0]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        nebula_token = self._retrieve_nebula_auth(display_id)
        api_key = self._retrieve_zype_api_key(url, display_id)

        response = self._call_zype_api('/videos', {'friendly_title': display_id},
                                       display_id, api_key, note='Retrieving metadata from Zype')
        if len(response.get('response') or []) != 1:
            raise ExtractorError('Unable to find video on Zype API')
        video_meta = response['response'][0]

        video_id = video_meta['_id']
        zype_access_token = self._fetch_zype_access_token(display_id, nebula_token=nebula_token)

        channel_title = self._extract_channel_title(video_meta)

        return {
            'id': video_id,
            'display_id': display_id,
            '_type': 'url_transparent',
            'ie_key': 'Zype',
            'url': 'https://player.zype.com/embed/%s.html?access_token=%s' % (video_id, zype_access_token),
            'title': video_meta.get('title'),
            'description': video_meta.get('description'),
            'timestamp': parse_iso8601(video_meta.get('published_at')),
            'thumbnails': [
                {
                    'id': tn.get('name'),  # this appears to be null
                    'url': tn['url'],
                    'width': tn.get('width'),
                    'height': tn.get('height'),
                } for tn in video_meta.get('thumbnails', [])],
            'duration': video_meta.get('duration'),
            'channel': channel_title,
            'uploader': channel_title,  # we chose uploader = channel name
            # TODO: uploader_url, channel_id, channel_url
        }
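The login helper above boils down to a single JSON POST; a standalone sketch of the same exchange, assuming the endpoint and response shape shown in `_retrieve_nebula_auth`:

import json
import urllib.request

def nebula_login(email, password):
    # POSTs credentials to the Nebula auth endpoint; per the extractor above,
    # the API token comes back under the 'key' field of the JSON response.
    req = urllib.request.Request(
        'https://api.watchnebula.com/api/v1/auth/login/',
        data=json.dumps({'email': email, 'password': password}).encode('utf8'),
        headers={'content-type': 'application/json'})
    with urllib.request.urlopen(req) as response:
        return json.load(response)['key']
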
yt_dlp/extractor/nfhsnetwork.py (new file, 144 lines)
@@ -0,0 +1,144 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor


from ..utils import (
    try_get,
    unified_strdate,
    unified_timestamp
)


class NFHSNetworkIE(InfoExtractor):
    IE_NAME = 'NFHSNetwork'
    _VALID_URL = r'https?://(?:www\.)?nfhsnetwork\.com/events/[\w-]+/(?P<id>(?:gam|evt|dd|)?[\w\d]{0,10})'
    _TESTS = [{
        # Auto-generated two-team sport (pixellot)
        'url': 'https://www.nfhsnetwork.com/events/rockford-high-school-rockford-mi/gamcf7e54cfbc',
        'info_dict': {
            'id': 'gamcf7e54cfbc',
            'ext': 'mp4',
            'title': 'Rockford vs Spring Lake - Girls Varsity Lacrosse 03/27/2021',
            'uploader': 'MHSAA - Michigan: Rockford High School, Rockford, MI',
            'uploader_id': 'cd2622cf76',
            'uploader_url': 'https://www.nfhsnetwork.com/schools/rockford-high-school-rockford-mi',
            'location': 'Rockford, Michigan',
            'timestamp': 1616859000,
            'upload_date': '20210327'
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        # Non-sport activity with description
        'url': 'https://www.nfhsnetwork.com/events/limon-high-school-limon-co/evt4a30e3726c',
        'info_dict': {
            'id': 'evt4a30e3726c',
            'ext': 'mp4',
            'title': 'Drama Performance Limon High School vs. Limon High School - 12/13/2020',
            'description': 'Join the broadcast of the Limon High School Musical Performance at 2 PM.',
            'uploader': 'CHSAA: Limon High School, Limon, CO',
            'uploader_id': '7d2d121332',
            'uploader_url': 'https://www.nfhsnetwork.com/schools/limon-high-school-limon-co',
            'location': 'Limon, Colorado',
            'timestamp': 1607893200,
            'upload_date': '20201213'
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        # Postseason game
        'url': 'https://www.nfhsnetwork.com/events/nfhs-network-special-events/dd8de71d45',
        'info_dict': {
            'id': 'dd8de71d45',
            'ext': 'mp4',
            'title': '2015 UA Holiday Classic Tournament: National Division - 12/26/2015',
            'uploader': 'SoCal Sports Productions',
            'uploader_id': '063dba0150',
            'uploader_url': 'https://www.nfhsnetwork.com/affiliates/socal-sports-productions',
            'location': 'San Diego, California',
            'timestamp': 1451187000,
            'upload_date': '20151226'
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        # Video with no broadcasts object
        'url': 'https://www.nfhsnetwork.com/events/wiaa-wi/9aa2f92f82',
        'info_dict': {
            'id': '9aa2f92f82',
            'ext': 'mp4',
            'title': 'Competitive Equity - 01/21/2015',
            'description': 'Committee members discuss points of their research regarding a competitive equity plan',
            'uploader': 'WIAA - Wisconsin: Wisconsin Interscholastic Athletic Association',
            'uploader_id': 'a49f7d1002',
            'uploader_url': 'https://www.nfhsnetwork.com/associations/wiaa-wi',
            'location': 'Stevens Point, Wisconsin',
            'timestamp': 1421856000,
            'upload_date': '20150121'
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }
    ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        data = self._download_json(
            'https://cfunity.nfhsnetwork.com/v2/game_or_event/' + video_id,
            video_id)
        publisher = data.get('publishers')[0]  # always exists
        broadcast = (publisher.get('broadcasts') or publisher.get('vods'))[0]  # some (older) videos don't have a broadcasts object
        uploader = publisher.get('formatted_name') or publisher.get('name')
        uploaderID = publisher.get('publisher_key')
        pubType = publisher.get('type')
        uploaderPrefix = (
            "schools" if pubType == "school"
            else "associations" if "association" in pubType
            else "affiliates" if (pubType == "publisher" or pubType == "affiliate")
            else "schools")
        uploaderPage = 'https://www.nfhsnetwork.com/%s/%s' % (uploaderPrefix, publisher.get('slug'))
        location = '%s, %s' % (data.get('city'), data.get('state_name'))
        description = broadcast.get('description')
        isLive = broadcast.get('on_air') or broadcast.get('status') == 'on_air' or False

        timestamp = unified_timestamp(data.get('local_start_time'))
        upload_date = unified_strdate(data.get('local_start_time'))

        title = (
            self._og_search_title(webpage)
            or self._html_search_regex(r'<h1 class="sr-hidden">(.*?)</h1>', webpage, 'title'))
        title = title.split('|')[0].strip()

        video_type = 'broadcasts' if isLive else 'vods'
        key = broadcast.get('key') if isLive else try_get(publisher, lambda x: x['vods'][0]['key'])
        m3u8_url = self._download_json(
            'https://cfunity.nfhsnetwork.com/v2/%s/%s/url' % (video_type, key),
            video_id).get('video_url')

        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=isLive)
        self._sort_formats(formats, ['res', 'tbr'])

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploaderID,
            'uploader_url': uploaderPage,
            'location': location,
            'upload_date': upload_date,
            'is_live': isLive
        }
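The chained conditional expression above maps a publisher type to a URL path segment; the same mapping written out as a plain function, with a hypothetical 'state_association' value to show the substring case:

def uploader_prefix(pub_type):
    # 'schools' doubles as the fallback, mirroring the conditional expression.
    if pub_type == 'school':
        return 'schools'
    if 'association' in pub_type:
        return 'associations'
    if pub_type in ('publisher', 'affiliate'):
        return 'affiliates'
    return 'schools'

assert uploader_prefix('school') == 'schools'
assert uploader_prefix('state_association') == 'associations'  # hypothetical type string
assert uploader_prefix('affiliate') == 'affiliates'
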
yt_dlp/extractor/nick.py
@@ -8,59 +8,66 @@ from ..utils import update_url_query
 
 
 class NickIE(MTVServicesInfoExtractor):
-    # None of videos on the website are still alive?
     IE_NAME = 'nick.com'
-    _VALID_URL = r'https?://(?P<domain>(?:(?:www|beta)\.)?nick(?:jr)?\.com)/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
+    _VALID_URL = r'https?://(?P<domain>(?:www\.)?nick(?:jr)?\.com)/(?:[^/]+/)?(?P<type>videos/clip|[^/]+/videos|episodes/[^/]+)/(?P<id>[^/?#.]+)'
     _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
     _GEO_COUNTRIES = ['US']
     _TESTS = [{
-        'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
+        'url': 'https://www.nick.com/episodes/sq47rw/spongebob-squarepants-a-place-for-pets-lockdown-for-love-season-13-ep-1',
+        'info_dict': {
+            'description': 'md5:0650a9eb88955609d5c1d1c79292e234',
+            'title': 'A Place for Pets/Lockdown for Love',
+        },
         'playlist': [
             {
-                'md5': '6e5adc1e28253bbb1b28ab05403dd4d4',
+                'md5': 'cb8a2afeafb7ae154aca5a64815ec9d6',
                 'info_dict': {
-                    'id': 'be6a17b0-412d-11e5-8ff7-0026b9414f30',
+                    'id': '85ee8177-d6ce-48f8-9eee-a65364f8a6df',
                     'ext': 'mp4',
-                    'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S1',
-                    'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+                    'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S1',
+                    'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.',
 
                 }
             },
             {
-                'md5': 'd7be441fc53a1d4882fa9508a1e5b3ce',
+                'md5': '839a04f49900a1fcbf517020d94e0737',
                 'info_dict': {
-                    'id': 'be6b8f96-412d-11e5-8ff7-0026b9414f30',
+                    'id': '2e2a9960-8fd4-411d-868b-28eb1beb7fae',
                     'ext': 'mp4',
-                    'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S2',
-                    'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+                    'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S2',
+                    'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.',
 
                 }
             },
             {
-                'md5': 'efffe1728a234b2b0d2f2b343dd1946f',
+                'md5': 'f1145699f199770e2919ee8646955d46',
                 'info_dict': {
-                    'id': 'be6cf7e6-412d-11e5-8ff7-0026b9414f30',
+                    'id': 'dc91c304-6876-40f7-84a6-7aece7baa9d0',
                     'ext': 'mp4',
-                    'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S3',
-                    'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+                    'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S3',
+                    'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.',
 
                 }
             },
             {
-                'md5': '1ec6690733ab9f41709e274a1d5c7556',
+                'md5': 'd463116875aee2585ee58de3b12caebd',
                 'info_dict': {
-                    'id': 'be6e3354-412d-11e5-8ff7-0026b9414f30',
+                    'id': '5d929486-cf4c-42a1-889a-6e0d183a101a',
                     'ext': 'mp4',
-                    'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S4',
-                    'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+                    'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S4',
+                    'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.',
 
                 }
             },
         ],
     }, {
-        'url': 'http://www.nickjr.com/paw-patrol/videos/pups-save-a-goldrush-s3-ep302-full-episode/',
-        'only_matching': True,
-    }, {
-        'url': 'http://beta.nick.com/nicky-ricky-dicky-and-dawn/videos/nicky-ricky-dicky-dawn-301-full-episode/',
-        'only_matching': True,
+        'url': 'http://www.nickjr.com/blues-clues-and-you/videos/blues-clues-and-you-original-209-imagination-station/',
+        'info_dict': {
+            'id': '31631529-2fc5-430b-b2ef-6a74b4609abd',
+            'ext': 'mp4',
+            'description': 'md5:9d65a66df38e02254852794b2809d1cf',
+            'title': 'Blue\'s Imagination Station',
+        },
     }]
 
     def _get_feed_query(self, uri):
@@ -69,8 +76,14 @@ class NickIE(MTVServicesInfoExtractor):
             'mgid': uri,
         }
 
+    def _extract_mgid(self, webpage):
+        mgid = self._search_regex(r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None)
+        return mgid
+
     def _real_extract(self, url):
-        domain, display_id = re.match(self._VALID_URL, url).groups()
+        domain, video_type, display_id = re.match(self._VALID_URL, url).groups()
+        if video_type.startswith("episodes"):
+            return super()._real_extract(url)
         video_data = self._download_json(
             'http://%s/data/video.endLevel.json' % domain,
             display_id, query={
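The new `_extract_mgid` pulls the mgid out of inline player JSON; a quick check of the regex against a made-up page fragment (the mgid value here is hypothetical):

import re

webpage = '{"media":{"video":{"config":{"uri":"mgid:arc:episode:nick.com:85ee8177-d6ce-48f8-9eee-a65364f8a6df"}'
mgid = re.search(r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage).group(1)
assert mgid == 'mgid:arc:episode:nick.com:85ee8177-d6ce-48f8-9eee-a65364f8a6df'
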
yt_dlp/extractor/niconico.py
@@ -8,6 +8,7 @@ import datetime
 from .common import InfoExtractor
 from ..postprocessor.ffmpeg import FFmpegPostProcessor
 from ..compat import (
+    compat_str,
     compat_parse_qs,
     compat_urllib_parse_urlparse,
 )
@@ -20,6 +21,7 @@ from ..utils import (
     parse_duration,
     parse_iso8601,
     PostProcessingError,
+    str_or_none,
     remove_start,
     try_get,
     unified_timestamp,
@@ -34,7 +36,7 @@ class NiconicoIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.nicovideo.jp/watch/sm22312215',
-        'md5': 'd1a75c0823e2f629128c43e1212760f9',
+        'md5': 'a5bad06f1347452102953f323c69da34s',
         'info_dict': {
             'id': 'sm22312215',
             'ext': 'mp4',
@@ -203,7 +205,7 @@ class NiconicoIE(InfoExtractor):
             'data-api-data="([^"]+)"', webpage,
             'API data', default='{}'), video_id)
 
-        session_api_data = try_get(api_data, lambda x: x['video']['dmcInfo']['session_api'])
+        session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
         session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
 
         # ping
@@ -220,7 +222,7 @@ class NiconicoIE(InfoExtractor):
         yesno = lambda x: 'yes' if x else 'no'
 
         # m3u8 (encryption)
-        if 'encryption' in try_get(api_data, lambda x: x['video']['dmcInfo']) or {}:
+        if 'encryption' in (try_get(api_data, lambda x: x['media']['delivery']['movie']) or {}):
             protocol = 'm3u8'
             session_api_http_parameters = {
                 'parameters': {
@@ -244,8 +246,8 @@ class NiconicoIE(InfoExtractor):
             session_api_http_parameters = {
                 'parameters': {
                     'http_output_download_parameters': {
-                        'use_ssl': yesno(session_api_endpoint['is_ssl']),
-                        'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
+                        'use_ssl': yesno(session_api_endpoint['isSsl']),
+                        'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
                     }
                 }
             }
@@ -258,15 +260,15 @@ class NiconicoIE(InfoExtractor):
             data=json.dumps({
                 'session': {
                     'client_info': {
-                        'player_id': session_api_data.get('player_id'),
+                        'player_id': session_api_data.get('playerId'),
                     },
                     'content_auth': {
-                        'auth_type': try_get(session_api_data, lambda x: x['auth_types'][session_api_data['protocols'][0]]),
-                        'content_key_timeout': session_api_data.get('content_key_timeout'),
+                        'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
+                        'content_key_timeout': session_api_data.get('contentKeyTimeout'),
                         'service_id': 'nicovideo',
-                        'service_user_id': session_api_data.get('service_user_id')
+                        'service_user_id': session_api_data.get('serviceUserId')
                     },
-                    'content_id': session_api_data.get('content_id'),
+                    'content_id': session_api_data.get('contentId'),
                     'content_src_id_sets': [{
                         'content_src_ids': [{
                             'src_id_to_mux': {
@@ -279,7 +281,7 @@ class NiconicoIE(InfoExtractor):
                     'content_uri': '',
                     'keep_method': {
                         'heartbeat': {
-                            'lifetime': session_api_data.get('heartbeat_lifetime')
+                            'lifetime': session_api_data.get('heartbeatLifetime')
                         }
                     },
                     'priority': session_api_data.get('priority'),
@@ -289,7 +291,7 @@ class NiconicoIE(InfoExtractor):
                         'http_parameters': session_api_http_parameters
                     }
                 },
-                'recipe_id': session_api_data.get('recipe_id'),
+                'recipe_id': session_api_data.get('recipeId'),
                 'session_operation_auth': {
                     'session_operation_auth_by_signature': {
                         'signature': session_api_data.get('signature'),
@@ -308,7 +310,7 @@ class NiconicoIE(InfoExtractor):
             'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
             'data': json.dumps(session_response['data']),
             # interval, convert milliseconds to seconds, then halve to make a buffer.
-            'interval': float_or_none(session_api_data.get('heartbeat_lifetime'), scale=2000),
+            'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=2000),
         }
 
         return info_dict, heartbeat_info_dict
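The `scale=2000` above does the ms-to-s conversion and the halving in one division; checking the arithmetic with a hypothetical 120-second lifetime:

from yt_dlp.utils import float_or_none

# 120000 ms lifetime -> 60.0 s heartbeat interval (converted and halved).
assert float_or_none(120000, scale=2000) == 60.0
assert float_or_none(None, scale=2000) is None  # a missing lifetime stays None
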
@@ -327,15 +329,17 @@ class NiconicoIE(InfoExtractor):
         format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
         vdict = parse_format_id(video_quality['id'])
         adict = parse_format_id(audio_quality['id'])
-        resolution = video_quality.get('resolution', {'height': vdict.get('res')})
+        resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
+        vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float)
 
         return {
             'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
             'format_id': format_id,
+            'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
             'ext': 'mp4',  # Session API are used in HTML5, which always serves mp4
             'vcodec': vdict.get('codec'),
             'acodec': adict.get('codec'),
-            'vbr': float_or_none(video_quality.get('bitrate'), 1000) or float_or_none(vdict.get('br')),
+            'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
             'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
             'height': int_or_none(resolution.get('height', vdict.get('res'))),
             'width': int_or_none(resolution.get('width')),
@@ -394,22 +398,23 @@ class NiconicoIE(InfoExtractor):
         formats = []
 
         # Get HTML5 videos info
-        try:
-            dmc_info = api_data['video']['dmcInfo']
-        except KeyError:
-            raise ExtractorError('The video can\'t be downloaded.',
-                                 expected=True)
+        quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
+        if not quality_info:
+            raise ExtractorError('The video can\'t be downloaded.', expected=True)
 
-        quality_info = dmc_info.get('quality')
         for audio_quality in quality_info.get('audios') or {}:
             for video_quality in quality_info.get('videos') or {}:
-                if not audio_quality.get('available') or not video_quality.get('available'):
+                if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
                     continue
                 formats.append(self._extract_format_for_quality(
                     api_data, video_id, audio_quality, video_quality))
 
         # Get flv/swf info
+        timestamp = None
         video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
-        is_economy = video_real_url.endswith('low')
-
-        if is_economy:
+        if not video_real_url:
+            self.report_warning('Unable to obtain smile video information')
+        else:
+            is_economy = video_real_url.endswith('low')
+
+            if is_economy:
@@ -487,13 +492,12 @@ class NiconicoIE(InfoExtractor):
         self._sort_formats(formats)
 
         # Start extracting information
-        title = get_video_info_web('originalTitle')
-        if not title:
-            title = self._og_search_title(webpage, default=None)
-        if not title:
-            title = self._html_search_regex(
+        title = (
+            get_video_info_web(['originalTitle', 'title'])
+            or self._og_search_title(webpage, default=None)
+            or self._html_search_regex(
                 r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
-                webpage, 'video title')
+                webpage, 'video title'))
 
         watch_api_data_string = self._html_search_regex(
             r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
@@ -517,6 +521,7 @@ class NiconicoIE(InfoExtractor):
             timestamp = parse_iso8601(
                 video_detail['postedAt'].replace('/', '-'),
                 delimiter=' ', timezone=datetime.timedelta(hours=9))
+        timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))
 
         view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
         if not view_count:
@@ -525,11 +530,16 @@ class NiconicoIE(InfoExtractor):
                 webpage, 'view count', default=None)
             if match:
                 view_count = int_or_none(match.replace(',', ''))
-        view_count = view_count or video_detail.get('viewCount')
+        view_count = (
+            view_count
+            or video_detail.get('viewCount')
+            or try_get(api_data, lambda x: x['video']['count']['view']))
 
-        comment_count = (int_or_none(get_video_info_web('comment_num'))
+        comment_count = (
+            int_or_none(get_video_info_web('comment_num'))
             or video_detail.get('commentCount')
-            or try_get(api_data, lambda x: x['thread']['commentCount']))
+            or try_get(api_data, lambda x: x['video']['count']['comment']))
+
         if not comment_count:
             match = self._html_search_regex(
                 r'>Comments: <strong[^>]*>([^<]+)</strong>',
@@ -559,7 +569,7 @@ class NiconicoIE(InfoExtractor):
         # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
         # in the JSON, which will cause None to be returned instead of {}.
         owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
-        uploader_id = (
+        uploader_id = str_or_none(
            get_video_info_web(['ch_id', 'user_id'])
            or owner.get('id')
            or channel_id
@@ -589,7 +599,7 @@ class NiconicoIE(InfoExtractor):
 
 
 class NiconicoPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.nicovideo.jp/mylist/27411728',
@@ -647,3 +657,40 @@ class NiconicoPlaylistIE(InfoExtractor):
             'uploader_id': uploader_id,
             'entries': OnDemandPagedList(pagefunc, 25),
         }
+
+
+class NiconicoUserIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
+    _TEST = {
+        'url': 'https://www.nicovideo.jp/user/419948',
+        'info_dict': {
+            'id': '419948',
+        },
+        'playlist_mincount': 101,
+    }
+    _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
+    _api_headers = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0',
+        'X-Niconico-Language': 'en-us'
+    }
+    _PAGE_SIZE = 100
+
+    def _entries(self, list_id):
+        total_count = 1
+        count = page_num = 0
+        while count < total_count:
+            json_parsed = self._download_json(
+                self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
+                headers=self._api_headers,
+                note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
+            if not page_num:
+                total_count = int_or_none(json_parsed['data'].get('totalCount'))
+            for entry in json_parsed["data"]["items"]:
+                count += 1
+                yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
+            page_num += 1
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+        return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
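The `_entries` generator above implements count-based pagination; the same pattern reduced to a standalone sketch, with a hypothetical fetch_page(page) returning {'totalCount': N, 'items': [...]}:

def paginate(fetch_page):
    # The first response tells us the total; keep requesting pages
    # until that many items have been yielded.
    total_count, count, page_num = 1, 0, 0
    while count < total_count:
        data = fetch_page(page_num + 1)
        if not page_num:
            total_count = data['totalCount']
        for item in data['items']:
            count += 1
            yield item
        page_num += 1
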
yt_dlp/extractor/ninecninemedia.py
@@ -23,11 +23,9 @@ class NineCNineMediaIE(InfoExtractor):
         destination_code, content_id = re.match(self._VALID_URL, url).groups()
         api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
         content = self._download_json(api_base_url, content_id, query={
-            '$include': '[Media,Season,ContentPackages]',
+            '$include': '[Media.Name,Season,ContentPackages.Duration,ContentPackages.Id]',
         })
         title = content['Name']
-        if len(content['ContentPackages']) > 1:
-            raise ExtractorError('multiple content packages')
         content_package = content['ContentPackages'][0]
         package_id = content_package['Id']
         content_package_url = api_base_url + 'contentpackages/%s/' % package_id
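The tightened `$include` above asks the API for only the fields the extractor actually reads; a sketch of the resulting query string (the base URL here is a hypothetical stand-in for `_API_BASE_TEMPLATE`):

from urllib.parse import urlencode

api_base_url = 'https://example-api/destinations/ctv_hub/platforms/desktop/contents/1234567/'  # hypothetical
query = {'$include': '[Media.Name,Season,ContentPackages.Duration,ContentPackages.Id]'}
print('%s?%s' % (api_base_url, urlencode(query)))
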
yt_dlp/extractor/nitter.py
@@ -11,60 +11,100 @@ from ..utils import (
     determine_ext,
 )
 import re
+import random
 
 
 class NitterIE(InfoExtractor):
     # Taken from https://github.com/zedeus/nitter/wiki/Instances
-    INSTANCES = ('nitter.net',
-                 'nitter.snopyta.org',
+    NON_HTTP_INSTANCES = (
+        '3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
+        'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
+        'nitter7bryz3jv7e3uekphigvmoyoem4al3fynerxkj22dmoxoq553qd.onion',
+        'npf37k3mtzwxreiw52ccs5ay4e6qt2fkcs2ndieurdyn2cuzzsfyfvid.onion',
+        'nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion',
+        'i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion',
+        '26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion',
+
+        'nitter.i2p',
+        'u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p',
+
+        'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion',
+    )
+
+    HTTP_INSTANCES = (
         'nitter.42l.fr',
-        'nitter.nixnet.services',
-        'nitter.13ad.de',
         'nitter.pussthecat.org',
+        'nitter.nixnet.services',
         'nitter.mastodont.cat',
-        'nitter.dark.fail',
         'nitter.tedomum.net',
-        'nitter.cattube.org',
         'nitter.fdn.fr',
         'nitter.1d4.us',
         'nitter.kavin.rocks',
         'tweet.lambda.dance',
         'nitter.cc',
+        'nitter.vxempire.xyz',
+        'nitter.unixfox.eu',
+        'nitter.domain.glass',
+        'nitter.himiko.cloud',
+        'nitter.eu',
+        'nitter.namazso.eu',
+        'nitter.mailstation.de',
+        'nitter.actionsack.com',
+        'nitter.cattube.org',
+        'nitter.dark.fail',
+        'birdsite.xanny.family',
+        'nitter.40two.app',
+        'nitter.skrep.in',
+
+        # not in the list anymore
+        'nitter.snopyta.org',
+    )
+
+    DEAD_INSTANCES = (
+        # maintenance
+        'nitter.ethibox.fr',
+
+        # official, rate limited
+        'nitter.net',
+        # offline
+        'nitter.13ad.de',
         'nitter.weaponizedhumiliation.com',
-        '3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
-        'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
-        'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion')
+    )
+
+    INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES
 
     _INSTANCES_RE = '(?:' + '|'.join([re.escape(instance) for instance in INSTANCES]) + ')'
     _VALID_URL = r'https?://%(instance)s/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?' % {'instance': _INSTANCES_RE}
-    current_instance = INSTANCES[0]  # the test and official instance
+    current_instance = random.choice(HTTP_INSTANCES)
 
     _TESTS = [
         {
             # GIF (wrapped in mp4)
-            'url': 'https://' + current_instance + '/firefox/status/1314279897502629888#m',
+            'url': 'https://%s/firefox/status/1314279897502629888#m' % current_instance,
             'info_dict': {
                 'id': '1314279897502629888',
                 'ext': 'mp4',
-                'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
-                'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet',
+                'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n\nReport harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg\n\n#UnfckTheInternet',
+                'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n\nReport harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg\n\n#UnfckTheInternet',
                 'thumbnail': r're:^https?://.*\.jpg$',
                 'uploader': 'Firefox 🔥',
                 'uploader_id': 'firefox',
-                'uploader_url': 'https://' + current_instance + '/firefox',
+                'uploader_url': 'https://%s/firefox' % current_instance,
                 'upload_date': '20201008',
                 'timestamp': 1602183720,
             },
         }, {  # normal video
-            'url': 'https://' + current_instance + '/Le___Doc/status/1299715685392756737#m',
+            'url': 'https://%s/Le___Doc/status/1299715685392756737#m' % current_instance,
            'info_dict': {
                'id': '1299715685392756737',
                'ext': 'mp4',
-                'title': 'Le Doc - "Je ne prédis jamais rien" D Raoult, Août 2020...',
-                'description': '"Je ne prédis jamais rien" D Raoult, Août 2020...',
+                'title': 'Le Doc - "Je ne prédis jamais rien"\nD Raoult, Août 2020...',
+                'description': '"Je ne prédis jamais rien"\nD Raoult, Août 2020...',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Le Doc',
                'uploader_id': 'Le___Doc',
-                'uploader_url': 'https://' + current_instance + '/Le___Doc',
+                'uploader_url': 'https://%s/Le___Doc' % current_instance,
                'upload_date': '20200829',
                'timestamp': 1598711341,
                'view_count': int,
@@ -73,31 +113,51 @@ class NitterIE(InfoExtractor):
             'comment_count': int,
            },
        }, {  # video embed in a "Streaming Political Ads" box
-            'url': 'https://' + current_instance + '/mozilla/status/1321147074491092994#m',
+            'url': 'https://%s/mozilla/status/1321147074491092994#m' % current_instance,
            'info_dict': {
                'id': '1321147074491092994',
                'ext': 'mp4',
-                'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
-                'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds",
+                'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?\n\nThis isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. \n\nLearn more ➡️ https://mzl.la/StreamingAds",
+                'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?\n\nThis isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. \n\nLearn more ➡️ https://mzl.la/StreamingAds",
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Mozilla',
                'uploader_id': 'mozilla',
-                'uploader_url': 'https://' + current_instance + '/mozilla',
+                'uploader_url': 'https://%s/mozilla' % current_instance,
                'upload_date': '20201027',
                'timestamp': 1603820982
            },
-        },
+        }, {  # not the first tweet but main-tweet
+            'url': 'https://%s/TheNaturalNu/status/1379050895539724290#m' % current_instance,
+            'info_dict': {
+                'id': '1379050895539724290',
+                'ext': 'mp4',
+                'title': 'Dorothy Zbornak - This had me hollering!!',
+                'description': 'This had me hollering!!',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'uploader': 'Dorothy Zbornak',
+                'uploader_id': 'TheNaturalNu',
+                'uploader_url': 'https://%s/TheNaturalNu' % current_instance,
+                'timestamp': 1617626329,
+                'upload_date': '20210405'
+            }
+        }
     ]
 
     def _real_extract(self, url):
        video_id = self._match_id(url)
        parsed_url = compat_urlparse.urlparse(url)
-        base_url = parsed_url.scheme + '://' + parsed_url.netloc
+        base_url = '%s://%s' % (parsed_url.scheme, parsed_url.netloc)
 
        self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
-        webpage = self._download_webpage(url, video_id)
+        full_webpage = self._download_webpage(url, video_id)
 
-        video_url = base_url + self._html_search_regex(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url')
+        main_tweet_start = full_webpage.find('class="main-tweet"')
+        webpage = full_webpage[main_tweet_start:] if main_tweet_start > 0 else None
+        if not webpage:
+            webpage = full_webpage
+
+        video_url = '%s%s' % (base_url, self._html_search_regex(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url'))
        ext = determine_ext(video_url)
 
        if ext == 'unknown_video':
@@ -108,33 +168,34 @@ class NitterIE(InfoExtractor):
             'ext': ext
        }]
 
-        title = (
-            self._og_search_description(webpage).replace('\n', ' ')
-            or self._html_search_regex(r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title'))
+        title = self._og_search_description(full_webpage)
+        if not title:
+            title = self._html_search_regex(r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title')
        description = title
 
        mobj = re.match(self._VALID_URL, url)
        uploader_id = (
            mobj.group('uploader_id')
-            or self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False))
+            or self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
+        )
 
        if uploader_id:
-            uploader_url = base_url + '/' + uploader_id
+            uploader_url = '%s/%s' % (base_url, uploader_id)
 
        uploader = self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
 
        if uploader:
-            title = uploader + ' - ' + title
+            title = '%s - %s' % (uploader, title)
 
        view_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-play[^>]*></span>\s([^<]+)</div>', webpage, 'view count', fatal=False))
        like_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-heart[^>]*></span>\s([^<]+)</div>', webpage, 'like count', fatal=False))
        repost_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-retweet[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
        comment_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-comment[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
 
-        thumbnail = base_url + (self._html_search_meta('og:image', webpage, 'thumbnail url')
-                                or self._html_search_regex(r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False))
-        thumbnail = remove_end(thumbnail, '%3Asmall')  # if parsed with regex, it should contain this
+        thumbnail = self._html_search_meta('og:image', full_webpage, 'thumbnail url')
+        if not thumbnail:
+            thumbnail = '%s%s' % (base_url, self._html_search_regex(r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False))
+        thumbnail = remove_end(thumbnail, '%3Asmall')
 
        thumbnails = []
        thumbnail_ids = ('thumb', 'small', 'large', 'medium', 'orig')
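`_INSTANCES_RE` above joins every known instance into a single alternation; a small standalone check with two of the hostnames:

import re

instances = ('nitter.net', 'nitter.42l.fr')
instances_re = '(?:' + '|'.join(re.escape(i) for i in instances) + ')'
valid_url = r'https?://%s/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?' % instances_re

mobj = re.match(valid_url, 'https://nitter.net/firefox/status/1314279897502629888#m')
assert mobj.group('uploader_id') == 'firefox'
assert mobj.group('id') == '1314279897502629888'
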
yt_dlp/extractor/palcomp3.py (new file, 148 lines)
@@ -0,0 +1,148 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    str_or_none,
+    try_get,
+)
+
+
+class PalcoMP3BaseIE(InfoExtractor):
+    _GQL_QUERY_TMPL = '''{
+  artist(slug: "%s") {
+    %s
+  }
+}'''
+    _ARTIST_FIELDS_TMPL = '''music(slug: "%%s") {
+      %s
+    }'''
+    _MUSIC_FIELDS = '''duration
+      hls
+      mp3File
+      musicID
+      plays
+      title'''
+
+    def _call_api(self, artist_slug, artist_fields):
+        return self._download_json(
+            'https://www.palcomp3.com.br/graphql/', artist_slug, query={
+                'query': self._GQL_QUERY_TMPL % (artist_slug, artist_fields),
+            })['data']
+
+    def _parse_music(self, music):
+        music_id = compat_str(music['musicID'])
+        title = music['title']
+
+        formats = []
+        hls_url = music.get('hls')
+        if hls_url:
+            formats.append({
+                'url': hls_url,
+                'protocol': 'm3u8_native',
+                'ext': 'mp4',
+            })
+        mp3_file = music.get('mp3File')
+        if mp3_file:
+            formats.append({
+                'url': mp3_file,
+            })
+
+        return {
+            'id': music_id,
+            'title': title,
+            'formats': formats,
+            'duration': int_or_none(music.get('duration')),
+            'view_count': int_or_none(music.get('plays')),
+        }
+
+    def _real_initialize(self):
+        self._ARTIST_FIELDS_TMPL = self._ARTIST_FIELDS_TMPL % self._MUSIC_FIELDS
+
+    def _real_extract(self, url):
+        artist_slug, music_slug = re.match(self._VALID_URL, url).groups()
+        artist_fields = self._ARTIST_FIELDS_TMPL % music_slug
+        music = self._call_api(artist_slug, artist_fields)['artist']['music']
+        return self._parse_music(music)
+
+
+class PalcoMP3IE(PalcoMP3BaseIE):
+    IE_NAME = 'PalcoMP3:song'
+    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
+        'md5': '99fd6405b2d8fd589670f6db1ba3b358',
+        'info_dict': {
+            'id': '3162927',
+            'ext': 'mp3',
+            'title': 'Nossas Composições - CUIDA BEM DELA',
+            'duration': 210,
+            'view_count': int,
+        }
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if PalcoMP3VideoIE.suitable(url) else super(PalcoMP3IE, cls).suitable(url)
+
+
+class PalcoMP3ArtistIE(PalcoMP3BaseIE):
+    IE_NAME = 'PalcoMP3:artist'
+    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://www.palcomp3.com.br/condedoforro/',
+        'info_dict': {
+            'id': '358396',
+            'title': 'Conde do Forró',
+        },
+        'playlist_mincount': 188,
+    }]
+    _ARTIST_FIELDS_TMPL = '''artistID
+      musics {
+        nodes {
+          %s
+        }
+      }
+      name'''

+    @classmethod
+    def suitable(cls, url):
+        return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        artist_slug = self._match_id(url)
+        artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
+
+        def entries():
+            for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
+                yield self._parse_music(music)
+
+        return self.playlist_result(
+            entries(), str_or_none(artist.get('artistID')), artist.get('name'))
+
+
+class PalcoMP3VideoIE(PalcoMP3BaseIE):
+    IE_NAME = 'PalcoMP3:video'
+    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)/?#clipe'
+    _TESTS = [{
+        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',
+        'add_ie': ['Youtube'],
+        'info_dict': {
+            'id': '_pD1nR2qqPg',
+            'ext': 'mp4',
+            'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
+            'description': 'md5:7043342c09a224598e93546e98e49282',
+            'upload_date': '20161107',
+            'uploader_id': 'maiaramaraisaoficial',
+            'uploader': 'Maiara e Maraisa',
+        }
+    }]
+    _MUSIC_FIELDS = 'youtubeID'
+
+    def _parse_music(self, music):
+        youtube_id = music['youtubeID']
+        return self.url_result(youtube_id, 'Youtube', youtube_id)
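
A note on the templates above: `_ARTIST_FIELDS_TMPL` escapes its slug placeholder as `%%s` so that it survives the first substitution in `_real_initialize` and can be filled with the music slug later, in `_real_extract`. A minimal sketch of how the three templates compose (the slugs here are hypothetical examples, not real PalcoMP3 content):

# Sketch only: 'some-artist' and 'some-song' are hypothetical slugs.
GQL_QUERY_TMPL = '''{
  artist(slug: "%s") {
    %s
  }
}'''
ARTIST_FIELDS_TMPL = '''music(slug: "%%s") {
      %s
    }'''
MUSIC_FIELDS = '''duration
      hls
      mp3File
      musicID
      plays
      title'''

# First substitution (as in _real_initialize): '%%s' collapses to '%s'
artist_fields = ARTIST_FIELDS_TMPL % MUSIC_FIELDS
# Second substitution (as in _real_extract): fill in both slugs
print(GQL_QUERY_TMPL % ('some-artist', artist_fields % 'some-song'))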
yt_dlp/extractor/peertube.py
@@ -599,11 +599,13 @@ class PeerTubeIE(InfoExtractor):
         else:
             age_limit = None

+        webpage_url = 'https://%s/videos/watch/%s' % (host, video_id)
+
         return {
             'id': video_id,
             'title': title,
             'description': description,
-            'thumbnail': urljoin(url, video.get('thumbnailPath')),
+            'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')),
             'timestamp': unified_timestamp(video.get('publishedAt')),
             'uploader': account_data('displayName', compat_str),
             'uploader_id': str_or_none(account_data('id', int)),
@@ -621,5 +623,6 @@ class PeerTubeIE(InfoExtractor):
             'tags': try_get(video, lambda x: x['tags'], list),
             'categories': categories,
             'formats': formats,
-            'subtitles': subtitles
+            'subtitles': subtitles,
+            'webpage_url': webpage_url,
         }
yt_dlp/extractor/phoenix.py
@@ -1,52 +1,128 @@
+# coding: utf-8
 from __future__ import unicode_literals

-from .common import InfoExtractor
-from ..utils import ExtractorError
+import re
+
+from .youtube import YoutubeIE
+from .zdf import ZDFBaseIE
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    merge_dicts,
+    unified_timestamp,
+    xpath_text,
+)


-class PhoenixIE(InfoExtractor):
+class PhoenixIE(ZDFBaseIE):
     IE_NAME = 'phoenix.de'
-    _VALID_URL = r'''https?://(?:www\.)?phoenix.de/\D+(?P<id>\d+)\.html'''
-    _TESTS = [
-        {
-            'url': 'https://www.phoenix.de/sendungen/dokumentationen/unsere-welt-in-zukunft---stadt-a-1283620.html',
-            'md5': '5e765e838aa3531c745a4f5b249ee3e3',
-            'info_dict': {
-                'id': '0OB4HFc43Ns',
-                'ext': 'mp4',
-                'title': 'Unsere Welt in Zukunft - Stadt',
-                'description': 'md5:9bfb6fd498814538f953b2dcad7ce044',
-                'upload_date': '20190912',
-                'uploader': 'phoenix',
-                'uploader_id': 'phoenix',
-            }
-        },
-        {
-            'url': 'https://www.phoenix.de/drohnenangriffe-in-saudi-arabien-a-1286995.html?ref=aktuelles',
-            'only_matching': True,
-        },
-        # an older page: https://www.phoenix.de/sendungen/gespraeche/phoenix-persoenlich/im-dialog-a-177727.html
-        # seems to not have an embedded video, even though it's uploaded on youtube: https://www.youtube.com/watch?v=4GxnoUHvOkM
-    ]
+    _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
+    _TESTS = [{
+        # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
+        'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
+        'md5': '34ec321e7eb34231fd88616c65c92db0',
+        'info_dict': {
+            'id': '210222_phx_nachgehakt_corona_protest',
+            'ext': 'mp4',
+            'title': 'Wohin führt der Protest in der Pandemie?',
+            'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
+            'duration': 1691,
+            'timestamp': 1613906100,
+            'upload_date': '20210221',
+            'uploader': 'Phoenix',
+            'channel': 'corona nachgehakt',
+        },
+    }, {
+        # Youtube embed
+        'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
+        'info_dict': {
+            'id': 'hMQtqFYjomk',
+            'ext': 'mp4',
+            'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
+            'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
+            'duration': 3509,
+            'upload_date': '20201219',
+            'uploader': 'phoenix',
+            'uploader_id': 'phoenix',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
+        'only_matching': True,
+    }, {
+        # no media
+        'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html',
+        'only_matching': True,
+    }, {
+        # Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html
+        'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche',
+        'only_matching': True,
+    }]

-    def extract_from_json_api(self, video_id, api_url):
-        doc = self._download_json(
-            api_url, video_id,
-            note="Downloading webpage metadata",
-            errnote="Failed to load webpage metadata")
-
-        for a in doc["absaetze"]:
-            if a["typ"] == "video-youtube":
-                return {
-                    '_type': 'url_transparent',
-                    'id': a["id"],
-                    'title': doc["titel"],
-                    'url': "https://www.youtube.com/watch?v=%s" % a["id"],
-                    'ie_key': 'Youtube',
-                }
-        raise ExtractorError("No downloadable video found", expected=True)
-
     def _real_extract(self, url):
-        page_id = self._match_id(url)
-        api_url = 'https://www.phoenix.de/response/id/%s' % page_id
-        return self.extract_from_json_api(page_id, api_url)
+        article_id = self._match_id(url)
+
+        article = self._download_json(
+            'https://www.phoenix.de/response/id/%s' % article_id, article_id,
+            'Downloading article JSON')
+
+        video = article['absaetze'][0]
+        title = video.get('titel') or article.get('subtitel')
+
+        if video.get('typ') == 'video-youtube':
+            video_id = video['id']
+            return self.url_result(
+                video_id, ie=YoutubeIE.ie_key(), video_id=video_id,
+                video_title=title)
+
+        video_id = compat_str(video.get('basename') or video.get('content'))
+
+        details = self._download_xml(
+            'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
+            video_id, 'Downloading details XML', query={
+                'ak': 'web',
+                'ptmd': 'true',
+                'id': video_id,
+                'profile': 'player2',
+            })
+
+        title = title or xpath_text(
+            details, './/information/title', 'title', fatal=True)
+        content_id = xpath_text(
+            details, './/video/details/basename', 'content id', fatal=True)
+
+        info = self._extract_ptmd(
+            'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
+            content_id, None, url)
+
+        timestamp = unified_timestamp(xpath_text(details, './/details/airtime'))
+
+        thumbnails = []
+        for node in details.findall('.//teaserimages/teaserimage'):
+            thumbnail_url = node.text
+            if not thumbnail_url:
+                continue
+            thumbnail = {
+                'url': thumbnail_url,
+            }
+            thumbnail_key = node.get('key')
+            if thumbnail_key:
+                m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
+                if m:
+                    thumbnail['width'] = int(m.group(1))
+                    thumbnail['height'] = int(m.group(2))
+            thumbnails.append(thumbnail)
+
+        return merge_dicts(info, {
+            'id': content_id,
+            'title': title,
+            'description': xpath_text(details, './/information/detail'),
+            'duration': int_or_none(xpath_text(details, './/details/lengthSec')),
+            'thumbnails': thumbnails,
+            'timestamp': timestamp,
+            'uploader': xpath_text(details, './/details/channel'),
+            'uploader_id': xpath_text(details, './/details/originChannelId'),
+            'channel': xpath_text(details, './/details/originChannelTitle'),
+        })
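
The `teaserimage` handling above parses image dimensions out of the node's `key` attribute. A small sketch of that parse, using a hypothetical key value:

import re

# Sketch of the thumbnail-key parse above; '1280x720' is a hypothetical value.
thumbnail = {'url': 'https://example.phoenix.de/teaser.jpg'}
m = re.match('^([0-9]+)x([0-9]+)$', '1280x720')
if m:
    thumbnail['width'] = int(m.group(1))
    thumbnail['height'] = int(m.group(2))
print(thumbnail)  # {'url': ..., 'width': 1280, 'height': 720}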
yt_dlp/extractor/picarto.py
@@ -1,22 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import re
-import time
-
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     js_to_json,
-    try_get,
-    update_url_query,
-    urlencode_postdata,
 )


 class PicartoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
+    _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
     _TEST = {
         'url': 'https://picarto.tv/Setz',
         'info_dict': {
@@ -34,65 +27,46 @@ class PicartoIE(InfoExtractor):
         return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        channel_id = mobj.group('id')
-
-        metadata = self._download_json(
-            'https://api.picarto.tv/v1/channel/name/' + channel_id,
-            channel_id)
-
-        if metadata.get('online') is False:
+        channel_id = self._match_id(url)
+
+        data = self._download_json(
+            'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
+                'query': '''{
+  channel(name: "%s") {
+    adult
+    id
+    online
+    stream_name
+    title
+  }
+  getLoadBalancerUrl(channel_name: "%s") {
+    url
+  }
+}''' % (channel_id, channel_id),
+            })['data']
+        metadata = data['channel']
+
+        if metadata.get('online') == 0:
             raise ExtractorError('Stream is offline', expected=True)
+        title = metadata['title']

         cdn_data = self._download_json(
-            'https://picarto.tv/process/channel', channel_id,
-            data=urlencode_postdata({'loadbalancinginfo': channel_id}),
-            note='Downloading load balancing info')
-
-        token = mobj.group('token') or 'public'
-        params = {
-            'con': int(time.time() * 1000),
-            'token': token,
-        }
-
-        prefered_edge = cdn_data.get('preferedEdge')
+            data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
+            channel_id, 'Downloading load balancing info')
+
         formats = []
-        for edge in cdn_data['edges']:
-            edge_ep = edge.get('ep')
-            if not edge_ep or not isinstance(edge_ep, compat_str):
+        for source in (cdn_data.get('source') or []):
+            source_url = source.get('url')
+            if not source_url:
                 continue
-            edge_id = edge.get('id')
-            for tech in cdn_data['techs']:
-                tech_label = tech.get('label')
-                tech_type = tech.get('type')
-                preference = 0
-                if edge_id == prefered_edge:
-                    preference += 1
-                format_id = []
-                if edge_id:
-                    format_id.append(edge_id)
-                if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
-                    format_id.append('hls')
-                    formats.extend(self._extract_m3u8_formats(
-                        update_url_query(
-                            'https://%s/hls/%s/index.m3u8'
-                            % (edge_ep, channel_id), params),
-                        channel_id, 'mp4', quality=preference,
-                        m3u8_id='-'.join(format_id), fatal=False))
-                    continue
-                elif tech_type == 'video/mp4' or tech_label == 'MP4':
-                    format_id.append('mp4')
-                    formats.append({
-                        'url': update_url_query(
-                            'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
-                            params),
-                        'format_id': '-'.join(format_id),
-                        'quality': preference,
-                    })
-                else:
-                    # rtmp format does not seem to work
-                    continue
+            source_type = source.get('type')
+            if source_type == 'html5/application/vnd.apple.mpegurl':
+                formats.extend(self._extract_m3u8_formats(
+                    source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
+            elif source_type == 'html5/video/mp4':
+                formats.append({
+                    'url': source_url,
+                })
         self._sort_formats(formats)

         mature = metadata.get('adult')
@@ -103,10 +77,10 @@ class PicartoIE(InfoExtractor):

         return {
             'id': channel_id,
-            'title': self._live_title(metadata.get('title') or channel_id),
+            'title': self._live_title(title.strip()),
             'is_live': True,
-            'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
             'channel': channel_id,
+            'channel_id': metadata.get('id'),
             'channel_url': 'https://picarto.tv/%s' % channel_id,
             'age_limit': age_limit,
             'formats': formats,
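
The rewrite above replaces the old edge/tech enumeration with a single GraphQL call; the load-balancer URL and `stream_name` from that response are then joined into the CDN's JSON stream descriptor. A sketch with hypothetical response values:

# Sketch of how the rewritten extractor above derives the CDN playlist URL;
# the response values here are hypothetical.
data = {
    'channel': {'online': 1, 'stream_name': 'golive+setz', 'title': 'Example'},
    'getLoadBalancerUrl': {'url': 'https://edge1.picarto.tv'},
}
cdn_info_url = (data['getLoadBalancerUrl']['url']
                + '/stream/json_' + data['channel']['stream_name'] + '.js')
print(cdn_info_url)  # https://edge1.picarto.tv/stream/json_golive+setz.js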
yt_dlp/extractor/pinterest.py
@@ -31,6 +31,7 @@ class PinterestBaseIE(InfoExtractor):

         title = (data.get('title') or data.get('grid_title') or video_id).strip()

+        urls = []
         formats = []
         duration = None
         if extract_formats:
@@ -38,8 +39,9 @@ class PinterestBaseIE(InfoExtractor):
             if not isinstance(format_dict, dict):
                 continue
             format_url = url_or_none(format_dict.get('url'))
-            if not format_url:
+            if not format_url or format_url in urls:
                 continue
+            urls.append(format_url)
             duration = float_or_none(format_dict.get('duration'), scale=1000)
             ext = determine_ext(format_url)
             if 'hls' in format_id.lower() or ext == 'm3u8':
yt_dlp/extractor/plutotv.py (new file, 164 lines)
@@ -0,0 +1,164 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import uuid
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+    compat_urlparse,
+)
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    try_get,
+    url_or_none,
+)
+
+
+class PlutoTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pluto\.tv/on-demand/(?P<video_type>movies|series)/(?P<slug>.*)/?$'
+    _INFO_URL = 'https://service-vod.clusters.pluto.tv/v3/vod/slugs/'
+    _INFO_QUERY_PARAMS = {
+        'appName': 'web',
+        'appVersion': 'na',
+        'clientID': compat_str(uuid.uuid1()),
+        'clientModelNumber': 'na',
+        'serverSideAds': 'false',
+        'deviceMake': 'unknown',
+        'deviceModel': 'web',
+        'deviceType': 'web',
+        'deviceVersion': 'unknown',
+        'sid': compat_str(uuid.uuid1()),
+    }
+    _TESTS = [
+        {
+            'url': 'https://pluto.tv/on-demand/series/i-love-money/season/2/episode/its-in-the-cards-2009-2-3',
+            'md5': 'ebcdd8ed89aaace9df37924f722fd9bd',
+            'info_dict': {
+                'id': '5de6c598e9379ae4912df0a8',
+                'ext': 'mp4',
+                'title': 'It\'s In The Cards',
+                'episode': 'It\'s In The Cards',
+                'description': 'The teams face off against each other in a 3-on-2 soccer showdown. Strategy comes into play, though, as each team gets to select their opposing teams’ two defenders.',
+                'series': 'I Love Money',
+                'season_number': 2,
+                'episode_number': 3,
+                'duration': 3600,
+            }
+        },
+        {
+            'url': 'https://pluto.tv/on-demand/series/i-love-money/season/1/',
+            'playlist_count': 11,
+            'info_dict': {
+                'id': '5de6c582e9379ae4912dedbd',
+                'title': 'I Love Money - Season 1',
+            }
+        },
+        {
+            'url': 'https://pluto.tv/on-demand/series/i-love-money/',
+            'playlist_count': 26,
+            'info_dict': {
+                'id': '5de6c582e9379ae4912dedbd',
+                'title': 'I Love Money',
+            }
+        },
+        {
+            'url': 'https://pluto.tv/on-demand/movies/arrival-2015-1-1',
+            'md5': '3cead001d317a018bf856a896dee1762',
+            'info_dict': {
+                'id': '5e83ac701fa6a9001bb9df24',
+                'ext': 'mp4',
+                'title': 'Arrival',
+                'description': 'When mysterious spacecraft touch down across the globe, an elite team - led by expert translator Louise Banks (Academy Award® nominee Amy Adams) – races against time to decipher their intent.',
+                'duration': 9000,
+            }
+        },
+    ]
+
+    def _to_ad_free_formats(self, video_id, formats):
+        ad_free_formats = []
+        m3u8_urls = set()
+        for format in formats:
+            res = self._download_webpage(
+                format.get('url'), video_id, note='Downloading m3u8 playlist',
+                fatal=False)
+            if not res:
+                continue
+            first_segment_url = re.search(
+                r'^(https?://.*/)0\-(end|[0-9]+)/[^/]+\.ts$', res,
+                re.MULTILINE)
+            if not first_segment_url:
+                continue
+            m3u8_urls.add(
+                compat_urlparse.urljoin(first_segment_url.group(1), '0-end/master.m3u8'))
+
+        for m3u8_url in m3u8_urls:
+            ad_free_formats.extend(
+                self._extract_m3u8_formats(
+                    m3u8_url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+        self._sort_formats(ad_free_formats)
+        return ad_free_formats
+
+    def _get_video_info(self, video_json, slug, series_name=None):
+        video_id = video_json.get('_id', slug)
+        formats = []
+        for video_url in try_get(video_json, lambda x: x['stitched']['urls'], list) or []:
+            if video_url.get('type') != 'hls':
+                continue
+            url = url_or_none(video_url.get('url'))
+            formats.extend(
+                self._extract_m3u8_formats(
+                    url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+        info = {
+            'id': video_id,
+            'formats': self._to_ad_free_formats(video_id, formats),
+            'title': video_json.get('name'),
+            'description': video_json.get('description'),
+            'duration': float_or_none(video_json.get('duration'), scale=1000),
+        }
+        if series_name:
+            info.update({
+                'series': series_name,
+                'episode': video_json.get('name'),
+                'season_number': int_or_none(video_json.get('season')),
+                'episode_number': int_or_none(video_json.get('number')),
+            })
+        return info
+
+    def _real_extract(self, url):
+        path = compat_urlparse.urlparse(url).path
+        path_components = path.split('/')
+        video_type = path_components[2]
+        info_slug = path_components[3]
+        video_json = self._download_json(self._INFO_URL + info_slug, info_slug,
+                                         query=self._INFO_QUERY_PARAMS)
+
+        if video_type == 'series':
+            series_name = video_json.get('name', info_slug)
+            season_number = int_or_none(try_get(path_components, lambda x: x[5]))
+            episode_slug = try_get(path_components, lambda x: x[7])
+
+            videos = []
+            for season in video_json['seasons']:
+                if season_number is not None and season_number != int_or_none(season.get('number')):
+                    continue
+                for episode in season['episodes']:
+                    if episode_slug is not None and episode_slug != episode.get('slug'):
+                        continue
+                    videos.append(self._get_video_info(episode, episode_slug, series_name))
+            if not videos:
+                raise ExtractorError('Failed to find any videos to extract')
+            if episode_slug is not None and len(videos) == 1:
+                return videos[0]
+            playlist_title = series_name
+            if season_number is not None:
+                playlist_title += ' - Season %d' % season_number
+            return self.playlist_result(videos,
+                                        playlist_id=video_json.get('_id', info_slug),
+                                        playlist_title=playlist_title)
+        return self._get_video_info(video_json, info_slug)
yt_dlp/extractor/pornhub.py
@@ -167,6 +167,7 @@ class PornHubIE(PornHubBaseIE):
         'params': {
             'skip_download': True,
         },
+        'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
     }, {
         # subtitles
         'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
@@ -265,7 +266,8 @@ class PornHubIE(PornHubBaseIE):
         webpage = dl_webpage('pc')

         error_msg = self._html_search_regex(
-            r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+            (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+             r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
             webpage, 'error message', default=None, group='error')
         if error_msg:
             error_msg = re.sub(r'\s+', ' ', error_msg)
@@ -394,34 +396,50 @@ class PornHubIE(PornHubBaseIE):

         upload_date = None
         formats = []

+        def add_format(format_url, height=None):
+            ext = determine_ext(format_url)
+            if ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    format_url, video_id, mpd_id='dash', fatal=False))
+                return
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+                return
+            tbr = None
+            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
+            if mobj:
+                if not height:
+                    height = int(mobj.group('height'))
+                tbr = int(mobj.group('tbr'))
+            formats.append({
+                'url': format_url,
+                'format_id': '%dp' % height if height else None,
+                'height': height,
+                'tbr': tbr,
+            })
+
         for video_url, height in video_urls:
             if not upload_date:
                 upload_date = self._search_regex(
                     r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
                 if upload_date:
                     upload_date = upload_date.replace('/', '')
-            ext = determine_ext(video_url)
-            if ext == 'mpd':
-                formats.extend(self._extract_mpd_formats(
-                    video_url, video_id, mpd_id='dash', fatal=False))
+            if '/video/get_media' in video_url:
+                medias = self._download_json(video_url, video_id, fatal=False)
+                if isinstance(medias, list):
+                    for media in medias:
+                        if not isinstance(media, dict):
+                            continue
+                        video_url = url_or_none(media.get('videoUrl'))
+                        if not video_url:
+                            continue
+                        height = int_or_none(media.get('quality'))
+                        add_format(video_url, height)
                 continue
-            elif ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
-                continue
-            tbr = None
-            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
-            if mobj:
-                if not height:
-                    height = int(mobj.group('height'))
-                tbr = int(mobj.group('tbr'))
-            formats.append({
-                'url': video_url,
-                'format_id': '%dp' % height if height else None,
-                'height': height,
-                'tbr': tbr,
-            })
+            add_format(video_url)
         self._sort_formats(formats)

         video_uploader = self._html_search_regex(
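
The new `add_format` helper above deduplicates the height/bitrate sniffing that was previously inlined in the loop; the regex pulls both values straight out of CDN URLs. A sketch on a hypothetical URL:

import re

# '720P_4000K'-style markers are parsed from the URL; this URL is hypothetical.
format_url = 'https://cdn.example.com/videos/202103/01/xyz/720P_4000K_xyz.mp4'
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
if mobj:
    print(int(mobj.group('height')), int(mobj.group('tbr')))  # 720 4000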
yt_dlp/extractor/raiplay.py
@@ -158,6 +158,10 @@ class RaiPlayIE(RaiBaseIE):
         # subtitles at 'subtitlesArray' key (see #27698)
         'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
         'only_matching': True,
+    }, {
+        # DRM protected
+        'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
@@ -166,6 +170,14 @@ class RaiPlayIE(RaiBaseIE):
         media = self._download_json(
             base + '.json', video_id, 'Downloading video JSON')

+        if not self._downloader.params.get('allow_unplayable_formats'):
+            if try_get(
+                    media,
+                    (lambda x: x['rights_management']['rights']['drm'],
+                     lambda x: x['program_info']['rights_management']['rights']['drm']),
+                    dict):
+                raise ExtractorError('This video is DRM protected.', expected=True)
+
         title = media['name']
         video = media['video']

yt_dlp/extractor/rcs.py
@@ -15,6 +15,9 @@ from ..utils import (


 class RCSBaseIE(InfoExtractor):
+    # based on VideoPlayerLoader.prototype.getVideoSrc
+    # and VideoPlayerLoader.prototype.transformSrc from
+    # https://js2.corriereobjects.it/includes2013/LIBS/js/corriere_video.sjs
     _ALL_REPLACE = {
         'media2vam.corriere.it.edgesuite.net':
             'media2vam-corriere-it.akamaized.net',
@@ -191,10 +194,10 @@ class RCSBaseIE(InfoExtractor):
             urls.get('m3u8'), video_id, 'mp4', entry_protocol='m3u8_native',
             m3u8_id='hls', fatal=False)

-        if not formats:
+        if urls.get('mp4'):
             formats.append({
                 'format_id': 'http-mp4',
-                'url': urls.get('mp4')
+                'url': urls['mp4']
             })
         self._sort_formats(formats)
         return formats
@@ -216,10 +219,12 @@ class RCSBaseIE(InfoExtractor):
         video_data = None
         # look for json video data url
         json = self._search_regex(
-            r'''(?x)var url\s*=\s*["']((?:https?:)?
-            //video\.rcs\.it
-            /fragment-includes/video-includes/.+?\.json)["'];''',
-            page, video_id, default=None)
+            r'''(?x)url\s*=\s*(["'])
+            (?P<url>
+                (?:https?:)?//video\.rcs\.it
+                /fragment-includes/video-includes/.+?\.json
+            )\1;''',
+            page, video_id, group='url', default=None)
         if json:
             if json.startswith('//'):
                 json = 'https:%s' % json
@@ -227,13 +232,16 @@ class RCSBaseIE(InfoExtractor):

         # if json url not found, look for json video data directly in the page
         else:
+            # RCS normal pages and most of the embeds
             json = self._search_regex(
                 r'[\s;]video\s*=\s*({[\s\S]+?})(?:;|,playlist=)',
                 page, video_id, default=None)
-            if json:
-                video_data = self._parse_json(
-                    json, video_id, transform_source=js_to_json)
-            else:
+            if not json and 'video-embed' in url:
+                page = self._download_webpage(url.replace('video-embed', 'video-json'), video_id)
+                json = self._search_regex(
+                    r'##start-video##({[\s\S]+?})##end-video##',
+                    page, video_id, default=None)
+            if not json:
                 # if no video data found try search for iframes
                 emb = RCSEmbedsIE._extract_url(page)
                 if emb:
@@ -242,6 +250,9 @@ class RCSBaseIE(InfoExtractor):
                         'url': emb,
                         'ie_key': RCSEmbedsIE.ie_key()
                     }
+            if json:
+                video_data = self._parse_json(
+                    json, video_id, transform_source=js_to_json)

         if not video_data:
             raise ExtractorError('Video data not found in the page')
@@ -250,7 +261,8 @@ class RCSBaseIE(InfoExtractor):
             self._get_video_src(video_data), video_id)

         description = (video_data.get('description')
-                       or clean_html(video_data.get('htmlDescription')))
+                       or clean_html(video_data.get('htmlDescription'))
+                       or self._html_search_meta('description', page))
         uploader = video_data.get('provider') or mobj.group('cdn')

         return {
@@ -283,6 +295,7 @@ class RCSEmbedsIE(RCSBaseIE):
             'uploader': 'rcs.it',
         }
     }, {
+        # redownload the page changing 'video-embed' in 'video-json'
         'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789',
         'md5': 'a043e3fecbe4d9ed7fc5d888652a5440',
         'info_dict': {
@@ -359,6 +372,7 @@ class RCSIE(RCSBaseIE):
             'uploader': 'Corriere Tv',
         }
     }, {
+        # video data inside iframe
        'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/',
         'md5': 'da378e4918d2afbf7d61c35abb948d4c',
         'info_dict': {
@@ -389,15 +403,15 @@ class RCSVariousIE(RCSBaseIE):
                     (?P<cdn>
                         leitv\.it|
                         youreporter\.it
-                    )/(?:video/)?(?P<id>[^/]+?)(?:$|\?|/)'''
+                    )/(?:[^/]+/)?(?P<id>[^/]+?)(?:$|\?|/)'''
     _TESTS = [{
-        'url': 'https://www.leitv.it/video/marmellata-di-ciliegie-fatta-in-casa/',
-        'md5': '618aaabac32152199c1af86784d4d554',
+        'url': 'https://www.leitv.it/benessere/mal-di-testa-come-combatterlo-ed-evitarne-la-comparsa/',
+        'md5': '92b4e63667b8f95acb0a04da25ae28a1',
         'info_dict': {
-            'id': 'marmellata-di-ciliegie-fatta-in-casa',
+            'id': 'mal-di-testa-come-combatterlo-ed-evitarne-la-comparsa',
             'ext': 'mp4',
-            'title': 'Marmellata di ciliegie fatta in casa',
-            'description': 'md5:89133864d6aad456dbcf6e7a29f86263',
+            'title': 'Cervicalgia e mal di testa, il video con i suggerimenti dell\'esperto',
+            'description': 'md5:ae21418f34cee0b8d02a487f55bcabb5',
             'uploader': 'leitv.it',
         }
     }, {
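
For 'video-embed' pages, the updated RCS code above re-downloads the matching 'video-json' page and pulls the JSON blob from between literal markers. A sketch with a hypothetical page body:

import re

# Hypothetical 'video-json' page body
page = 'header ##start-video##{"videoId": "abc123"}##end-video## footer'
json_blob = re.search(
    r'##start-video##({[\s\S]+?})##end-video##', page).group(1)
print(json_blob)  # {"videoId": "abc123"}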
yt_dlp/extractor/rds.py
@@ -15,17 +15,17 @@ class RDSIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'

     _TESTS = [{
-        'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
+        # has two 9c9media ContentPackages, the web player selects the first ContentPackage
+        'url': 'https://www.rds.ca/videos/Hockey/NationalHockeyLeague/teams/9/forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande-3.1377606',
         'info_dict': {
-            'id': '604333',
-            'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
+            'id': '2083309',
+            'display_id': 'forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande',
             'ext': 'flv',
-            'title': 'Fowler Jr. prend la direction de Jacksonville',
-            'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
-            'timestamp': 1430397346,
-            'upload_date': '20150430',
-            'duration': 154.354,
-            'age_limit': 0,
+            'title': 'Forum du 5 à 7 : Kotkaniemi de retour de Finlande',
+            'description': 'md5:83fa38ecc4a79b19e433433254077f25',
+            'timestamp': 1606129030,
+            'upload_date': '20201123',
+            'duration': 773.039,
         }
     }, {
         'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934',
yt_dlp/extractor/rts.py
@@ -6,11 +6,12 @@ import re
 from .srgssr import SRGSSRIE
 from ..compat import compat_str
 from ..utils import (
+    determine_ext,
     int_or_none,
     parse_duration,
     parse_iso8601,
     unescapeHTML,
-    determine_ext,
+    urljoin,
 )


@@ -21,7 +22,7 @@ class RTSIE(SRGSSRIE):
     _TESTS = [
         {
             'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
-            'md5': 'ff7f8450a90cf58dacb64e29707b4a8e',
+            'md5': '753b877968ad8afaeddccc374d4256a5',
             'info_dict': {
                 'id': '3449373',
                 'display_id': 'les-enfants-terribles',
@@ -35,6 +36,7 @@ class RTSIE(SRGSSRIE):
                 'thumbnail': r're:^https?://.*\.image',
                 'view_count': int,
             },
+            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
         },
         {
             'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
@@ -63,11 +65,12 @@ class RTSIE(SRGSSRIE):
                 # m3u8 download
                 'skip_download': True,
             },
+            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
             'skip': 'Blocked outside Switzerland',
         },
         {
             'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
-            'md5': '1bae984fe7b1f78e94abc74e802ed99f',
+            'md5': '9bb06503773c07ce83d3cbd793cebb91',
             'info_dict': {
                 'id': '5745356',
                 'display_id': 'londres-cachee-par-un-epais-smog',
@@ -81,6 +84,7 @@ class RTSIE(SRGSSRIE):
                 'thumbnail': r're:^https?://.*\.image',
                 'view_count': int,
             },
+            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
         },
         {
             'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
@@ -160,7 +164,7 @@ class RTSIE(SRGSSRIE):
         media_type = 'video' if 'video' in all_info else 'audio'

         # check for errors
-        self.get_media_data('rts', media_type, media_id)
+        self._get_media_data('rts', media_type, media_id)

         info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']

@@ -194,6 +198,7 @@ class RTSIE(SRGSSRIE):
                     'tbr': extract_bitrate(format_url),
                 })

+        download_base = 'http://rtsww%s-d.rts.ch/' % ('-a' if media_type == 'audio' else '')
         for media in info.get('media', []):
             media_url = media.get('url')
             if not media_url or re.match(r'https?://', media_url):
@@ -205,7 +210,7 @@ class RTSIE(SRGSSRIE):
                 format_id += '-%dk' % rate
             formats.append({
                 'format_id': format_id,
-                'url': 'http://download-video.rts.ch/' + media_url,
+                'url': urljoin(download_base, media_url),
                 'tbr': rate or extract_bitrate(media_url),
             })

@@ -2,8 +2,9 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import io
|
||||||
import re
|
import re
|
||||||
import time
|
import sys
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@@ -14,56 +15,13 @@ from ..utils import (
|
|||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
qualities,
|
||||||
remove_end,
|
remove_end,
|
||||||
remove_start,
|
remove_start,
|
||||||
sanitized_Request,
|
|
||||||
std_headers,
|
std_headers,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))
|
||||||
def _decrypt_url(png):
|
|
||||||
encrypted_data = compat_b64decode(png)
|
|
||||||
text_index = encrypted_data.find(b'tEXt')
|
|
||||||
text_chunk = encrypted_data[text_index - 4:]
|
|
||||||
length = compat_struct_unpack('!I', text_chunk[:4])[0]
|
|
||||||
# Use bytearray to get integers when iterating in both python 2.x and 3.x
|
|
||||||
data = bytearray(text_chunk[8:8 + length])
|
|
||||||
data = [chr(b) for b in data if b != 0]
|
|
||||||
hash_index = data.index('#')
|
|
||||||
alphabet_data = data[:hash_index]
|
|
||||||
url_data = data[hash_index + 1:]
|
|
||||||
if url_data[0] == 'H' and url_data[3] == '%':
|
|
||||||
# remove useless HQ%% at the start
|
|
||||||
url_data = url_data[4:]
|
|
||||||
|
|
||||||
alphabet = []
|
|
||||||
e = 0
|
|
||||||
d = 0
|
|
||||||
for l in alphabet_data:
|
|
||||||
if d == 0:
|
|
||||||
alphabet.append(l)
|
|
||||||
d = e = (e + 1) % 4
|
|
||||||
else:
|
|
||||||
d -= 1
|
|
||||||
url = ''
|
|
||||||
f = 0
|
|
||||||
e = 3
|
|
||||||
b = 1
|
|
||||||
for letter in url_data:
|
|
||||||
if f == 0:
|
|
||||||
l = int(letter) * 10
|
|
||||||
f = 1
|
|
||||||
else:
|
|
||||||
if e == 0:
|
|
||||||
l += int(letter)
|
|
||||||
url += alphabet[l]
|
|
||||||
e = (b + 3) % 4
|
|
||||||
f = 0
|
|
||||||
b += 1
|
|
||||||
else:
|
|
||||||
e -= 1
|
|
||||||
|
|
||||||
return url
|
|
||||||
|
|
||||||
|
|
||||||
class RTVEALaCartaIE(InfoExtractor):
|
class RTVEALaCartaIE(InfoExtractor):
|
||||||
@@ -79,28 +37,31 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
||||||
'duration': 5024.566,
|
'duration': 5024.566,
|
||||||
|
'series': 'Balonmano',
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||||
}, {
|
}, {
|
||||||
'note': 'Live stream',
|
'note': 'Live stream',
|
||||||
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
|
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1694255',
|
'id': '1694255',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'TODO',
|
'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'live stream',
|
||||||
},
|
},
|
||||||
'skip': 'The f4m manifest can\'t be used yet',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
|
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
|
||||||
'md5': 'e55e162379ad587e9640eda4f7353c0f',
|
'md5': 'd850f3c8731ea53952ebab489cf81cbf',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4236788',
|
'id': '4236788',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Servir y proteger - Capítulo 104',
|
'title': 'Servir y proteger - Capítulo 104',
|
||||||
'duration': 3222.0,
|
'duration': 3222.0,
|
||||||
},
|
},
|
||||||
'params': {
|
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||||
'skip_download': True, # requires ffmpeg
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -111,58 +72,102 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
|
user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
|
||||||
manager_info = self._download_json(
|
self._manager = self._download_json(
|
||||||
'http://www.rtve.es/odin/loki/' + user_agent_b64,
|
'http://www.rtve.es/odin/loki/' + user_agent_b64,
|
||||||
None, 'Fetching manager info')
|
None, 'Fetching manager info')['manager']
|
||||||
self._manager = manager_info['manager']
|
|
||||||
|
@staticmethod
|
||||||
|
def _decrypt_url(png):
|
||||||
|
encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
|
||||||
|
while True:
|
||||||
|
length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
|
||||||
|
chunk_type = encrypted_data.read(4)
|
||||||
|
if chunk_type == b'IEND':
|
||||||
|
break
|
||||||
|
data = encrypted_data.read(length)
|
||||||
|
if chunk_type == b'tEXt':
|
||||||
|
alphabet_data, text = data.split(b'\0')
|
||||||
|
quality, url_data = text.split(b'%%')
|
||||||
|
alphabet = []
|
||||||
|
e = 0
|
||||||
|
d = 0
|
||||||
|
for l in _bytes_to_chr(alphabet_data):
|
||||||
|
if d == 0:
|
||||||
|
alphabet.append(l)
|
||||||
|
d = e = (e + 1) % 4
|
||||||
|
else:
|
||||||
|
d -= 1
|
||||||
|
url = ''
|
||||||
|
f = 0
|
||||||
|
e = 3
|
||||||
|
b = 1
|
||||||
|
for letter in _bytes_to_chr(url_data):
|
||||||
|
if f == 0:
|
||||||
|
l = int(letter) * 10
|
||||||
|
f = 1
|
||||||
|
else:
|
||||||
|
if e == 0:
|
||||||
|
l += int(letter)
|
||||||
|
url += alphabet[l]
|
||||||
|
e = (b + 3) % 4
|
||||||
|
f = 0
|
||||||
|
b += 1
|
||||||
|
else:
|
||||||
|
e -= 1
|
||||||
|
|
||||||
|
yield quality.decode(), url
|
||||||
|
encrypted_data.read(4) # CRC
|
||||||
|
|
||||||
|
def _extract_png_formats(self, video_id):
|
||||||
|
png = self._download_webpage(
|
||||||
|
'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
|
||||||
|
video_id, 'Downloading url information', query={'q': 'v2'})
|
||||||
|
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
|
||||||
|
formats = []
|
||||||
|
for quality, video_url in self._decrypt_url(png):
|
||||||
|
ext = determine_ext(video_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
video_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
video_url, video_id, 'dash', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'format_id': quality,
|
||||||
|
'quality': q(quality),
|
||||||
|
'url': video_url,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
info = self._download_json(
|
info = self._download_json(
|
||||||
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
||||||
video_id)['page']['items'][0]
|
video_id)['page']['items'][0]
|
||||||
if info['state'] == 'DESPU':
|
if info['state'] == 'DESPU':
|
||||||
raise ExtractorError('The video is no longer available', expected=True)
|
raise ExtractorError('The video is no longer available', expected=True)
|
||||||
title = info['title']
|
title = info['title'].strip()
|
||||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
|
formats = self._extract_png_formats(video_id)
|
||||||
png_request = sanitized_Request(png_url)
|
|
||||||
png_request.add_header('Referer', url)
|
|
||||||
png = self._download_webpage(png_request, video_id, 'Downloading url information')
|
|
||||||
video_url = _decrypt_url(png)
|
|
||||||
ext = determine_ext(video_url)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
if not video_url.endswith('.f4m') and ext != 'm3u8':
|
|
||||||
if '?' not in video_url:
|
|
||||||
video_url = video_url.replace('resources/', 'auth/resources/')
|
|
||||||
video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
|
|
||||||
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
elif ext == 'f4m':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
video_url, video_id, f4m_id='hds', fatal=False))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': video_url,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
subtitles = None
|
subtitles = None
|
||||||
if info.get('sbtFile') is not None:
|
sbt_file = info.get('sbtFile')
|
||||||
subtitles = self.extract_subtitles(video_id, info['sbtFile'])
|
if sbt_file:
|
||||||
|
subtitles = self.extract_subtitles(video_id, sbt_file)
|
||||||
|
|
||||||
|
is_live = info.get('live') is True
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': self._live_title(title) if is_live else title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': info.get('image'),
|
'thumbnail': info.get('image'),
|
||||||
'page_url': url,
|
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'duration': float_or_none(info.get('duration'), scale=1000),
|
'duration': float_or_none(info.get('duration'), 1000),
|
||||||
|
'is_live': is_live,
|
||||||
|
'series': info.get('programTitle'),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, sub_file):
|
def _get_subtitles(self, video_id, sub_file):
|
||||||
@@ -174,48 +179,26 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
             for s in subs)
 
 
-class RTVEInfantilIE(InfoExtractor):
+class RTVEInfantilIE(RTVEALaCartaIE):
     IE_NAME = 'rtve.es:infantil'
     IE_DESC = 'RTVE infantil'
-    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
 
     _TESTS = [{
         'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
-        'md5': '915319587b33720b8e0357caaa6617e6',
+        'md5': '5747454717aedf9f9fdf212d1bcfc48d',
         'info_dict': {
             'id': '3040283',
             'ext': 'mp4',
             'title': 'Maneras de vivir',
-            'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
+            'thumbnail': r're:https?://.+/1426182947956\.JPG',
             'duration': 357.958,
         },
+        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
     }]
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        info = self._download_json(
-            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
-            video_id)['page']['items'][0]
-
-        webpage = self._download_webpage(url, video_id)
-        vidplayer_id = self._search_regex(
-            r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
-
-        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
-        png = self._download_webpage(png_url, video_id, 'Downloading url information')
-        video_url = _decrypt_url(png)
-
-        return {
-            'id': video_id,
-            'ext': 'mp4',
-            'title': info['title'],
-            'url': video_url,
-            'thumbnail': info.get('image'),
-            'duration': float_or_none(info.get('duration'), scale=1000),
-        }
-
 
-class RTVELiveIE(InfoExtractor):
+class RTVELiveIE(RTVEALaCartaIE):
     IE_NAME = 'rtve.es:live'
     IE_DESC = 'RTVE.es live streams'
     _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
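Both extractors now subclass RTVEALaCartaIE rather than InfoExtractor, so their duplicated _real_extract() bodies can simply be deleted: extraction is inherited through normal attribute lookup, and the dropped named groups ((?P<show>…), (?P<short_title>…)) were never read. After the change the infantil extractor is little more than metadata, roughly:

class RTVEInfantilIE(RTVEALaCartaIE):
    IE_NAME = 'rtve.es:infantil'
    IE_DESC = 'RTVE infantil'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
    # no _real_extract() here -- RTVEALaCartaIE's version is used as-is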
@@ -225,7 +208,7 @@ class RTVELiveIE(InfoExtractor):
         'info_dict': {
             'id': 'la-1',
             'ext': 'mp4',
-            'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
+            'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
         },
         'params': {
             'skip_download': 'live stream',
@@ -234,29 +217,22 @@ class RTVELiveIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        start_time = time.gmtime()
         video_id = mobj.group('id')
 
         webpage = self._download_webpage(url, video_id)
         title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
         title = remove_start(title, 'Estoy viendo ')
-        title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
 
         vidplayer_id = self._search_regex(
             (r'playerId=player([0-9]+)',
              r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
              r'data-id=["\'](\d+)'),
             webpage, 'internal video ID')
-        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
-        png = self._download_webpage(png_url, video_id, 'Downloading url information')
-        m3u8_url = _decrypt_url(png)
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': title,
-            'formats': formats,
+            'title': self._live_title(title),
+            'formats': self._extract_png_formats(vidplayer_id),
             'is_live': True,
         }
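The removed gmtime suffix and the adjusted test title both trace back to _live_title(), the shared InfoExtractor helper that timestamps live-stream titles. As implemented in extractor/common.py at the time, it amounts to roughly the following:

import datetime

def _live_title(self, name):
    # Append the current local time, e.g. 'La 1 2021-04-11 18:30' --
    # hence the new test regex ending in ' [0-9]{2}:[0-9]{2}$'.
    return '%s %s' % (name, datetime.datetime.now().strftime('%Y-%m-%d %H:%M'))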
@@ -10,7 +10,7 @@ from ..utils import (
 
 class SBSIE(InfoExtractor):
     IE_DESC = 'sbs.com.au'
-    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=|/watch/)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
 
     _TESTS = [{
         # Original URL is handled by the generic IE which finds the iframe:
@@ -43,6 +43,9 @@ class SBSIE(InfoExtractor):
     }, {
         'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866',
         'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
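A quick sanity check of the widened pattern, using only the two test URLs from this diff: the new /ondemand/watch/ form matches alongside the existing news-embed form.

import re

_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=|/watch/)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'

for url in ('https://www.sbs.com.au/ondemand/watch/1698704451971',
            'https://www.sbs.com.au/news/embeds/video/1840778819866'):
    print(re.match(_VALID_URL, url).group('id'))  # 1698704451971, then 1840778819866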
@@ -2,12 +2,18 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
+    get_element_by_class,
+    int_or_none,
+    remove_start,
+    strip_or_none,
+    unified_strdate,
+)
 
 
 class ScreencastOMaticIE(InfoExtractor):
-    _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
-    _TEST = {
+    _VALID_URL = r'https?://screencast-o-matic\.com/(?:(?:watch|player)/|embed\?.*?\bsc=)(?P<id>[0-9a-zA-Z]+)'
+    _TESTS = [{
         'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
         'md5': '483583cb80d92588f15ccbedd90f0c18',
         'info_dict': {
@@ -16,22 +22,30 @@ class ScreencastOMaticIE(InfoExtractor):
             'title': 'Welcome to 3-4 Philosophy @ DECV!',
             'thumbnail': r're:^https?://.*\.jpg$',
             'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
-            'duration': 369.163,
-        }
-    }
+            'duration': 369,
+            'upload_date': '20141216',
+        }
+    }, {
+        'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl',
+        'only_matching': True,
+    }, {
+        'url': 'http://screencast-o-matic.com/embed?ff=true&sc=cbV2r4Q5TL&fromPH=true&a=1',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        jwplayer_data = self._parse_json(
-            self._search_regex(
-                r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", webpage, 'setup code'),
-            video_id, transform_source=js_to_json)
-
-        info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
-        info_dict.update({
-            'title': self._og_search_title(webpage),
-            'description': self._og_search_description(webpage),
+        webpage = self._download_webpage(
+            'https://screencast-o-matic.com/player/' + video_id, video_id)
+        info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+        info.update({
+            'id': video_id,
+            'title': get_element_by_class('overlayTitle', webpage),
+            'description': strip_or_none(get_element_by_class('overlayDescription', webpage)) or None,
+            'duration': int_or_none(self._search_regex(
+                r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};',
+                webpage, 'duration', default=None)),
+            'upload_date': unified_strdate(remove_start(
+                get_element_by_class('overlayPublished', webpage), 'Published: ')),
         })
-        return info_dict
+        return info
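The rewrite drops the jwplayer setup-JSON parsing in favour of the generic HTML5 <video> parser plus a few values scraped from the player page by class name. To illustrate what the new regexes expect, here is a standalone sketch against a hypothetical page fragment; the markup below is inferred from the regexes and the 369/'20141216' test values in this diff, not captured from a live page.

import re

page = '''<div class="overlayPublished">Published: Dec 16, 2014</div>
<script>player.duration = function(){ return 369; };</script>'''

# Duration now comes out of the player script rather than jwplayer config:
duration = int(re.search(
    r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};',
    page).group(1))
print(duration)  # 369

# remove_start() strips the 'Published: ' prefix; unified_strdate() would
# then normalize the remaining date text to '20141216'.
published = re.search(r'class="overlayPublished">([^<]+)<', page).group(1)
print(published[len('Published: '):])  # Dec 16, 2014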
@@ -51,13 +51,16 @@ class ShahidIE(ShahidBaseIE):
     _NETRC_MACHINE = 'shahid'
     _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
     _TESTS = [{
-        'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D8%B4%D8%A8%D8%A7%D8%A8-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-275286',
+        'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
         'info_dict': {
-            'id': '275286',
+            'id': '816924',
             'ext': 'mp4',
-            'title': 'مجلس الشباب الموسم 1 كليب 1',
-            'timestamp': 1506988800,
-            'upload_date': '20171003',
+            'title': 'متحف الدحيح الموسم 1 كليب 1',
+            'timestamp': 1602806400,
+            'upload_date': '20201016',
+            'description': 'برومو',
+            'duration': 22,
+            'categories': ['كوميديا'],
         },
         'params': {
             # m3u8 download
@@ -109,12 +112,15 @@ class ShahidIE(ShahidBaseIE):
             page_type = 'episode'
 
         playout = self._call_api(
-            'playout/url/' + video_id, video_id)['playout']
+            'playout/new/url/' + video_id, video_id)['playout']
 
         if not self._downloader.params.get('allow_unplayable_formats') and playout.get('drm'):
             raise ExtractorError('This video is DRM protected.', expected=True)
 
-        formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
+        formats = self._extract_m3u8_formats(re.sub(
+            # https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
+            r'aws\.manifestfilter=[\w:;,-]+&?',
+            '', playout['url']), video_id, 'mp4')
         self._sort_formats(formats)
 
         # video = self._call_api(
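The new re.sub strips AWS MediaPackage's aws.manifestfilter query parameter (see the linked docs) before the master m3u8 is fetched, so the manifest is not pre-filtered down to a subset of renditions. In isolation, with a made-up URL of the documented shape:

import re

url = ('https://example.mediapackage.amazonaws.com/out/v1/abc/index.m3u8'
       '?aws.manifestfilter=audio_language:ara;subtitle_language:ara&foo=1')
print(re.sub(r'aws\.manifestfilter=[\w:;,-]+&?', '', url))
# https://example.mediapackage.amazonaws.com/out/v1/abc/index.m3u8?foo=1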