Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-12-18 03:42:23 +01:00)

Compare commits: 2021.12.27...2022.02.03 (200 commits)
Commit SHA1s:

3cea9ec2eb, 28469edd7d, d5a398988b, 455a15e2dc, 460a1c08b9, 4918522735, 65662dffb1, 5e51f4a8ad,
54bb39065c, c5332d7fbb, 35cd4c4d88, 67fb99f193, 85553414ae, d16df59db5, 63c3ee4f63, 182bda88e8,
16aa9ea41d, d6bc443bde, 046cab3915, 7df07a3b55, 2d49720f89, 48416bc4a8, 6a0546e313, dbcea0585f,
f7d4854131, 403be2eefb, 63bac931c2, 7c74a01584, 1d3586d0d5, c533c89ce1, b8b3f4562a, 1c6f480160,
f8580bf02f, 19afd9ea51, b72270d27e, 706dfe441b, c4da5ff971, e26f9cc1e5, fa8fd95118, 05b23b4156,
8f028b5f40, 013322a95e, fb62afd6f0, 50600e833d, fc08bdd6ab, 2568d41f70, 88f23a18e0, bb66c24797,
2edb38e8ca, af6793f804, b695e3f9bd, 6a5a30f9e2, d37707bda4, f40ee5e9a0, 1f13021eca, e612f66c7c,
87e8e8a7d0, e600a5c908, 50ce204cc2, 144a3588b4, ed40877833, 935f5a4209, 6970b6005e, fc5fa964c7,
e0ddbd02bd, 0bfc53d05c, 78ab4f447c, 85fee22152, ad9158d5f4, f81c62a6a4, 6c73052c0a, 593e43c030,
8fe514d382, b1156c1e59, 311b6615d8, 396a76f7bf, 301d07fc4b, d14cbdd92d, 19b4c74d40, 135dfa2c7e,
e0585e6562, 426764371f, 64f36541c9, 0ff1e0fba3, 1a20d29552, f7085283e1, e25ca9b017, 4259402c56,
dfb7f2a25d, 42c5458a02, ba1c671d2e, b143e83ec9, 4a77fb1d6b, 66f7c6a3e0, baf599effa, 8bd1c00bf3,
596379e260, b6ce9bb038, eea1b0358e, 32b95bb643, fdf80059d9, aa062713c1, 71738b1451, 0bb5ac1ac4,
77b28f000a, d57576b9d9, 11c861702d, a4a426023d, 3b603dbdf1, 5df1ac92bd, b2db8102dc, e9a6a65a55,
ed8d87f911, 397235c52b, 4636548463, cb3c5682ae, 7d449fff53, 80fa6e5327, fabb27fcea, e04938ab88,
8bcd404818, 0df11dafdd, dc5f409cdc, 99d6f9461d, 8130779db6, ed5835b451, e88e1febd8, faca674510,
0931ba94ab, b31874334d, f1150b9e1e, d6579d532b, 2be56f2242, f95a7b93e6, 62c955efc9, 0254f16274,
a70b71e85a, 4c968755fc, be1f331f21, 3cf5429a21, bfa0e270cf, f76ca2dd56, 5f969a78b0, 443f8de820,
768145d48a, 976ae3eabb, f0d785d3ed, 97a6b117d9, 6f32a0b5b7, e8736539f3, 9c634ef857, 9f517bb1f3,
b8eeced286, db47787024, fdeab99eab, 9e907ebddf, 21df2117e4, 06e57990f7, b62fa6d75f, be72c62480,
61e9d9268c, a13e684813, f46e2f9d92, 9c906919ae, 6020e05d23, ebed8b3732, 1e43a6f733, ca30f449a1,
af3cbd8782, 7141ced57d, 18c7683d27, f5c2c2c9b0, 8896899216, 1797b073ed, 4c922dd3fc, b8e976a445,
a9f5f5d6eb, f522573787, 7592749cbe, 767f999b53, 8efffafa53, 26f2aa3db9, 3464a2727b, 497d77e1aa,
9040e2d6e3, 6134fbeb65, cfcf60ea99, 4afa3ec4b6, 11aa91a12f, abbeeebc4c, 2c539d493a, 042931a507,
96f13f01a6, 4b9353239e, dd5e60b15d, e540c56f39, 45d86abeb4, f02d24d8d2, ceb98323f2, 7537e35b64

.github/ISSUE_TEMPLATE/1_broken_site.yml (vendored, 12 changes)
@@ -1,4 +1,4 @@
-name: Broken site support
+name: Broken site
 description: Report broken or misfunctioning site
 labels: [triage, site-bug]
 body:

@@ -11,7 +11,7 @@ body:
     options:
       - label: I'm reporting a broken site
         required: true
-      - label: I've verified that I'm running yt-dlp version **2021.12.27**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
+      - label: I've verified that I'm running yt-dlp version **2022.02.03**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
         required: true
       - label: I've checked that all provided URLs are alive and playable in a browser
         required: true

@@ -44,19 +44,19 @@ body:
       label: Verbose log
       description: |
         Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**.
-        Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
+        Add the `-vU` flag to your command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
-        [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc']
         [debug] Portable config file: yt-dlp.conf
         [debug] Portable config: ['-i']
         [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
-        [debug] yt-dlp version 2021.12.27 (exe)
+        [debug] yt-dlp version 2022.02.03 (exe)
         [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
         [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
         [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
         [debug] Proxy map: {}
-        yt-dlp is up to date (2021.12.27)
+        yt-dlp is up to date (2022.02.03)
         <more lines>
       render: shell
     validations:

.github/ISSUE_TEMPLATE/2_site_support_request.yml (vendored)

@@ -11,7 +11,7 @@ body:
     options:
       - label: I'm reporting a new site support request
         required: true
-      - label: I've verified that I'm running yt-dlp version **2021.12.27**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
+      - label: I've verified that I'm running yt-dlp version **2022.02.03**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
         required: true
       - label: I've checked that all provided URLs are alive and playable in a browser
         required: true

@@ -55,19 +55,19 @@ body:
       label: Verbose log
       description: |
         Provide the complete verbose output **using one of the example URLs provided above**.
-        Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
+        Add the `-vU` flag to your command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
-        [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc']
         [debug] Portable config file: yt-dlp.conf
         [debug] Portable config: ['-i']
         [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
-        [debug] yt-dlp version 2021.12.27 (exe)
+        [debug] yt-dlp version 2022.02.03 (exe)
         [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
         [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
         [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
         [debug] Proxy map: {}
-        yt-dlp is up to date (2021.12.27)
+        yt-dlp is up to date (2022.02.03)
         <more lines>
       render: shell
     validations:

.github/ISSUE_TEMPLATE/3_site_feature_request.yml (vendored)

@@ -11,7 +11,7 @@ body:
     options:
      - label: I'm reporting a site feature request
        required: true
-      - label: I've verified that I'm running yt-dlp version **2021.12.27**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
+      - label: I've verified that I'm running yt-dlp version **2022.02.03**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
        required: true
      - label: I've checked that all provided URLs are alive and playable in a browser
        required: true

@@ -32,7 +32,7 @@ body:
       label: Example URLs
       description: |
         Example URLs that can be used to demonstrate the requested feature
-      value: |
+      placeholder: |
         https://www.youtube.com/watch?v=BaW_jenozKc
     validations:
       required: true

@@ -53,19 +53,19 @@ body:
       label: Verbose log
       description: |
         Provide the complete verbose output of yt-dlp that demonstrates the need for the enhancement.
-        Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
+        Add the `-vU` flag to your command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
-        [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc']
         [debug] Portable config file: yt-dlp.conf
         [debug] Portable config: ['-i']
         [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
-        [debug] yt-dlp version 2021.12.27 (exe)
+        [debug] yt-dlp version 2022.02.03 (exe)
         [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
         [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
         [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
         [debug] Proxy map: {}
-        yt-dlp is up to date (2021.12.27)
+        yt-dlp is up to date (2022.02.03)
         <more lines>
       render: shell
     validations:

.github/ISSUE_TEMPLATE/4_bug_report.yml (vendored, 10 changes)
@@ -11,7 +11,7 @@ body:
     options:
       - label: I'm reporting a bug unrelated to a specific site
         required: true
-      - label: I've verified that I'm running yt-dlp version **2021.12.27**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
+      - label: I've verified that I'm running yt-dlp version **2022.02.03**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
         required: true
       - label: I've checked that all provided URLs are alive and playable in a browser
         required: true

@@ -38,19 +38,19 @@ body:
       label: Verbose log
       description: |
         Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**.
-        Add the `-Uv` flag to **your** command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
+        Add the `-vU` flag to **your** command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
-        [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc']
         [debug] Portable config file: yt-dlp.conf
         [debug] Portable config: ['-i']
         [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
-        [debug] yt-dlp version 2021.12.27 (exe)
+        [debug] yt-dlp version 2022.02.03 (exe)
         [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
         [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
         [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
         [debug] Proxy map: {}
-        yt-dlp is up to date (2021.12.27)
+        yt-dlp is up to date (2022.02.03)
         <more lines>
       render: shell
     validations:

.github/ISSUE_TEMPLATE/5_feature_request.yml (vendored, 4 changes)
@@ -11,7 +11,9 @@ body:
     options:
       - label: I'm reporting a feature request
         required: true
-      - label: I've verified that I'm running yt-dlp version **2021.12.27**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
+      - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
+        required: true
+      - label: I've verified that I'm running yt-dlp version **2022.02.03**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
         required: true
       - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates
         required: true

.github/ISSUE_TEMPLATE/6_question.yml (vendored, 7 changes)
@@ -25,7 +25,8 @@ body:
         Ask your question in an arbitrary form.
         Please make sure it's worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient).
         Provide any additional information and as much context and examples as possible.
-        If your question contains "isn't working" or "can you add", this is most likely the wrong template
+        If your question contains "isn't working" or "can you add", this is most likely the wrong template.
+        If you are in doubt if this is the right template, use another template!
       placeholder: WRITE QUESTION HERE
     validations:
       required: true

@@ -35,10 +36,10 @@ body:
       label: Verbose log
       description: |
         If your question involes a yt-dlp command, provide the complete verbose output of that command.
-        Add the `-Uv` flag to **your** command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
+        Add the `-vU` flag to **your** command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
-        [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc']
         [debug] Portable config file: yt-dlp.conf
         [debug] Portable config: ['-i']
         [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252

.github/ISSUE_TEMPLATE/config.yml (vendored, 3 changes)
@@ -3,3 +3,6 @@ contact_links:
   - name: Get help from the community on Discord
     url: https://discord.gg/H5MNcFW63r
     about: Join the yt-dlp Discord for community-powered support!
+  - name: Matrix Bridge to the Discord server
+    url: https://matrix.to/#/#yt-dlp:matrix.org
+    about: For those who do not want to use Discord

.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml (vendored)

@@ -1,4 +1,4 @@
-name: Broken site support
+name: Broken site
 description: Report broken or misfunctioning site
 labels: [triage, site-bug]
 body:

@@ -44,10 +44,10 @@ body:
       label: Verbose log
       description: |
         Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**.
-        Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
+        Add the `-vU` flag to your command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
-        [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc']
         [debug] Portable config file: yt-dlp.conf
         [debug] Portable config: ['-i']
         [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252

.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml (vendored)

@@ -55,10 +55,10 @@ body:
       label: Verbose log
       description: |
         Provide the complete verbose output **using one of the example URLs provided above**.
-        Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
+        Add the `-vU` flag to your command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
-        [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc']
         [debug] Portable config file: yt-dlp.conf
         [debug] Portable config: ['-i']
         [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252

.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml (vendored)

@@ -32,7 +32,7 @@ body:
       label: Example URLs
       description: |
         Example URLs that can be used to demonstrate the requested feature
-      value: |
+      placeholder: |
         https://www.youtube.com/watch?v=BaW_jenozKc
     validations:
       required: true

@@ -53,10 +53,10 @@ body:
       label: Verbose log
       description: |
         Provide the complete verbose output of yt-dlp that demonstrates the need for the enhancement.
-        Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
+        Add the `-vU` flag to your command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
-        [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc']
         [debug] Portable config file: yt-dlp.conf
         [debug] Portable config: ['-i']
         [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252

.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml (vendored, 4 changes)
@@ -38,10 +38,10 @@ body:
       label: Verbose log
       description: |
         Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**.
-        Add the `-Uv` flag to **your** command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
+        Add the `-vU` flag to **your** command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
-        [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc']
         [debug] Portable config file: yt-dlp.conf
         [debug] Portable config: ['-i']
         [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252

.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml (vendored)

@@ -11,6 +11,8 @@ body:
     options:
       - label: I'm reporting a feature request
         required: true
+      - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
+        required: true
       - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
         required: true
       - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates

.github/ISSUE_TEMPLATE_tmpl/6_question.yml (vendored, 7 changes)
@@ -25,7 +25,8 @@ body:
         Ask your question in an arbitrary form.
         Please make sure it's worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient).
         Provide any additional information and as much context and examples as possible.
-        If your question contains "isn't working" or "can you add", this is most likely the wrong template
+        If your question contains "isn't working" or "can you add", this is most likely the wrong template.
+        If you are in doubt if this is the right template, use another template!
       placeholder: WRITE QUESTION HERE
     validations:
       required: true

@@ -35,10 +36,10 @@ body:
       label: Verbose log
       description: |
         If your question involes a yt-dlp command, provide the complete verbose output of that command.
-        Add the `-Uv` flag to **your** command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
+        Add the `-vU` flag to **your** command line you run yt-dlp with (`yt-dlp -vU <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
-        [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc']
         [debug] Portable config file: yt-dlp.conf
         [debug] Portable config: ['-i']
         [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252

.github/workflows/build.yml (vendored, 26 changes)
@@ -96,7 +96,7 @@ jobs:
       env:
         BREW_TOKEN: ${{ secrets.BREW_TOKEN }}
       if: "env.BREW_TOKEN != ''"
-      uses: webfactory/ssh-agent@v0.5.3
+      uses: yt-dlp/ssh-agent@v0.5.3
       with:
         ssh-private-key: ${{ env.BREW_TOKEN }}
     - name: Update Homebrew Formulae

@@ -165,7 +165,7 @@ jobs:
     - name: Install Requirements
       run: |
         brew install coreutils
-        /usr/bin/python3 -m pip install -U --user pip Pyinstaller==4.5.1 mutagen pycryptodomex websockets
+        /usr/bin/python3 -m pip install -U --user pip Pyinstaller==4.5.1 -r requirements.txt
     - name: Bump version
       id: bump_version
       run: /usr/bin/python3 devscripts/update-version.py

@@ -192,11 +192,9 @@ jobs:
       run: echo "::set-output name=sha512_macos::$(sha512sum dist/yt-dlp_macos | awk '{print $1}')"

     - name: Run PyInstaller Script with --onedir
-      run: /usr/bin/python3 pyinst.py --target-architecture universal2 --onedir
-    - uses: papeloto/action-zip@v1
-      with:
-        files: ./dist/yt-dlp_macos
-        dest: ./dist/yt-dlp_macos.zip
+      run: |
+        /usr/bin/python3 pyinst.py --target-architecture universal2 --onedir
+        zip ./dist/yt-dlp_macos.zip ./dist/yt-dlp_macos
     - name: Upload yt-dlp MacOS onedir
       id: upload-release-macos-zip
       uses: actions/upload-release-asset@v1

@@ -210,7 +208,7 @@ jobs:
     - name: Get SHA2-256SUMS for yt-dlp_macos.zip
       id: sha256_macos_zip
       run: echo "::set-output name=sha256_macos_zip::$(sha256sum dist/yt-dlp_macos.zip | awk '{print $1}')"
-    - name: Get SHA2-512SUMS for yt-dlp_macos
+    - name: Get SHA2-512SUMS for yt-dlp_macos.zip
       id: sha512_macos_zip
       run: echo "::set-output name=sha512_macos_zip::$(sha512sum dist/yt-dlp_macos.zip | awk '{print $1}')"

@@ -236,7 +234,7 @@ jobs:
       # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
       run: |
         python -m pip install --upgrade pip setuptools wheel py2exe
-        pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets
+        pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" -r requirements.txt
     - name: Bump version
       id: bump_version
       env:

@@ -265,11 +263,9 @@ jobs:
       run: echo "::set-output name=sha512_win::$((Get-FileHash dist\yt-dlp.exe -Algorithm SHA512).Hash.ToLower())"

     - name: Run PyInstaller Script with --onedir
-      run: python pyinst.py --onedir
-    - uses: papeloto/action-zip@v1
-      with:
-        files: ./dist/yt-dlp
-        dest: ./dist/yt-dlp_win.zip
+      run: |
+        python pyinst.py --onedir
+        Compress-Archive -LiteralPath ./dist/yt-dlp -DestinationPath ./dist/yt-dlp_win.zip
     - name: Upload yt-dlp Windows onedir
       id: upload-release-windows-zip
       uses: actions/upload-release-asset@v1

@@ -325,7 +321,7 @@ jobs:
     - name: Install Requirements
       run: |
         python -m pip install --upgrade pip setuptools wheel
-        pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets
+        pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.5.1-py3-none-any.whl" -r requirements.txt
     - name: Bump version
       id: bump_version
       env:

.gitignore (vendored, 5 changes)
@@ -14,7 +14,10 @@ cookies
 *.frag.urls
 *.info.json
 *.live_chat.json
+*.meta
 *.part*
+*.tmp
+*.temp
 *.unknown_video
 *.ytdl
 .cache/

@@ -89,7 +92,7 @@ README.txt
 *.tar.gz
 *.zsh
 *.spec
-test/testdata/player-*.js
+test/testdata/sigs/player-*.js
 
 # Binary
 /youtube-dl

CONTRIBUTING.md (111 changes)
@@ -19,6 +19,7 @@
 - [Provide fallbacks](#provide-fallbacks)
 - [Regular expressions](#regular-expressions)
 - [Long lines policy](#long-lines-policy)
+- [Quotes](#quotes)
 - [Inline values](#inline-values)
 - [Collapse fallbacks](#collapse-fallbacks)
 - [Trailing parentheses](#trailing-parentheses)

@@ -31,9 +32,9 @@
 
 Bugs and suggestions should be reported at: [yt-dlp/yt-dlp/issues](https://github.com/yt-dlp/yt-dlp/issues). Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in our [discord server](https://discord.gg/H5MNcFW63r).
 
-**Please include the full output of yt-dlp when run with `-Uv`**, i.e. **add** `-Uv` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
+**Please include the full output of yt-dlp when run with `-vU`**, i.e. **add** `-vU` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
 ```
-$ yt-dlp -Uv <your command line>
+$ yt-dlp -vU <your command line>
 [debug] Command-line config: ['-v', 'demo.com']
 [debug] Encodings: locale UTF-8, fs utf-8, out utf-8, pref UTF-8
 [debug] yt-dlp version 2021.09.25 (zip)

@@ -64,7 +65,7 @@ So please elaborate on what feature you are requesting, or what bug you want to
 
 If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. We often get frustrated by these issues, since the only possible way for us to move forward on them is to ask for clarification over and over.
 
-For bug reports, this means that your report should contain the **complete** output of yt-dlp when called with the `-Uv` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
+For bug reports, this means that your report should contain the **complete** output of yt-dlp when called with the `-vU` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
 
 If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--write-pages` and upload the `.dump` files you get [somewhere](https://gist.github.com).
 
@@ -112,7 +113,7 @@ If the issue is with `youtube-dl` (the upstream fork of yt-dlp) and not with yt-
 
 ### Are you willing to share account details if needed?
 
-The maintainers and potential contributors of the project often do not have an account for the website you are asking support for. So any developer interested in solving your issue may ask you for account details. It is your personal discression whether you are willing to share the account in order for the developer to try and solve your issue. However, if you are unwilling or unable to provide details, they obviously cannot work on the issue and it cannot be solved unless some developer who both has an account and is willing/able to contribute decides to solve it.
+The maintainers and potential contributors of the project often do not have an account for the website you are asking support for. So any developer interested in solving your issue may ask you for account details. It is your personal discretion whether you are willing to share the account in order for the developer to try and solve your issue. However, if you are unwilling or unable to provide details, they obviously cannot work on the issue and it cannot be solved unless some developer who both has an account and is willing/able to contribute decides to solve it.
 
 By sharing an account with anyone, you agree to bear all risks associated with it. The maintainers and yt-dlp can't be held responsible for any misuse of the credentials.
 
@@ -251,7 +252,11 @@ For extraction to work yt-dlp relies on metadata your extractor extracts and pro
 - `title` (media title)
 - `url` (media download URL) or `formats`
 
-The aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. While, in fact, only `id` is technically mandatory, due to compatibility reasons, yt-dlp also treats `title` as mandatory. The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract usefull information with `--ignore-no-formats-error` - Eg: when the video is a live stream that has not started yet.
+The aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. While all extractors must return a `title`, they must also allow it's extraction to be non-fatal.
+
+For pornographic sites, appropriate `age_limit` must also be returned.
+
+The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract usefull information with `--ignore-no-formats-error` - Eg: when the video is a live stream that has not started yet.
 
 [Any field](yt_dlp/extractor/common.py#219-L426) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.

@@ -452,10 +457,14 @@ Here the presence or absence of other attributes including `style` is irrelevent
 
 ### Long lines policy
 
-There is a soft limit to keep lines of code under 100 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse. Sometimes, it may be reasonable to go upto 120 characters and sometimes even 80 can be unreadable. Keep in mind that this is not a hard limit and is just one of many tools to make the code more readable
+There is a soft limit to keep lines of code under 100 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse. Sometimes, it may be reasonable to go upto 120 characters and sometimes even 80 can be unreadable. Keep in mind that this is not a hard limit and is just one of many tools to make the code more readable.
 
 For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
 
+Conversely, don't unecessarily split small lines further. As a rule of thumb, if removing the line split keeps the code under 80 characters, it should be a single line.
+
+##### Examples
+
 Correct:
 
 ```python

@@ -469,6 +478,47 @@ Incorrect:
     'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
 ```
 
+Correct:
+
+```python
+uploader = traverse_obj(info, ('uploader', 'name'), ('author', 'fullname'))
+```
+
+Incorrect:
+
+```python
+uploader = traverse_obj(
+    info,
+    ('uploader', 'name'),
+    ('author', 'fullname'))
+```
+
+Correct:
+
+```python
+formats = self._extract_m3u8_formats(
+    m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
+    note='Downloading HD m3u8 information', errnote='Unable to download HD m3u8 information')
+```
+
+Incorrect:
+
+```python
+formats = self._extract_m3u8_formats(m3u8_url,
+                                     video_id,
+                                     'mp4',
+                                     'm3u8_native',
+                                     m3u8_id='hls',
+                                     note='Downloading HD m3u8 information',
+                                     errnote='Unable to download HD m3u8 information')
+```
+
+
+### Quotes
+
+Always use single quotes for strings (even if the string has `'`) and double quotes for docstrings. Use `'''` only for multi-line strings. An exception can be made if a string has multiple single quotes in it and escaping makes it significantly harder to read. For f-strings, use you can use double quotes on the inside. But avoid f-strings that have too many quotes inside.
+
+
 ### Inline values
 
 Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.

@@ -518,27 +568,68 @@ Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`,
 
 ### Trailing parentheses
 
-Always move trailing parentheses after the last argument.
+Always move trailing parentheses used for grouping/functions after the last argument. On the other hand, literal list/tuple/dict/set should closed be in a new line. Generators and list/dict comprehensions may use either style
 
-Note that this *does not* apply to braces `}` or square brackets `]` both of which should closed be in a new line
-
-#### Example
+#### Examples
 
 Correct:
 
 ```python
     url = try_get(
         info,
         lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
         list)
 ```
+
+Correct:
+
+```python
+    url = try_get(info,
+                  lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+                  list)
+```
 
 Incorrect:
 
 ```python
     url = try_get(
         info,
         lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
         list,
     )
 ```
 
+Correct:
+
+```python
+f = {
+    'url': url,
+    'format_id': format_id,
+}
+```
+
+Incorrect:
+
+```python
+f = {'url': url,
+     'format_id': format_id}
+```
+
+Correct:
+
+```python
+formats = [process_formats(f) for f in format_data
+           if f.get('type') in ('hls', 'dash', 'direct') and f.get('downloadable')]
+```
+
+Correct:
+
+```python
+formats = [
+    process_formats(f) for f in format_data
+    if f.get('type') in ('hls', 'dash', 'direct') and f.get('downloadable')
+]
+```
 
 
 ### Use convenience conversion and parsing functions

CONTRIBUTORS (18 changes)
@@ -2,6 +2,7 @@ pukkandan (owner)
 shirt-dev (collaborator)
 coletdjnz/colethedj (collaborator)
 Ashish0804 (collaborator)
+nao20010128nao/Lesmiscore (collaborator)
 h-h-h-h
 pauldubois98
 nixxo

@@ -19,7 +20,6 @@ samiksome
 alxnull
 FelixFrog
 Zocker1999NET
-nao20010128nao
 kurumigi
 bbepis
 animelover1984/horahoradev

@@ -178,3 +178,19 @@ jaller94
 r5d
 julien-hadleyjack
 git-anony-mouse
+mdawar
+trassshhub
+foghawk
+k3ns1n
+teridon
+mozlima
+timendum
+ischmidt20
+CreaValix
+sian1468
+arkamar
+hyano
+KiberInfinity
+tejing1
+Bricio
+lazypete365

Changelog.md (179 changes)
@@ -11,6 +11,185 @@
 -->
 
 
+### 2022.02.03
+
+* Merge youtube-dl: Upto [commit/78ce962](https://github.com/ytdl-org/youtube-dl/commit/78ce962f4fe020994c216dd2671546fbe58a5c67)
+* Add option `--print-to-file` (a usage sketch follows this list)
+* Make nested --config-locations relative to parent file
+* Ensure `_type` is present in `info.json`
+* Fix `--compat-options list-formats`
+* Fix/improve `InAdvancePagedList`
+* [downloader/ffmpeg] Handle unknown formats better
+* [outtmpl] Handle `-o ""` better
+* [outtmpl] Handle hard-coded file extension better
+* [extractor] Add convinience function `_yes_playlist`
+* [extractor] Allow non-fatal `title` extraction
+* [extractor] Extract video inside `Article` json_ld
+* [generic] Allow further processing of json_ld URL
+* [cookies] Fix keyring selection for unsupported desktops
+* [utils] Strip double spaces in `clean_html` by [dirkf](https://github.com/dirkf)
+* [aes] Add `unpad_pkcs7`
+* [test] Fix `test_youtube_playlist_noplaylist`
+* [docs,cleanup] Misc cleanup
+* [dplay] Add extractors for site changes by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [ertgr] Add extractors by [zmousm](https://github.com/zmousm), [dirkf](https://github.com/dirkf)
+* [Musicdex] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [YandexVideoPreview] Add extractor by [KiberInfinity](https://github.com/KiberInfinity)
+* [youtube] Add extractor `YoutubeMusicSearchURLIE`
+* [archive.org] Ignore unnecessary files
+* [Bilibili] Add 8k support by [u-spec-png](https://github.com/u-spec-png)
+* [bilibili] Fix extractor, make anthology title non-fatal
+* [CAM4] Add thumbnail extraction by [alerikaisattera](https://github.com/alerikaisattera)
+* [cctv] De-prioritize sample format
+* [crunchyroll:beta] Add cookies support by [tejing1](https://github.com/tejing1)
+* [crunchyroll] Fix login by [tejing1](https://github.com/tejing1)
+* [doodstream] Fix extractor
+* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [FFmpegConcat] Abort on --skip-download and download errors
+* [Fujitv] Extract metadata and support premium by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [globo] Fix extractor by [Bricio](https://github.com/Bricio)
+* [glomex] Simplify embed detection
+* [GoogleSearch] Fix extractor
+* [Instagram] Fix extraction when logged in by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [iq.com] Add VIP support by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [mildom] Fix extractor by [lazypete365](https://github.com/lazypete365)
+* [MySpass] Fix video url processing by [trassshhub](https://github.com/trassshhub)
+* [Odnoklassniki] Improve embedded players extraction by [KiberInfinity](https://github.com/KiberInfinity)
+* [orf:tvthek] Lazy playlist extraction and obey --no-playlist
+* [Pladform] Fix redirection to external player by [KiberInfinity](https://github.com/KiberInfinity)
+* [ThisOldHouse] Improve Premium URL check by [Ashish0804](https://github.com/Ashish0804)
+* [TikTok] Iterate through app versions by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [tumblr] Fix 403 errors and handle vimeo embeds by [foghawk](https://github.com/foghawk)
+* [viki] Fix "Bad request" for manifest by [nyuszika7h](https://github.com/nyuszika7h)
+* [Vimm] add recording extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [web.archive:youtube] Add `ytarchive:` prefix and misc cleanup
+* [youtube:api] Do not use seek when reading HTTPError response by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix n-sig for player e06dea74
+* [youtube, cleanup] Misc fixes and cleanup
+
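An editorial usage sketch, not part of the changelog itself: `--print-to-file` takes a print template plus a destination file, reusing the output-template syntax, and (like `--print`) it implies simulation unless `--no-simulate` is given. The log file name and URL below are illustrative:

```sh
# Append each downloaded video's id and title to a running log file
yt-dlp --no-simulate --print-to-file "%(id)s %(title)s" downloaded.log \
    "https://www.youtube.com/watch?v=BaW_jenozKc"
```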
+### 2022.01.21
+
+* Add option `--concat-playlist` to **concat videos in a playlist** (a usage sketch follows this list)
+* Allow **multiple and nested configuration files**
+* Add more post-processing stages (`after_video`, `playlist`)
+* Allow `--exec` to be run at any post-processing stage (Deprecates `--exec-before-download`)
+* Allow `--print` to be run at any post-processing stage
+* Allow listing formats, thumbnails, subtitles using `--print` by [pukkandan](https://github.com/pukkandan), [Zirro](https://github.com/Zirro)
+* Add fields `video_autonumber`, `modified_date`, `modified_timestamp`, `playlist_count`, `channel_follower_count`
+* Add key `requested_downloads` in the root `info_dict`
+* Write `download_archive` only after all formats are downloaded
+* [FfmpegMetadata] Allow setting metadata of individual streams using `meta<n>_` prefix
+* Add option `--legacy-server-connect` by [xtkoba](https://github.com/xtkoba)
+* Allow escaped `,` in `--extractor-args`
+* Allow unicode characters in `info.json`
+* Check for existing thumbnail/subtitle in final directory
+* Don't treat empty containers as `None` in `sanitize_info`
+* Fix `-s --ignore-no-formats --force-write-archive`
+* Fix live title for multiple formats
+* List playlist thumbnails in `--list-thumbnails`
+* Raise error if subtitle download fails
+* [cookies] Fix bug when keyring is unspecified
+* [ffmpeg] Ignore unknown streams, standardize use of `-map 0`
+* [outtmpl] Alternate form for `D` and fix suffix's case
+* [utils] Add `Sec-Fetch-Mode` to `std_headers`
+* [utils] Fix `format_bytes` output for Bytes by [pukkandan](https://github.com/pukkandan), [mdawar](https://github.com/mdawar)
+* [utils] Handle `ss:xxx` in `parse_duration`
+* [utils] Improve parsing for nested HTML elements by [zmousm](https://github.com/zmousm), [pukkandan](https://github.com/pukkandan)
+* [utils] Use key `None` in `traverse_obj` to return as-is
+* [extractor] Detect more subtitle codecs in MPD manifests by [fstirlitz](https://github.com/fstirlitz)
+* [extractor] Extract chapters from JSON-LD by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
+* [extractor] Extract thumbnails from JSON-LD by [nixxo](https://github.com/nixxo)
+* [extractor] Improve `url_result` and related
+* [generic] Improve KVS player extraction by [trassshhub](https://github.com/trassshhub)
+* [build] Reduce dependency on third party workflows
+* [extractor,cleanup] Use `_search_nextjs_data`, `format_field`
+* [cleanup] Minor fixes and cleanup
+* [docs] Improvements
+* [test] Fix TestVerboseOutput
+* [afreecatv] Add livestreams extractor by [wlritchi](https://github.com/wlritchi)
+* [callin] Add extractor by [foghawk](https://github.com/foghawk)
+* [CrowdBunker] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [daftsex] Add extractors by [k3ns1n](https://github.com/k3ns1n)
+* [digitalconcerthall] Add extractor by [teridon](https://github.com/teridon)
+* [Drooble] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [EuropeanTour] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [iq.com] Add extractors by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [KelbyOne] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [LnkIE] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [MainStreaming] Add extractor by [coletdjnz](https://github.com/coletdjnz)
+* [megatvcom] Add extractors by [zmousm](https://github.com/zmousm)
+* [Newsy] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [noodlemagazine] Add extractor by [trassshhub](https://github.com/trassshhub)
+* [PokerGo] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [Pornez] Add extractor by [mozlima](https://github.com/mozlima)
+* [PRX] Add Extractors by [coletdjnz](https://github.com/coletdjnz)
+* [RTNews] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Rule34video] Add extractor by [trassshhub](https://github.com/trassshhub)
+* [tvopengr] Add extractors by [zmousm](https://github.com/zmousm)
+* [Vimm] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [glomex] Add extractors by [zmousm](https://github.com/zmousm)
+* [instagram] Add story/highlight extractor by [u-spec-png](https://github.com/u-spec-png)
+* [openrec] Add movie extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [rai] Add Raiplaysound extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
+* [aparat] Fix extractor
+* [ard] Extract subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [BiliIntl] Add login by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [CeskaTelevize] Use `http` for manifests
+* [CTVNewsIE] Add fallback for video search by [Ashish0804](https://github.com/Ashish0804)
+* [dplay] Migrate DiscoveryPlusItaly to DiscoveryPlus by [timendum](https://github.com/timendum)
+* [dplay] Re-structure DiscoveryPlus extractors
+* [Dropbox] Support password protected files and more formats by [zenerdi0de](https://github.com/zenerdi0de)
+* [facebook] Fix extraction from groups
+* [facebook] Improve title and uploader extraction
+* [facebook] Parse dash manifests
+* [fox] Extract m3u8 from preview by [ischmidt20](https://github.com/ischmidt20)
+* [funk] Support origin URLs
+* [gfycat] Fix `uploader`
+* [gfycat] Support embeds by [coletdjnz](https://github.com/coletdjnz)
+* [hotstar] Add extractor args to ignore tags by [Ashish0804](https://github.com/Ashish0804)
+* [hrfernsehen] Fix ardloader extraction by [CreaValix](https://github.com/CreaValix)
+* [instagram] Fix username extraction for stories and highlights by [nyuszika7h](https://github.com/nyuszika7h)
+* [kakao] Detect geo-restriction
+* [line] Remove `tv.line.me` by [sian1468](https://github.com/sian1468)
+* [mixch] Add `MixchArchiveIE` by [Lesmiscore](https://github.com/Lesmiscore)
+* [mixcloud] Detect restrictions by [llacb47](https://github.com/llacb47)
+* [NBCSports] Fix extraction of platform URLs by [ischmidt20](https://github.com/ischmidt20)
+* [Nexx] Extract more metadata by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Nexx] Support 3q CDN by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [pbs] de-prioritize AD formats
+* [PornHub,YouTube] Refresh onion addresses by [unit193](https://github.com/unit193)
+* [RedBullTV] Parse subtitles from manifest by [Ashish0804](https://github.com/Ashish0804)
+* [streamcz] Fix extractor by [arkamar](https://github.com/arkamar), [pukkandan](https://github.com/pukkandan)
+* [Ted] Rewrite extractor by [pukkandan](https://github.com/pukkandan), [trassshhub](https://github.com/trassshhub)
+* [Theta] Fix valid URL by [alerikaisattera](https://github.com/alerikaisattera)
+* [ThisOldHouseIE] Add support for premium videos by [Ashish0804](https://github.com/Ashish0804)
+* [TikTok] Fix extraction for sigi-based webpages, add API fallback by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [TikTok] Pass cookies to formats, and misc fixes by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [TikTok] Extract captions, user thumbnail by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [TikTok] Change app version by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
+* [TVer] Extract message for unaired live by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitcasting] Refactor extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitter] Fix video in quoted tweets
+* [veoh] Improve extractor by [foghawk](https://github.com/foghawk)
+* [vk] Capture `clip` URLs
+* [vk] Fix VKUserVideosIE by [Ashish0804](https://github.com/Ashish0804)
+* [vk] Improve `_VALID_URL` by [k3ns1n](https://github.com/k3ns1n)
+* [VrtNU] Handle empty title by [pgaig](https://github.com/pgaig)
+* [XVideos] Check HLS formats by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [yahoo:gyao] Improved playlist handling by [hyano](https://github.com/hyano)
+* [youtube:tab] Extract more playlist metadata by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [youtube:tab] Raise error on tab redirect by [krichbanana](https://github.com/krichbanana), [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Update Innertube clients by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Detect live-stream embeds
+* [youtube] Do not return `upload_date` for playlists
+* [youtube] Extract channel subscriber count by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Make invalid storyboard URL non-fatal
+* [youtube] Enforce UTC, update innertube clients and tests by [coletdjnz](https://github.com/coletdjnz)
+* [zdf] Add chapter extraction by [iw0nderhow](https://github.com/iw0nderhow)
+* [zee5] Add geo-bypass
+
+
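An editorial usage sketch for `--concat-playlist`, assuming the option's `never`/`always`/`multi_video` values and reusing a playlist ID that appears in the CONTRIBUTING.md examples above:

```sh
# Download a playlist, then splice all entries into a single file afterwards;
# concatenation is done by the FFmpegConcat postprocessor, so ffmpeg is required
yt-dlp --concat-playlist always \
    "https://www.youtube.com/playlist?list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4"
```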
### 2021.12.27
* Avoid recursion error when re-extracting info

Collaborators.md
@@ -36,5 +36,15 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho
 
 [](https://ko-fi.com/ashish0804)
 
-* Added support for new websites Zee5, MXPlayer, DiscoveryPlusIndia, ShemarooMe, Utreon etc
-* Added playlist/series downloads for TubiTv, SonyLIV, Voot, HotStar etc
+* Added support for new websites BiliIntl, DiscoveryPlusIndia, OlympicsReplay, PlanetMarathi, ShemarooMe, Utreon, Zee5 etc
+* Added playlist/series downloads for Hotstar, ParamountPlus, Rumble, SonyLIV, Trovo, TubiTv, Voot etc
+* Improved/fixed support for HiDive, HotStar, Hungama, LBRY, LinkedInLearning, Mxplayer, SonyLiv, TV2, Vimeo, VLive etc
+
+
+## [Lesmiscore](https://github.com/Lesmiscore) (nao20010128nao)
+
+**Bitcoin**: bc1qfd02r007cutfdjwjmyy9w23rjvtls6ncve7r3s
+**Monacoin**: mona1q3tf7dzvshrhfe3md379xtvt2n22duhglv5dskr
+
+* Download live from start to end for YouTube
+* Added support for new websites mildom, PixivSketch, skeb, radiko, voicy, mirrativ, openrec, whowatch, damtomo, 17.live, mixch etc

Makefile (9 changes)
@@ -1,5 +1,6 @@
 all: lazy-extractors yt-dlp doc pypi-files
-clean: clean-test clean-dist clean-cache
+clean: clean-test clean-dist
+clean-all: clean clean-cache
 completions: completion-bash completion-fish completion-zsh
 doc: README.md CONTRIBUTING.md issuetemplates supportedsites
 ot: offlinetest

@@ -13,15 +14,15 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites com
 .PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites
 
 clean-test:
-	rm -rf test/testdata/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
-	*.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.part* *.unknown_video *.ytdl \
+	rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
+	*.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \
 	*.3gp *.ape *.avi *.desktop *.flac *.flv *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 \
 	*.mp4 *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
 clean-dist:
 	rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
 	yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap
 clean-cache:
-	find . -name "*.pyc" -o -name "*.class" -delete
+	find . \( -name "*.pyc" -o -name "*.class" \) -delete
 
 completion-bash: completions/bash/yt-dlp
 completion-fish: completions/fish/yt-dlp.fish
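The `clean-cache` change above is a behaviour fix, not a style tweak: in `find`, the implicit AND between a test and an action binds tighter than `-o`, so the old command only ever deleted `*.class` files. A minimal shell illustration (safe to try in a scratch directory):

```sh
# Ungrouped: parsed as  -name "*.pyc"  OR  ( -name "*.class" AND -delete ),
# so matching *.pyc files were listed but never deleted:
#   find . -name "*.pyc" -o -name "*.class" -delete
# Grouped: -delete now applies to files matching either pattern:
find . \( -name "*.pyc" -o -name "*.class" \) -delete
```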

README.md (234 changes)
@@ -3,17 +3,17 @@
 
 [](#readme)
 
-[](https://github.com/yt-dlp/yt-dlp/releases/latest)
-[](https://github.com/yt-dlp/yt-dlp/actions)
-[](LICENSE)
-[](Collaborators.md#collaborators)
-[](supportedsites.md)
-[](https://discord.gg/H5MNcFW63r)
-[](https://yt-dlp.readthedocs.io)
-[](https://github.com/yt-dlp/yt-dlp/commits)
-[](https://github.com/yt-dlp/yt-dlp/commits)
+[](https://github.com/yt-dlp/yt-dlp/releases/latest)
+[](https://pypi.org/project/yt-dlp)
+[](#release-files "Release")
+[](LICENSE "License")
+[](Collaborators.md#collaborators "Donate")
+[](https://readthedocs.org/projects/yt-dlp/ "Docs")
+[](supportedsites.md "Supported Sites")
+[](https://pypi.org/project/yt-dlp "PyPi")
+[](https://github.com/yt-dlp/yt-dlp/actions "CI Status")
+[](https://discord.gg/H5MNcFW63r "Discord")
+[](https://matrix.to/#/#yt-dlp:matrix.org "Matrix")
+[](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
+[](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
 
 </div>
 <!-- MANPAGE: END EXCLUDED SECTION -->
@@ -71,7 +71,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
 
 # NEW FEATURES
 
-* Based on **youtube-dl 2021.12.17 [commit/5014bd6](https://github.com/ytdl-org/youtube-dl/commit/5014bd67c22b421207b2650d4dc874b95b36dda1)** and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
+* Based on **youtube-dl 2021.12.17 [commit/78ce962](https://github.com/ytdl-org/youtube-dl/commit/78ce962f4fe020994c216dd2671546fbe58a5c67)** and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
 
 * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
 
@@ -88,7 +88,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
|
||||
* Redirect channel's home URL automatically to `/video` to preserve the old behaviour
|
||||
* `255kbps` audio is extracted (if available) from youtube music when premium cookies are given
|
||||
* Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723))
|
||||
* Download livestreams from the start using `--live-from-start`
|
||||
* Download livestreams from the start using `--live-from-start` (experimental)
|
||||
|
||||
* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]`
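
  For illustration, a minimal invocation of this option might look like the following (the URL is a placeholder):

  ```
  # Reuse the cookies of an installed browser instead of exporting a cookies.txt
  $ yt-dlp --cookies-from-browser firefox "https://www.youtube.com/watch?v=EXAMPLE"
  ```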

@@ -110,9 +110,9 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t

* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`

* **Other new options**: Many new options have been added such as `--print`, `--wait-for-video`, `--sleep-requests`, `--convert-thumbnails`, `--write-link`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc
* **Other new options**: Many new options have been added such as `--concat-playlist`, `--print`, `--wait-for-video`, `--sleep-requests`, `--convert-thumbnails`, `--write-link`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc

* **Improvements**: Regex and other operators in `--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio etc
* **Improvements**: Regex and other operators in `--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc

* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details

@@ -133,12 +133,12 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
* `--ignore-errors` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead
* When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files
* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-info-json`. Use `--no-embed-info-json` or `--compat-options no-attach-info-json` to revert this
* Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this
* Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this
* `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
* The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
* All *experiences* of a funimation episode are considered as a single video. This behavior breaks existing archives. Use `--compat-options seperate-video-versions` to extract information from only the default player
* Youtube live chat (if available) is considered as a subtitle. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent live chat from downloading
* Youtube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
* Youtube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. For all other tabs, if the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
* Unavailable videos are also listed for youtube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this
* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead
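
As a sketch of how these flags combine, someone wanting the old `-F` listing format and no live-chat "subtitle" could pass both compat options at once (the URL is a placeholder):

```
$ yt-dlp --compat-options list-formats,no-live-chat "https://www.youtube.com/watch?v=EXAMPLE"
```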
@@ -157,8 +157,19 @@ You can install yt-dlp using one of the following methods:

### Using the release binary

You can simply download the [correct binary file](#release-files) for your OS: **[[Windows](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)] [[UNIX-like](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)]**
You can simply download the [correct binary file](#release-files) for your OS

<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
[](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)
[](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)
[](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
[](#release-files)
[](https://github.com/yt-dlp/yt-dlp/releases)
<!-- MANPAGE: END EXCLUDED SECTION -->

Note: The manpages, shell completion files etc. are available in the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)

<!-- TODO: Move to Wiki -->
In UNIX-like OSes (MacOS, Linux, BSD), you can also install the same in one of the following ways:

```
@@ -176,7 +187,6 @@ sudo aria2c https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp --d
sudo chmod a+rx /usr/local/bin/yt-dlp
```

PS: The manpages, shell completion files etc. are available in [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)

### With [PIP](https://pypi.org/project/pip)

@@ -197,6 +207,7 @@ python3 -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archiv

Note that on some systems, you may need to use `py` or `python` instead of `python3`

<!-- TODO: Add to Wiki, Remove Taps -->
### With [Homebrew](https://brew.sh)

macOS or Linux users who are using Homebrew can also install it by:
@@ -267,7 +278,7 @@ To use or redistribute the dependencies, you must agree to their respective lice

The Windows and MacOS standalone release binaries are already built with the python interpreter, mutagen, pycryptodomex and websockets included.

**Note**: There are some regressions in newer ffmpeg versions that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds/wiki/Latest#latest-autobuilds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
**Note**: There are some regressions in newer ffmpeg versions that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds


## COMPILE
@@ -327,22 +338,27 @@ You can also fork the project on github and run your fork's [build workflow](.gi
an error. The default value "fixup_error"
repairs broken URLs, but emits an error if
this is not possible instead of searching
--ignore-config, --no-config Disable loading any configuration files
except the one provided by --config-location.
When given inside a configuration
file, no further configuration files are
loaded. Additionally, (for backward
compatibility) if this option is found
inside the system configuration file, the
user configuration is not loaded
--config-location PATH Location of the main configuration file;
--ignore-config Don't load any more configuration files
except those given by --config-locations.
For backward compatibility, if this option
is found inside the system configuration
file, the user configuration is not loaded.
(Alias: --no-config)
--no-config-locations Do not load any custom configuration files
(default). When given inside a
configuration file, ignore all previous
--config-locations defined in the current
file
--config-locations PATH Location of the main configuration file;
either the path to the config or its
containing directory
containing directory. Can be used multiple
times and inside other configuration files
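
A sketch of the multi-config usage described above; both file paths are hypothetical:

```
# Load two configuration files in the given order
$ yt-dlp --config-locations ~/yt-dlp-base.conf --config-locations ./project.conf "https://example.com/video"
```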
--flat-playlist Do not extract the videos of a playlist,
only list them
--no-flat-playlist Extract the videos of a playlist
--live-from-start Download livestreams from the start.
Currently only supported for YouTube
(Experimental)
--no-live-from-start Download livestreams from the current time
(default)
--wait-for-video MIN[-MAX] Wait for scheduled streams to become
@@ -363,8 +379,9 @@ You can also fork the project on github and run your fork's [build workflow](.gi
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
To enable SOCKS proxy, specify a proper
scheme. For example
socks5://127.0.0.1:1080/. Pass in an empty
string (--proxy "") for direct connection
socks5://user:pass@127.0.0.1:1080/. Pass in
an empty string (--proxy "") for direct
connection
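
For example, to route all traffic through a local SOCKS5 proxy (the URL is a placeholder):

```
$ yt-dlp --proxy socks5://127.0.0.1:1080/ "https://example.com/video"
# An empty string forces a direct connection:
$ yt-dlp --proxy "" "https://example.com/video"
```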
--socket-timeout SECONDS Time to wait before giving up, in seconds
--source-address IP Client-side IP address to bind to
-4, --force-ipv4 Make all connections via IPv4
@@ -377,7 +394,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
option is not present) is used for the
actual downloading
--geo-bypass Bypass geographic restriction via faking
X-Forwarded-For HTTP header
X-Forwarded-For HTTP header (default)
--no-geo-bypass Do not bypass geographic restriction via
faking X-Forwarded-For HTTP header
--geo-bypass-country CODE Force bypass geographic restriction with
@@ -514,8 +531,8 @@ You can also fork the project on github and run your fork's [build workflow](.gi
example, --downloader aria2c --downloader
"dash,m3u8:native" will use aria2c for
http/ftp downloads, and the native
downloader for dash/m3u8 downloads
(Alias: --external-downloader)
downloader for dash/m3u8 downloads (Alias:
--external-downloader)
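
The example from the help text, written out as a full command (the URL is a placeholder):

```
# aria2c for plain http/ftp downloads, the native downloader for dash/m3u8
$ yt-dlp --downloader aria2c --downloader "dash,m3u8:native" "https://example.com/video"
```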
--downloader-args NAME:ARGS Give these arguments to the external
downloader. Specify the downloader name and
the arguments separated by a colon ":". For
@@ -523,8 +540,8 @@ You can also fork the project on github and run your fork's [build workflow](.gi
different positions using the same syntax
as --postprocessor-args. You can use this
option multiple times to give different
arguments to different downloaders
(Alias: --external-downloader-args)
arguments to different downloaders (Alias:
--external-downloader-args)

## Filesystem Options:
-a, --batch-file FILE File containing URLs to download ("-" for
@@ -598,8 +615,8 @@ You can also fork the project on github and run your fork's [build workflow](.gi
without this option if the extraction is
known to be quick (Alias: --get-comments)
--no-write-comments Do not retrieve video comments unless the
extraction is known to be quick
(Alias: --no-get-comments)
extraction is known to be quick (Alias:
--no-get-comments)
--load-info-json FILE JSON file containing the video information
(created with the "--write-info-json"
option)
@@ -662,10 +679,20 @@ You can also fork the project on github and run your fork's [build workflow](.gi
formats are found (default)
--skip-download Do not download the video but write all
related files (Alias: --no-download)
-O, --print TEMPLATE Quiet, but print the given fields for each
video. Simulate unless --no-simulate is
used. Either a field name or same syntax as
the output template can be used
-O, --print [WHEN:]TEMPLATE Field name or output template to print to
screen, optionally prefixed with when to
print it, separated by a ":". Supported
values of "WHEN" are the same as that of
--use-postprocessor, and "video" (default).
Implies --quiet and --simulate (unless
--no-simulate is used). This option can be
used multiple times
--print-to-file [WHEN:]TEMPLATE FILE
Append given template to the file. The
values of WHEN and TEMPLATE are same as
that of --print. FILE uses the same syntax
as the output template. This option can be
used multiple times
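
A sketch combining the two options above, assuming the "WHEN" values listed under --use-postprocessor (the URL is a placeholder; --no-simulate is needed so that the download actually happens and "after_move" fires):

```
# Print the title before download and log each final filepath after it is moved
$ yt-dlp --no-simulate -O "pre_process:%(title)s" --print-to-file "after_move:%(filepath)s" downloaded.log "https://example.com/video"
```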
-j, --dump-json Quiet, but print JSON information for each
video. Simulate unless --no-simulate is
used. See "OUTPUT TEMPLATE" for a
@@ -703,6 +730,9 @@ You can also fork the project on github and run your fork's [build workflow](.gi

## Workarounds:
--encoding ENCODING Force the specified encoding (experimental)
--legacy-server-connect Explicitly allow HTTPS connection to
servers that do not support RFC 5746 secure
renegotiation
--no-check-certificates Suppress HTTPS certificate validation
--prefer-insecure Use an unencrypted connection to retrieve
information about the video (Currently
@@ -781,9 +811,9 @@ You can also fork the project on github and run your fork's [build workflow](.gi
be regex) or "all" separated by commas.
(Eg: --sub-langs "en.*,ja") You can prefix
the language code with a "-" to exempt it
from the requested languages. (Eg: --sub-
langs all,-live_chat) Use --list-subs for a
list of available language tags
from the requested languages. (Eg:
--sub-langs all,-live_chat) Use --list-subs
for a list of available language tags
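
For example, to grab every English subtitle track while skipping YouTube live chat (the URL is a placeholder):

```
$ yt-dlp --write-subs --sub-langs "en.*,-live_chat" "https://www.youtube.com/watch?v=EXAMPLE"
```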

## Authentication Options:
-u, --username USERNAME Login with this account ID
@@ -885,6 +915,15 @@ You can also fork the project on github and run your fork's [build workflow](.gi
multiple times
--xattrs Write metadata to the video file's xattrs
(using dublin core and xdg standards)
--concat-playlist POLICY Concatenate videos in a playlist. One of
"never", "always", or "multi_video"
(default; only when the videos form a
single show). All the video files must have
same codecs and number of streams to be
concatable. The "pl_video:" prefix can be
used with "--paths" and "--output" to set
the output filename for the split files.
See "OUTPUT TEMPLATE" for details
--fixup POLICY Automatically correct known faults of the
file. One of never (do nothing), warn (only
emit a warning), detect_or_warn (the
@@ -894,23 +933,20 @@ You can also fork the project on github and run your fork's [build workflow](.gi
--ffmpeg-location PATH Location of the ffmpeg binary; either the
path to the binary or its containing
directory
--exec CMD Execute a command on the file after
downloading and post-processing. Same
syntax as the output template can be used
to pass any field as arguments to the
command. An additional field "filepath"
--exec [WHEN:]CMD Execute a command, optionally prefixed with
when to execute it (after_move if
unspecified), separated by a ":". Supported
values of "WHEN" are the same as that of
--use-postprocessor. Same syntax as the
output template can be used to pass any
field as arguments to the command. After
download, an additional field "filepath"
that contains the final path of the
downloaded file is also available. If no
fields are passed, %(filepath)q is appended
to the end of the command. This option can
be used multiple times
--no-exec Remove any previously defined --exec
--exec-before-download CMD Execute a command before the actual
download. The syntax is the same as --exec
but "filepath" is not available. This
downloaded file is also available, and if
no fields are passed, %(filepath)q is
appended to the end of the command. This
option can be used multiple times
--no-exec-before-download Remove any previously defined
--exec-before-download
--no-exec Remove any previously defined --exec
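
For instance, to run a command on each file once it has reached its final location (the URL is a placeholder):

```
# %(filepath)q is quoted for the shell; "after_move:" is also the default WHEN
$ yt-dlp --exec "after_move:echo %(filepath)q" "https://example.com/video"
```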
--convert-subs FORMAT Convert the subtitles to another format
(currently supported: srt|vtt|ass|lrc)
(Alias: --convert-subtitles)
@@ -949,10 +985,12 @@ You can also fork the project on github and run your fork's [build workflow](.gi
"pre_process" (after extraction),
"before_dl" (before video download),
"post_process" (after video download;
default) or "after_move" (after moving file
to their final locations). This option can
be used multiple times to add different
postprocessors
default), "after_move" (after moving file
to their final locations), "after_video"
(after downloading and processing all
formats of a video), or "playlist" (end of
playlist). This option can be used multiple
times to add different postprocessors

## SponsorBlock Options:
Make chapter entries for, or remove various segments (sponsor,
@@ -1012,7 +1050,7 @@ You can configure yt-dlp by placing any supported command line option to a confi

1. **Main Configuration**: The file given by `--config-location`
1. **Portable Configuration**: `yt-dlp.conf` in the same directory as the bundled binary. If you are running from source-code (`<root dir>/yt_dlp/__main__.py`), the root directory is used instead.
1. **Home Configuration**: `yt-dlp.conf` in the home path given by `-P "home:<path>"`, or in the current directory if no such path is given
1. **Home Configuration**: `yt-dlp.conf` in the home path given by `-P`, or in the current directory if no such path is given
1. **User Configuration**:
* `%XDG_CONFIG_HOME%/yt-dlp/config` (recommended on Linux/macOS)
* `%XDG_CONFIG_HOME%/yt-dlp.conf`
@@ -1090,7 +1128,7 @@ The field names themselves (the part inside the parenthesis) can also have some

1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`

1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q`, `D`, `S` can be used for converting to **B**ytes, **j**son (flag `#` for pretty-printing), a comma separated **l**ist (flag `#` for `\n` newline-separated), a string **q**uoted for the terminal (flag `#` to split a list into different arguments), to add **D**ecimal suffixes (Eg: 10M), and to **S**anitize as filename (flag `#` for restricted), respectively
1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q`, `D`, `S` can be used for converting to **B**ytes, **j**son (flag `#` for pretty-printing), a comma separated **l**ist (flag `#` for `\n` newline-separated), a string **q**uoted for the terminal (flag `#` to split a list into different arguments), to add **D**ecimal suffixes (Eg: 10M) (flag `#` to use 1024 as factor), and to **S**anitize as filename (flag `#` for restricted), respectively

1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC

@@ -1099,12 +1137,13 @@ To summarize, the general syntax for a field is:
```
%(name[.keys][addition][>strf][,alternate][&replacement][|default])[flags][width][.precision][length]type
```
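
A sketch exercising two of the conversions described above, `S` (sanitize as filename) and `D` (decimal suffix); the URL is a placeholder:

```
$ yt-dlp --print "%(title)#S" --print "%(filesize)D" "https://example.com/video"
```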

Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates (except default) is empty, that type of file will not be written. Eg: `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video.
Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. For example, `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. Eg: `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video.

The available fields are:

- `id` (string): Video identifier
- `title` (string): Video title
- `fulltitle` (string): Video title ignoring live timestamp and generic title
- `url` (string): Video URL
- `ext` (string): Video filename extension
- `alt_title` (string): A secondary title of the video
@@ -1115,11 +1154,14 @@ The available fields are:
- `creator` (string): The creator of the video
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
- `upload_date` (string): Video upload date (YYYYMMDD)
- `release_date` (string): The date (YYYYMMDD) when the video was released
- `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
- `release_date` (string): The date (YYYYMMDD) when the video was released
- `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified
- `modified_date` (string): The date (YYYYMMDD) when the video was last modified
- `uploader_id` (string): Nickname or id of the video uploader
- `channel` (string): Full name of the channel the video is uploaded on
- `channel_id` (string): Id of the channel
- `channel_follower_count` (numeric): Number of followers of the channel
- `location` (string): Physical location where the video was filmed
- `duration` (numeric): Length of the video in seconds
- `duration_string` (string): Length of the video (HH:mm:ss)
@@ -1157,14 +1199,16 @@ The available fields are:
- `protocol` (string): The protocol that will be used for the actual download
- `extractor` (string): Name of the extractor
- `extractor_key` (string): Key name of the extractor
- `epoch` (numeric): Unix epoch when creating the file
- `epoch` (numeric): Unix epoch of when the information extraction was completed
- `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
- `video_autonumber` (numeric): Number that will be increased with each video
- `n_entries` (numeric): Total number of extracted items in the playlist
- `playlist` (string): Name or id of the playlist that contains the video
- `playlist_id` (string): Identifier of the playlist that contains the video
- `playlist_title` (string): Name of the playlist that contains the video
- `playlist` (string): `playlist_id` or `playlist_title`
- `playlist_count` (numeric): Total number of items in the playlist. May not be known if entire playlist is not extracted
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according the final index
- `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist
- `playlist_id` (string): Playlist identifier
- `playlist_title` (string): Playlist title
- `playlist_uploader` (string): Full name of the playlist uploader
- `playlist_uploader_id` (string): Nickname or id of the playlist uploader
- `webpage_url` (string): A URL to the video webpage which if given to yt-dlp should allow to get the same result again
@@ -1212,6 +1256,11 @@ Available only when used in `--print`:

- `urls` (string): The URLs of all requested formats, one in each line
- `filename` (string): Name of the video file. Note that the actual filename may be different due to post-processing. Use `--exec echo` to get the name after all postprocessing is complete
- `formats_table` (table): The video format table as printed by `--list-formats`
- `thumbnails_table` (table): The thumbnail format table as printed by `--list-thumbnails`
- `subtitles_table` (table): The subtitle format table as printed by `--list-subs`
- `automatic_captions_table` (table): The automatic subtitle format table as printed by `--list-subs`


Available only in `--sponsorblock-chapter-title`:

@@ -1376,10 +1425,10 @@ The available fields are:

- `hasvid`: Gives priority to formats that have a video stream
- `hasaud`: Gives priority to formats that have an audio stream
- `ie_pref`: The format preference as given by the extractor
- `lang`: Language preference as given by the extractor
- `quality`: The quality of the format as given by the extractor
- `source`: Preference of the source as given by the extractor
- `ie_pref`: The format preference
- `lang`: The language preference
- `quality`: The quality of the format
- `source`: The preference of the source
- `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native`/`m3u8` > `http_dash_segments`> `websocket_frag` > `mms`/`rtsp` > `f4f`/`f4m`)
- `vcodec`: Video Codec (`av01` > `vp9.2` > `vp9` > `h265` > `h264` > `vp8` > `h263` > `theora` > other)
- `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `eac3` > `ac3` > `dts` > other)
@@ -1504,7 +1553,7 @@ $ yt-dlp -S "proto"


# Download the best video with h264 codec, or the best video if there is no such video
$ yt-dlp -f "(bv*+ba/b)[vcodec^=avc1] / (bv*+ba/b)"
$ yt-dlp -f "(bv*[vcodec^=avc1]+ba) / (bv*+ba/b)"

# Download the best video with best codec no better than h264,
# or the best video with worst codec if there is no such video
@@ -1547,7 +1596,7 @@ Note that any field created by this can be used in the [output template](#output

This option also has a few special uses:
* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. Eg: `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)"` will download the first vimeo video found in the description
* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis". Any value set to the `meta_` field will overwrite all default values.
* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta<n>_` prefix (Eg: `meta1_language`). Any value set to the `meta_` field will overwrite all default values.
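
As an illustrative sketch (the URL is a placeholder), the uploader name could be copied into the embedded `artist` field this way:

```
$ yt-dlp --embed-metadata --parse-metadata "%(uploader)s:%(meta_artist)s" "https://example.com/video"
```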

For reference, these are the fields yt-dlp adds by default to the file metadata:

@@ -1622,6 +1671,11 @@ The following extractors use this feature:
* `language`: Languages to extract. Eg: `crunchyroll:language=jaJp`
* `hardsub`: Which hard-sub versions to extract. Eg: `crunchyroll:hardsub=None,enUS`

#### crunchyroll:beta
* `format`: Which stream type(s) to extract. Default is `adaptive_hls` Eg: `crunchyrollbeta:format=vo_adaptive_hls`
* Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `trailer_hls`, `trailer_dash`
* `hardsub`: Preference order for which hardsub versions to extract. Default is `None` (no hardsubs). Eg: `crunchyrollbeta:hardsub=en-US,None`
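
These keys are passed through `--extractor-args`; a hedged example using the values documented above (the URL is a placeholder):

```
$ yt-dlp --extractor-args "crunchyrollbeta:hardsub=en-US,None" "https://beta.crunchyroll.com/watch/EXAMPLE"
```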

#### vikichannel
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`

@@ -1631,6 +1685,15 @@ The following extractors use this feature:
#### gamejolt
* `comment_sort`: `hot` (default), `you` (cookies needed), `top`, `new` - choose comment sorting mode (on GameJolt's side)

#### hotstar
* `res`: resolution to ignore - one or more of `sd`, `hd`, `fhd`
* `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265`
* `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`

#### tiktok
* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`. (e.g. `20.2.1`)
* `manifest_app_version`: Numeric app version to call mobile APIs with. (e.g. `221`)
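
A sketch assuming the usual `;`-separated key syntax of `--extractor-args` (the URL is a placeholder):

```
$ yt-dlp --extractor-args "tiktok:app_version=20.2.1;manifest_app_version=221" "https://www.tiktok.com/@user/video/EXAMPLE"
```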

NOTE: These options may be changed/removed in the future without concern for backward compatibility

<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
@@ -1666,7 +1729,7 @@ with YoutubeDL(ydl_opts) as ydl:
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```

Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L162).
Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L191).

Here's a more complete example demonstrating various functionality:

@@ -1772,6 +1835,14 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl:

These are all the deprecated options and the current alternative to achieve the same effect

#### Almost redundant options
While these options are almost the same as their new counterparts, there are some differences that prevent them from being redundant

-j, --dump-json --print "%()j"
-F, --list-formats --print formats_table
--list-thumbnails --print thumbnails_table --print playlist:thumbnails_table
--list-subs --print automatic_captions_table --print subtitles_table

#### Redundant options
While these options are redundant, they are still expected to be used due to their ease of use

@@ -1783,7 +1854,6 @@ While these options are redundant, they are still expected to be used due to the
--get-thumbnail --print thumbnail
-e, --get-title --print title
-g, --get-url --print urls
-j, --dump-json --print "%()j"
--match-title REGEX --match-filter "title ~= (?i)REGEX"
--reject-title REGEX --match-filter "title !~= (?i)REGEX"
--min-views COUNT --match-filter "view_count >=? COUNT"
@@ -1793,6 +1863,8 @@ While these options are redundant, they are still expected to be used due to the
#### Not recommended
While these options still work, their use is not recommended since there are other alternatives to achieve the same effect

--exec-before-download CMD --exec "before_dl:CMD"
--no-exec-before-download --no-exec
--all-formats -f all
--all-subs --sub-langs all --write-subs
--print-json -j --no-simulate

@@ -41,6 +41,7 @@
- **aenetworks:collection**
- **aenetworks:show**
- **afreecatv**: afreecatv.com
- **afreecatv:live**: afreecatv.com
- **AirMozilla**
- **AliExpressLive**
- **AlJazeera**
@@ -53,6 +54,7 @@
- **AMCNetworks**
- **AmericasTestKitchen**
- **AmericasTestKitchenSeason**
- **AmHistoryChannel**
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **AnimalPlanet**
- **AnimeLab**
@@ -162,6 +164,7 @@
- **BuzzFeed**
- **BYUtv**
- **CableAV**
- **Callin**
- **CAM4**
- **Camdemy**
- **CamdemyFolder**
@@ -225,6 +228,7 @@
- **ComedyCentralTV**
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
- **CONtv**
- **CookingChannel**
- **Corus**
- **Coub**
- **CozyTV**
@@ -232,6 +236,8 @@
- **Cracked**
- **Crackle**
- **CrooksAndLiars**
- **CrowdBunker**
- **CrowdBunkerChannel**
- **crunchyroll**
- **crunchyroll:beta**
- **crunchyroll:playlist**
@@ -246,6 +252,7 @@
- **curiositystream:collections**
- **curiositystream:series**
- **CWTV**
- **Daftsex**
- **DagelijkseKost**: dagelijksekost.een.be
- **DailyMail**
- **dailymotion**
@@ -263,20 +270,20 @@
- **DeezerPlaylist**
- **defense.gouv.fr**
- **democracynow**
- **DestinationAmerica**
- **DHM**: Filmarchiv - Deutsches Historisches Museum
- **Digg**
- **DigitalConcertHall**: DigitalConcertHall extractor
- **DigitallySpeaking**
- **Digiteka**
- **Discovery**
- **DiscoveryGo**
- **DiscoveryGoPlaylist**
- **DiscoveryLife**
- **DiscoveryNetworksDe**
- **DiscoveryPlus**
- **DiscoveryPlusIndia**
- **DiscoveryPlusIndiaShow**
- **DiscoveryPlusItaly**
- **DiscoveryPlusItalyShow**
- **DiscoveryVR**
- **Disney**
- **DIYNetwork**
- **dlive:stream**
@@ -288,6 +295,7 @@
- **DouyuTV**: 斗鱼
- **DPlay**
- **DRBonanza**
- **Drooble**
- **Dropbox**
- **Dropout**
- **DropoutSeason**
@@ -324,12 +332,16 @@
- **Eporner**
- **EroProfile**
- **EroProfile:album**
- **ertflix**: ERTFLIX videos
- **ertflix:codename**: ERTFLIX videos by codename
- **ertwebtv:embed**: ert.gr webtv embedded videos
- **Escapist**
- **ESPN**
- **ESPNArticle**
- **ESPNCricInfo**
- **EsriVideo**
- **Europa**
- **EuropeanTour**
- **EUScreen**
- **EWETV**
- **ExpoTV**
@@ -352,6 +364,7 @@
- **FiveTV**
- **Flickr**
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
- **FoodNetwork**
- **FootyRoom**
- **Formula1**
- **FOX**
@@ -407,7 +420,10 @@
- **Glide**: Glide mobile video messages (glide.me)
- **Globo**
- **GloboArticle**
- **glomex**: Glomex videos
- **glomex:embed**: Glomex embedded videos
- **Go**
- **GoDiscovery**
- **GodTube**
- **Gofile**
- **Golem**
@@ -429,6 +445,7 @@
- **hetklokhuis**
- **hgtv.com:show**
- **HGTVDe**
- **HGTVUsa**
- **HiDive**
- **HistoricFilms**
- **history:player**
@@ -470,13 +487,17 @@
- **IndavideoEmbed**
- **InfoQ**
- **Instagram**
- **instagram:story**
- **instagram:tag**: Instagram hashtag search URLs
- **instagram:user**: Instagram user profile
- **InstagramIOS**: IOS instagram:// URL
- **Internazionale**
- **InternetVideoArchive**
- **InvestigationDiscovery**
- **IPrima**
- **IPrimaCNN**
- **iq.com**: International version of iQiyi
- **iq.com:album**
- **iqiyi**: 爱奇艺
- **Ir90Tv**
- **ITTF**
@@ -500,6 +521,7 @@
- **KarriereVideos**
- **Katsomo**
- **KeezMovies**
- **KelbyOne**
- **Ketnet**
- **khanacademy**
- **khanacademy:unit**
@@ -545,7 +567,6 @@
- **limelight:channel_list**
- **LineLive**
- **LineLiveChannel**
- **LineTV**
- **LinkedIn**
- **linkedin:learning**
- **linkedin:learning:course**
@@ -554,6 +575,7 @@
- **LiveJournal**
- **livestream**
- **livestream:original**
- **Lnk**
- **LnkGo**
- **loc**: Library of Congress
- **LocalNews8**
@@ -566,6 +588,7 @@
- **mailru**: Видео@Mail.Ru
- **mailru:music**: Музыка@Mail.Ru
- **mailru:music:search**: Музыка@Mail.Ru
- **MainStreaming**: MainStreaming Player
- **MallTV**
- **mangomolo:live**
- **mangomolo:video**
@@ -592,6 +615,8 @@
- **MediasiteNamedCatalog**
- **Medici**
- **megaphone.fm**: megaphone.fm embedded players
- **megatvcom**: megatv.com videos
- **megatvcom:embed**: megatv.com embedded videos
- **Meipai**: 美拍
- **MelonVOD**
- **META**
@@ -615,6 +640,7 @@
- **mirrativ:user**
- **MiTele**: mitele.es
- **mixch**
- **mixch:archive**
- **mixcloud**
- **mixcloud:playlist**
- **mixcloud:user**
@@ -647,6 +673,10 @@
- **MTVUutisetArticle**
- **MuenchenTV**: münchen.tv
- **MuseScore**
- **MusicdexAlbum**
- **MusicdexArtist**
- **MusicdexPlaylist**
- **MusicdexSong**
- **mva**: Microsoft Virtual Academy videos
- **mva:course**: Microsoft Virtual Academy courses
- **Mwave**
@@ -704,6 +734,7 @@
- **Newgrounds:playlist**
- **Newgrounds:user**
- **Newstube**
- **Newsy**
- **NextMedia**: 蘋果日報
- **NextMediaActionNews**: 蘋果日報 - 動新聞
- **NextTV**: 壹電視
@@ -733,6 +764,7 @@
- **NJPWWorld**: 新日本プロレスワールド
- **NobelPrize**
- **NonkTube**
- **NoodleMagazine**
- **Noovo**
- **Normalboots**
- **NosVideo**
@@ -785,6 +817,7 @@
- **OpencastPlaylist**
- **openrec**
- **openrec:capture**
- **openrec:movie**
- **OraTV**
- **orf:burgenland**: Radio Burgenland
- **orf:fm4**: radio FM4
@@ -856,6 +889,8 @@
- **podomatic**
- **Pokemon**
- **PokemonWatch**
- **PokerGo**
- **PokerGoCollection**
- **PolsatGo**
- **PolskieRadio**
- **polskieradio:kierowcow**
@@ -867,6 +902,7 @@
- **PopcornTV**
- **PornCom**
- **PornerBros**
- **Pornez**
- **PornFlip**
- **PornHd**
- **PornHub**: PornHub and Thumbzilla
@@ -881,6 +917,11 @@
- **PressTV**
- **ProjectVeritas**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **PRXAccount**
- **PRXSeries**
- **prxseries:search**: PRX Series Search; "prxseries:" prefix
- **prxstories:search**: PRX Stories Search; "prxstories:" prefix
- **PRXStory**
- **puhutv**
- **puhutv:serie**
- **Puls4**
@@ -914,8 +955,9 @@
- **RaiPlay**
- **RaiPlayLive**
- **RaiPlayPlaylist**
- **RaiPlayRadio**
- **RaiPlayRadioPlaylist**
- **RaiPlaySound**
- **RaiPlaySoundLive**
- **RaiPlaySoundPlaylist**
- **RayWenderlich**
- **RayWenderlichCourse**
- **RBMARadio**
@@ -950,12 +992,15 @@
- **Roxwel**
- **Rozhlas**
- **RTBF**
- **RTDocumentry**
- **RTDocumentryPlaylist**
- **rte**: Raidió Teilifís Éireann TV
- **rte:radio**: Raidió Teilifís Éireann radio
- **rtl.nl**: rtl.nl and rtlxl.nl
- **rtl2**
- **rtl2:you**
- **rtl2:you:series**
- **RTNews**
- **RTP**
- **RTRFM**
- **RTS**: RTS.ch
@@ -967,8 +1012,10 @@
- **RTVNH**
- **RTVS**
- **RUHD**
- **Rule34Video**
- **RumbleChannel**
- **RumbleEmbed**
- **Ruptly**
- **rutube**: Rutube videos
- **rutube:channel**: Rutube channel
- **rutube:embed**: Rutube embedded videos
@@ -1109,7 +1156,10 @@
- **TeamTreeHouse**
- **TechTalks**
- **techtv.mit.edu**
- **ted**
- **TedEmbed**
- **TedPlaylist**
- **TedSeries**
- **TedTalk**
- **Tele13**
- **Tele5**
- **TeleBruxelles**
@@ -1148,6 +1198,7 @@
- **tiktok:tag**
- **tiktok:user**
- **tinypic**: tinypic.com videos
- **TLC**
- **TMZ**
- **TNAFlix**
- **TNAFlixNetworkEmbed**
@@ -1160,6 +1211,7 @@
- **Toypics**: Toypics video
- **ToypicsUser**: Toypics user profile
- **TrailerAddict** (Currently broken)
- **TravelChannel**
- **Trilulilu**
- **Trovo**
- **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix
@@ -1207,6 +1259,8 @@
- **TVNowNew**
- **TVNowSeason**
- **TVNowShow**
- **tvopengr:embed**: tvopen.gr embedded videos
- **tvopengr:watch**: tvopen.gr (and ethnos.gr) videos
- **tvp**: Telewizja Polska
- **tvp:embed**: Telewizja Polska
- **tvp:series**
@@ -1270,7 +1324,7 @@
- **Viddler**
- **Videa**
- **video.arnes.si**: Arnes Video
- **video.google:search**: Google Video search; "gvsearch:" prefix (Currently broken)
- **video.google:search**: Google Video search; "gvsearch:" prefix
- **video.sky.it**
- **video.sky.it:live**
- **VideoDetective**
@@ -1299,6 +1353,8 @@
- **vimeo:review**: Review pages on vimeo
- **vimeo:user**
- **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
- **Vimm:recording**
- **Vimm:stream**
- **Vimple**: Vimple - one-click video hosting
- **Vine**
- **vine:user**
@@ -1351,7 +1407,7 @@
- **wdr:mobile**
- **WDRElefant**
- **WDRPage**
- **web.archive:youtube**: web.archive.org saved youtube videos
- **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix
- **Webcaster**
- **WebcasterFeed**
- **WebOfStories**
@@ -1402,6 +1458,7 @@
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- **yandexmusic:track**: Яндекс.Музыка - Трек
- **YandexVideo**
- **YandexVideoPreview**
- **YapFiles**
- **YesJapan**
- **yinyuetai:video**: 音悦Tai
@@ -1418,6 +1475,7 @@
- **youtube**: YouTube
- **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies)
- **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies)
- **youtube:music:search_url**: YouTube music search URLs with selectable sections (Eg: #songs)
- **youtube:playlist**: YouTube playlists
- **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword
- **youtube:search**: YouTube search; "ytsearch:" prefix
@@ -1425,9 +1483,10 @@
- **youtube:search_url**: YouTube search URLs with sorting and filter support
- **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
- **youtube:tab**: YouTube Tabs
- **youtube:user**: YouTube user videos; "ytuser:" prefix
- **youtube:watchlater**: Youtube watch later list; ":ytwatchlater" keyword (requires cookies)
- **YoutubeLivestreamEmbed**: YouTube livestream embeds
- **YoutubeYtBe**: youtu.be
- **YoutubeYtUser**: YouTube user videos; "ytuser:" prefix
- **Zapiks**
- **Zattoo**
- **ZattooLive**

@@ -211,7 +211,7 @@ def sanitize_got_info_dict(got_dict):

# Auto-generated
'autonumber', 'playlist', 'format_index', 'video_ext', 'audio_ext', 'duration_string', 'epoch',
'fulltitle', 'extractor', 'extractor_key', 'filepath', 'infojson_filename', 'original_url',
'fulltitle', 'extractor', 'extractor_key', 'filepath', 'infojson_filename', 'original_url', 'n_entries',

# Only live_status needs to be checked
'is_live', 'was_live',
@@ -220,10 +220,12 @@ def sanitize_got_info_dict(got_dict):
IGNORED_PREFIXES = ('', 'playlist', 'requested', 'webpage')

def sanitize(key, value):
if isinstance(value, str) and len(value) > 100:
if isinstance(value, str) and len(value) > 100 and key != 'thumbnail':
return f'md5:{md5(value)}'
elif isinstance(value, list) and len(value) > 10:
return f'count:{len(value)}'
elif key.endswith('_count') and isinstance(value, int):
return int
return value

test_info_dict = {
@@ -233,7 +235,7 @@ def sanitize_got_info_dict(got_dict):
}

# display_id may be generated from id
if test_info_dict.get('display_id') == test_info_dict['id']:
if test_info_dict.get('display_id') == test_info_dict.get('id'):
test_info_dict.pop('display_id')

return test_info_dict
@@ -259,6 +261,8 @@ def expect_info_dict(self, got_dict, expected_dict):
def _repr(v):
if isinstance(v, compat_str):
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
elif isinstance(v, type):
return v.__name__
else:
return repr(v)
info_dict_str = ''

@@ -208,6 +208,91 @@ class TestInfoExtractor(unittest.TestCase):
},
{'expected_type': 'NewsArticle'},
),
(
r'''<script type="application/ld+json">
{"url":"/vrtnu/a-z/het-journaal/2021/het-journaal-het-journaal-19u-20211231/",
"name":"Het journaal 19u",
"description":"Het journaal 19u van vrijdag 31 december 2021.",
"potentialAction":{"url":"https://vrtnu.page.link/pfVy6ihgCAJKgHqe8","@type":"ShareAction"},
"mainEntityOfPage":{"@id":"1640092242445","@type":"WebPage"},
"publication":[{
"startDate":"2021-12-31T19:00:00.000+01:00",
"endDate":"2022-01-30T23:55:00.000+01:00",
"publishedBy":{"name":"een","@type":"Organization"},
"publishedOn":{"url":"https://www.vrt.be/vrtnu/","name":"VRT NU","@type":"BroadcastService"},
"@id":"pbs-pub-3a7ec233-da95-4c1e-9b2b-cf5fdfebcbe8",
"@type":"BroadcastEvent"
}],
"video":{
"name":"Het journaal - Aflevering 365 (Seizoen 2021)",
"description":"Het journaal 19u van vrijdag 31 december 2021. Bekijk aflevering 365 van seizoen 2021 met VRT NU via de site of app.",
"thumbnailUrl":"//images.vrt.be/width1280/2021/12/31/80d5ed00-6a64-11ec-b07d-02b7b76bf47f.jpg",
"expires":"2022-01-30T23:55:00.000+01:00",
"hasPart":[
{"name":"Explosie Turnhout","startOffset":70,"@type":"Clip"},
{"name":"Jaarwisseling","startOffset":440,"@type":"Clip"},
{"name":"Natuurbranden Colorado","startOffset":1179,"@type":"Clip"},
{"name":"Klimaatverandering","startOffset":1263,"@type":"Clip"},
{"name":"Zacht weer","startOffset":1367,"@type":"Clip"},
{"name":"Financiële balans","startOffset":1383,"@type":"Clip"},
{"name":"Club Brugge","startOffset":1484,"@type":"Clip"},
{"name":"Mentale gezondheid bij topsporters","startOffset":1575,"@type":"Clip"},
{"name":"Olympische Winterspelen","startOffset":1728,"@type":"Clip"},
{"name":"Sober oudjaar in Nederland","startOffset":1873,"@type":"Clip"}
],
"duration":"PT34M39.23S",
"uploadDate":"2021-12-31T19:00:00.000+01:00",
"@id":"vid-9457d0c6-b8ac-4aba-b5e1-15aa3a3295b5",
"@type":"VideoObject"
},
"genre":["Nieuws en actua"],
"episodeNumber":365,
"partOfSeries":{"name":"Het journaal","@id":"222831405527","@type":"TVSeries"},
"partOfSeason":{"name":"Seizoen 2021","@id":"961809365527","@type":"TVSeason"},
"@context":"https://schema.org","@id":"961685295527","@type":"TVEpisode"}</script>
''',
{
'chapters': [
{"title": "Explosie Turnhout", "start_time": 70, "end_time": 440},
{"title": "Jaarwisseling", "start_time": 440, "end_time": 1179},
{"title": "Natuurbranden Colorado", "start_time": 1179, "end_time": 1263},
{"title": "Klimaatverandering", "start_time": 1263, "end_time": 1367},
{"title": "Zacht weer", "start_time": 1367, "end_time": 1383},
{"title": "Financiële balans", "start_time": 1383, "end_time": 1484},
{"title": "Club Brugge", "start_time": 1484, "end_time": 1575},
{"title": "Mentale gezondheid bij topsporters", "start_time": 1575, "end_time": 1728},
{"title": "Olympische Winterspelen", "start_time": 1728, "end_time": 1873},
{"title": "Sober oudjaar in Nederland", "start_time": 1873, "end_time": 2079.23}
],
'title': 'Het journaal - Aflevering 365 (Seizoen 2021)'
}, {}
),
(
# test multiple thumbnails in a list
r'''
<script type="application/ld+json">
{"@context":"https://schema.org",
"@type":"VideoObject",
"thumbnailUrl":["https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"]}
</script>''',
{
'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
},
{},
),
(
# test single thumbnail
r'''
<script type="application/ld+json">
{"@context":"https://schema.org",
"@type":"VideoObject",
"thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"}
</script>''',
{
'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
},
{},
)
]
for html, expected_dict, search_json_ld_kwargs in _TESTS:
expect_dict(

@@ -30,6 +30,7 @@ class YDL(FakeYDL):
self.msgs = []

def process_info(self, info_dict):
info_dict = info_dict.copy()
info_dict.pop('__original_infodict', None)
self.downloaded_info_dicts.append(info_dict)

@@ -645,6 +646,7 @@ class TestYoutubeDL(unittest.TestCase):
'ext': 'mp4',
'width': None,
'height': 1080,
'filesize': 1024,
'title1': '$PATH',
'title2': '%PATH%',
'title3': 'foo/bar\\test',
@@ -778,8 +780,9 @@ class TestYoutubeDL(unittest.TestCase):
test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀')
test('%(title5)+U', 'áéí A')
test('%(title5)+#U', 'a\u0301e\u0301i\u0301 A')
test('%(height)D', '1K')
test('%(height)5.2D', ' 1.08K')
test('%(height)D', '1k')
test('%(filesize)#D', '1Ki')
test('%(height)5.2D', ' 1.08k')
test('%(title4)#S', 'foo_bar_test')
test('%(title4).10S', ('foo \'bar\' ', 'foo \'bar\'' + ('#' if compat_os_name == 'nt' else ' ')))
if compat_os_name == 'nt':
@@ -906,7 +909,7 @@ class TestYoutubeDL(unittest.TestCase):
def _match_entry(self, info_dict, incomplete=False):
res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
if res is None:
self.downloaded_info_dicts.append(info_dict)
self.downloaded_info_dicts.append(info_dict.copy())
return res

first = {
@@ -1151,6 +1154,7 @@ class TestYoutubeDL(unittest.TestCase):
self.assertTrue(entries[1] is None)
self.assertEqual(len(ydl.downloaded_info_dicts), 1)
downloaded = ydl.downloaded_info_dicts[0]
entries[2].pop('requested_downloads', None)
self.assertEqual(entries[2], downloaded)
self.assertEqual(downloaded['url'], TEST_URL)
self.assertEqual(downloaded['title'], 'Video Transparent 2')

@@ -53,7 +53,7 @@ class YoutubeDL(yt_dlp.YoutubeDL):
raise ExtractorError(message)

def process_info(self, info_dict):
self.processed_info_dicts.append(info_dict)
self.processed_info_dicts.append(info_dict.copy())
return super(YoutubeDL, self).process_info(info_dict)

@@ -1,26 +0,0 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from yt_dlp.options import _hide_login_info
|
||||
|
||||
|
||||
class TestOptions(unittest.TestCase):
|
||||
def test_hide_login_info(self):
|
||||
self.assertEqual(_hide_login_info(['-u', 'foo', '-p', 'bar']),
|
||||
['-u', 'PRIVATE', '-p', 'PRIVATE'])
|
||||
self.assertEqual(_hide_login_info(['-u']), ['-u'])
|
||||
self.assertEqual(_hide_login_info(['-u', 'foo', '-u', 'bar']),
|
||||
['-u', 'PRIVATE', '-u', 'PRIVATE'])
|
||||
self.assertEqual(_hide_login_info(['--username=foo']),
|
||||
['--username=PRIVATE'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -13,7 +13,7 @@ from test.helper import FakeYDL, md5, is_download_test
from yt_dlp.extractor import (
YoutubeIE,
DailymotionIE,
TEDIE,
TedTalkIE,
VimeoIE,
WallaIE,
CeskaTelevizeIE,
@@ -141,7 +141,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
@is_download_test
class TestTedSubtitles(BaseTestSubtitles):
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
IE = TEDIE
IE = TedTalkIE

def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True

@@ -23,6 +23,7 @@ from yt_dlp.utils import (
caesar,
clean_html,
clean_podcast_url,
Config,
date_from_str,
datetime_from_str,
DateRange,
@@ -37,11 +38,18 @@ from yt_dlp.utils import (
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
format_bytes,
float_or_none,
get_element_by_class,
get_element_by_attribute,
get_elements_by_class,
get_elements_by_attribute,
get_element_html_by_class,
get_element_html_by_attribute,
get_elements_html_by_class,
get_elements_html_by_attribute,
get_elements_text_and_html_by_attribute,
get_element_text_and_html_by_tag,
InAdvancePagedList,
int_or_none,
intlist_to_bytes,
@@ -116,6 +124,7 @@ from yt_dlp.compat import (
compat_chr,
compat_etree_fromstring,
compat_getenv,
compat_HTMLParseError,
compat_os_name,
compat_setenv,
)
@@ -634,6 +643,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
self.assertEqual(parse_duration('01:02:03:050'), 3723.05)
self.assertEqual(parse_duration('103:050'), 103.05)

def test_fix_xml_ampersands(self):
self.assertEqual(
@@ -1122,7 +1133,7 @@ class TestUtil(unittest.TestCase):

def test_clean_html(self):
self.assertEqual(clean_html('a:\nb'), 'a: b')
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')

def test_intlist_to_bytes(self):
@@ -1573,46 +1584,116 @@ Line 1
self.assertEqual(urshift(3, 1), 1)
self.assertEqual(urshift(-3, 1), 2147483646)

GET_ELEMENT_BY_CLASS_TEST_STRING = '''
<span class="foo bar">nice</span>
'''

def test_get_element_by_class(self):
html = '''
<span class="foo bar">nice</span>
'''
html = self.GET_ELEMENT_BY_CLASS_TEST_STRING

self.assertEqual(get_element_by_class('foo', html), 'nice')
self.assertEqual(get_element_by_class('no-such-class', html), None)

def test_get_element_html_by_class(self):
html = self.GET_ELEMENT_BY_CLASS_TEST_STRING

self.assertEqual(get_element_html_by_class('foo', html), html.strip())
self.assertEqual(get_element_by_class('no-such-class', html), None)

GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING = '''
<div itemprop="author" itemscope>foo</div>
'''

def test_get_element_by_attribute(self):
html = '''
<span class="foo bar">nice</span>
'''
html = self.GET_ELEMENT_BY_CLASS_TEST_STRING

self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)

html = '''
<div itemprop="author" itemscope>foo</div>
'''
html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING

self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')

def test_get_element_html_by_attribute(self):
html = self.GET_ELEMENT_BY_CLASS_TEST_STRING

self.assertEqual(get_element_html_by_attribute('class', 'foo bar', html), html.strip())
self.assertEqual(get_element_html_by_attribute('class', 'foo', html), None)
self.assertEqual(get_element_html_by_attribute('class', 'no-such-foo', html), None)

html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING

self.assertEqual(get_element_html_by_attribute('itemprop', 'author', html), html.strip())

GET_ELEMENTS_BY_CLASS_TEST_STRING = '''
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
'''
GET_ELEMENTS_BY_CLASS_RES = ['<span class="foo bar">nice</span>', '<span class="foo bar">also nice</span>']

def test_get_elements_by_class(self):
html = '''
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
'''
html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING

self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
self.assertEqual(get_elements_by_class('no-such-class', html), [])

def test_get_elements_html_by_class(self):
html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING

self.assertEqual(get_elements_html_by_class('foo', html), self.GET_ELEMENTS_BY_CLASS_RES)
self.assertEqual(get_elements_html_by_class('no-such-class', html), [])

def test_get_elements_by_attribute(self):
html = '''
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
'''
html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING

self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])

def test_get_elements_html_by_attribute(self):
html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING

self.assertEqual(get_elements_html_by_attribute('class', 'foo bar', html), self.GET_ELEMENTS_BY_CLASS_RES)
self.assertEqual(get_elements_html_by_attribute('class', 'foo', html), [])
self.assertEqual(get_elements_html_by_attribute('class', 'no-such-foo', html), [])

def test_get_elements_text_and_html_by_attribute(self):
html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING

self.assertEqual(
list(get_elements_text_and_html_by_attribute('class', 'foo bar', html)),
list(zip(['nice', 'also nice'], self.GET_ELEMENTS_BY_CLASS_RES)))
self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'foo', html)), [])
self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'no-such-foo', html)), [])

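As the assertions above pin down, the new `get_elements_text_and_html_by_attribute` is a generator of (text, html) pairs rather than a list of plain strings. A small usage sketch:

    from yt_dlp.utils import get_elements_text_and_html_by_attribute

    html = '<span class="foo bar">nice</span><span class="foo bar">also nice</span>'
    for text, element_html in get_elements_text_and_html_by_attribute('class', 'foo bar', html):
        print(text, element_html)
    # nice <span class="foo bar">nice</span>
    # also nice <span class="foo bar">also nice</span>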
GET_ELEMENT_BY_TAG_TEST_STRING = '''
random text lorem ipsum</p>
<div>
this should be returned
<span>this should also be returned</span>
<div>
this should also be returned
</div>
closing tag above should not trick, so this should also be returned
</div>
but this text should not be returned
'''
GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[32:276]
GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT = GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML[5:-6]
GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119]
GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7]

def test_get_element_text_and_html_by_tag(self):
html = self.GET_ELEMENT_BY_TAG_TEST_STRING

self.assertEqual(
get_element_text_and_html_by_tag('div', html),
(self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT, self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML))
self.assertEqual(
get_element_text_and_html_by_tag('span', html),
(self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML))
self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html)

def test_iri_to_uri(self):
self.assertEqual(
iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'),
@@ -1688,6 +1769,27 @@ Line 1
ll = reversed(ll)
test(ll, -15, 14, range(15))

def test_format_bytes(self):
self.assertEqual(format_bytes(0), '0.00B')
self.assertEqual(format_bytes(1000), '1000.00B')
self.assertEqual(format_bytes(1024), '1.00KiB')
self.assertEqual(format_bytes(1024**2), '1.00MiB')
self.assertEqual(format_bytes(1024**3), '1.00GiB')
self.assertEqual(format_bytes(1024**4), '1.00TiB')
self.assertEqual(format_bytes(1024**5), '1.00PiB')
self.assertEqual(format_bytes(1024**6), '1.00EiB')
self.assertEqual(format_bytes(1024**7), '1.00ZiB')
self.assertEqual(format_bytes(1024**8), '1.00YiB')

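These `format_bytes` expectations are the binary variant of the same suffix ladder: factor 1024, two decimals, and a trailing 'B'. Reusing the hedged `decimal_suffix` sketch from earlier:

    def fmt_bytes(num):
        # Sketch of equivalent behavior; not yt-dlp's actual format_bytes
        return decimal_suffix(num, '%.2f%s', factor=1024) + 'B'

    fmt_bytes(0)        # '0.00B'
    fmt_bytes(1000)     # '1000.00B'
    fmt_bytes(1024**2)  # '1.00MiB'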
def test_hide_login_info(self):
self.assertEqual(Config.hide_login_info(['-u', 'foo', '-p', 'bar']),
['-u', 'PRIVATE', '-p', 'PRIVATE'])
self.assertEqual(Config.hide_login_info(['-u']), ['-u'])
self.assertEqual(Config.hide_login_info(['-u', 'foo', '-u', 'bar']),
['-u', 'PRIVATE', '-u', 'PRIVATE'])
self.assertEqual(Config.hide_login_info(['--username=foo']),
['--username=PRIVATE'])


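These assertions mirror the deleted test_options.py above: `_hide_login_info` now lives on `Config.hide_login_info` in yt_dlp.utils. A minimal sketch of the redaction rule the tests pin down (the option set here is an illustrative subset, not the full list):

    import re

    def hide_login_info(opts):
        PRIVATE_OPTS = {'-p', '--password', '-u', '--username'}  # illustrative subset
        eqre = re.compile(
            '^(?P<key>' + '|'.join(map(re.escape, PRIVATE_OPTS)) + ')=.+$')
        # Redact '--option=value' style arguments
        opts = [eqre.sub(r'\g<key>=PRIVATE', opt) for opt in opts]
        # Redact the value following a bare '--option' argument
        for idx, opt in enumerate(opts):
            if opt in PRIVATE_OPTS and idx + 1 < len(opts):
                opts[idx + 1] = 'PRIVATE'
        return opts

    hide_login_info(['-u', 'foo', '-p', 'bar'])  # ['-u', 'PRIVATE', '-p', 'PRIVATE']
    hide_login_info(['--username=foo'])          # ['--username=PRIVATE']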
if __name__ == '__main__':
unittest.main()

@@ -19,52 +19,52 @@ class TestVerboseOutput(unittest.TestCase):
[
sys.executable, 'yt_dlp/__main__.py', '-v',
'--username', 'johnsmith@gmail.com',
'--password', 'secret',
'--password', 'my_secret_password',
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
sout, serr = outp.communicate()
self.assertTrue(b'--username' in serr)
self.assertTrue(b'johnsmith' not in serr)
self.assertTrue(b'--password' in serr)
self.assertTrue(b'secret' not in serr)
self.assertTrue(b'my_secret_password' not in serr)

def test_private_info_shortarg(self):
outp = subprocess.Popen(
[
sys.executable, 'yt_dlp/__main__.py', '-v',
'-u', 'johnsmith@gmail.com',
'-p', 'secret',
'-p', 'my_secret_password',
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
sout, serr = outp.communicate()
self.assertTrue(b'-u' in serr)
self.assertTrue(b'johnsmith' not in serr)
self.assertTrue(b'-p' in serr)
self.assertTrue(b'secret' not in serr)
self.assertTrue(b'my_secret_password' not in serr)

def test_private_info_eq(self):
outp = subprocess.Popen(
[
sys.executable, 'yt_dlp/__main__.py', '-v',
'--username=johnsmith@gmail.com',
'--password=secret',
'--password=my_secret_password',
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
sout, serr = outp.communicate()
self.assertTrue(b'--username' in serr)
self.assertTrue(b'johnsmith' not in serr)
self.assertTrue(b'--password' in serr)
self.assertTrue(b'secret' not in serr)
self.assertTrue(b'my_secret_password' not in serr)

def test_private_info_shortarg_eq(self):
outp = subprocess.Popen(
[
sys.executable, 'yt_dlp/__main__.py', '-v',
'-u=johnsmith@gmail.com',
'-p=secret',
'-p=my_secret_password',
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
sout, serr = outp.communicate()
self.assertTrue(b'-u' in serr)
self.assertTrue(b'johnsmith' not in serr)
self.assertTrue(b'-p' in serr)
self.assertTrue(b'secret' not in serr)
self.assertTrue(b'my_secret_password' not in serr)


if __name__ == '__main__':

@@ -9,11 +9,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import FakeYDL, is_download_test


from yt_dlp.extractor import (
YoutubePlaylistIE,
YoutubeTabIE,
YoutubeIE,
YoutubeTabIE,
)


@@ -27,21 +25,10 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL()
dl.params['noplaylist'] = True
ie = YoutubeTabIE(dl)
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
result = ie.extract('https://www.youtube.com/watch?v=OmJ-4B-mS-Y&list=PLydZ2Hrp_gPRJViZjLFKaBMgCQOYEEkyp&index=2')
self.assertEqual(result['_type'], 'url')
self.assertEqual(YoutubeIE.extract_id(result['url']), 'FXxLjLQi3Fg')

def test_youtube_course(self):
print('Skipping: Course URLs no longer exists')
return
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
# TODO find a > 100 (paginating?) videos course
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
entries = list(result['entries'])
self.assertEqual(YoutubeIE.extract_id(entries[0]['url']), 'j9WZyLZCBzs')
self.assertEqual(len(entries), 25)
self.assertEqual(YoutubeIE.extract_id(entries[-1]['url']), 'rYefUsYuEp0')
self.assertEqual(result['ie_key'], YoutubeIE.ie_key())
self.assertEqual(YoutubeIE.extract_id(result['url']), 'OmJ-4B-mS-Y')

def test_youtube_mix(self):
dl = FakeYDL()
@@ -52,15 +39,6 @@ class TestYoutubeLists(unittest.TestCase):
original_video = entries[0]
self.assertEqual(original_video['id'], 'tyITL_exICo')

def test_youtube_toptracks(self):
print('Skipping: The playlist page gives error 500')
return
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
entries = result['entries']
self.assertEqual(len(entries), 100)

def test_youtube_flat_playlist_extraction(self):
dl = FakeYDL()
dl.params['extract_flat'] = True

@@ -86,6 +86,10 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
),
(
'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
),
]


@@ -116,10 +120,17 @@ class TestPlayerInfo(unittest.TestCase):
class TestSignature(unittest.TestCase):
def setUp(self):
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
if not os.path.exists(self.TESTDATA_DIR):
os.mkdir(self.TESTDATA_DIR)

def tearDown(self):
try:
for f in os.listdir(self.TESTDATA_DIR):
os.remove(f)
except OSError:
pass


def t_factory(name, sig_func, url_pattern):
def make_tfunc(url, sig_input, expected_sig):

@@ -72,6 +72,7 @@ from .utils import (
GeoRestrictedError,
get_domain,
HEADRequest,
InAdvancePagedList,
int_or_none,
iri_to_uri,
ISO3166Utils,
@@ -91,6 +92,7 @@ from .utils import (
PerRequestProxyHandler,
platform_name,
Popen,
POSTPROCESS_WHEN,
PostProcessingError,
preferredencoding,
prepend_extension,
@@ -199,7 +201,12 @@ class YoutubeDL(object):
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
no_warnings: Do not print out anything for warnings.
forceprint: A list of templates to force print
forceprint: A dict with keys WHEN mapped to a list of templates to
print to stdout. The allowed keys are video or any of the
items in utils.POSTPROCESS_WHEN.
For compatibility, a single list is also accepted
print_to_file: A dict with keys WHEN (same as forceprint) mapped to
a list of tuples with (template, filename)
forceurl: Force printing final URL. (Deprecated)
forcetitle: Force printing title. (Deprecated)
forceid: Force printing ID. (Deprecated)
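Given the docstring change above, a minimal sketch of the new parameter shape (the keys and templates are only examples):

    params = {
        # One list of templates per WHEN; 'video' is the per-video stage
        'forceprint': {
            'video': ['%(title)s - %(id)s'],
            'after_move': ['%(filepath)s'],
        },
        # Same keys, but (template, filename-template) tuples
        'print_to_file': {
            'video': [('%(title)s', 'titles.txt')],
        },
    }
    # For backward compatibility, a plain list is still accepted and is
    # treated as {'video': [...]}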
@@ -320,6 +327,8 @@ class YoutubeDL(object):
cookiesfrombrowser: A tuple containing the name of the browser, the profile
name/path from where cookies are loaded, and the name of the
keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
support RFC 5746 secure renegotiation
nocheckcertificate: Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
@@ -343,8 +352,8 @@ class YoutubeDL(object):
postprocessors: A list of dictionaries, each with an entry
* key: The name of the postprocessor. See
yt_dlp/postprocessor/__init__.py for a list.
* when: When to run the postprocessor. Can be one of
pre_process|before_dl|post_process|after_move.
* when: When to run the postprocessor. Allowed values are
the entries of utils.POSTPROCESS_WHEN
Assumed to be 'post_process' if not given
post_hooks: Deprecated - Register a custom postprocessor instead
A list of functions that get called as the final step
@@ -475,6 +484,7 @@ class YoutubeDL(object):
extractor_args: A dictionary of arguments to be passed to the extractors.
See "EXTRACTOR ARGUMENTS" for details.
Eg: {'youtube': {'skip': ['dash', 'hls']}}
mark_watched: Mark videos watched (even with --simulate). Only for YouTube
youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
If True (default), DASH manifests and related
data will be downloaded and processed by extractor.
@@ -505,7 +515,7 @@ class YoutubeDL(object):

params = None
_ies = {}
_pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
_pps = {k: [] for k in POSTPROCESS_WHEN}
_printed_messages = set()
_first_webpage_request = True
_download_retcode = None
@@ -523,7 +533,7 @@ class YoutubeDL(object):
params = {}
self._ies = {}
self._ies_instances = {}
self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
self._pps = {k: [] for k in POSTPROCESS_WHEN}
self._printed_messages = set()
self._first_webpage_request = True
self._post_hooks = []
@@ -531,6 +541,7 @@ class YoutubeDL(object):
self._postprocessor_hooks = []
self._download_retcode = 0
self._num_downloads = 0
self._num_videos = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self._err_file = sys.stderr
self.params = params
@@ -585,7 +596,14 @@ class YoutubeDL(object):
else:
self.params['nooverwrites'] = not self.params['overwrites']

if params.get('bidi_workaround', False):
self.params.setdefault('forceprint', {})
self.params.setdefault('print_to_file', {})

# Compatibility with older syntax
if not isinstance(params['forceprint'], dict):
self.params['forceprint'] = {'video': params['forceprint']}

if self.params.get('bidi_workaround', False):
try:
import pty
master, slave = pty.openpty()
@@ -613,7 +631,7 @@ class YoutubeDL(object):

if (sys.platform != 'win32'
and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
and not params.get('restrictfilenames', False)):
and not self.params.get('restrictfilenames', False)):
# Unicode filesystem API will throw errors (#1474, #13027)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
@@ -1036,6 +1054,7 @@ class YoutubeDL(object):
if info_dict.get('duration', None) is not None
else None)
info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
info_dict['video_autonumber'] = self._num_videos
if info_dict.get('resolution') is None:
info_dict['resolution'] = self.format_resolution(info_dict, default=None)

@@ -1151,7 +1170,7 @@ class YoutubeDL(object):
str_fmt = f'{fmt[:-1]}s'
if fmt[-1] == 'l': # list
delim = '\n' if '#' in flags else ', '
value, fmt = delim.join(variadic(value, allowed_types=(str, bytes))), str_fmt
value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
elif fmt[-1] == 'j': # json
value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
elif fmt[-1] == 'q': # quoted
@@ -1166,7 +1185,9 @@ class YoutubeDL(object):
'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
value), str_fmt
elif fmt[-1] == 'D': # decimal suffix
value, fmt = format_decimal_suffix(value, f'%{fmt[:-1]}f%s' if fmt[:-1] else '%d%s'), 's'
num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
factor=1024 if '#' in flags else 1000)
elif fmt[-1] == 'S': # filename sanitization
value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
elif fmt[-1] == 'c':
@@ -1201,10 +1222,17 @@ class YoutubeDL(object):
try:
outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
if not filename:
return None

force_ext = OUTTMPL_TYPES.get(tmpl_type)
if filename and force_ext is not None:
filename = replace_extension(filename, force_ext, info_dict.get('ext'))
if tmpl_type in ('default', 'temp'):
final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
filename = replace_extension(filename, ext, final_ext)
else:
force_ext = OUTTMPL_TYPES[tmpl_type]
if force_ext:
filename = replace_extension(filename, force_ext, info_dict.get('ext'))

# https://github.com/blackjack4494/youtube-dlc/issues/85
trim_file_name = self.params.get('trim_file_name', False)
@@ -1584,6 +1612,19 @@ class YoutubeDL(object):
def _ensure_dir_exists(self, path):
return make_dir(path, self.report_error)

@staticmethod
def _playlist_infodict(ie_result, **kwargs):
return {
**ie_result,
'playlist': ie_result.get('title') or ie_result.get('id'),
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
'playlist_index': 0,
**kwargs,
}

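The new `_playlist_infodict` helper above centralizes the playlist-level fields that `__process_playlist` used to assemble inline; because `**kwargs` is merged last, callers can override or extend any field. A small hedged usage sketch:

    ie_copy = YoutubeDL._playlist_infodict(
        {'id': 'PL123', 'title': 'My playlist'},  # made-up ie_result
        n_entries=42)                             # extra keys are simply merged in
    assert ie_copy['playlist'] == 'My playlist'
    assert ie_copy['playlist_index'] == 0
    assert ie_copy['n_entries'] == 42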
def __process_playlist(self, ie_result, download):
# We process each entry in the playlist
playlist = ie_result.get('title') or ie_result.get('id')
@@ -1624,23 +1665,27 @@ class YoutubeDL(object):
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

ie_entries = ie_result['entries']
msg = (
'Downloading %d videos' if not isinstance(ie_entries, list)
else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

if isinstance(ie_entries, list):
playlist_count = len(ie_entries)
msg = f'Collected {playlist_count} videos; downloading %d of them'
ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

def get_entry(i):
return ie_entries[i - 1]
else:
msg = 'Downloading %d videos'
if not isinstance(ie_entries, (PagedList, LazyList)):
ie_entries = LazyList(ie_entries)
elif isinstance(ie_entries, InAdvancePagedList):
if ie_entries._pagesize == 1:
playlist_count = ie_entries._pagecount

def get_entry(i):
return YoutubeDL.__handle_extraction_exceptions(
lambda self, i: ie_entries[i - 1]
)(self, i)

entries = []
entries, broken = [], False
items = playlistitems if playlistitems is not None else itertools.count(playliststart)
for i in items:
if i == 0:
@@ -1662,6 +1707,7 @@ class YoutubeDL(object):
if entry is not None:
self._match_entry(entry, incomplete=True, silent=True)
except (ExistingVideoReached, RejectedVideoReached):
broken = True
break
ie_result['entries'] = entries

@@ -1672,23 +1718,19 @@ class YoutubeDL(object):
if entry is not None]
n_entries = len(entries)

if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
ie_result['playlist_count'] = n_entries

if not playlistitems and (playliststart != 1 or playlistend):
playlistitems = list(range(playliststart, playliststart + n_entries))
ie_result['requested_entries'] = playlistitems

_infojson_written = False
if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
ie_copy = {
'playlist': playlist,
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
'playlist_index': 0,
'n_entries': n_entries,
}
ie_copy.update(dict(ie_result))

write_playlist_files = self.params.get('allow_playlist_files', True)
if write_playlist_files and self.params.get('list_thumbnails'):
self.list_thumbnails(ie_result)
if write_playlist_files and not self.params.get('simulate'):
ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
_infojson_written = self._write_info_json(
'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
if _infojson_written is None:
@@ -1721,6 +1763,7 @@ class YoutubeDL(object):
extra = {
'n_entries': n_entries,
'_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
'playlist_count': ie_result.get('playlist_count'),
'playlist_index': playlist_index,
'playlist_autonumber': i,
'playlist': playlist,
@@ -1753,7 +1796,9 @@ class YoutubeDL(object):
'updated playlist', ie_result,
self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
return
self.to_screen('[download] Finished downloading playlist: %s' % playlist)

ie_result = self.run_all_pps('playlist', ie_result)
self.to_screen(f'[download] Finished downloading playlist: {playlist}')
return ie_result

@__handle_extraction_exceptions
@@ -2195,10 +2240,7 @@ class YoutubeDL(object):

def _calc_headers(self, info_dict):
res = std_headers.copy()

add_headers = info_dict.get('http_headers')
if add_headers:
res.update(add_headers)
res.update(info_dict.get('http_headers') or {})

cookies = self._calc_cookies(info_dict)
if cookies:
@@ -2258,12 +2300,20 @@ class YoutubeDL(object):

def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
self._num_videos += 1

if 'id' not in info_dict:
raise ExtractorError('Missing "id" field in extractor result')
raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
elif not info_dict.get('id'):
raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

info_dict['fulltitle'] = info_dict.get('title')
if 'title' not in info_dict:
raise ExtractorError('Missing "title" field in extractor result',
video_id=info_dict['id'], ie=info_dict['extractor'])
elif not info_dict.get('title'):
self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'

def report_force_conversion(field, field_not, conversion):
self.report_warning(
@@ -2311,6 +2361,7 @@ class YoutubeDL(object):
for ts_key, date_key in (
('timestamp', 'upload_date'),
('release_timestamp', 'release_date'),
('modified_timestamp', 'modified_date'),
):
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
@@ -2373,6 +2424,8 @@ class YoutubeDL(object):
if info_dict.get('is_live'):
get_from_start = bool(self.params.get('live_from_start'))
formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
if not get_from_start:
info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')

if not formats:
self.raise_no_formats(info_dict)
@@ -2534,24 +2587,46 @@ class YoutubeDL(object):
if not self.params.get('ignore_no_formats_error'):
raise ExtractorError('Requested format is not available', expected=True,
video_id=info_dict['id'], ie=info_dict['extractor'])
else:
self.report_warning('Requested format is not available')
# Process what we can, even without any available formats.
self.process_info(dict(info_dict))
elif download:
self.to_screen(
'[info] %s: Downloading %d format(s): %s' % (
info_dict['id'], len(formats_to_download),
", ".join([f['format_id'] for f in formats_to_download])))
for fmt in formats_to_download:
new_info = dict(info_dict)
self.report_warning('Requested format is not available')
# Process what we can, even without any available formats.
formats_to_download = [{}]

best_format = formats_to_download[-1]
if download:
if best_format:
self.to_screen(
f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
+ ', '.join([f['format_id'] for f in formats_to_download]))
max_downloads_reached = False
for i, fmt in enumerate(formats_to_download):
formats_to_download[i] = new_info = dict(info_dict)
# Save a reference to the original info_dict so that it can be modified in process_info if needed
new_info['__original_infodict'] = info_dict
new_info.update(fmt)
self.process_info(new_info)
new_info['__original_infodict'] = info_dict
try:
self.process_info(new_info)
except MaxDownloadsReached:
max_downloads_reached = True
new_info.pop('__original_infodict')
# Remove copied info
for key, val in tuple(new_info.items()):
if info_dict.get(key) == val:
new_info.pop(key)
if max_downloads_reached:
break

write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
assert write_archive.issubset({True, False, 'ignore'})
if True in write_archive and False not in write_archive:
self.record_download_archive(info_dict)

info_dict['requested_downloads'] = formats_to_download
info_dict = self.run_all_pps('after_video', info_dict)
if max_downloads_reached:
raise MaxDownloadsReached()

# We update the info dict with the selected best quality format (backwards compatibility)
if formats_to_download:
info_dict.update(formats_to_download[-1])
info_dict.update(best_format)
return info_dict

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
@@ -2622,6 +2697,33 @@ class YoutubeDL(object):
subs[lang] = f
return subs

def _forceprint(self, key, info_dict):
if info_dict is None:
return
info_copy = info_dict.copy()
info_copy['formats_table'] = self.render_formats_table(info_dict)
info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

def format_tmpl(tmpl):
mobj = re.match(r'\w+(=?)$', tmpl)
if mobj and mobj.group(1):
return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
elif mobj:
return f'%({tmpl})s'
return tmpl

for tmpl in self.params['forceprint'].get(key, []):
self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
filename = self.evaluate_outtmpl(file_tmpl, info_dict)
tmpl = format_tmpl(tmpl)
self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
with io.open(filename, 'a', encoding='utf-8') as f:
f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')

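The `format_tmpl` closure above gives --print a shorthand: a bare field name expands to an output template, and a trailing '=' prints the value repr-style together with its name. Concretely:

    format_tmpl('id')           # -> '%(id)s'
    format_tmpl('id=')          # -> 'id = %(id)r'
    format_tmpl('%(title)s x')  # -> unchanged (not a bare field name)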
def __forced_printings(self, info_dict, filename, incomplete):
def print_mandatory(field, actual_field=None):
if actual_field is None:
@@ -2644,15 +2746,11 @@ class YoutubeDL(object):
elif 'url' in info_dict:
info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

if self.params.get('forceprint') or self.params.get('forcejson'):
if (self.params.get('forcejson')
or self.params['forceprint'].get('video')
or self.params['print_to_file'].get('video')):
self.post_extract(info_dict)
for tmpl in self.params.get('forceprint', []):
mobj = re.match(r'\w+(=?)$', tmpl)
if mobj and mobj.group(1):
tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
elif mobj:
tmpl = '%({})s'.format(tmpl)
self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
self._forceprint('video', info_dict)

print_mandatory('title')
print_mandatory('id')
@@ -2690,7 +2788,9 @@ class YoutubeDL(object):
if not test:
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
urls = '", "'.join(
(f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
for f in info.get('requested_formats', []) or [info])
self.write_debug('Invoking downloader on "%s"' % urls)

# Note: Ideally info should be a deep-copied so that hooks cannot modify it.
@@ -2700,26 +2800,27 @@ class YoutubeDL(object):
new_info['http_headers'] = self._calc_headers(new_info)
return fd.download(name, new_info, subtitle)

def existing_file(self, filepaths, *, default_overwrite=True):
existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
if existing_files and not self.params.get('overwrites', default_overwrite):
return existing_files[0]

for file in existing_files:
self.report_file_delete(file)
os.remove(file)
return None

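The new `existing_file` method above generalizes the old inline helper: with overwrites disabled it returns the first path that already exists, otherwise it deletes all of them and returns None so the download proceeds. A usage sketch with made-up filenames:

    existing = ydl.existing_file(['video.mkv', 'video.temp.mkv'], default_overwrite=False)
    if existing:
        print(f'Reusing {existing}')  # keep the file, skip re-downloading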
def process_info(self, info_dict):
"""Process a single resolved IE result."""
"""Process a single resolved IE result. (Modified it in-place)"""

assert info_dict.get('_type', 'video') == 'video'

max_downloads = self.params.get('max_downloads')
if max_downloads is not None:
if self._num_downloads >= int(max_downloads):
raise MaxDownloadsReached()

if info_dict.get('is_live') and not self.params.get('live_from_start'):
info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')

# TODO: backward compatibility, to be removed
info_dict['fulltitle'] = info_dict['title']
original_infodict = info_dict

if 'format' not in info_dict and 'ext' in info_dict:
info_dict['format'] = info_dict['ext']

if self._match_entry(info_dict) is not None:
info_dict['__write_download_archive'] = 'ignore'
return

self.post_extract(info_dict)
@@ -2734,9 +2835,7 @@ class YoutubeDL(object):
self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

if self.params.get('simulate'):
if self.params.get('force_write_download_archive', False):
self.record_download_archive(info_dict)
# Do nothing else if in simulate mode
info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
return

if full_filename is None:
@@ -2831,43 +2930,39 @@ class YoutubeDL(object):
for link_type, should_write in write_links.items()):
return

def replace_info_dict(new_info):
nonlocal info_dict
if new_info == info_dict:
return
info_dict.clear()
info_dict.update(new_info)

try:
info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
replace_info_dict(new_info)
except PostProcessingError as err:
self.report_error('Preprocessing: %s' % str(err))
return

must_record_download_archive = False
if self.params.get('skip_download', False):
if self.params.get('skip_download'):
info_dict['filepath'] = temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
info_dict['__files_to_move'] = files_to_move
info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
else:
# Download
info_dict.setdefault('__postprocessors', [])
try:

def existing_file(*filepaths):
def existing_video_file(*filepaths):
ext = info_dict.get('ext')
final_ext = self.params.get('final_ext', ext)
existing_files = []
for file in orderedSet(filepaths):
if final_ext != ext:
converted = replace_extension(file, final_ext, ext)
if os.path.exists(encodeFilename(converted)):
existing_files.append(converted)
if os.path.exists(encodeFilename(file)):
existing_files.append(file)

if not existing_files or self.params.get('overwrites', False):
for file in orderedSet(existing_files):
self.report_file_delete(file)
os.remove(encodeFilename(file))
return None

info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
return existing_files[0]
converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
default_overwrite=False)
if file:
info_dict['ext'] = os.path.splitext(file)[1][1:]
return file

success = True
if info_dict.get('requested_formats') is not None:
@@ -2921,7 +3016,7 @@ class YoutubeDL(object):
# Ensure filename always has a correct extension for successful merge
full_filename = correct_ext(full_filename)
temp_filename = correct_ext(temp_filename)
dl_filename = existing_file(full_filename, temp_filename)
dl_filename = existing_video_file(full_filename, temp_filename)
info_dict['__real_download'] = False

downloaded = []
@@ -2984,7 +3079,7 @@ class YoutubeDL(object):
files_to_move[file] = None
else:
# Just a single file
dl_filename = existing_file(full_filename, temp_filename)
dl_filename = existing_video_file(full_filename, temp_filename)
if dl_filename is None or dl_filename == temp_filename:
# dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
# So we should try to resume the download
@@ -3061,7 +3156,7 @@ class YoutubeDL(object):

fixup()
try:
info_dict = self.post_process(dl_filename, info_dict, files_to_move)
replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
except PostProcessingError as err:
self.report_error('Postprocessing: %s' % str(err))
return
@@ -3071,10 +3166,14 @@ class YoutubeDL(object):
except Exception as err:
self.report_error('post hooks: %s' % str(err))
return
must_record_download_archive = True
info_dict['__write_download_archive'] = True

if self.params.get('force_write_download_archive'):
info_dict['__write_download_archive'] = True

# Make sure the info_dict was modified in-place
assert info_dict is original_infodict

if must_record_download_archive or self.params.get('force_write_download_archive', False):
self.record_download_archive(info_dict)
max_downloads = self.params.get('max_downloads')
if max_downloads is not None and self._num_downloads >= int(max_downloads):
raise MaxDownloadsReached()
@@ -3140,16 +3239,16 @@ class YoutubeDL(object):
if info_dict is None:
return info_dict
info_dict.setdefault('epoch', int(time.time()))
info_dict.setdefault('_type', 'video')
remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
keep_keys = ['_type'] # Always keep this to facilitate load-info-json
if remove_private_keys:
remove_keys |= {
'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
}
empty_values = (None, {}, [], set(), tuple())
reject = lambda k, v: k not in keep_keys and (
k.startswith('_') or k in remove_keys or v in empty_values)
k.startswith('_') or k in remove_keys or v is None)
else:
reject = lambda k, v: k in remove_keys

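Note the behavioral change in the hunk above: with private-key removal enabled, `sanitize_info` used to drop any empty container, whereas it now rejects only None values, so empty lists and dicts survive into the written info-json. Schematically:

    info = {'id': 'x', 'tags': [], 'license': None, '_private': 1}
    # before: 'tags' ([] was in empty_values) was dropped along with the rest
    # after:  only 'license' (None), '_private' (leading underscore) and the
    #         explicit remove_keys are filtered out; 'tags' is kept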
@@ -3170,6 +3269,25 @@ class YoutubeDL(object):
''' Alias of sanitize_info for backward compatibility '''
return YoutubeDL.sanitize_info(info_dict, actually_filter)

@staticmethod
def post_extract(info_dict):
def actual_post_extract(info_dict):
if info_dict.get('_type') in ('playlist', 'multi_video'):
for video_dict in info_dict.get('entries', {}):
actual_post_extract(video_dict or {})
return

post_extractor = info_dict.get('__post_extractor') or (lambda: {})
extra = post_extractor().items()
info_dict.update(extra)
info_dict.pop('__post_extractor', None)

original_infodict = info_dict.get('__original_infodict') or {}
original_infodict.update(extra)
original_infodict.pop('__post_extractor', None)

actual_post_extract(info_dict or {})

def run_pp(self, pp, infodict):
files_to_delete = []
if '__files_to_move' not in infodict:
@@ -3199,45 +3317,26 @@ class YoutubeDL(object):
del infodict['__files_to_move'][old_filename]
return infodict

@staticmethod
def post_extract(info_dict):
def actual_post_extract(info_dict):
if info_dict.get('_type') in ('playlist', 'multi_video'):
for video_dict in info_dict.get('entries', {}):
actual_post_extract(video_dict or {})
return

post_extractor = info_dict.get('__post_extractor') or (lambda: {})
extra = post_extractor().items()
info_dict.update(extra)
info_dict.pop('__post_extractor', None)

original_infodict = info_dict.get('__original_infodict') or {}
original_infodict.update(extra)
original_infodict.pop('__post_extractor', None)

actual_post_extract(info_dict or {})
def run_all_pps(self, key, info, *, additional_pps=None):
self._forceprint(key, info)
for pp in (additional_pps or []) + self._pps[key]:
info = self.run_pp(pp, info)
return info

def pre_process(self, ie_info, key='pre_process', files_to_move=None):
info = dict(ie_info)
info['__files_to_move'] = files_to_move or {}
for pp in self._pps[key]:
info = self.run_pp(pp, info)
info = self.run_all_pps(key, info)
return info, info.pop('__files_to_move', None)

def post_process(self, filename, ie_info, files_to_move=None):
def post_process(self, filename, info, files_to_move=None):
"""Run all the postprocessors on the given file."""
info = dict(ie_info)
info['filepath'] = filename
info['__files_to_move'] = files_to_move or {}

for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
info = self.run_pp(pp, info)
info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
del info['__files_to_move']
for pp in self._pps['after_move']:
info = self.run_pp(pp, info)
return info
return self.run_all_pps('after_move', info)

def _make_archive_id(self, info_dict):
video_id = info_dict.get('id')
@@ -3276,6 +3375,7 @@ class YoutubeDL(object):
return
vid_id = self._make_archive_id(info_dict)
assert vid_id
self.write_debug(f'Adding to archive: {vid_id}')
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
archive_file.write(vid_id + '\n')
self.archive.add(vid_id)
@@ -3294,6 +3394,11 @@ class YoutubeDL(object):
return '%dx?' % format['width']
return default

def _list_format_headers(self, *headers):
if self.params.get('listformats_table', True) is not False:
return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
return headers

def _format_note(self, fdict):
res = ''
if fdict.get('ext') in ['f4f', 'f4m']:
@@ -3354,102 +3459,97 @@ class YoutubeDL(object):
res += '~' + format_bytes(fdict['filesize_approx'])
return res

def _list_format_headers(self, *headers):
if self.params.get('listformats_table', True) is not False:
return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
return headers

def list_formats(self, info_dict):
def render_formats_table(self, info_dict):
if not info_dict.get('formats') and not info_dict.get('url'):
self.to_screen('%s has no formats' % info_dict['id'])
return
self.to_screen('[info] Available formats for %s:' % info_dict['id'])
return None

formats = info_dict.get('formats', [info_dict])
new_format = self.params.get('listformats_table', True) is not False
if new_format:
delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
table = [
[
self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
format_field(f, 'ext'),
format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
format_field(f, 'fps', '\t%d'),
format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
delim,
format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
format_field(f, 'tbr', '\t%dk'),
shorten_protocol_name(f.get('protocol', '')),
delim,
format_field(f, 'vcodec', default='unknown').replace(
'none',
'images' if f.get('acodec') == 'none'
else self._format_screen('audio only', self.Styles.SUPPRESS)),
format_field(f, 'vbr', '\t%dk'),
format_field(f, 'acodec', default='unknown').replace(
'none',
'' if f.get('vcodec') == 'none'
else self._format_screen('video only', self.Styles.SUPPRESS)),
format_field(f, 'abr', '\t%dk'),
format_field(f, 'asr', '\t%dHz'),
join_nonempty(
self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
format_field(f, 'language', '[%s]'),
join_nonempty(
format_field(f, 'format_note'),
format_field(f, 'container', ignore=(None, f.get('ext'))),
delim=', '),
delim=' '),
] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
header_line = self._list_format_headers(
'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
else:
if not self.params.get('listformats_table', True) is not False:
table = [
[
format_field(f, 'format_id'),
format_field(f, 'ext'),
self.format_resolution(f),
self._format_note(f)]
for f in formats
if f.get('preference') is None or f['preference'] >= -1000]
header_line = ['format code', 'extension', 'resolution', 'note']
self._format_note(f)
] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

self.to_stdout(render_table(
header_line, table,
extra_gap=(0 if new_format else 1),
hide_empty=new_format,
delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
table = [
[
self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
format_field(f, 'ext'),
format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
format_field(f, 'fps', '\t%d'),
format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
delim,
format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
format_field(f, 'tbr', '\t%dk'),
shorten_protocol_name(f.get('protocol', '')),
delim,
format_field(f, 'vcodec', default='unknown').replace(
'none', 'images' if f.get('acodec') == 'none'
else self._format_screen('audio only', self.Styles.SUPPRESS)),
format_field(f, 'vbr', '\t%dk'),
format_field(f, 'acodec', default='unknown').replace(
'none', '' if f.get('vcodec') == 'none'
else self._format_screen('video only', self.Styles.SUPPRESS)),
format_field(f, 'abr', '\t%dk'),
format_field(f, 'asr', '\t%dHz'),
join_nonempty(
self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
format_field(f, 'language', '[%s]'),
join_nonempty(format_field(f, 'format_note'),
format_field(f, 'container', ignore=(None, f.get('ext'))),
delim=', '),
delim=' '),
] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
header_line = self._list_format_headers(
'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

def list_thumbnails(self, info_dict):
thumbnails = list(info_dict.get('thumbnails'))
return render_table(
header_line, table, hide_empty=True,
delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))

def render_thumbnails_table(self, info_dict):
thumbnails = list(info_dict.get('thumbnails') or [])
if not thumbnails:
self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
return

self.to_screen(
'[info] Thumbnails for %s:' % info_dict['id'])
self.to_stdout(render_table(
return None
return render_table(
self._list_format_headers('ID', 'Width', 'Height', 'URL'),
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))

def list_subtitles(self, video_id, subtitles, name='subtitles'):
if not subtitles:
self.to_screen('%s has no %s' % (video_id, name))
return
self.to_screen(
'Available %s for %s:' % (name, video_id))
[[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])

def render_subtitles_table(self, video_id, subtitles):
def _row(lang, formats):
exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
if len(set(names)) == 1:
names = [] if names[0] == 'unknown' else names[:1]
return [lang, ', '.join(names), ', '.join(exts)]

self.to_stdout(render_table(
if not subtitles:
return None
return render_table(
self._list_format_headers('Language', 'Name', 'Formats'),
[_row(lang, formats) for lang, formats in subtitles.items()],
hide_empty=True))
hide_empty=True)

def __list_table(self, video_id, name, func, *args):
table = func(*args)
if not table:
self.to_screen(f'{video_id} has no {name}')
return
self.to_screen(f'[info] Available {name} for {video_id}:')
self.to_stdout(table)

def list_formats(self, info_dict):
self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)

def list_thumbnails(self, info_dict):
self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)

def list_subtitles(self, video_id, subtitles, name='subtitles'):
self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)

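The refactor above separates table construction from printing: each `render_*_table` method returns a string (or None when there is nothing to show) and `__list_table` owns the screen output, which also lets `_forceprint` expose the rendered tables as `%(formats_table)s` and friends. The resulting pattern, sketched:

    table = ydl.render_formats_table(info_dict)  # pure: returns str or None
    if table is None:
        print(f"{info_dict['id']} has no formats")
    else:
        print(table)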
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
@@ -3698,10 +3798,11 @@ class YoutubeDL(object):
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
            existing_sub = self.existing_file((sub_filename_final, sub_filename))
            if existing_sub:
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
                sub_info['filepath'] = existing_sub
                ret.append((existing_sub, sub_filename_final))
                continue

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
@@ -3724,9 +3825,10 @@ class YoutubeDL(object):
                    self.dl(sub_filename, sub_copy, subtitle=True)
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                    if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                        raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
                    self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
                    continue
        return ret

    def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
@@ -3749,11 +3851,12 @@ class YoutubeDL(object):
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

            if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
            existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
            if existing_thumb:
                self.to_screen('[info] %s is already present' % (
                    thumb_display_id if multiple else f'{label} thumbnail').capitalize())
                t['filepath'] = existing_thumb
                ret.append((existing_thumb, thumb_filename_final))
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
@@ -143,6 +143,8 @@ def _real_main(argv=None):
            '"-f best" selects the best pre-merged format which is often not the best option',
            'To let yt-dlp download and merge the best available formats, simply do not pass any format selection',
            'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning')))
    if opts.exec_cmd.get('before_dl') and opts.exec_before_dl_cmd:
        parser.error('using "--exec-before-download" conflicts with "--exec before_dl:"')
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error('using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
@@ -333,6 +335,9 @@ def _real_main(argv=None):
        if _video_multistreams_set is False and _audio_multistreams_set is False:
            _unused_compat_opt('multistreams')
    outtmpl_default = opts.outtmpl.get('default')
    if outtmpl_default == '':
        outtmpl_default, opts.skip_download = None, True
        del opts.outtmpl['default']
    if opts.useid:
        if outtmpl_default is None:
            outtmpl_default = opts.outtmpl['default'] = '%(id)s.%(ext)s'
@@ -351,9 +356,13 @@ def _real_main(argv=None):

    for k, tmpl in opts.outtmpl.items():
        validate_outtmpl(tmpl, f'{k} output template')
    opts.forceprint = opts.forceprint or []
    for tmpl in opts.forceprint or []:
        validate_outtmpl(tmpl, 'print template')
    for type_, tmpl_list in opts.forceprint.items():
        for tmpl in tmpl_list:
            validate_outtmpl(tmpl, f'{type_} print template')
    for type_, tmpl_list in opts.print_to_file.items():
        for tmpl, file in tmpl_list:
            validate_outtmpl(tmpl, f'{type_} print-to-file template')
            validate_outtmpl(file, f'{type_} print-to-file filename')
    validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title')
    for k, tmpl in opts.progress_template.items():
        k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress'
@@ -395,7 +404,10 @@ def _real_main(argv=None):
        opts.parse_metadata.append('title:%s' % opts.metafromtitle)
    opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, opts.parse_metadata)))

    any_getting = opts.forceprint or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
    any_getting = (any(opts.forceprint.values()) or opts.dumpjson or opts.dump_single_json
                   or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail
                   or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration)

    any_printing = opts.print_json
    download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive

@@ -486,13 +498,6 @@ def _real_main(argv=None):
            # Run this before the actual video download
            'when': 'before_dl'
        })
    # Must be after all other before_dl
    if opts.exec_before_dl_cmd:
        postprocessors.append({
            'key': 'Exec',
            'exec_cmd': opts.exec_before_dl_cmd,
            'when': 'before_dl'
        })
    if opts.extractaudio:
        postprocessors.append({
            'key': 'FFmpegExtractAudio',
@@ -593,13 +598,21 @@ def _real_main(argv=None):
    # XAttrMetadataPP should be run after post-processors that may change file contents
    if opts.xattrs:
        postprocessors.append({'key': 'XAttrMetadata'})
    # Exec must be the last PP
    if opts.exec_cmd:
    if opts.concat_playlist != 'never':
        postprocessors.append({
            'key': 'FFmpegConcat',
            'only_multi_video': opts.concat_playlist != 'always',
            'when': 'playlist',
        })
    # Exec must be the last PP of each category
    if opts.exec_before_dl_cmd:
        opts.exec_cmd.setdefault('before_dl', opts.exec_before_dl_cmd)
    for when, exec_cmd in opts.exec_cmd.items():
        postprocessors.append({
            'key': 'Exec',
            'exec_cmd': opts.exec_cmd,
            'exec_cmd': exec_cmd,
            # Run this only after the files have been moved to their final locations
            'when': 'after_move'
            'when': when,
        })

    def report_args_compat(arg, name):
@@ -657,6 +670,7 @@ def _real_main(argv=None):
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'forceprint': opts.forceprint,
        'print_to_file': opts.print_to_file,
        'forcejson': opts.dumpjson or opts.print_json,
        'dump_single_json': opts.dump_single_json,
        'force_write_download_archive': opts.force_write_download_archive,
@@ -750,6 +764,7 @@ def _real_main(argv=None):
        'skip_playlist_after_errors': opts.skip_playlist_after_errors,
        'cookiefile': opts.cookiefile,
        'cookiesfrombrowser': opts.cookiesfrombrowser,
        'legacyserverconnect': opts.legacy_server_connect,
        'nocheckcertificate': opts.no_check_certificate,
        'prefer_insecure': opts.prefer_insecure,
        'proxy': opts.proxy,
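With this change --exec values are stored as a dict mapping a postprocessor stage ('when') to its command, and the deprecated --exec-before-download is folded in via setdefault. A small sketch of the resulting fan-out (command strings fabricated for illustration):

exec_cmd = {'after_move': 'echo done', 'playlist': 'echo playlist done'}
exec_before_dl_cmd = 'echo starting'  # deprecated option, still honoured

if exec_before_dl_cmd:
    exec_cmd.setdefault('before_dl', exec_before_dl_cmd)

postprocessors = [
    {'key': 'Exec', 'exec_cmd': cmd, 'when': when}
    for when, cmd in exec_cmd.items()
]
for pp in postprocessors:
    print(pp['when'], '->', pp['exec_cmd'])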
@@ -2,8 +2,15 @@ from __future__ import unicode_literals

from math import ceil

from .compat import compat_b64decode, compat_pycrypto_AES
from .utils import bytes_to_intlist, intlist_to_bytes
from .compat import (
    compat_b64decode,
    compat_ord,
    compat_pycrypto_AES,
)
from .utils import (
    bytes_to_intlist,
    intlist_to_bytes,
)


if compat_pycrypto_AES:
@@ -25,6 +32,10 @@ else:
        return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce))))


def unpad_pkcs7(data):
    return data[:-compat_ord(data[-1])]


BLOCK_SIZE_BYTES = 16


@@ -506,5 +517,6 @@ __all__ = [
    'aes_encrypt',
    'aes_gcm_decrypt_and_verify',
    'aes_gcm_decrypt_and_verify_bytes',
    'key_expansion'
    'key_expansion',
    'unpad_pkcs7',
]
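The new unpad_pkcs7 helper centralizes the PKCS#7 unpadding that several call sites previously inlined: the last byte of a padded block states how many padding bytes were appended. A self-contained round-trip sketch (the pad_pkcs7 inverse is illustrative and not part of yt-dlp):

def pad_pkcs7(data, block_size=16):
    # Append n bytes, each with value n, so the length becomes a block multiple
    n = block_size - len(data) % block_size
    return data + bytes([n] * n)

def unpad_pkcs7(data):
    # The last byte encodes how many padding bytes to strip
    return data[:-data[-1]]

padded = pad_pkcs7(b'hello world')
assert len(padded) % 16 == 0
assert unpad_pkcs7(padded) == b'hello world'

The real helper goes through compat_ord so that it also behaves on Python 2-style byte strings.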
@@ -2,6 +2,7 @@

import asyncio
import base64
import collections
import ctypes
import getpass
import html
@@ -180,14 +181,17 @@ def windows_enable_vt_mode():  # TODO: Do this the proper way https://bugs.pytho

compat_basestring = str
compat_chr = chr
compat_filter = filter
compat_input = input
compat_integer_types = (int, )
compat_kwargs = lambda kwargs: kwargs
compat_map = map
compat_numeric_types = (int, float, complex)
compat_str = str
compat_xpath = lambda xpath: xpath
compat_zip = zip

compat_collections_abc = collections.abc
compat_HTMLParser = html.parser.HTMLParser
compat_HTTPError = urllib.error.HTTPError
compat_Struct = struct.Struct
@@ -245,6 +249,7 @@ __all__ = [
    'compat_b64decode',
    'compat_basestring',
    'compat_chr',
    'compat_collections_abc',
    'compat_cookiejar',
    'compat_cookiejar_Cookie',
    'compat_cookies',
@@ -254,6 +259,7 @@ __all__ = [
    'compat_etree_fromstring',
    'compat_etree_register_namespace',
    'compat_expanduser',
    'compat_filter',
    'compat_get_terminal_size',
    'compat_getenv',
    'compat_getpass',
@@ -265,6 +271,7 @@ __all__ = [
    'compat_integer_types',
    'compat_itertools_count',
    'compat_kwargs',
    'compat_map',
    'compat_numeric_types',
    'compat_ord',
    'compat_os_name',
@@ -11,7 +11,11 @@ from datetime import datetime, timedelta, timezone
from enum import Enum, auto
from hashlib import pbkdf2_hmac

from .aes import aes_cbc_decrypt_bytes, aes_gcm_decrypt_and_verify_bytes
from .aes import (
    aes_cbc_decrypt_bytes,
    aes_gcm_decrypt_and_verify_bytes,
    unpad_pkcs7,
)
from .compat import (
    compat_b64decode,
    compat_cookiejar_Cookie,
@@ -669,8 +673,7 @@ def _get_linux_desktop_environment(env):
            return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        return _LinuxDesktopEnvironment.KDE
    else:
        return _LinuxDesktopEnvironment.OTHER
    return _LinuxDesktopEnvironment.OTHER


def _choose_linux_keyring(logger):
@@ -790,7 +793,7 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] or _choose_linux_keyring(logger)
    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring == _LinuxKeyring.KWALLET:
@@ -847,10 +850,9 @@ def pbkdf2_sha1(password, salt, iterations, key_length):


def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    plaintext = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    padding_length = plaintext[-1]
    plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
    try:
        return plaintext[:-padding_length].decode('utf-8')
        return plaintext.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
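The keyring one-liner fixed above is subtle: with an Enum, _LinuxKeyring[keyring] raises KeyError when keyring is None or empty, so the old `X[keyring] or fallback` form could never reach the fallback (and enum members are always truthy anyway, so the `or` was dead code even for valid names). A sketch of the difference, using a stand-in enum:

from enum import Enum, auto

class Keyring(Enum):  # stand-in for _LinuxKeyring
    KWALLET = auto()
    BASICTEXT = auto()

keyring = None  # i.e. the user did not name a keyring

try:
    chosen = Keyring[keyring] or Keyring.BASICTEXT  # old form
except KeyError:
    chosen = 'KeyError: the fallback was never evaluated'
print(chosen)

chosen = Keyring[keyring] if keyring else Keyring.BASICTEXT  # fixed form
print(chosen)  # Keyring.BASICTEXT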
@@ -17,11 +17,13 @@ from ..utils import (
    cli_valueless_option,
    cli_bool_option,
    _configuration_args,
    determine_ext,
    encodeFilename,
    encodeArgument,
    handle_youtubedl_headers,
    check_executable,
    Popen,
    remove_end,
)


@@ -304,7 +306,7 @@ class HttpieFD(ExternalFD):

    @classmethod
    def available(cls, path=None):
        return ExternalFD.available(cls, path or 'http')
        return super().available(path or 'http')

    def _make_cmd(self, tmpfilename, info_dict):
        cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
@@ -463,6 +465,15 @@ class FFmpegFD(ExternalFD):
            args += ['-f', 'flv']
        elif ext == 'mp4' and tmpfilename == '-':
            args += ['-f', 'mpegts']
        elif ext == 'unknown_video':
            ext = determine_ext(remove_end(tmpfilename, '.part'))
            if ext == 'unknown_video':
                self.report_warning(
                    'The video format is unknown and cannot be downloaded by ffmpeg. '
                    'Explicitly set the extension in the filename to attempt download in that format')
            else:
                self.report_warning(f'The video format is unknown. Trying to download as {ext} according to the filename')
                args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
        else:
            args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]

@@ -14,7 +14,7 @@ except ImportError:

from .common import FileDownloader
from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import (
    compat_os_name,
    compat_urllib_error,
@@ -366,8 +366,7 @@ class FragmentFD(FileDownloader):
            # not what it decrypts to.
            if self.params.get('test', False):
                return frag_content
            decrypted_data = aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)
            return decrypted_data[:-decrypted_data[-1]]
            return unpad_pkcs7(aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv))

        return decrypt_fragment

@@ -300,11 +300,10 @@ class ABCIViewShowSeriesIE(InfoExtractor):
            unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id)
        video_data = video_data['route']['pageData']['_embedded']

        if self.get_param('noplaylist') and 'highlightVideo' in video_data:
            self.to_screen('Downloading just the highlight video because of --no-playlist')
            return self.url_result(video_data['highlightVideo']['shareUrl'], ie=ABCIViewIE.ie_key())
        highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
        if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
            return self.url_result(highlight, ie=ABCIViewIE.ie_key())

        self.to_screen(f'Downloading playlist {show_id} - add --no-playlist to just download the highlight video')
        series = video_data['selectedSeries']
        return {
            '_type': 'playlist',
@@ -8,11 +8,10 @@ import os
import random

from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import (
    compat_HTTPError,
    compat_b64decode,
    compat_ord,
)
from ..utils import (
    ass_subtitles_timecode,
@@ -84,14 +83,11 @@ class ADNIE(InfoExtractor):
            return None

        # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
        dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
            bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
            bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
            bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
        ))
        subtitles_json = self._parse_json(
            dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
            None, fatal=False)
        dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
            compat_b64decode(enc_subtitles[24:]),
            binascii.unhexlify(self._K + 'ab9f52f5baae7c72'),
            compat_b64decode(enc_subtitles[:24])))
        subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
        if not subtitles_json:
            return None

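This ADN hunk is the same migration seen in cookies.py and fragment.py above: the intlist-based aes_cbc_decrypt plus hand-rolled unpadding gives way to the bytes-native aes_cbc_decrypt_bytes plus unpad_pkcs7. A rough before/after sketch, assuming a yt-dlp of this vintage is importable as yt_dlp (key, IV and ciphertext are placeholders):

from yt_dlp.aes import aes_cbc_decrypt, aes_cbc_decrypt_bytes, unpad_pkcs7
from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes

key = b'0123456789abcdef'  # placeholder 16-byte key
iv = b'\x00' * 16          # placeholder IV
ciphertext = b'\x00' * 32  # placeholder; any multiple of 16 bytes

# Old style: shuttle everything through lists of ints, strip padding by hand
dec = intlist_to_bytes(aes_cbc_decrypt(
    bytes_to_intlist(ciphertext), bytes_to_intlist(key), bytes_to_intlist(iv)))
dec = dec[:-dec[-1]]

# New style: stay in bytes and let the helper strip the padding
dec = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, iv))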
@@ -10,7 +10,11 @@ from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    qualities,
    traverse_obj,
    unified_strdate,
    unified_timestamp,
    update_url_query,
    url_or_none,
    urlencode_postdata,
    xpath_text,
@@ -380,3 +384,96 @@ class AfreecaTVIE(InfoExtractor):
        })

        return info


class AfreecaTVLiveIE(AfreecaTVIE):

    IE_NAME = 'afreecatv:live'
    _VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
    _TESTS = [{
        'url': 'https://play.afreecatv.com/pyh3646/237852185',
        'info_dict': {
            'id': '237852185',
            'ext': 'mp4',
            'title': '【 우루과이 오늘은 무슨일이? 】',
            'uploader': '박진우[JINU]',
            'uploader_id': 'pyh3646',
            'timestamp': 1640661495,
            'is_live': True,
        },
        'skip': 'Livestream has ended',
    }, {
        'url': 'http://play.afreeca.com/pyh3646/237852185',
        'only_matching': True,
    }, {
        'url': 'http://play.afreeca.com/pyh3646',
        'only_matching': True,
    }]

    _LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'

    _QUALITIES = ('sd', 'hd', 'hd2k', 'original')

    def _real_extract(self, url):
        broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')

        info = self._download_json(self._LIVE_API_URL, broadcaster_id, fatal=False,
                                   data=urlencode_postdata({'bid': broadcaster_id})) or {}
        channel_info = info.get('CHANNEL') or {}
        broadcaster_id = channel_info.get('BJID') or broadcaster_id
        broadcast_no = channel_info.get('BNO') or broadcast_no
        if not broadcast_no:
            raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True)

        formats = []
        quality_key = qualities(self._QUALITIES)
        for quality_str in self._QUALITIES:
            aid_response = self._download_json(
                self._LIVE_API_URL, broadcast_no, fatal=False,
                data=urlencode_postdata({
                    'bno': broadcast_no,
                    'stream_type': 'common',
                    'type': 'aid',
                    'quality': quality_str,
                }),
                note=f'Downloading access token for {quality_str} stream',
                errnote=f'Unable to download access token for {quality_str} stream')
            aid = traverse_obj(aid_response, ('CHANNEL', 'AID'))
            if not aid:
                continue

            stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
            stream_info = self._download_json(
                f'{stream_base_url}/broad_stream_assign.html', broadcast_no, fatal=False,
                query={
                    'return_type': channel_info.get('CDN', 'gcp_cdn'),
                    'broad_key': f'{broadcast_no}-common-{quality_str}-hls',
                },
                note=f'Downloading metadata for {quality_str} stream',
                errnote=f'Unable to download metadata for {quality_str} stream') or {}

            if stream_info.get('view_url'):
                formats.append({
                    'format_id': quality_str,
                    'url': update_url_query(stream_info['view_url'], {'aid': aid}),
                    'ext': 'mp4',
                    'protocol': 'm3u8',
                    'quality': quality_key(quality_str),
                })

        self._sort_formats(formats)

        station_info = self._download_json(
            'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
            query={'szBjId': broadcaster_id}, fatal=False,
            note='Downloading channel metadata', errnote='Unable to download channel metadata') or {}

        return {
            'id': broadcast_no,
            'title': channel_info.get('TITLE') or station_info.get('station_title'),
            'uploader': channel_info.get('BJNICK') or station_info.get('station_name'),
            'uploader_id': broadcaster_id,
            'timestamp': unified_timestamp(station_info.get('broad_start')),
            'formats': formats,
            'is_live': True,
        }

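The qualities(self._QUALITIES) call above builds a ranking function: entries listed later in the tuple rank higher, and the returned key is stored as each format's 'quality' so the sorter prefers 'original' over 'sd'. A simplified reimplementation of the idea (yt-dlp's helper behaves like this, but treat the code as a sketch):

def qualities(quality_ids):
    # Position in the tuple becomes the preference rank; unknown ids rank lowest
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q

quality_key = qualities(('sd', 'hd', 'hd2k', 'original'))
assert quality_key('original') > quality_key('hd')  # 3 > 1
assert quality_key('unknown') == -1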
@@ -33,19 +33,22 @@ class AparatIE(InfoExtractor):
        'only_matching': True,
    }]

    def _parse_options(self, webpage, video_id, fatal=True):
        return self._parse_json(self._search_regex(
            r'options\s*=\s*({.+?})\s*;', webpage, 'options', default='{}'), video_id)

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # Provides more metadata
        # If available, provides more metadata
        webpage = self._download_webpage(url, video_id, fatal=False)
        options = self._parse_options(webpage, video_id, fatal=False)

        if not webpage:
        if not options:
            webpage = self._download_webpage(
                'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
                video_id)

            options = self._parse_json(self._search_regex(
                r'options\s*=\s*({.+?})\s*;', webpage, 'options'), video_id)
                video_id, 'Downloading embed webpage')
            options = self._parse_options(webpage, video_id)

        formats = []
        for sources in (options.get('multiSRC') or []):

@@ -19,6 +19,7 @@ from ..utils import (
    get_element_by_id,
    HEADRequest,
    int_or_none,
    join_nonempty,
    KNOWN_EXTENSIONS,
    merge_dicts,
    mimetype2ext,
@@ -64,7 +65,7 @@ class ArchiveOrgIE(InfoExtractor):
        'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
        'uploader': 'yorkmba99@hotmail.com',
        'timestamp': 1387699629,
        'upload_date': "20131222",
        'upload_date': '20131222',
    },
}, {
    'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
@@ -150,8 +151,7 @@ class ArchiveOrgIE(InfoExtractor):

        # Archive.org metadata API doesn't clearly demarcate playlist entries
        # or subtitle tracks, so we get them from the embeddable player.
        embed_page = self._download_webpage(
            'https://archive.org/embed/' + identifier, identifier)
        embed_page = self._download_webpage(f'https://archive.org/embed/{identifier}', identifier)
        playlist = self._playlist_data(embed_page)

        entries = {}
@@ -166,17 +166,17 @@ class ArchiveOrgIE(InfoExtractor):
                'thumbnails': [],
                'artist': p.get('artist'),
                'track': p.get('title'),
                'subtitles': {}}
                'subtitles': {},
            }

            for track in p.get('tracks', []):
                if track['kind'] != 'subtitles':
                    continue

                entries[p['orig']][track['label']] = {
                    'url': 'https://archive.org/' + track['file'].lstrip('/')}
                    'url': 'https://archive.org/' + track['file'].lstrip('/')
                }

        metadata = self._download_json(
            'http://archive.org/metadata/' + identifier, identifier)
        metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
        m = metadata['metadata']
        identifier = m['identifier']

@@ -189,7 +189,7 @@ class ArchiveOrgIE(InfoExtractor):
            'license': m.get('licenseurl'),
            'release_date': unified_strdate(m.get('date')),
            'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
            'webpage_url': 'https://archive.org/details/' + identifier,
            'webpage_url': f'https://archive.org/details/{identifier}',
            'location': m.get('venue'),
            'release_year': int_or_none(m.get('year'))}

@@ -207,7 +207,7 @@ class ArchiveOrgIE(InfoExtractor):
                    'discnumber': int_or_none(f.get('disc')),
                    'release_year': int_or_none(f.get('year'))})
                entry = entries[f['name']]
            elif f.get('original') in entries:
            elif traverse_obj(f, 'original', expected_type=str) in entries:
                entry = entries[f['original']]
            else:
                continue
@@ -230,13 +230,12 @@ class ArchiveOrgIE(InfoExtractor):
                'filesize': int_or_none(f.get('size')),
                'protocol': 'https'})

        # Sort available formats by filesize
        for entry in entries.values():
            entry['formats'] = list(sorted(entry['formats'], key=lambda x: x.get('filesize', -1)))
            self._sort_formats(entry['formats'])

        if len(entries) == 1:
            # If there's only one item, use it as the main info dict
            only_video = entries[list(entries.keys())[0]]
            only_video = next(iter(entries.values()))
            if entry_id:
                info = merge_dicts(only_video, info)
            else:
@@ -261,19 +260,19 @@ class ArchiveOrgIE(InfoExtractor):

class YoutubeWebArchiveIE(InfoExtractor):
    IE_NAME = 'web.archive:youtube'
    IE_DESC = 'web.archive.org saved youtube videos'
    _VALID_URL = r"""(?x)^
        (?:https?://)?web\.archive\.org/
        (?:web/)?
        (?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)?  # /web and the version index is optional

        (?:https?(?::|%3[Aa])//)?
        (?:
            (?:\w+\.)?youtube\.com(?::(?:80|443))?/watch(?:\.php)?(?:\?|%3[fF])(?:[^\#]+(?:&|%26))?v(?:=|%3[dD])  # Youtube URL
            |(?:wayback-fakeurl\.archive\.org/yt/)  # Or the internal fake url
        )
        (?P<id>[0-9A-Za-z_-]{11})(?:%26|\#|&|$)
    """
    IE_DESC = 'web.archive.org saved youtube videos, "ytarchive:" prefix'
    _VALID_URL = r'''(?x)(?:(?P<prefix>ytarchive:)|
            (?:https?://)?web\.archive\.org/
            (?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)?  # /web and the version index is optional
            (?:https?(?::|%3[Aa])//)?(?:
                (?:\w+\.)?youtube\.com(?::(?:80|443))?/watch(?:\.php)?(?:\?|%3[fF])(?:[^\#]+(?:&|%26))?v(?:=|%3[dD])  # Youtube URL
                |(?:wayback-fakeurl\.archive\.org/yt/)  # Or the internal fake url
            )
        )(?P<id>[0-9A-Za-z_-]{11})
        (?(prefix)
            (?::(?P<date2>[0-9]{14}))?$|
            (?:%26|[#&]|$)
        )'''

    _TESTS = [
        {
@@ -438,7 +437,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
        }, {
            'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg',
            'only_matching': True
        }
        }, {
            'url': 'ytarchive:BaW_jenozKc:20050214000000',
            'only_matching': True
        }, {
            'url': 'ytarchive:BaW_jenozKc',
            'only_matching': True
        },
    ]
    _YT_INITIAL_DATA_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*({.+?})[)\s]*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE
@@ -484,7 +489,6 @@ class YoutubeWebArchiveIE(InfoExtractor):
            page_title, 'title', default='')

    def _extract_metadata(self, video_id, webpage):

        search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None)) if webpage else (lambda x: None))
        player_response = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, video_id, 'initial player response') or {}
@@ -596,7 +600,7 @@ class YoutubeWebArchiveIE(InfoExtractor):

        # Prefer the new polymer UI captures as we support extracting more metadata from them
        # WBM captures seem to all switch to this layout ~July 2020
        modern_captures = list(filter(lambda x: x >= 20200701000000, all_captures))
        modern_captures = [x for x in all_captures if x >= 20200701000000]
        if modern_captures:
            capture_dates.append(modern_captures[0])
        capture_dates.append(url_date)
@@ -608,11 +612,11 @@ class YoutubeWebArchiveIE(InfoExtractor):

        # Fallbacks if any of the above fail
        capture_dates.extend([self._OLDEST_CAPTURE_DATE, self._NEWEST_CAPTURE_DATE])
        return orderedSet(capture_dates)
        return orderedSet(filter(None, capture_dates))

    def _real_extract(self, url):

        url_date, video_id = self._match_valid_url(url).groups()
        video_id, url_date, url_date_2 = self._match_valid_url(url).group('id', 'date', 'date2')
        url_date = url_date or url_date_2

        urlh = None
        try:
@@ -629,11 +633,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
            raise

        capture_dates = self._get_capture_dates(video_id, int_or_none(url_date))
        self.write_debug('Captures to try: ' + ', '.join(str(i) for i in capture_dates if i is not None))
        self.write_debug('Captures to try: ' + join_nonempty(*capture_dates, delim=', '))
        info = {'id': video_id}
        for capture in capture_dates:
            if not capture:
                continue
            webpage = self._download_webpage(
                (self._WAYBACK_BASE_URL + 'http://www.youtube.com/watch?v=%s') % (capture, video_id),
                video_id=video_id, fatal=False, errnote='unable to download capture webpage (it may not be archived)',
@@ -648,7 +650,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
            info['thumbnails'] = self._extract_thumbnails(video_id)

        if urlh:
            url = compat_urllib_parse_unquote(urlh.url)
            url = compat_urllib_parse_unquote(urlh.geturl())
            video_file_url_qs = parse_qs(url)
            # Attempt to recover any ext & format info from playback url & response headers
            format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}

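The reworked _VALID_URL adds a ytarchive:<id>[:<date>] shorthand next to the full web.archive.org URL forms, with the conditional (?(prefix)...) group requiring the trailing :date syntax only for the prefixed form. A quick check of just the prefix branch, simplified for illustration:

import re

PREFIX_RE = re.compile(r'^ytarchive:(?P<id>[0-9A-Za-z_-]{11})(?::(?P<date>[0-9]{14}))?$')

for url in ('ytarchive:BaW_jenozKc', 'ytarchive:BaW_jenozKc:20050214000000'):
    m = PREFIX_RE.match(url)
    print(m.group('id'), m.group('date'))
# BaW_jenozKc None
# BaW_jenozKc 20050214000000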
@@ -376,9 +376,24 @@ class ARDIE(InfoExtractor):
            formats.append(f)
        self._sort_formats(formats)

        _SUB_FORMATS = (
            ('./dataTimedText', 'ttml'),
            ('./dataTimedTextNoOffset', 'ttml'),
            ('./dataTimedTextVtt', 'vtt'),
        )

        subtitles = {}
        for subsel, subext in _SUB_FORMATS:
            for node in video_node.findall(subsel):
                subtitles.setdefault('de', []).append({
                    'url': node.attrib['url'],
                    'ext': subext,
                })

        return {
            'id': xpath_text(video_node, './videoId', default=display_id),
            'formats': formats,
            'subtitles': subtitles,
            'display_id': display_id,
            'title': video_node.find('./title').text,
            'duration': parse_duration(video_node.find('./duration').text),

@@ -7,6 +7,7 @@ from ..compat import (
    compat_urllib_parse_urlparse,
)
from ..utils import (
    format_field,
    float_or_none,
    int_or_none,
    parse_iso8601,
@@ -92,7 +93,7 @@ class ArnesIE(InfoExtractor):
            'timestamp': parse_iso8601(video.get('creationTime')),
            'channel': channel.get('name'),
            'channel_id': channel_id,
            'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
            'channel_url': format_field(channel_id, template=f'{self._BASE_URL}/?channel=%s'),
            'duration': float_or_none(video.get('duration'), 1000),
            'view_count': int_or_none(video.get('views')),
            'tags': video.get('hashtags'),

@@ -9,6 +9,7 @@ from ..compat import (
    compat_str,
)
from ..utils import (
    format_field,
    int_or_none,
    parse_iso8601,
    smuggle_url,
@@ -43,7 +44,7 @@ class AWAANBaseIE(InfoExtractor):
            'id': video_id,
            'title': title,
            'description': video_data.get('description_en') or video_data.get('description_ar'),
            'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % img if img else None,
            'thumbnail': format_field(img, template='http://admin.mangomolo.com/analytics/%s'),
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
            'is_live': is_live,

@@ -1,5 +1,6 @@
# coding: utf-8

import base64
import hashlib
import itertools
import functools
@@ -16,9 +17,9 @@ from ..utils import (
    ExtractorError,
    int_or_none,
    float_or_none,
    mimetype2ext,
    parse_iso8601,
    traverse_obj,
    try_get,
    parse_count,
    smuggle_url,
    srt_subtitles_timecode,
@@ -51,16 +52,14 @@ class BiliBiliIE(InfoExtractor):
        'url': 'http://www.bilibili.com/video/av1074402/',
        'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
        'info_dict': {
            'id': '1074402',
            'ext': 'flv',
            'id': '1074402_part1',
            'ext': 'mp4',
            'title': '【金坷垃】金泡沫',
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'duration': 308.067,
            'timestamp': 1398012678,
            'upload_date': '20140420',
            'thumbnail': r're:^https?://.+\.jpg',
            'uploader': '菊子桑',
            'uploader_id': '156160',
            'uploader': '菊子桑',
            'upload_date': '20140420',
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'timestamp': 1398012678,
        },
    }, {
        # Tested in BiliBiliBangumiIE
@@ -74,49 +73,27 @@ class BiliBiliIE(InfoExtractor):
        'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
        'md5': '3f721ad1e75030cc06faf73587cfec57',
        'info_dict': {
            'id': '100643',
            'id': '100643_part1',
            'ext': 'mp4',
            'title': 'CHAOS;CHILD',
            'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
        },
        'skip': 'Geo-restricted to China',
    }, {
        # Title with double quotes
        'url': 'http://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': '8903802',
            'id': '8903802_part1',
            'ext': 'mp4',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'upload_date': '20170301',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
            'timestamp': 1488382634,
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
        },
        'params': {
            'skip_download': True,
        },
        'playlist': [{
            'info_dict': {
                'id': '8903802_part1',
                'ext': 'flv',
                'title': '阿滴英文|英文歌分享#6 "Closer',
                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
                'uploader': '阿滴英文',
                'uploader_id': '65880958',
                'timestamp': 1488382634,
                'upload_date': '20170301',
            },
            'params': {
                'skip_download': True,
            },
        }, {
            'info_dict': {
                'id': '8903802_part2',
                'ext': 'flv',
                'title': '阿滴英文|英文歌分享#6 "Closer',
                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
                'uploader': '阿滴英文',
                'uploader_id': '65880958',
                'timestamp': 1488382634,
                'upload_date': '20170301',
            },
            'params': {
                'skip_download': True,
            },
        }]
    }, {
        # new BV video id format
        'url': 'https://www.bilibili.com/video/BV1JE411F741',
@@ -151,6 +128,7 @@ class BiliBiliIE(InfoExtractor):
        av_id, bv_id = self._get_video_id_set(video_id, mobj.group('id_bv') is not None)
        video_id = av_id

        info = {}
        anime_id = mobj.group('anime_id')
        page_id = mobj.group('page')
        webpage = self._download_webpage(url, video_id)
@@ -202,35 +180,48 @@ class BiliBiliIE(InfoExtractor):
        }
        headers.update(self.geo_verification_headers())

        video_info = self._parse_json(
            self._search_regex(r'window.__playinfo__\s*=\s*({.+?})</script>', webpage, 'video info', default=None) or '{}',
            video_id, fatal=False)
        video_info = video_info.get('data') or {}

        durl = traverse_obj(video_info, ('dash', 'video'))
        audios = traverse_obj(video_info, ('dash', 'audio')) or []
        entries = []

        RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
        for num, rendition in enumerate(RENDITIONS, start=1):
            payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
            sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()

            video_info = self._download_json(
                'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
                video_id, note='Downloading video info page',
                headers=headers, fatal=num == len(RENDITIONS))

            if not video_info:
                continue
            video_info = self._download_json(
                'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
                video_id, note='Downloading video info page',
                headers=headers, fatal=num == len(RENDITIONS))
            if not video_info:
                continue

            if 'durl' not in video_info:
            if not durl and 'durl' not in video_info:
                if num < len(RENDITIONS):
                    continue
                self._report_error(video_info)

            for idx, durl in enumerate(video_info['durl']):
                formats = [{
                    'url': durl['url'],
                    'filesize': int_or_none(durl['size']),
                }]
                for backup_url in durl.get('backup_url', []):
            formats = []
            for idx, durl in enumerate(durl or video_info['durl']):
                formats.append({
                    'url': durl.get('baseUrl') or durl.get('base_url') or durl.get('url'),
                    'ext': mimetype2ext(durl.get('mimeType') or durl.get('mime_type')),
                    'fps': int_or_none(durl.get('frameRate') or durl.get('frame_rate')),
                    'width': int_or_none(durl.get('width')),
                    'height': int_or_none(durl.get('height')),
                    'vcodec': durl.get('codecs'),
                    'acodec': 'none' if audios else None,
                    'tbr': float_or_none(durl.get('bandwidth'), scale=1000),
                    'filesize': int_or_none(durl.get('size')),
                })
                for backup_url in traverse_obj(durl, 'backup_url', expected_type=list) or []:
                    formats.append({
                        'url': backup_url,
                        # backup URLs have lower priorities
                        'quality': -2 if 'hd.mp4' in backup_url else -3,
                    })

@@ -238,30 +229,47 @@ class BiliBiliIE(InfoExtractor):
                a_format.setdefault('http_headers', {}).update({
                    'Referer': url,
                })

                self._sort_formats(formats)

                entries.append({
                    'id': '%s_part%s' % (video_id, idx),
                    'duration': float_or_none(durl.get('length'), 1000),
                    'formats': formats,
            for audio in audios:
                formats.append({
                    'url': audio.get('baseUrl') or audio.get('base_url') or audio.get('url'),
                    'ext': mimetype2ext(audio.get('mimeType') or audio.get('mime_type')),
                    'fps': int_or_none(audio.get('frameRate') or audio.get('frame_rate')),
                    'width': int_or_none(audio.get('width')),
                    'height': int_or_none(audio.get('height')),
                    'acodec': audio.get('codecs'),
                    'vcodec': 'none',
                    'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
                    'filesize': int_or_none(audio.get('size'))
                })
                for backup_url in traverse_obj(audio, 'backup_url', expected_type=list) or []:
                    formats.append({
                        'url': backup_url,
                        # backup URLs have lower priorities
                        'quality': -3,
                    })

            info.update({
                'id': video_id,
                'duration': float_or_none(durl.get('length'), 1000),
                'formats': formats,
            })
            break

        title = self._html_search_regex(
            (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
            group='title')
        self._sort_formats(formats)

        title = self._html_search_regex((
            r'<h1[^>]+title=(["\'])(?P<content>[^"\']+)',
            r'(?s)<h1[^>]*>(?P<content>.+?)</h1>',
            self._meta_regex('title')
        ), webpage, 'title', group='content', fatal=False)

        # Get part title for anthologies
        if page_id is not None:
            # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video
            part_title = try_get(
                self._download_json(
                    f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
                    video_id, note='Extracting videos in anthology'),
                lambda x: x['data'][int(page_id) - 1]['part'])
            title = part_title or title
            # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video.
            part_info = traverse_obj(self._download_json(
                f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
                video_id, note='Extracting videos in anthology'), 'data', expected_type=list)
            title = title if len(part_info) == 1 else traverse_obj(part_info, (int(page_id) - 1, 'part')) or title

        description = self._html_search_meta('description', webpage)
        timestamp = unified_timestamp(self._html_search_regex(
@@ -271,15 +279,15 @@ class BiliBiliIE(InfoExtractor):
        thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)

        # TODO 'view_count' requires deobfuscating Javascript
        info = {
            'id': str(video_id) if page_id is None else '%s_part%s' % (video_id, page_id),
        info.update({
            'id': f'{video_id}_part{page_id or 1}',
            'cid': cid,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'thumbnail': thumbnail,
            'duration': float_or_none(video_info.get('timelength'), scale=1000),
        }
        })

        uploader_mobj = re.search(
            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
@@ -300,7 +308,7 @@ class BiliBiliIE(InfoExtractor):
            video_id, fatal=False, note='Downloading tags'), ('data', ..., 'tag_name')),
        }

        entries[0]['subtitles'] = {
        info['subtitles'] = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
@@ -335,12 +343,10 @@ class BiliBiliIE(InfoExtractor):
            entry['id'] = '%s_part%d' % (video_id, (idx + 1))

        return {
            '_type': 'multi_video',
            'id': str(video_id),
            'bv_id': bv_id,
            'title': title,
            'description': description,
            'entries': entries,
            **info, **top_level_info
        }

@@ -481,9 +487,9 @@ class BilibiliChannelIE(InfoExtractor):
            data = self._download_json(
                self._API_URL % (list_id, page_num), list_id, note=f'Downloading page {page_num}')['data']

            max_count = max_count or try_get(data, lambda x: x['page']['count'])
            max_count = max_count or traverse_obj(data, ('page', 'count'))

            entries = try_get(data, lambda x: x['list']['vlist'])
            entries = traverse_obj(data, ('list', 'vlist'))
            if not entries:
                return
            for entry in entries:
@@ -521,7 +527,7 @@ class BilibiliCategoryIE(InfoExtractor):
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note='Extracting results from page %s of %s' % (page_num, num_pages))

        video_list = try_get(parsed_json, lambda x: x['data']['archives'], list)
        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError('Failed to retrieve video list for page %d' % page_num)

@@ -551,7 +557,7 @@ class BilibiliCategoryIE(InfoExtractor):

        api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        page_data = try_get(page_json, lambda x: x['data']['page'], dict)
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')
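Several hunks here swap try_get(x, lambda x: x['a']['b'], type) for traverse_obj(x, ('a', 'b'), expected_type=type). Both return None rather than raising when a key is missing; the tuple-path form just avoids the lambda. A hedged mini-implementation showing the equivalence (yt-dlp's traverse_obj supports far more path types than this):

def try_get(src, getter, expected_type=None):
    # Apply the getter, swallowing the usual lookup errors
    try:
        v = getter(src)
    except (AttributeError, KeyError, TypeError, IndexError):
        return None
    return v if expected_type is None or isinstance(v, expected_type) else None

def traverse_simple(obj, path, expected_type=None):
    # Minimal subset of traverse_obj: walk a tuple of dict keys
    for key in path:
        if not isinstance(obj, dict) or key not in obj:
            return None
        obj = obj[key]
    return obj if expected_type is None or isinstance(obj, expected_type) else None

data = {'data': {'archives': [1, 2, 3]}}
assert try_get(data, lambda x: x['data']['archives'], list) == [1, 2, 3]
assert traverse_simple(data, ('data', 'archives'), list) == [1, 2, 3]
assert traverse_simple(data, ('data', 'missing')) is None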
@@ -724,14 +730,30 @@ class BiliBiliPlayerIE(InfoExtractor):

class BiliIntlBaseIE(InfoExtractor):
    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'

    def _call_api(self, endpoint, *args, **kwargs):
        return self._download_json(self._API_URL + endpoint, *args, **kwargs)['data']
        json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
        if json.get('code'):
            if json['code'] in (10004004, 10004005, 10023006):
                self.raise_login_required()
            elif json['code'] == 10004001:
                self.raise_geo_restricted()
            else:
                if json.get('message') and str(json['code']) != json['message']:
                    errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
                else:
                    errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
                if kwargs.get('fatal'):
                    raise ExtractorError(errmsg)
                else:
                    self.report_warning(errmsg)
        return json.get('data')

    def json2srt(self, json):
        data = '\n\n'.join(
            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
            for i, line in enumerate(json['body']))
            for i, line in enumerate(json['body']) if line.get('content'))
        return data

    def _get_subtitles(self, ep_id):
@@ -755,16 +777,6 @@ class BiliIntlBaseIE(InfoExtractor):
    def _get_formats(self, ep_id):
        video_json = self._call_api(f'/web/playurl?ep_id={ep_id}&platform=web', ep_id,
                                    note='Downloading video formats', errnote='Unable to download video formats')
        if video_json.get('code'):
            if video_json['code'] in (10004004, 10004005, 10023006):
                self.raise_login_required(method='cookies')
            elif video_json['code'] == 10004001:
                self.raise_geo_restricted()
            elif video_json.get('message') and str(video_json['code']) != video_json['message']:
                raise ExtractorError(
                    f'Unable to download video formats: {self.IE_NAME} said: {video_json["message"]}', expected=True)
            else:
                raise ExtractorError('Unable to download video formats')
        video_json = video_json['playurl']
        formats = []
        for vid in video_json.get('video') or []:
@@ -810,10 +822,49 @@ class BiliIntlIE(BiliIntlBaseIE):
        'extractor_key': BiliIntlIE.ie_key(),
    }

    def _login(self):
        username, password = self._get_login_info()
        if username is None:
            return

        try:
            from Cryptodome.PublicKey import RSA
            from Cryptodome.Cipher import PKCS1_v1_5
        except ImportError:
            try:
                from Crypto.PublicKey import RSA
                from Crypto.Cipher import PKCS1_v1_5
            except ImportError:
                raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        public_key = RSA.importKey(key_data['key'])
        password_hash = PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(password_hash).decode('ascii'),
                'keep_me': 'true',
                's_locale': 'en_US',
                'isTrusted': 'true'
            }), note='Logging in', errnote='Unable to log in')
        if login_post.get('code'):
            if login_post.get('message'):
                raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
            else:
                raise ExtractorError('Unable to log in')

    def _real_initialize(self):
        self._login()


class BiliIntlIE(BiliIntlBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<season_id>\d+)/(?P<id>\d+)'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
@@ -823,6 +874,7 @@ class BiliIntlIE(BiliIntlBaseIE):
            'episode_number': 2,
        }
    }, {
        # Non-Bstation page
        'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
        'info_dict': {
            'id': '11005006',
@@ -831,6 +883,17 @@ class BiliIntlIE(BiliIntlBaseIE):
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 3,
        }
    }, {
        # Subtitle with empty content
        'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
        'info_dict': {
            'id': '10131790',
            'ext': 'mp4',
            'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'only_matching': True,

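The new _login encrypts key_hash + password with the site-supplied RSA public key under PKCS#1 v1.5 and submits it base64-encoded. A stripped-down sketch of just the crypto step, assuming pycryptodomex is installed and substituting a locally generated key for the one fetched from passport.bilibili.tv:

import base64
from Cryptodome.PublicKey import RSA
from Cryptodome.Cipher import PKCS1_v1_5

key = RSA.generate(2048)                  # stand-in for key_data['key']
key_hash, password = 'abcdef', 'hunter2'  # stand-ins for key_data['hash'] and the user password

cipher = PKCS1_v1_5.new(key.publickey())
encrypted = cipher.encrypt((key_hash + password).encode('utf-8'))
payload = base64.b64encode(encrypted).decode('ascii')  # sent as the 'password' form field
print(len(payload) > 0)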
yt_dlp/extractor/callin.py (new file, 114 lines)
@@ -0,0 +1,114 @@
# coding: utf-8
from .common import InfoExtractor
from ..utils import (
    traverse_obj,
    float_or_none,
    int_or_none
)


class CallinIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
        'info_dict': {
            'id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd',
            'title': 'The Title IX Regime and the Long March Through and Beyond the Institutions',
            'ext': 'ts',
            'display_id': 'the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
            'thumbnail': 're:https://.+\\.png',
            'description': 'First episode',
            'uploader': 'Wesley Yang',
            'timestamp': 1639404128.65,
            'upload_date': '20211213',
            'uploader_id': 'wesyang',
            'uploader_url': 'http://wesleyyang.substack.com',
            'channel': 'Conversations in Year Zero',
            'channel_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553',
            'channel_url': 'https://callin.com/show/conversations-in-year-zero-oJNllRFSfx',
            'duration': 9951.936,
            'view_count': int,
            'categories': ['News & Politics', 'History', 'Technology'],
            'cast': ['Wesley Yang', 'KC Johnson', 'Gabi Abramovich'],
            'series': 'Conversations in Year Zero',
            'series_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553',
            'episode': 'The Title IX Regime and the Long March Through and Beyond the Institutions',
            'episode_number': 1,
            'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd'
        }
    }]

    def try_get_user_name(self, d):
        names = [d.get(n) for n in ('first', 'last')]
        if None in names:
            # next() takes its fallback positionally; a `default=` keyword raises TypeError
            return next((n for n in names if n), None)
        return ' '.join(names)

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        next_data = self._search_nextjs_data(webpage, display_id)
        episode = next_data['props']['pageProps']['episode']

        id = episode['id']
        title = (episode.get('title')
                 or self._og_search_title(webpage, fatal=False)
                 or self._html_search_regex('<title>(.*?)</title>', webpage, 'title'))
        url = episode['m3u8']
        formats = self._extract_m3u8_formats(url, display_id, ext='ts')
        self._sort_formats(formats)

        show = traverse_obj(episode, ('show', 'title'))
        show_id = traverse_obj(episode, ('show', 'id'))

        show_json = None
        app_slug = (self._html_search_regex(
            '<script\\s+src=["\']/_next/static/([-_a-zA-Z0-9]+)/_',
            webpage, 'app slug', fatal=False) or next_data.get('buildId'))
        show_slug = traverse_obj(episode, ('show', 'linkObj', 'resourceUrl'))
        if app_slug and show_slug and '/' in show_slug:
            show_slug = show_slug.rsplit('/', 1)[1]
            show_json_url = f'https://www.callin.com/_next/data/{app_slug}/show/{show_slug}.json'
            show_json = self._download_json(show_json_url, display_id, fatal=False)

        host = (traverse_obj(show_json, ('pageProps', 'show', 'hosts', 0))
                or traverse_obj(episode, ('speakers', 0)))

        host_nick = traverse_obj(host, ('linkObj', 'resourceUrl'))
        host_nick = host_nick.rsplit('/', 1)[1] if (host_nick and '/' in host_nick) else None

        cast = list(filter(None, [
            self.try_get_user_name(u) for u in
            traverse_obj(episode, (('speakers', 'callerTags'), ...)) or []
        ]))

        episode_list = traverse_obj(show_json, ('pageProps', 'show', 'episodes')) or []
        episode_number = next(
            (len(episode_list) - i for (i, e) in enumerate(episode_list) if e.get('id') == id),
            None)

        return {
            'id': id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': traverse_obj(episode, ('show', 'photo')),
            'description': episode.get('description'),
            'uploader': self.try_get_user_name(host) if host else None,
            'timestamp': episode.get('publishedAt'),
            'uploader_id': host_nick,
            'uploader_url': traverse_obj(show_json, ('pageProps', 'show', 'url')),
            'channel': show,
            'channel_id': show_id,
            'channel_url': traverse_obj(episode, ('show', 'linkObj', 'resourceUrl')),
            'duration': float_or_none(episode.get('runtime')),
            'view_count': int_or_none(episode.get('plays')),
            'categories': traverse_obj(episode, ('show', 'categorizations', ..., 'name')),
            'cast': cast if cast else None,
            'series': show,
            'series_id': show_id,
            'episode': title,
            'episode_number': episode_number,
            'episode_id': id
        }
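_search_nextjs_data pulls the __NEXT_DATA__ JSON blob that Next.js sites embed in every page, after which the extractor walks props.pageProps.episode. A rough standalone equivalent using only re and json (the HTML snippet is fabricated for illustration):

import json
import re

html = '''<script id="__NEXT_DATA__" type="application/json">
{"props": {"pageProps": {"episode": {"id": "abc", "title": "Example"}}}}
</script>'''

m = re.search(
    r'<script[^>]+id=["\']__NEXT_DATA__["\'][^>]*>(?P<json>.+?)</script>',
    html, re.DOTALL)
next_data = json.loads(m.group('json'))
print(next_data['props']['pageProps']['episode']['title'])  # Example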
@@ -13,6 +13,8 @@ class CAM4IE(InfoExtractor):
            'ext': 'mp4',
            'title': 're:^foxynesss [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'age_limit': 18,
            'live_status': 'is_live',
            'thumbnail': 'https://snapshots.xcdnpro.com/thumbnails/foxynesss',
        }
    }

@@ -29,4 +31,5 @@ class CAM4IE(InfoExtractor):
            'is_live': True,
            'age_limit': 18,
            'formats': formats,
            'thumbnail': f'https://snapshots.xcdnpro.com/thumbnails/{channel_id}',
        }

@@ -78,11 +78,11 @@ class CanalAlphaIE(InfoExtractor):
            'height': try_get(video, lambda x: x['res']['height'], expected_type=int),
        } for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')]
        if manifests.get('hls'):
            m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], id)
            m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], video_id=id)
            formats.extend(m3u8_frmts)
            subtitles = self._merge_subtitles(subtitles, m3u8_subs)
        if manifests.get('dash'):
            dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'], id)
            dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'])
            formats.extend(dash_frmts)
            subtitles = self._merge_subtitles(subtitles, dash_subs)
        self._sort_formats(formats)

@@ -76,7 +76,7 @@ class CanvasIE(InfoExtractor):
            'vrtPlayerToken': vrtPlayerToken,
            'client': 'null',
        }, expected_status=400)
        if not data.get('title'):
        if 'title' not in data:
            code = data.get('code')
            if code == 'AUTHENTICATION_REQUIRED':
                self.raise_login_required()
@@ -84,7 +84,8 @@ class CanvasIE(InfoExtractor):
                self.raise_geo_restricted(countries=['BE'])
            raise ExtractorError(data.get('message') or code, expected=True)

        title = data['title']
        # Note: The title may be an empty string
        title = data['title'] or f'{site_id} {video_id}'
        description = data.get('description')

        formats = []

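The Canvas change distinguishes a missing title (an error response from the API) from a title that is present but empty, which is now tolerated with a generated fallback. The difference between the two checks in one sketch:

error_response = {'code': 'AUTHENTICATION_REQUIRED'}
empty_title = {'title': ''}

for data in (error_response, empty_title):
    old = not data.get('title')   # fires for both cases
    new = 'title' not in data     # fires only when the key is absent
    print(old, new)
# True True   <- error response: both checks fire
# True False  <- empty title: only the old check fired (wrongly treated as an error)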
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    format_field,
    float_or_none,
    int_or_none,
    try_get,
@@ -43,7 +44,7 @@ class CarambaTVIE(InfoExtractor):
        formats = [{
            'url': base_url + f['fn'],
            'height': int_or_none(f.get('height')),
            'format_id': '%sp' % f['height'] if f.get('height') else None,
            'format_id': format_field(f, 'height', '%sp'),
        } for f in video['qualities'] if f.get('fn')]
        self._sort_formats(formats)

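format_field appears in three of these hunks (arnes, awaan, carambatv), replacing the `template % value if value else None` idiom. A simplified sketch of its behaviour as used here, with the signature reduced to the parameters exercised above; the real helper has additional options, and the exact default value here is an assumption:

def format_field(obj, field=None, template='%s', ignore=(None, ''), default=None):
    # Look the value up on obj when a field name is given, else use obj itself
    value = obj.get(field) if field is not None else obj
    return template % value if value not in ignore else default

assert format_field({'height': 720}, 'height', '%sp') == '720p'
assert format_field({'height': None}, 'height', '%sp') is None
assert format_field('abc', template='http://admin.mangomolo.com/analytics/%s') \
    == 'http://admin.mangomolo.com/analytics/abc'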
@@ -162,7 +162,8 @@ class CCTVIE(InfoExtractor):
                'url': video_url,
                'format_id': 'http',
                'quality': quality,
                'source_preference': -10
                # Sample clip
                'preference': -10
            })

        hls_url = try_get(data, lambda x: x['hls_url'], compat_str)

@@ -177,6 +177,7 @@ class CeskaTelevizeIE(InfoExtractor):
        is_live = item.get('type') == 'LIVE'
        formats = []
        for format_id, stream_url in item.get('streamUrls', {}).items():
            stream_url = stream_url.replace('https://', 'http://')
            if 'playerType=flash' in stream_url:
                stream_formats = self._extract_m3u8_formats(
                    stream_url, playlist_id, 'mp4', 'm3u8_native',

@@ -45,6 +45,7 @@ from ..utils import (
determine_ext,
determine_protocol,
dict_get,
encode_data_uri,
error_to_compat_str,
extract_attributes,
ExtractorError,
@@ -243,11 +244,16 @@ class InfoExtractor(object):
uploader: Full name of the video uploader.
license: License name the video is licensed under.
creator: The creator of the video.
release_timestamp: UNIX timestamp of the moment the video was released.
release_date: The date (YYYYMMDD) when the video was released.
timestamp: UNIX timestamp of the moment the video was uploaded
upload_date: Video upload date (YYYYMMDD).
If not explicitly set, calculated from timestamp.
If not explicitly set, calculated from timestamp
release_timestamp: UNIX timestamp of the moment the video was released.
If it is not clear whether to use timestamp or this, use the former
release_date: The date (YYYYMMDD) when the video was released.
If not explicitly set, calculated from release_timestamp
modified_timestamp: UNIX timestamp of the moment the video was last modified.
modified_date: The date (YYYYMMDD) when the video was last modified.
If not explicitly set, calculated from modified_timestamp
uploader_id: Nickname or id of the video uploader.
uploader_url: Full URL to a personal webpage of the video uploader.
channel: Full name of the channel the video is uploaded on.
@@ -255,6 +261,7 @@ class InfoExtractor(object):
fields. This depends on a particular extractor.
channel_id: Id of the channel.
channel_url: Full URL to a channel webpage.
channel_follower_count: Number of followers of the channel.
location: Physical location where the video was filmed.
subtitles: The available subtitles as a dictionary in the format
{tag: subformats}. "tag" is usually a language code, and
@@ -370,6 +377,7 @@ class InfoExtractor(object):
disc_number: Number of the disc or other physical medium the track belongs to,
as an integer.
release_year: Year (YYYY) when the album was released.
composer: Composer of the piece

Unless mentioned otherwise, the fields should be Unicode strings.

@@ -383,6 +391,11 @@ class InfoExtractor(object):
Additionally, playlists can have "id", "title", and any other relevant
attributes with the same semantics as videos (see above).

It can also have the following optional fields:

playlist_count: The total number of videos in a playlist. If not given,
YoutubeDL tries to calculate it from "entries"


_type "multi_video" indicates that there are multiple videos that
form a single show, for example, multiple acts of an opera or TV episode.
@@ -1108,39 +1121,39 @@ class InfoExtractor(object):

# Methods for following #608
@staticmethod
def url_result(url, ie=None, video_id=None, video_title=None, **kwargs):
def url_result(url, ie=None, video_id=None, video_title=None, *, url_transparent=False, **kwargs):
"""Returns a URL that points to a page that should be processed"""
# TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
'url': url,
'ie_key': ie}
video_info.update(kwargs)
if ie is not None:
kwargs['ie_key'] = ie if isinstance(ie, str) else ie.ie_key()
if video_id is not None:
video_info['id'] = video_id
kwargs['id'] = video_id
if video_title is not None:
video_info['title'] = video_title
return video_info
kwargs['title'] = video_title
return {
**kwargs,
'_type': 'url_transparent' if url_transparent else 'url',
'url': url,
}
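For reference, a sketch of how the reworked helper is called; the URL, IE name and title below are illustrative, not taken from this diff:

entry = self.url_result(
    'https://example.com/watch/42', ie='Example', video_id='42',
    url_transparent=True, title='Title override')
# -> {'ie_key': 'Example', 'id': '42', 'title': 'Title override',
#     '_type': 'url_transparent', 'url': 'https://example.com/watch/42'}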

def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
urls = orderedSet(
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
for m in matches)
return self.playlist_result(
urls, playlist_id=playlist_id, playlist_title=playlist_title)
def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None, **kwargs):
urls = (self.url_result(self._proto_relative_url(m), ie)
for m in orderedSet(map(getter, matches) if getter else matches))
return self.playlist_result(urls, playlist_id, playlist_title, **kwargs)

@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None, **kwargs):
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None, *, multi_video=False, **kwargs):
"""Returns a playlist"""
video_info = {'_type': 'playlist',
'entries': entries}
video_info.update(kwargs)
if playlist_id:
video_info['id'] = playlist_id
kwargs['id'] = playlist_id
if playlist_title:
video_info['title'] = playlist_title
kwargs['title'] = playlist_title
if playlist_description is not None:
video_info['description'] = playlist_description
return video_info
kwargs['description'] = playlist_description
return {
**kwargs,
'_type': 'multi_video' if multi_video else 'playlist',
'entries': entries,
}
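A sketch of the corresponding call sites (entries and ids illustrative):

# Acts of a single opera, to be treated as one multi-part download:
return self.playlist_result(entries, video_id, title, multi_video=True)
# Ordinary playlist; extra attributes pass straight through **kwargs:
return self.playlist_result(entries, playlist_id, title, playlist_count=10)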

def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
"""
@@ -1278,6 +1291,7 @@ class InfoExtractor(object):
return self._og_search_property('description', html, fatal=False, **kargs)

def _og_search_title(self, html, **kargs):
kargs.setdefault('fatal', False)
return self._og_search_property('title', html, **kargs)

def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
@@ -1429,6 +1443,23 @@ class InfoExtractor(object):
continue
info[count_key] = interaction_count

def extract_chapter_information(e):
chapters = [{
'title': part.get('name'),
'start_time': part.get('startOffset'),
'end_time': part.get('endOffset'),
} for part in variadic(e.get('hasPart') or []) if part.get('@type') == 'Clip']
for idx, (last_c, current_c, next_c) in enumerate(zip(
[{'end_time': 0}] + chapters, chapters, chapters[1:])):
current_c['end_time'] = current_c['end_time'] or next_c['start_time']
current_c['start_time'] = current_c['start_time'] or last_c['end_time']
if None in current_c.values():
self.report_warning(f'Chapter {idx} contains broken data. Not extracting chapters')
return
if chapters:
chapters[-1]['end_time'] = chapters[-1]['end_time'] or info['duration']
info['chapters'] = chapters
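A worked example of the boundary-filling pass above, using hypothetical Clip data:

chapters = [
    {'title': 'Intro', 'start_time': 0, 'end_time': None},
    {'title': 'Main', 'start_time': 120, 'end_time': None},
]
for last_c, current_c, next_c in zip([{'end_time': 0}] + chapters, chapters, chapters[1:]):
    current_c['end_time'] = current_c['end_time'] or next_c['start_time']
    current_c['start_time'] = current_c['start_time'] or last_c['end_time']
chapters[-1]['end_time'] = chapters[-1]['end_time'] or 300  # info['duration'] fallback
# chapters == [{'title': 'Intro', 'start_time': 0, 'end_time': 120},
#              {'title': 'Main', 'start_time': 120, 'end_time': 300}]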

def extract_video_object(e):
assert e['@type'] == 'VideoObject'
author = e.get('author')
@@ -1436,7 +1467,8 @@ class InfoExtractor(object):
'url': url_or_none(e.get('contentUrl')),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
'thumbnails': [{'url': url_or_none(url)}
for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))],
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
# author can be an instance of 'Organization' or 'Person' types.
@@ -1451,6 +1483,7 @@ class InfoExtractor(object):
'view_count': int_or_none(e.get('interactionCount')),
})
extract_interaction_statistic(e)
extract_chapter_information(e)

def traverse_json_ld(json_ld, at_top_level=True):
for e in json_ld:
@@ -1496,6 +1529,8 @@ class InfoExtractor(object):
'title': unescapeHTML(e.get('headline')),
'description': unescapeHTML(e.get('articleBody') or e.get('description')),
})
if traverse_obj(e, ('video', 0, '@type')) == 'VideoObject':
extract_video_object(e['video'][0])
elif item_type == 'VideoObject':
extract_video_object(e)
if expected_type is None:
@@ -1513,12 +1548,12 @@ class InfoExtractor(object):

return dict((k, v) for k, v in info.items() if v is not None)

def _search_nextjs_data(self, webpage, video_id, **kw):
def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
return self._parse_json(
self._search_regex(
r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
webpage, 'next.js data', **kw),
video_id, **kw)
webpage, 'next.js data', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
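A typical call site under the new signature; the page-prop keys are illustrative. With fatal=False, both a regex miss and a JSON parse failure now yield None instead of raising:

nextjs = self._search_nextjs_data(webpage, video_id, fatal=False) or {}
video_data = traverse_obj(nextjs, ('props', 'pageProps', 'video'))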

def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__'):
''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. '''
@@ -2076,7 +2111,7 @@ class InfoExtractor(object):
headers=headers, query=query, video_id=video_id)

def _parse_m3u8_formats_and_subtitles(
self, m3u8_doc, m3u8_url, ext=None, entry_protocol='m3u8_native',
self, m3u8_doc, m3u8_url=None, ext=None, entry_protocol='m3u8_native',
preference=None, quality=None, m3u8_id=None, live=False, note=None,
errnote=None, fatal=True, data=None, headers={}, query={},
video_id=None):
@@ -2126,7 +2161,7 @@ class InfoExtractor(object):
formats = [{
'format_id': join_nonempty(m3u8_id, idx),
'format_index': idx,
'url': m3u8_url,
'url': m3u8_url or encode_data_uri(m3u8_doc.encode('utf-8'), 'application/x-mpegurl'),
'ext': ext,
'protocol': entry_protocol,
'preference': preference,
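With m3u8_url now optional, a manifest obtained out-of-band can be parsed directly, and the downloader is handed the document itself as a data: URI (sketch; variable names illustrative):

formats, subtitles = self._parse_m3u8_formats_and_subtitles(
    manifest_text, video_id=video_id)
# formats[0]['url'] -> 'data:application/x-mpegurl;base64,I0VYVE0zV...'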
@@ -2712,11 +2747,15 @@ class InfoExtractor(object):
mime_type = representation_attrib['mimeType']
content_type = representation_attrib.get('contentType', mime_type.split('/')[0])

codecs = representation_attrib.get('codecs', '')
codecs = parse_codecs(representation_attrib.get('codecs', ''))
if content_type not in ('video', 'audio', 'text'):
if mime_type == 'image/jpeg':
content_type = mime_type
elif codecs.split('.')[0] == 'stpp':
elif codecs['vcodec'] != 'none':
content_type = 'video'
elif codecs['acodec'] != 'none':
content_type = 'audio'
elif codecs.get('tcodec', 'none') != 'none':
content_type = 'text'
elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
content_type = 'text'
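The branch above keys off the parsed codec dictionary instead of raw string prefixes; roughly (exact keys vary between yt-dlp versions):

from yt_dlp.utils import parse_codecs
parse_codecs('avc1.64001f,mp4a.40.2')
# roughly {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}
parse_codecs('stpp.ttml.im1t')
# roughly {'vcodec': 'none', 'acodec': 'none', 'tcodec': 'stpp.ttml.im1t'}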
@@ -2762,8 +2801,8 @@ class InfoExtractor(object):
'format_note': 'DASH %s' % content_type,
'filesize': filesize,
'container': mimetype2ext(mime_type) + '_dash',
**codecs
}
f.update(parse_codecs(codecs))
elif content_type == 'text':
f = {
'ext': mimetype2ext(mime_type),
@@ -3468,8 +3507,6 @@ class InfoExtractor(object):

def _int(self, v, name, fatal=False, **kwargs):
res = int_or_none(v, **kwargs)
if 'get_attr' in kwargs:
print(getattr(v, kwargs['get_attr']))
if res is None:
msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
if fatal:
@@ -3676,6 +3713,22 @@ class InfoExtractor(object):
return [] if default is NO_DEFAULT else default
return list(val) if casesense else [x.lower() for x in val]

def _yes_playlist(self, playlist_id, video_id, smuggled_data=None, *, playlist_label='playlist', video_label='video'):
if not playlist_id or not video_id:
return not video_id

no_playlist = (smuggled_data or {}).get('force_noplaylist')
if no_playlist is not None:
return not no_playlist

video_id = '' if video_id is True else f' {video_id}'
playlist_id = '' if playlist_id is True else f' {playlist_id}'
if self.get_param('noplaylist'):
self.to_screen(f'Downloading just the {video_label}{video_id} because of --no-playlist')
return False
self.to_screen(f'Downloading {playlist_label}{playlist_id} - add --no-playlist to download just the {video_label}{video_id}')
return True
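The Dailymotion and Daum hunks further down switch to this helper; a minimal sketch of the intended call pattern (URL template and IE name illustrative):

if self._yes_playlist(playlist_id, video_id):
    return self.url_result(f'https://example.com/playlist/{playlist_id}', 'ExamplePlaylist')
# otherwise fall through and extract just the single video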


class SearchInfoExtractor(InfoExtractor):
"""

113
yt_dlp/extractor/crowdbunker.py
Normal file
@@ -0,0 +1,113 @@
# coding: utf-8
from __future__ import unicode_literals

import itertools

from .common import InfoExtractor
from ..utils import (
int_or_none,
try_get,
unified_strdate,
)


class CrowdBunkerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/v/(?P<id>[^/?#$&]+)'

_TESTS = [{
'url': 'https://crowdbunker.com/v/0z4Kms8pi8I',
'info_dict': {
'id': '0z4Kms8pi8I',
'ext': 'mp4',
'title': '117) Pass vax et solutions',
'description': 'md5:86bcb422c29475dbd2b5dcfa6ec3749c',
'view_count': int,
'duration': 5386,
'uploader': 'Jérémie Mercier',
'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ',
'like_count': int,
'upload_date': '20211218',
'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg'
},
'params': {'skip_download': True}
}]

def _real_extract(self, url):
id = self._match_id(url)
data_json = self._download_json(f'https://api.divulg.org/post/{id}/details',
id, headers={'accept': 'application/json, text/plain, */*'})
video_json = data_json['video']
formats, subtitles = [], {}
for sub in video_json.get('captions') or []:
sub_url = try_get(sub, lambda x: x['file']['url'])
if not sub_url:
continue
subtitles.setdefault(sub.get('languageCode', 'fr'), []).append({
'url': sub_url,
})

mpd_url = try_get(video_json, lambda x: x['dashManifest']['url'])
if mpd_url:
fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, id)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])
if m3u8_url:
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, id)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)

thumbnails = [{
'url': image['url'],
'height': int_or_none(image.get('height')),
'width': int_or_none(image.get('width')),
} for image in video_json.get('thumbnails') or [] if image.get('url')]

self._sort_formats(formats)
return {
'id': id,
'title': video_json.get('title'),
'description': video_json.get('description'),
'view_count': video_json.get('viewCount'),
'duration': video_json.get('duration'),
'uploader': try_get(data_json, lambda x: x['channel']['name']),
'uploader_id': try_get(data_json, lambda x: x['channel']['id']),
'like_count': data_json.get('likesCount'),
'upload_date': unified_strdate(video_json.get('publishedAt') or video_json.get('createdAt')),
'thumbnails': thumbnails,
'formats': formats,
'subtitles': subtitles,
}


class CrowdBunkerChannelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/@(?P<id>[^/?#$&]+)'

_TESTS = [{
'url': 'https://crowdbunker.com/@Milan_UHRIN',
'playlist_mincount': 14,
'info_dict': {
'id': 'Milan_UHRIN',
},
}]

def _entries(self, id):
last = None

for page in itertools.count():
channel_json = self._download_json(
f'https://api.divulg.org/organization/{id}/posts', id, headers={'accept': 'application/json, text/plain, */*'},
query={'after': last} if last else {}, note=f'Downloading Page {page}')
for item in channel_json.get('items') or []:
v_id = item.get('uid')
if not v_id:
continue
yield self.url_result(
'https://crowdbunker.com/v/%s' % v_id, ie=CrowdBunkerIE.ie_key(), video_id=v_id)
last = channel_json.get('last')
if not last:
break

def _real_extract(self, url):
id = self._match_id(url)
return self.playlist_result(self._entries(id), playlist_id=id)
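The channel extractor pages through the API with an opaque 'after' cursor; the same pattern in isolation (the fetch function is a stand-in for _download_json):

import itertools

def cursor_pages(fetch_json, channel_id):
    last = None
    for page in itertools.count():
        data = fetch_json(f'https://api.divulg.org/organization/{channel_id}/posts',
                          query={'after': last} if last else {})
        yield from data.get('items') or []
        last = data.get('last')
        if not last:  # an absent cursor marks the final page
            break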
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals

import base64
import re
import json
import zlib
@@ -23,15 +24,17 @@ from ..utils import (
bytes_to_intlist,
extract_attributes,
float_or_none,
format_field,
intlist_to_bytes,
int_or_none,
join_nonempty,
lowercase_escape,
merge_dicts,
qualities,
remove_end,
sanitized_Request,
traverse_obj,
try_get,
urlencode_postdata,
xpath_text,
)
from ..aes import (
@@ -40,8 +43,8 @@ from ..aes import (


class CrunchyrollBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.crunchyroll.com/login'
_LOGIN_FORM = 'login_form'
_LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
_API_BASE = 'https://api.crunchyroll.com'
_NETRC_MACHINE = 'crunchyroll'

def _call_rpc_api(self, method, video_id, note=None, data=None):
@@ -58,50 +61,33 @@ class CrunchyrollBaseIE(InfoExtractor):
username, password = self._get_login_info()
if username is None:
return

login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')

def is_logged(webpage):
return 'href="/logout"' in webpage

# Already logged in
if is_logged(login_page):
if self._get_cookies(self._LOGIN_URL).get('etp_rt'):
return

login_form_str = self._search_regex(
r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
login_page, 'login form', group='form')
upsell_response = self._download_json(
f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
query={
'sess_id': 1,
'device_id': 'whatvalueshouldbeforweb',
'device_type': 'com.crunchyroll.static',
'access_token': 'giKq5eY27ny3cqz',
'referer': self._LOGIN_URL
})
if upsell_response['code'] != 'ok':
raise ExtractorError('Could not get session id')
session_id = upsell_response['data']['session_id']

post_url = extract_attributes(login_form_str).get('action')
if not post_url:
post_url = self._LOGIN_URL
elif not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)

login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)

login_form.update({
'login_form[name]': username,
'login_form[password]': password,
})

response = self._download_webpage(
post_url, None, 'Logging in', 'Wrong login info',
data=urlencode_postdata(login_form),
headers={'Content-Type': 'application/x-www-form-urlencoded'})

# Successful login
if is_logged(response):
return

error = self._html_search_regex(
'(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)

raise ExtractorError('Unable to log in')
login_response = self._download_json(
f'{self._API_BASE}/login.1.json', None, 'Logging in',
data=compat_urllib_parse_urlencode({
'account': username,
'password': password,
'session_id': session_id
}).encode('ascii'))
if login_response['code'] != 'ok':
raise ExtractorError('Login failed. Bad username or password?', expected=True)
if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
raise ExtractorError('Login succeeded but did not set etp_rt cookie')

def _real_initialize(self):
self._login()
@@ -733,13 +719,118 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE):
def _real_extract(self, url):
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id')
webpage = self._download_webpage(url, display_id)
episode_data = self._parse_json(
self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'episode data'),
display_id)['content']['byId'][internal_id]
video_id = episode_data['external_id'].split('.')[1]
series_id = episode_data['episode_metadata']['series_slug_title']
return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
CrunchyrollIE.ie_key(), video_id)
initial_state = self._parse_json(
self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'),
display_id)
episode_data = initial_state['content']['byId'][internal_id]
if not self._get_cookies(url).get('etp_rt'):
video_id = episode_data['external_id'].split('.')[1]
series_id = episode_data['episode_metadata']['series_slug_title']
return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
CrunchyrollIE.ie_key(), video_id)

app_config = self._parse_json(
self._search_regex(r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'),
display_id)
client_id = app_config['cxApiParams']['accountAuthClientId']
api_domain = app_config['cxApiParams']['apiDomain']
basic_token = str(base64.b64encode(('%s:' % client_id).encode('ascii')), 'ascii')
auth_response = self._download_json(
f'{api_domain}/auth/v1/token', display_id,
note='Authenticating with cookie',
headers={
'Authorization': 'Basic ' + basic_token
}, data='grant_type=etp_rt_cookie'.encode('ascii'))
policy_response = self._download_json(
f'{api_domain}/index/v2', display_id,
note='Retrieving signed policy',
headers={
'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
})
bucket = policy_response['cms']['bucket']
params = {
'Policy': policy_response['cms']['policy'],
'Signature': policy_response['cms']['signature'],
'Key-Pair-Id': policy_response['cms']['key_pair_id']
}
locale = traverse_obj(initial_state, ('localization', 'locale'))
if locale:
params['locale'] = locale
episode_response = self._download_json(
f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
note='Retrieving episode metadata',
query=params)
if episode_response.get('is_premium_only') and not episode_response.get('playback'):
raise ExtractorError('This video is for premium members only.', expected=True)
stream_response = self._download_json(
episode_response['playback'], display_id,
note='Retrieving stream info')

thumbnails = []
for thumbnails_data in traverse_obj(episode_response, ('images', 'thumbnail')):
for thumbnail_data in thumbnails_data:
thumbnails.append({
'url': thumbnail_data.get('source'),
'width': thumbnail_data.get('width'),
'height': thumbnail_data.get('height'),
})
subtitles = {}
for lang, subtitle_data in stream_response.get('subtitles').items():
subtitles[lang] = [{
'url': subtitle_data.get('url'),
'ext': subtitle_data.get('format')
}]

requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
hardsub_preference = qualities(requested_hardsubs[::-1])
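# Sketch of how this preference ranks hardsub variants (values illustrative):
# with requested_hardsubs == ['', 'en-us'], earlier entries outrank later ones, so
#   hardsub_preference('')      -> 1   (first choice: no hardsubs burned in)
#   hardsub_preference('en-us') -> 0
#   hardsub_preference('de-de') -> -1  (not requested at all)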
requested_formats = self._configuration_arg('format') or ['adaptive_hls']

formats = []
for stream_type, streams in stream_response.get('streams', {}).items():
if stream_type not in requested_formats:
continue
for stream in streams.values():
hardsub_lang = stream.get('hardsub_locale') or ''
if hardsub_lang.lower() not in requested_hardsubs:
continue
format_id = join_nonempty(
stream_type,
format_field(stream, 'hardsub_locale', 'hardsub-%s'))
if not stream.get('url'):
continue
if stream_type.split('_')[-1] == 'hls':
adaptive_formats = self._extract_m3u8_formats(
stream['url'], display_id, 'mp4', m3u8_id=format_id,
note='Downloading %s information' % format_id,
fatal=False)
elif stream_type.split('_')[-1] == 'dash':
adaptive_formats = self._extract_mpd_formats(
stream['url'], display_id, mpd_id=format_id,
note='Downloading %s information' % format_id,
fatal=False)
for f in adaptive_formats:
if f.get('acodec') != 'none':
f['language'] = stream_response.get('audio_locale')
f['quality'] = hardsub_preference(hardsub_lang.lower())
formats.extend(adaptive_formats)
self._sort_formats(formats)

return {
'id': internal_id,
'title': '%s Episode %s – %s' % (episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
'description': episode_response.get('description').replace(r'\r\n', '\n'),
'duration': float_or_none(episode_response.get('duration_ms'), 1000),
'thumbnails': thumbnails,
'series': episode_response.get('series_title'),
'series_id': episode_response.get('series_id'),
'season': episode_response.get('season_title'),
'season_id': episode_response.get('season_id'),
'season_number': episode_response.get('season_number'),
'episode': episode_response.get('title'),
'episode_number': episode_response.get('sequence_number'),
'subtitles': subtitles,
'formats': formats
}


class CrunchyrollBetaShowIE(CrunchyrollBaseIE):

@@ -65,4 +65,9 @@ class CTVNewsIE(InfoExtractor):
})
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
if not entries:
webpage = self._download_webpage(url, page_id)
if 'getAuthStates("' in webpage:
entries = [ninecninemedia_url_result(clip_id) for clip_id in
self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
return self.playlist_result(entries, page_id)

79
yt_dlp/extractor/daftsex.py
Normal file
@@ -0,0 +1,79 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import (
get_elements_by_class,
int_or_none,
js_to_json,
parse_count,
parse_duration,
try_get,
)


class DaftsexIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?daftsex\.com/watch/(?P<id>-?\d+_\d+)'
_TESTS = [{
'url': 'https://daftsex.com/watch/-156601359_456242791',
'info_dict': {
'id': '-156601359_456242791',
'ext': 'mp4',
'title': 'Skye Blue - Dinner And A Show',
},
}]

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = get_elements_by_class('heading', webpage)[-1]
duration = parse_duration(self._search_regex(
r'Duration: ((?:[0-9]{2}:){0,2}[0-9]{2})',
webpage, 'duration', fatal=False))
views = parse_count(self._search_regex(
r'Views: ([0-9 ]+)',
webpage, 'views', fatal=False))

player_hash = self._search_regex(
r'DaxabPlayer\.Init\({[\s\S]*hash:\s*"([0-9a-zA-Z_\-]+)"[\s\S]*}',
webpage, 'player hash')
player_color = self._search_regex(
r'DaxabPlayer\.Init\({[\s\S]*color:\s*"([0-9a-z]+)"[\s\S]*}',
webpage, 'player color', fatal=False) or ''

embed_page = self._download_webpage(
'https://daxab.com/player/%s?color=%s' % (player_hash, player_color),
video_id, headers={'Referer': url})
video_params = self._parse_json(
self._search_regex(
r'window\.globParams\s*=\s*({[\S\s]+})\s*;\s*<\/script>',
embed_page, 'video parameters'),
video_id, transform_source=js_to_json)

server_domain = 'https://%s' % compat_b64decode(video_params['server'][::-1]).decode('utf-8')
formats = []
for format_id, format_data in video_params['video']['cdn_files'].items():
ext, height = format_id.split('_')
extra_quality_data = format_data.split('.')[-1]
url = f'{server_domain}/videos/{video_id.replace("_", "/")}/{height}.mp4?extra={extra_quality_data}'
formats.append({
'format_id': format_id,
'url': url,
'height': int_or_none(height),
'ext': ext,
})
self._sort_formats(formats)

thumbnail = try_get(video_params,
lambda vi: 'https:' + compat_b64decode(vi['video']['thumb']).decode('utf-8'))

return {
'id': video_id,
'title': title,
'formats': formats,
'duration': duration,
'thumbnail': thumbnail,
'view_count': views,
'age_limit': 18,
}
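The CDN host in video_params['server'] (decoded into server_domain above) is obfuscated as a reversed base64 string; for instance (value illustrative):

from base64 import b64decode
b64decode('=cmcv5SZsBXbhhXZ'[::-1]).decode('utf-8')  # -> 'example.org'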
@@ -207,12 +207,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
video_id, playlist_id = self._match_valid_url(url).groups()

if playlist_id:
if not self.get_param('noplaylist'):
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
if self._yes_playlist(playlist_id, video_id):
return self.url_result(
'http://www.dailymotion.com/playlist/' + playlist_id,
'DailymotionPlaylist', playlist_id)
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

password = self.get_param('videopassword')
media = self._call_api(

@@ -157,11 +157,8 @@ class DaumListIE(InfoExtractor):
query_dict = parse_qs(url)
if 'clipid' in query_dict:
clip_id = query_dict['clipid'][0]
if self.get_param('noplaylist'):
self.to_screen('Downloading just video %s because of --no-playlist' % clip_id)
if not self._yes_playlist(list_id, clip_id):
return self.url_result(DaumClipIE._URL_TEMPLATE % clip_id, 'DaumClip')
else:
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id)


class DaumPlaylistIE(DaumListIE):

143
yt_dlp/extractor/digitalconcerthall.py
Normal file
@@ -0,0 +1,143 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor

from ..utils import (
ExtractorError,
parse_resolution,
traverse_obj,
try_get,
urlencode_postdata,
)


class DigitalConcertHallIE(InfoExtractor):
IE_DESC = 'DigitalConcertHall extractor'
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/concert/(?P<id>[0-9]+)'
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
_ACCESS_TOKEN = None
_NETRC_MACHINE = 'digitalconcerthall'
_TESTS = [{
'note': 'Playlist with only one video',
'url': 'https://www.digitalconcerthall.com/en/concert/53201',
'info_dict': {
'id': '53201-1',
'ext': 'mp4',
'composer': 'Kurt Weill',
'title': '[Magic Night]',
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
'upload_date': '20210624',
'timestamp': 1624548600,
'duration': 2798,
'album_artist': 'Members of the Berliner Philharmoniker / Simon Rössler',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'Concert with several works and an interview',
'url': 'https://www.digitalconcerthall.com/en/concert/53785',
'info_dict': {
'id': '53785',
'album_artist': 'Berliner Philharmoniker / Kirill Petrenko',
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
},
'params': {'skip_download': 'm3u8'},
'playlist_count': 3,
}]

def _login(self):
username, password = self._get_login_info()
if not username:
self.raise_login_required()
token_response = self._download_json(
self._OAUTH_URL,
None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
'affiliate': 'none',
'grant_type': 'device',
'device_vendor': 'unknown',
'app_id': 'dch.webapp',
'app_version': '1.0.0',
'client_secret': '2ySLN+2Fwb',
}), headers={
'Content-Type': 'application/x-www-form-urlencoded',
})
self._ACCESS_TOKEN = token_response['access_token']
try:
self._download_json(
self._OAUTH_URL,
None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({
'grant_type': 'password',
'username': username,
'password': password,
}), headers={
'Content-Type': 'application/x-www-form-urlencoded',
'Referer': 'https://www.digitalconcerthall.com',
'Authorization': f'Bearer {self._ACCESS_TOKEN}'
})
except ExtractorError:
self.raise_login_required(msg='Login info incorrect')

def _real_initialize(self):
self._login()

def _entries(self, items, language, **kwargs):
for item in items:
video_id = item['id']
stream_info = self._download_json(
self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={
'Accept': 'application/json',
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
'Accept-Language': language
})

m3u8_url = traverse_obj(
stream_info, ('channel', lambda x: x.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
self._sort_formats(formats)

yield {
'id': video_id,
'title': item.get('title'),
'composer': item.get('name_composer'),
'url': m3u8_url,
'formats': formats,
'duration': item.get('duration_total'),
'timestamp': traverse_obj(item, ('date', 'published')),
'description': item.get('short_description') or stream_info.get('short_description'),
**kwargs,
'chapters': [{
'start_time': chapter.get('time'),
'end_time': try_get(chapter, lambda x: x['time'] + x['duration']),
'title': chapter.get('text'),
} for chapter in item['cuepoints']] if item.get('cuepoints') else None,
}

def _real_extract(self, url):
language, video_id = self._match_valid_url(url).group('language', 'id')
if not language:
language = 'en'

thumbnail_url = self._html_search_regex(
r'(https?://images\.digitalconcerthall\.com/cms/thumbnails/.*\.jpg)',
self._download_webpage(url, video_id), 'thumbnail')
thumbnails = [{
'url': thumbnail_url,
**parse_resolution(thumbnail_url)
}]

vid_info = self._download_json(
f'https://api.digitalconcerthall.com/v2/concert/{video_id}', video_id, headers={
'Accept': 'application/json',
'Accept-Language': language
})
album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '')

return {
'_type': 'playlist',
'id': video_id,
'title': vid_info.get('title'),
'entries': self._entries(traverse_obj(vid_info, ('_embedded', ..., ...)), language,
thumbnails=thumbnails, album_artist=album_artist),
'thumbnails': thumbnails,
'album_artist': album_artist,
}
@@ -74,13 +74,11 @@ class DigitallySpeakingIE(InfoExtractor):
tbr = int_or_none(bitrate)
vbr = int_or_none(self._search_regex(
r'-(\d+)\.mp4', video_path, 'vbr', default=None))
abr = tbr - vbr if tbr and vbr else None
video_formats.append({
'format_id': bitrate,
'url': url,
'tbr': tbr,
'vbr': vbr,
'abr': abr,
})
return video_formats

@@ -121,6 +119,7 @@ class DigitallySpeakingIE(InfoExtractor):
video_formats = self._parse_mp4(metadata)
if video_formats is None:
video_formats = self._parse_flv(metadata)
self._sort_formats(video_formats)

return {
'id': video_id,

@@ -20,6 +20,16 @@ class DoodStreamIE(InfoExtractor):
'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
}
}, {
'url': 'http://dood.watch/d/5s1wmbdacezb',
'md5': '4568b83b31e13242b3f1ff96c55f0595',
'info_dict': {
'id': '5s1wmbdacezb',
'ext': 'mp4',
'title': 'Kat Wonders - Monthly May 2020',
'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
}
}, {
'url': 'https://dood.to/d/jzrxn12t2s7n',
'md5': '3207e199426eca7c2aa23c2872e6728a',
@@ -34,31 +44,26 @@ class DoodStreamIE(InfoExtractor):

def _real_extract(self, url):
video_id = self._match_id(url)
url = f'https://dood.to/e/{video_id}'
webpage = self._download_webpage(url, video_id)

if '/d/' in url:
url = "https://dood.to" + self._html_search_regex(
r'<iframe src="(/e/[a-z0-9]+)"', webpage, 'embed')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

title = self._html_search_meta(['og:title', 'twitter:title'],
webpage, default=None)
thumb = self._html_search_meta(['og:image', 'twitter:image'],
webpage, default=None)
title = self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None)
thumb = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None)
token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
description = self._html_search_meta(
['og:description', 'description', 'twitter:description'],
webpage, default=None)
auth_url = 'https://dood.to' + self._html_search_regex(
r'(/pass_md5.*?)\'', webpage, 'pass_md5')
['og:description', 'description', 'twitter:description'], webpage, default=None)

headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
'referer': url
}

webpage = self._download_webpage(auth_url, video_id, headers=headers)
final_url = webpage + ''.join([random.choice(string.ascii_letters + string.digits) for _ in range(10)]) + "?token=" + token + "&expiry=" + str(int(time.time() * 1000))
pass_md5 = self._html_search_regex(r'(/pass_md5.*?)\'', webpage, 'pass_md5')
final_url = ''.join((
self._download_webpage(f'https://dood.to{pass_md5}', video_id, headers=headers),
*(random.choice(string.ascii_letters + string.digits) for _ in range(10)),
f'?token={token}&expiry={int(time.time() * 1000)}',
))
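The rewritten expression assembles the same URL shape as the old one-liner, i.e. (values illustrative):

# <body of the /pass_md5 response> + <10 random alphanumerics> + '?token=<token>&expiry=<unix ms>'
# e.g. https://cdn.dood.example/abcdefXY12Zq9K3mT0?token=abc123&expiry=1643673600000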

return {
'id': video_id,

@@ -347,7 +347,380 @@ class HGTVDeIE(DPlayBaseIE):
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')


class DiscoveryPlusIE(DPlayBaseIE):
class DiscoveryPlusBaseIE(DPlayBaseIE):
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
headers['x-disco-client'] = f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6'

def _download_video_playback_info(self, disco_base, video_id, headers):
return self._download_json(
disco_base + 'playback/v3/videoPlaybackInfo',
video_id, headers=headers, data=json.dumps({
'deviceInfo': {
'adBlocker': False,
},
'videoId': video_id,
'wisteriaProperties': {
'platform': 'desktop',
'product': self._PRODUCT,
},
}).encode('utf-8'))['data']['attributes']['streaming']

def _real_extract(self, url):
return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)
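With the shared logic hoisted into this base class, wiring up another Discovery-family site becomes pure configuration, e.g. (brand name and host illustrative):

class MyBrandIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:www\.)?mybrand\.com/video' + DPlayBaseIE._PATH_REGEX
    _PRODUCT = 'mybrand'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.mybrand.com',
        'realm': 'go',
        'country': 'us',
    }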


class GoDiscoveryIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://go.discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
'info_dict': {
'id': '4164906',
'display_id': 'dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
'ext': 'mp4',
'title': 'Rodbuster / Galvanizer',
'description': 'Mike installs rebar with a team of rodbusters, then he galvanizes steel.',
'season_number': 9,
'episode_number': 1,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
'only_matching': True,
}]

_PRODUCT = 'dsc'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.go.discovery.com',
'realm': 'go',
'country': 'us',
}


class TravelChannelIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:watch\.)?travelchannel\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely',
'info_dict': {
'id': '2220256',
'display_id': 'ghost-adventures-travel-channel/ghost-train-of-ely',
'ext': 'mp4',
'title': 'Ghost Train of Ely',
'description': 'The crew investigates the dark history of the Nevada Northern Railway.',
'season_number': 24,
'episode_number': 1,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely',
'only_matching': True,
}]

_PRODUCT = 'trav'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.watch.travelchannel.com',
'realm': 'go',
'country': 'us',
}


class CookingChannelIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:watch\.)?cookingchanneltv\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
'info_dict': {
'id': '2348634',
'display_id': 'carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
'ext': 'mp4',
'title': 'The Postman Always Brings Rice',
'description': 'Noah visits the Maui Fair and the Aurora Winter Festival in Vancouver.',
'season_number': 9,
'episode_number': 1,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
'only_matching': True,
}]

_PRODUCT = 'cook'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.watch.cookingchanneltv.com',
'realm': 'go',
'country': 'us',
}


class HGTVUsaIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:watch\.)?hgtv\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house',
'info_dict': {
'id': '4289736',
'display_id': 'home-inspector-joe-hgtv-atve-us/this-mold-house',
'ext': 'mp4',
'title': 'This Mold House',
'description': 'Joe and Noel help take a family’s dream home from hazardous to fabulous.',
'season_number': 1,
'episode_number': 1,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house',
'only_matching': True,
}]

_PRODUCT = 'hgtv'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.watch.hgtv.com',
'realm': 'go',
'country': 'us',
}

class FoodNetworkIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:watch\.)?foodnetwork\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly',
'info_dict': {
'id': '4116449',
'display_id': 'kids-baking-championship-food-network/float-like-a-butterfly',
'ext': 'mp4',
'title': 'Float Like a Butterfly',
'description': 'The 12 kid bakers create colorful carved butterfly cakes.',
'season_number': 10,
'episode_number': 1,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly',
'only_matching': True,
}]

_PRODUCT = 'food'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.watch.foodnetwork.com',
'realm': 'go',
'country': 'us',
}


class DestinationAmericaIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?destinationamerica\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
'info_dict': {
'id': '4210904',
'display_id': 'alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
'ext': 'mp4',
'title': 'Central Alaska’s Bigfoot',
'description': 'A team heads to central Alaska to investigate an aggressive Bigfoot.',
'season_number': 1,
'episode_number': 1,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
'only_matching': True,
}]

_PRODUCT = 'dam'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.destinationamerica.com',
'realm': 'go',
'country': 'us',
}


class InvestigationDiscoveryIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?investigationdiscovery\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown',
'info_dict': {
'id': '2139409',
'display_id': 'unmasked-investigation-discovery/the-killer-clown',
'ext': 'mp4',
'title': 'The Killer Clown',
'description': 'A wealthy Florida woman is fatally shot in the face by a clown at her door.',
'season_number': 1,
'episode_number': 1,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown',
'only_matching': True,
}]

_PRODUCT = 'ids'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.investigationdiscovery.com',
'realm': 'go',
'country': 'us',
}


class AmHistoryChannelIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?ahctv\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army',
'info_dict': {
'id': '2309730',
'display_id': 'modern-sniper-ahc/army',
'ext': 'mp4',
'title': 'Army',
'description': 'Snipers today face challenges their predecessors could’ve only dreamed of.',
'season_number': 1,
'episode_number': 1,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army',
'only_matching': True,
}]

_PRODUCT = 'ahc'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.ahctv.com',
'realm': 'go',
'country': 'us',
}

class ScienceChannelIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
'info_dict': {
'id': '2842849',
'display_id': 'strangest-things-science-atve-us/nazi-mystery-machine',
'ext': 'mp4',
'title': 'Nazi Mystery Machine',
'description': 'Experts investigate the secrets of a revolutionary encryption machine.',
'season_number': 1,
'episode_number': 1,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
'only_matching': True,
}]

_PRODUCT = 'sci'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.sciencechannel.com',
'realm': 'go',
'country': 'us',
}


class DIYNetworkIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
'info_dict': {
'id': '2309730',
'display_id': 'pool-kings-diy-network/bringing-beach-life-to-texas',
'ext': 'mp4',
'title': 'Bringing Beach Life to Texas',
'description': 'The Pool Kings give a family a day at the beach in their own backyard.',
'season_number': 10,
'episode_number': 2,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
'only_matching': True,
}]

_PRODUCT = 'diy'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.watch.diynetwork.com',
'realm': 'go',
'country': 'us',
}


class DiscoveryLifeIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma',
'info_dict': {
'id': '2218238',
'display_id': 'surviving-death-discovery-life-atve-us/bodily-trauma',
'ext': 'mp4',
'title': 'Bodily Trauma',
'description': 'Meet three people who tested the limits of the human body.',
'season_number': 1,
'episode_number': 2,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma',
'only_matching': True,
}]

_PRODUCT = 'dlf'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.discoverylife.com',
'realm': 'go',
'country': 'us',
}


class AnimalPlanetIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
'info_dict': {
'id': '3338923',
'display_id': 'north-woods-law-animal-planet/squirrel-showdown',
'ext': 'mp4',
'title': 'Squirrel Showdown',
'description': 'A woman is suspected of being in possession of flying squirrel kits.',
'season_number': 16,
'episode_number': 11,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
'only_matching': True,
}]

_PRODUCT = 'apl'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.animalplanet.com',
'realm': 'go',
'country': 'us',
}


class TLCIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:go\.)?tlc\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1',
'info_dict': {
'id': '2206540',
'display_id': 'my-600-lb-life-tlc/melissas-story-part-1',
'ext': 'mp4',
'title': 'Melissa’s Story (Part 1)',
'description': 'At 650 lbs, Melissa is ready to begin her seven-year weight loss journey.',
'season_number': 1,
'episode_number': 1,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1',
'only_matching': True,
}]

_PRODUCT = 'tlc'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.tlc.com',
'realm': 'go',
'country': 'us',
}

class DiscoveryPlusIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
|
||||
@@ -372,92 +745,14 @@ class DiscoveryPlusIE(DPlayBaseIE):
|
||||
}]
|
||||
|
||||
_PRODUCT = 'dplus_us'
|
||||
_API_URL = 'us1-prod-direct.discoveryplus.com'
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers['x-disco-client'] = f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6'
|
||||
|
||||
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||
return self._download_json(
|
||||
disco_base + 'playback/v3/videoPlaybackInfo',
|
||||
video_id, headers=headers, data=json.dumps({
|
||||
'deviceInfo': {
|
||||
'adBlocker': False,
|
||||
},
|
||||
'videoId': video_id,
|
||||
'wisteriaProperties': {
|
||||
'platform': 'desktop',
|
||||
'product': self._PRODUCT,
|
||||
},
|
||||
}).encode('utf-8'))['data']['attributes']['streaming']
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, self._API_URL, 'go', 'us')
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'us1-prod-direct.discoveryplus.com',
|
||||
'realm': 'go',
|
||||
'country': 'us',
|
||||
}
|
||||
|
||||
|
||||
class ScienceChannelIE(DiscoveryPlusIE):
    _VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
        'info_dict': {
            'id': '2842849',
            'display_id': 'strangest-things-science-atve-us/nazi-mystery-machine',
            'ext': 'mp4',
            'title': 'Nazi Mystery Machine',
            'description': 'Experts investigate the secrets of a revolutionary encryption machine.',
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }]

    _PRODUCT = 'sci'
    _API_URL = 'us1-prod-direct.sciencechannel.com'


class DIYNetworkIE(DiscoveryPlusIE):
    _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
        'info_dict': {
            'id': '2309730',
            'display_id': 'pool-kings-diy-network/bringing-beach-life-to-texas',
            'ext': 'mp4',
            'title': 'Bringing Beach Life to Texas',
            'description': 'The Pool Kings give a family a day at the beach in their own backyard.',
            'season_number': 10,
            'episode_number': 2,
        },
        'skip': 'Available for Premium users',
    }]

    _PRODUCT = 'diy'
    _API_URL = 'us1-prod-direct.watch.diynetwork.com'


class AnimalPlanetIE(DiscoveryPlusIE):
    _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
        'info_dict': {
            'id': '3338923',
            'display_id': 'north-woods-law-animal-planet/squirrel-showdown',
            'ext': 'mp4',
            'title': 'Squirrel Showdown',
            'description': 'A woman is suspected of being in possession of flying squirrel kits.',
            'season_number': 16,
            'episode_number': 11,
        },
        'skip': 'Available for Premium users',
    }]

    _PRODUCT = 'apl'
    _API_URL = 'us1-prod-direct.animalplanet.com'


-class DiscoveryPlusIndiaIE(DPlayBaseIE):
+class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE',
@@ -467,41 +762,38 @@ class DiscoveryPlusIndiaIE(DPlayBaseIE):
            'display_id': 'how-do-they-do-it/fugu-and-more',
            'title': 'Fugu and More',
            'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.',
-            'duration': 1319,
+            'duration': 1319.32,
            'timestamp': 1582309800,
            'upload_date': '20200221',
            'series': 'How Do They Do It?',
            'season_number': 8,
            'episode_number': 2,
            'creator': 'Discovery Channel',
            'thumbnail': r're:https://.+\.jpeg',
            'episode': 'Episode 2',
            'season': 'Season 8',
            'tags': [],
        },
        'params': {
            'skip_download': True,
        }
    }]

    _PRODUCT = 'dplus-india'
+    _DISCO_API_PARAMS = {
+        'disco_host': 'ap2-prod-direct.discoveryplus.in',
+        'realm': 'dplusindia',
+        'country': 'in',
+        'domain': 'https://www.discoveryplus.in/',
+    }

    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
        headers.update({
            'x-disco-params': 'realm=%s' % realm,
-            'x-disco-client': 'WEB:UNKNOWN:dplus-india:17.0.0',
+            'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:17.0.0',
            'Authorization': self._get_auth(disco_base, display_id, realm),
        })

-    def _download_video_playback_info(self, disco_base, video_id, headers):
-        return self._download_json(
-            disco_base + 'playback/v3/videoPlaybackInfo',
-            video_id, headers=headers, data=json.dumps({
-                'deviceInfo': {
-                    'adBlocker': False,
-                },
-                'videoId': video_id,
-            }).encode('utf-8'))['data']['attributes']['streaming']
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        return self._get_disco_api_info(
-            url, display_id, 'ap2-prod-direct.discoveryplus.in', 'dplusindia', 'in', 'https://www.discoveryplus.in/')


class DiscoveryNetworksDeIE(DPlayBaseIE):
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
@@ -515,6 +807,16 @@ class DiscoveryNetworksDeIE(DPlayBaseIE):
            'description': 'md5:61033c12b73286e409d99a41742ef608',
            'timestamp': 1554069600,
            'upload_date': '20190331',
            'creator': 'TLC',
            'season': 'Season 1',
            'series': 'Breaking Amish',
            'episode_number': 1,
            'tags': ['new york', 'großstadt', 'amische', 'landleben', 'modern', 'infos', 'tradition', 'herausforderung'],
            'display_id': 'breaking-amish/die-welt-da-drauen',
            'episode': 'Episode 1',
            'duration': 2625.024,
            'season_number': 1,
            'thumbnail': r're:https://.+\.jpg',
        },
        'params': {
            'skip_download': True,
@@ -575,16 +877,19 @@ class DiscoveryPlusShowBaseIE(DPlayBaseIE):
        return self.playlist_result(self._entries(show_name), playlist_id=show_name)


-class DiscoveryPlusItalyIE(InfoExtractor):
+class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.discoveryplus.com/it/video/i-signori-della-neve/stagione-2-episodio-1-i-preparativi',
        'only_matching': True,
    }]

-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        return self.url_result(f'https://discoveryplus.it/video/{video_id}', DPlayIE.ie_key(), video_id)
+    _PRODUCT = 'dplus_us'
+    _DISCO_API_PARAMS = {
+        'disco_host': 'eu1-prod-direct.discoveryplus.com',
+        'realm': 'dplay',
+        'country': 'it',
+    }


class DiscoveryPlusItalyShowIE(DiscoveryPlusShowBaseIE):

yt_dlp/extractor/drooble.py (new file, 116 lines)
@@ -0,0 +1,116 @@
# coding: utf-8
from __future__ import unicode_literals

import json

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    try_get,
)


class DroobleIE(InfoExtractor):
    _VALID_URL = r'''(?x)https?://drooble\.com/(?:
        (?:(?P<user>[^/]+)/)?(?P<kind>song|videos|music/albums)/(?P<id>\d+)|
        (?P<user_2>[^/]+)/(?P<kind_2>videos|music))
    '''
    _TESTS = [{
        'url': 'https://drooble.com/song/2858030',
        'md5': '5ffda90f61c7c318dc0c3df4179eb064',
        'info_dict': {
            'id': '2858030',
            'ext': 'mp3',
            'title': 'Skankocillin',
            'upload_date': '20200801',
            'timestamp': 1596241390,
            'uploader_id': '95894',
            'uploader': 'Bluebeat Shelter',
        }
    }, {
        'url': 'https://drooble.com/karl340758/videos/2859183',
        'info_dict': {
            'id': 'J6QCQY_I5Tk',
            'ext': 'mp4',
            'title': 'Skankocillin',
            'uploader_id': 'UCrSRoI5vVyeYihtWEYua7rg',
            'description': 'md5:ffc0bd8ba383db5341a86a6cd7d9bcca',
            'upload_date': '20200731',
            'uploader': 'Bluebeat Shelter',
        }
    }, {
        'url': 'https://drooble.com/karl340758/music/albums/2858031',
        'info_dict': {
            'id': '2858031',
        },
        'playlist_mincount': 8,
    }, {
        'url': 'https://drooble.com/karl340758/music',
        'info_dict': {
            'id': 'karl340758',
        },
        'playlist_mincount': 8,
    }, {
        'url': 'https://drooble.com/karl340758/videos',
        'info_dict': {
            'id': 'karl340758',
        },
        'playlist_mincount': 8,
    }]

    def _call_api(self, method, video_id, data=None):
        response = self._download_json(
            f'https://drooble.com/api/dt/{method}', video_id, data=json.dumps(data).encode())
        if not response[0]:
            raise ExtractorError('Unable to download JSON metadata')
        return response[1]

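    # The Drooble API wraps every response in a two-element JSON array,
    # [success, payload], which _call_api unwraps above. A minimal standalone
    # sketch of the same convention (endpoint and payload shape inferred from
    # this extractor, not from any documented API):
    #
    #   import json
    #   import urllib.request
    #
    #   def call_drooble_api(method, data):
    #       req = urllib.request.Request(
    #           f'https://drooble.com/api/dt/{method}',
    #           data=json.dumps(data).encode())
    #       ok, payload = json.load(urllib.request.urlopen(req))
    #       if not ok:
    #           raise RuntimeError('API call failed')
    #       return payload
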
    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        user = mobj.group('user') or mobj.group('user_2')
        kind = mobj.group('kind') or mobj.group('kind_2')
        display_id = mobj.group('id') or user

        if mobj.group('kind_2') == 'videos':
            data = {'from_user': display_id, 'album': -1, 'limit': 18, 'offset': 0, 'order': 'new2old', 'type': 'video'}
        elif kind in ('music/albums', 'music'):
            data = {'user': user, 'public_only': True, 'individual_limit': {'singles': 1, 'albums': 1, 'playlists': 1}}
        else:
            data = {'url_slug': display_id, 'children': 10, 'order': 'old2new'}

        method = 'getMusicOverview' if kind in ('music/albums', 'music') else 'getElements'
        json_data = self._call_api(method, display_id, data=data)
        if kind in ('music/albums', 'music'):
            json_data = json_data['singles']['list']

        entries = []
        for media in json_data:
            url = media.get('external_media_url') or media.get('link')
            if url.startswith('https://www.youtube.com'):
                entries.append({
                    '_type': 'url',
                    'url': url,
                    'ie_key': 'Youtube'
                })
                continue
            is_audio = (media.get('type') or '').lower() == 'audio'
            entries.append({
                'url': url,
                'id': media['id'],
                'title': media['title'],
                'duration': int_or_none(media.get('duration')),
                'timestamp': int_or_none(media.get('timestamp')),
                'album': try_get(media, lambda x: x['album']['title']),
                'uploader': try_get(media, lambda x: x['creator']['display_name']),
                'uploader_id': try_get(media, lambda x: x['creator']['id']),
                'thumbnail': media.get('image_comment'),
                'like_count': int_or_none(media.get('likes')),
                'vcodec': 'none' if is_audio else None,
                'ext': 'mp3' if is_audio else None,
            })

        if len(entries) > 1:
            return self.playlist_result(entries, display_id)

        return entries[0]
@@ -6,7 +6,12 @@ import re

from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
-from ..utils import url_basename
+from ..utils import (
+    ExtractorError,
+    traverse_obj,
+    try_get,
+    url_basename,
+)


class DropboxIE(InfoExtractor):
@@ -28,13 +33,44 @@ class DropboxIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        fn = compat_urllib_parse_unquote(url_basename(url))
        title = os.path.splitext(fn)[0]
-        video_url = re.sub(r'[?&]dl=0', '', url)
-        video_url += ('?' if '?' not in video_url else '&') + 'dl=1'

+        password = self.get_param('videopassword')
+        if (self._og_search_title(webpage) == 'Dropbox - Password Required'
+                or 'Enter the password for this link' in webpage):
+
+            if password:
+                content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
+                payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'
+                response = self._download_json(
+                    'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode('UTF-8'),
+                    headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})
+
+                if response.get('status') != 'authed':
+                    raise ExtractorError('Authentication failed!', expected=True)
+                webpage = self._download_webpage(url, video_id)
+            elif self._get_cookies('https://dropbox.com').get('sm_auth'):
+                webpage = self._download_webpage(url, video_id)
+            else:
+                raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
+
+        json_string = self._html_search_regex(r'InitReact\.mountComponent.+ "props":(.+), "elem_id"', webpage, 'Info JSON')
+        info_json = self._parse_json(json_string, video_id)
+        transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
+
+        # if downloads are enabled, we can get the original file
+        if 'anonymous' in (try_get(info_json, lambda x: x['sharePermission']['canDownloadRoles']) or []):
+            video_url = re.sub(r'[?&]dl=0', '', url)
+            video_url += ('?' if '?' not in video_url else '&') + 'dl=1'
+            formats.append({'url': video_url, 'format_id': 'original', 'format_note': 'Original', 'quality': 1})
+        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
-            'url': video_url,
+            'formats': formats,
+            'subtitles': subtitles
        }

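# The dl=0 -> dl=1 rewrite used above, as a standalone helper (a sketch of
# the same logic; Dropbox serves the raw file when dl=1 is set):
#
#   import re
#
#   def direct_download_url(url):
#       url = re.sub(r'[?&]dl=0', '', url)
#       return url + ('?' if '?' not in url else '&') + 'dl=1'
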
@@ -7,13 +7,11 @@ import re


from .common import InfoExtractor
-from ..aes import aes_cbc_decrypt
+from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_urllib_parse_unquote
from ..utils import (
-    bytes_to_intlist,
    ExtractorError,
    int_or_none,
-    intlist_to_bytes,
    float_or_none,
    mimetype2ext,
    str_or_none,
@@ -191,13 +189,11 @@ class DRTVIE(InfoExtractor):
        def decrypt_uri(e):
            n = int(e[2:10], 16)
            a = e[10 + n:]
-            data = bytes_to_intlist(hex_to_bytes(e[10:10 + n]))
-            key = bytes_to_intlist(hashlib.sha256(
-                ('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest())
-            iv = bytes_to_intlist(hex_to_bytes(a))
-            decrypted = aes_cbc_decrypt(data, key, iv)
-            return intlist_to_bytes(
-                decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0]
+            data = hex_to_bytes(e[10:10 + n])
+            key = hashlib.sha256(('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest()
+            iv = hex_to_bytes(a)
+            decrypted = unpad_pkcs7(aes_cbc_decrypt_bytes(data, key, iv))
+            return decrypted.decode('utf-8').split('?')[0]
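        # Layout of the encrypted URI handled above (inferred from the
        # slicing in decrypt_uri, not from any DR documentation):
        #   e[2:10]     hex-encoded length n of the ciphertext
        #   e[10:10+n]  hex-encoded AES-CBC ciphertext
        #   e[10+n:]    hex-encoded IV, also salted into the SHA-256 key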

        for asset in assets:
            kind = asset.get('Kind')

yt_dlp/extractor/ertgr.py (new file, 316 lines)
@@ -0,0 +1,316 @@
# coding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    clean_html,
    determine_ext,
    ExtractorError,
    dict_get,
    int_or_none,
    merge_dicts,
    parse_qs,
    parse_age_limit,
    parse_iso8601,
    str_or_none,
    try_get,
    unescapeHTML,
    url_or_none,
    variadic,
)


class ERTFlixBaseIE(InfoExtractor):
    def _call_api(
            self, video_id, method='Player/AcquireContent', api_version=1,
            param_headers=None, data=None, headers=None, **params):
        platform_codename = {'platformCodename': 'www'}
        headers_as_param = {'X-Api-Date-Format': 'iso', 'X-Api-Camel-Case': False}
        headers_as_param.update(param_headers or {})
        headers = headers or {}
        if data:
            headers['Content-Type'] = headers_as_param['Content-Type'] = 'application/json;charset=utf-8'
            data = json.dumps(merge_dicts(platform_codename, data)).encode('utf-8')
        query = merge_dicts(
            {} if data else platform_codename,
            {'$headers': json.dumps(headers_as_param)},
            params)
        response = self._download_json(
            'https://api.app.ertflix.gr/v%s/%s' % (str(api_version), method),
            video_id, fatal=False, query=query, data=data, headers=headers)
        if try_get(response, lambda x: x['Result']['Success']) is True:
            return response
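    # Note: besides real HTTP headers, this API also receives meta-headers as
    # a JSON-encoded '$headers' query parameter, and 'platformCodename' moves
    # from the query string into the JSON body for POST requests.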

    def _call_api_get_tiles(self, video_id, *tile_ids):
        requested_tile_ids = [video_id] + list(tile_ids)
        requested_tiles = [{'Id': tile_id} for tile_id in requested_tile_ids]
        tiles_response = self._call_api(
            video_id, method='Tile/GetTiles', api_version=2,
            data={'RequestedTiles': requested_tiles})
        tiles = try_get(tiles_response, lambda x: x['Tiles'], list) or []
        if tile_ids:
            if sorted([tile['Id'] for tile in tiles]) != sorted(requested_tile_ids):
                raise ExtractorError('Requested tiles not found', video_id=video_id)
            return tiles
        try:
            return next(tile for tile in tiles if tile['Id'] == video_id)
        except StopIteration:
            raise ExtractorError('No matching tile found', video_id=video_id)


class ERTFlixCodenameIE(ERTFlixBaseIE):
    IE_NAME = 'ertflix:codename'
    IE_DESC = 'ERTFLIX videos by codename'
    _VALID_URL = r'ertflix:(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'ertflix:monogramma-praxitelis-tzanoylinos',
        'md5': '5b9c2cd171f09126167e4082fc1dd0ef',
        'info_dict': {
            'id': 'monogramma-praxitelis-tzanoylinos',
            'ext': 'mp4',
            'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e',
        },
    },
    ]

    def _extract_formats_and_subs(self, video_id, allow_none=True):
        media_info = self._call_api(video_id, codename=video_id)
        formats, subs = [], {}
        for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
            for media in try_get(media_file, lambda x: x['Formats'], list) or []:
                fmt_url = url_or_none(try_get(media, lambda x: x['Url']))
                if not fmt_url:
                    continue
                ext = determine_ext(fmt_url)
                if ext == 'm3u8':
                    formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
                        fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
                elif ext == 'mpd':
                    formats_, subs_ = self._extract_mpd_formats_and_subtitles(
                        fmt_url, video_id, mpd_id='dash', fatal=False)
                else:
                    formats.append({
                        'url': fmt_url,
                        'format_id': str_or_none(media.get('Id')),
                    })
                    continue
                formats.extend(formats_)
                self._merge_subtitles(subs_, target=subs)

        if formats or not allow_none:
            self._sort_formats(formats)
        return formats, subs

    def _real_extract(self, url):
        video_id = self._match_id(url)

        formats, subs = self._extract_formats_and_subs(video_id)

        if formats:
            return {
                'id': video_id,
                'formats': formats,
                'subtitles': subs,
                'title': self._generic_title(url),
            }


class ERTFlixIE(ERTFlixBaseIE):
    IE_NAME = 'ertflix'
    IE_DESC = 'ERTFLIX videos'
    _VALID_URL = r'https?://www\.ertflix\.gr/(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
    _TESTS = [{
        'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
        'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
        'info_dict': {
            'id': 'aoratoi-ergates',
            'ext': 'mp4',
            'title': 'md5:c1433d598fbba0211b0069021517f8b4',
            'description': 'md5:01a64d113c31957eb7eb07719ab18ff4',
            'thumbnail': r're:https?://.+\.jpg',
            'episode_id': 'vod.173258',
            'timestamp': 1639648800,
            'upload_date': '20211216',
            'duration': 3166,
            'age_limit': 8,
        },
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 64,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9',
        'info_dict': {
            'id': 'ser.164991',
            'age_limit': 8,
            'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.',
            'title': 'Το δίκτυο',
        },
        'playlist_mincount': 9,
    }]

    def _extract_episode(self, episode):
        codename = try_get(episode, lambda x: x['Codename'], compat_str)
        title = episode.get('Title')
        description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription', )))
        if not codename or not title or not episode.get('HasPlayableStream', True):
            return
        thumbnail = next((
            url_or_none(thumb.get('Url'))
            for thumb in variadic(dict_get(episode, ('Images', 'Image')) or {})
            if thumb.get('IsMain')),
            None)
        return {
            '_type': 'url_transparent',
            'thumbnail': thumbnail,
            'id': codename,
            'episode_id': episode.get('Id'),
            'title': title,
            'alt_title': episode.get('Subtitle'),
            'description': description,
            'timestamp': parse_iso8601(episode.get('PublishDate')),
            'duration': episode.get('DurationSeconds'),
            'age_limit': self._parse_age_rating(episode),
            'url': 'ertflix:%s' % (codename, ),
        }

    @staticmethod
    def _parse_age_rating(info_dict):
        return parse_age_limit(
            info_dict.get('AgeRating')
            or (info_dict.get('IsAdultContent') and 18)
            or (info_dict.get('IsKidsContent') and 0))

    def _extract_series(self, video_id, season_titles=None, season_numbers=None):
        media_info = self._call_api(video_id, method='Tile/GetSeriesDetails', id=video_id)

        series = try_get(media_info, lambda x: x['Series'], dict) or {}
        series_info = {
            'age_limit': self._parse_age_rating(series),
            'title': series.get('Title'),
            'description': dict_get(series, ('ShortDescription', 'TinyDescription', )),
        }
        if season_numbers:
            season_titles = season_titles or []
            for season in try_get(series, lambda x: x['Seasons'], list) or []:
                if season.get('SeasonNumber') in season_numbers and season.get('Title'):
                    season_titles.append(season['Title'])

        def gen_episode(m_info, season_titles):
            for episode_group in try_get(m_info, lambda x: x['EpisodeGroups'], list) or []:
                if season_titles and episode_group.get('Title') not in season_titles:
                    continue
                episodes = try_get(episode_group, lambda x: x['Episodes'], list)
                if not episodes:
                    continue
                season_info = {
                    'season': episode_group.get('Title'),
                    'season_number': int_or_none(episode_group.get('SeasonNumber')),
                }
                try:
                    episodes = [(int(ep['EpisodeNumber']), ep) for ep in episodes]
                    episodes.sort()
                except (KeyError, ValueError):
                    episodes = enumerate(episodes, 1)
                for n, episode in episodes:
                    info = self._extract_episode(episode)
                    if info is None:
                        continue
                    info['episode_number'] = n
                    info.update(season_info)
                    yield info

        return self.playlist_result(
            gen_episode(media_info, season_titles), playlist_id=video_id, **series_info)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        if video_id.startswith('ser.'):
            param_season = parse_qs(url).get('season', [None])
            param_season = [
                (have_number, int_or_none(v) if have_number else str_or_none(v))
                for have_number, v in
                [(int_or_none(ps) is not None, ps) for ps in param_season]
                if v is not None
            ]
            season_kwargs = {
                k: [v for is_num, v in param_season if is_num is c] or None
                for k, c in
                [('season_titles', False), ('season_numbers', True)]
            }
            return self._extract_series(video_id, **season_kwargs)

        return self._extract_episode(self._call_api_get_tiles(video_id))
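
    # In _real_extract above, repeated '?season=' query values are split into
    # numeric and title filters. E.g. for the test URL
    # .../series/ser.3448-monogramma?season=1&season=2021%20-%202022 the call
    # resolves to (a sketch, using the names defined above):
    #   self._extract_series('ser.3448',
    #                        season_titles=['2021 - 2022'], season_numbers=[1])

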
class ERTWebtvEmbedIE(InfoExtractor):
    IE_NAME = 'ertwebtv:embed'
    IE_DESC = 'ert.gr webtv embedded videos'
    _BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
    _VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'

    _TESTS = [{
        'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
        'md5': 'f9e9900c25c26f4ecfbddbb4b6305854',
        'info_dict': {
            'id': 'trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4',
            'title': 'md5:914f06a73cd8b62fbcd6fb90c636e497',
            'ext': 'mp4',
            'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg'
        },
    }]

    @classmethod
    def _extract_urls(cls, webpage):
        EMBED_URL_RE = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
        EMBED_RE = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{EMBED_URL_RE})(?P=_q1)'

        for mobj in re.finditer(EMBED_RE, webpage):
            url = unescapeHTML(mobj.group('url'))
            if not cls.suitable(url):
                continue
            yield url

    def _real_extract(self, url):
        video_id = self._match_id(url)
        formats, subs = self._extract_m3u8_formats_and_subtitles(
            f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8',
            video_id, 'mp4')
        self._sort_formats(formats)
        thumbnail_id = parse_qs(url).get('bgimg', [None])[0]
        if thumbnail_id and not thumbnail_id.startswith('http'):
            thumbnail_id = f'https://program.ert.gr{thumbnail_id}'
        return {
            'id': video_id,
            'title': f'VOD - {video_id}',
            'thumbnail': thumbnail_id,
            'formats': formats,
            'subtitles': subs,
        }
yt_dlp/extractor/europeantour.py (new file, 37 lines)
@@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class EuropeanTourIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?europeantour\.com/dpworld-tour/news/video/(?P<id>[^/&?#$]+)'

    _TESTS = [{
        'url': 'https://www.europeantour.com/dpworld-tour/news/video/the-best-shots-of-the-2021-seasons/',
        'info_dict': {
            'id': '6287788195001',
            'ext': 'mp4',
            'title': 'The best shots of the 2021 seasons',
            'duration': 2416.512,
            'timestamp': 1640010141,
            'uploader_id': '5136026580001',
            'tags': ['prod-imported'],
            'thumbnail': 'md5:fdac52bc826548860edf8145ee74e71a',
            'upload_date': '20211220'
        },
        'params': {'skip_download': True}
    }]

    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        vid, aid = re.search(r'(?s)brightcove-player\s?video-id="([^"]+)".*"ACCOUNT_ID":"([^"]+)"', webpage).groups()
        if not aid:
            aid = '5136026580001'
        return self.url_result(
            self.BRIGHTCOVE_URL_TEMPLATE % (aid, vid), 'BrightcoveNew')
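# With the test case above, the Brightcove template resolves to:
#   http://players.brightcove.net/5136026580001/default_default/index.html?videoId=6287788195001
# (the account id falls back to '5136026580001' when the page embeds none).
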
@@ -37,7 +37,10 @@ from .aenetworks import (
    HistoryPlayerIE,
    BiographyIE,
)
-from .afreecatv import AfreecaTVIE
+from .afreecatv import (
+    AfreecaTVIE,
+    AfreecaTVLiveIE,
+)
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
@@ -190,6 +193,7 @@ from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
from .cableav import CableAVIE
from .callin import CallinIE
from .cam4 import CAM4IE
from .camdemy import (
    CamdemyIE,
@@ -300,6 +304,10 @@ from .cozytv import CozyTVIE
from .cracked import CrackedIE
from .crackle import CrackleIE
from .crooksandliars import CrooksAndLiarsIE
from .crowdbunker import (
    CrowdBunkerIE,
    CrowdBunkerChannelIE,
)
from .crunchyroll import (
    CrunchyrollIE,
    CrunchyrollShowPlaylistIE,
@@ -317,6 +325,7 @@ from .curiositystream import (
    CuriosityStreamSeriesIE,
)
from .cwtv import CWTVIE
from .daftsex import DaftsexIE
from .dailymail import DailyMailIE
from .dailymotion import (
    DailymotionIE,
@@ -352,9 +361,19 @@ from .dplay import (
    DPlayIE,
    DiscoveryPlusIE,
    HGTVDeIE,
    GoDiscoveryIE,
    TravelChannelIE,
    CookingChannelIE,
    HGTVUsaIE,
    FoodNetworkIE,
    InvestigationDiscoveryIE,
    DestinationAmericaIE,
    AmHistoryChannelIE,
    ScienceChannelIE,
    DIYNetworkIE,
    DiscoveryLifeIE,
    AnimalPlanetIE,
    TLCIE,
    DiscoveryPlusIndiaIE,
    DiscoveryNetworksDeIE,
    DiscoveryPlusItalyIE,
@@ -376,12 +395,8 @@ from .duboku import (
)
from .dumpert import DumpertIE
from .defense import DefenseGouvFrIE
from .digitalconcerthall import DigitalConcertHallIE
from .discovery import DiscoveryIE
from .discoverygo import (
    DiscoveryGoIE,
    DiscoveryGoPlaylistIE,
)
from .discoveryvr import DiscoveryVRIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .doodstream import DoodStreamIE
@@ -423,6 +438,11 @@ from .eroprofile import (
    EroProfileIE,
    EroProfileAlbumIE,
)
from .ertgr import (
    ERTFlixCodenameIE,
    ERTFlixIE,
    ERTWebtvEmbedIE,
)
from .escapist import EscapistIE
from .espn import (
    ESPNIE,
@@ -432,6 +452,7 @@ from .espn import (
)
from .esri import EsriVideoIE
from .europa import EuropaIE
from .europeantour import EuropeanTourIE
from .euscreen import EUScreenIE
from .expotv import ExpoTVIE
from .expressen import ExpressenIE
@@ -621,6 +642,7 @@ from .instagram import (
    InstagramIOSIE,
    InstagramUserIE,
    InstagramTagIE,
    InstagramStoryIE,
)
from .internazionale import InternazionaleIE
from .internetvideoarchive import InternetVideoArchiveIE
@@ -628,7 +650,11 @@ from .iprima import (
    IPrimaIE,
    IPrimaCNNIE
)
-from .iqiyi import IqiyiIE
+from .iqiyi import (
+    IqiyiIE,
+    IqIE,
+    IqAlbumIE
+)
from .ir90tv import Ir90TvIE
from .itv import (
    ITVIE,
@@ -655,6 +681,7 @@ from .kankan import KankanIE
from .karaoketv import KaraoketvIE
from .karrierevideos import KarriereVideosIE
from .keezmovies import KeezMoviesIE
from .kelbyone import KelbyOneIE
from .ketnet import KetnetIE
from .khanacademy import (
    KhanAcademyIE,
@@ -722,7 +749,6 @@ from .limelight import (
    LimelightChannelListIE,
)
from .line import (
    LineTVIE,
    LineLiveIE,
    LineLiveChannelIE,
)
@@ -739,7 +765,10 @@ from .livestream import (
    LivestreamOriginalIE,
    LivestreamShortenerIE,
)
-from .lnkgo import LnkGoIE
+from .lnkgo import (
+    LnkGoIE,
+    LnkIE,
+)
from .localnews8 import LocalNews8IE
from .lovehomeporn import LoveHomePornIE
from .lrt import LRTIE
@@ -754,6 +783,7 @@ from .mailru import (
    MailRuMusicIE,
    MailRuMusicSearchIE,
)
from .mainstreaming import MainStreamingIE
from .malltv import MallTVIE
from .mangomolo import (
    MangomoloVideoIE,
@@ -819,7 +849,10 @@ from .mirrativ import (
)
from .mit import TechTVMITIE, OCWMITIE
from .mitele import MiTeleIE
-from .mixch import MixchIE
+from .mixch import (
+    MixchIE,
+    MixchArchiveIE,
+)
from .mixcloud import (
    MixcloudIE,
    MixcloudUserIE,
@@ -858,6 +891,12 @@ from .mtv import (
)
from .muenchentv import MuenchenTVIE
from .musescore import MuseScoreIE
from .musicdex import (
    MusicdexSongIE,
    MusicdexAlbumIE,
    MusicdexArtistIE,
    MusicdexPlaylistIE,
)
from .mwave import MwaveIE, MwaveMeetGreetIE
from .mxplayer import (
    MxplayerIE,
@@ -934,6 +973,7 @@ from .newgrounds import (
    NewgroundsUserIE,
)
from .newstube import NewstubeIE
from .newsy import NewsyIE
from .nextmedia import (
    NextMediaIE,
    NextMediaActionNewsIE,
@@ -981,6 +1021,7 @@ from .nitter import NitterIE
from .njpwworld import NJPWWorldIE
from .nobelprize import NobelPrizeIE
from .nonktube import NonkTubeIE
from .noodlemagazine import NoodleMagazineIE
from .noovo import NoovoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
@@ -1056,6 +1097,7 @@ from .opencast import (
from .openrec import (
    OpenRecIE,
    OpenRecCaptureIE,
    OpenRecMovieIE,
)
from .ora import OraTVIE
from .orf import (
@@ -1153,6 +1195,10 @@ from .pokemon import (
    PokemonIE,
    PokemonWatchIE,
)
from .pokergo import (
    PokerGoIE,
    PokerGoCollectionIE,
)
from .polsatgo import PolsatGoIE
from .polskieradio import (
    PolskieRadioIE,
@@ -1178,6 +1224,7 @@ from .pornhub import (
from .pornotube import PornotubeIE
from .pornovoisines import PornoVoisinesIE
from .pornoxo import PornoXOIE
from .pornez import PornezIE
from .puhutv import (
    PuhuTVIE,
    PuhuTVSerieIE,
@@ -1185,6 +1232,13 @@ from .puhutv import (
from .presstv import PressTVIE
from .projectveritas import ProjectVeritasIE
from .prosiebensat1 import ProSiebenSat1IE
from .prx import (
    PRXStoryIE,
    PRXSeriesIE,
    PRXAccountIE,
    PRXStoriesSearchIE,
    PRXSeriesSearchIE
)
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .qqmusic import (
@@ -1221,9 +1275,10 @@ from .rai import (
    RaiPlayIE,
    RaiPlayLiveIE,
    RaiPlayPlaylistIE,
    RaiPlaySoundIE,
    RaiPlaySoundLiveIE,
    RaiPlaySoundPlaylistIE,
    RaiIE,
    RaiPlayRadioIE,
    RaiPlayRadioPlaylistIE,
)
from .raywenderlich import (
    RayWenderlichIE,
@@ -1278,6 +1333,12 @@ from .rtl2 import (
    RTL2YouIE,
    RTL2YouSeriesIE,
)
from .rtnews import (
    RTNewsIE,
    RTDocumentryIE,
    RTDocumentryPlaylistIE,
    RuptlyIE,
)
from .rtp import RTPIE
from .rtrfm import RTRFMIE
from .rts import RTSIE
@@ -1291,6 +1352,7 @@ from .rtve import (
from .rtvnh import RTVNHIE
from .rtvs import RTVSIE
from .ruhd import RUHDIE
from .rule34video import Rule34VideoIE
from .rumble import (
    RumbleEmbedIE,
    RumbleChannelIE,
@@ -1304,6 +1366,14 @@ from .rutube import (
    RutubePlaylistIE,
    RutubeTagsIE,
)
from .glomex import (
    GlomexIE,
    GlomexEmbedIE,
)
from .megatvcom import (
    MegaTVComIE,
    MegaTVComEmbedIE,
)
from .rutv import RUTVIE
from .ruutu import RuutuIE
from .ruv import RuvIE
@@ -1492,7 +1562,12 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .teamtreehouse import TeamTreeHouseIE
from .techtalks import TechTalksIE
-from .ted import TEDIE
+from .ted import (
+    TedEmbedIE,
+    TedPlaylistIE,
+    TedSeriesIE,
+    TedTalkIE,
+)
from .tele5 import Tele5IE
from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
@@ -1638,6 +1713,10 @@ from .tvnow import (
    TVNowAnnualIE,
    TVNowShowIE,
)
from .tvopengr import (
    TVOpenGrWatchIE,
    TVOpenGrEmbedIE,
)
from .tvp import (
    TVPEmbedIE,
    TVPIE,
@@ -1691,6 +1770,7 @@ from .dlive import (
    DLiveVODIE,
    DLiveStreamIE,
)
from .drooble import DroobleIE
from .umg import UMGDeIE
from .unistra import UnistraIE
from .unity import UnityIE
@@ -1765,6 +1845,10 @@ from .vimeo import (
    VimeoWatchLaterIE,
    VHXEmbedIE,
)
from .vimm import (
    VimmIE,
    VimmRecordingIE,
)
from .vimple import VimpleIE
from .vine import (
    VineIE,
@@ -1911,6 +1995,7 @@ from .yandexmusic import (
)
from .yandexvideo import (
    YandexVideoIE,
    YandexVideoPreviewIE,
    ZenYandexIE,
    ZenYandexChannelIE,
)
@@ -1937,11 +2022,13 @@ from .youtube import (
    YoutubeFavouritesIE,
    YoutubeHistoryIE,
    YoutubeTabIE,
    YoutubeLivestreamEmbedIE,
    YoutubePlaylistIE,
    YoutubeRecommendedIE,
    YoutubeSearchDateIE,
    YoutubeSearchIE,
    YoutubeSearchURLIE,
    YoutubeMusicSearchURLIE,
    YoutubeSubscriptionsIE,
    YoutubeTruncatedIDIE,
    YoutubeTruncatedURLIE,

@@ -13,23 +13,25 @@ from ..compat import (
)
from ..utils import (
    clean_html,
    determine_ext,
    error_to_compat_str,
    ExtractorError,
    float_or_none,
    get_element_by_id,
    int_or_none,
    js_to_json,
    limit_length,
    merge_dicts,
    network_exceptions,
    parse_count,
    parse_qs,
    qualities,
    sanitized_Request,
    traverse_obj,
    try_get,
    url_or_none,
    urlencode_postdata,
    urljoin,
    variadic,
)


@@ -163,7 +165,7 @@ class FacebookIE(InfoExtractor):
        'info_dict': {
            'id': '1417995061575415',
            'ext': 'mp4',
-            'title': 'Yaroslav Korpan - Довгоочікуване відео',
+            'title': 'Ukrainian Scientists Worldwide | Довгоочікуване відео',
            'description': 'Довгоочікуване відео',
            'timestamp': 1486648771,
            'upload_date': '20170209',
@@ -194,8 +196,8 @@ class FacebookIE(InfoExtractor):
        'info_dict': {
            'id': '202882990186699',
            'ext': 'mp4',
-            'title': 'Elisabeth Ahtn - Hello? Yes your uber ride is here\n* Jukin...',
-            'description': 'Hello? Yes your uber ride is here\n* Jukin Media Verified *\nFind this video and others like it by visiting...',
+            'title': 'birb (O v O") | Hello? Yes your uber ride is here',
+            'description': 'Hello? Yes your uber ride is here * Jukin Media Verified * Find this video and others like it by visiting...',
            'timestamp': 1486035513,
            'upload_date': '20170202',
            'uploader': 'Elisabeth Ahtn',
@@ -397,28 +399,31 @@ class FacebookIE(InfoExtractor):
                url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)

        def extract_metadata(webpage):
-            video_title = self._html_search_regex(
-                r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
-                'title', default=None)
-            if not video_title:
-                video_title = self._html_search_regex(
-                    r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
-                    webpage, 'alternative title', default=None)
-            if not video_title:
-                video_title = self._html_search_meta(
-                    ['og:title', 'twitter:title', 'description'],
-                    webpage, 'title', default=None)
-            if video_title:
-                video_title = limit_length(video_title, 80)
-            else:
-                video_title = 'Facebook video #%s' % video_id
-            description = self._html_search_meta(
+            post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
+                r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)]
+            post = traverse_obj(post_data, (
+                ..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
+            media = [m for m in traverse_obj(post, (..., 'attachments', ..., 'media'), expected_type=dict) or []
+                     if str(m.get('id')) == video_id and m.get('__typename') == 'Video']
+            title = traverse_obj(media, (..., 'title', 'text'), get_all=False)
+            description = traverse_obj(media, (
+                ..., 'creation_story', 'comet_sections', 'message', 'story', 'message', 'text'), get_all=False)
+            uploader_data = (traverse_obj(media, (..., 'owner'), get_all=False)
+                             or traverse_obj(post, (..., 'node', 'actors', ...), get_all=False) or {})

+            page_title = title or self._html_search_regex((
+                r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
+                r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>',
+                self._meta_regex('og:title'), self._meta_regex('twitter:title'), r'<title>(?P<content>.+?)</title>'
+            ), webpage, 'title', default=None, group='content')
+            description = description or self._html_search_meta(
+                ['description', 'og:description', 'twitter:description'],
+                webpage, 'description', default=None)
-            uploader = clean_html(get_element_by_id(
-                'fbPhotoPageAuthorName', webpage)) or self._search_regex(
-                r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
-                default=None) or self._og_search_title(webpage, fatal=False)
+            uploader = uploader_data.get('name') or (
+                clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
+                or self._search_regex(
+                    (r'ownerName\s*:\s*"([^"]+)"', *self._og_regexes('title')), webpage, 'uploader', fatal=False))

            timestamp = int_or_none(self._search_regex(
                r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
                'timestamp', default=None))
@@ -433,17 +438,17 @@ class FacebookIE(InfoExtractor):
                r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
                default=None))
            info_dict = {
-                'title': video_title,
                'description': description,
                'uploader': uploader,
+                'uploader_id': uploader_data.get('id'),
                'timestamp': timestamp,
                'thumbnail': thumbnail,
                'view_count': view_count,
            }

            info_json_ld = self._search_json_ld(webpage, video_id, default={})
-            if info_json_ld.get('title'):
-                info_json_ld['title'] = limit_length(
-                    re.sub(r'\s*\|\s*Facebook$', '', info_json_ld['title']), 80)
+            info_json_ld['title'] = (re.sub(r'\s*\|\s*Facebook$', '', title or info_json_ld.get('title') or page_title or '')
+                                     or (description or '').replace('\n', ' ') or f'Facebook video #{video_id}')
            return merge_dicts(info_json_ld, info_dict)

        video_data = None
@@ -510,15 +515,19 @@ class FacebookIE(InfoExtractor):
        def parse_graphql_video(video):
            formats = []
            q = qualities(['sd', 'hd'])
-            for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
-                playable_url = video.get('playable_url' + suffix)
+            for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
+                                   ('playable_url_dash', '')):
+                playable_url = video.get(key)
                if not playable_url:
                    continue
-                formats.append({
-                    'format_id': format_id,
-                    'quality': q(format_id),
-                    'url': playable_url,
-                })
+                if determine_ext(playable_url) == 'mpd':
+                    formats.extend(self._extract_mpd_formats(playable_url, video_id))
+                else:
+                    formats.append({
+                        'format_id': format_id,
+                        'quality': q(format_id),
+                        'url': playable_url,
+                    })
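            # Note: 'playable_url_dash' values are MPD manifests; the loop
            # above expands them into individual DASH formats instead of
            # adding them as single progressive 'sd'/'hd' formats.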
            extract_dash_manifest(video, formats)
            process_formats(formats)
            v_id = video.get('videoId') or video.get('id') or video_id
@@ -546,22 +555,15 @@ class FacebookIE(InfoExtractor):
                if media.get('__typename') == 'Video':
                    return parse_graphql_video(media)

-            nodes = data.get('nodes') or []
-            node = data.get('node') or {}
-            if not nodes and node:
-                nodes.append(node)
-            for node in nodes:
-                story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
-                attachments = try_get(story, [
-                    lambda x: x['attached_story']['attachments'],
-                    lambda x: x['attachments']
-                ], list) or []
-                for attachment in attachments:
-                    attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
-                    ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
-                    for n in ns:
-                        parse_attachment(n)
-                    parse_attachment(attachment)
+            nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
+            attachments = traverse_obj(nodes, (
+                ..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
+                ..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or []
+            for attachment in attachments:
+                ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
+                for n in ns:
+                    parse_attachment(n)
+                parse_attachment(attachment)

            edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
            for edge in edges:
@@ -730,6 +732,7 @@ class FacebookPluginsVideoIE(InfoExtractor):
        'info_dict': {
            'id': '10154383743583686',
            'ext': 'mp4',
            # TODO: Fix title, uploader
            'title': 'What to do during the haze?',
            'uploader': 'Gov.sg',
            'upload_date': '20160826',

@@ -1,18 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals

-import hashlib
-
from .common import InfoExtractor
from ..compat import (
    compat_parse_qs,
-    compat_urllib_request,
-    compat_urlparse,
)
from ..utils import (
    ExtractorError,
    sanitized_Request,
+    traverse_obj,
    urlencode_postdata,
+    urljoin,
)


@@ -82,41 +80,32 @@ class FC2IE(InfoExtractor):
            self._downloader.cookiejar.clear_session_cookies()  # must clear
            self._login()

-        title = 'FC2 video %s' % video_id
-        thumbnail = None
+        title, thumbnail, description = None, None, None
        if webpage is not None:
-            title = self._og_search_title(webpage)
+            title = self._html_search_regex(
+                (r'<h2\s+class="videoCnt_title">([^<]+?)</h2>',
+                 r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*<img',
+                 # there are two matches in the webpage
+                 r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*\1'),
+                webpage,
+                'title', fatal=False)
            thumbnail = self._og_search_thumbnail(webpage)
-            refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url
+            description = self._og_search_description(webpage)

-        mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
-
-        info_url = (
-            'http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&'.
-            format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E')))
-
-        info_webpage = self._download_webpage(
-            info_url, video_id, note='Downloading info page')
-        info = compat_urlparse.parse_qs(info_webpage)
-
-        if 'err_code' in info:
-            # most of the time we can still download video even if err_code is 403 or 602
-            self.report_warning(
-                'Error code was: %s... but still trying' % info['err_code'][0])
-
-        if 'filepath' not in info:
-            raise ExtractorError('Cannot download file. Are you logged in?')
-
-        video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
-        title_info = info.get('title')
-        if title_info:
-            title = title_info[0]
+        vidplaylist = self._download_json(
+            'https://video.fc2.com/api/v3/videoplaylist/%s?sh=1&fs=0' % video_id, video_id,
+            note='Downloading info page')
+        vid_url = traverse_obj(vidplaylist, ('playlist', 'nq'))
+        if not vid_url:
+            raise ExtractorError('Unable to extract video URL')
+        vid_url = urljoin('https://video.fc2.com/', vid_url)
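
        # The v3 videoplaylist API returns site-relative playlist paths; 'nq'
        # (presumably "normal quality") is the variant used above, and the
        # path must be joined back onto https://video.fc2.com/ for download.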

        return {
            'id': video_id,
            'title': title,
-            'url': video_url,
-            'ext': 'flv',
+            'url': vid_url,
+            'ext': 'mp4',
            'description': description,
            'thumbnail': thumbnail,
        }


@@ -7,6 +7,7 @@ from ..compat import (
)
from ..utils import (
    ExtractorError,
+    format_field,
    int_or_none,
    qualities,
)
@@ -95,7 +96,7 @@ class FlickrIE(InfoExtractor):
        owner = video_info.get('owner', {})
        uploader_id = owner.get('nsid')
        uploader_path = owner.get('path_alias') or uploader_id
-        uploader_url = 'https://www.flickr.com/photos/%s/' % uploader_path if uploader_path else None
+        uploader_url = format_field(uploader_path, template='https://www.flickr.com/photos/%s/')
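
        # format_field(value, template=...) fills the template only when the
        # value is truthy and returns None otherwise, replacing the manual
        # "template % value if value else None" pattern removed above.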

        return {
            'id': video_id,

@@ -4,7 +4,7 @@ from __future__ import unicode_literals
import json
import uuid

-from .adobepass import AdobePassIE
+from .common import InfoExtractor
from ..compat import (
    compat_HTTPError,
    compat_str,
@@ -20,7 +20,7 @@ from ..utils import (
)


-class FOXIE(AdobePassIE):
+class FOXIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
    _TESTS = [{
        # clip
@@ -37,6 +37,7 @@ class FOXIE(AdobePassIE):
            'creator': 'FOX',
            'series': 'Gotham',
            'age_limit': 14,
+            'episode': 'Aftermath: Bruce Wayne Develops Into The Dark Knight'
        },
        'params': {
            'skip_download': True,
@@ -46,14 +47,15 @@ class FOXIE(AdobePassIE):
        'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
        'only_matching': True,
    }, {
-        # episode, geo-restricted, tv provider required
-        'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
+        # sports event, geo-restricted
+        'url': 'https://www.fox.com/watch/b057484dade738d1f373b3e46216fa2c/',
        'only_matching': True,
    }]
    _GEO_BYPASS = False
    _HOME_PAGE_URL = 'https://www.fox.com/'
-    _API_KEY = 'abdcbed02c124d393b39e818a4312055'
+    _API_KEY = '6E9S4bmcoNnZwVLOHywOv8PJEdu76cM9'
    _access_token = None
+    _device_id = compat_str(uuid.uuid4())

    def _call_api(self, path, video_id, data=None):
        headers = {
@@ -63,7 +65,7 @@ class FOXIE(AdobePassIE):
            headers['Authorization'] = 'Bearer ' + self._access_token
        try:
            return self._download_json(
-                'https://api2.fox.com/v2.0/' + path,
+                'https://api3.fox.com/v2.0/' + path,
                video_id, data=data, headers=headers)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
@@ -87,16 +89,37 @@ class FOXIE(AdobePassIE):
        if not self._access_token:
            self._access_token = self._call_api(
                'login', None, json.dumps({
-                    'deviceId': compat_str(uuid.uuid4()),
+                    'deviceId': self._device_id,
                }).encode())['accessToken']

    def _real_extract(self, url):
        video_id = self._match_id(url)

-        video = self._call_api('vodplayer/' + video_id, video_id)
+        self._access_token = self._call_api(
+            'previewpassmvpd?device_id=%s&mvpd_id=TempPass_fbcfox_60min' % self._device_id,
+            video_id)['accessToken']
+
+        video = self._call_api('watch', video_id, data=json.dumps({
+            'capabilities': ['drm/widevine', 'fsdk/yo'],
+            'deviceWidth': 1280,
+            'deviceHeight': 720,
+            'maxRes': '720p',
+            'os': 'macos',
+            'osv': '',
+            'provider': {
+                'freewheel': {'did': self._device_id},
+                'vdms': {'rays': ''},
+                'dmp': {'kuid': '', 'seg': ''}
+            },
+            'playlist': '',
+            'privacy': {'us': '1---'},
+            'siteSection': '',
+            'streamType': 'vod',
+            'streamId': video_id}).encode('utf-8'))
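
        # 'TempPass_fbcfox_60min' above appears to request a short-lived,
        # device-bound preview pass (the name suggests roughly an hour), so
        # free previews can be fetched without a full TV-provider login.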

        title = video['name']
        release_url = video['url']

        try:
            m3u8_url = self._download_json(release_url, video_id)['playURL']
        except ExtractorError as e:

@@ -1,48 +1,52 @@
# coding: utf-8
from __future__ import unicode_literals

+from ..utils import HEADRequest
from .common import InfoExtractor


class FujiTVFODPlus7IE(InfoExtractor):
-    _VALID_URL = r'https?://fod\.fujitv\.co\.jp/title/[0-9a-z]{4}/(?P<id>[0-9a-z]+)'
-    _BASE_URL = 'http://i.fod.fujitv.co.jp/'
-    _BITRATE_MAP = {
-        300: (320, 180),
-        800: (640, 360),
-        1200: (1280, 720),
-        2000: (1280, 720),
-        4000: (1920, 1080),
-    }
+    _VALID_URL = r'https?://fod\.fujitv\.co\.jp/title/(?P<sid>[0-9a-z]{4})/(?P<id>[0-9a-z]+)'
+    _BASE_URL = 'https://i.fod.fujitv.co.jp/'

    _TESTS = [{
-        'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810075',
+        'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076',
        'info_dict': {
-            'id': '5d40810075',
-            'title': '5d40810075',
+            'id': '5d40110076',
            'ext': 'mp4',
-            'format_id': '4000',
-            'thumbnail': 'http://i.fod.fujitv.co.jp/pc/image/wbtn/wbtn_5d40810075.jpg'
+            'title': '#1318 『まる子、まぼろしの洋館を見る』の巻',
+            'series': 'ちびまる子ちゃん',
+            'series_id': '5d40',
+            'description': 'md5:b3f51dbfdda162ac4f789e0ff4d65750',
+            'thumbnail': 'https://i.fod.fujitv.co.jp/img/program/5d40/episode/5d40110076_a.jpg',
        },
+        'skip': 'Expires after a week'
    }]

    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        formats = self._extract_m3u8_formats(
-            self._BASE_URL + 'abr/tv_android/%s.m3u8' % video_id, video_id, 'mp4')
-        for f in formats:
-            wh = self._BITRATE_MAP.get(f.get('tbr'))
-            if wh:
-                f.update({
-                    'width': wh[0],
-                    'height': wh[1],
-                })
-        self._sort_formats(formats)
+        series_id, video_id = self._match_valid_url(url).groups()
+        self._request_webpage(HEADRequest(url), video_id)
+        json_info = {}
+        token = self._get_cookies(url).get('CT')
+        if token:
+            json_info = self._download_json('https://fod-sp.fujitv.co.jp/apps/api/episode/detail/?ep_id=%s&is_premium=false' % video_id, video_id, headers={'x-authorization': f'Bearer {token.value}'}, fatal=False)
+        else:
+            self.report_warning(f'The token cookie is needed to extract video metadata. {self._LOGIN_HINTS["cookies"]}')
+        formats, subtitles = [], {}
+        src_json = self._download_json(f'{self._BASE_URL}abrjson_v2/tv_android/{video_id}', video_id)
+        for src in src_json['video_selector']:
+            if not src.get('url'):
+                continue
+            fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'mp4')
+            formats.extend(fmt)
+            subtitles = self._merge_subtitles(subtitles, subs)
+        self._sort_formats(formats, ['tbr'])

        return {
            'id': video_id,
-            'title': video_id,
+            'title': json_info.get('ep_title'),
+            'series': json_info.get('lu_title'),
+            'series_id': series_id,
+            'description': json_info.get('ep_description'),
            'formats': formats,
-            'thumbnail': self._BASE_URL + 'pc/image/wbtn/wbtn_%s.jpg' % video_id,
+            'subtitles': subtitles,
+            'thumbnail': f'{self._BASE_URL}img/program/{series_id}/episode/{video_id}_a.jpg',
        }

@@ -11,7 +11,7 @@ from ..utils import (


class FunkIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
        'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81',
@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import (
    determine_ext,
    format_field,
    int_or_none,
    str_or_none,
    traverse_obj,
@@ -86,7 +87,7 @@ class GameJoltBaseIE(InfoExtractor):
            'display_id': post_data.get('slug'),
            'uploader': user_data.get('display_name') or user_data.get('name'),
            'uploader_id': user_data.get('username'),
            'uploader_url': 'https://gamejolt.com' + user_data['url'] if user_data.get('url') else None,
            'uploader_url': format_field(user_data, 'url', 'https://gamejolt.com%s'),
            'categories': [try_get(category, lambda x: '%s - %s' % (x['community']['name'], x['channel'].get('display_title') or x['channel']['title']))
                           for category in post_data.get('communities') or []],
            'tags': traverse_obj(

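The `format_field(user_data, 'url', 'https://gamejolt.com%s')` rewrite replaces the explicit conditional: it applies the template only when the field is set and otherwise returns a default. A simplified sketch of that behaviour (the real yt_dlp.utils.format_field takes more parameters; this is an approximation, not the library implementation):

    def format_field(obj, field, template='%s', default=None):
        # apply `template` to obj[field] when present; otherwise return `default`
        value = obj.get(field) if isinstance(obj, dict) else None
        return template % value if value not in (None, '') else default

    user_data = {'username': 'someone'}  # hypothetical: no 'url' key
    print(format_field(user_data, 'url', 'https://gamejolt.com%s'))        # -> None
    print(format_field({'url': '/@someone'}, 'url', 'https://gamejolt.com%s'))  # -> https://gamejolt.com/@someone
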
@@ -28,6 +28,7 @@ from ..utils import (
    mimetype2ext,
    orderedSet,
    parse_duration,
    parse_resolution,
    sanitized_Request,
    smuggle_url,
    unescapeHTML,
@@ -100,6 +101,8 @@ from .ustream import UstreamIE
from .arte import ArteTVEmbedIE
from .videopress import VideoPressIE
from .rutube import RutubeIE
from .glomex import GlomexEmbedIE
from .megatvcom import MegaTVComEmbedIE
from .limelight import LimelightBaseIE
from .anvato import AnvatoIE
from .washingtonpost import WashingtonPostIE
@@ -112,6 +115,7 @@ from .channel9 import Channel9IE
from .vshare import VShareIE
from .mediasite import MediasiteIE
from .springboardplatform import SpringboardPlatformIE
from .ted import TedEmbedIE
from .yapfiles import YapFilesIE
from .vice import ViceIE
from .xfileshare import XFileShareIE
@@ -135,8 +139,12 @@ from .arcpublishing import ArcPublishingIE
from .medialaan import MedialaanIE
from .simplecast import SimplecastIE
from .wimtv import WimTVIE
from .tvopengr import TVOpenGrEmbedIE
from .ertgr import ERTWebtvEmbedIE
from .tvp import TVPEmbedIE
from .blogger import BloggerIE
from .mainstreaming import MainStreamingIE
from .gfycat import GfycatIE


class GenericIE(InfoExtractor):
@@ -1869,6 +1877,62 @@ class GenericIE(InfoExtractor):
            },
            'add_ie': [RutubeIE.ie_key()],
        },
        {
            # glomex:embed
            'url': 'https://www.skai.gr/news/world/iatrikos-syllogos-tourkias-to-turkovac-aplo-dialyma-erntogan-eiste-apateones-kai-pseytes',
            'info_dict': {
                'id': 'v-ch2nkhcirwc9-sf',
                'ext': 'mp4',
                'title': 'md5:786e1e24e06c55993cee965ef853a0c1',
                'description': 'md5:8b517a61d577efe7e36fde72fd535995',
                'timestamp': 1641885019,
                'upload_date': '20220111',
                'duration': 460000,
                'thumbnail': 'https://i3thumbs.glomex.com/dC1idjJwdndiMjRzeGwvMjAyMi8wMS8xMS8wNy8xMF8zNV82MWRkMmQ2YmU5ZTgyLmpwZw==/profile:player-960x540',
            },
        },
        {
            # megatvcom:embed
            'url': 'https://www.in.gr/2021/12/18/greece/apokalypsi-mega-poios-parelave-tin-ereyna-tsiodra-ek-merous-tis-kyvernisis-o-prothypourgos-telika-gnorize/',
            'info_dict': {
                'id': 'apokalypsi-mega-poios-parelave-tin-ereyna-tsiodra-ek-merous-tis-kyvernisis-o-prothypourgos-telika-gnorize',
                'title': 'md5:5e569cf996ec111057c2764ec272848f',
            },
            'playlist': [{
                'md5': '1afa26064ff00ccb91617957dbc73dc1',
                'info_dict': {
                    'ext': 'mp4',
                    'id': '564916',
                    'display_id': 'md5:6cdf22d3a2e7bacb274b7295089a1770',
                    'title': 'md5:33b9dd39584685b62873043670eb52a6',
                    'description': 'md5:c1db7310f390518ac36dd69d947ef1a1',
                    'timestamp': 1639753145,
                    'upload_date': '20211217',
                    'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/12/prezerakos-1024x597.jpg',
                },
            }, {
                'md5': '4a1c220695f1ef865a8b7966a53e2474',
                'info_dict': {
                    'ext': 'mp4',
                    'id': '564905',
                    'display_id': 'md5:ead15695e485e649aed2b81ebd699b88',
                    'title': 'md5:2b71fd54249a3ca34609fe39ae31c47b',
                    'description': 'md5:c42e12f638d0a97d6de4508e2c4df982',
                    'timestamp': 1639753047,
                    'upload_date': '20211217',
                    'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/12/tsiodras-mitsotakis-1024x545.jpg',
                },
            }]
        },
        {
            'url': 'https://www.ertnews.gr/video/manolis-goyalles-o-anthropos-piso-apo-ti-diadiktyaki-vasilopita/',
            'info_dict': {
                'id': '2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4',
                'ext': 'mp4',
                'title': 'md5:df64f5b61c06d0e9556c0cdd5cf14464',
                'thumbnail': 'https://www.ert.gr/themata/photos/2021/20220114-apotis6-gouales-pita.jpg',
            },
        },
        {
            # ThePlatform embedded with whitespaces in URLs
            'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
@@ -2174,6 +2238,22 @@ class GenericIE(InfoExtractor):
                'skip_download': True,
            },
        },
        {
            # tvopengr:embed
            'url': 'https://www.ethnos.gr/World/article/190604/hparosiaxekinoynoisynomiliessthgeneyhmethskiatoypolemoypanoapothnoykrania',
            'md5': 'eb0c3995d0a6f18f6538c8e057865d7d',
            'info_dict': {
                'id': '101119',
                'ext': 'mp4',
                'display_id': 'oikarpoitondiapragmateyseonhparosias',
                'title': 'md5:b979f4d640c568617d6547035528a149',
                'description': 'md5:e54fc1977c7159b01cc11cd7d9d85550',
                'timestamp': 1641772800,
                'upload_date': '20220110',
                'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/70bc39fa-895b-4918-a364-c39d2135fc6d.jpg',
            }
        },
        {
            # blogger embed
            'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html',
@@ -2382,8 +2462,47 @@ class GenericIE(InfoExtractor):
                'timestamp': 1636788683.0,
                'upload_date': '20211113'
            }
        },
        {
            # MainStreaming player
            'url': 'https://www.lactv.it/2021/10/03/lac-news24-la-settimana-03-10-2021/',
            'info_dict': {
                'id': 'EUlZfGWkGpOd',
                'title': 'La Settimana ',
                'description': '03 Ottobre ore 02:00',
                'ext': 'mp4',
                'live_status': 'not_live',
                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
                'duration': 1512
            }
        },
        {
            # Multiple gfycat iframe embeds
            'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=613422',
            'info_dict': {
                'title': '재이, 윤, 세은 황금 드레스를 입고 빛난다',
                'id': 'board'
            },
            'playlist_count': 8,
        },
        {
            # Multiple gfycat gifs (direct links)
            'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=612199',
            'info_dict': {
                'title': '옳게 된 크롭 니트 스테이씨 아이사',
                'id': 'board'
            },
            'playlist_count': 6
        },
        {
            # Multiple gfycat embeds, with uppercase "IFR" in urls
            'url': 'https://kkzz.kr/?vid=2295',
            'info_dict': {
                'title': '지방시 앰버서더 에스파 카리나 움짤',
                'id': '?vid=2295'
            },
            'playlist_count': 9
        }
    ]

    def report_following_redirect(self, new_url):
@@ -3083,10 +3202,9 @@ class GenericIE(InfoExtractor):
            return self.url_result(mobj.group('url'), 'Tvigle')

        # Look for embedded TED player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'TED')
        ted_urls = TedEmbedIE._extract_urls(webpage)
        if ted_urls:
            return self.playlist_from_matches(ted_urls, video_id, video_title, ie=TedEmbedIE.ie_key())

        # Look for embedded Ustream videos
        ustream_url = UstreamIE._extract_url(webpage)
@@ -3422,6 +3540,18 @@ class GenericIE(InfoExtractor):
            return self.playlist_from_matches(
                rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())

        # Look for Glomex embeds
        glomex_urls = list(GlomexEmbedIE._extract_urls(webpage, url))
        if glomex_urls:
            return self.playlist_from_matches(
                glomex_urls, video_id, video_title, ie=GlomexEmbedIE.ie_key())

        # Look for megatv.com embeds
        megatvcom_urls = list(MegaTVComEmbedIE._extract_urls(webpage))
        if megatvcom_urls:
            return self.playlist_from_matches(
                megatvcom_urls, video_id, video_title, ie=MegaTVComEmbedIE.ie_key())

        # Look for WashingtonPost embeds
        wapo_urls = WashingtonPostIE._extract_urls(webpage)
        if wapo_urls:
@@ -3568,10 +3698,32 @@ class GenericIE(InfoExtractor):
            return self.playlist_from_matches(
                rumble_urls, video_id, video_title, ie=RumbleEmbedIE.ie_key())

        # Look for (tvopen|ethnos).gr embeds
        tvopengr_urls = list(TVOpenGrEmbedIE._extract_urls(webpage))
        if tvopengr_urls:
            return self.playlist_from_matches(tvopengr_urls, video_id, video_title, ie=TVOpenGrEmbedIE.ie_key())

        # Look for ert.gr webtv embeds
        ertwebtv_urls = list(ERTWebtvEmbedIE._extract_urls(webpage))
        if len(ertwebtv_urls) == 1:
            return self.url_result(self._proto_relative_url(ertwebtv_urls[0]), video_title=video_title, url_transparent=True)
        elif ertwebtv_urls:
            return self.playlist_from_matches(ertwebtv_urls, video_id, video_title, ie=ERTWebtvEmbedIE.ie_key())

        tvp_urls = TVPEmbedIE._extract_urls(webpage)
        if tvp_urls:
            return self.playlist_from_matches(tvp_urls, video_id, video_title, ie=TVPEmbedIE.ie_key())

        # Look for MainStreaming embeds
        mainstreaming_urls = MainStreamingIE._extract_urls(webpage)
        if mainstreaming_urls:
            return self.playlist_from_matches(mainstreaming_urls, video_id, video_title, ie=MainStreamingIE.ie_key())

        # Look for Gfycat Embeds
        gfycat_urls = GfycatIE._extract_urls(webpage)
        if gfycat_urls:
            return self.playlist_from_matches(gfycat_urls, video_id, video_title, ie=GfycatIE.ie_key())

        # Look for HTML5 media
        entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
        if entries:
@@ -3663,12 +3815,16 @@ class GenericIE(InfoExtractor):

        # Looking for http://schema.org/VideoObject
        json_ld = self._search_json_ld(webpage, video_id, default={})
        if json_ld.get('url'):
        if json_ld.get('url') not in (url, None):
            self.report_detected('JSON LD')
            if determine_ext(json_ld.get('url')) == 'm3u8':
            if determine_ext(json_ld['url']) == 'm3u8':
                json_ld['formats'], json_ld['subtitles'] = self._extract_m3u8_formats_and_subtitles(
                    json_ld['url'], video_id, 'mp4')
                json_ld.pop('url')
                self._sort_formats(json_ld['formats'])
            else:
                json_ld['_type'] = 'url_transparent'
                json_ld['url'] = smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True})
            return merge_dicts(json_ld, info_dict)

        def check_video(vurl):
@@ -3723,20 +3879,21 @@ class GenericIE(InfoExtractor):
                protocol, _, _ = url.partition('/')
                thumbnail = protocol + thumbnail

            url_keys = list(filter(re.compile(r'video_url|video_alt_url\d*').fullmatch, flashvars.keys()))
            formats = []
            for key in ('video_url', 'video_alt_url', 'video_alt_url2'):
                if key in flashvars and '/get_file/' in flashvars[key]:
                    next_format = {
                        'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
                        'format_id': flashvars.get(key + '_text', key),
                        'ext': 'mp4',
                    }
                    height = re.search(r'%s_(\d+)p\.mp4(?:/[?].*)?$' % flashvars['video_id'], flashvars[key])
                    if height:
                        next_format['height'] = int(height.group(1))
                    else:
                        next_format['quality'] = 1
                    formats.append(next_format)
            for key in url_keys:
                if '/get_file/' not in flashvars[key]:
                    continue
                format_id = flashvars.get(f'{key}_text', key)
                formats.append({
                    'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
                    'format_id': format_id,
                    'ext': 'mp4',
                    **(parse_resolution(format_id) or parse_resolution(flashvars[key]))
                })
                if not formats[-1].get('height'):
                    formats[-1]['quality'] = 1

            self._sort_formats(formats)

            return {

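The rewritten KVS branch derives resolution from the format label instead of a hard-coded key list. parse_resolution pattern-matches strings such as '720p' or '1280x720' into width/height fields; a small sketch of how its output is spread into a format dict (a simplified stand-in for yt_dlp.utils.parse_resolution, not the library code):

    import re

    def parse_resolution(s):
        # simplified stand-in for yt_dlp.utils.parse_resolution
        if not s:
            return {}
        mobj = re.search(r'(?P<w>\d+)\s*[xX]\s*(?P<h>\d+)', s)
        if mobj:
            return {'width': int(mobj.group('w')), 'height': int(mobj.group('h'))}
        mobj = re.search(r'(?P<h>\d+)[pPiI]\b', s)
        if mobj:
            return {'height': int(mobj.group('h'))}
        return {}

    fmt = {'format_id': 'video_alt_url_720p', 'ext': 'mp4', **parse_resolution('720p')}
    print(fmt)  # -> {'format_id': 'video_alt_url_720p', 'ext': 'mp4', 'height': 720}
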
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
@@ -11,7 +13,7 @@ from ..utils import (


class GfycatIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\.]+)'
    _VALID_URL = r'(?i)https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)'
    _TESTS = [{
        'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
        'info_dict': {
@@ -24,9 +26,10 @@ class GfycatIE(InfoExtractor):
            'duration': 10.4,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'categories': list,
            'age_limit': 0,
            'uploader_id': 'anonymous',
            'description': '',
        }
    }, {
        'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
@@ -40,9 +43,27 @@ class GfycatIE(InfoExtractor):
            'duration': 3.52,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'categories': list,
            'age_limit': 0,
            'uploader_id': 'anonymous',
            'description': '',
        }
    }, {
        'url': 'https://gfycat.com/alienatedsolidgreathornedowl',
        'info_dict': {
            'id': 'alienatedsolidgreathornedowl',
            'ext': 'mp4',
            'upload_date': '20211226',
            'uploader_id': 'reactions',
            'timestamp': 1640536930,
            'like_count': int,
            'description': '',
            'title': 'Ingrid Michaelson, Zooey Deschanel - Merry Christmas Happy New Year',
            'categories': list,
            'age_limit': 0,
            'duration': 2.9583333333333335,
            'uploader': 'Reaction GIFs',
            'view_count': int,
        }
    }, {
        'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
@@ -59,8 +80,19 @@ class GfycatIE(InfoExtractor):
    }, {
        'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4',
        'only_matching': True
    }, {
        'url': 'http://gfycat.com/IFR/JauntyTimelyAmazontreeboa',
        'only_matching': True
    }]

    @staticmethod
    def _extract_urls(webpage):
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<(?:iframe|source)[^>]+\bsrc=["\'](?P<url>%s)' % GfycatIE._VALID_URL,
                webpage)]

    def _real_extract(self, url):
        video_id = self._match_id(url)

@@ -74,7 +106,7 @@ class GfycatIE(InfoExtractor):
        title = gfy.get('title') or gfy['gfyName']
        description = gfy.get('description')
        timestamp = int_or_none(gfy.get('createDate'))
        uploader = gfy.get('userName')
        uploader = gfy.get('userName') or gfy.get('username')
        view_count = int_or_none(gfy.get('views'))
        like_count = int_or_none(gfy.get('likes'))
        dislike_count = int_or_none(gfy.get('dislikes'))
@@ -114,7 +146,8 @@ class GfycatIE(InfoExtractor):
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader': gfy.get('userDisplayName') or uploader,
            'uploader_id': uploader,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,

@@ -12,6 +12,7 @@ from ..compat import (
    compat_str,
)
from ..utils import (
    HEADRequest,
    ExtractorError,
    float_or_none,
    orderedSet,
@@ -67,11 +68,28 @@ class GloboIE(InfoExtractor):
    }, {
        'url': 'globo:3607726',
        'only_matching': True,
    }, {
        'url': 'https://globoplay.globo.com/v/10248083/',
        'info_dict': {
            'id': '10248083',
            'ext': 'mp4',
            'title': 'Melhores momentos: Equador 1 x 1 Brasil pelas Eliminatórias da Copa do Mundo 2022',
            'duration': 530.964,
            'uploader': 'SporTV',
            'uploader_id': '698',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        self._request_webpage(
            HEADRequest('https://globo-ab.globo.com/v2/selected-alternatives?experiments=player-isolated-experiment-02&skipImpressions=true'),
            video_id, 'Getting cookies')

        video = self._download_json(
            'http://api.globovideos.com/videos/%s/playlist' % video_id,
            video_id)['videos'][0]
@@ -82,7 +100,7 @@ class GloboIE(InfoExtractor):

        formats = []
        security = self._download_json(
            'https://playback.video.globo.com/v1/video-session', video_id, 'Downloading security hash for %s' % video_id,
            'https://playback.video.globo.com/v2/video-session', video_id, 'Downloading security hash for %s' % video_id,
            headers={'content-type': 'application/json'}, data=json.dumps({
                "player_type": "desktop",
                "video_id": video_id,
@@ -92,7 +110,9 @@ class GloboIE(InfoExtractor):
                "tz": "-3.0:00"
            }).encode())

        security_hash = security['source']['token']
        self._request_webpage(HEADRequest(security['sources'][0]['url_template']), video_id, 'Getting locksession cookie')

        security_hash = security['sources'][0]['token']
        if not security_hash:
            message = security.get('message')
            if message:
@@ -115,7 +135,7 @@ class GloboIE(InfoExtractor):
        md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
        signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
        signed_hash = hash_prefix + padded_sign_time + signed_md5
        source = security['source']['url_parts']
        source = security['sources'][0]['url_parts']
        resource_url = source['scheme'] + '://' + source['domain'] + source['path']
        signed_url = '%s?h=%s&k=html5&a=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A')

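The signed URL above is derived purely client-side: an md5 over the received hash plus a zero-padded timestamp and a constant, base64url-encoded with the padding stripped. A self-contained sketch of just that step (the three input values here are hypothetical; in the extractor they come from the security token):

    import base64
    import hashlib

    received_md5 = 'abcdef0123456789'       # hypothetical token tail
    padded_sign_time = '0000001643900000'   # hypothetical zero-padded timestamp
    hash_prefix = 'deadbeef'                # hypothetical token head

    md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
    signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
    signed_hash = hash_prefix + padded_sign_time + signed_md5
    print(signed_hash)
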
yt_dlp/extractor/glomex.py — 220 lines (new file)
@@ -0,0 +1,220 @@
# coding: utf-8
from __future__ import unicode_literals

import re
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    extract_attributes,
    ExtractorError,
    int_or_none,
    parse_qs,
    smuggle_url,
    unescapeHTML,
    unsmuggle_url,
)


class GlomexBaseIE(InfoExtractor):
    _DEFAULT_ORIGIN_URL = 'https://player.glomex.com/'
    _API_URL = 'https://integration-cloudfront-eu-west-1.mes.glomex.cloud/'

    @staticmethod
    def _smuggle_origin_url(url, origin_url):
        if origin_url is None:
            return url
        return smuggle_url(url, {'origin': origin_url})

    @classmethod
    def _unsmuggle_origin_url(cls, url, fallback_origin_url=None):
        defaults = {'origin': fallback_origin_url or cls._DEFAULT_ORIGIN_URL}
        unsmuggled_url, data = unsmuggle_url(url, default=defaults)
        return unsmuggled_url, data['origin']

    def _get_videoid_type(self, video_id):
        _VIDEOID_TYPES = {
            'v': 'video',
            'pl': 'playlist',
            'rl': 'related videos playlist',
            'cl': 'curated playlist',
        }
        prefix = video_id.split('-')[0]
        return _VIDEOID_TYPES.get(prefix, 'unknown type')

    def _download_api_data(self, video_id, integration, current_url=None):
        query = {
            'integration_id': integration,
            'playlist_id': video_id,
            'current_url': current_url or self._DEFAULT_ORIGIN_URL,
        }
        video_id_type = self._get_videoid_type(video_id)
        return self._download_json(
            self._API_URL,
            video_id, 'Downloading %s JSON' % video_id_type,
            'Unable to download %s JSON' % video_id_type,
            query=query)

    def _download_and_extract_api_data(self, video_id, integration, current_url):
        api_data = self._download_api_data(video_id, integration, current_url)
        videos = api_data['videos']
        if not videos:
            raise ExtractorError('no videos found for %s' % video_id)
        videos = [self._extract_api_data(video, video_id) for video in videos]
        return videos[0] if len(videos) == 1 else self.playlist_result(videos, video_id)

    def _extract_api_data(self, video, video_id):
        if video.get('error_code') == 'contentGeoblocked':
            self.raise_geo_restricted(countries=video['geo_locations'])

        formats, subs = [], {}
        for format_id, format_url in video['source'].items():
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, 'mp4', m3u8_id=format_id,
                    fatal=False)
                formats.extend(formats_)
                self._merge_subtitles(subs_, target=subs)
            else:
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                })
        if video.get('language'):
            for fmt in formats:
                fmt['language'] = video['language']
        self._sort_formats(formats)

        images = (video.get('images') or []) + [video.get('image') or {}]
        thumbnails = [{
            'id': image.get('id'),
            'url': f'{image["url"]}/profile:player-960x540',
            'width': 960,
            'height': 540,
        } for image in images if image.get('url')]
        self._remove_duplicate_formats(thumbnails)

        return {
            'id': video.get('clip_id') or video_id,
            'title': video.get('title'),
            'description': video.get('description'),
            'thumbnails': thumbnails,
            'duration': int_or_none(video.get('clip_duration')),
            'timestamp': video.get('created_at'),
            'formats': formats,
            'subtitles': subs,
        }


class GlomexIE(GlomexBaseIE):
    IE_NAME = 'glomex'
    IE_DESC = 'Glomex videos'
    _VALID_URL = r'https?://video\.glomex\.com/[^/]+/(?P<id>v-[^-]+)'
    _INTEGRATION_ID = '19syy24xjn1oqlpc'

    _TESTS = [{
        'url': 'https://video.glomex.com/sport/v-cb24uwg77hgh-nach-2-0-sieg-guardiola-mit-mancity-vor-naechstem-titel',
        'md5': 'cec33a943c4240c9cb33abea8c26242e',
        'info_dict': {
            'id': 'v-cb24uwg77hgh',
            'ext': 'mp4',
            'title': 'md5:38a90cedcfadd72982c81acf13556e0c',
            'description': 'md5:1ea6b6caff1443fcbbba159e432eedb8',
            'duration': 29600,
            'timestamp': 1619895017,
            'upload_date': '20210501',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        return self.url_result(
            GlomexEmbedIE.build_player_url(video_id, self._INTEGRATION_ID, url),
            GlomexEmbedIE.ie_key(), video_id)


class GlomexEmbedIE(GlomexBaseIE):
    IE_NAME = 'glomex:embed'
    IE_DESC = 'Glomex embedded videos'
    _BASE_PLAYER_URL = '//player.glomex.com/integration/1/iframe-player.html'
    _BASE_PLAYER_URL_RE = re.escape(_BASE_PLAYER_URL).replace('/1/', r'/[^/]/')
    _VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?playlistId=(?P<id>[^#&]+)'

    _TESTS = [{
        'url': 'https://player.glomex.com/integration/1/iframe-player.html?integrationId=4059a013k56vb2yd&playlistId=v-cfa6lye0dkdd-sf',
        'md5': '68f259b98cc01918ac34180142fce287',
        'info_dict': {
            'id': 'v-cfa6lye0dkdd-sf',
            'ext': 'mp4',
            'timestamp': 1635337199,
            'duration': 133080,
            'upload_date': '20211027',
            'description': 'md5:e741185fc309310ff5d0c789b437be66',
            'title': 'md5:35647293513a6c92363817a0fb0a7961',
        },
    }, {
        'url': 'https://player.glomex.com/integration/1/iframe-player.html?origin=fullpage&integrationId=19syy24xjn1oqlpc&playlistId=rl-vcb49w1fb592p&playlistIndex=0',
        'info_dict': {
            'id': 'rl-vcb49w1fb592p',
        },
        'playlist_count': 100,
    }, {
        'url': 'https://player.glomex.com/integration/1/iframe-player.html?playlistId=cl-bgqaata6aw8x&integrationId=19syy24xjn1oqlpc',
        'info_dict': {
            'id': 'cl-bgqaata6aw8x',
        },
        'playlist_mincount': 2,
    }]

    @classmethod
    def build_player_url(cls, video_id, integration, origin_url=None):
        query_string = urllib.parse.urlencode({
            'playlistId': video_id,
            'integrationId': integration,
        })
        return cls._smuggle_origin_url(f'https:{cls._BASE_PLAYER_URL}?{query_string}', origin_url)

    @classmethod
    def _extract_urls(cls, webpage, origin_url):
        # https://docs.glomex.com/publisher/video-player-integration/javascript-api/
        quot_re = r'["\']'

        regex = fr'''(?x)
            <iframe[^>]+?src=(?P<q>{quot_re})(?P<url>
                (?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=q)).)+
            )(?P=q)'''
        for mobj in re.finditer(regex, webpage):
            url = unescapeHTML(mobj.group('url'))
            if cls.suitable(url):
                yield cls._smuggle_origin_url(url, origin_url)

        regex = fr'''(?x)
            <glomex-player [^>]+?>|
            <div[^>]* data-glomex-player=(?P<q>{quot_re})true(?P=q)[^>]*>'''
        for mobj in re.finditer(regex, webpage):
            attrs = extract_attributes(mobj.group(0))
            if attrs.get('data-integration-id') and attrs.get('data-playlist-id'):
                yield cls.build_player_url(attrs['data-playlist-id'], attrs['data-integration-id'], origin_url)

        # naive parsing of inline scripts for hard-coded integration parameters
        regex = fr'''(?x)
            (?P<is_js>dataset\.)?%s\s*(?(is_js)=|:)\s*
            (?P<q>{quot_re})(?P<id>(?:(?!(?P=q)).)+)(?P=q)\s'''
        for mobj in re.finditer(r'(?x)<script[^<]*>.+?</script>', webpage):
            script = mobj.group(0)
            integration_id = re.search(regex % 'integrationId', script)
            if not integration_id:
                continue
            playlist_id = re.search(regex % 'playlistId', script)
            if playlist_id:
                yield cls.build_player_url(playlist_id.group('id'), integration_id.group('id'), origin_url)

    def _real_extract(self, url):
        url, origin_url = self._unsmuggle_origin_url(url)
        playlist_id = self._match_id(url)
        integration = parse_qs(url).get('integrationId', [None])[0]
        if not integration:
            raise ExtractorError('No integrationId in URL', expected=True)
        return self._download_and_extract_api_data(playlist_id, integration, origin_url)

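GlomexBaseIE round-trips the embedding page's URL through smuggle_url/unsmuggle_url so that the API's current_url parameter survives the url_result handoff between extractors. A minimal usage sketch of the two yt_dlp.utils helpers this file relies on (the article URL is hypothetical):

    from yt_dlp.utils import smuggle_url, unsmuggle_url

    # pack extra data into the URL fragment...
    url = smuggle_url('https://player.glomex.com/integration/1/iframe-player.html?playlistId=v-x',
                      {'origin': 'https://example.com/article'})
    # ...and recover it later, with a fallback when nothing was smuggled
    plain_url, data = unsmuggle_url(url, default={'origin': 'https://player.glomex.com/'})
    print(data['origin'])  # -> https://example.com/article
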
@@ -8,36 +8,33 @@ from .common import SearchInfoExtractor

class GoogleSearchIE(SearchInfoExtractor):
    IE_DESC = 'Google Video search'
    _MAX_RESULTS = 1000
    IE_NAME = 'video.google:search'
    _SEARCH_KEY = 'gvsearch'
    _WORKING = False
    _TEST = {
    _TESTS = [{
        'url': 'gvsearch15:python language',
        'info_dict': {
            'id': 'python language',
            'title': 'python language',
        },
        'playlist_count': 15,
    }
    }]
    _PAGE_SIZE = 100

    def _search_results(self, query):
        for pagenum in itertools.count():
            webpage = self._download_webpage(
                'http://www.google.com/search',
                'gvsearch:' + query,
                note='Downloading result page %s' % (pagenum + 1),
                'http://www.google.com/search', f'gvsearch:{query}',
                note=f'Downloading result page {pagenum + 1}',
                query={
                    'tbm': 'vid',
                    'q': query,
                    'start': pagenum * 10,
                    'start': pagenum * self._PAGE_SIZE,
                    'num': self._PAGE_SIZE,
                    'hl': 'en',
                })

            for hit_idx, mobj in enumerate(re.finditer(
                    r'<h3 class="r"><a href="([^"]+)"', webpage)):
                if re.search(f'id="vidthumb{hit_idx + 1}"', webpage):
                    yield self.url_result(mobj.group(1))
            for url in re.findall(r'<div[^>]* class="dXiKIc"[^>]*><a href="([^"]+)"', webpage):
                yield self.url_result(url)

            if not re.search(r'id="pnnext"', webpage):
                return

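The rewritten _search_results pages through results 100 at a time with itertools.count and stops when no "next page" control is present. The same open-ended pagination shape, sketched against a hypothetical fetch_page helper:

    import itertools

    PAGE_SIZE = 100

    def search_results(fetch_page):
        # fetch_page(start, num) -> (list_of_urls, has_next) is a hypothetical helper
        for pagenum in itertools.count():
            urls, has_next = fetch_page(pagenum * PAGE_SIZE, PAGE_SIZE)
            yield from urls
            if not has_next:
                return
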
@@ -203,6 +203,9 @@ class HotStarIE(HotStarBaseIE):
            format_url = re.sub(
                r'(?<=//staragvod)(\d)', r'web\1', format_url)
            tags = str_or_none(playback_set.get('tagsCombination')) or ''
            ignored_res, ignored_vcodec, ignored_dr = self._configuration_arg('res'), self._configuration_arg('vcodec'), self._configuration_arg('dr')
            if any(f'resolution:{ig_res}' in tags for ig_res in ignored_res) or any(f'video_codec:{ig_vc}' in tags for ig_vc in ignored_vcodec) or any(f'dynamic_range:{ig_dr}' in tags for ig_dr in ignored_dr):
                continue
            ext = determine_ext(format_url)
            current_formats, current_subs = [], {}
            try:

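These added lines let users skip playback sets by tag via the 'res', 'vcodec' and 'dr' extractor arguments; each playback set advertises a combined tags string that is tested by substring membership. A sketch of that test on hypothetical data:

    tags = 'resolution:hd;video_codec:h265;dynamic_range:sdr'  # hypothetical tagsCombination
    ignored_res, ignored_vcodec, ignored_dr = ['hd'], [], []
    skip = (any(f'resolution:{r}' in tags for r in ignored_res)
            or any(f'video_codec:{c}' in tags for c in ignored_vcodec)
            or any(f'dynamic_range:{d}' in tags for d in ignored_dr))
    print(skip)  # -> True, so this playback set would be skipped
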
@@ -26,13 +26,7 @@ class HRFernsehenIE(InfoExtractor):
        }]},
        'timestamp': 1598470200,
        'upload_date': '20200826',
        'thumbnails': [{
            'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9.jpg',
            'id': '0'
        }, {
            'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
            'id': '1'
        }],
        'thumbnail': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
        'title': 'hessenschau vom 26.08.2020'
    }
}, {
@@ -81,7 +75,7 @@ class HRFernsehenIE(InfoExtractor):
        description = self._html_search_meta(
            ['description'], webpage)

        loader_str = unescapeHTML(self._search_regex(r"data-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader"))
        loader_str = unescapeHTML(self._search_regex(r"data-new-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader"))
        loader_data = json.loads(loader_str)

        info = {

@@ -64,10 +64,7 @@ class ImgGamingBaseIE(InfoExtractor):
        domain, media_type, media_id, playlist_id = self._match_valid_url(url).groups()

        if playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % media_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
            if self._yes_playlist(playlist_id, media_id):
                media_type, media_id = 'playlist', playlist_id

        if media_type == 'playlist':

@@ -12,11 +12,13 @@ from ..compat import (
)
from ..utils import (
    ExtractorError,
    format_field,
    float_or_none,
    get_element_by_attribute,
    int_or_none,
    lowercase_escape,
    std_headers,
    str_or_none,
    str_to_int,
    traverse_obj,
    url_or_none,
@@ -126,6 +128,74 @@ class InstagramBaseIE(InfoExtractor):
            'like_count': self._get_count(node, 'likes', 'preview_like'),
        }

    def _extract_product_media(self, product_media):
        media_id = product_media.get('code') or product_media.get('id')
        vcodec = product_media.get('video_codec')
        dash_manifest_raw = product_media.get('video_dash_manifest')
        videos_list = product_media.get('video_versions')
        if not (dash_manifest_raw or videos_list):
            return {}

        formats = [{
            'format_id': format.get('id'),
            'url': format.get('url'),
            'width': format.get('width'),
            'height': format.get('height'),
            'vcodec': vcodec,
        } for format in videos_list or []]
        if dash_manifest_raw:
            formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, media_id), mpd_id='dash'))
        self._sort_formats(formats)

        thumbnails = [{
            'url': thumbnail.get('url'),
            'width': thumbnail.get('width'),
            'height': thumbnail.get('height')
        } for thumbnail in traverse_obj(product_media, ('image_versions2', 'candidates')) or []]
        return {
            'id': media_id,
            'duration': float_or_none(product_media.get('video_duration')),
            'formats': formats,
            'thumbnails': thumbnails
        }

    def _extract_product(self, product_info):
        if isinstance(product_info, list):
            product_info = product_info[0]

        user_info = product_info.get('user') or {}
        info_dict = {
            'id': product_info.get('code') or product_info.get('id'),
            'title': product_info.get('title') or f'Video by {user_info.get("username")}',
            'description': traverse_obj(product_info, ('caption', 'text'), expected_type=str_or_none),
            'timestamp': int_or_none(product_info.get('taken_at')),
            'channel': user_info.get('username'),
            'uploader': user_info.get('full_name'),
            'uploader_id': str_or_none(user_info.get('pk')),
            'view_count': int_or_none(product_info.get('view_count')),
            'like_count': int_or_none(product_info.get('like_count')),
            'comment_count': int_or_none(product_info.get('comment_count')),
            'http_headers': {
                'Referer': 'https://www.instagram.com/',
            }
        }
        carousel_media = product_info.get('carousel_media')
        if carousel_media:
            return {
                '_type': 'playlist',
                **info_dict,
                'title': f'Post by {user_info.get("username")}',
                'entries': [{
                    **info_dict,
                    **self._extract_product_media(product_media),
                } for product_media in carousel_media],
            }

        return {
            **info_dict,
            **self._extract_product_media(product_info)
        }


class InstagramIOSIE(InfoExtractor):
    IE_DESC = 'IOS instagram:// URL'
@@ -184,8 +254,9 @@ class InstagramIE(InstagramBaseIE):
            'duration': 0,
            'timestamp': 1371748545,
            'upload_date': '20130620',
            'uploader_id': 'naomipq',
            'uploader_id': '2815873',
            'uploader': 'B E A U T Y F O R A S H E S',
            'channel': 'naomipq',
            'like_count': int,
            'comment_count': int,
            'comments': list,
@@ -201,8 +272,9 @@ class InstagramIE(InstagramBaseIE):
            'duration': 0,
            'timestamp': 1453760977,
            'upload_date': '20160125',
            'uploader_id': 'britneyspears',
            'uploader_id': '12246775',
            'uploader': 'Britney Spears',
            'channel': 'britneyspears',
            'like_count': int,
            'comment_count': int,
            'comments': list,
@@ -248,8 +320,9 @@ class InstagramIE(InstagramBaseIE):
            'duration': 53.83,
            'timestamp': 1530032919,
            'upload_date': '20180626',
            'uploader_id': 'instagram',
            'uploader_id': '25025320',
            'uploader': 'Instagram',
            'channel': 'instagram',
            'like_count': int,
            'comment_count': int,
            'comments': list,
@@ -315,16 +388,19 @@ class InstagramIE(InstagramBaseIE):
        if not media:
            additional_data = self._parse_json(
                self._search_regex(
                    r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
                    r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\);',
                    webpage, 'additional data', default='{}'),
                video_id, fatal=False)
            product_item = traverse_obj(additional_data, ('items', 0), expected_type=dict)
            if product_item:
                return self._extract_product(product_item)
            media = traverse_obj(additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {}

        if not media and 'www.instagram.com/accounts/login' in urlh.geturl():
            self.raise_login_required('You need to log in to access this content')

        uploader_id = traverse_obj(media, ('owner', 'username')) or self._search_regex(
            r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'uploader id', fatal=False)
        username = traverse_obj(media, ('owner', 'username')) or self._search_regex(
            r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'username', fatal=False)

        description = (
            traverse_obj(media, ('edge_media_to_caption', 'edges', 0, 'node', 'text'), expected_type=str)
@@ -341,7 +417,7 @@ class InstagramIE(InstagramBaseIE):
        if nodes:
            return self.playlist_result(
                self._extract_nodes(nodes, True), video_id,
                'Post by %s' % uploader_id if uploader_id else None, description)
                format_field(username, template='Post by %s'), description)

        video_url = self._og_search_video_url(webpage, secure=False)

@@ -377,12 +453,13 @@ class InstagramIE(InstagramBaseIE):
        return {
            'id': video_id,
            'formats': formats,
            'title': media.get('title') or 'Video by %s' % uploader_id,
            'title': media.get('title') or 'Video by %s' % username,
            'description': description,
            'duration': float_or_none(media.get('video_duration')),
            'timestamp': traverse_obj(media, 'taken_at_timestamp', 'date', expected_type=int_or_none),
            'uploader_id': uploader_id,
            'uploader_id': traverse_obj(media, ('owner', 'id')),
            'uploader': traverse_obj(media, ('owner', 'full_name')),
            'channel': username,
            'like_count': self._get_count(media, 'likes', 'preview_like') or str_to_int(self._search_regex(
                r'data-log-event="likeCountClick"[^>]*>[^\d]*([\d,\.]+)', webpage, 'like count', fatal=False)),
            'comment_count': self._get_count(media, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'),
@@ -542,3 +619,58 @@ class InstagramTagIE(InstagramPlaylistBaseIE):
            'tag_name':
                data['entry_data']['TagPage'][0]['graphql']['hashtag']['name']
        }


class InstagramStoryIE(InstagramBaseIE):
    _VALID_URL = r'https?://(?:www\.)?instagram\.com/stories/(?P<user>[^/]+)/(?P<id>\d+)'
    IE_NAME = 'instagram:story'

    _TESTS = [{
        'url': 'https://www.instagram.com/stories/highlights/18090946048123978/',
        'info_dict': {
            'id': '18090946048123978',
            'title': 'Rare',
        },
        'playlist_mincount': 50
    }]

    def _real_extract(self, url):
        username, story_id = self._match_valid_url(url).groups()

        story_info_url = f'{username}/{story_id}/?__a=1' if username == 'highlights' else f'{username}/?__a=1'
        story_info = self._download_json(f'https://www.instagram.com/stories/{story_info_url}', story_id, headers={
            'X-IG-App-ID': 936619743392459,
            'X-ASBD-ID': 198387,
            'X-IG-WWW-Claim': 0,
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': url,
        })
        user_id = story_info['user']['id']
        highlight_title = traverse_obj(story_info, ('highlight', 'title'))

        story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
        videos = self._download_json(f'https://i.instagram.com/api/v1/feed/reels_media/?reel_ids={story_info_url}', story_id, headers={
            'X-IG-App-ID': 936619743392459,
            'X-ASBD-ID': 198387,
            'X-IG-WWW-Claim': 0,
        })['reels']

        full_name = traverse_obj(videos, ('user', 'full_name'))

        user_info = {}
        if not (username and username != 'highlights' and full_name):
            user_info = self._download_json(
                f'https://i.instagram.com/api/v1/users/{user_id}/info/', story_id, headers={
                    'User-Agent': 'Mozilla/5.0 (Linux; Android 11; SM-A505F Build/RP1A.200720.012; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/96.0.4664.45 Mobile Safari/537.36 Instagram 214.1.0.29.120 Android (30/11; 450dpi; 1080x2122; samsung; SM-A505F; a50; exynos9610; en_US; 333717274)',
                }, note='Downloading user info')

        username = traverse_obj(user_info, ('user', 'username')) or username
        full_name = traverse_obj(user_info, ('user', 'full_name')) or full_name

        highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
        return self.playlist_result([{
            **self._extract_product(highlight),
            'title': f'Story by {username}',
            'uploader': full_name,
            'uploader_id': user_id,
        } for highlight in highlights], playlist_id=story_id, playlist_title=highlight_title)

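This file leans heavily on traverse_obj to pull values out of nested, half-trusted JSON without raising. A minimal sketch of the access pattern (simplified; the real yt_dlp.utils.traverse_obj supports many more path types, such as Ellipsis for "every element"):

    from yt_dlp.utils import traverse_obj

    media = {'owner': {'username': 'naomipq', 'id': '2815873'}}
    print(traverse_obj(media, ('owner', 'username')))  # -> naomipq
    print(traverse_obj(media, ('owner', 'missing')))   # -> None, no KeyError raised
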
@@ -10,15 +10,29 @@ from .common import InfoExtractor
from ..compat import (
    compat_str,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_unquote
)
from .openload import PhantomJSwrapper
from ..utils import (
    clean_html,
    decode_packed_codes,
    ExtractorError,
    float_or_none,
    format_field,
    get_element_by_id,
    get_element_by_attribute,
    ExtractorError,
    int_or_none,
    js_to_json,
    ohdave_rsa_encrypt,
    parse_age_limit,
    parse_duration,
    parse_iso8601,
    parse_resolution,
    qualities,
    remove_start,
    str_or_none,
    traverse_obj,
    urljoin,
)

@@ -392,3 +406,359 @@ class IqiyiIE(InfoExtractor):
            'title': title,
            'formats': formats,
        }


class IqIE(InfoExtractor):
    IE_NAME = 'iq.com'
    IE_DESC = 'International version of iQiyi'
    _VALID_URL = r'https?://(?:www\.)?iq\.com/play/(?:[\w%-]*-)?(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.iq.com/play/one-piece-episode-1000-1ma1i6ferf4',
        'md5': '2d7caf6eeca8a32b407094b33b757d39',
        'info_dict': {
            'ext': 'mp4',
            'id': '1ma1i6ferf4',
            'title': '航海王 第1000集',
            'description': 'Subtitle available on Sunday 4PM(GMT+8).',
            'duration': 1430,
            'timestamp': 1637488203,
            'upload_date': '20211121',
            'episode_number': 1000,
            'episode': 'Episode 1000',
            'series': 'One Piece',
            'age_limit': 13,
            'average_rating': float,
        },
        'params': {
            'format': '500',
        },
        'expected_warnings': ['format is restricted']
    }, {
        # VIP-restricted video
        'url': 'https://www.iq.com/play/mermaid-in-the-fog-2021-gbdpx13bs4',
        'only_matching': True
    }]
    _BID_TAGS = {
        '100': '240P',
        '200': '360P',
        '300': '480P',
        '500': '720P',
        '600': '1080P',
        '610': '1080P50',
        '700': '2K',
        '800': '4K',
    }
    _LID_TAGS = {
        '1': 'zh_CN',
        '2': 'zh_TW',
        '3': 'en',
        '18': 'th',
        '21': 'my',
        '23': 'vi',
        '24': 'id',
        '26': 'es',
        '28': 'ar',
    }

    _DASH_JS = '''
        console.log(page.evaluate(function() {
            var tvid = "%(tvid)s"; var vid = "%(vid)s"; var src = "%(src)s";
            var uid = "%(uid)s"; var dfp = "%(dfp)s"; var mode = "%(mode)s"; var lang = "%(lang)s";
            var bid_list = %(bid_list)s; var ut_list = %(ut_list)s; var tm = new Date().getTime();
            var cmd5x_func = %(cmd5x_func)s; var cmd5x_exporter = {}; cmd5x_func({}, cmd5x_exporter, {}); var cmd5x = cmd5x_exporter.cmd5x;
            var authKey = cmd5x(cmd5x('') + tm + '' + tvid);
            var k_uid = Array.apply(null, Array(32)).map(function() {return Math.floor(Math.random() * 15).toString(16)}).join('');
            var dash_paths = {};
            bid_list.forEach(function(bid) {
                var query = {
                    'tvid': tvid,
                    'bid': bid,
                    'ds': 1,
                    'vid': vid,
                    'src': src,
                    'vt': 0,
                    'rs': 1,
                    'uid': uid,
                    'ori': 'pcw',
                    'ps': 1,
                    'k_uid': k_uid,
                    'pt': 0,
                    'd': 0,
                    's': '',
                    'lid': '',
                    'slid': 0,
                    'cf': '',
                    'ct': '',
                    'authKey': authKey,
                    'k_tag': 1,
                    'ost': 0,
                    'ppt': 0,
                    'dfp': dfp,
                    'prio': JSON.stringify({
                        'ff': 'f4v',
                        'code': 2
                    }),
                    'k_err_retries': 0,
                    'up': '',
                    'su': 2,
                    'applang': lang,
                    'sver': 2,
                    'X-USER-MODE': mode,
                    'qd_v': 2,
                    'tm': tm,
                    'qdy': 'a',
                    'qds': 0,
                    'k_ft1': 141287244169348,
                    'k_ft4': 34359746564,
                    'k_ft5': 1,
                    'bop': JSON.stringify({
                        'version': '10.0',
                        'dfp': dfp
                    }),
                };
                var enc_params = [];
                for (var prop in query) {
                    enc_params.push(encodeURIComponent(prop) + '=' + encodeURIComponent(query[prop]));
                }
                ut_list.forEach(function(ut) {
                    enc_params.push('ut=' + ut);
                })
                var dash_path = '/dash?' + enc_params.join('&'); dash_path += '&vf=' + cmd5x(dash_path);
                dash_paths[bid] = dash_path;
            });
            return JSON.stringify(dash_paths);
        }));
        saveAndExit();
    '''

def _extract_vms_player_js(self, webpage, video_id):
|
||||
player_js_cache = self._downloader.cache.load('iq', 'player_js')
|
||||
if player_js_cache:
|
||||
return player_js_cache
|
||||
webpack_js_url = self._proto_relative_url(self._search_regex(
|
||||
r'<script src="((?:https?)?//stc.iqiyipic.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
|
||||
webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')
|
||||
webpack_map1, webpack_map2 = [self._parse_json(js_map, video_id, transform_source=js_to_json) for js_map in self._search_regex(
|
||||
r'\(({[^}]*})\[\w+\][^\)]*\)\s*\+\s*["\']\.["\']\s*\+\s*({[^}]*})\[\w+\]\+["\']\.js', webpack_js, 'JS locations', group=(1, 2))]
|
||||
for module_index in reversed(list(webpack_map2.keys())):
|
||||
module_js = self._download_webpage(
|
||||
f'https://stc.iqiyipic.com/_next/static/chunks/{webpack_map1.get(module_index, module_index)}.{webpack_map2[module_index]}.js',
|
||||
video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
|
||||
if 'vms request' in module_js:
|
||||
self._downloader.cache.store('iq', 'player_js', module_js)
|
||||
return module_js
|
||||
raise ExtractorError('Unable to extract player JS')
|
||||
|
||||
def _extract_cmd5x_function(self, webpage, video_id):
|
||||
return self._search_regex(r',\s*(function\s*\([^\)]*\)\s*{\s*var _qda.+_qdc\(\)\s*})\s*,',
|
||||
self._extract_vms_player_js(webpage, video_id), 'signature function')
|
||||
|
||||
def _update_bid_tags(self, webpage, video_id):
|
||||
extracted_bid_tags = self._parse_json(
|
||||
self._search_regex(
|
||||
r'arguments\[1\][^,]*,\s*function\s*\([^\)]*\)\s*{\s*"use strict";?\s*var \w=({.+}})\s*,\s*\w\s*=\s*{\s*getNewVd',
|
||||
self._extract_vms_player_js(webpage, video_id), 'video tags', default=''),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
if not extracted_bid_tags:
|
||||
return
|
||||
self._BID_TAGS = {
|
||||
bid: traverse_obj(extracted_bid_tags, (bid, 'value'), expected_type=str, default=self._BID_TAGS.get(bid))
|
||||
for bid in extracted_bid_tags.keys()
|
||||
}
|
||||
|
||||
def _get_cookie(self, name, default=None):
|
||||
cookie = self._get_cookies('https://iq.com/').get(name)
|
||||
return cookie.value if cookie else default
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
self._update_bid_tags(webpage, video_id)
|
||||
|
||||
next_props = self._search_nextjs_data(webpage, video_id)['props']
|
||||
page_data = next_props['initialState']['play']
|
||||
video_info = page_data['curVideoInfo']
|
||||
|
||||
uid = traverse_obj(
|
||||
self._parse_json(
|
||||
self._get_cookie('I00002', '{}'), video_id, transform_source=compat_urllib_parse_unquote, fatal=False),
|
||||
('data', 'uid'), default=0)
|
||||
|
||||
if uid:
|
||||
vip_data = self._download_json(
|
||||
'https://pcw-api.iq.com/api/vtype', video_id, note='Downloading VIP data', errnote='Unable to download VIP data', query={
|
||||
'batch': 1,
|
||||
'platformId': 3,
|
||||
'modeCode': self._get_cookie('mod', 'intl'),
|
||||
'langCode': self._get_cookie('lang', 'en_us'),
|
||||
'deviceId': self._get_cookie('QC005', '')
|
||||
}, fatal=False)
|
||||
ut_list = traverse_obj(vip_data, ('data', 'all_vip', ..., 'vipType'), expected_type=str_or_none, default=[])
|
||||
else:
|
||||
ut_list = ['0']
|
||||
|
||||
# bid 0 as an initial format checker
|
||||
dash_paths = self._parse_json(PhantomJSwrapper(self).get(
|
||||
url, html='<!DOCTYPE html>', video_id=video_id, note2='Executing signature code', jscode=self._DASH_JS % {
|
||||
'tvid': video_info['tvId'],
|
||||
'vid': video_info['vid'],
|
||||
'src': traverse_obj(next_props, ('initialProps', 'pageProps', 'ptid'),
|
||||
expected_type=str, default='04022001010011000000'),
|
||||
'uid': uid,
|
||||
'dfp': self._get_cookie('dfp', ''),
|
||||
'mode': self._get_cookie('mod', 'intl'),
|
||||
'lang': self._get_cookie('lang', 'en_us'),
|
||||
'bid_list': '[' + ','.join(['0', *self._BID_TAGS.keys()]) + ']',
|
||||
'ut_list': '[' + ','.join(ut_list) + ']',
|
||||
'cmd5x_func': self._extract_cmd5x_function(webpage, video_id),
|
||||
})[1].strip(), video_id)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
initial_format_data = self._download_json(
|
||||
urljoin('https://cache-video.iq.com', dash_paths['0']), video_id,
|
||||
note='Downloading initial video format info', errnote='Unable to download initial video format info')['data']
|
||||
|
||||
preview_time = traverse_obj(
|
||||
initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False)
|
||||
if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none):
|
||||
self.report_warning('This preview video is limited%s' % format_field(preview_time, template='to %s seconds'))
|
||||
|
||||
# TODO: Extract audio-only formats
|
||||
for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])):
|
||||
dash_path = dash_paths.get(bid)
|
||||
if not dash_path:
|
||||
self.report_warning(f'Unknown format id: {bid}. It is currently not being extracted')
|
||||
continue
|
||||
format_data = traverse_obj(self._download_json(
|
||||
urljoin('https://cache-video.iq.com', dash_path), video_id,
|
||||
note=f'Downloading format data for {self._BID_TAGS[bid]}', errnote='Unable to download format data',
|
||||
fatal=False), 'data', expected_type=dict)
|
||||
|
||||
video_format = next((video_format for video_format in traverse_obj(
|
||||
format_data, ('program', 'video', ...), expected_type=dict, default=[]) if str(video_format['bid']) == bid), {})
|
||||
extracted_formats = []
|
||||
if video_format.get('m3u8Url'):
|
||||
extracted_formats.extend(self._extract_m3u8_formats(
|
||||
urljoin(format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'), video_format['m3u8Url']),
|
||||
'mp4', m3u8_id=bid, fatal=False))
|
||||
if video_format.get('mpdUrl'):
|
||||
# TODO: Properly extract mpd hostname
|
||||
extracted_formats.extend(self._extract_mpd_formats(
|
||||
urljoin(format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'), video_format['mpdUrl']),
|
||||
mpd_id=bid, fatal=False))
|
||||
if video_format.get('m3u8'):
|
||||
ff = video_format.get('ff', 'ts')
|
||||
if ff == 'ts':
|
||||
m3u8_formats, _ = self._parse_m3u8_formats_and_subtitles(
|
||||
video_format['m3u8'], ext='mp4', m3u8_id=bid, fatal=False)
|
||||
extracted_formats.extend(m3u8_formats)
|
||||
elif ff == 'm4s':
|
||||
mpd_data = traverse_obj(
|
||||
self._parse_json(video_format['m3u8'], video_id, fatal=False), ('payload', ..., 'data'), expected_type=str)
|
||||
if not mpd_data:
|
||||
continue
|
||||
mpd_formats, _ = self._parse_mpd_formats_and_subtitles(
|
||||
mpd_data, bid, format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'))
|
||||
extracted_formats.extend(mpd_formats)
|
||||
else:
|
||||
self.report_warning(f'{ff} formats are currently not supported')
|
||||
|
||||
if not extracted_formats:
|
||||
if video_format.get('s'):
|
||||
self.report_warning(f'{self._BID_TAGS[bid]} format is restricted')
|
||||
else:
|
||||
self.report_warning(f'Unable to extract {self._BID_TAGS[bid]} format')
|
||||
for f in extracted_formats:
|
||||
f.update({
|
||||
'quality': qualities(list(self._BID_TAGS.keys()))(bid),
|
||||
'format_note': self._BID_TAGS[bid],
|
||||
**parse_resolution(video_format.get('scrsz'))
|
||||
})
|
||||
formats.extend(extracted_formats)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
for sub_format in traverse_obj(initial_format_data, ('program', 'stl', ...), expected_type=dict, default=[]):
|
||||
lang = self._LID_TAGS.get(str_or_none(sub_format.get('lid')), sub_format.get('_name'))
|
||||
subtitles.setdefault(lang, []).extend([{
|
||||
'ext': format_ext,
|
||||
'url': urljoin(initial_format_data.get('dstl', 'http://meta.video.iqiyi.com'), sub_format[format_key])
|
||||
            } for format_key, format_ext in [('srt', 'srt'), ('webvtt', 'vtt')] if sub_format.get(format_key)])

        extra_metadata = page_data.get('albumInfo') if video_info.get('albumId') and page_data.get('albumInfo') else video_info
        return {
            'id': video_id,
            'title': video_info['name'],
            'formats': formats,
            'subtitles': subtitles,
            'description': video_info.get('mergeDesc'),
            'duration': parse_duration(video_info.get('len')),
            'age_limit': parse_age_limit(video_info.get('rating')),
            'average_rating': traverse_obj(page_data, ('playScoreInfo', 'score'), expected_type=float_or_none),
            'timestamp': parse_iso8601(video_info.get('isoUploadDate')),
            'categories': traverse_obj(extra_metadata, ('videoTagMap', ..., ..., 'name'), expected_type=str),
            'cast': traverse_obj(extra_metadata, ('actorArr', ..., 'name'), expected_type=str),
            'episode_number': int_or_none(video_info.get('order')) or None,
            'series': video_info.get('albumName'),
        }


class IqAlbumIE(InfoExtractor):
    IE_NAME = 'iq.com:album'
    _VALID_URL = r'https?://(?:www\.)?iq\.com/album/(?:[\w%-]*-)?(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.iq.com/album/one-piece-1999-1bk9icvr331',
        'info_dict': {
            'id': '1bk9icvr331',
            'title': 'One Piece',
            'description': 'Subtitle available on Sunday 4PM(GMT+8).'
        },
        'playlist_mincount': 238
    }, {
        # Movie/single video
        'url': 'https://www.iq.com/album/九龙城寨-2021-22yjnij099k',
        'info_dict': {
            'ext': 'mp4',
            'id': '22yjnij099k',
            'title': '九龙城寨',
            'description': 'md5:8a09f50b8ba0db4dc69bc7c844228044',
            'duration': 5000,
            'timestamp': 1641911371,
            'upload_date': '20220111',
            'series': '九龙城寨',
            'cast': ['Shi Yan Neng', 'Yu Lang', 'Peter lv', 'Sun Zi Jun', 'Yang Xiao Bo'],
            'age_limit': 13,
            'average_rating': float,
        },
        'expected_warnings': ['format is restricted']
    }]

    def _entries(self, album_id_num, page_ranges, album_id=None, mode_code='intl', lang_code='en_us'):
        for page_range in page_ranges:
            page = self._download_json(
                f'https://pcw-api.iq.com/api/episodeListSource/{album_id_num}', album_id,
                note=f'Downloading video list episodes {page_range.get("msg", "")}',
                errnote='Unable to download video list', query={
                    'platformId': 3,
                    'modeCode': mode_code,
                    'langCode': lang_code,
                    'endOrder': page_range['to'],
                    'startOrder': page_range['from']
                })
            for video in page['data']['epg']:
                yield self.url_result('https://www.iq.com/play/%s' % (video.get('playLocSuffix') or video['qipuIdStr']),
                                      IqIE.ie_key(), video.get('qipuIdStr'), video.get('name'))

    def _real_extract(self, url):
        album_id = self._match_id(url)
        webpage = self._download_webpage(url, album_id)
        next_data = self._search_nextjs_data(webpage, album_id)
        album_data = next_data['props']['initialState']['album']['videoAlbumInfo']

        if album_data.get('videoType') == 'singleVideo':
            return self.url_result('https://www.iq.com/play/%s' % album_id, IqIE.ie_key())
        return self.playlist_result(
            self._entries(album_data['albumId'], album_data['totalPageRange'], album_id,
                          traverse_obj(next_data, ('props', 'initialProps', 'pageProps', 'modeCode')),
                          traverse_obj(next_data, ('props', 'initialProps', 'pageProps', 'langCode'))),
            album_id, album_data.get('name'), album_data.get('desc'))
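As a reading aid for _entries above: each object in album_data['totalPageRange'] bounds exactly one episodeListSource request. A minimal sketch with hypothetical range values (only the 'from'/'to'/'msg' keys are taken from the code):

    page_ranges = [
        {'from': 1, 'to': 100, 'msg': '(1-100)'},
        {'from': 101, 'to': 200, 'msg': '(101-200)'},
    ]
    for page_range in page_ranges:
        # one API call per range; the episodes come back in page['data']['epg']
        query = {'startOrder': page_range['from'], 'endOrder': page_range['to']}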

@@ -243,8 +243,8 @@ class ITVBTCCIE(InfoExtractor):

        webpage = self._download_webpage(url, playlist_id)

-        json_map = try_get(self._parse_json(self._html_search_regex(
-            '(?s)<script[^>]+id=[\'"]__NEXT_DATA__[^>]*>([^<]+)</script>', webpage, 'json_map'), playlist_id),
+        json_map = try_get(
+            self._search_nextjs_data(webpage, playlist_id),
            lambda x: x['props']['pageProps']['article']['body']['content']) or []

        entries = []
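The change above swaps a hand-rolled __NEXT_DATA__ regex for the shared _search_nextjs_data helper. Roughly, the helper does the following (a simplified sketch, not the exact implementation):

    import json
    import re

    def search_nextjs_data(webpage):
        # locate the Next.js data island and parse its JSON payload
        mobj = re.search(
            r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>(?P<json>[^<]+)</script>', webpage)
        return json.loads(mobj.group('json')) if mobj else {}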

@@ -6,6 +6,7 @@ import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+    format_field,
    int_or_none,
    js_to_json,
    try_get,

@@ -72,7 +73,7 @@ class JojIE(InfoExtractor):
                r'(\d+)[pP]\.', format_url, 'height', default=None)
            formats.append({
                'url': format_url,
-                'format_id': '%sp' % height if height else None,
+                'format_id': format_field(height, template='%sp'),
                'height': int(height),
            })
        if not formats:
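format_field, used here and in several hunks below, collapses the "'%sp' % height if height else None" pattern. A rough sketch of its behavior as used in this diff (not the full utility):

    def format_field(obj, field=None, template='%s', ignore=(None, ''), default=''):
        # take obj[field] (or obj itself when field is None) and apply the
        # template, falling back to the default for ignored values
        value = obj.get(field) if field is not None else obj
        return template % value if value not in ignore else default

    # e.g. for the joj.py change above:
    # format_field('720', template='%sp') -> '720p'
    # format_field(None, template='%sp')  -> ''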

@@ -3,10 +3,12 @@
from __future__ import unicode_literals

from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_HTTPError
from ..utils import (
    ExtractorError,
    int_or_none,
    strip_or_none,
    str_or_none,
    traverse_obj,
    unified_timestamp,
)

@@ -24,10 +26,17 @@ class KakaoIE(InfoExtractor):
            'id': '301965083',
            'ext': 'mp4',
            'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
-            'uploader_id': 2671005,
+            'description': '',
+            'uploader_id': '2671005',
            'uploader': '그랑그랑이',
            'timestamp': 1488160199,
            'upload_date': '20170227',
+            'like_count': int,
+            'thumbnail': r're:http://.+/thumb\.png',
+            'tags': ['乃木坂'],
+            'view_count': int,
+            'duration': 1503,
+            'comment_count': int,
        }
    }, {
        'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180',

@@ -37,11 +46,21 @@ class KakaoIE(InfoExtractor):
            'ext': 'mp4',
            'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
            'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
-            'uploader_id': 2653210,
+            'uploader_id': '2653210',
            'uploader': '쇼! 음악중심',
            'timestamp': 1485684628,
            'upload_date': '20170129',
+            'like_count': int,
+            'thumbnail': r're:http://.+/thumb\.png',
+            'tags': 'count:28',
+            'view_count': int,
+            'duration': 184,
+            'comment_count': int,
        }
    }, {
+        # geo restricted
+        'url': 'https://tv.kakao.com/channel/3643855/cliplink/412069491',
+        'only_matching': True,
    }]

    def _real_extract(self, url):

@@ -73,19 +92,24 @@ class KakaoIE(InfoExtractor):
        title = clip.get('title') or clip_link.get('displayTitle')

        formats = []
-        for fmt in clip.get('videoOutputList', []):
+        for fmt in clip.get('videoOutputList') or []:
            profile_name = fmt.get('profile')
            if not profile_name or profile_name == 'AUDIO':
                continue
            query.update({
                'profile': profile_name,
-                'fields': '-*,url',
+                'fields': '-*,code,message,url',
            })
+            try:
+                fmt_url_json = self._download_json(
+                    cdn_api_base, video_id, query=query,
+                    note='Downloading video URL for profile %s' % profile_name)
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                    resp = self._parse_json(e.cause.read().decode(), video_id)
+                    if resp.get('code') == 'GeoBlocked':
+                        self.raise_geo_restricted()

-            fmt_url_json = self._download_json(
-                cdn_api_base, video_id,
-                'Downloading video URL for profile %s' % profile_name,
-                query=query, fatal=False)
            fmt_url = traverse_obj(fmt_url_json, ('videoLocation', 'url'))
            if not fmt_url:
                continue

@@ -105,7 +129,7 @@ class KakaoIE(InfoExtractor):
        for thumb in clip.get('clipChapterThumbnailList') or []:
            thumbs.append({
                'url': thumb.get('thumbnailUrl'),
-                'id': compat_str(thumb.get('timeInSec')),
+                'id': str(thumb.get('timeInSec')),
                'preference': -1 if thumb.get('isDefault') else 0
            })
        top_thumbnail = clip.get('thumbnailUrl')

@@ -120,7 +144,7 @@ class KakaoIE(InfoExtractor):
            'title': title,
            'description': strip_or_none(clip.get('description')),
            'uploader': traverse_obj(clip_link, ('channel', 'name')),
-            'uploader_id': clip_link.get('channelId'),
+            'uploader_id': str_or_none(clip_link.get('channelId')),
            'thumbnails': thumbs,
            'timestamp': unified_timestamp(clip_link.get('createTime')),
            'duration': int_or_none(clip.get('duration')),
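The '-*,code,message,url' field mask above exists so that a 403 can be told apart from a geo-block by inspecting the error body. The same pattern in plain-urllib terms (a standalone sketch; the 'GeoBlocked' code is taken from the diff, the function name is hypothetical):

    import json
    import urllib.error
    import urllib.request

    def fetch_video_location(api_url):
        try:
            with urllib.request.urlopen(api_url) as resp:
                return json.load(resp)
        except urllib.error.HTTPError as e:
            if e.code == 403:
                body = json.loads(e.read().decode())
                # a 403 whose body carries code == 'GeoBlocked' is a region
                # lock, not a transient failure
                if body.get('code') == 'GeoBlocked':
                    raise RuntimeError('This video is geo-restricted') from e
            raise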

@@ -12,6 +12,7 @@ from ..compat import (
from ..utils import (
    clean_html,
    ExtractorError,
+    format_field,
    int_or_none,
    unsmuggle_url,
    smuggle_url,

@@ -372,6 +373,6 @@ class KalturaIE(InfoExtractor):
            'thumbnail': info.get('thumbnailUrl'),
            'duration': info.get('duration'),
            'timestamp': info.get('createdAt'),
-            'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
+            'uploader_id': format_field(info, 'userId', ignore=('None', None)),
            'view_count': info.get('plays'),
        }

@@ -8,6 +8,7 @@ from ..compat import compat_urllib_parse_unquote
from ..utils import (
    determine_ext,
    ExtractorError,
+    format_field,
    int_or_none,
    str_to_int,
    strip_or_none,

@@ -69,7 +70,7 @@ class KeezMoviesIE(InfoExtractor):
                video_url, title, 32).decode('utf-8')
            formats.append({
                'url': format_url,
-                'format_id': '%dp' % height if height else None,
+                'format_id': format_field(height, template='%dp'),
                'height': height,
                'tbr': tbr,
            })

yt_dlp/extractor/kelbyone.py (new file, 84 lines)
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class KelbyOneIE(InfoExtractor):
+    _VALID_URL = r'https?://members\.kelbyone\.com/course/(?P<id>[^$&?#/]+)'
+
+    _TESTS = [{
+        'url': 'https://members.kelbyone.com/course/glyn-dewis-mastering-selections/',
+        'playlist_mincount': 1,
+        'info_dict': {
+            'id': 'glyn-dewis-mastering-selections',
+            'title': 'Trailer - Mastering Selections in Photoshop',
+        },
+        'playlist': [{
+            'info_dict': {
+                'id': 'MkiOnLqK',
+                'ext': 'mp4',
+                'title': 'Trailer - Mastering Selections in Photoshop',
+                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+                'thumbnail': 'https://content.jwplatform.com/v2/media/MkiOnLqK/poster.jpg?width=720',
+                'timestamp': 1601568639,
+                'duration': 90,
+                'upload_date': '20201001',
+            },
+        }]
+    }]
+
+    def _entries(self, playlist):
+        for item in playlist:
+            video_id = item['mediaid']
+            thumbnails = [{
+                'url': image.get('src'),
+                'width': int_or_none(image.get('width')),
+            } for image in item.get('images') or []]
+            formats, subtitles = [], {}
+            for source in item.get('sources') or []:
+                if not source.get('file'):
+                    continue
+                if source.get('type') == 'application/vnd.apple.mpegurl':
+                    fmts, subs = self._extract_m3u8_formats_and_subtitles(source['file'], video_id)
+                    formats.extend(fmts)
+                    subtitles = self._merge_subtitles(subs, subtitles)
+                elif source.get('type') == 'audio/mp4':
+                    formats.append({
+                        'format_id': source.get('label'),
+                        'url': source['file'],
+                        'vcodec': 'none',
+                    })
+                else:
+                    formats.append({
+                        'format_id': source.get('label'),
+                        'height': source.get('height'),
+                        'width': source.get('width'),
+                        'url': source['file'],
+                    })
+            for track in item.get('tracks'):
+                if track.get('kind') == 'captions' and track.get('file'):
+                    subtitles.setdefault('en', []).append({
+                        'url': track['file'],
+                    })
+            self._sort_formats(formats)
+            yield {
+                'id': video_id,
+                'title': item['title'],
+                'description': item.get('description'),
+                'thumbnails': thumbnails,
+                'thumbnail': item.get('image'),
+                'timestamp': item.get('pubdate'),
+                'duration': item.get('duration'),
+                'formats': formats,
+                'subtitles': subtitles,
+            }
+
+    def _real_extract(self, url):
+        item_id = self._match_id(url)
+        webpage = self._download_webpage(url, item_id)
+        playlist_url = self._html_search_regex(r'playlist"\:"(https.*content\.jwplatform\.com.*json)"', webpage, 'playlist url').replace('\\', '')
+        course_data = self._download_json(playlist_url, item_id)
+        return self.playlist_result(self._entries(course_data['playlist']), item_id,
+                                    course_data.get('title'), course_data.get('description'))
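Once registered, the new extractor is exercised through the normal yt-dlp entry points; a quick, assumption-light way to probe it from Python (URL taken from the test case above):

    import yt_dlp

    with yt_dlp.YoutubeDL({'skip_download': True}) as ydl:
        info = ydl.extract_info(
            'https://members.kelbyone.com/course/glyn-dewis-mastering-selections/',
            download=False)
        # a course resolves to a playlist of JW Player media entries
        print(info['id'], [entry['id'] for entry in info['entries']])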

@@ -5,95 +5,12 @@ from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+    format_field,
    int_or_none,
-    js_to_json,
    str_or_none,
)


-class LineTVIE(InfoExtractor):
-    _VALID_URL = r'https?://tv\.line\.me/v/(?P<id>\d+)_[^/]+-(?P<segment>ep\d+-\d+)'
-
-    _TESTS = [{
-        'url': 'https://tv.line.me/v/793123_goodbye-mrblack-ep1-1/list/69246',
-        'info_dict': {
-            'id': '793123_ep1-1',
-            'ext': 'mp4',
-            'title': 'Goodbye Mr.Black | EP.1-1',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 998.509,
-            'view_count': int,
-        },
-    }, {
-        'url': 'https://tv.line.me/v/2587507_%E6%B4%BE%E9%81%A3%E5%A5%B3%E9%86%ABx-ep1-02/list/185245',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        series_id, segment = self._match_valid_url(url).groups()
-        video_id = '%s_%s' % (series_id, segment)
-
-        webpage = self._download_webpage(url, video_id)
-
-        player_params = self._parse_json(self._search_regex(
-            r'naver\.WebPlayer\(({[^}]+})\)', webpage, 'player parameters'),
-            video_id, transform_source=js_to_json)
-
-        video_info = self._download_json(
-            'https://global-nvapis.line.me/linetv/rmcnmv/vod_play_videoInfo.json',
-            video_id, query={
-                'videoId': player_params['videoId'],
-                'key': player_params['key'],
-            })
-
-        stream = video_info['streams'][0]
-        extra_query = '?__gda__=' + stream['key']['value']
-        formats = self._extract_m3u8_formats(
-            stream['source'] + extra_query, video_id, ext='mp4',
-            entry_protocol='m3u8_native', m3u8_id='hls')
-
-        for a_format in formats:
-            a_format['url'] += extra_query
-
-        duration = None
-        for video in video_info.get('videos', {}).get('list', []):
-            encoding_option = video.get('encodingOption', {})
-            abr = video['bitrate']['audio']
-            vbr = video['bitrate']['video']
-            tbr = abr + vbr
-            formats.append({
-                'url': video['source'],
-                'format_id': 'http-%d' % int(tbr),
-                'height': encoding_option.get('height'),
-                'width': encoding_option.get('width'),
-                'abr': abr,
-                'vbr': vbr,
-                'filesize': video.get('size'),
-            })
-            if video.get('duration') and duration is None:
-                duration = video['duration']
-
-        self._sort_formats(formats)
-
-        if formats and not formats[0].get('width'):
-            formats[0]['vcodec'] = 'none'
-
-        title = self._og_search_title(webpage)
-
-        # like_count requires an additional API request https://tv.line.me/api/likeit/getCount
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'extra_param_to_segment_url': extra_query[1:],
-            'duration': duration,
-            'thumbnails': [{'url': thumbnail['source']}
-                           for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
-            'view_count': video_info.get('meta', {}).get('count'),
-        }


class LineLiveBaseIE(InfoExtractor):
    _API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'

@@ -121,7 +38,7 @@ class LineLiveBaseIE(InfoExtractor):
            'timestamp': int_or_none(item.get('createdAt')),
            'channel': channel.get('name'),
            'channel_id': channel_id,
-            'channel_url': 'https://live.line.me/channels/' + channel_id if channel_id else None,
+            'channel_url': format_field(channel_id, template='https://live.line.me/channels/%s'),
            'duration': int_or_none(item.get('archiveDuration')),
            'view_count': int_or_none(item.get('viewerCount')),
            'comment_count': int_or_none(item.get('chatCount')),

@@ -132,16 +49,19 @@
class LineLiveIE(LineLiveBaseIE):
    _VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
    _TESTS = [{
-        'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
-        'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
+        'url': 'https://live.line.me/channels/5833718/broadcast/18373277',
+        'md5': '2c15843b8cb3acd55009ddcb2db91f7c',
        'info_dict': {
-            'id': '16331360',
-            'title': '振りコピ講座😙😙😙',
+            'id': '18373277',
+            'title': '2021/12/05 (15分犬)定例譲渡会🐶',
            'ext': 'mp4',
-            'timestamp': 1617095132,
-            'upload_date': '20210330',
-            'channel': '白川ゆめか',
-            'channel_id': '4867368',
+            'timestamp': 1638674925,
+            'upload_date': '20211205',
+            'thumbnail': 'md5:e1f5817e60f4a72b7e43377cf308d7ef',
+            'channel_url': 'https://live.line.me/channels/5833718',
+            'channel': 'Yahooニュース掲載🗞プロフ見てね🐕🐕',
+            'channel_id': '5833718',
            'duration': 937,
            'view_count': int,
            'comment_count': int,
            'is_live': False,

@@ -193,8 +113,8 @@ class LineLiveChannelIE(LineLiveBaseIE):
        'url': 'https://live.line.me/channels/5893542',
        'info_dict': {
            'id': '5893542',
-            'title': 'いくらちゃん',
-            'description': 'md5:c3a4af801f43b2fac0b02294976580be',
+            'title': 'いくらちゃんだよぉ🦒',
+            'description': 'md5:4d418087973ad081ceb1b3481f0b1816',
        },
        'playlist_mincount': 29
    }

@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
+    traverse_obj,
    smuggle_url,
    unsmuggle_url,
)

@@ -55,9 +56,6 @@ class LiTVIE(InfoExtractor):
        episode_title = program_info['title']
        content_id = season_list['contentId']

-        if prompt:
-            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (content_id, video_id))

        all_episodes = [
            self.url_result(smuggle_url(
                self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']),

@@ -67,16 +65,10 @@ class LiTVIE(InfoExtractor):
        return self.playlist_result(all_episodes, content_id, episode_title)

    def _real_extract(self, url):
-        url, data = unsmuggle_url(url, {})
+        url, smuggled_data = unsmuggle_url(url, {})

        video_id = self._match_id(url)

-        noplaylist = self.get_param('noplaylist')
-        noplaylist_prompt = True
-        if 'force_noplaylist' in data:
-            noplaylist = data['force_noplaylist']
-            noplaylist_prompt = False
-
        webpage = self._download_webpage(url, video_id)

        program_info = self._parse_json(self._search_regex(

@@ -84,14 +76,9 @@ class LiTVIE(InfoExtractor):
            video_id)

        season_list = list(program_info.get('seasonList', {}).values())
-        if season_list:
-            if not noplaylist:
-                return self._extract_playlist(
-                    season_list[0], video_id, program_info,
-                    prompt=noplaylist_prompt)
-
-            if noplaylist_prompt:
-                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+        playlist_id = traverse_obj(season_list, 0, 'contentId')
+        if self._yes_playlist(playlist_id, video_id, smuggled_data):
+            return self._extract_playlist(season_list[0], video_id, program_info)

        # In browsers `getMainUrl` request is always issued. Usually this
        # endpoint gives the same result as the data embedded in the webpage.
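The rewrite leans on URL smuggling plus the shared _yes_playlist prompt instead of hand-rolled --no-playlist bookkeeping. The smuggle round-trip works like this (example URL and payload are hypothetical):

    from yt_dlp.utils import smuggle_url, unsmuggle_url

    marked = smuggle_url('https://example.com/watch/123', {'force_noplaylist': True})
    url, smuggled_data = unsmuggle_url(marked, {})
    assert url == 'https://example.com/watch/123'
    assert smuggled_data == {'force_noplaylist': True}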

@@ -6,8 +6,10 @@ from .common import InfoExtractor
from ..utils import (
    clean_html,
    compat_str,
+    format_field,
    int_or_none,
    parse_iso8601,
+    unified_strdate,
)

@@ -71,17 +73,97 @@ class LnkGoIE(InfoExtractor):
            video_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)

-        poster_image = video_info.get('posterImage')
-
        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
-            'thumbnail': 'https://lnk.lt/all-images/' + poster_image if poster_image else None,
+            'thumbnail': format_field(video_info, 'posterImage', 'https://lnk.lt/all-images/%s'),
            'duration': int_or_none(video_info.get('duration')),
            'description': clean_html(video_info.get('htmlDescription')),
            'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
            'timestamp': parse_iso8601(video_info.get('airDate')),
            'view_count': int_or_none(video_info.get('viewsCount')),
        }
+
+
+class LnkIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'https://lnk.lt/zinios/79791',
+        'info_dict': {
+            'id': '79791',
+            'ext': 'mp4',
+            'title': 'LNK.lt: Viešintų gyventojai sukilo prieš radijo bangų siųstuvą',
+            'description': 'Svarbiausios naujienos trumpai, LNK žinios ir Info dienos pokalbiai.',
+            'view_count': int,
+            'duration': 233,
+            'upload_date': '20191123',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'episode_number': 13431,
+            'series': 'Naujausi žinių reportažai',
+            'episode': 'Episode 13431'
+        },
+        'params': {'skip_download': True}
+    }, {
+        'url': 'https://lnk.lt/istorijos-trumpai/152546',
+        'info_dict': {
+            'id': '152546',
+            'ext': 'mp4',
+            'title': 'Radžio koncertas gaisre ',
+            'description': 'md5:0666b5b85cb9fc7c1238dec96f71faba',
+            'view_count': int,
+            'duration': 54,
+            'upload_date': '20220105',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'episode_number': 1036,
+            'series': 'Istorijos trumpai',
+            'episode': 'Episode 1036'
+        },
+        'params': {'skip_download': True}
+    }, {
+        'url': 'https://lnk.lt/gyvunu-pasaulis/151549',
+        'info_dict': {
+            'id': '151549',
+            'ext': 'mp4',
+            'title': 'Gyvūnų pasaulis',
+            'description': '',
+            'view_count': int,
+            'duration': 1264,
+            'upload_date': '20220108',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'episode_number': 16,
+            'series': 'Gyvūnų pasaulis',
+            'episode': 'Episode 16'
+        },
+        'params': {'skip_download': True}
+    }]
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        video_json = self._download_json(f'https://lnk.lt/api/video/video-config/{id}', id)['videoInfo']
+        formats, subtitles = [], {}
+        if video_json.get('videoUrl'):
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(video_json['videoUrl'], id)
+            formats.extend(fmts)
+            subtitles = self._merge_subtitles(subtitles, subs)
+        if video_json.get('videoFairplayUrl') and not video_json.get('drm'):
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(video_json['videoFairplayUrl'], id)
+            formats.extend(fmts)
+            subtitles = self._merge_subtitles(subtitles, subs)
+
+        self._sort_formats(formats)
+        return {
+            'id': id,
+            'title': video_json.get('title'),
+            'description': video_json.get('description'),
+            'view_count': video_json.get('viewsCount'),
+            'duration': video_json.get('duration'),
+            'upload_date': unified_strdate(video_json.get('airDate')),
+            'thumbnail': format_field(video_json, 'posterImage', 'https://lnk.lt/all-images/%s'),
+            'episode_number': int_or_none(video_json.get('episodeNumber')),
+            'series': video_json.get('programTitle'),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
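LnkIE derives upload_date with unified_strdate, which reduces the API's ISO 'airDate' timestamp to the YYYYMMDD form seen in the test cases (the sample value below is illustrative):

    from yt_dlp.utils import unified_strdate

    assert unified_strdate('2019-11-23T18:30:00Z') == '20191123'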

yt_dlp/extractor/mainstreaming.py (new file, 219 lines)
@@ -0,0 +1,219 @@
+# coding: utf-8
+import re
+
+from .common import InfoExtractor
+
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    parse_duration,
+    traverse_obj,
+    try_get,
+    urljoin
+)
+
+
+class MainStreamingIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:webtools-?)?(?P<host>[A-Za-z0-9-]*\.msvdn.net)/(?:embed|amp_embed|content)/(?P<id>\w+)'
+    IE_DESC = 'MainStreaming Player'
+
+    _TESTS = [
+        {
+            # Live stream offline, has alternative content id
+            'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/53EN6GxbWaJC',
+            'info_dict': {
+                'id': '53EN6GxbWaJC',
+                'title': 'Diretta homepage 2021-12-31 12:00',
+                'description': '',
+                'live_status': 'was_live',
+                'ext': 'mp4',
+                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
+            },
+            'expected_warnings': [
+                'Ignoring alternative content ID: WDAF1KOWUpH3',
+                'MainStreaming said: Live event is OFFLINE'
+            ],
+            'skip': 'live stream offline'
+        }, {
+            # playlist
+            'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/WDAF1KOWUpH3',
+            'info_dict': {
+                'id': 'WDAF1KOWUpH3',
+                'title': 'Playlist homepage',
+            },
+            'playlist_mincount': 2
+        }, {
+            # livestream
+            'url': 'https://webtools-859c1818ed614cc5b0047439470927b0.msvdn.net/embed/tDoFkZD3T1Lw',
+            'info_dict': {
+                'id': 'tDoFkZD3T1Lw',
+                'title': r're:Class CNBC Live \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+                'live_status': 'is_live',
+                'ext': 'mp4',
+                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
+            },
+            'skip': 'live stream'
+        }, {
+            'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/EUlZfGWkGpOd?autoPlay=false',
+            'info_dict': {
+                'id': 'EUlZfGWkGpOd',
+                'title': 'La Settimana ',
+                'description': '03 Ottobre ore 02:00',
+                'ext': 'mp4',
+                'live_status': 'not_live',
+                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
+                'duration': 1512
+            }
+        }, {
+            # video without webtools- prefix
+            'url': 'https://f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/MfuWmzL2lGkA?autoplay=false&T=1635860445',
+            'info_dict': {
+                'id': 'MfuWmzL2lGkA',
+                'title': 'TG Mattina',
+                'description': '06 Ottobre ore 08:00',
+                'ext': 'mp4',
+                'live_status': 'not_live',
+                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
+                'duration': 789.04
+            }
+        }, {
+            # always-on livestream with DVR
+            'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/HVvPMzy',
+            'info_dict': {
+                'id': 'HVvPMzy',
+                'title': r're:^Diretta LaC News24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+                'description': 'canale all news',
+                'live_status': 'is_live',
+                'ext': 'mp4',
+                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        }, {
+            # no host
+            'url': 'https://webtools.msvdn.net/embed/MfuWmzL2lGkA',
+            'only_matching': True
+        }, {
+            'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/amp_embed/tDoFkZD3T1Lw',
+            'only_matching': True
+        }, {
+            'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/content/tDoFkZD3T1Lw#',
+            'only_matching': True
+        }
+    ]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        mobj = re.findall(
+            r'<iframe[^>]+?src=["\']?(?P<url>%s)["\']?' % MainStreamingIE._VALID_URL, webpage)
+        if mobj:
+            return [group[0] for group in mobj]
+
+    def _playlist_entries(self, host, playlist_content):
+        for entry in playlist_content:
+            content_id = entry.get('contentID')
+            yield {
+                '_type': 'url',
+                'ie_key': MainStreamingIE.ie_key(),
+                'id': content_id,
+                'duration': int_or_none(traverse_obj(entry, ('duration', 'totalSeconds'))),
+                'title': entry.get('title'),
+                'url': f'https://{host}/embed/{content_id}'
+            }
+
+    @staticmethod
+    def _get_webtools_host(host):
+        if not host.startswith('webtools'):
+            host = 'webtools' + ('-' if not host.startswith('.') else '') + host
+        return host
+
+    def _get_webtools_base_url(self, host):
+        return f'{self.http_scheme()}//{self._get_webtools_host(host)}'
+
+    def _call_api(self, host: str, path: str, item_id: str, query=None, note='Downloading API JSON', fatal=False):
+        # JSON API, does not appear to be documented
+        return self._call_webtools_api(host, '/api/v2/' + path, item_id, query, note, fatal)
+
+    def _call_webtools_api(self, host: str, path: str, item_id: str, query=None, note='Downloading webtools API JSON', fatal=False):
+        # webtools docs: https://webtools.msvdn.net/
+        return self._download_json(
+            urljoin(self._get_webtools_base_url(host), path), item_id, query=query, note=note, fatal=fatal)
+
+    def _real_extract(self, url):
+        host, video_id = self._match_valid_url(url).groups()
+        content_info = try_get(
+            self._call_api(
+                host, f'content/{video_id}', video_id, note='Downloading content info API JSON'), lambda x: x['playerContentInfo'])
+        # Fallback
+        if not content_info:
+            webpage = self._download_webpage(url, video_id)
+            player_config = self._parse_json(
+                self._search_regex(
+                    r'config\s*=\s*({.+?})\s*;', webpage, 'mainstreaming player config',
+                    default='{}', flags=re.DOTALL),
+                video_id, transform_source=js_to_json, fatal=False) or {}
+            content_info = player_config['contentInfo']
+
+        host = content_info.get('host') or host
+        video_id = content_info.get('contentID') or video_id
+        title = content_info.get('title')
+        description = traverse_obj(content_info, 'longDescription', 'shortDescription', expected_type=str)
+        live_status = 'not_live'
+        if content_info.get('drmEnabled'):
+            self.report_drm(video_id)
+
+        alternative_content_id = content_info.get('alternativeContentID')
+        if alternative_content_id:
+            self.report_warning(f'Ignoring alternative content ID: {alternative_content_id}')
+
+        content_type = int_or_none(content_info.get('contentType'))
+        format_base_url = None
+        formats = []
+        subtitles = {}
+        # Live content
+        if content_type == 20:
+            dvr_enabled = traverse_obj(content_info, ('playerSettings', 'dvrEnabled'), expected_type=bool)
+            format_base_url = f"https://{host}/live/{content_info['liveSourceID']}/{video_id}/%s{'?DVR' if dvr_enabled else ''}"
+            live_status = 'is_live'
+            heartbeat = self._call_api(host, f'heartbeat/{video_id}', video_id, note='Checking stream status') or {}
+            if heartbeat.get('heartBeatUp') is False:
+                self.raise_no_formats(f'MainStreaming said: {heartbeat.get("responseMessage")}', expected=True)
+                live_status = 'was_live'
+
+        # Playlist
+        elif content_type == 31:
+            return self.playlist_result(
+                self._playlist_entries(host, content_info.get('playlistContents')), video_id, title, description)
+        # Normal video content?
+        elif content_type == 10:
+            format_base_url = f'https://{host}/vod/{video_id}/%s'
+            # Progressive format
+            # Note: in https://webtools.msvdn.net/loader/playerV2.js there is mention of original.mp3 format,
+            # however it seems to be the same as original.mp4?
+            formats.append({'url': format_base_url % 'original.mp4', 'format_note': 'original', 'quality': 1})
+        else:
+            self.raise_no_formats(f'Unknown content type {content_type}')
+
+        if format_base_url:
+            m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+                format_base_url % 'playlist.m3u8', video_id=video_id, fatal=False)
+            mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
+                format_base_url % 'manifest.mpd', video_id=video_id, fatal=False)
+
+            subtitles = self._merge_subtitles(m3u8_subs, mpd_subs)
+            formats.extend(m3u8_formats + mpd_formats)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': formats,
+            'live_status': live_status,
+            'duration': parse_duration(content_info.get('duration')),
+            'tags': content_info.get('tags'),
+            'subtitles': subtitles,
+            'thumbnail': urljoin(self._get_webtools_base_url(host), f'image/{video_id}/poster')
+        }
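The host normalization in _get_webtools_host is the subtle piece: an embed host may or may not carry the webtools prefix, and the _VALID_URL host group can even be a bare '.msvdn.net' when 'webtools' was the whole subdomain. A standalone sketch of that mapping, mirroring the method above:

    def get_webtools_host(host):
        # add the 'webtools' prefix when absent, joining with '-' unless the
        # matched host starts at the domain dot
        if not host.startswith('webtools'):
            host = 'webtools' + ('-' if not host.startswith('.') else '') + host
        return host

    assert get_webtools_host('.msvdn.net') == 'webtools.msvdn.net'
    assert get_webtools_host('859c1818ed614cc5b0047439470927b0.msvdn.net') == (
        'webtools-859c1818ed614cc5b0047439470927b0.msvdn.net')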

@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
+    format_field,
    float_or_none,
    int_or_none,
    str_or_none,

@@ -118,7 +119,7 @@ class MedalTVIE(InfoExtractor):
        author = try_get(
            hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
        author_id = str_or_none(author.get('id'))
-        author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None
+        author_url = format_field(author_id, template='https://medal.tv/users/%s')

        return {
            'id': video_id,
Some files were not shown because too many files have changed in this diff.