mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2026-01-04 05:07:37 +01:00
Compare commits
831 Commits
2015.05.20
...
2015.07.28
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
289bbb350e | ||
|
|
d247a2c8bf | ||
|
|
88ed52aec9 | ||
|
|
4c6bd5b5b6 | ||
|
|
aeb7b41d44 | ||
|
|
5bdec59de1 | ||
|
|
7a89681722 | ||
|
|
51da40e621 | ||
|
|
1af330f29f | ||
|
|
9afa1770d1 | ||
|
|
3ebbcce1c7 | ||
|
|
2c7c721933 | ||
|
|
7523647391 | ||
|
|
9700cd9097 | ||
|
|
eab7faa0c1 | ||
|
|
a56c1e38c7 | ||
|
|
40a2d17052 | ||
|
|
b14fa8e687 | ||
|
|
678e436f2e | ||
|
|
ff81c4c99c | ||
|
|
420658e6cb | ||
|
|
593ddd851b | ||
|
|
1243402657 | ||
|
|
2b2ee140c3 | ||
|
|
d97f5cd795 | ||
|
|
f3f0b8e403 | ||
|
|
660f9459da | ||
|
|
10952eb2cf | ||
|
|
cdad742700 | ||
|
|
a9e8f60ef6 | ||
|
|
a8b7b26068 | ||
|
|
ba911137fa | ||
|
|
d3f007af18 | ||
|
|
2929fa0e79 | ||
|
|
297a564bee | ||
|
|
53b8247cb5 | ||
|
|
59db9f8018 | ||
|
|
b73b14f72c | ||
|
|
41597d9bed | ||
|
|
b37317d8b0 | ||
|
|
87dc451108 | ||
|
|
ca4456eda8 | ||
|
|
993df6bc22 | ||
|
|
61be92e26a | ||
|
|
c59b61c0da | ||
|
|
3e214851a4 | ||
|
|
a47b602b08 | ||
|
|
a083b859e4 | ||
|
|
948199deac | ||
|
|
c356620ec1 | ||
|
|
f79ebf09a2 | ||
|
|
c7620992d2 | ||
|
|
ce1bafdce9 | ||
|
|
9872e588c8 | ||
|
|
d609edf4f1 | ||
|
|
3a99d321a8 | ||
|
|
4bb3d999ac | ||
|
|
40101dc311 | ||
|
|
e9c6deffee | ||
|
|
9c29bc69f7 | ||
|
|
1e12429564 | ||
|
|
795704f0f1 | ||
|
|
981b9cdc8c | ||
|
|
3f724339db | ||
|
|
70c857b728 | ||
|
|
c84683c88b | ||
|
|
b68a2613f8 | ||
|
|
28afa6e77a | ||
|
|
496ce6b349 | ||
|
|
ce9512b78b | ||
|
|
4eb59a6b1c | ||
|
|
80b1ee0a4c | ||
|
|
f993afb26d | ||
|
|
7c80519cbf | ||
|
|
8250c32f49 | ||
|
|
2fe1ff8582 | ||
|
|
17ee98e1a5 | ||
|
|
2ee8f5d80f | ||
|
|
3f302bca8c | ||
|
|
c909e5820e | ||
|
|
a1b85269a4 | ||
|
|
faa1f83ab4 | ||
|
|
308c505c3d | ||
|
|
0eacd2aaae | ||
|
|
18ae46ad4b | ||
|
|
65c2b21df1 | ||
|
|
772acaf31f | ||
|
|
f8d0745e27 | ||
|
|
d719c6a5ab | ||
|
|
769efa16af | ||
|
|
86b4e98ac6 | ||
|
|
3bf8c316a6 | ||
|
|
e37c92ec6d | ||
|
|
a5dd9a0c5d | ||
|
|
7a4a945f13 | ||
|
|
1d18e26eca | ||
|
|
ac4b8df5e4 | ||
|
|
3bc9fb5889 | ||
|
|
632cbb8efa | ||
|
|
789a12aaaf | ||
|
|
ecdbe09e10 | ||
|
|
1dc31c2786 | ||
|
|
32470bf619 | ||
|
|
8b61bfd638 | ||
|
|
8a7a208905 | ||
|
|
0215103e92 | ||
|
|
c2d1be8981 | ||
|
|
4951c9f821 | ||
|
|
726adc43ec | ||
|
|
3c6ae8b59e | ||
|
|
605be3f7f8 | ||
|
|
c51bc70e0f | ||
|
|
e89d7e3029 | ||
|
|
4b0f45f667 | ||
|
|
36068ae019 | ||
|
|
761ee0d827 | ||
|
|
fb8bc3f818 | ||
|
|
826a7da808 | ||
|
|
cbd55ade68 | ||
|
|
5705ee6ef8 | ||
|
|
3f5c6d0c1b | ||
|
|
e58066e244 | ||
|
|
ee48b6a88f | ||
|
|
9ac09ed4de | ||
|
|
22603348aa | ||
|
|
fec73daaa3 | ||
|
|
c6b68648f4 | ||
|
|
1ecb5d1d83 | ||
|
|
dc786d3db5 | ||
|
|
74fe23ec35 | ||
|
|
b0bff54b08 | ||
|
|
1b541d8d6e | ||
|
|
f29ac588dd | ||
|
|
0696667734 | ||
|
|
1793d71db6 | ||
|
|
4211e1941b | ||
|
|
4bdfef5a18 | ||
|
|
8a37f53685 | ||
|
|
4e1ad6e9a8 | ||
|
|
fb10e1aa57 | ||
|
|
3c283a381e | ||
|
|
dac4d5be12 | ||
|
|
530857182d | ||
|
|
9441f77faa | ||
|
|
3cc8b4c327 | ||
|
|
6b19647d57 | ||
|
|
7bd42d0d96 | ||
|
|
c60e8cfaf7 | ||
|
|
7fd002c006 | ||
|
|
db6c50f109 | ||
|
|
aa4789d632 | ||
|
|
ee8de13e14 | ||
|
|
7dde5f6a8d | ||
|
|
736f003f2e | ||
|
|
47af21e8f1 | ||
|
|
605cbef653 | ||
|
|
388ad0c05c | ||
|
|
2ebbb6f1f7 | ||
|
|
d54f1c7477 | ||
|
|
b78f5ec4c3 | ||
|
|
9fd3bf04b7 | ||
|
|
e97bb3de83 | ||
|
|
c2daf8dfa4 | ||
|
|
09b718c439 | ||
|
|
c177bb3a50 | ||
|
|
977a247a06 | ||
|
|
899a3e2f13 | ||
|
|
8ee4ecb48d | ||
|
|
f7e6f7fa23 | ||
|
|
1f80e360fc | ||
|
|
d7011316d0 | ||
|
|
d3671b344f | ||
|
|
a60cccbf9f | ||
|
|
3e72f5f10e | ||
|
|
b94b78971c | ||
|
|
4d08161ac2 | ||
|
|
8954e48140 | ||
|
|
aa99aa4e85 | ||
|
|
d79febcd06 | ||
|
|
13fc7f3a05 | ||
|
|
14309e1ddc | ||
|
|
5513967926 | ||
|
|
eacd875f3b | ||
|
|
c4fe07c7af | ||
|
|
1186e3f91a | ||
|
|
f354385bf5 | ||
|
|
cabe001590 | ||
|
|
89f691e141 | ||
|
|
4a63291144 | ||
|
|
593b77064c | ||
|
|
9fefc88656 | ||
|
|
a3bfddfa5e | ||
|
|
36da48798a | ||
|
|
a0f28f90fa | ||
|
|
851229a01f | ||
|
|
c9c854cea7 | ||
|
|
a38436e889 | ||
|
|
23fc384f2c | ||
|
|
1540119723 | ||
|
|
574f42d79a | ||
|
|
536b0700b0 | ||
|
|
5ba761eb85 | ||
|
|
611ac379bb | ||
|
|
03f32a7ead | ||
|
|
50ea2bb20d | ||
|
|
525daedd5a | ||
|
|
e118031ef8 | ||
|
|
45eedbe58c | ||
|
|
e37c932fca | ||
|
|
5eb778bf4d | ||
|
|
ab9b890b52 | ||
|
|
31c746e5dc | ||
|
|
f01f731107 | ||
|
|
70f0f5a8ca | ||
|
|
cc357c4db8 | ||
|
|
97f4aecfc1 | ||
|
|
2af0f87c8b | ||
|
|
b062d94eef | ||
|
|
6c1b0c0ed2 | ||
|
|
ddcdc684e2 | ||
|
|
eae89f92e6 | ||
|
|
01d115b06b | ||
|
|
79057965a8 | ||
|
|
dcd4d95c8e | ||
|
|
cf61d96df0 | ||
|
|
f8da79f828 | ||
|
|
9750e7d70e | ||
|
|
50aa2bb6b9 | ||
|
|
1d1dd597ed | ||
|
|
cfe5537ee5 | ||
|
|
7869eb3fc4 | ||
|
|
6dfa0602f0 | ||
|
|
75a40b2251 | ||
|
|
28fb109ed0 | ||
|
|
48607afac5 | ||
|
|
b6ea9ef21a | ||
|
|
b8dd44baa9 | ||
|
|
c4f1fde75b | ||
|
|
667170e2c7 | ||
|
|
53429e6551 | ||
|
|
ac8f97f2b3 | ||
|
|
41c0d2f8cb | ||
|
|
1f3a43dbe6 | ||
|
|
369e195a44 | ||
|
|
15006fedb9 | ||
|
|
e35b23f54d | ||
|
|
f72b0a6032 | ||
|
|
ac9ed061ec | ||
|
|
d919fa3344 | ||
|
|
79913fde35 | ||
|
|
da634d0a8b | ||
|
|
fac54cb426 | ||
|
|
3f19b9b7c1 | ||
|
|
86f2541695 | ||
|
|
181c4ccaaa | ||
|
|
ed848087d5 | ||
|
|
edd66be5be | ||
|
|
246995dbc8 | ||
|
|
b931fbe5ab | ||
|
|
e014ff015d | ||
|
|
4fa5f40232 | ||
|
|
9b15be97aa | ||
|
|
a7ada46bd9 | ||
|
|
9d16788ad9 | ||
|
|
6ce89aecc3 | ||
|
|
963d0ce7e3 | ||
|
|
0f08d7f851 | ||
|
|
44c514eb9c | ||
|
|
513cbdda93 | ||
|
|
e1ba152352 | ||
|
|
446e764500 | ||
|
|
901d00caa6 | ||
|
|
094790d2c9 | ||
|
|
1c0163a5cc | ||
|
|
8fa7e5817a | ||
|
|
01b89d5682 | ||
|
|
9f01c1a803 | ||
|
|
46f0f50016 | ||
|
|
b8070dbbd7 | ||
|
|
3b16d803c9 | ||
|
|
de195c23a6 | ||
|
|
d3b8908886 | ||
|
|
2688176c77 | ||
|
|
a5839317aa | ||
|
|
a0aab26a41 | ||
|
|
27713812a0 | ||
|
|
cf2c5fda4f | ||
|
|
a9684c0dbf | ||
|
|
c0bf5e1c4d | ||
|
|
a31e3e7dcb | ||
|
|
17b41a3337 | ||
|
|
89a683ae74 | ||
|
|
008661069b | ||
|
|
9296e92e1c | ||
|
|
a34af8d066 | ||
|
|
8726e04629 | ||
|
|
2a01c940ec | ||
|
|
4eab60cbd2 | ||
|
|
a0e060ac1e | ||
|
|
397a8ea96e | ||
|
|
15830339ef | ||
|
|
b29280285e | ||
|
|
1633491bff | ||
|
|
2b0fa1f7dd | ||
|
|
02b386f80a | ||
|
|
bf20b9c540 | ||
|
|
06a12933f3 | ||
|
|
6dd94d3a79 | ||
|
|
f2f89c762a | ||
|
|
e6c2d9ad29 | ||
|
|
83423254cc | ||
|
|
1c20ddc966 | ||
|
|
675e9f22ea | ||
|
|
77c6fb5b24 | ||
|
|
082a0140ef | ||
|
|
9e535ce055 | ||
|
|
d76dea001b | ||
|
|
af0f9b0e95 | ||
|
|
e2082ea942 | ||
|
|
68923e52a3 | ||
|
|
9281f6d253 | ||
|
|
4647845679 | ||
|
|
cf9cf7dd04 | ||
|
|
1316b54956 | ||
|
|
cbc1fadd6f | ||
|
|
4dd09c9add | ||
|
|
267dc07e6b | ||
|
|
d7b4d5dd50 | ||
|
|
7f220b2fac | ||
|
|
275c0423aa | ||
|
|
d3ee4bbc5a | ||
|
|
85a064861f | ||
|
|
d0b436bff2 | ||
|
|
92b2f18072 | ||
|
|
dfc4eca21f | ||
|
|
fc7ae675e2 | ||
|
|
804ad79985 | ||
|
|
da839880e9 | ||
|
|
e9d33454b5 | ||
|
|
d80891efc4 | ||
|
|
37c1e4025c | ||
|
|
59a83d3e5b | ||
|
|
13af92fdc4 | ||
|
|
0c20ee7d4b | ||
|
|
89d42c2c75 | ||
|
|
04611765a4 | ||
|
|
9dfc4fa1a1 | ||
|
|
43232d5c14 | ||
|
|
f7c272d4fa | ||
|
|
ede21449c8 | ||
|
|
d7c9a3e976 | ||
|
|
35eb649e9d | ||
|
|
e56a4c9e9b | ||
|
|
95506e37af | ||
|
|
e41840c522 | ||
|
|
2a46a27e6c | ||
|
|
0bcdc27653 | ||
|
|
ddf0f74de7 | ||
|
|
91b21b2334 | ||
|
|
66e568de3b | ||
|
|
f5ca97e393 | ||
|
|
8d06a62485 | ||
|
|
93f9420993 | ||
|
|
5b61070c70 | ||
|
|
dbe1a93526 | ||
|
|
aa5d9a79d6 | ||
|
|
86511ea417 | ||
|
|
1866432db7 | ||
|
|
33f1f81b8b | ||
|
|
9d0b581fea | ||
|
|
c05724cb18 | ||
|
|
f0714c9f86 | ||
|
|
cf386750c9 | ||
|
|
54f428f645 | ||
|
|
dc2bd20e55 | ||
|
|
c608ee491f | ||
|
|
738b926322 | ||
|
|
bea41c7f3f | ||
|
|
1bbe660dfa | ||
|
|
c4bd188da4 | ||
|
|
5414623791 | ||
|
|
c93d53f5e3 | ||
|
|
507683780e | ||
|
|
e8b9ee5e08 | ||
|
|
d16154d163 | ||
|
|
c342041fba | ||
|
|
bf42a9906d | ||
|
|
9603e8a7d9 | ||
|
|
c7c040b825 | ||
|
|
ac0474f89d | ||
|
|
bb512e57dc | ||
|
|
db652ea186 | ||
|
|
5a9cc19972 | ||
|
|
1a5fd4eebc | ||
|
|
8a1b49ff19 | ||
|
|
b971abe897 | ||
|
|
43b925ce74 | ||
|
|
62b742ece3 | ||
|
|
d16ef949ca | ||
|
|
23e7cba87f | ||
|
|
a8e6f30d8e | ||
|
|
9c49410898 | ||
|
|
802d74aa6b | ||
|
|
71f9e49e67 | ||
|
|
82ea1051b5 | ||
|
|
6c4d20cd6f | ||
|
|
04c27802c0 | ||
|
|
c3b7202f4f | ||
|
|
81103ef35d | ||
|
|
0eb5c1c62a | ||
|
|
a9de951744 | ||
|
|
a42a1bb09d | ||
|
|
9fbfc9bd4d | ||
|
|
242a998bdc | ||
|
|
9d1bf70234 | ||
|
|
b8c1cc1a51 | ||
|
|
eedd20ef96 | ||
|
|
7c197ad96d | ||
|
|
654fd03c73 | ||
|
|
cee16e0fa3 | ||
|
|
73c471e9ef | ||
|
|
533b99fbf9 | ||
|
|
f39eb98bab | ||
|
|
da77d856a1 | ||
|
|
b2575b38e7 | ||
|
|
0a3cf9ad3d | ||
|
|
00334d0de0 | ||
|
|
226b886ca8 | ||
|
|
bc93bdb5bb | ||
|
|
af214c3a79 | ||
|
|
4eb10f6621 | ||
|
|
7d7d469025 | ||
|
|
fd40bdc0be | ||
|
|
7e0480ae0e | ||
|
|
d80265ccd6 | ||
|
|
1b5a1ae257 | ||
|
|
d8d24a922a | ||
|
|
03339b7b5b | ||
|
|
2028c6e03d | ||
|
|
2988835af5 | ||
|
|
62cca96b72 | ||
|
|
b4dea075a3 | ||
|
|
533f67d3fa | ||
|
|
906e2f0eac | ||
|
|
b8091db6b9 | ||
|
|
381c067755 | ||
|
|
2182ab5187 | ||
|
|
a1593a4a0e | ||
|
|
aa5740fb61 | ||
|
|
da92eeae42 | ||
|
|
12e9e8445d | ||
|
|
8084be78c5 | ||
|
|
1ac1c4c26e | ||
|
|
d4f58034f7 | ||
|
|
f843300fe5 | ||
|
|
03b9c94437 | ||
|
|
a219d175c6 | ||
|
|
3c3b4176bd | ||
|
|
022383139b | ||
|
|
1a1251e877 | ||
|
|
18b5e1e534 | ||
|
|
2fece970b8 | ||
|
|
e20d0c1e69 | ||
|
|
a9dcf4a860 | ||
|
|
255f5694aa | ||
|
|
25701d5a2c | ||
|
|
a5158f38a3 | ||
|
|
c76799c555 | ||
|
|
2bb5b6d0a1 | ||
|
|
0bbba43ed0 | ||
|
|
98ca102441 | ||
|
|
3f3308cd75 | ||
|
|
6f96e308d0 | ||
|
|
756f574e4e | ||
|
|
78294e6a9c | ||
|
|
4e33577173 | ||
|
|
607841af64 | ||
|
|
396726244a | ||
|
|
c5895d5dbd | ||
|
|
b407e173e4 | ||
|
|
6a745c2c0f | ||
|
|
2da0cad6ae | ||
|
|
af1fa6234e | ||
|
|
c9ac7fa909 | ||
|
|
964afd0689 | ||
|
|
2a282a3b5f | ||
|
|
7bb23aeca4 | ||
|
|
de939d89eb | ||
|
|
77c975f536 | ||
|
|
75ab0ebcf5 | ||
|
|
10273d6e08 | ||
|
|
16d6973f8a | ||
|
|
edcd2d665b | ||
|
|
385c3e5e91 | ||
|
|
c8e337450b | ||
|
|
10464af5d1 | ||
|
|
cbcd1a5474 | ||
|
|
c9bebed294 | ||
|
|
d5552a3477 | ||
|
|
a8b081a052 | ||
|
|
9e96dc8b35 | ||
|
|
360075e28a | ||
|
|
accf79b107 | ||
|
|
4d58b24c15 | ||
|
|
c33a8639a7 | ||
|
|
25fa8d66e6 | ||
|
|
974a6146fe | ||
|
|
0392ac98d2 | ||
|
|
5e3915cbe3 | ||
|
|
29b809de68 | ||
|
|
8f73e89ca0 | ||
|
|
0d0d5d3717 | ||
|
|
a69e8bfdd9 | ||
|
|
062a3fdf36 | ||
|
|
028a33d7f2 | ||
|
|
14835de9fb | ||
|
|
447053668f | ||
|
|
f3aecb27a4 | ||
|
|
7f0172b3e5 | ||
|
|
79fe954d79 | ||
|
|
0501bfa159 | ||
|
|
a155b7e76c | ||
|
|
5228b756af | ||
|
|
04e7596680 | ||
|
|
f1e66cb2eb | ||
|
|
4fd35ee072 | ||
|
|
ee69799262 | ||
|
|
636a9637f4 | ||
|
|
9383e66f94 | ||
|
|
99e6833c85 | ||
|
|
c203be3fb4 | ||
|
|
02175a7986 | ||
|
|
8117df4cd9 | ||
|
|
7c7dd9dc7f | ||
|
|
054932f403 | ||
|
|
aed473ccf9 | ||
|
|
8268e94cd4 | ||
|
|
4af98ecdfb | ||
|
|
4b9f9010b0 | ||
|
|
2a0fcf6113 | ||
|
|
67d95f177c | ||
|
|
44773ad125 | ||
|
|
5774ef35c4 | ||
|
|
b95cfa9170 | ||
|
|
afa1ded425 | ||
|
|
00ac23e6e0 | ||
|
|
7d0c934a3e | ||
|
|
8f75761f24 | ||
|
|
9fd24e3a22 | ||
|
|
755a9d3d1a | ||
|
|
ac499cb61c | ||
|
|
180940e02d | ||
|
|
976b03c56b | ||
|
|
450d89ddc1 | ||
|
|
463b2e5542 | ||
|
|
70a2002399 | ||
|
|
a617b10075 | ||
|
|
0029071adb | ||
|
|
ad49fe7c8f | ||
|
|
49bc802f81 | ||
|
|
af9cdee9cb | ||
|
|
b4e1576aee | ||
|
|
78e2b74bb9 | ||
|
|
65d161c480 | ||
|
|
9fcbd5db2a | ||
|
|
4f3bf679f5 | ||
|
|
0072afca8e | ||
|
|
61aa5ba36e | ||
|
|
9f4323252a | ||
|
|
8b6c896c4b | ||
|
|
185dbc4974 | ||
|
|
3d535e0471 | ||
|
|
d22dec74ff | ||
|
|
9872d3110c | ||
|
|
b859971873 | ||
|
|
e5095f1198 | ||
|
|
499a077761 | ||
|
|
5da7177729 | ||
|
|
3507766bd0 | ||
|
|
f37bdbe537 | ||
|
|
2da09ff8b0 | ||
|
|
5ccddb7ecf | ||
|
|
954c1d0529 | ||
|
|
494f20cbdc | ||
|
|
29902c8ec0 | ||
|
|
9f15bdabc8 | ||
|
|
fff3455f58 | ||
|
|
87446dc618 | ||
|
|
99ac0390f5 | ||
|
|
ff0f0b9172 | ||
|
|
97b570a94c | ||
|
|
a9d56c6843 | ||
|
|
f98470df69 | ||
|
|
eb8be1fe76 | ||
|
|
8a1a26ce4c | ||
|
|
5bf3276e8d | ||
|
|
7ebd5376fe | ||
|
|
93dfcb9357 | ||
|
|
0c8662d2b6 | ||
|
|
d84f1d14b5 | ||
|
|
70219b0f43 | ||
|
|
bd5bc0cd5a | ||
|
|
6e054aacca | ||
|
|
9d581f3d52 | ||
|
|
9bf99891d0 | ||
|
|
d9cf48e81e | ||
|
|
e1b9322b09 | ||
|
|
627b964825 | ||
|
|
a55e36f48d | ||
|
|
01e21b89ee | ||
|
|
788be3313d | ||
|
|
e1ec93304d | ||
|
|
edb99d4c18 | ||
|
|
68477c3dab | ||
|
|
65ba8b23f4 | ||
|
|
621ed9f5f4 | ||
|
|
b26733ba7f | ||
|
|
9836cfb8d6 | ||
|
|
665b6c1236 | ||
|
|
9414338a48 | ||
|
|
de390ea077 | ||
|
|
717b0239fd | ||
|
|
d00735a0c5 | ||
|
|
c23d5ce926 | ||
|
|
b5a3c7f109 | ||
|
|
9c5f685ef1 | ||
|
|
08bb8ef201 | ||
|
|
865ab62f43 | ||
|
|
9948113590 | ||
|
|
c4ee87022b | ||
|
|
ffba4edb06 | ||
|
|
958d0b659b | ||
|
|
aacda28b28 | ||
|
|
29e7e0781b | ||
|
|
7012620e2b | ||
|
|
f1da861018 | ||
|
|
4da31bd566 | ||
|
|
05aa9c82d9 | ||
|
|
a9e58ecd3f | ||
|
|
223544552f | ||
|
|
3d8e9573a4 | ||
|
|
54eb81a087 | ||
|
|
c33c547d66 | ||
|
|
dfe7dd9bdb | ||
|
|
63ccf6474d | ||
|
|
e8ac61e840 | ||
|
|
f00a650705 | ||
|
|
4bde5ce992 | ||
|
|
d31573fa37 | ||
|
|
8b8cde2140 | ||
|
|
0e805e782b | ||
|
|
f5c78d118b | ||
|
|
9d4f213f90 | ||
|
|
168db222c6 | ||
|
|
3d6388e34e | ||
|
|
3ce9bc712a | ||
|
|
e52c0bd0eb | ||
|
|
56c837ccb7 | ||
|
|
423d2be5f8 | ||
|
|
453a1617aa | ||
|
|
b9258c6178 | ||
|
|
55e5841f14 | ||
|
|
ed15e9ba02 | ||
|
|
eedda32e6b | ||
|
|
4c8fea92f3 | ||
|
|
d073055dcd | ||
|
|
e4ac7bb1e5 | ||
|
|
9bac8c57e3 | ||
|
|
6800d3372f | ||
|
|
3153a2c98d | ||
|
|
15b74b94be | ||
|
|
687cb3ad35 | ||
|
|
8f94784124 | ||
|
|
23dd1fc74c | ||
|
|
fa971259e6 | ||
|
|
b0cda32f72 | ||
|
|
08b7968e28 | ||
|
|
4b5fe1349f | ||
|
|
d23da75b32 | ||
|
|
06e027992d | ||
|
|
b5597738d4 | ||
|
|
bc03e58565 | ||
|
|
a00234f1c5 | ||
|
|
34c0f95db2 | ||
|
|
fcb04bcaca | ||
|
|
9464a194db | ||
|
|
9f4b9118cc | ||
|
|
60158217ef | ||
|
|
923e79e2e4 | ||
|
|
866b296d0f | ||
|
|
4053ee9104 | ||
|
|
47fd8c2f76 | ||
|
|
96b9690985 | ||
|
|
df15ef8dab | ||
|
|
002c0fb511 | ||
|
|
7584e38ce4 | ||
|
|
eb47569f8a | ||
|
|
d2a9de78df | ||
|
|
c5138a7ce4 | ||
|
|
c5fa81fe81 | ||
|
|
a074e92296 | ||
|
|
1ddb9456c4 | ||
|
|
58bde34a23 | ||
|
|
339516072b | ||
|
|
931bc3c3a7 | ||
|
|
db1e9ee771 | ||
|
|
a2d971309b | ||
|
|
d05a1dbe70 | ||
|
|
a80601f8d9 | ||
|
|
1c22238756 | ||
|
|
9ff811c5cd | ||
|
|
1ebc05df91 | ||
|
|
386bdfa698 | ||
|
|
1ae7ff771b | ||
|
|
5196b98897 | ||
|
|
e6e63e91a7 | ||
|
|
b4dd98358f | ||
|
|
181c7053e3 | ||
|
|
4d454c5e4b | ||
|
|
5c2191a605 | ||
|
|
bba5bfc890 | ||
|
|
1a5b77dc21 | ||
|
|
b2cf6543b2 | ||
|
|
0385d64223 | ||
|
|
6ebdfe43e4 | ||
|
|
fafec39d41 | ||
|
|
670861bd20 | ||
|
|
08f7db20c1 | ||
|
|
605ec701b7 | ||
|
|
d6aa68ce75 | ||
|
|
eb6cb9fbe9 | ||
|
|
84e1e036c2 | ||
|
|
1498940b10 | ||
|
|
806598b94d | ||
|
|
e26be70bca | ||
|
|
ca45246627 | ||
|
|
f9355dc989 | ||
|
|
9e0b579128 | ||
|
|
ff4a1279f2 | ||
|
|
9b254aa177 | ||
|
|
703d78bbf5 | ||
|
|
d9446c7319 | ||
|
|
b25b645d51 | ||
|
|
d90b3854ca | ||
|
|
bf24c3d017 | ||
|
|
f0bfaa2d7d | ||
|
|
f9f3e3df9a | ||
|
|
f8d5e1cfb5 | ||
|
|
c23848b3c5 | ||
|
|
6d00a2dcd1 | ||
|
|
b535170b21 | ||
|
|
1434184c57 | ||
|
|
7a372b64df | ||
|
|
5406af92bc | ||
|
|
7d65242dc3 | ||
|
|
544a8693b7 | ||
|
|
35a4f24a37 | ||
|
|
ff305edd64 | ||
|
|
efec4358b9 | ||
|
|
db3ca36403 | ||
|
|
42833b44b5 | ||
|
|
5d0a33eebc | ||
|
|
ba2df04b41 | ||
|
|
c6bbdadd79 | ||
|
|
b885bae634 | ||
|
|
680f9744c4 | ||
|
|
2c935c0c72 | ||
|
|
7198063d96 | ||
|
|
d41ebe146b | ||
|
|
4b4e1af059 | ||
|
|
80240b347e | ||
|
|
04b3b3df05 | ||
|
|
2ad5708c43 | ||
|
|
63f3cab4ae | ||
|
|
8cdf03a7a2 | ||
|
|
d78c834ead | ||
|
|
05a976cd99 | ||
|
|
34fb7e46ad | ||
|
|
abac15f3c6 | ||
|
|
b700055ba4 | ||
|
|
23905927e1 | ||
|
|
56be5f1567 | ||
|
|
1807ae22dd | ||
|
|
71646e4653 | ||
|
|
1335c3aca8 | ||
|
|
30455ce255 | ||
|
|
9bf87ae3aa | ||
|
|
abca34cbc0 | ||
|
|
d386878af9 | ||
|
|
685c74d315 | ||
|
|
69e0f1b445 | ||
|
|
79979c6897 | ||
|
|
ba64547616 | ||
|
|
ed5a637d62 | ||
|
|
8a278a1d7e | ||
|
|
77d9cb2f04 | ||
|
|
0459432d96 | ||
|
|
43150d7ac3 | ||
|
|
afe8b594be | ||
|
|
878563c847 | ||
|
|
06947add03 | ||
|
|
5cd47a5e4f | ||
|
|
53de95da5e | ||
|
|
663004ac2b | ||
|
|
6ad9cb224a | ||
|
|
e7752cd578 | ||
|
|
4d2f42361e | ||
|
|
4d8ee01389 | ||
|
|
d01924f488 | ||
|
|
bc56355ec6 | ||
|
|
ac20d95f97 | ||
|
|
1a83c731bd | ||
|
|
ca57a59883 | ||
|
|
b0d619fde2 | ||
|
|
cc7051efd7 | ||
|
|
5137adb94d | ||
|
|
2632941f32 | ||
|
|
137597b0ea | ||
|
|
f670ef1c8e | ||
|
|
ba9d16291b | ||
|
|
725652e924 | ||
|
|
8da0e0e946 | ||
|
|
adb6b1b316 | ||
|
|
1c18de0019 | ||
|
|
363cf58645 | ||
|
|
2f3bdab2b9 | ||
|
|
0d7f036429 | ||
|
|
a650110ba7 | ||
|
|
54b31d149e | ||
|
|
a745475808 | ||
|
|
f11554092b |
11
AUTHORS
11
AUTHORS
@@ -124,3 +124,14 @@ Mohammad Teimori Pabandi
|
|||||||
Roman Le Négrate
|
Roman Le Négrate
|
||||||
Matthias Küch
|
Matthias Küch
|
||||||
Julian Richen
|
Julian Richen
|
||||||
|
Ping O.
|
||||||
|
Mister Hat
|
||||||
|
Peter Ding
|
||||||
|
jackyzy823
|
||||||
|
George Brighton
|
||||||
|
Remita Amine
|
||||||
|
Aurélio A. Heckert
|
||||||
|
Bernhard Minks
|
||||||
|
sceext
|
||||||
|
Zach Bruggeman
|
||||||
|
Tjark Saul
|
||||||
|
|||||||
46
README.md
46
README.md
@@ -17,12 +17,12 @@ youtube-dl - download videos from youtube.com or other video platforms
|
|||||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||||
|
|
||||||
sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||||
sudo chmod a+x /usr/local/bin/youtube-dl
|
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||||
|
|
||||||
If you do not have curl, you can alternatively use a recent wget:
|
If you do not have curl, you can alternatively use a recent wget:
|
||||||
|
|
||||||
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||||
sudo chmod a+x /usr/local/bin/youtube-dl
|
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||||
|
|
||||||
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
|
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
|
||||||
|
|
||||||
@@ -52,8 +52,9 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
-i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist
|
-i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist
|
||||||
--abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
|
--abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
|
||||||
--dump-user-agent Display the current browser identification
|
--dump-user-agent Display the current browser identification
|
||||||
--list-extractors List all supported extractors and the URLs they would handle
|
--list-extractors List all supported extractors
|
||||||
--extractor-descriptions Output descriptions of all supported extractors
|
--extractor-descriptions Output descriptions of all supported extractors
|
||||||
|
--force-generic-extractor Force extraction to use the generic extractor
|
||||||
--default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
|
--default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
|
||||||
Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
|
Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
|
||||||
default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
|
default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
|
||||||
@@ -74,7 +75,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
## Video Selection:
|
## Video Selection:
|
||||||
--playlist-start NUMBER Playlist video to start at (default is 1)
|
--playlist-start NUMBER Playlist video to start at (default is 1)
|
||||||
--playlist-end NUMBER Playlist video to end at (default is last)
|
--playlist-end NUMBER Playlist video to end at (default is last)
|
||||||
--playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
|
--playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
|
||||||
if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
|
if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
|
||||||
download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
|
download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
|
||||||
--match-title REGEX Download only matching titles (regex or caseless sub-string)
|
--match-title REGEX Download only matching titles (regex or caseless sub-string)
|
||||||
@@ -107,7 +108,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--playlist-reverse Download playlist videos in reverse order
|
--playlist-reverse Download playlist videos in reverse order
|
||||||
--xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
|
--xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
|
||||||
--hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
|
--hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
|
||||||
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
|
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,httpie,wget
|
||||||
--external-downloader-args ARGS Give these arguments to the external downloader
|
--external-downloader-args ARGS Give these arguments to the external downloader
|
||||||
|
|
||||||
## Filesystem Options:
|
## Filesystem Options:
|
||||||
@@ -168,7 +169,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--no-progress Do not print progress bar
|
--no-progress Do not print progress bar
|
||||||
--console-title Display progress in console titlebar
|
--console-title Display progress in console titlebar
|
||||||
-v, --verbose Print various debugging information
|
-v, --verbose Print various debugging information
|
||||||
--dump-pages Print downloaded pages to debug problems (very verbose)
|
--dump-pages Print downloaded pages encoded using base64 to debug problems (very verbose)
|
||||||
--write-pages Write downloaded intermediary pages to files in the current directory to debug problems
|
--write-pages Write downloaded intermediary pages to files in the current directory to debug problems
|
||||||
--print-traffic Display sent and read HTTP traffic
|
--print-traffic Display sent and read HTTP traffic
|
||||||
-C, --call-home Contact the youtube-dl server for debugging
|
-C, --call-home Contact the youtube-dl server for debugging
|
||||||
@@ -189,8 +190,8 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--all-formats Download all available video formats
|
--all-formats Download all available video formats
|
||||||
--prefer-free-formats Prefer free video formats unless a specific one is requested
|
--prefer-free-formats Prefer free video formats unless a specific one is requested
|
||||||
-F, --list-formats List all available formats
|
-F, --list-formats List all available formats
|
||||||
--youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
|
--youtube-skip-dash-manifest Do not download the DASH manifests and related data on YouTube videos
|
||||||
--merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no
|
--merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no
|
||||||
merge is required
|
merge is required
|
||||||
|
|
||||||
## Subtitle Options:
|
## Subtitle Options:
|
||||||
@@ -213,17 +214,18 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
|
--audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
|
||||||
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
|
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
|
||||||
5)
|
5)
|
||||||
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
|
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)
|
||||||
|
--postprocessor-args ARGS Give these arguments to the postprocessor
|
||||||
-k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default
|
-k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default
|
||||||
--no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default
|
--no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default
|
||||||
--embed-subs Embed subtitles in the video (only for mkv and mp4 videos)
|
--embed-subs Embed subtitles in the video (only for mkv and mp4 videos)
|
||||||
--embed-thumbnail Embed thumbnail in the audio as cover art
|
--embed-thumbnail Embed thumbnail in the audio as cover art
|
||||||
--add-metadata Write metadata to the video file
|
--add-metadata Write metadata to the video file
|
||||||
--metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
|
--metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
|
||||||
parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
|
parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s -
|
||||||
%(title)s" matches a title like "Coldplay - Paradise"
|
%(title)s" matches a title like "Coldplay - Paradise"
|
||||||
--xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards)
|
--xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards)
|
||||||
--fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
|
--fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default;
|
||||||
fix file if we can, warn otherwise)
|
fix file if we can, warn otherwise)
|
||||||
--prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
|
--prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
|
||||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors
|
--prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors
|
||||||
@@ -236,6 +238,26 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
|
|
||||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
|
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
|
||||||
|
|
||||||
|
### Authentication with `.netrc` file ###
|
||||||
|
|
||||||
|
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in shell command history. You can achieve this using [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
|
||||||
|
```
|
||||||
|
touch $HOME/.netrc
|
||||||
|
chmod a-rwx,u+rw $HOME/.netrc
|
||||||
|
```
|
||||||
|
After that you can add credentials for extractor in the following format, where *extractor* is the name of extractor in lowercase:
|
||||||
|
```
|
||||||
|
machine <extractor> login <login> password <password>
|
||||||
|
```
|
||||||
|
For example:
|
||||||
|
```
|
||||||
|
machine youtube login myaccount@gmail.com password my_youtube_password
|
||||||
|
machine twitch login my_twitch_account_name password my_twitch_password
|
||||||
|
```
|
||||||
|
To activate authentication with `.netrc` file you should pass `--netrc` to youtube-dl or to place it in [configuration file](#configuration).
|
||||||
|
|
||||||
|
On Windows you may also need to setup `%HOME%` environment variable manually.
|
||||||
|
|
||||||
# OUTPUT TEMPLATE
|
# OUTPUT TEMPLATE
|
||||||
|
|
||||||
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parenthesis, followed by a lowercase S. Allowed names are:
|
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parenthesis, followed by a lowercase S. Allowed names are:
|
||||||
@@ -379,7 +401,7 @@ In February 2015, the new YouTube player contained a character sequence in a str
|
|||||||
|
|
||||||
### HTTP Error 429: Too Many Requests or 402: Payment Required
|
### HTTP Error 429: Too Many Requests or 402: Payment Required
|
||||||
|
|
||||||
These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--network-address` options](#network-options) to select another IP address.
|
These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
|
||||||
|
|
||||||
### SyntaxError: Non-ASCII character ###
|
### SyntaxError: Non-ASCII character ###
|
||||||
|
|
||||||
|
|||||||
@@ -10,12 +10,14 @@
|
|||||||
- **56.com**
|
- **56.com**
|
||||||
- **5min**
|
- **5min**
|
||||||
- **8tracks**
|
- **8tracks**
|
||||||
|
- **91porn**
|
||||||
- **9gag**
|
- **9gag**
|
||||||
- **abc.net.au**
|
- **abc.net.au**
|
||||||
- **Abc7News**
|
- **Abc7News**
|
||||||
- **AcademicEarth:Course**
|
- **AcademicEarth:Course**
|
||||||
- **AddAnime**
|
- **AddAnime**
|
||||||
- **AdobeTV**
|
- **AdobeTV**
|
||||||
|
- **AdobeTVVideo**
|
||||||
- **AdultSwim**
|
- **AdultSwim**
|
||||||
- **Aftenposten**
|
- **Aftenposten**
|
||||||
- **Aftonbladet**
|
- **Aftonbladet**
|
||||||
@@ -26,8 +28,8 @@
|
|||||||
- **anitube.se**
|
- **anitube.se**
|
||||||
- **AnySex**
|
- **AnySex**
|
||||||
- **Aparat**
|
- **Aparat**
|
||||||
- **AppleDailyAnimationNews**
|
- **AppleConnect**
|
||||||
- **AppleDailyRealtimeNews**
|
- **AppleDaily**: 臺灣蘋果日報
|
||||||
- **AppleTrailers**
|
- **AppleTrailers**
|
||||||
- **archive.org**: archive.org videos
|
- **archive.org**: archive.org videos
|
||||||
- **ARD**
|
- **ARD**
|
||||||
@@ -44,11 +46,12 @@
|
|||||||
- **audiomack**
|
- **audiomack**
|
||||||
- **audiomack:album**
|
- **audiomack:album**
|
||||||
- **Azubu**
|
- **Azubu**
|
||||||
- **BaiduVideo**
|
- **BaiduVideo**: 百度视频
|
||||||
- **bambuser**
|
- **bambuser**
|
||||||
- **bambuser:channel**
|
- **bambuser:channel**
|
||||||
- **Bandcamp**
|
- **Bandcamp**
|
||||||
- **Bandcamp:album**
|
- **Bandcamp:album**
|
||||||
|
- **bbc**: BBC
|
||||||
- **bbc.co.uk**: BBC iPlayer
|
- **bbc.co.uk**: BBC iPlayer
|
||||||
- **BeatportPro**
|
- **BeatportPro**
|
||||||
- **Beeg**
|
- **Beeg**
|
||||||
@@ -105,11 +108,12 @@
|
|||||||
- **Crunchyroll**
|
- **Crunchyroll**
|
||||||
- **crunchyroll:playlist**
|
- **crunchyroll:playlist**
|
||||||
- **CSpan**: C-SPAN
|
- **CSpan**: C-SPAN
|
||||||
- **CtsNews**
|
- **CtsNews**: 華視新聞
|
||||||
- **culturebox.francetvinfo.fr**
|
- **culturebox.francetvinfo.fr**
|
||||||
- **dailymotion**
|
- **dailymotion**
|
||||||
- **dailymotion:playlist**
|
- **dailymotion:playlist**
|
||||||
- **dailymotion:user**
|
- **dailymotion:user**
|
||||||
|
- **DailymotionCloud**
|
||||||
- **daum.net**
|
- **daum.net**
|
||||||
- **DBTV**
|
- **DBTV**
|
||||||
- **DctpTv**
|
- **DctpTv**
|
||||||
@@ -119,7 +123,9 @@
|
|||||||
- **Discovery**
|
- **Discovery**
|
||||||
- **divxstage**: DivxStage
|
- **divxstage**: DivxStage
|
||||||
- **Dotsub**
|
- **Dotsub**
|
||||||
- **DouyuTV**
|
- **DouyuTV**: 斗鱼
|
||||||
|
- **dramafever**
|
||||||
|
- **dramafever:series**
|
||||||
- **DRBonanza**
|
- **DRBonanza**
|
||||||
- **Dropbox**
|
- **Dropbox**
|
||||||
- **DrTuber**
|
- **DrTuber**
|
||||||
@@ -152,8 +158,8 @@
|
|||||||
- **fc2**
|
- **fc2**
|
||||||
- **fernsehkritik.tv**
|
- **fernsehkritik.tv**
|
||||||
- **fernsehkritik.tv:postecke**
|
- **fernsehkritik.tv:postecke**
|
||||||
- **Firedrive**
|
|
||||||
- **Firstpost**
|
- **Firstpost**
|
||||||
|
- **FiveTV**
|
||||||
- **Flickr**
|
- **Flickr**
|
||||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||||
- **FootyRoom**
|
- **FootyRoom**
|
||||||
@@ -218,6 +224,8 @@
|
|||||||
- **instagram:user**: Instagram user profile
|
- **instagram:user**: Instagram user profile
|
||||||
- **InternetVideoArchive**
|
- **InternetVideoArchive**
|
||||||
- **IPrima**
|
- **IPrima**
|
||||||
|
- **iqiyi**: 爱奇艺
|
||||||
|
- **Ir90Tv**
|
||||||
- **ivi**: ivi.ru
|
- **ivi**: ivi.ru
|
||||||
- **ivi:compilation**: ivi.ru compilations
|
- **ivi:compilation**: ivi.ru compilations
|
||||||
- **Izlesene**
|
- **Izlesene**
|
||||||
@@ -230,6 +238,7 @@
|
|||||||
- **KanalPlay**: Kanal 5/9/11 Play
|
- **KanalPlay**: Kanal 5/9/11 Play
|
||||||
- **Kankan**
|
- **Kankan**
|
||||||
- **Karaoketv**
|
- **Karaoketv**
|
||||||
|
- **KarriereVideos**
|
||||||
- **keek**
|
- **keek**
|
||||||
- **KeezMovies**
|
- **KeezMovies**
|
||||||
- **KhanAcademy**
|
- **KhanAcademy**
|
||||||
@@ -237,9 +246,16 @@
|
|||||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||||
- **KrasView**: Красвью
|
- **KrasView**: Красвью
|
||||||
- **Ku6**
|
- **Ku6**
|
||||||
|
- **kuwo:album**: 酷我音乐 - 专辑
|
||||||
|
- **kuwo:category**: 酷我音乐 - 分类
|
||||||
|
- **kuwo:chart**: 酷我音乐 - 排行榜
|
||||||
|
- **kuwo:mv**: 酷我音乐 - MV
|
||||||
|
- **kuwo:singer**: 酷我音乐 - 歌手
|
||||||
|
- **kuwo:song**: 酷我音乐
|
||||||
- **la7.tv**
|
- **la7.tv**
|
||||||
- **Laola1Tv**
|
- **Laola1Tv**
|
||||||
- **Letv**
|
- **Lecture2Go**
|
||||||
|
- **Letv**: 乐视网
|
||||||
- **LetvPlaylist**
|
- **LetvPlaylist**
|
||||||
- **LetvTv**
|
- **LetvTv**
|
||||||
- **Libsyn**
|
- **Libsyn**
|
||||||
@@ -277,6 +293,7 @@
|
|||||||
- **Motherless**
|
- **Motherless**
|
||||||
- **Motorsport**: motorsport.com
|
- **Motorsport**: motorsport.com
|
||||||
- **MovieClips**
|
- **MovieClips**
|
||||||
|
- **MovieFap**
|
||||||
- **Moviezine**
|
- **Moviezine**
|
||||||
- **movshare**: MovShare
|
- **movshare**: MovShare
|
||||||
- **MPORA**
|
- **MPORA**
|
||||||
@@ -290,6 +307,7 @@
|
|||||||
- **MySpace**
|
- **MySpace**
|
||||||
- **MySpace:album**
|
- **MySpace:album**
|
||||||
- **MySpass**
|
- **MySpass**
|
||||||
|
- **Myvi**
|
||||||
- **myvideo**
|
- **myvideo**
|
||||||
- **MyVidster**
|
- **MyVidster**
|
||||||
- **N-JOY**
|
- **N-JOY**
|
||||||
@@ -305,11 +323,18 @@
|
|||||||
- **NDTV**
|
- **NDTV**
|
||||||
- **NerdCubedFeed**
|
- **NerdCubedFeed**
|
||||||
- **Nerdist**
|
- **Nerdist**
|
||||||
|
- **netease:album**: 网易云音乐 - 专辑
|
||||||
|
- **netease:djradio**: 网易云音乐 - 电台
|
||||||
|
- **netease:mv**: 网易云音乐 - MV
|
||||||
|
- **netease:playlist**: 网易云音乐 - 歌单
|
||||||
|
- **netease:program**: 网易云音乐 - 电台节目
|
||||||
|
- **netease:singer**: 网易云音乐 - 歌手
|
||||||
|
- **netease:song**: 网易云音乐
|
||||||
- **Netzkino**
|
- **Netzkino**
|
||||||
- **Newgrounds**
|
- **Newgrounds**
|
||||||
- **Newstube**
|
- **Newstube**
|
||||||
- **NextMedia**
|
- **NextMedia**: 蘋果日報
|
||||||
- **NextMediaActionNews**
|
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||||
- **nfb**: National Film Board of Canada
|
- **nfb**: National Film Board of Canada
|
||||||
- **nfl.com**
|
- **nfl.com**
|
||||||
- **nhl.com**
|
- **nhl.com**
|
||||||
@@ -320,16 +345,19 @@
|
|||||||
- **Noco**
|
- **Noco**
|
||||||
- **Normalboots**
|
- **Normalboots**
|
||||||
- **NosVideo**
|
- **NosVideo**
|
||||||
|
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
|
||||||
- **novamov**: NovaMov
|
- **novamov**: NovaMov
|
||||||
- **Nowness**
|
- **Nowness**
|
||||||
|
- **NowTV**
|
||||||
- **nowvideo**: NowVideo
|
- **nowvideo**: NowVideo
|
||||||
- **npo.nl**
|
- **npo**: npo.nl and ntr.nl
|
||||||
|
- **npo**: npo.nl and ntr.nl
|
||||||
- **npo.nl:live**
|
- **npo.nl:live**
|
||||||
- **npo.nl:radio**
|
- **npo.nl:radio**
|
||||||
- **npo.nl:radio:fragment**
|
- **npo.nl:radio:fragment**
|
||||||
- **NRK**
|
- **NRK**
|
||||||
- **NRKPlaylist**
|
- **NRKPlaylist**
|
||||||
- **NRKTV**
|
- **NRKTV**: NRK TV and NRK Radio
|
||||||
- **ntv.ru**
|
- **ntv.ru**
|
||||||
- **Nuvid**
|
- **Nuvid**
|
||||||
- **NYTimes**
|
- **NYTimes**
|
||||||
@@ -338,6 +366,7 @@
|
|||||||
- **Odnoklassniki**
|
- **Odnoklassniki**
|
||||||
- **OktoberfestTV**
|
- **OktoberfestTV**
|
||||||
- **on.aol.com**
|
- **on.aol.com**
|
||||||
|
- **OnionStudios**
|
||||||
- **Ooyala**
|
- **Ooyala**
|
||||||
- **OoyalaExternal**
|
- **OoyalaExternal**
|
||||||
- **OpenFilm**
|
- **OpenFilm**
|
||||||
@@ -351,6 +380,7 @@
|
|||||||
- **PhilharmonieDeParis**: Philharmonie de Paris
|
- **PhilharmonieDeParis**: Philharmonie de Paris
|
||||||
- **Phoenix**
|
- **Phoenix**
|
||||||
- **Photobucket**
|
- **Photobucket**
|
||||||
|
- **Pinkbike**
|
||||||
- **Pladform**
|
- **Pladform**
|
||||||
- **PlanetaPlay**
|
- **PlanetaPlay**
|
||||||
- **play.fm**
|
- **play.fm**
|
||||||
@@ -371,10 +401,11 @@
|
|||||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||||
- **Puls4**
|
- **Puls4**
|
||||||
- **Pyvideo**
|
- **Pyvideo**
|
||||||
- **qqmusic**
|
- **qqmusic**: QQ音乐
|
||||||
- **qqmusic:album**
|
- **qqmusic:album**: QQ音乐 - 专辑
|
||||||
- **qqmusic:singer**
|
- **qqmusic:playlist**: QQ音乐 - 歌单
|
||||||
- **qqmusic:toplist**
|
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||||
|
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||||
- **QuickVid**
|
- **QuickVid**
|
||||||
- **R7**
|
- **R7**
|
||||||
- **radio.de**
|
- **radio.de**
|
||||||
@@ -383,6 +414,7 @@
|
|||||||
- **RadioJavan**
|
- **RadioJavan**
|
||||||
- **Rai**
|
- **Rai**
|
||||||
- **RBMARadio**
|
- **RBMARadio**
|
||||||
|
- **RDS**: RDS.ca
|
||||||
- **RedTube**
|
- **RedTube**
|
||||||
- **Restudy**
|
- **Restudy**
|
||||||
- **ReverbNation**
|
- **ReverbNation**
|
||||||
@@ -393,7 +425,6 @@
|
|||||||
- **Rte**
|
- **Rte**
|
||||||
- **rtl.nl**: rtl.nl and rtlxl.nl
|
- **rtl.nl**: rtl.nl and rtlxl.nl
|
||||||
- **RTL2**
|
- **RTL2**
|
||||||
- **RTLnow**
|
|
||||||
- **RTP**
|
- **RTP**
|
||||||
- **RTS**: RTS.ch
|
- **RTS**: RTS.ch
|
||||||
- **rtve.es:alacarta**: RTVE a la carta
|
- **rtve.es:alacarta**: RTVE a la carta
|
||||||
@@ -406,6 +437,7 @@
|
|||||||
- **rutube:movie**: Rutube movies
|
- **rutube:movie**: Rutube movies
|
||||||
- **rutube:person**: Rutube person videos
|
- **rutube:person**: Rutube person videos
|
||||||
- **RUTV**: RUTV.RU
|
- **RUTV**: RUTV.RU
|
||||||
|
- **Ruutu**
|
||||||
- **safari**: safaribooksonline.com online video
|
- **safari**: safaribooksonline.com online video
|
||||||
- **safari:course**: safaribooksonline.com online courses
|
- **safari:course**: safaribooksonline.com online courses
|
||||||
- **Sandia**: Sandia National Laboratories
|
- **Sandia**: Sandia National Laboratories
|
||||||
@@ -430,9 +462,12 @@
|
|||||||
- **smotri:broadcast**: Smotri.com broadcasts
|
- **smotri:broadcast**: Smotri.com broadcasts
|
||||||
- **smotri:community**: Smotri.com community videos
|
- **smotri:community**: Smotri.com community videos
|
||||||
- **smotri:user**: Smotri.com user videos
|
- **smotri:user**: Smotri.com user videos
|
||||||
|
- **SnagFilms**
|
||||||
|
- **SnagFilmsEmbed**
|
||||||
- **Snotr**
|
- **Snotr**
|
||||||
- **Sockshare**
|
|
||||||
- **Sohu**
|
- **Sohu**
|
||||||
|
- **soompi**
|
||||||
|
- **soompi:show**
|
||||||
- **soundcloud**
|
- **soundcloud**
|
||||||
- **soundcloud:playlist**
|
- **soundcloud:playlist**
|
||||||
- **soundcloud:set**
|
- **soundcloud:set**
|
||||||
@@ -455,6 +490,7 @@
|
|||||||
- **SportBox**
|
- **SportBox**
|
||||||
- **SportBoxEmbed**
|
- **SportBoxEmbed**
|
||||||
- **SportDeutschland**
|
- **SportDeutschland**
|
||||||
|
- **Sportschau**
|
||||||
- **Srf**
|
- **Srf**
|
||||||
- **SRMediathek**: Saarländischer Rundfunk
|
- **SRMediathek**: Saarländischer Rundfunk
|
||||||
- **SSA**
|
- **SSA**
|
||||||
@@ -480,7 +516,6 @@
|
|||||||
- **TechTalks**
|
- **TechTalks**
|
||||||
- **techtv.mit.edu**
|
- **techtv.mit.edu**
|
||||||
- **ted**
|
- **ted**
|
||||||
- **tegenlicht.vpro.nl**
|
|
||||||
- **TeleBruxelles**
|
- **TeleBruxelles**
|
||||||
- **telecinco.es**
|
- **telecinco.es**
|
||||||
- **TeleMB**
|
- **TeleMB**
|
||||||
@@ -491,6 +526,7 @@
|
|||||||
- **TheOnion**
|
- **TheOnion**
|
||||||
- **ThePlatform**
|
- **ThePlatform**
|
||||||
- **TheSixtyOne**
|
- **TheSixtyOne**
|
||||||
|
- **ThisAmericanLife**
|
||||||
- **ThisAV**
|
- **ThisAV**
|
||||||
- **THVideo**
|
- **THVideo**
|
||||||
- **THVideoPlaylist**
|
- **THVideoPlaylist**
|
||||||
@@ -507,6 +543,7 @@
|
|||||||
- **Trilulilu**
|
- **Trilulilu**
|
||||||
- **TruTube**
|
- **TruTube**
|
||||||
- **Tube8**
|
- **Tube8**
|
||||||
|
- **TubiTv**
|
||||||
- **Tudou**
|
- **Tudou**
|
||||||
- **Tumblr**
|
- **Tumblr**
|
||||||
- **TuneIn**
|
- **TuneIn**
|
||||||
@@ -516,6 +553,8 @@
|
|||||||
- **TV2**
|
- **TV2**
|
||||||
- **TV2Article**
|
- **TV2Article**
|
||||||
- **TV4**: tv4.se and tv4play.se
|
- **TV4**: tv4.se and tv4play.se
|
||||||
|
- **TVC**
|
||||||
|
- **TVCArticle**
|
||||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||||
- **tvp.pl**
|
- **tvp.pl**
|
||||||
- **tvp.pl:Series**
|
- **tvp.pl:Series**
|
||||||
@@ -528,10 +567,11 @@
|
|||||||
- **twitch:stream**
|
- **twitch:stream**
|
||||||
- **twitch:video**
|
- **twitch:video**
|
||||||
- **twitch:vod**
|
- **twitch:vod**
|
||||||
|
- **TwitterCard**
|
||||||
- **Ubu**
|
- **Ubu**
|
||||||
- **udemy**
|
- **udemy**
|
||||||
- **udemy:course**
|
- **udemy:course**
|
||||||
- **UDNEmbed**
|
- **UDNEmbed**: 聯合影音
|
||||||
- **Ultimedia**
|
- **Ultimedia**
|
||||||
- **Unistra**
|
- **Unistra**
|
||||||
- **Urort**: NRK P3 Urørt
|
- **Urort**: NRK P3 Urørt
|
||||||
@@ -564,6 +604,7 @@
|
|||||||
- **vier:videos**
|
- **vier:videos**
|
||||||
- **Viewster**
|
- **Viewster**
|
||||||
- **viki**
|
- **viki**
|
||||||
|
- **viki:channel**
|
||||||
- **vimeo**
|
- **vimeo**
|
||||||
- **vimeo:album**
|
- **vimeo:album**
|
||||||
- **vimeo:channel**
|
- **vimeo:channel**
|
||||||
@@ -575,8 +616,8 @@
|
|||||||
- **Vimple**: Vimple - one-click video hosting
|
- **Vimple**: Vimple - one-click video hosting
|
||||||
- **Vine**
|
- **Vine**
|
||||||
- **vine:user**
|
- **vine:user**
|
||||||
- **vk.com**
|
- **vk**: VK
|
||||||
- **vk.com:user-videos**: vk.com:All of a user's videos
|
- **vk:uservideos**: VK - User's Videos
|
||||||
- **Vodlocker**
|
- **Vodlocker**
|
||||||
- **VoiceRepublic**
|
- **VoiceRepublic**
|
||||||
- **Vporn**
|
- **Vporn**
|
||||||
@@ -592,32 +633,36 @@
|
|||||||
- **wdr:mobile**
|
- **wdr:mobile**
|
||||||
- **WDRMaus**: Sendung mit der Maus
|
- **WDRMaus**: Sendung mit der Maus
|
||||||
- **WebOfStories**
|
- **WebOfStories**
|
||||||
|
- **WebOfStoriesPlaylist**
|
||||||
- **Weibo**
|
- **Weibo**
|
||||||
- **Wimp**
|
- **Wimp**
|
||||||
- **Wistia**
|
- **Wistia**
|
||||||
|
- **WNL**
|
||||||
- **WorldStarHipHop**
|
- **WorldStarHipHop**
|
||||||
- **wrzuta.pl**
|
- **wrzuta.pl**
|
||||||
- **WSJ**: Wall Street Journal
|
- **WSJ**: Wall Street Journal
|
||||||
- **XBef**
|
- **XBef**
|
||||||
- **XboxClips**
|
- **XboxClips**
|
||||||
- **XHamster**
|
- **XHamster**
|
||||||
|
- **XHamsterEmbed**
|
||||||
- **XMinus**
|
- **XMinus**
|
||||||
- **XNXX**
|
- **XNXX**
|
||||||
- **Xstream**
|
- **Xstream**
|
||||||
- **XTube**
|
- **XTube**
|
||||||
- **XTubeUser**: XTube user profile
|
- **XTubeUser**: XTube user profile
|
||||||
- **Xuite**
|
- **Xuite**: 隨意窩Xuite影音
|
||||||
- **XVideos**
|
- **XVideos**
|
||||||
- **XXXYMovies**
|
- **XXXYMovies**
|
||||||
- **Yahoo**: Yahoo screen and movies
|
- **Yahoo**: Yahoo screen and movies
|
||||||
- **Yam**
|
- **Yam**: 蕃薯藤yam天空部落
|
||||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||||
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||||
- **YesJapan**
|
- **YesJapan**
|
||||||
|
- **yinyuetai:video**: 音悦Tai
|
||||||
- **Ynet**
|
- **Ynet**
|
||||||
- **YouJizz**
|
- **YouJizz**
|
||||||
- **Youku**
|
- **youku**: 优酷
|
||||||
- **YouPorn**
|
- **YouPorn**
|
||||||
- **YourUpload**
|
- **YourUpload**
|
||||||
- **youtube**: YouTube.com
|
- **youtube**: YouTube.com
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ class TestAES(unittest.TestCase):
|
|||||||
encrypted = base64.b64encode(
|
encrypted = base64.b64encode(
|
||||||
intlist_to_bytes(self.iv[:8]) +
|
intlist_to_bytes(self.iv[:8]) +
|
||||||
b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
|
b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
|
||||||
)
|
).decode('utf-8')
|
||||||
decrypted = (aes_decrypt_text(encrypted, password, 16))
|
decrypted = (aes_decrypt_text(encrypted, password, 16))
|
||||||
self.assertEqual(decrypted, self.secret_msg)
|
self.assertEqual(decrypted, self.secret_msg)
|
||||||
|
|
||||||
@@ -47,7 +47,7 @@ class TestAES(unittest.TestCase):
|
|||||||
encrypted = base64.b64encode(
|
encrypted = base64.b64encode(
|
||||||
intlist_to_bytes(self.iv[:8]) +
|
intlist_to_bytes(self.iv[:8]) +
|
||||||
b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
|
b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
|
||||||
)
|
).decode('utf-8')
|
||||||
decrypted = (aes_decrypt_text(encrypted, password, 32))
|
decrypted = (aes_decrypt_text(encrypted, password, 32))
|
||||||
self.assertEqual(decrypted, self.secret_msg)
|
self.assertEqual(decrypted, self.secret_msg)
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,8 @@ from youtube_dl.utils import get_filesystem_encoding
|
|||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urllib_parse_unquote_plus,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -42,5 +44,28 @@ class TestCompat(unittest.TestCase):
|
|||||||
dir(youtube_dl.compat))) - set(['unicode_literals'])
|
dir(youtube_dl.compat))) - set(['unicode_literals'])
|
||||||
self.assertEqual(all_names, sorted(present_names))
|
self.assertEqual(all_names, sorted(present_names))
|
||||||
|
|
||||||
|
def test_compat_urllib_parse_unquote(self):
|
||||||
|
self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
|
||||||
|
self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
|
||||||
|
self.assertEqual(compat_urllib_parse_unquote(''), '')
|
||||||
|
self.assertEqual(compat_urllib_parse_unquote('%'), '%')
|
||||||
|
self.assertEqual(compat_urllib_parse_unquote('%%'), '%%')
|
||||||
|
self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%')
|
||||||
|
self.assertEqual(compat_urllib_parse_unquote('%2F'), '/')
|
||||||
|
self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
|
||||||
|
self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波')
|
||||||
|
self.assertEqual(
|
||||||
|
compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
|
||||||
|
%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
|
||||||
|
'''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
|
||||||
|
%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
|
||||||
|
self.assertEqual(
|
||||||
|
compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''),
|
||||||
|
'''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')
|
||||||
|
|
||||||
|
def test_compat_urllib_parse_unquote_plus(self):
|
||||||
|
self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
|
||||||
|
self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -266,7 +266,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(set(subtitles.keys()), set(['no']))
|
self.assertEqual(set(subtitles.keys()), set(['no']))
|
||||||
self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a')
|
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
|
||||||
|
|
||||||
|
|
||||||
class TestRaiSubtitles(BaseTestSubtitles):
|
class TestRaiSubtitles(BaseTestSubtitles):
|
||||||
|
|||||||
@@ -324,6 +324,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(parse_duration('02:03:04'), 7384)
|
self.assertEqual(parse_duration('02:03:04'), 7384)
|
||||||
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
||||||
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
|
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
|
||||||
|
self.assertEqual(parse_duration('87 Min.'), 5220)
|
||||||
|
|
||||||
def test_fix_xml_ampersands(self):
|
def test_fix_xml_ampersands(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ from .utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
|
HEADRequest,
|
||||||
locked_file,
|
locked_file,
|
||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
@@ -118,7 +119,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
username: Username for authentication purposes.
|
username: Username for authentication purposes.
|
||||||
password: Password for authentication purposes.
|
password: Password for authentication purposes.
|
||||||
videopassword: Password for acces a video.
|
videopassword: Password for accessing a video.
|
||||||
usenetrc: Use netrc for authentication instead.
|
usenetrc: Use netrc for authentication instead.
|
||||||
verbose: Print additional info to stdout.
|
verbose: Print additional info to stdout.
|
||||||
quiet: Do not print messages to stdout.
|
quiet: Do not print messages to stdout.
|
||||||
@@ -138,6 +139,7 @@ class YoutubeDL(object):
|
|||||||
outtmpl: Template for output names.
|
outtmpl: Template for output names.
|
||||||
restrictfilenames: Do not allow "&" and spaces in file names
|
restrictfilenames: Do not allow "&" and spaces in file names
|
||||||
ignoreerrors: Do not stop on download errors.
|
ignoreerrors: Do not stop on download errors.
|
||||||
|
force_generic_extractor: Force downloader to use the generic extractor
|
||||||
nooverwrites: Prevent overwriting files.
|
nooverwrites: Prevent overwriting files.
|
||||||
playliststart: Playlist item to start at.
|
playliststart: Playlist item to start at.
|
||||||
playlistend: Playlist item to end at.
|
playlistend: Playlist item to end at.
|
||||||
@@ -260,6 +262,8 @@ class YoutubeDL(object):
|
|||||||
The following options are used by the post processors:
|
The following options are used by the post processors:
|
||||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||||
otherwise prefer avconv.
|
otherwise prefer avconv.
|
||||||
|
postprocessor_args: A list of additional command-line arguments for the
|
||||||
|
postprocessor.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
params = None
|
params = None
|
||||||
@@ -625,13 +629,16 @@ class YoutubeDL(object):
|
|||||||
info_dict.setdefault(key, value)
|
info_dict.setdefault(key, value)
|
||||||
|
|
||||||
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
||||||
process=True):
|
process=True, force_generic_extractor=False):
|
||||||
'''
|
'''
|
||||||
Returns a list with a dictionary for each video we find.
|
Returns a list with a dictionary for each video we find.
|
||||||
If 'download', also downloads the videos.
|
If 'download', also downloads the videos.
|
||||||
extra_info is a dict containing the extra values to add to each result
|
extra_info is a dict containing the extra values to add to each result
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
if not ie_key and force_generic_extractor:
|
||||||
|
ie_key = 'Generic'
|
||||||
|
|
||||||
if ie_key:
|
if ie_key:
|
||||||
ies = [self.get_info_extractor(ie_key)]
|
ies = [self.get_info_extractor(ie_key)]
|
||||||
else:
|
else:
|
||||||
@@ -923,8 +930,9 @@ class YoutubeDL(object):
|
|||||||
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
||||||
if audiovideo_formats:
|
if audiovideo_formats:
|
||||||
return audiovideo_formats[format_idx]
|
return audiovideo_formats[format_idx]
|
||||||
# for audio only urls, select the best/worst audio format
|
# for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
|
||||||
elif all(f.get('acodec') != 'none' for f in available_formats):
|
elif (all(f.get('acodec') != 'none' for f in available_formats) or
|
||||||
|
all(f.get('vcodec') != 'none' for f in available_formats)):
|
||||||
return available_formats[format_idx]
|
return available_formats[format_idx]
|
||||||
elif format_spec == 'bestaudio':
|
elif format_spec == 'bestaudio':
|
||||||
audio_formats = [
|
audio_formats = [
|
||||||
@@ -1002,7 +1010,7 @@ class YoutubeDL(object):
|
|||||||
t.get('preference'), t.get('width'), t.get('height'),
|
t.get('preference'), t.get('width'), t.get('height'),
|
||||||
t.get('id'), t.get('url')))
|
t.get('id'), t.get('url')))
|
||||||
for i, t in enumerate(thumbnails):
|
for i, t in enumerate(thumbnails):
|
||||||
if 'width' in t and 'height' in t:
|
if t.get('width') and t.get('height'):
|
||||||
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
||||||
if t.get('id') is None:
|
if t.get('id') is None:
|
||||||
t['id'] = '%d' % i
|
t['id'] = '%d' % i
|
||||||
@@ -1014,13 +1022,13 @@ class YoutubeDL(object):
|
|||||||
info_dict['display_id'] = info_dict['id']
|
info_dict['display_id'] = info_dict['id']
|
||||||
|
|
||||||
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
|
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
|
||||||
# Working around negative timestamps in Windows
|
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||||
# (see http://bugs.python.org/issue1646728)
|
# see http://bugs.python.org/issue1646728)
|
||||||
if info_dict['timestamp'] < 0 and os.name == 'nt':
|
try:
|
||||||
info_dict['timestamp'] = 0
|
upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
|
||||||
upload_date = datetime.datetime.utcfromtimestamp(
|
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
||||||
info_dict['timestamp'])
|
except (ValueError, OverflowError, OSError):
|
||||||
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
pass
|
||||||
|
|
||||||
if self.params.get('listsubtitles', False):
|
if self.params.get('listsubtitles', False):
|
||||||
if 'automatic_captions' in info_dict:
|
if 'automatic_captions' in info_dict:
|
||||||
@@ -1031,12 +1039,6 @@ class YoutubeDL(object):
|
|||||||
info_dict['id'], info_dict.get('subtitles'),
|
info_dict['id'], info_dict.get('subtitles'),
|
||||||
info_dict.get('automatic_captions'))
|
info_dict.get('automatic_captions'))
|
||||||
|
|
||||||
# This extractors handle format selection themselves
|
|
||||||
if info_dict['extractor'] in ['Youku']:
|
|
||||||
if download:
|
|
||||||
self.process_info(info_dict)
|
|
||||||
return info_dict
|
|
||||||
|
|
||||||
# We now pick which formats have to be downloaded
|
# We now pick which formats have to be downloaded
|
||||||
if info_dict.get('formats') is None:
|
if info_dict.get('formats') is None:
|
||||||
# There's only one format available
|
# There's only one format available
|
||||||
@@ -1047,6 +1049,8 @@ class YoutubeDL(object):
|
|||||||
if not formats:
|
if not formats:
|
||||||
raise ExtractorError('No video formats found!')
|
raise ExtractorError('No video formats found!')
|
||||||
|
|
||||||
|
formats_dict = {}
|
||||||
|
|
||||||
# We check that all the formats have the format and format_id fields
|
# We check that all the formats have the format and format_id fields
|
||||||
for i, format in enumerate(formats):
|
for i, format in enumerate(formats):
|
||||||
if 'url' not in format:
|
if 'url' not in format:
|
||||||
@@ -1054,6 +1058,18 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
if format.get('format_id') is None:
|
if format.get('format_id') is None:
|
||||||
format['format_id'] = compat_str(i)
|
format['format_id'] = compat_str(i)
|
||||||
|
format_id = format['format_id']
|
||||||
|
if format_id not in formats_dict:
|
||||||
|
formats_dict[format_id] = []
|
||||||
|
formats_dict[format_id].append(format)
|
||||||
|
|
||||||
|
# Make sure all formats have unique format_id
|
||||||
|
for format_id, ambiguous_formats in formats_dict.items():
|
||||||
|
if len(ambiguous_formats) > 1:
|
||||||
|
for i, format in enumerate(ambiguous_formats):
|
||||||
|
format['format_id'] = '%s-%d' % (format_id, i)
|
||||||
|
|
||||||
|
for i, format in enumerate(formats):
|
||||||
if format.get('format') is None:
|
if format.get('format') is None:
|
||||||
format['format'] = '{id} - {res}{note}'.format(
|
format['format'] = '{id} - {res}{note}'.format(
|
||||||
id=format['format_id'],
|
id=format['format_id'],
|
||||||
@@ -1088,7 +1104,8 @@ class YoutubeDL(object):
|
|||||||
if req_format is None:
|
if req_format is None:
|
||||||
req_format_list = []
|
req_format_list = []
|
||||||
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
||||||
info_dict['extractor'] in ['youtube', 'ted']):
|
info_dict['extractor'] in ['youtube', 'ted'] and
|
||||||
|
not info_dict.get('is_live')):
|
||||||
merger = FFmpegMergerPP(self)
|
merger = FFmpegMergerPP(self)
|
||||||
if merger.available and merger.can_merge():
|
if merger.available and merger.can_merge():
|
||||||
req_format_list.append('bestvideo+bestaudio')
|
req_format_list.append('bestvideo+bestaudio')
|
||||||
@@ -1483,7 +1500,8 @@ class YoutubeDL(object):
|
|||||||
for url in url_list:
|
for url in url_list:
|
||||||
try:
|
try:
|
||||||
# It also downloads the videos
|
# It also downloads the videos
|
||||||
res = self.extract_info(url)
|
res = self.extract_info(
|
||||||
|
url, force_generic_extractor=self.params.get('force_generic_extractor', False))
|
||||||
except UnavailableVideoError:
|
except UnavailableVideoError:
|
||||||
self.report_error('unable to download video')
|
self.report_error('unable to download video')
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
@@ -1527,6 +1545,7 @@ class YoutubeDL(object):
|
|||||||
pps_chain.extend(ie_info['__postprocessors'])
|
pps_chain.extend(ie_info['__postprocessors'])
|
||||||
pps_chain.extend(self._pps)
|
pps_chain.extend(self._pps)
|
||||||
for pp in pps_chain:
|
for pp in pps_chain:
|
||||||
|
files_to_delete = []
|
||||||
try:
|
try:
|
||||||
files_to_delete, info = pp.run(info)
|
files_to_delete, info = pp.run(info)
|
||||||
except PostProcessingError as e:
|
except PostProcessingError as e:
|
||||||
@@ -1705,7 +1724,8 @@ class YoutubeDL(object):
|
|||||||
if req_is_string:
|
if req_is_string:
|
||||||
req = url_escaped
|
req = url_escaped
|
||||||
else:
|
else:
|
||||||
req = compat_urllib_request.Request(
|
req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
|
||||||
|
req = req_type(
|
||||||
url_escaped, data=req.data, headers=req.headers,
|
url_escaped, data=req.data, headers=req.headers,
|
||||||
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
||||||
|
|
||||||
|
|||||||
@@ -169,7 +169,7 @@ def _real_main(argv=None):
|
|||||||
if not opts.audioquality.isdigit():
|
if not opts.audioquality.isdigit():
|
||||||
parser.error('invalid audio quality specified')
|
parser.error('invalid audio quality specified')
|
||||||
if opts.recodevideo is not None:
|
if opts.recodevideo is not None:
|
||||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
|
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
|
||||||
parser.error('invalid video recode format specified')
|
parser.error('invalid video recode format specified')
|
||||||
if opts.convertsubtitles is not None:
|
if opts.convertsubtitles is not None:
|
||||||
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
|
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
|
||||||
@@ -263,6 +263,9 @@ def _real_main(argv=None):
|
|||||||
external_downloader_args = None
|
external_downloader_args = None
|
||||||
if opts.external_downloader_args:
|
if opts.external_downloader_args:
|
||||||
external_downloader_args = shlex.split(opts.external_downloader_args)
|
external_downloader_args = shlex.split(opts.external_downloader_args)
|
||||||
|
postprocessor_args = None
|
||||||
|
if opts.postprocessor_args:
|
||||||
|
postprocessor_args = shlex.split(opts.postprocessor_args)
|
||||||
match_filter = (
|
match_filter = (
|
||||||
None if opts.match_filter is None
|
None if opts.match_filter is None
|
||||||
else match_filter_func(opts.match_filter))
|
else match_filter_func(opts.match_filter))
|
||||||
@@ -293,6 +296,7 @@ def _real_main(argv=None):
|
|||||||
'autonumber_size': opts.autonumber_size,
|
'autonumber_size': opts.autonumber_size,
|
||||||
'restrictfilenames': opts.restrictfilenames,
|
'restrictfilenames': opts.restrictfilenames,
|
||||||
'ignoreerrors': opts.ignoreerrors,
|
'ignoreerrors': opts.ignoreerrors,
|
||||||
|
'force_generic_extractor': opts.force_generic_extractor,
|
||||||
'ratelimit': opts.ratelimit,
|
'ratelimit': opts.ratelimit,
|
||||||
'nooverwrites': opts.nooverwrites,
|
'nooverwrites': opts.nooverwrites,
|
||||||
'retries': opts_retries,
|
'retries': opts_retries,
|
||||||
@@ -366,6 +370,7 @@ def _real_main(argv=None):
|
|||||||
'ffmpeg_location': opts.ffmpeg_location,
|
'ffmpeg_location': opts.ffmpeg_location,
|
||||||
'hls_prefer_native': opts.hls_prefer_native,
|
'hls_prefer_native': opts.hls_prefer_native,
|
||||||
'external_downloader_args': external_downloader_args,
|
'external_downloader_args': external_downloader_args,
|
||||||
|
'postprocessor_args': postprocessor_args,
|
||||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -152,7 +152,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
|||||||
"""
|
"""
|
||||||
NONCE_LENGTH_BYTES = 8
|
NONCE_LENGTH_BYTES = 8
|
||||||
|
|
||||||
data = bytes_to_intlist(base64.b64decode(data))
|
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
|
||||||
password = bytes_to_intlist(password.encode('utf-8'))
|
password = bytes_to_intlist(password.encode('utf-8'))
|
||||||
|
|
||||||
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
|
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import shutil
|
|||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -74,42 +75,74 @@ except ImportError:
|
|||||||
import BaseHTTPServer as compat_http_server
|
import BaseHTTPServer as compat_http_server
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
||||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||||
except ImportError:
|
from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
|
||||||
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
|
except ImportError: # Python 2
|
||||||
if string == '':
|
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
|
||||||
|
else re.compile('([\x00-\x7f]+)'))
|
||||||
|
|
||||||
|
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
|
||||||
|
# implementations from cpython 3.4.3's stdlib. Python 2's version
|
||||||
|
# is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
|
||||||
|
|
||||||
|
def compat_urllib_parse_unquote_to_bytes(string):
|
||||||
|
"""unquote_to_bytes('abc%20def') -> b'abc def'."""
|
||||||
|
# Note: strings are encoded as UTF-8. This is only an issue if it contains
|
||||||
|
# unescaped non-ASCII characters, which URIs should not.
|
||||||
|
if not string:
|
||||||
|
# Is it a string-like object?
|
||||||
|
string.split
|
||||||
|
return b''
|
||||||
|
if isinstance(string, unicode):
|
||||||
|
string = string.encode('utf-8')
|
||||||
|
bits = string.split(b'%')
|
||||||
|
if len(bits) == 1:
|
||||||
return string
|
return string
|
||||||
res = string.split('%')
|
res = [bits[0]]
|
||||||
if len(res) == 1:
|
append = res.append
|
||||||
|
for item in bits[1:]:
|
||||||
|
try:
|
||||||
|
append(compat_urllib_parse._hextochr[item[:2]])
|
||||||
|
append(item[2:])
|
||||||
|
except KeyError:
|
||||||
|
append(b'%')
|
||||||
|
append(item)
|
||||||
|
return b''.join(res)
|
||||||
|
|
||||||
|
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
|
||||||
|
"""Replace %xx escapes by their single-character equivalent. The optional
|
||||||
|
encoding and errors parameters specify how to decode percent-encoded
|
||||||
|
sequences into Unicode characters, as accepted by the bytes.decode()
|
||||||
|
method.
|
||||||
|
By default, percent-encoded sequences are decoded with UTF-8, and invalid
|
||||||
|
sequences are replaced by a placeholder character.
|
||||||
|
|
||||||
|
unquote('abc%20def') -> 'abc def'.
|
||||||
|
"""
|
||||||
|
if '%' not in string:
|
||||||
|
string.split
|
||||||
return string
|
return string
|
||||||
if encoding is None:
|
if encoding is None:
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
if errors is None:
|
if errors is None:
|
||||||
errors = 'replace'
|
errors = 'replace'
|
||||||
# pct_sequence: contiguous sequence of percent-encoded bytes, decoded
|
bits = _asciire.split(string)
|
||||||
pct_sequence = b''
|
res = [bits[0]]
|
||||||
string = res[0]
|
append = res.append
|
||||||
for item in res[1:]:
|
for i in range(1, len(bits), 2):
|
||||||
try:
|
append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
|
||||||
if not item:
|
append(bits[i + 1])
|
||||||
raise ValueError
|
return ''.join(res)
|
||||||
pct_sequence += item[:2].decode('hex')
|
|
||||||
rest = item[2:]
|
def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
|
||||||
if not rest:
|
"""Like unquote(), but also replace plus signs by spaces, as required for
|
||||||
# This segment was just a single percent-encoded character.
|
unquoting HTML form values.
|
||||||
# May be part of a sequence of code units, so delay decoding.
|
|
||||||
# (Stored in pct_sequence).
|
unquote_plus('%7e/abc+def') -> '~/abc def'
|
||||||
continue
|
"""
|
||||||
except ValueError:
|
string = string.replace('+', ' ')
|
||||||
rest = '%' + item
|
return compat_urllib_parse_unquote(string, encoding, errors)
|
||||||
# Encountered non-percent-encoded characters. Flush the current
|
|
||||||
# pct_sequence.
|
|
||||||
string += pct_sequence.decode(encoding, errors) + rest
|
|
||||||
pct_sequence = b''
|
|
||||||
if pct_sequence:
|
|
||||||
# Flush the final pct_sequence
|
|
||||||
string += pct_sequence.decode(encoding, errors)
|
|
||||||
return string
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
compat_str = unicode # Python 2
|
compat_str = unicode # Python 2
|
||||||
@@ -388,6 +421,15 @@ else:
|
|||||||
pass
|
pass
|
||||||
return _terminal_size(columns, lines)
|
return _terminal_size(columns, lines)
|
||||||
|
|
||||||
|
try:
|
||||||
|
itertools.count(start=0, step=1)
|
||||||
|
compat_itertools_count = itertools.count
|
||||||
|
except TypeError: # Python 2.6
|
||||||
|
def compat_itertools_count(start=0, step=1):
|
||||||
|
n = start
|
||||||
|
while True:
|
||||||
|
yield n
|
||||||
|
n += step
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'compat_HTTPError',
|
'compat_HTTPError',
|
||||||
@@ -401,6 +443,7 @@ __all__ = [
|
|||||||
'compat_html_entities',
|
'compat_html_entities',
|
||||||
'compat_http_client',
|
'compat_http_client',
|
||||||
'compat_http_server',
|
'compat_http_server',
|
||||||
|
'compat_itertools_count',
|
||||||
'compat_kwargs',
|
'compat_kwargs',
|
||||||
'compat_ord',
|
'compat_ord',
|
||||||
'compat_parse_qs',
|
'compat_parse_qs',
|
||||||
@@ -411,6 +454,8 @@ __all__ = [
|
|||||||
'compat_urllib_error',
|
'compat_urllib_error',
|
||||||
'compat_urllib_parse',
|
'compat_urllib_parse',
|
||||||
'compat_urllib_parse_unquote',
|
'compat_urllib_parse_unquote',
|
||||||
|
'compat_urllib_parse_unquote_plus',
|
||||||
|
'compat_urllib_parse_unquote_to_bytes',
|
||||||
'compat_urllib_parse_urlparse',
|
'compat_urllib_parse_urlparse',
|
||||||
'compat_urllib_request',
|
'compat_urllib_request',
|
||||||
'compat_urlparse',
|
'compat_urlparse',
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from .hls import NativeHlsFD
|
|||||||
from .http import HttpFD
|
from .http import HttpFD
|
||||||
from .rtsp import RtspFD
|
from .rtsp import RtspFD
|
||||||
from .rtmp import RtmpFD
|
from .rtmp import RtmpFD
|
||||||
|
from .dash import DashSegmentsFD
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_protocol,
|
determine_protocol,
|
||||||
@@ -20,6 +21,7 @@ PROTOCOL_MAP = {
|
|||||||
'mms': RtspFD,
|
'mms': RtspFD,
|
||||||
'rtsp': RtspFD,
|
'rtsp': RtspFD,
|
||||||
'f4m': F4mFD,
|
'f4m': F4mFD,
|
||||||
|
'http_dash_segments': DashSegmentsFD,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
66
youtube_dl/downloader/dash.py
Normal file
66
youtube_dl/downloader/dash.py
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import FileDownloader
|
||||||
|
from ..compat import compat_urllib_request
|
||||||
|
|
||||||
|
|
||||||
|
class DashSegmentsFD(FileDownloader):
|
||||||
|
"""
|
||||||
|
Download segments in a DASH manifest
|
||||||
|
"""
|
||||||
|
def real_download(self, filename, info_dict):
|
||||||
|
self.report_destination(filename)
|
||||||
|
tmpfilename = self.temp_name(filename)
|
||||||
|
base_url = info_dict['url']
|
||||||
|
segment_urls = info_dict['segment_urls']
|
||||||
|
|
||||||
|
is_test = self.params.get('test', False)
|
||||||
|
remaining_bytes = self._TEST_FILE_SIZE if is_test else None
|
||||||
|
byte_counter = 0
|
||||||
|
|
||||||
|
def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
|
||||||
|
self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
|
||||||
|
req = compat_urllib_request.Request(target_url)
|
||||||
|
if remaining_bytes is not None:
|
||||||
|
req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
|
||||||
|
|
||||||
|
data = self.ydl.urlopen(req).read()
|
||||||
|
|
||||||
|
if remaining_bytes is not None:
|
||||||
|
data = data[:remaining_bytes]
|
||||||
|
|
||||||
|
outf.write(data)
|
||||||
|
return len(data)
|
||||||
|
|
||||||
|
def combine_url(base_url, target_url):
|
||||||
|
if re.match(r'^https?://', target_url):
|
||||||
|
return target_url
|
||||||
|
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||||
|
|
||||||
|
with open(tmpfilename, 'wb') as outf:
|
||||||
|
append_url_to_file(
|
||||||
|
outf, combine_url(base_url, info_dict['initialization_url']),
|
||||||
|
'initialization segment')
|
||||||
|
for i, segment_url in enumerate(segment_urls):
|
||||||
|
segment_len = append_url_to_file(
|
||||||
|
outf, combine_url(base_url, segment_url),
|
||||||
|
'segment %d / %d' % (i + 1, len(segment_urls)),
|
||||||
|
remaining_bytes)
|
||||||
|
byte_counter += segment_len
|
||||||
|
if remaining_bytes is not None:
|
||||||
|
remaining_bytes -= segment_len
|
||||||
|
if remaining_bytes <= 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
self.try_rename(tmpfilename, filename)
|
||||||
|
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': byte_counter,
|
||||||
|
'total_bytes': byte_counter,
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'finished',
|
||||||
|
})
|
||||||
|
|
||||||
|
return True
|
||||||
@@ -109,6 +109,14 @@ class Aria2cFD(ExternalFD):
|
|||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
|
|
||||||
|
class HttpieFD(ExternalFD):
|
||||||
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
|
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
||||||
|
for key, val in info_dict['http_headers'].items():
|
||||||
|
cmd += ['%s:%s' % (key, val)]
|
||||||
|
return cmd
|
||||||
|
|
||||||
_BY_NAME = dict(
|
_BY_NAME = dict(
|
||||||
(klass.get_basename(), klass)
|
(klass.get_basename(), klass)
|
||||||
for name, klass in globals().items()
|
for name, klass in globals().items()
|
||||||
@@ -123,5 +131,6 @@ def list_external_downloaders():
|
|||||||
def get_external_downloader(external_downloader):
|
def get_external_downloader(external_downloader):
|
||||||
""" Given the name of the executable, see whether we support the given
|
""" Given the name of the executable, see whether we support the given
|
||||||
downloader . """
|
downloader . """
|
||||||
bn = os.path.basename(external_downloader)
|
# Drop .exe extension on Windows
|
||||||
|
bn = os.path.splitext(os.path.basename(external_downloader))[0]
|
||||||
return _BY_NAME[bn]
|
return _BY_NAME[bn]
|
||||||
|
|||||||
@@ -4,7 +4,10 @@ from .abc import ABCIE
|
|||||||
from .abc7news import Abc7NewsIE
|
from .abc7news import Abc7NewsIE
|
||||||
from .academicearth import AcademicEarthCourseIE
|
from .academicearth import AcademicEarthCourseIE
|
||||||
from .addanime import AddAnimeIE
|
from .addanime import AddAnimeIE
|
||||||
from .adobetv import AdobeTVIE
|
from .adobetv import (
|
||||||
|
AdobeTVIE,
|
||||||
|
AdobeTVVideoIE,
|
||||||
|
)
|
||||||
from .adultswim import AdultSwimIE
|
from .adultswim import AdultSwimIE
|
||||||
from .aftenposten import AftenpostenIE
|
from .aftenposten import AftenpostenIE
|
||||||
from .aftonbladet import AftonbladetIE
|
from .aftonbladet import AftonbladetIE
|
||||||
@@ -16,9 +19,14 @@ from .anysex import AnySexIE
|
|||||||
from .aol import AolIE
|
from .aol import AolIE
|
||||||
from .allocine import AllocineIE
|
from .allocine import AllocineIE
|
||||||
from .aparat import AparatIE
|
from .aparat import AparatIE
|
||||||
|
from .appleconnect import AppleConnectIE
|
||||||
from .appletrailers import AppleTrailersIE
|
from .appletrailers import AppleTrailersIE
|
||||||
from .archiveorg import ArchiveOrgIE
|
from .archiveorg import ArchiveOrgIE
|
||||||
from .ard import ARDIE, ARDMediathekIE
|
from .ard import (
|
||||||
|
ARDIE,
|
||||||
|
ARDMediathekIE,
|
||||||
|
SportschauIE,
|
||||||
|
)
|
||||||
from .arte import (
|
from .arte import (
|
||||||
ArteTvIE,
|
ArteTvIE,
|
||||||
ArteTVPlus7IE,
|
ArteTVPlus7IE,
|
||||||
@@ -35,7 +43,10 @@ from .azubu import AzubuIE
|
|||||||
from .baidu import BaiduVideoIE
|
from .baidu import BaiduVideoIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||||
from .bbccouk import BBCCoUkIE
|
from .bbc import (
|
||||||
|
BBCCoUkIE,
|
||||||
|
BBCIE,
|
||||||
|
)
|
||||||
from .beeg import BeegIE
|
from .beeg import BeegIE
|
||||||
from .behindkink import BehindKinkIE
|
from .behindkink import BehindKinkIE
|
||||||
from .beatportpro import BeatportProIE
|
from .beatportpro import BeatportProIE
|
||||||
@@ -103,6 +114,7 @@ from .dailymotion import (
|
|||||||
DailymotionIE,
|
DailymotionIE,
|
||||||
DailymotionPlaylistIE,
|
DailymotionPlaylistIE,
|
||||||
DailymotionUserIE,
|
DailymotionUserIE,
|
||||||
|
DailymotionCloudIE,
|
||||||
)
|
)
|
||||||
from .daum import DaumIE
|
from .daum import DaumIE
|
||||||
from .dbtv import DBTVIE
|
from .dbtv import DBTVIE
|
||||||
@@ -112,6 +124,10 @@ from .dfb import DFBIE
|
|||||||
from .dhm import DHMIE
|
from .dhm import DHMIE
|
||||||
from .dotsub import DotsubIE
|
from .dotsub import DotsubIE
|
||||||
from .douyutv import DouyuTVIE
|
from .douyutv import DouyuTVIE
|
||||||
|
from .dramafever import (
|
||||||
|
DramaFeverIE,
|
||||||
|
DramaFeverSeriesIE,
|
||||||
|
)
|
||||||
from .dreisat import DreiSatIE
|
from .dreisat import DreiSatIE
|
||||||
from .drbonanza import DRBonanzaIE
|
from .drbonanza import DRBonanzaIE
|
||||||
from .drtuber import DrTuberIE
|
from .drtuber import DrTuberIE
|
||||||
@@ -136,7 +152,6 @@ from .ellentv import (
|
|||||||
)
|
)
|
||||||
from .elpais import ElPaisIE
|
from .elpais import ElPaisIE
|
||||||
from .embedly import EmbedlyIE
|
from .embedly import EmbedlyIE
|
||||||
from .empflix import EMPFlixIE
|
|
||||||
from .engadget import EngadgetIE
|
from .engadget import EngadgetIE
|
||||||
from .eporner import EpornerIE
|
from .eporner import EpornerIE
|
||||||
from .eroprofile import EroProfileIE
|
from .eroprofile import EroProfileIE
|
||||||
@@ -149,10 +164,10 @@ from .extremetube import ExtremeTubeIE
|
|||||||
from .facebook import FacebookIE
|
from .facebook import FacebookIE
|
||||||
from .faz import FazIE
|
from .faz import FazIE
|
||||||
from .fc2 import FC2IE
|
from .fc2 import FC2IE
|
||||||
from .firedrive import FiredriveIE
|
|
||||||
from .firstpost import FirstpostIE
|
from .firstpost import FirstpostIE
|
||||||
from .firsttv import FirstTVIE
|
from .firsttv import FirstTVIE
|
||||||
from .fivemin import FiveMinIE
|
from .fivemin import FiveMinIE
|
||||||
|
from .fivetv import FiveTVIE
|
||||||
from .fktv import (
|
from .fktv import (
|
||||||
FKTVIE,
|
FKTVIE,
|
||||||
FKTVPosteckeIE,
|
FKTVPosteckeIE,
|
||||||
@@ -230,6 +245,8 @@ from .infoq import InfoQIE
|
|||||||
from .instagram import InstagramIE, InstagramUserIE
|
from .instagram import InstagramIE, InstagramUserIE
|
||||||
from .internetvideoarchive import InternetVideoArchiveIE
|
from .internetvideoarchive import InternetVideoArchiveIE
|
||||||
from .iprima import IPrimaIE
|
from .iprima import IPrimaIE
|
||||||
|
from .iqiyi import IqiyiIE
|
||||||
|
from .ir90tv import Ir90TvIE
|
||||||
from .ivi import (
|
from .ivi import (
|
||||||
IviIE,
|
IviIE,
|
||||||
IviCompilationIE
|
IviCompilationIE
|
||||||
@@ -244,6 +261,7 @@ from .kaltura import KalturaIE
|
|||||||
from .kanalplay import KanalPlayIE
|
from .kanalplay import KanalPlayIE
|
||||||
from .kankan import KankanIE
|
from .kankan import KankanIE
|
||||||
from .karaoketv import KaraoketvIE
|
from .karaoketv import KaraoketvIE
|
||||||
|
from .karrierevideos import KarriereVideosIE
|
||||||
from .keezmovies import KeezMoviesIE
|
from .keezmovies import KeezMoviesIE
|
||||||
from .khanacademy import KhanAcademyIE
|
from .khanacademy import KhanAcademyIE
|
||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
@@ -251,8 +269,17 @@ from .keek import KeekIE
|
|||||||
from .kontrtube import KontrTubeIE
|
from .kontrtube import KontrTubeIE
|
||||||
from .krasview import KrasViewIE
|
from .krasview import KrasViewIE
|
||||||
from .ku6 import Ku6IE
|
from .ku6 import Ku6IE
|
||||||
|
from .kuwo import (
|
||||||
|
KuwoIE,
|
||||||
|
KuwoAlbumIE,
|
||||||
|
KuwoChartIE,
|
||||||
|
KuwoSingerIE,
|
||||||
|
KuwoCategoryIE,
|
||||||
|
KuwoMvIE,
|
||||||
|
)
|
||||||
from .la7 import LA7IE
|
from .la7 import LA7IE
|
||||||
from .laola1tv import Laola1TvIE
|
from .laola1tv import Laola1TvIE
|
||||||
|
from .lecture2go import Lecture2GoIE
|
||||||
from .letv import (
|
from .letv import (
|
||||||
LetvIE,
|
LetvIE,
|
||||||
LetvTvIE,
|
LetvTvIE,
|
||||||
@@ -314,6 +341,7 @@ from .musicvault import MusicVaultIE
|
|||||||
from .muzu import MuzuTVIE
|
from .muzu import MuzuTVIE
|
||||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||||
from .myspass import MySpassIE
|
from .myspass import MySpassIE
|
||||||
|
from .myvi import MyviIE
|
||||||
from .myvideo import MyVideoIE
|
from .myvideo import MyVideoIE
|
||||||
from .myvidster import MyVidsterIE
|
from .myvidster import MyVidsterIE
|
||||||
from .nationalgeographic import NationalGeographicIE
|
from .nationalgeographic import NationalGeographicIE
|
||||||
@@ -333,13 +361,21 @@ from .ndtv import NDTVIE
|
|||||||
from .netzkino import NetzkinoIE
|
from .netzkino import NetzkinoIE
|
||||||
from .nerdcubed import NerdCubedFeedIE
|
from .nerdcubed import NerdCubedFeedIE
|
||||||
from .nerdist import NerdistIE
|
from .nerdist import NerdistIE
|
||||||
|
from .neteasemusic import (
|
||||||
|
NetEaseMusicIE,
|
||||||
|
NetEaseMusicAlbumIE,
|
||||||
|
NetEaseMusicSingerIE,
|
||||||
|
NetEaseMusicListIE,
|
||||||
|
NetEaseMusicMvIE,
|
||||||
|
NetEaseMusicProgramIE,
|
||||||
|
NetEaseMusicDjRadioIE,
|
||||||
|
)
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
from .newstube import NewstubeIE
|
from .newstube import NewstubeIE
|
||||||
from .nextmedia import (
|
from .nextmedia import (
|
||||||
NextMediaIE,
|
NextMediaIE,
|
||||||
NextMediaActionNewsIE,
|
NextMediaActionNewsIE,
|
||||||
AppleDailyRealtimeNewsIE,
|
AppleDailyIE,
|
||||||
AppleDailyAnimationNewsIE
|
|
||||||
)
|
)
|
||||||
from .nfb import NFBIE
|
from .nfb import NFBIE
|
||||||
from .nfl import NFLIE
|
from .nfl import NFLIE
|
||||||
@@ -353,15 +389,18 @@ from .ninegag import NineGagIE
|
|||||||
from .noco import NocoIE
|
from .noco import NocoIE
|
||||||
from .normalboots import NormalbootsIE
|
from .normalboots import NormalbootsIE
|
||||||
from .nosvideo import NosVideoIE
|
from .nosvideo import NosVideoIE
|
||||||
|
from .nova import NovaIE
|
||||||
from .novamov import NovaMovIE
|
from .novamov import NovaMovIE
|
||||||
from .nowness import NownessIE
|
from .nowness import NownessIE
|
||||||
|
from .nowtv import NowTVIE
|
||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
from .npo import (
|
from .npo import (
|
||||||
NPOIE,
|
NPOIE,
|
||||||
NPOLiveIE,
|
NPOLiveIE,
|
||||||
NPORadioIE,
|
NPORadioIE,
|
||||||
NPORadioFragmentIE,
|
NPORadioFragmentIE,
|
||||||
TegenlichtVproIE,
|
VPROIE,
|
||||||
|
WNLIE
|
||||||
)
|
)
|
||||||
from .nrk import (
|
from .nrk import (
|
||||||
NRKIE,
|
NRKIE,
|
||||||
@@ -377,6 +416,7 @@ from .nytimes import (
|
|||||||
from .nuvid import NuvidIE
|
from .nuvid import NuvidIE
|
||||||
from .odnoklassniki import OdnoklassnikiIE
|
from .odnoklassniki import OdnoklassnikiIE
|
||||||
from .oktoberfesttv import OktoberfestTVIE
|
from .oktoberfesttv import OktoberfestTVIE
|
||||||
|
from .onionstudios import OnionStudiosIE
|
||||||
from .ooyala import (
|
from .ooyala import (
|
||||||
OoyalaIE,
|
OoyalaIE,
|
||||||
OoyalaExternalIE,
|
OoyalaExternalIE,
|
||||||
@@ -394,6 +434,7 @@ from .pbs import PBSIE
|
|||||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||||
from .phoenix import PhoenixIE
|
from .phoenix import PhoenixIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
|
from .pinkbike import PinkbikeIE
|
||||||
from .planetaplay import PlanetaPlayIE
|
from .planetaplay import PlanetaPlayIE
|
||||||
from .pladform import PladformIE
|
from .pladform import PladformIE
|
||||||
from .played import PlayedIE
|
from .played import PlayedIE
|
||||||
@@ -401,6 +442,7 @@ from .playfm import PlayFMIE
|
|||||||
from .playvid import PlayvidIE
|
from .playvid import PlayvidIE
|
||||||
from .playwire import PlaywireIE
|
from .playwire import PlaywireIE
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
|
from .porn91 import Porn91IE
|
||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
from .pornhub import (
|
from .pornhub import (
|
||||||
PornHubIE,
|
PornHubIE,
|
||||||
@@ -419,6 +461,7 @@ from .qqmusic import (
|
|||||||
QQMusicSingerIE,
|
QQMusicSingerIE,
|
||||||
QQMusicAlbumIE,
|
QQMusicAlbumIE,
|
||||||
QQMusicToplistIE,
|
QQMusicToplistIE,
|
||||||
|
QQMusicPlaylistIE,
|
||||||
)
|
)
|
||||||
from .quickvid import QuickVidIE
|
from .quickvid import QuickVidIE
|
||||||
from .r7 import R7IE
|
from .r7 import R7IE
|
||||||
@@ -428,6 +471,7 @@ from .radiobremen import RadioBremenIE
|
|||||||
from .radiofrance import RadioFranceIE
|
from .radiofrance import RadioFranceIE
|
||||||
from .rai import RaiIE
|
from .rai import RaiIE
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
|
from .rds import RDSIE
|
||||||
from .redtube import RedTubeIE
|
from .redtube import RedTubeIE
|
||||||
from .restudy import RestudyIE
|
from .restudy import RestudyIE
|
||||||
from .reverbnation import ReverbNationIE
|
from .reverbnation import ReverbNationIE
|
||||||
@@ -438,7 +482,6 @@ from .roxwel import RoxwelIE
|
|||||||
from .rtbf import RTBFIE
|
from .rtbf import RTBFIE
|
||||||
from .rte import RteIE
|
from .rte import RteIE
|
||||||
from .rtlnl import RtlNlIE
|
from .rtlnl import RtlNlIE
|
||||||
from .rtlnow import RTLnowIE
|
|
||||||
from .rtl2 import RTL2IE
|
from .rtl2 import RTL2IE
|
||||||
from .rtp import RTPIE
|
from .rtp import RTPIE
|
||||||
from .rts import RTSIE
|
from .rts import RTSIE
|
||||||
@@ -452,6 +495,7 @@ from .rutube import (
|
|||||||
RutubePersonIE,
|
RutubePersonIE,
|
||||||
)
|
)
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
|
from .ruutu import RuutuIE
|
||||||
from .sandia import SandiaIE
|
from .sandia import SandiaIE
|
||||||
from .safari import (
|
from .safari import (
|
||||||
SafariIE,
|
SafariIE,
|
||||||
@@ -479,9 +523,16 @@ from .smotri import (
|
|||||||
SmotriUserIE,
|
SmotriUserIE,
|
||||||
SmotriBroadcastIE,
|
SmotriBroadcastIE,
|
||||||
)
|
)
|
||||||
|
from .snagfilms import (
|
||||||
|
SnagFilmsIE,
|
||||||
|
SnagFilmsEmbedIE,
|
||||||
|
)
|
||||||
from .snotr import SnotrIE
|
from .snotr import SnotrIE
|
||||||
from .sockshare import SockshareIE
|
|
||||||
from .sohu import SohuIE
|
from .sohu import SohuIE
|
||||||
|
from .soompi import (
|
||||||
|
SoompiIE,
|
||||||
|
SoompiShowIE,
|
||||||
|
)
|
||||||
from .soundcloud import (
|
from .soundcloud import (
|
||||||
SoundcloudIE,
|
SoundcloudIE,
|
||||||
SoundcloudSetIE,
|
SoundcloudSetIE,
|
||||||
@@ -549,6 +600,7 @@ from .tf1 import TF1IE
|
|||||||
from .theonion import TheOnionIE
|
from .theonion import TheOnionIE
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from .thesixtyone import TheSixtyOneIE
|
from .thesixtyone import TheSixtyOneIE
|
||||||
|
from .thisamericanlife import ThisAmericanLifeIE
|
||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
from .tinypic import TinyPicIE
|
from .tinypic import TinyPicIE
|
||||||
from .tlc import TlcIE, TlcDeIE
|
from .tlc import TlcIE, TlcDeIE
|
||||||
@@ -556,7 +608,11 @@ from .tmz import (
|
|||||||
TMZIE,
|
TMZIE,
|
||||||
TMZArticleIE,
|
TMZArticleIE,
|
||||||
)
|
)
|
||||||
from .tnaflix import TNAFlixIE
|
from .tnaflix import (
|
||||||
|
TNAFlixIE,
|
||||||
|
EMPFlixIE,
|
||||||
|
MovieFapIE,
|
||||||
|
)
|
||||||
from .thvideo import (
|
from .thvideo import (
|
||||||
THVideoIE,
|
THVideoIE,
|
||||||
THVideoPlaylistIE
|
THVideoPlaylistIE
|
||||||
@@ -567,6 +623,7 @@ from .traileraddict import TrailerAddictIE
|
|||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
from .trutube import TruTubeIE
|
from .trutube import TruTubeIE
|
||||||
from .tube8 import Tube8IE
|
from .tube8 import Tube8IE
|
||||||
|
from .tubitv import TubiTvIE
|
||||||
from .tudou import TudouIE
|
from .tudou import TudouIE
|
||||||
from .tumblr import TumblrIE
|
from .tumblr import TumblrIE
|
||||||
from .tunein import TuneInIE
|
from .tunein import TuneInIE
|
||||||
@@ -577,6 +634,10 @@ from .tv2 import (
|
|||||||
TV2ArticleIE,
|
TV2ArticleIE,
|
||||||
)
|
)
|
||||||
from .tv4 import TV4IE
|
from .tv4 import TV4IE
|
||||||
|
from .tvc import (
|
||||||
|
TVCIE,
|
||||||
|
TVCArticleIE,
|
||||||
|
)
|
||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvp import TvpIE, TvpSeriesIE
|
from .tvp import TvpIE, TvpSeriesIE
|
||||||
from .tvplay import TVPlayIE
|
from .tvplay import TVPlayIE
|
||||||
@@ -595,6 +656,7 @@ from .twitch import (
|
|||||||
TwitchBookmarksIE,
|
TwitchBookmarksIE,
|
||||||
TwitchStreamIE,
|
TwitchStreamIE,
|
||||||
)
|
)
|
||||||
|
from .twitter import TwitterCardIE
|
||||||
from .ubu import UbuIE
|
from .ubu import UbuIE
|
||||||
from .udemy import (
|
from .udemy import (
|
||||||
UdemyIE,
|
UdemyIE,
|
||||||
@@ -647,7 +709,10 @@ from .vine import (
|
|||||||
VineIE,
|
VineIE,
|
||||||
VineUserIE,
|
VineUserIE,
|
||||||
)
|
)
|
||||||
from .viki import VikiIE
|
from .viki import (
|
||||||
|
VikiIE,
|
||||||
|
VikiChannelIE,
|
||||||
|
)
|
||||||
from .vk import (
|
from .vk import (
|
||||||
VKIE,
|
VKIE,
|
||||||
VKUserVideosIE,
|
VKUserVideosIE,
|
||||||
@@ -668,7 +733,10 @@ from .wdr import (
|
|||||||
WDRMobileIE,
|
WDRMobileIE,
|
||||||
WDRMausIE,
|
WDRMausIE,
|
||||||
)
|
)
|
||||||
from .webofstories import WebOfStoriesIE
|
from .webofstories import (
|
||||||
|
WebOfStoriesIE,
|
||||||
|
WebOfStoriesPlaylistIE,
|
||||||
|
)
|
||||||
from .weibo import WeiboIE
|
from .weibo import WeiboIE
|
||||||
from .wimp import WimpIE
|
from .wimp import WimpIE
|
||||||
from .wistia import WistiaIE
|
from .wistia import WistiaIE
|
||||||
@@ -677,7 +745,10 @@ from .wrzuta import WrzutaIE
|
|||||||
from .wsj import WSJIE
|
from .wsj import WSJIE
|
||||||
from .xbef import XBefIE
|
from .xbef import XBefIE
|
||||||
from .xboxclips import XboxClipsIE
|
from .xboxclips import XboxClipsIE
|
||||||
from .xhamster import XHamsterIE
|
from .xhamster import (
|
||||||
|
XHamsterIE,
|
||||||
|
XHamsterEmbedIE,
|
||||||
|
)
|
||||||
from .xminus import XMinusIE
|
from .xminus import XMinusIE
|
||||||
from .xnxx import XNXXIE
|
from .xnxx import XNXXIE
|
||||||
from .xstream import XstreamIE
|
from .xstream import XstreamIE
|
||||||
@@ -696,6 +767,7 @@ from .yandexmusic import (
|
|||||||
YandexMusicPlaylistIE,
|
YandexMusicPlaylistIE,
|
||||||
)
|
)
|
||||||
from .yesjapan import YesJapanIE
|
from .yesjapan import YesJapanIE
|
||||||
|
from .yinyuetai import YinYueTaiIE
|
||||||
from .ynet import YnetIE
|
from .ynet import YnetIE
|
||||||
from .youjizz import YouJizzIE
|
from .youjizz import YouJizzIE
|
||||||
from .youku import YoukuIE
|
from .youku import YoukuIE
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ from ..utils import (
|
|||||||
parse_duration,
|
parse_duration,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
float_or_none,
|
||||||
|
ISO639Utils,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -69,3 +71,61 @@ class AdobeTVIE(InfoExtractor):
|
|||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class AdobeTVVideoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
# From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
|
||||||
|
'url': 'https://video.tv.adobe.com/v/2456/',
|
||||||
|
'md5': '43662b577c018ad707a63766462b1e87',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2456',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'New experience with Acrobat DC',
|
||||||
|
'description': 'New experience with Acrobat DC',
|
||||||
|
'duration': 248.667,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
player_params = self._parse_json(self._search_regex(
|
||||||
|
r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': source['src'],
|
||||||
|
'width': source.get('width'),
|
||||||
|
'height': source.get('height'),
|
||||||
|
'tbr': source.get('bitrate'),
|
||||||
|
} for source in player_params['sources']]
|
||||||
|
|
||||||
|
# For both metadata and downloaded files the duration varies among
|
||||||
|
# formats. I just pick the max one
|
||||||
|
duration = max(filter(None, [
|
||||||
|
float_or_none(source.get('duration'), scale=1000)
|
||||||
|
for source in player_params['sources']]))
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for translation in player_params.get('translations', []):
|
||||||
|
lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
|
||||||
|
if lang_id not in subtitles:
|
||||||
|
subtitles[lang_id] = []
|
||||||
|
subtitles[lang_id].append({
|
||||||
|
'url': translation['vttPath'],
|
||||||
|
'ext': 'vtt',
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'title': player_params['title'],
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'duration': duration,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
|||||||
@@ -6,11 +6,11 @@ from ..utils import int_or_none
|
|||||||
|
|
||||||
|
|
||||||
class AftonbladetIE(InfoExtractor):
|
class AftonbladetIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])'
|
_VALID_URL = r'http://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
|
'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'article36015',
|
'id': '36015',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
|
'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
|
||||||
'description': 'Jupiters måne mest aktiv av alla himlakroppar',
|
'description': 'Jupiters måne mest aktiv av alla himlakroppar',
|
||||||
@@ -25,8 +25,9 @@ class AftonbladetIE(InfoExtractor):
|
|||||||
|
|
||||||
# find internal video meta data
|
# find internal video meta data
|
||||||
meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
|
meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
|
||||||
internal_meta_id = self._html_search_regex(
|
player_config = self._parse_json(self._html_search_regex(
|
||||||
r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
|
r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
|
||||||
|
internal_meta_id = player_config['videoId']
|
||||||
internal_meta_url = meta_url % internal_meta_id
|
internal_meta_url = meta_url % internal_meta_id
|
||||||
internal_meta_json = self._download_json(
|
internal_meta_json = self._download_json(
|
||||||
internal_meta_url, video_id, 'Downloading video meta data')
|
internal_meta_url, video_id, 'Downloading video meta data')
|
||||||
|
|||||||
50
youtube_dl/extractor/appleconnect.py
Normal file
50
youtube_dl/extractor/appleconnect.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
str_to_int,
|
||||||
|
ExtractorError
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AppleConnectIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||||
|
'md5': '10d0f2799111df4cb1c924520ca78f98',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||||
|
'ext': 'm4v',
|
||||||
|
'title': 'Energy',
|
||||||
|
'uploader': 'Drake',
|
||||||
|
'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg',
|
||||||
|
'upload_date': '20150710',
|
||||||
|
'timestamp': 1436545535,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
try:
|
||||||
|
video_json = self._html_search_regex(
|
||||||
|
r'class="auc-video-data">(\{.*?\})', webpage, 'json')
|
||||||
|
except ExtractorError:
|
||||||
|
raise ExtractorError('This post doesn\'t contain a video', expected=True)
|
||||||
|
|
||||||
|
video_data = self._parse_json(video_json, video_id)
|
||||||
|
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
|
||||||
|
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_data['sslSrc'],
|
||||||
|
'title': video_data['title'],
|
||||||
|
'description': video_data['description'],
|
||||||
|
'uploader': video_data['artistName'],
|
||||||
|
'thumbnail': video_data['artworkUrl'],
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'like_count': like_count,
|
||||||
|
}
|
||||||
@@ -8,6 +8,7 @@ from .generic import GenericIE
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
get_element_by_attribute,
|
||||||
qualities,
|
qualities,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
@@ -22,19 +23,125 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '29582122',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ich liebe das Leben trotzdem',
|
||||||
|
'description': 'md5:45e4c225c72b27993314b31a84a5261c',
|
||||||
|
'duration': 4557,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
||||||
|
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '29522730',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tatort: Scheinwelten - Hörfassung (Video tgl. ab 20 Uhr)',
|
||||||
|
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
||||||
|
'duration': 5252,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# audio
|
||||||
|
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||||
|
'md5': '219d94d8980b4f538c7fcb0865eb7f2c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '28488308',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Tod eines Fußballers',
|
||||||
|
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
||||||
|
'duration': 3240,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
|
||||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '22490580',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Das Wunder von Wolbeck (Video tgl. ab 20 Uhr)',
|
|
||||||
'description': 'Auf einem restaurierten Hof bei Wolbeck wird der Heilpraktiker Raffael Lembeck eines morgens von seiner Frau Stella tot aufgefunden. Das Opfer war offensichtlich in seiner Praxis zu Fall gekommen und ist dann verblutet, erklärt Prof. Boerne am Tatort.',
|
|
||||||
},
|
|
||||||
'skip': 'Blocked outside of Germany',
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||||
|
media_info = self._download_json(
|
||||||
|
media_info_url, video_id, 'Downloading media JSON')
|
||||||
|
|
||||||
|
formats = self._extract_formats(media_info, video_id)
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
if '"fsk"' in webpage:
|
||||||
|
raise ExtractorError(
|
||||||
|
'This video is only available after 20:00', expected=True)
|
||||||
|
elif media_info.get('_geoblocked'):
|
||||||
|
raise ExtractorError('This video is not available due to geo restriction', expected=True)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
duration = int_or_none(media_info.get('_duration'))
|
||||||
|
thumbnail = media_info.get('_previewImage')
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
subtitle_url = media_info.get('_subtitleUrl')
|
||||||
|
if subtitle_url:
|
||||||
|
subtitles['de'] = [{
|
||||||
|
'ext': 'srt',
|
||||||
|
'url': subtitle_url,
|
||||||
|
}]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'duration': duration,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_formats(self, media_info, video_id):
|
||||||
|
type_ = media_info.get('_type')
|
||||||
|
media_array = media_info.get('_mediaArray', [])
|
||||||
|
formats = []
|
||||||
|
for num, media in enumerate(media_array):
|
||||||
|
for stream in media.get('_mediaStreamArray', []):
|
||||||
|
stream_urls = stream.get('_stream')
|
||||||
|
if not stream_urls:
|
||||||
|
continue
|
||||||
|
if not isinstance(stream_urls, list):
|
||||||
|
stream_urls = [stream_urls]
|
||||||
|
quality = stream.get('_quality')
|
||||||
|
server = stream.get('_server')
|
||||||
|
for stream_url in stream_urls:
|
||||||
|
ext = determine_ext(stream_url)
|
||||||
|
if ext == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
|
||||||
|
video_id, preference=-1, f4m_id='hds'))
|
||||||
|
elif ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
stream_url, video_id, 'mp4', preference=1, m3u8_id='hls'))
|
||||||
|
else:
|
||||||
|
if server and server.startswith('rtmp'):
|
||||||
|
f = {
|
||||||
|
'url': server,
|
||||||
|
'play_path': stream_url,
|
||||||
|
'format_id': 'a%s-rtmp-%s' % (num, quality),
|
||||||
|
}
|
||||||
|
elif stream_url.startswith('http'):
|
||||||
|
f = {
|
||||||
|
'url': stream_url,
|
||||||
|
'format_id': 'a%s-%s-%s' % (num, ext, quality)
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
|
||||||
|
if m:
|
||||||
|
f.update({
|
||||||
|
'width': int(m.group('width')),
|
||||||
|
'height': int(m.group('height')),
|
||||||
|
})
|
||||||
|
if type_ == 'audio':
|
||||||
|
f['vcodec'] = 'none'
|
||||||
|
formats.append(f)
|
||||||
|
return formats
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# determine video id from url
|
# determine video id from url
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
@@ -92,46 +199,22 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
'format_id': fid,
|
'format_id': fid,
|
||||||
'url': furl,
|
'url': furl,
|
||||||
})
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info = {
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
else: # request JSON file
|
else: # request JSON file
|
||||||
media_info = self._download_json(
|
info = self._extract_media_info(
|
||||||
'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
|
'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id)
|
||||||
# The second element of the _mediaArray contains the standard http urls
|
|
||||||
streams = media_info['_mediaArray'][1]['_mediaStreamArray']
|
|
||||||
if not streams:
|
|
||||||
if '"fsk"' in webpage:
|
|
||||||
raise ExtractorError('This video is only available after 20:00')
|
|
||||||
|
|
||||||
formats = []
|
info.update({
|
||||||
for s in streams:
|
|
||||||
if type(s['_stream']) == list:
|
|
||||||
for index, url in enumerate(s['_stream'][::-1]):
|
|
||||||
quality = s['_quality'] + index
|
|
||||||
formats.append({
|
|
||||||
'quality': quality,
|
|
||||||
'url': url,
|
|
||||||
'format_id': '%s-%s' % (determine_ext(url), quality)
|
|
||||||
})
|
|
||||||
continue
|
|
||||||
|
|
||||||
format = {
|
|
||||||
'quality': s['_quality'],
|
|
||||||
'url': s['_stream'],
|
|
||||||
}
|
|
||||||
|
|
||||||
format['format_id'] = '%s-%s' % (
|
|
||||||
determine_ext(format['url']), format['quality'])
|
|
||||||
|
|
||||||
formats.append(format)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
})
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
class ARDIE(InfoExtractor):
|
class ARDIE(InfoExtractor):
|
||||||
@@ -189,3 +272,41 @@ class ARDIE(InfoExtractor):
|
|||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class SportschauIE(ARDMediathekIE):
|
||||||
|
IE_NAME = 'Sportschau'
|
||||||
|
_VALID_URL = r'(?P<baseurl>https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P<id>[^/#?]+))\.html'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
base_url = mobj.group('baseurl')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
title = get_element_by_attribute('class', 'headline', webpage)
|
||||||
|
description = self._html_search_meta('description', webpage, 'description')
|
||||||
|
|
||||||
|
info = self._extract_media_info(
|
||||||
|
base_url + '-mc_defaultQuality-h.json', webpage, video_id)
|
||||||
|
|
||||||
|
info.update({
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
})
|
||||||
|
|
||||||
|
return info
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
get_element_by_id,
|
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
qualities,
|
qualities,
|
||||||
@@ -195,7 +194,9 @@ class ArteTVFutureIE(ArteTVPlus7IE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
anchor_id, lang = self._extract_url_info(url)
|
anchor_id, lang = self._extract_url_info(url)
|
||||||
webpage = self._download_webpage(url, anchor_id)
|
webpage = self._download_webpage(url, anchor_id)
|
||||||
row = get_element_by_id(anchor_id, webpage)
|
row = self._search_regex(
|
||||||
|
r'(?s)id="%s"[^>]*>.+?(<div[^>]*arte_vp_url[^>]*>)' % anchor_id,
|
||||||
|
webpage, 'row')
|
||||||
return self._extract_from_webpage(row, anchor_id, lang)
|
return self._extract_from_webpage(row, anchor_id, lang)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
|
|||||||
|
|
||||||
|
|
||||||
class BaiduVideoIE(InfoExtractor):
|
class BaiduVideoIE(InfoExtractor):
|
||||||
|
IE_DESC = '百度视频'
|
||||||
_VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
|
_VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
|
'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
|
||||||
|
|||||||
722
youtube_dl/extractor/bbc.py
Normal file
722
youtube_dl/extractor/bbc.py
Normal file
@@ -0,0 +1,722 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
|
|
||||||
|
|
||||||
|
class BBCCoUkIE(InfoExtractor):
|
||||||
|
IE_NAME = 'bbc.co.uk'
|
||||||
|
IE_DESC = 'BBC iPlayer'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||||
|
|
||||||
|
_MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b039d07m',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Kaleidoscope, Leonard Cohen',
|
||||||
|
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
||||||
|
'duration': 1740,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b00yng1d',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'The Man in Black: Series 3: The Printed Name',
|
||||||
|
'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
|
||||||
|
'duration': 1800,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Episode is no longer available on BBC iPlayer Radio',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b00yng1d',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'The Voice UK: Series 3: Blind Auditions 5',
|
||||||
|
'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
|
||||||
|
'duration': 5100,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b03k3pb7',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
|
||||||
|
'description': '2. Invasion',
|
||||||
|
'duration': 3600,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b04v209v',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Pete Tong, The Essential New Tune Special',
|
||||||
|
'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
|
||||||
|
'duration': 10800,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
|
||||||
|
'note': 'Audio',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p02frcch',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
|
||||||
|
'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
|
||||||
|
'duration': 3507,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
|
||||||
|
'note': 'Video',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p025c103',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
|
||||||
|
'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
|
||||||
|
'duration': 226,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p02n76xf',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
|
||||||
|
'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
|
||||||
|
'duration': 3540,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'geolocation',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b05zmgw1',
|
||||||
|
'ext': 'flv',
|
||||||
|
'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
|
||||||
|
'title': 'Royal Academy Summer Exhibition',
|
||||||
|
'duration': 3540,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'geolocation',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
def _extract_asx_playlist(self, connection, programme_id):
|
||||||
|
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||||
|
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||||
|
|
||||||
|
def _extract_connection(self, connection, programme_id):
|
||||||
|
formats = []
|
||||||
|
protocol = connection.get('protocol')
|
||||||
|
supplier = connection.get('supplier')
|
||||||
|
if protocol == 'http':
|
||||||
|
href = connection.get('href')
|
||||||
|
transfer_format = connection.get('transferFormat')
|
||||||
|
# ASX playlist
|
||||||
|
if supplier == 'asx':
|
||||||
|
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||||
|
formats.append({
|
||||||
|
'url': ref,
|
||||||
|
'format_id': 'ref%s_%s' % (i, supplier),
|
||||||
|
})
|
||||||
|
# Skip DASH until supported
|
||||||
|
elif transfer_format == 'dash':
|
||||||
|
pass
|
||||||
|
# Direct link
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': href,
|
||||||
|
'format_id': supplier,
|
||||||
|
})
|
||||||
|
elif protocol == 'rtmp':
|
||||||
|
application = connection.get('application', 'ondemand')
|
||||||
|
auth_string = connection.get('authString')
|
||||||
|
identifier = connection.get('identifier')
|
||||||
|
server = connection.get('server')
|
||||||
|
formats.append({
|
||||||
|
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||||
|
'play_path': identifier,
|
||||||
|
'app': '%s?%s' % (application, auth_string),
|
||||||
|
'page_url': 'http://www.bbc.co.uk',
|
||||||
|
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||||
|
'rtmp_live': False,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': supplier,
|
||||||
|
})
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_items(self, playlist):
|
||||||
|
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
||||||
|
|
||||||
|
def _extract_medias(self, media_selection):
|
||||||
|
error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
|
||||||
|
if error is not None:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
|
||||||
|
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
|
||||||
|
|
||||||
|
def _extract_connections(self, media):
|
||||||
|
return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
|
||||||
|
|
||||||
|
def _extract_video(self, media, programme_id):
|
||||||
|
formats = []
|
||||||
|
vbr = int_or_none(media.get('bitrate'))
|
||||||
|
vcodec = media.get('encoding')
|
||||||
|
service = media.get('service')
|
||||||
|
width = int_or_none(media.get('width'))
|
||||||
|
height = int_or_none(media.get('height'))
|
||||||
|
file_size = int_or_none(media.get('media_file_size'))
|
||||||
|
for connection in self._extract_connections(media):
|
||||||
|
conn_formats = self._extract_connection(connection, programme_id)
|
||||||
|
for format in conn_formats:
|
||||||
|
format.update({
|
||||||
|
'format_id': '%s_%s' % (service, format['format_id']),
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'vbr': vbr,
|
||||||
|
'vcodec': vcodec,
|
||||||
|
'filesize': file_size,
|
||||||
|
})
|
||||||
|
formats.extend(conn_formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_audio(self, media, programme_id):
|
||||||
|
formats = []
|
||||||
|
abr = int_or_none(media.get('bitrate'))
|
||||||
|
acodec = media.get('encoding')
|
||||||
|
service = media.get('service')
|
||||||
|
for connection in self._extract_connections(media):
|
||||||
|
conn_formats = self._extract_connection(connection, programme_id)
|
||||||
|
for format in conn_formats:
|
||||||
|
format.update({
|
||||||
|
'format_id': '%s_%s' % (service, format['format_id']),
|
||||||
|
'abr': abr,
|
||||||
|
'acodec': acodec,
|
||||||
|
})
|
||||||
|
formats.extend(conn_formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _get_subtitles(self, media, programme_id):
|
||||||
|
subtitles = {}
|
||||||
|
for connection in self._extract_connections(media):
|
||||||
|
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
||||||
|
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||||
|
subtitles[lang] = [
|
||||||
|
{
|
||||||
|
'url': connection.get('href'),
|
||||||
|
'ext': 'ttml',
|
||||||
|
},
|
||||||
|
]
|
||||||
|
return subtitles
|
||||||
|
|
||||||
|
def _download_media_selector(self, programme_id):
|
||||||
|
return self._download_media_selector_url(
|
||||||
|
self._MEDIASELECTOR_URL % programme_id, programme_id)
|
||||||
|
|
||||||
|
def _download_media_selector_url(self, url, programme_id=None):
|
||||||
|
try:
|
||||||
|
media_selection = self._download_xml(
|
||||||
|
url, programme_id, 'Downloading media selection XML')
|
||||||
|
except ExtractorError as ee:
|
||||||
|
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||||
|
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
return self._process_media_selector(media_selection, programme_id)
|
||||||
|
|
||||||
|
def _process_media_selector(self, media_selection, programme_id):
|
||||||
|
formats = []
|
||||||
|
subtitles = None
|
||||||
|
|
||||||
|
for media in self._extract_medias(media_selection):
|
||||||
|
kind = media.get('kind')
|
||||||
|
if kind == 'audio':
|
||||||
|
formats.extend(self._extract_audio(media, programme_id))
|
||||||
|
elif kind == 'video':
|
||||||
|
formats.extend(self._extract_video(media, programme_id))
|
||||||
|
elif kind == 'captions':
|
||||||
|
subtitles = self.extract_subtitles(media, programme_id)
|
||||||
|
|
||||||
|
return formats, subtitles
|
||||||
|
|
||||||
|
def _download_playlist(self, playlist_id):
|
||||||
|
try:
|
||||||
|
playlist = self._download_json(
|
||||||
|
'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
|
||||||
|
playlist_id, 'Downloading playlist JSON')
|
||||||
|
|
||||||
|
version = playlist.get('defaultAvailableVersion')
|
||||||
|
if version:
|
||||||
|
smp_config = version['smpConfig']
|
||||||
|
title = smp_config['title']
|
||||||
|
description = smp_config['summary']
|
||||||
|
for item in smp_config['items']:
|
||||||
|
kind = item['kind']
|
||||||
|
if kind != 'programme' and kind != 'radioProgramme':
|
||||||
|
continue
|
||||||
|
programme_id = item.get('vpid')
|
||||||
|
duration = int_or_none(item.get('duration'))
|
||||||
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
|
return programme_id, title, description, duration, formats, subtitles
|
||||||
|
except ExtractorError as ee:
|
||||||
|
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
|
||||||
|
raise
|
||||||
|
|
||||||
|
# fallback to legacy playlist
|
||||||
|
return self._process_legacy_playlist(playlist_id)
|
||||||
|
|
||||||
|
def _process_legacy_playlist_url(self, url, display_id):
|
||||||
|
playlist = self._download_legacy_playlist_url(url, display_id)
|
||||||
|
return self._extract_from_legacy_playlist(playlist, display_id)
|
||||||
|
|
||||||
|
def _process_legacy_playlist(self, playlist_id):
|
||||||
|
return self._process_legacy_playlist_url(
|
||||||
|
'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
|
||||||
|
|
||||||
|
def _download_legacy_playlist_url(self, url, playlist_id=None):
|
||||||
|
return self._download_xml(
|
||||||
|
url, playlist_id, 'Downloading legacy playlist XML')
|
||||||
|
|
||||||
|
def _extract_from_legacy_playlist(self, playlist, playlist_id):
|
||||||
|
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||||
|
if no_items is not None:
|
||||||
|
reason = no_items.get('reason')
|
||||||
|
if reason == 'preAvailability':
|
||||||
|
msg = 'Episode %s is not yet available' % playlist_id
|
||||||
|
elif reason == 'postAvailability':
|
||||||
|
msg = 'Episode %s is no longer available' % playlist_id
|
||||||
|
elif reason == 'noMedia':
|
||||||
|
msg = 'Episode %s is not currently available' % playlist_id
|
||||||
|
else:
|
||||||
|
msg = 'Episode %s is not available: %s' % (playlist_id, reason)
|
||||||
|
raise ExtractorError(msg, expected=True)
|
||||||
|
|
||||||
|
for item in self._extract_items(playlist):
|
||||||
|
kind = item.get('kind')
|
||||||
|
if kind != 'programme' and kind != 'radioProgramme':
|
||||||
|
continue
|
||||||
|
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||||
|
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||||
|
|
||||||
|
def get_programme_id(item):
|
||||||
|
def get_from_attributes(item):
|
||||||
|
for p in('identifier', 'group'):
|
||||||
|
value = item.get(p)
|
||||||
|
if value and re.match(r'^[pb][\da-z]{7}$', value):
|
||||||
|
return value
|
||||||
|
get_from_attributes(item)
|
||||||
|
mediator = item.find('./{http://bbc.co.uk/2008/emp/playlist}mediator')
|
||||||
|
if mediator is not None:
|
||||||
|
return get_from_attributes(mediator)
|
||||||
|
|
||||||
|
programme_id = get_programme_id(item)
|
||||||
|
duration = int_or_none(item.get('duration'))
|
||||||
|
# TODO: programme_id can be None and media items can be incorporated right inside
|
||||||
|
# playlist's item (e.g. http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||||
|
# as f4m and m3u8
|
||||||
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
|
|
||||||
|
return programme_id, title, description, duration, formats, subtitles
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
group_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||||
|
|
||||||
|
programme_id = None
|
||||||
|
|
||||||
|
tviplayer = self._search_regex(
|
||||||
|
r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
|
||||||
|
webpage, 'player', default=None)
|
||||||
|
|
||||||
|
if tviplayer:
|
||||||
|
player = self._parse_json(tviplayer, group_id).get('player', {})
|
||||||
|
duration = int_or_none(player.get('duration'))
|
||||||
|
programme_id = player.get('vpid')
|
||||||
|
|
||||||
|
if not programme_id:
|
||||||
|
programme_id = self._search_regex(
|
||||||
|
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
|
||||||
|
|
||||||
|
if programme_id:
|
||||||
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
description = self._search_regex(
|
||||||
|
r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
|
||||||
|
webpage, 'description', fatal=False)
|
||||||
|
else:
|
||||||
|
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': programme_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class BBCIE(BBCCoUkIE):
    # Generic extractor for bbc.com / bbc.co.uk article pages; reuses the
    # media-selector machinery inherited from BBCCoUkIE.
    IE_NAME = 'bbc'
    IE_DESC = 'BBC'
    # Any bbc.com / bbc.co.uk URL with at least one path component before
    # the final id segment.
    _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'

    # fails with notukerror for some videos
    # _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s'
    _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s'

    _TESTS = [{
        # article with multiple videos embedded with data-media-meta containing
        # playlist.sxml, externalId and no direct video links
        'url': 'http://www.bbc.com/news/world-europe-32668511',
        'info_dict': {
            'id': 'world-europe-32668511',
            'title': 'Russia stages massive WW2 parade despite Western boycott',
            'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
        },
        'playlist_count': 2,
    }, {
        # article with multiple videos embedded with data-media-meta (more videos)
        'url': 'http://www.bbc.com/news/business-28299555',
        'info_dict': {
            'id': 'business-28299555',
            'title': 'Farnborough Airshow: Video highlights',
            'description': 'BBC reports and video highlights at the Farnborough Airshow.',
        },
        'playlist_count': 9,
        'skip': 'Save time',
    }, {
        # article with multiple videos embedded with `new SMP()`
        'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
        'info_dict': {
            'id': '3662a707-0af9-3149-963f-47bea720b460',
            'title': 'BBC Blogs - Adam Curtis - BUGGER',
        },
        'playlist_count': 18,
    }, {
        # single video embedded with mediaAssetPage.init()
        'url': 'http://www.bbc.com/news/world-europe-32041533',
        'info_dict': {
            'id': 'p02mprgb',
            'ext': 'flv',
            'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
            'duration': 47,
            'timestamp': 1427219242,
            'upload_date': '20150324',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        # article with single video embedded with data-media-meta containing
        # direct video links (for now these are extracted) and playlist.xml (with
        # media items as f4m and m3u8 - currently unsupported)
        'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
        'info_dict': {
            'id': '150615_telabyad_kentin_cogu',
            'ext': 'mp4',
            'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
            'duration': 47,
            'timestamp': 1434397334,
            'upload_date': '20150615',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # single video embedded with mediaAssetPage.init() (regional section)
        'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
        'info_dict': {
            'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
            'ext': 'mp4',
            'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
            'duration': 87,
            'timestamp': 1434713142,
            'upload_date': '20150619',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # single video story with digitalData
        'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
        'info_dict': {
            'id': 'p02q6gc4',
            'ext': 'flv',
            'title': 'Sri Lanka’s spicy secret',
            'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
            'timestamp': 1437674293,
            'upload_date': '20150723',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        # single video story without digitalData
        'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
        'info_dict': {
            'id': 'p018zqqg',
            'ext': 'flv',
            'title': 'Hyundai Santa Fe Sport: Rock star',
            'description': 'md5:b042a26142c4154a6e472933cf20793d',
            'timestamp': 1368473503,
            'upload_date': '20130513',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        # single video with playlist.sxml URL
        'url': 'http://www.bbc.com/sport/0/football/33653409',
        'info_dict': {
            'id': 'p02xycnp',
            'ext': 'flv',
            'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
            'description': 'md5:398fca0e2e701c609d726e034fa1fc89',
            'duration': 140,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        # single video with playlist URL from weather section
        'url': 'http://www.bbc.com/weather/features/33601775',
        'only_matching': True,
    }, {
        # custom redirection to www.bbc.com
        'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
        'only_matching': True,
    }]
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if BBCCoUkIE.suitable(url) else super(BBCIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _extract_from_media_meta(self, media_meta, video_id):
|
||||||
|
# Direct links to media in media metadata (e.g.
|
||||||
|
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||||
|
# TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
|
||||||
|
source_files = media_meta.get('sourceFiles')
|
||||||
|
if source_files:
|
||||||
|
return [{
|
||||||
|
'url': f['url'],
|
||||||
|
'format_id': format_id,
|
||||||
|
'ext': f.get('encoding'),
|
||||||
|
'tbr': float_or_none(f.get('bitrate'), 1000),
|
||||||
|
'filesize': int_or_none(f.get('filesize')),
|
||||||
|
} for format_id, f in source_files.items() if f.get('url')], []
|
||||||
|
|
||||||
|
programme_id = media_meta.get('externalId')
|
||||||
|
if programme_id:
|
||||||
|
return self._download_media_selector(programme_id)
|
||||||
|
|
||||||
|
# Process playlist.sxml as legacy playlist
|
||||||
|
href = media_meta.get('href')
|
||||||
|
if href:
|
||||||
|
playlist = self._download_legacy_playlist_url(href)
|
||||||
|
_, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
|
||||||
|
return formats, subtitles
|
||||||
|
|
||||||
|
return [], []
|
||||||
|
|
||||||
|
    def _real_extract(self, url):
        """Extract single videos or playlists from generic BBC article pages.

        Tries, in order: an embedded legacy playlist.sxml <param>, a single
        vpid embedded in the page markup, `new SMP()` / setPlaylist embeds
        (delegated to BBCCoUk), and finally data-media-meta /
        mediaAssetPage blobs.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # The publication date may appear in any of several markups.
        timestamp = parse_iso8601(self._search_regex(
            [r'"datePublished":\s*"([^"]+)',
             r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
             r'itemprop="datePublished"[^>]+datetime="([^"]+)"'],
            webpage, 'date', default=None))

        # single video with playlist.sxml URL (e.g. http://www.bbc.com/sport/0/football/3365340ng)
        playlist = self._search_regex(
            r'<param[^>]+name="playlist"[^>]+value="([^"]+)"',
            webpage, 'playlist', default=None)
        if playlist:
            programme_id, title, description, duration, formats, subtitles = \
                self._process_legacy_playlist_url(playlist, playlist_id)
            self._sort_formats(formats)
            return {
                'id': programme_id,
                'title': title,
                'description': description,
                'duration': duration,
                'timestamp': timestamp,
                'formats': formats,
                'subtitles': subtitles,
            }

        # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
        programme_id = self._search_regex(
            [r'data-video-player-vpid="([\da-z]{8})"',
             r'<param[^>]+name="externalIdentifier"[^>]+value="([\da-z]{8})"'],
            webpage, 'vpid', default=None)
        if programme_id:
            formats, subtitles = self._download_media_selector(programme_id)
            self._sort_formats(formats)
            # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
            digital_data = self._parse_json(
                self._search_regex(
                    r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
                programme_id, fatal=False)
            page_info = digital_data.get('page', {}).get('pageInfo', {})
            # Prefer digitalData metadata, fall back to OpenGraph tags.
            title = page_info.get('pageName') or self._og_search_title(webpage)
            description = page_info.get('description') or self._og_search_description(webpage)
            timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
            return {
                'id': programme_id,
                'title': title,
                'description': description,
                'timestamp': timestamp,
                'formats': formats,
                'subtitles': subtitles,
            }

        # From here on the page is treated as a (possibly single-item) playlist.
        playlist_title = self._html_search_regex(
            r'<title>(.*?)(?:\s*-\s*BBC [^ ]+)?</title>', webpage, 'playlist title')
        playlist_description = self._og_search_description(webpage, default=None)

        def extract_all(pattern):
            # Parse every JSON blob matched by *pattern*, dropping failures.
            return list(filter(None, map(
                lambda s: self._parse_json(s, playlist_id, fatal=False),
                re.findall(pattern, webpage))))

        # Multiple video article (e.g.
        # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
        EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+[\da-z]{8}(?:\b[^"]+)?'
        entries = []
        for match in extract_all(r'new\s+SMP\(({.+?})\)'):
            embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
            if embed_url and re.match(EMBED_URL, embed_url):
                entries.append(embed_url)
        entries.extend(re.findall(
            r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
        if entries:
            # Each embed URL is handled by the BBCCoUk extractor.
            return self.playlist_result(
                [self.url_result(entry, 'BBCCoUk') for entry in entries],
                playlist_id, playlist_title, playlist_description)

        # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
        medias = extract_all(r"data-media-meta='({[^']+})'")

        if not medias:
            # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
            media_asset_page = self._parse_json(
                self._search_regex(
                    r'mediaAssetPage\.init\(\s*({.+?}), "/', webpage, 'media asset'),
                playlist_id)
            medias = []
            for video in media_asset_page.get('videos', {}).values():
                medias.extend(video.values())

        entries = []
        for num, media_meta in enumerate(medias, start=1):
            formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
            if not formats:
                # Nothing playable in this blob; skip it.
                continue
            self._sort_formats(formats)

            video_id = media_meta.get('externalId')
            if not video_id:
                # Synthesise an id; keep the bare playlist id for single items.
                video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)

            title = media_meta.get('caption')
            if not title:
                title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)

            duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))

            # Thumbnails can live under 'images' (nested dicts) and/or 'image'.
            images = []
            for image in media_meta.get('images', {}).values():
                images.extend(image.values())
            if 'image' in media_meta:
                images.append(media_meta['image'])

            thumbnails = [{
                'url': image.get('href'),
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
            } for image in images]

            entries.append({
                'id': video_id,
                'title': title,
                'thumbnails': thumbnails,
                'duration': duration,
                'timestamp': timestamp,
                'formats': formats,
                'subtitles': subtitles,
            })

        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
||||||
@@ -1,380 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
|
||||||
)
|
|
||||||
from ..compat import compat_HTTPError
|
|
||||||
|
|
||||||
|
|
||||||
class BBCCoUkIE(InfoExtractor):
|
|
||||||
IE_NAME = 'bbc.co.uk'
|
|
||||||
IE_DESC = 'BBC iPlayer'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
|
||||||
|
|
||||||
_TESTS = [
|
|
||||||
{
|
|
||||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'b039d07m',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Kaleidoscope, Leonard Cohen',
|
|
||||||
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
|
||||||
'duration': 1740,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'b00yng1d',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'The Man in Black: Series 3: The Printed Name',
|
|
||||||
'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
|
|
||||||
'duration': 1800,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'skip': 'Episode is no longer available on BBC iPlayer Radio',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'b00yng1d',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'The Voice UK: Series 3: Blind Auditions 5',
|
|
||||||
'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
|
|
||||||
'duration': 5100,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'b03k3pb7',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
|
|
||||||
'description': '2. Invasion',
|
|
||||||
'duration': 3600,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'b04v209v',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Pete Tong, The Essential New Tune Special',
|
|
||||||
'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
|
|
||||||
'duration': 10800,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
|
|
||||||
'note': 'Audio',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'p02frcch',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
|
|
||||||
'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
|
|
||||||
'duration': 3507,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
|
|
||||||
'note': 'Video',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'p025c103',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
|
|
||||||
'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
|
|
||||||
'duration': 226,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'p02n76xf',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
|
|
||||||
'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
|
|
||||||
'duration': 3540,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'skip': 'geolocation',
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
|
|
||||||
'only_matching': True,
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
def _extract_asx_playlist(self, connection, programme_id):
|
|
||||||
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
|
||||||
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
|
||||||
|
|
||||||
def _extract_connection(self, connection, programme_id):
|
|
||||||
formats = []
|
|
||||||
protocol = connection.get('protocol')
|
|
||||||
supplier = connection.get('supplier')
|
|
||||||
if protocol == 'http':
|
|
||||||
href = connection.get('href')
|
|
||||||
# ASX playlist
|
|
||||||
if supplier == 'asx':
|
|
||||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
|
||||||
formats.append({
|
|
||||||
'url': ref,
|
|
||||||
'format_id': 'ref%s_%s' % (i, supplier),
|
|
||||||
})
|
|
||||||
# Direct link
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': href,
|
|
||||||
'format_id': supplier,
|
|
||||||
})
|
|
||||||
elif protocol == 'rtmp':
|
|
||||||
application = connection.get('application', 'ondemand')
|
|
||||||
auth_string = connection.get('authString')
|
|
||||||
identifier = connection.get('identifier')
|
|
||||||
server = connection.get('server')
|
|
||||||
formats.append({
|
|
||||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
|
||||||
'play_path': identifier,
|
|
||||||
'app': '%s?%s' % (application, auth_string),
|
|
||||||
'page_url': 'http://www.bbc.co.uk',
|
|
||||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
|
||||||
'rtmp_live': False,
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_id': supplier,
|
|
||||||
})
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _extract_items(self, playlist):
|
|
||||||
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
|
||||||
|
|
||||||
def _extract_medias(self, media_selection):
|
|
||||||
error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
|
|
||||||
if error is not None:
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
|
|
||||||
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
|
|
||||||
|
|
||||||
def _extract_connections(self, media):
|
|
||||||
return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
|
|
||||||
|
|
||||||
def _extract_video(self, media, programme_id):
|
|
||||||
formats = []
|
|
||||||
vbr = int(media.get('bitrate'))
|
|
||||||
vcodec = media.get('encoding')
|
|
||||||
service = media.get('service')
|
|
||||||
width = int(media.get('width'))
|
|
||||||
height = int(media.get('height'))
|
|
||||||
file_size = int(media.get('media_file_size'))
|
|
||||||
for connection in self._extract_connections(media):
|
|
||||||
conn_formats = self._extract_connection(connection, programme_id)
|
|
||||||
for format in conn_formats:
|
|
||||||
format.update({
|
|
||||||
'format_id': '%s_%s' % (service, format['format_id']),
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
'vbr': vbr,
|
|
||||||
'vcodec': vcodec,
|
|
||||||
'filesize': file_size,
|
|
||||||
})
|
|
||||||
formats.extend(conn_formats)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _extract_audio(self, media, programme_id):
|
|
||||||
formats = []
|
|
||||||
abr = int(media.get('bitrate'))
|
|
||||||
acodec = media.get('encoding')
|
|
||||||
service = media.get('service')
|
|
||||||
for connection in self._extract_connections(media):
|
|
||||||
conn_formats = self._extract_connection(connection, programme_id)
|
|
||||||
for format in conn_formats:
|
|
||||||
format.update({
|
|
||||||
'format_id': '%s_%s' % (service, format['format_id']),
|
|
||||||
'abr': abr,
|
|
||||||
'acodec': acodec,
|
|
||||||
})
|
|
||||||
formats.extend(conn_formats)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _get_subtitles(self, media, programme_id):
|
|
||||||
subtitles = {}
|
|
||||||
for connection in self._extract_connections(media):
|
|
||||||
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
|
||||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
|
||||||
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
|
|
||||||
srt = ''
|
|
||||||
|
|
||||||
def _extract_text(p):
|
|
||||||
if p.text is not None:
|
|
||||||
stripped_text = p.text.strip()
|
|
||||||
if stripped_text:
|
|
||||||
return stripped_text
|
|
||||||
return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
|
|
||||||
for pos, p in enumerate(ps):
|
|
||||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
|
|
||||||
subtitles[lang] = [
|
|
||||||
{
|
|
||||||
'url': connection.get('href'),
|
|
||||||
'ext': 'ttml',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'data': srt,
|
|
||||||
'ext': 'srt',
|
|
||||||
},
|
|
||||||
]
|
|
||||||
return subtitles
|
|
||||||
|
|
||||||
def _download_media_selector(self, programme_id):
|
|
||||||
try:
|
|
||||||
media_selection = self._download_xml(
|
|
||||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
|
|
||||||
programme_id, 'Downloading media selection XML')
|
|
||||||
except ExtractorError as ee:
|
|
||||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
|
||||||
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8'))
|
|
||||||
else:
|
|
||||||
raise
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
subtitles = None
|
|
||||||
|
|
||||||
for media in self._extract_medias(media_selection):
|
|
||||||
kind = media.get('kind')
|
|
||||||
if kind == 'audio':
|
|
||||||
formats.extend(self._extract_audio(media, programme_id))
|
|
||||||
elif kind == 'video':
|
|
||||||
formats.extend(self._extract_video(media, programme_id))
|
|
||||||
elif kind == 'captions':
|
|
||||||
subtitles = self.extract_subtitles(media, programme_id)
|
|
||||||
|
|
||||||
return formats, subtitles
|
|
||||||
|
|
||||||
def _download_playlist(self, playlist_id):
|
|
||||||
try:
|
|
||||||
playlist = self._download_json(
|
|
||||||
'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
|
|
||||||
playlist_id, 'Downloading playlist JSON')
|
|
||||||
|
|
||||||
version = playlist.get('defaultAvailableVersion')
|
|
||||||
if version:
|
|
||||||
smp_config = version['smpConfig']
|
|
||||||
title = smp_config['title']
|
|
||||||
description = smp_config['summary']
|
|
||||||
for item in smp_config['items']:
|
|
||||||
kind = item['kind']
|
|
||||||
if kind != 'programme' and kind != 'radioProgramme':
|
|
||||||
continue
|
|
||||||
programme_id = item.get('vpid')
|
|
||||||
duration = int(item.get('duration'))
|
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
|
||||||
return programme_id, title, description, duration, formats, subtitles
|
|
||||||
except ExtractorError as ee:
|
|
||||||
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
|
|
||||||
raise
|
|
||||||
|
|
||||||
# fallback to legacy playlist
|
|
||||||
playlist = self._download_xml(
|
|
||||||
'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
|
|
||||||
playlist_id, 'Downloading legacy playlist XML')
|
|
||||||
|
|
||||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
|
||||||
if no_items is not None:
|
|
||||||
reason = no_items.get('reason')
|
|
||||||
if reason == 'preAvailability':
|
|
||||||
msg = 'Episode %s is not yet available' % playlist_id
|
|
||||||
elif reason == 'postAvailability':
|
|
||||||
msg = 'Episode %s is no longer available' % playlist_id
|
|
||||||
elif reason == 'noMedia':
|
|
||||||
msg = 'Episode %s is not currently available' % playlist_id
|
|
||||||
else:
|
|
||||||
msg = 'Episode %s is not available: %s' % (playlist_id, reason)
|
|
||||||
raise ExtractorError(msg, expected=True)
|
|
||||||
|
|
||||||
for item in self._extract_items(playlist):
|
|
||||||
kind = item.get('kind')
|
|
||||||
if kind != 'programme' and kind != 'radioProgramme':
|
|
||||||
continue
|
|
||||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
|
||||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
|
||||||
programme_id = item.get('identifier')
|
|
||||||
duration = int(item.get('duration'))
|
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
|
||||||
|
|
||||||
return programme_id, title, description, duration, formats, subtitles
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
group_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
|
||||||
|
|
||||||
programme_id = None
|
|
||||||
|
|
||||||
tviplayer = self._search_regex(
|
|
||||||
r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
|
|
||||||
webpage, 'player', default=None)
|
|
||||||
|
|
||||||
if tviplayer:
|
|
||||||
player = self._parse_json(tviplayer, group_id).get('player', {})
|
|
||||||
duration = int_or_none(player.get('duration'))
|
|
||||||
programme_id = player.get('vpid')
|
|
||||||
|
|
||||||
if not programme_id:
|
|
||||||
programme_id = self._search_regex(
|
|
||||||
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
|
|
||||||
|
|
||||||
if programme_id:
|
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
|
||||||
title = self._og_search_title(webpage)
|
|
||||||
description = self._search_regex(
|
|
||||||
r'<p class="medium-description">([^<]+)</p>',
|
|
||||||
webpage, 'description', fatal=False)
|
|
||||||
else:
|
|
||||||
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': programme_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
|
||||||
'duration': duration,
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
}
|
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse
|
from ..compat import compat_urllib_parse_unquote
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
@@ -57,7 +57,7 @@ class BetIE(InfoExtractor):
|
|||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
media_url = compat_urllib_parse.unquote(self._search_regex(
|
media_url = compat_urllib_parse_unquote(self._search_regex(
|
||||||
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
|
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
|
||||||
webpage, 'media URL'))
|
webpage, 'media URL'))
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
import itertools
|
import itertools
|
||||||
|
import json
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -39,8 +41,15 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
if self._search_regex(r'(此视频不存在或被删除)', webpage, 'error message', default=None):
|
if '(此视频不存在或被删除)' in webpage:
|
||||||
raise ExtractorError('The video does not exist or was deleted', expected=True)
|
raise ExtractorError(
|
||||||
|
'The video does not exist or was deleted', expected=True)
|
||||||
|
|
||||||
|
if '>你没有权限浏览! 由于版权相关问题 我们不对您所在的地区提供服务<' in webpage:
|
||||||
|
raise ExtractorError(
|
||||||
|
'The video is not available in your region due to copyright reasons',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
video_code = self._search_regex(
|
video_code = self._search_regex(
|
||||||
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
|
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
|
||||||
|
|
||||||
@@ -67,11 +76,19 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
lq_doc = self._download_xml(
|
lq_page = self._download_webpage(
|
||||||
'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
|
'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
|
||||||
video_id,
|
video_id,
|
||||||
note='Downloading LQ video info'
|
note='Downloading LQ video info'
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
|
err_info = json.loads(lq_page)
|
||||||
|
raise ExtractorError(
|
||||||
|
'BiliBili said: ' + err_info['error_text'], expected=True)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
lq_doc = ET.fromstring(lq_page)
|
||||||
lq_durls = lq_doc.findall('./durl')
|
lq_durls = lq_doc.findall('./durl')
|
||||||
|
|
||||||
hq_doc = self._download_xml(
|
hq_doc = self._download_xml(
|
||||||
@@ -80,9 +97,11 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
note='Downloading HQ video info',
|
note='Downloading HQ video info',
|
||||||
fatal=False,
|
fatal=False,
|
||||||
)
|
)
|
||||||
hq_durls = hq_doc.findall('./durl') if hq_doc is not False else itertools.repeat(None)
|
if hq_doc is not False:
|
||||||
|
hq_durls = hq_doc.findall('./durl')
|
||||||
assert len(lq_durls) == len(hq_durls)
|
assert len(lq_durls) == len(hq_durls)
|
||||||
|
else:
|
||||||
|
hq_durls = itertools.repeat(None)
|
||||||
|
|
||||||
i = 1
|
i = 1
|
||||||
for lq_durl, hq_durl in zip(lq_durls, hq_durls):
|
for lq_durl, hq_durl in zip(lq_durls, hq_durls):
|
||||||
@@ -93,7 +112,7 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
'filesize': int_or_none(
|
'filesize': int_or_none(
|
||||||
lq_durl.find('./size'), get_attr='text'),
|
lq_durl.find('./size'), get_attr='text'),
|
||||||
}]
|
}]
|
||||||
if hq_durl:
|
if hq_durl is not None:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'hq',
|
'format_id': 'hq',
|
||||||
'quality': 2,
|
'quality': 2,
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
@@ -14,6 +13,8 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
xpath_text,
|
||||||
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -23,10 +24,10 @@ class BlipTVIE(InfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||||
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
'md5': '80baf1ec5c3d2019037c1c707d676b9f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5779306',
|
'id': '5779306',
|
||||||
'ext': 'mov',
|
'ext': 'm4v',
|
||||||
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
||||||
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
||||||
'timestamp': 1323138843,
|
'timestamp': 1323138843,
|
||||||
@@ -100,6 +101,20 @@ class BlipTVIE(InfoExtractor):
|
|||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# missing duration
|
||||||
|
'url': 'http://blip.tv/rss/flash/6700880',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6684191',
|
||||||
|
'ext': 'm4v',
|
||||||
|
'title': 'Cowboy Bebop: Gateway Shuffle Review',
|
||||||
|
'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8',
|
||||||
|
'timestamp': 1386639757,
|
||||||
|
'upload_date': '20131210',
|
||||||
|
'uploader': 'sfdebris',
|
||||||
|
'uploader_id': '706520',
|
||||||
|
}
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -128,35 +143,34 @@ class BlipTVIE(InfoExtractor):
|
|||||||
|
|
||||||
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
|
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
|
||||||
|
|
||||||
def blip(s):
|
def _x(p):
|
||||||
return '{http://blip.tv/dtd/blip/1.0}%s' % s
|
return xpath_with_ns(p, {
|
||||||
|
'blip': 'http://blip.tv/dtd/blip/1.0',
|
||||||
def media(s):
|
'media': 'http://search.yahoo.com/mrss/',
|
||||||
return '{http://search.yahoo.com/mrss/}%s' % s
|
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
|
||||||
|
})
|
||||||
def itunes(s):
|
|
||||||
return '{http://www.itunes.com/dtds/podcast-1.0.dtd}%s' % s
|
|
||||||
|
|
||||||
item = rss.find('channel/item')
|
item = rss.find('channel/item')
|
||||||
|
|
||||||
video_id = item.find(blip('item_id')).text
|
video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
|
||||||
title = item.find('./title').text
|
title = xpath_text(item, 'title', 'title', fatal=True)
|
||||||
description = clean_html(compat_str(item.find(blip('puredescription')).text))
|
description = clean_html(xpath_text(item, _x('blip:puredescription'), 'description'))
|
||||||
timestamp = parse_iso8601(item.find(blip('datestamp')).text)
|
timestamp = parse_iso8601(xpath_text(item, _x('blip:datestamp'), 'timestamp'))
|
||||||
uploader = item.find(blip('user')).text
|
uploader = xpath_text(item, _x('blip:user'), 'uploader')
|
||||||
uploader_id = item.find(blip('userid')).text
|
uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
|
||||||
duration = int(item.find(blip('runtime')).text)
|
duration = int_or_none(xpath_text(item, _x('blip:runtime'), 'duration'))
|
||||||
media_thumbnail = item.find(media('thumbnail'))
|
media_thumbnail = item.find(_x('media:thumbnail'))
|
||||||
thumbnail = media_thumbnail.get('url') if media_thumbnail is not None else item.find(itunes('image')).text
|
thumbnail = (media_thumbnail.get('url') if media_thumbnail is not None
|
||||||
categories = [category.text for category in item.findall('category')]
|
else xpath_text(item, 'image', 'thumbnail'))
|
||||||
|
categories = [category.text for category in item.findall('category') if category is not None]
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
subtitles_urls = {}
|
subtitles_urls = {}
|
||||||
|
|
||||||
media_group = item.find(media('group'))
|
media_group = item.find(_x('media:group'))
|
||||||
for media_content in media_group.findall(media('content')):
|
for media_content in media_group.findall(_x('media:content')):
|
||||||
url = media_content.get('url')
|
url = media_content.get('url')
|
||||||
role = media_content.get(blip('role'))
|
role = media_content.get(_x('blip:role'))
|
||||||
msg = self._download_webpage(
|
msg = self._download_webpage(
|
||||||
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
|
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
|
||||||
video_id, 'Resolving URL for %s' % role)
|
video_id, 'Resolving URL for %s' % role)
|
||||||
@@ -175,8 +189,8 @@ class BlipTVIE(InfoExtractor):
|
|||||||
'url': real_url,
|
'url': real_url,
|
||||||
'format_id': role,
|
'format_id': role,
|
||||||
'format_note': media_type,
|
'format_note': media_type,
|
||||||
'vcodec': media_content.get(blip('vcodec')) or 'none',
|
'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
|
||||||
'acodec': media_content.get(blip('acodec')),
|
'acodec': media_content.get(_x('blip:acodec')),
|
||||||
'filesize': media_content.get('filesize'),
|
'filesize': media_content.get('filesize'),
|
||||||
'width': int_or_none(media_content.get('width')),
|
'width': int_or_none(media_content.get('width')),
|
||||||
'height': int_or_none(media_content.get('height')),
|
'height': int_or_none(media_content.get('height')),
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ from ..compat import (
|
|||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
compat_xml_parse_error,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
@@ -119,7 +120,7 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
||||||
except xml.etree.ElementTree.ParseError:
|
except compat_xml_parse_error:
|
||||||
return
|
return
|
||||||
|
|
||||||
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
|
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
|
||||||
@@ -156,6 +157,28 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
linkBase = find_param('linkBaseURL')
|
linkBase = find_param('linkBaseURL')
|
||||||
if linkBase is not None:
|
if linkBase is not None:
|
||||||
params['linkBaseURL'] = linkBase
|
params['linkBaseURL'] = linkBase
|
||||||
|
return cls._make_brightcove_url(params)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _build_brighcove_url_from_js(cls, object_js):
|
||||||
|
# The layout of JS is as follows:
|
||||||
|
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
|
||||||
|
# // build Brightcove <object /> XML
|
||||||
|
# }
|
||||||
|
m = re.search(
|
||||||
|
r'''(?x)customBC.\createVideo\(
|
||||||
|
.*? # skipping width and height
|
||||||
|
["\'](?P<playerID>\d+)["\']\s*,\s* # playerID
|
||||||
|
["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters
|
||||||
|
# in length, however it's appended to itself
|
||||||
|
# in places, so truncate
|
||||||
|
["\'](?P<videoID>\d+)["\'] # @videoPlayer
|
||||||
|
''', object_js)
|
||||||
|
if m:
|
||||||
|
return cls._make_brightcove_url(m.groupdict())
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _make_brightcove_url(cls, params):
|
||||||
data = compat_urllib_parse.urlencode(params)
|
data = compat_urllib_parse.urlencode(params)
|
||||||
return cls._FEDERATED_URL_TEMPLATE % data
|
return cls._FEDERATED_URL_TEMPLATE % data
|
||||||
|
|
||||||
@@ -172,7 +195,7 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
"""Return a list of all Brightcove URLs from the webpage """
|
"""Return a list of all Brightcove URLs from the webpage """
|
||||||
|
|
||||||
url_m = re.search(
|
url_m = re.search(
|
||||||
r'<meta\s+property="og:video"\s+content="(https?://(?:secure|c)\.brightcove.com/[^"]+)"',
|
r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]',
|
||||||
webpage)
|
webpage)
|
||||||
if url_m:
|
if url_m:
|
||||||
url = unescapeHTML(url_m.group(1))
|
url = unescapeHTML(url_m.group(1))
|
||||||
@@ -188,7 +211,12 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
|
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
|
||||||
).+?>\s*</object>''',
|
).+?>\s*</object>''',
|
||||||
webpage)
|
webpage)
|
||||||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
if matches:
|
||||||
|
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
||||||
|
|
||||||
|
return list(filter(None, [
|
||||||
|
cls._build_brighcove_url_from_js(custom_bc)
|
||||||
|
for custom_bc in re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)]))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
|||||||
@@ -106,15 +106,11 @@ class CanalplusIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
format_id = fmt.tag
|
format_id = fmt.tag
|
||||||
if format_id == 'HLS':
|
if format_id == 'HLS':
|
||||||
hls_formats = self._extract_m3u8_formats(format_url, video_id, 'flv')
|
formats.extend(self._extract_m3u8_formats(
|
||||||
for fmt in hls_formats:
|
format_url, video_id, 'mp4', preference=preference(format_id)))
|
||||||
fmt['preference'] = preference(format_id)
|
|
||||||
formats.extend(hls_formats)
|
|
||||||
elif format_id == 'HDS':
|
elif format_id == 'HDS':
|
||||||
hds_formats = self._extract_f4m_formats(format_url + '?hdcore=2.11.3', video_id)
|
formats.extend(self._extract_f4m_formats(
|
||||||
for fmt in hds_formats:
|
format_url + '?hdcore=2.11.3', video_id, preference=preference(format_id)))
|
||||||
fmt['preference'] = preference(format_id)
|
|
||||||
formats.extend(hds_formats)
|
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
|
|||||||
@@ -4,12 +4,13 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class CBSIE(InfoExtractor):
|
class CBSIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(?:video|artist)/(?P<id>[^/]+)/.*'
|
_VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4JUVEwq3wUT7',
|
'id': '4JUVEwq3wUT7',
|
||||||
|
'display_id': 'connect-chat-feat-garth-brooks',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Connect Chat feat. Garth Brooks',
|
'title': 'Connect Chat feat. Garth Brooks',
|
||||||
'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
|
'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
|
||||||
@@ -24,6 +25,7 @@ class CBSIE(InfoExtractor):
|
|||||||
'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
|
'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'WWF_5KqY3PK1',
|
'id': 'WWF_5KqY3PK1',
|
||||||
|
'display_id': 'st-vincent',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Live on Letterman - St. Vincent',
|
'title': 'Live on Letterman - St. Vincent',
|
||||||
'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
|
'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
|
||||||
@@ -34,12 +36,23 @@ class CBSIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'_skip': 'Blocked outside the US',
|
'_skip': 'Blocked outside the US',
|
||||||
|
}, {
|
||||||
|
'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
real_id = self._search_regex(
|
real_id = self._search_regex(
|
||||||
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
[r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
|
||||||
webpage, 'real video ID')
|
webpage, 'real video ID')
|
||||||
return self.url_result('theplatform:%s' % real_id)
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': 'ThePlatform',
|
||||||
|
'url': 'theplatform:%s' % real_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
}
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
|||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -88,7 +89,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
if playlist_url == 'error_region':
|
if playlist_url == 'error_region':
|
||||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||||
|
|
||||||
req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
|
req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
|
||||||
req.add_header('Referer', url)
|
req.add_header('Referer', url)
|
||||||
|
|
||||||
playlist = self._download_json(req, video_id)
|
playlist = self._download_json(req, video_id)
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ class ChilloutzoneIE(InfoExtractor):
|
|||||||
|
|
||||||
base64_video_info = self._html_search_regex(
|
base64_video_info = self._html_search_regex(
|
||||||
r'var cozVidData = "(.+?)";', webpage, 'video data')
|
r'var cozVidData = "(.+?)";', webpage, 'video data')
|
||||||
decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
|
decoded_video_info = base64.b64decode(base64_video_info.encode('utf-8')).decode('utf-8')
|
||||||
video_info_dict = json.loads(decoded_video_info)
|
video_info_dict = json.loads(decoded_video_info)
|
||||||
|
|
||||||
# get video information from dict
|
# get video information from dict
|
||||||
|
|||||||
@@ -60,6 +60,17 @@ class CinemassacreIE(InfoExtractor):
|
|||||||
'uploader_id': 'Cinemassacre',
|
'uploader_id': 'Cinemassacre',
|
||||||
'title': 'AVGN: McKids',
|
'title': 'AVGN: McKids',
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/',
|
||||||
|
'md5': '1376908e49572389e7b06251a53cdd08',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Cinemassacre-555779690c440',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
|
||||||
|
'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
|
||||||
|
'upload_date': '20150525',
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -72,7 +83,7 @@ class CinemassacreIE(InfoExtractor):
|
|||||||
|
|
||||||
playerdata_url = self._search_regex(
|
playerdata_url = self._search_regex(
|
||||||
[
|
[
|
||||||
r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
|
r'src="(http://(?:player2\.screenwavemedia\.com|player\.screenwavemedia\.com/play)/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
|
||||||
r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
|
r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
|
||||||
],
|
],
|
||||||
webpage, 'player data URL', default=None)
|
webpage, 'player data URL', default=None)
|
||||||
|
|||||||
@@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
@@ -10,9 +8,9 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class ClipsyndicateIE(InfoExtractor):
|
class ClipsyndicateIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
_VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
||||||
'md5': '4d7d549451bad625e0ff3d7bd56d776c',
|
'md5': '4d7d549451bad625e0ff3d7bd56d776c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -22,11 +20,13 @@ class ClipsyndicateIE(InfoExtractor):
|
|||||||
'duration': 612,
|
'duration': 612,
|
||||||
'thumbnail': 're:^https?://.+\.jpg',
|
'thumbnail': 're:^https?://.+\.jpg',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
js_player = self._download_webpage(
|
js_player = self._download_webpage(
|
||||||
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
|
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
|
||||||
video_id, 'Downlaoding player')
|
video_id, 'Downlaoding player')
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class CNETIE(InfoExtractor):
|
class CNETIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
|
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
|
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
|
'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
|
||||||
@@ -25,7 +25,20 @@ class CNETIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'requires rtmpdump',
|
'skip_download': 'requires rtmpdump',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',
|
||||||
|
'ext': 'flv',
|
||||||
|
'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',
|
||||||
|
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
|
||||||
|
'uploader': 'Ashley Esqueda',
|
||||||
|
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # requires rtmpdump
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
@@ -42,7 +55,7 @@ class CNETIE(InfoExtractor):
|
|||||||
raise ExtractorError('Cannot find video data')
|
raise ExtractorError('Cannot find video data')
|
||||||
|
|
||||||
mpx_account = data['config']['players']['default']['mpx_account']
|
mpx_account = data['config']['players']['default']['mpx_account']
|
||||||
vid = vdata['files']['rtmp']
|
vid = vdata['files'].get('rtmp', vdata['files']['hds'])
|
||||||
tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
|
tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
|
||||||
|
|
||||||
video_id = vdata['id']
|
video_id = vdata['id']
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class CNNIE(InfoExtractor):
|
class CNNIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
|
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
|
||||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))'''
|
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ class ComCarCoffIE(InfoExtractor):
|
|||||||
webpage, 'full data json'))
|
webpage, 'full data json'))
|
||||||
|
|
||||||
video_id = full_data['activeVideo']['video']
|
video_id = full_data['activeVideo']['video']
|
||||||
video_data = full_data['videos'][video_id]
|
video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
'url': video_data['images']['thumb'],
|
'url': video_data['images']['thumb'],
|
||||||
}, {
|
}, {
|
||||||
|
|||||||
@@ -22,18 +22,20 @@ from ..compat import (
|
|||||||
compat_str,
|
compat_str,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
NO_DEFAULT,
|
||||||
age_restricted,
|
age_restricted,
|
||||||
bug_reports_message,
|
bug_reports_message,
|
||||||
clean_html,
|
clean_html,
|
||||||
compiled_regex_type,
|
compiled_regex_type,
|
||||||
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
fix_xml_ampersands,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
_NO_DEFAULT = object()
|
|
||||||
|
|
||||||
|
|
||||||
class InfoExtractor(object):
|
class InfoExtractor(object):
|
||||||
@@ -63,7 +65,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
Potential fields:
|
Potential fields:
|
||||||
* url Mandatory. The URL of the video file
|
* url Mandatory. The URL of the video file
|
||||||
* ext Will be calculated from url if missing
|
* ext Will be calculated from URL if missing
|
||||||
* format A human-readable description of the format
|
* format A human-readable description of the format
|
||||||
("mp4 container with h264/opus").
|
("mp4 container with h264/opus").
|
||||||
Calculated from the format_id, width, height.
|
Calculated from the format_id, width, height.
|
||||||
@@ -153,7 +155,7 @@ class InfoExtractor(object):
|
|||||||
lower to higher preference, each element is a dictionary
|
lower to higher preference, each element is a dictionary
|
||||||
with the "ext" entry and one of:
|
with the "ext" entry and one of:
|
||||||
* "data": The subtitles file contents
|
* "data": The subtitles file contents
|
||||||
* "url": A url pointing to the subtitles file
|
* "url": A URL pointing to the subtitles file
|
||||||
automatic_captions: Like 'subtitles', used by the YoutubeIE for
|
automatic_captions: Like 'subtitles', used by the YoutubeIE for
|
||||||
automatically generated captions
|
automatically generated captions
|
||||||
duration: Length of the video in seconds, as an integer.
|
duration: Length of the video in seconds, as an integer.
|
||||||
@@ -174,13 +176,17 @@ class InfoExtractor(object):
|
|||||||
Set to "root" to indicate that this is a
|
Set to "root" to indicate that this is a
|
||||||
comment to the original video.
|
comment to the original video.
|
||||||
age_limit: Age restriction for the video, as an integer (years)
|
age_limit: Age restriction for the video, as an integer (years)
|
||||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
webpage_url: The URL to the video webpage, if given to youtube-dl it
|
||||||
should allow to get the same result again. (It will be set
|
should allow to get the same result again. (It will be set
|
||||||
by YoutubeDL if it's missing)
|
by YoutubeDL if it's missing)
|
||||||
categories: A list of categories that the video falls in, for example
|
categories: A list of categories that the video falls in, for example
|
||||||
["Sports", "Berlin"]
|
["Sports", "Berlin"]
|
||||||
is_live: True, False, or None (=unknown). Whether this video is a
|
is_live: True, False, or None (=unknown). Whether this video is a
|
||||||
live stream that goes on instead of a fixed-length video.
|
live stream that goes on instead of a fixed-length video.
|
||||||
|
start_time: Time in seconds where the reproduction should start, as
|
||||||
|
specified in the URL.
|
||||||
|
end_time: Time in seconds where the reproduction should end, as
|
||||||
|
specified in the URL.
|
||||||
|
|
||||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||||
|
|
||||||
@@ -499,7 +505,7 @@ class InfoExtractor(object):
|
|||||||
# Methods for following #608
|
# Methods for following #608
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def url_result(url, ie=None, video_id=None, video_title=None):
|
def url_result(url, ie=None, video_id=None, video_title=None):
|
||||||
"""Returns a url that points to a page that should be processed"""
|
"""Returns a URL that points to a page that should be processed"""
|
||||||
# TODO: ie should be the class used for getting the info
|
# TODO: ie should be the class used for getting the info
|
||||||
video_info = {'_type': 'url',
|
video_info = {'_type': 'url',
|
||||||
'url': url,
|
'url': url,
|
||||||
@@ -523,7 +529,7 @@ class InfoExtractor(object):
|
|||||||
video_info['description'] = playlist_description
|
video_info['description'] = playlist_description
|
||||||
return video_info
|
return video_info
|
||||||
|
|
||||||
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||||
"""
|
"""
|
||||||
Perform a regex search on the given string, using a single or a list of
|
Perform a regex search on the given string, using a single or a list of
|
||||||
patterns returning the first matching group.
|
patterns returning the first matching group.
|
||||||
@@ -549,7 +555,7 @@ class InfoExtractor(object):
|
|||||||
return next(g for g in mobj.groups() if g is not None)
|
return next(g for g in mobj.groups() if g is not None)
|
||||||
else:
|
else:
|
||||||
return mobj.group(group)
|
return mobj.group(group)
|
||||||
elif default is not _NO_DEFAULT:
|
elif default is not NO_DEFAULT:
|
||||||
return default
|
return default
|
||||||
elif fatal:
|
elif fatal:
|
||||||
raise RegexNotFoundError('Unable to extract %s' % _name)
|
raise RegexNotFoundError('Unable to extract %s' % _name)
|
||||||
@@ -557,7 +563,7 @@ class InfoExtractor(object):
|
|||||||
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||||
"""
|
"""
|
||||||
Like _search_regex, but strips HTML tags and unescapes entities.
|
Like _search_regex, but strips HTML tags and unescapes entities.
|
||||||
"""
|
"""
|
||||||
@@ -633,7 +639,7 @@ class InfoExtractor(object):
|
|||||||
return unescapeHTML(escaped)
|
return unescapeHTML(escaped)
|
||||||
|
|
||||||
def _og_search_thumbnail(self, html, **kargs):
|
def _og_search_thumbnail(self, html, **kargs):
|
||||||
return self._og_search_property('image', html, 'thumbnail url', fatal=False, **kargs)
|
return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)
|
||||||
|
|
||||||
def _og_search_description(self, html, **kargs):
|
def _og_search_description(self, html, **kargs):
|
||||||
return self._og_search_property('description', html, fatal=False, **kargs)
|
return self._og_search_property('description', html, fatal=False, **kargs)
|
||||||
@@ -705,6 +711,25 @@ class InfoExtractor(object):
|
|||||||
return self._html_search_meta('twitter:player', html,
|
return self._html_search_meta('twitter:player', html,
|
||||||
'twitter card player')
|
'twitter card player')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _hidden_inputs(html):
|
||||||
|
return dict([
|
||||||
|
(input.group('name'), input.group('value')) for input in re.finditer(
|
||||||
|
r'''(?x)
|
||||||
|
<input\s+
|
||||||
|
type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
|
||||||
|
name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
|
||||||
|
(?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
|
||||||
|
value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)
|
||||||
|
''', html)
|
||||||
|
])
|
||||||
|
|
||||||
|
def _form_hidden_inputs(self, form_id, html):
|
||||||
|
form = self._search_regex(
|
||||||
|
r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
|
||||||
|
html, '%s form' % form_id, group='form')
|
||||||
|
return self._hidden_inputs(form)
|
||||||
|
|
||||||
def _sort_formats(self, formats, field_preference=None):
|
def _sort_formats(self, formats, field_preference=None):
|
||||||
if not formats:
|
if not formats:
|
||||||
raise ExtractorError('No video formats found')
|
raise ExtractorError('No video formats found')
|
||||||
@@ -815,10 +840,14 @@ class InfoExtractor(object):
|
|||||||
self.to_screen(msg)
|
self.to_screen(msg)
|
||||||
time.sleep(timeout)
|
time.sleep(timeout)
|
||||||
|
|
||||||
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
|
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
|
||||||
|
transform_source=lambda s: fix_xml_ampersands(s).strip()):
|
||||||
manifest = self._download_xml(
|
manifest = self._download_xml(
|
||||||
manifest_url, video_id, 'Downloading f4m manifest',
|
manifest_url, video_id, 'Downloading f4m manifest',
|
||||||
'Unable to download f4m manifest')
|
'Unable to download f4m manifest',
|
||||||
|
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
||||||
|
# (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
|
||||||
|
transform_source=transform_source)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
manifest_version = '1.0'
|
manifest_version = '1.0'
|
||||||
@@ -828,8 +857,19 @@ class InfoExtractor(object):
|
|||||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
||||||
for i, media_el in enumerate(media_nodes):
|
for i, media_el in enumerate(media_nodes):
|
||||||
if manifest_version == '2.0':
|
if manifest_version == '2.0':
|
||||||
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
|
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
|
||||||
(media_el.attrib.get('href') or media_el.attrib.get('url')))
|
if not media_url:
|
||||||
|
continue
|
||||||
|
manifest_url = (
|
||||||
|
media_url if media_url.startswith('http://') or media_url.startswith('https://')
|
||||||
|
else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
|
||||||
|
# If media_url is itself a f4m manifest do the recursive extraction
|
||||||
|
# since bitrates in parent manifest (this one) and media_url manifest
|
||||||
|
# may differ leading to inability to resolve the format by requested
|
||||||
|
# bitrate in f4m downloader
|
||||||
|
if determine_ext(manifest_url) == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(manifest_url, video_id, preference, f4m_id))
|
||||||
|
continue
|
||||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
|
'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
|
||||||
@@ -846,7 +886,8 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||||
entry_protocol='m3u8', preference=None,
|
entry_protocol='m3u8', preference=None,
|
||||||
m3u8_id=None):
|
m3u8_id=None, note=None, errnote=None,
|
||||||
|
fatal=True):
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
|
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
|
||||||
@@ -865,8 +906,11 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
m3u8_doc = self._download_webpage(
|
m3u8_doc = self._download_webpage(
|
||||||
m3u8_url, video_id,
|
m3u8_url, video_id,
|
||||||
note='Downloading m3u8 information',
|
note=note or 'Downloading m3u8 information',
|
||||||
errnote='Failed to download m3u8 information')
|
errnote=errnote or 'Failed to download m3u8 information',
|
||||||
|
fatal=fatal)
|
||||||
|
if m3u8_doc is False:
|
||||||
|
return m3u8_doc
|
||||||
last_info = None
|
last_info = None
|
||||||
last_media = None
|
last_media = None
|
||||||
kv_rex = re.compile(
|
kv_rex = re.compile(
|
||||||
@@ -956,7 +1000,7 @@ class InfoExtractor(object):
|
|||||||
def _parse_smil_video(self, video, video_id, base, rtmp_count):
|
def _parse_smil_video(self, video, video_id, base, rtmp_count):
|
||||||
src = video.get('src')
|
src = video.get('src')
|
||||||
if not src:
|
if not src:
|
||||||
return ([], rtmp_count)
|
return [], rtmp_count
|
||||||
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||||
width = int_or_none(video.get('width'))
|
width = int_or_none(video.get('width'))
|
||||||
height = int_or_none(video.get('height'))
|
height = int_or_none(video.get('height'))
|
||||||
@@ -969,7 +1013,7 @@ class InfoExtractor(object):
|
|||||||
proto = 'http'
|
proto = 'http'
|
||||||
ext = video.get('ext')
|
ext = video.get('ext')
|
||||||
if proto == 'm3u8':
|
if proto == 'm3u8':
|
||||||
return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
|
return self._extract_m3u8_formats(src, video_id, ext), rtmp_count
|
||||||
elif proto == 'rtmp':
|
elif proto == 'rtmp':
|
||||||
rtmp_count += 1
|
rtmp_count += 1
|
||||||
streamer = video.get('streamer') or base
|
streamer = video.get('streamer') or base
|
||||||
@@ -1076,7 +1120,7 @@ class InfoExtractor(object):
|
|||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
Base class for paged search queries extractors.
|
Base class for paged search queries extractors.
|
||||||
They accept urls in the format _SEARCH_KEY(|all|[0-9]):{query}
|
They accept URLs in the format _SEARCH_KEY(|all|[0-9]):{query}
|
||||||
Instances should define _SEARCH_KEY and _MAX_RESULTS.
|
Instances should define _SEARCH_KEY and _MAX_RESULTS.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from math import pow, sqrt, floor
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -27,7 +28,7 @@ from ..aes import (
|
|||||||
|
|
||||||
|
|
||||||
class CrunchyrollIE(InfoExtractor):
|
class CrunchyrollIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||||
_NETRC_MACHINE = 'crunchyroll'
|
_NETRC_MACHINE = 'crunchyroll'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||||
@@ -45,6 +46,22 @@ class CrunchyrollIE(InfoExtractor):
|
|||||||
# rtmp
|
# rtmp
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '589804',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
|
||||||
|
'description': 'md5:fe2743efedb49d279552926d0bd0cd9e',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'Danny Choo Network',
|
||||||
|
'upload_date': '20120213',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -76,8 +93,8 @@ class CrunchyrollIE(InfoExtractor):
|
|||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _decrypt_subtitles(self, data, iv, id):
|
def _decrypt_subtitles(self, data, iv, id):
|
||||||
data = bytes_to_intlist(data)
|
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
|
||||||
iv = bytes_to_intlist(iv)
|
iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
|
||||||
id = int(id)
|
id = int(id)
|
||||||
|
|
||||||
def obfuscate_key_aux(count, modulo, start):
|
def obfuscate_key_aux(count, modulo, start):
|
||||||
@@ -179,6 +196,16 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
def _extract_subtitles(self, subtitle):
|
||||||
|
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
||||||
|
return [{
|
||||||
|
'ext': 'srt',
|
||||||
|
'data': self._convert_subtitles_to_srt(sub_root),
|
||||||
|
}, {
|
||||||
|
'ext': 'ass',
|
||||||
|
'data': self._convert_subtitles_to_ass(sub_root),
|
||||||
|
}]
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, webpage):
|
def _get_subtitles(self, video_id, webpage):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
||||||
@@ -190,25 +217,11 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
||||||
if not id or not iv or not data:
|
if not id or not iv or not data:
|
||||||
continue
|
continue
|
||||||
id = int(id)
|
|
||||||
iv = base64.b64decode(iv)
|
|
||||||
data = base64.b64decode(data)
|
|
||||||
|
|
||||||
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
||||||
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
||||||
if not lang_code:
|
if not lang_code:
|
||||||
continue
|
continue
|
||||||
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
subtitles[lang_code] = self._extract_subtitles(subtitle)
|
||||||
subtitles[lang_code] = [
|
|
||||||
{
|
|
||||||
'ext': 'srt',
|
|
||||||
'data': self._convert_subtitles_to_srt(sub_root),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'ext': 'ass',
|
|
||||||
'data': self._convert_subtitles_to_ass(sub_root),
|
|
||||||
},
|
|
||||||
]
|
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -242,7 +255,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
video_upload_date = unified_strdate(video_upload_date)
|
video_upload_date = unified_strdate(video_upload_date)
|
||||||
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
|
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
|
||||||
|
|
||||||
playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
|
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
|
||||||
playerdata_req = compat_urllib_request.Request(playerdata_url)
|
playerdata_req = compat_urllib_request.Request(playerdata_url)
|
||||||
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
|
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
|
||||||
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
@@ -255,16 +268,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
|
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
|
||||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||||
video_format = fmt + 'p'
|
video_format = fmt + 'p'
|
||||||
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
|
streamdata_req = compat_urllib_request.Request(
|
||||||
# urlencode doesn't work!
|
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
|
||||||
streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality + '&media%5Fid=' + stream_id + '&video%5Fformat=' + stream_format
|
% (stream_id, stream_format, stream_quality),
|
||||||
|
compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))
|
||||||
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
|
|
||||||
streamdata = self._download_xml(
|
streamdata = self._download_xml(
|
||||||
streamdata_req, video_id,
|
streamdata_req, video_id,
|
||||||
note='Downloading media info for %s' % video_format)
|
note='Downloading media info for %s' % video_format)
|
||||||
video_url = streamdata.find('./host').text
|
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||||
video_play_path = streamdata.find('./file').text
|
video_url = stream_info.find('./host').text
|
||||||
|
video_play_path = stream_info.find('./file').text
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'play_path': video_play_path,
|
'play_path': video_play_path,
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from ..utils import parse_iso8601, ExtractorError
|
|||||||
|
|
||||||
|
|
||||||
class CtsNewsIE(InfoExtractor):
|
class CtsNewsIE(InfoExtractor):
|
||||||
|
IE_DESC = '華視新聞'
|
||||||
# https connection failed (Connection reset)
|
# https connection failed (Connection reset)
|
||||||
_VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
_VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
|||||||
@@ -13,8 +13,10 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
parse_iso8601,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
@@ -28,10 +30,16 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
|||||||
request.add_header('Cookie', 'family_filter=off; ff=off')
|
request.add_header('Cookie', 'family_filter=off; ff=off')
|
||||||
return request
|
return request
|
||||||
|
|
||||||
|
def _download_webpage_handle_no_ff(self, url, *args, **kwargs):
|
||||||
|
request = self._build_request(url)
|
||||||
|
return self._download_webpage_handle(request, *args, **kwargs)
|
||||||
|
|
||||||
|
def _download_webpage_no_ff(self, url, *args, **kwargs):
|
||||||
|
request = self._build_request(url)
|
||||||
|
return self._download_webpage(request, *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
"""Information Extractor for Dailymotion"""
|
|
||||||
|
|
||||||
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
||||||
IE_NAME = 'dailymotion'
|
IE_NAME = 'dailymotion'
|
||||||
|
|
||||||
@@ -50,9 +58,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'x2iuewm',
|
'id': 'x2iuewm',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'uploader': 'IGN',
|
|
||||||
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
|
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
|
||||||
|
'description': 'Several come bundled with the Steam Controller.',
|
||||||
|
'thumbnail': 're:^https?:.*\.(?:jpg|png)$',
|
||||||
|
'duration': 74,
|
||||||
|
'timestamp': 1425657362,
|
||||||
'upload_date': '20150306',
|
'upload_date': '20150306',
|
||||||
|
'uploader': 'IGN',
|
||||||
|
'uploader_id': 'xijv66',
|
||||||
|
'age_limit': 0,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# Vevo video
|
# Vevo video
|
||||||
@@ -86,38 +102,106 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
url = 'https://www.dailymotion.com/video/%s' % video_id
|
|
||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
webpage = self._download_webpage_no_ff(
|
||||||
request = self._build_request(url)
|
'https://www.dailymotion.com/video/%s' % video_id, video_id)
|
||||||
webpage = self._download_webpage(request, video_id)
|
|
||||||
|
|
||||||
# Extract URL, uploader and title from webpage
|
|
||||||
self.report_extraction(video_id)
|
|
||||||
|
|
||||||
# It may just embed a vevo video:
|
|
||||||
m_vevo = re.search(
|
|
||||||
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
|
|
||||||
webpage)
|
|
||||||
if m_vevo is not None:
|
|
||||||
vevo_id = m_vevo.group('id')
|
|
||||||
self.to_screen('Vevo video detected: %s' % vevo_id)
|
|
||||||
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
|
|
||||||
|
|
||||||
age_limit = self._rta_search(webpage)
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
video_upload_date = None
|
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||||
mobj = re.search(r'<meta property="video:release_date" content="([0-9]{4})-([0-9]{2})-([0-9]{2}).+?"/>', webpage)
|
'description', webpage, 'description')
|
||||||
if mobj is not None:
|
|
||||||
video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
|
view_count = str_to_int(self._search_regex(
|
||||||
|
[r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:(\d+)"',
|
||||||
|
r'video_views_count[^>]+>\s+([\d\.,]+)'],
|
||||||
|
webpage, 'view count', fatal=False))
|
||||||
|
comment_count = int_or_none(self._search_regex(
|
||||||
|
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
|
||||||
|
webpage, 'comment count', fatal=False))
|
||||||
|
|
||||||
|
player_v5 = self._search_regex(
|
||||||
|
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
|
||||||
|
webpage, 'player v5', default=None)
|
||||||
|
if player_v5:
|
||||||
|
player = self._parse_json(player_v5, video_id)
|
||||||
|
metadata = player['metadata']
|
||||||
|
formats = []
|
||||||
|
for quality, media_list in metadata['qualities'].items():
|
||||||
|
for media in media_list:
|
||||||
|
media_url = media.get('url')
|
||||||
|
if not media_url:
|
||||||
|
continue
|
||||||
|
type_ = media.get('type')
|
||||||
|
if type_ == 'application/vnd.lumberjack.manifest':
|
||||||
|
continue
|
||||||
|
if type_ == 'application/x-mpegURL' or determine_ext(media_url) == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
media_url, video_id, 'mp4', m3u8_id='hls'))
|
||||||
|
else:
|
||||||
|
f = {
|
||||||
|
'url': media_url,
|
||||||
|
'format_id': quality,
|
||||||
|
}
|
||||||
|
m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
|
||||||
|
if m:
|
||||||
|
f.update({
|
||||||
|
'width': int(m.group('width')),
|
||||||
|
'height': int(m.group('height')),
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = metadata['title']
|
||||||
|
duration = int_or_none(metadata.get('duration'))
|
||||||
|
timestamp = int_or_none(metadata.get('created_time'))
|
||||||
|
thumbnail = metadata.get('poster_url')
|
||||||
|
uploader = metadata.get('owner', {}).get('screenname')
|
||||||
|
uploader_id = metadata.get('owner', {}).get('id')
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for subtitle_lang, subtitle in metadata.get('subtitles', {}).get('data', {}).items():
|
||||||
|
subtitles[subtitle_lang] = [{
|
||||||
|
'ext': determine_ext(subtitle_url),
|
||||||
|
'url': subtitle_url,
|
||||||
|
} for subtitle_url in subtitle.get('urls', [])]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
'view_count': view_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
|
||||||
|
# vevo embed
|
||||||
|
vevo_id = self._search_regex(
|
||||||
|
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
|
||||||
|
webpage, 'vevo embed', default=None)
|
||||||
|
if vevo_id:
|
||||||
|
return self.url_result('vevo:%s' % vevo_id, 'Vevo')
|
||||||
|
|
||||||
|
# fallback old player
|
||||||
|
embed_page = self._download_webpage_no_ff(
|
||||||
|
'https://www.dailymotion.com/embed/video/%s' % video_id,
|
||||||
|
video_id, 'Downloading embed page')
|
||||||
|
|
||||||
|
timestamp = parse_iso8601(self._html_search_meta(
|
||||||
|
'video:release_date', webpage, 'upload date'))
|
||||||
|
|
||||||
|
info = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'var info = ({.*?}),$', embed_page,
|
||||||
|
'video info', flags=re.MULTILINE),
|
||||||
|
video_id)
|
||||||
|
|
||||||
embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id
|
|
||||||
embed_request = self._build_request(embed_url)
|
|
||||||
embed_page = self._download_webpage(
|
|
||||||
embed_request, video_id, 'Downloading embed page')
|
|
||||||
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
|
||||||
'video info', flags=re.MULTILINE)
|
|
||||||
info = json.loads(info)
|
|
||||||
if info.get('error') is not None:
|
if info.get('error') is not None:
|
||||||
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
||||||
raise ExtractorError(msg, expected=True)
|
raise ExtractorError(msg, expected=True)
|
||||||
@@ -138,16 +222,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
'width': width,
|
'width': width,
|
||||||
'height': height,
|
'height': height,
|
||||||
})
|
})
|
||||||
if not formats:
|
self._sort_formats(formats)
|
||||||
raise ExtractorError('Unable to extract video URL')
|
|
||||||
|
|
||||||
# subtitles
|
# subtitles
|
||||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
|
|
||||||
view_count = str_to_int(self._search_regex(
|
|
||||||
r'video_views_count[^>]+>\s+([\d\.,]+)',
|
|
||||||
webpage, 'view count', fatal=False))
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage, default=None)
|
title = self._og_search_title(webpage, default=None)
|
||||||
if title is None:
|
if title is None:
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
@@ -158,12 +237,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'uploader': info['owner.screenname'],
|
'uploader': info['owner.screenname'],
|
||||||
'upload_date': video_upload_date,
|
'timestamp': timestamp,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'description': description,
|
||||||
'subtitles': video_subtitles,
|
'subtitles': video_subtitles,
|
||||||
'thumbnail': info['thumbnail_url'],
|
'thumbnail': info['thumbnail_url'],
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
|
'duration': info['duration']
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, webpage):
|
def _get_subtitles(self, video_id, webpage):
|
||||||
@@ -198,10 +279,17 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
|||||||
|
|
||||||
def _extract_entries(self, id):
|
def _extract_entries(self, id):
|
||||||
video_ids = []
|
video_ids = []
|
||||||
|
processed_urls = set()
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
|
page_url = self._PAGE_TEMPLATE % (id, pagenum)
|
||||||
webpage = self._download_webpage(request,
|
webpage, urlh = self._download_webpage_handle_no_ff(
|
||||||
id, 'Downloading page %s' % pagenum)
|
page_url, id, 'Downloading page %s' % pagenum)
|
||||||
|
if urlh.geturl() in processed_urls:
|
||||||
|
self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
|
||||||
|
page_url, urlh.geturl()), id)
|
||||||
|
break
|
||||||
|
|
||||||
|
processed_urls.add(urlh.geturl())
|
||||||
|
|
||||||
video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
|
video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
|
||||||
|
|
||||||
@@ -225,7 +313,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
|||||||
|
|
||||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||||
IE_NAME = 'dailymotion:user'
|
IE_NAME = 'dailymotion:user'
|
||||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||||
@@ -234,12 +322,24 @@ class DailymotionUserIE(DailymotionPlaylistIE):
|
|||||||
'title': 'Rémi Gaillard',
|
'title': 'Rémi Gaillard',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 100,
|
'playlist_mincount': 100,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UnderProject',
|
||||||
|
'title': 'UnderProject',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1800,
|
||||||
|
'expected_warnings': [
|
||||||
|
'Stopped at duplicated page',
|
||||||
|
],
|
||||||
|
'skip': 'Takes too long time',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
user = mobj.group('user')
|
user = mobj.group('user')
|
||||||
webpage = self._download_webpage(url, user)
|
webpage = self._download_webpage(
|
||||||
|
'https://www.dailymotion.com/user/%s' % user, user)
|
||||||
full_user = unescapeHTML(self._html_search_regex(
|
full_user = unescapeHTML(self._html_search_regex(
|
||||||
r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
|
r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
|
||||||
webpage, 'user'))
|
webpage, 'user'))
|
||||||
@@ -250,3 +350,52 @@ class DailymotionUserIE(DailymotionPlaylistIE):
|
|||||||
'title': full_user,
|
'title': full_user,
|
||||||
'entries': self._extract_entries(user),
|
'entries': self._extract_entries(user),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class DailymotionCloudIE(DailymotionBaseInfoExtractor):
|
||||||
|
_VALID_URL_PREFIX = r'http://api\.dmcloud\.net/(?:player/)?embed/'
|
||||||
|
_VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX
|
||||||
|
_VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
# From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html
|
||||||
|
# Tested at FranceTvInfo_2
|
||||||
|
'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html
|
||||||
|
'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_dmcloud_url(self, webpage):
|
||||||
|
mobj = re.search(r'<iframe[^>]+src=[\'"](%s)[\'"]' % self._VALID_EMBED_URL, webpage)
|
||||||
|
if mobj:
|
||||||
|
return mobj.group(1)
|
||||||
|
|
||||||
|
mobj = re.search(
|
||||||
|
r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % self._VALID_EMBED_URL,
|
||||||
|
webpage)
|
||||||
|
if mobj:
|
||||||
|
return mobj.group(1)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage_no_ff(url, video_id)
|
||||||
|
|
||||||
|
title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')
|
||||||
|
|
||||||
|
video_info = self._parse_json(self._search_regex(
|
||||||
|
r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id)
|
||||||
|
|
||||||
|
# TODO: parse ios_url, which is in fact a manifest
|
||||||
|
video_url = video_info['mp4_url']
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': video_info.get('thumbnail_url'),
|
||||||
|
}
|
||||||
|
|||||||
@@ -3,42 +3,47 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
class DFBIE(InfoExtractor):
|
class DFBIE(InfoExtractor):
|
||||||
IE_NAME = 'tv.dfb.de'
|
IE_NAME = 'tv.dfb.de'
|
||||||
_VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'
|
_VALID_URL = r'https?://tv\.dfb\.de/video/(?P<display_id>[^/]+)/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
|
'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
|
||||||
# The md5 is different each time
|
# The md5 is different each time
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9070',
|
'id': '11633',
|
||||||
|
'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Highlights des Empfangs in Berlin',
|
'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
|
||||||
'upload_date': '20140716',
|
'upload_date': '20150714',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
display_id = mobj.group('display_id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
player_info = self._download_xml(
|
player_info = self._download_xml(
|
||||||
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
|
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
|
||||||
video_id)
|
display_id)
|
||||||
video_info = player_info.find('video')
|
video_info = player_info.find('video')
|
||||||
|
|
||||||
f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
|
f4m_info = self._download_xml(
|
||||||
|
self._proto_relative_url(video_info.find('url').text.strip()), display_id)
|
||||||
token_el = f4m_info.find('token')
|
token_el = f4m_info.find('token')
|
||||||
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
|
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
|
||||||
|
formats = self._extract_f4m_formats(manifest_url, display_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
'title': video_info.find('title').text,
|
'title': video_info.find('title').text,
|
||||||
'url': manifest_url,
|
|
||||||
'ext': 'flv',
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'upload_date': ''.join(video_info.find('time_date').text.split('.')[::-1]),
|
'upload_date': unified_strdate(video_info.find('time_date').text),
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,19 +2,19 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
int_or_none,
|
|
||||||
)
|
)
|
||||||
|
from ..compat import compat_str
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryIE(InfoExtractor):
|
class DiscoveryIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
|
_VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
||||||
'md5': '3c69d77d9b0d82bfd5e5932a60f26504',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mission-impossible-outtakes',
|
'id': '20769',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Mission Impossible Outtakes',
|
'title': 'Mission Impossible Outtakes',
|
||||||
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
||||||
' each other -- to the point of confusing Jamie\'s dog -- and '
|
' each other -- to the point of confusing Jamie\'s dog -- and '
|
||||||
@@ -24,22 +24,36 @@ class DiscoveryIE(InfoExtractor):
|
|||||||
'timestamp': 1303099200,
|
'timestamp': 1303099200,
|
||||||
'upload_date': '20110418',
|
'upload_date': '20110418',
|
||||||
},
|
},
|
||||||
}
|
'params': {
|
||||||
|
'skip_download': True, # requires ffmpeg
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mythbusters-the-simpsons',
|
||||||
|
'title': 'MythBusters: The Simpsons',
|
||||||
|
},
|
||||||
|
'playlist_count': 9,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
info = self._download_json(url + '?flat=1', video_id)
|
||||||
|
|
||||||
info = self._parse_json(self._search_regex(
|
video_title = info.get('playlist_title') or info.get('video_title')
|
||||||
r'(?s)<script type="application/ld\+json">(.*?)</script>',
|
|
||||||
webpage, 'video info'), video_id)
|
|
||||||
|
|
||||||
return {
|
entries = [{
|
||||||
'id': video_id,
|
'id': compat_str(video_info['id']),
|
||||||
'title': info['name'],
|
'formats': self._extract_m3u8_formats(
|
||||||
'url': info['contentURL'],
|
video_info['src'], video_id, ext='mp4',
|
||||||
'description': info.get('description'),
|
note='Download m3u8 information for video %d' % (idx + 1)),
|
||||||
'thumbnail': info.get('thumbnailUrl'),
|
'title': video_info['title'],
|
||||||
'timestamp': parse_iso8601(info.get('uploadDate')),
|
'description': video_info.get('description'),
|
||||||
'duration': int_or_none(info.get('duration')),
|
'duration': parse_duration(video_info.get('video_length')),
|
||||||
}
|
'webpage_url': video_info.get('href'),
|
||||||
|
'thumbnail': video_info.get('thumbnailURL'),
|
||||||
|
'alt_title': video_info.get('secondary_title'),
|
||||||
|
'timestamp': parse_iso8601(video_info.get('publishedDate')),
|
||||||
|
} for idx, video_info in enumerate(info['playlist'])]
|
||||||
|
|
||||||
|
return self.playlist_result(entries, video_id, video_title)
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ from ..compat import (compat_str, compat_basestring)
|
|||||||
|
|
||||||
|
|
||||||
class DouyuTVIE(InfoExtractor):
|
class DouyuTVIE(InfoExtractor):
|
||||||
|
IE_DESC = '斗鱼'
|
||||||
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
|
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.douyutv.com/iseven',
|
'url': 'http://www.douyutv.com/iseven',
|
||||||
|
|||||||
216
youtube_dl/extractor/dramafever.py
Normal file
216
youtube_dl/extractor/dramafever.py
Normal file
@@ -0,0 +1,216 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
clean_html,
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DramaFeverBaseIE(InfoExtractor):
|
||||||
|
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
||||||
|
_NETRC_MACHINE = 'dramafever'
|
||||||
|
|
||||||
|
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
|
||||||
|
|
||||||
|
_consumer_secret = None
|
||||||
|
|
||||||
|
def _get_consumer_secret(self):
|
||||||
|
mainjs = self._download_webpage(
|
||||||
|
'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
|
||||||
|
None, 'Downloading main.js', fatal=False)
|
||||||
|
if not mainjs:
|
||||||
|
return self._CONSUMER_SECRET
|
||||||
|
return self._search_regex(
|
||||||
|
r"var\s+cs\s*=\s*'([^']+)'", mainjs,
|
||||||
|
'consumer secret', default=self._CONSUMER_SECRET)
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
self._consumer_secret = self._get_consumer_secret()
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
(username, password) = self._get_login_info()
|
||||||
|
if username is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
login_form = {
|
||||||
|
'username': username,
|
||||||
|
'password': password,
|
||||||
|
}
|
||||||
|
|
||||||
|
request = compat_urllib_request.Request(
|
||||||
|
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||||
|
response = self._download_webpage(
|
||||||
|
request, None, 'Logging in as %s' % username)
|
||||||
|
|
||||||
|
if all(logout_pattern not in response
|
||||||
|
for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
|
||||||
|
error = self._html_search_regex(
|
||||||
|
r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
|
||||||
|
response, 'error message', default=None)
|
||||||
|
if error:
|
||||||
|
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||||
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
|
|
||||||
|
class DramaFeverIE(DramaFeverBaseIE):
|
||||||
|
IE_NAME = 'dramafever'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4512.1',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Cooking with Shin 4512.1',
|
||||||
|
'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'timestamp': 1404336058,
|
||||||
|
'upload_date': '20140702',
|
||||||
|
'duration': 343,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url).replace('/', '.')
|
||||||
|
|
||||||
|
try:
|
||||||
|
feed = self._download_json(
|
||||||
|
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
|
||||||
|
video_id, 'Downloading episode JSON')['channel']['item']
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError):
|
||||||
|
raise ExtractorError(
|
||||||
|
'Currently unavailable in your country.', expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
media_group = feed.get('media-group', {})
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for media_content in media_group['media-content']:
|
||||||
|
src = media_content.get('@attributes', {}).get('url')
|
||||||
|
if not src:
|
||||||
|
continue
|
||||||
|
ext = determine_ext(src)
|
||||||
|
if ext == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
src, video_id, f4m_id='hds'))
|
||||||
|
elif ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
src, video_id, 'mp4', m3u8_id='hls'))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': src,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = media_group.get('media-title')
|
||||||
|
description = media_group.get('media-description')
|
||||||
|
duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
|
||||||
|
thumbnail = self._proto_relative_url(
|
||||||
|
media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
|
||||||
|
timestamp = parse_iso8601(feed.get('pubDate'), ' ')
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for media_subtitle in media_group.get('media-subTitle', []):
|
||||||
|
lang = media_subtitle.get('@attributes', {}).get('lang')
|
||||||
|
href = media_subtitle.get('@attributes', {}).get('href')
|
||||||
|
if not lang or not href:
|
||||||
|
continue
|
||||||
|
subtitles[lang] = [{
|
||||||
|
'ext': 'ttml',
|
||||||
|
'url': href,
|
||||||
|
}]
|
||||||
|
|
||||||
|
series_id, episode_number = video_id.split('.')
|
||||||
|
episode_info = self._download_json(
|
||||||
|
# We only need a single episode info, so restricting page size to one episode
|
||||||
|
# and dealing with page number as with episode number
|
||||||
|
r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
|
||||||
|
% (self._consumer_secret, series_id, episode_number),
|
||||||
|
video_id, 'Downloading episode info JSON', fatal=False)
|
||||||
|
if episode_info:
|
||||||
|
value = episode_info.get('value')
|
||||||
|
if value:
|
||||||
|
subfile = value[0].get('subfile') or value[0].get('new_subfile')
|
||||||
|
if subfile and subfile != 'http://www.dramafever.com/st/':
|
||||||
|
subtitles.setdefault('English', []).append({
|
||||||
|
'ext': 'srt',
|
||||||
|
'url': subfile,
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
||||||
|
IE_NAME = 'dramafever:series'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4512',
|
||||||
|
'title': 'Cooking with Shin',
|
||||||
|
'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
|
||||||
|
},
|
||||||
|
'playlist_count': 4,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.dramafever.com/drama/124/IRIS/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '124',
|
||||||
|
'title': 'IRIS',
|
||||||
|
'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
|
||||||
|
},
|
||||||
|
'playlist_count': 20,
|
||||||
|
}]
|
||||||
|
|
||||||
|
_PAGE_SIZE = 60 # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
series_id = self._match_id(url)
|
||||||
|
|
||||||
|
series = self._download_json(
|
||||||
|
'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
|
||||||
|
% (self._consumer_secret, series_id),
|
||||||
|
series_id, 'Downloading series JSON')['series'][series_id]
|
||||||
|
|
||||||
|
title = clean_html(series['name'])
|
||||||
|
description = clean_html(series.get('description') or series.get('description_short'))
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for page_num in itertools.count(1):
|
||||||
|
episodes = self._download_json(
|
||||||
|
'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
|
||||||
|
% (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
|
||||||
|
series_id, 'Downloading episodes JSON page #%d' % page_num)
|
||||||
|
for episode in episodes.get('value', []):
|
||||||
|
episode_url = episode.get('episode_url')
|
||||||
|
if not episode_url:
|
||||||
|
continue
|
||||||
|
entries.append(self.url_result(
|
||||||
|
compat_urlparse.urljoin(url, episode_url),
|
||||||
|
'DramaFever', episode.get('guid')))
|
||||||
|
if page_num == episodes['num_pages']:
|
||||||
|
break
|
||||||
|
|
||||||
|
return self.playlist_result(entries, series_id, title, description)
|
||||||
@@ -15,7 +15,6 @@ class DRBonanzaIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
|
'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
|
||||||
'md5': 'fe330252ddea607635cf2eb2c99a0af3',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '65517',
|
'id': '65517',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -26,6 +25,9 @@ class DRBonanzaIE(InfoExtractor):
|
|||||||
'upload_date': '20110120',
|
'upload_date': '20110120',
|
||||||
'duration': 3664,
|
'duration': 3664,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # requires rtmp
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
|
'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
|
||||||
'md5': '6dfe039417e76795fb783c52da3de11d',
|
'md5': '6dfe039417e76795fb783c52da3de11d',
|
||||||
@@ -93,6 +95,11 @@ class DRBonanzaIE(InfoExtractor):
|
|||||||
'format_id': file['Type'].replace('Video', ''),
|
'format_id': file['Type'].replace('Video', ''),
|
||||||
'preference': preferencemap.get(file['Type'], -10),
|
'preference': preferencemap.get(file['Type'], -10),
|
||||||
})
|
})
|
||||||
|
if format['url'].startswith('rtmp'):
|
||||||
|
rtmp_url = format['url']
|
||||||
|
format['rtmp_live'] = True # --resume does not work
|
||||||
|
if '/bonanza/' in rtmp_url:
|
||||||
|
format['play_path'] = rtmp_url.split('/bonanza/')[1]
|
||||||
formats.append(format)
|
formats.append(format)
|
||||||
elif file['Type'] == "Thumb":
|
elif file['Type'] == "Thumb":
|
||||||
thumbnail = file['Location']
|
thumbnail = file['Location']
|
||||||
@@ -111,9 +118,6 @@ class DRBonanzaIE(InfoExtractor):
|
|||||||
description = '%s\n%s\n%s\n' % (
|
description = '%s\n%s\n%s\n' % (
|
||||||
info['Description'], info['Actors'], info['Colophon'])
|
info['Description'], info['Actors'], info['Colophon'])
|
||||||
|
|
||||||
for f in formats:
|
|
||||||
f['url'] = f['url'].replace('rtmp://vod-bonanza.gss.dr.dk/bonanza/', 'http://vodfiles.dr.dk/')
|
|
||||||
f['url'] = f['url'].replace('mp4:bonanza', 'bonanza')
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
|
display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
|
||||||
|
|||||||
@@ -36,25 +36,24 @@ class DrTuberIE(InfoExtractor):
|
|||||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
[r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
|
[r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<title>([^<]+) - \d+'],
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'poster="([^"]+)"',
|
r'poster="([^"]+)"',
|
||||||
webpage, 'thumbnail', fatal=False)
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
like_count = str_to_int(self._html_search_regex(
|
def extract_count(id_, name):
|
||||||
r'<span id="rate_likes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
|
return str_to_int(self._html_search_regex(
|
||||||
webpage, 'like count', fatal=False))
|
r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_,
|
||||||
dislike_count = str_to_int(self._html_search_regex(
|
webpage, '%s count' % name, fatal=False))
|
||||||
r'<span id="rate_dislikes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
|
|
||||||
webpage, 'like count', fatal=False))
|
like_count = extract_count('rate_likes', 'like')
|
||||||
comment_count = str_to_int(self._html_search_regex(
|
dislike_count = extract_count('rate_dislikes', 'dislike')
|
||||||
r'<span class="comments_count">([\d,\.]+)</span>',
|
comment_count = extract_count('comments_count', 'comment')
|
||||||
webpage, 'comment count', fatal=False))
|
|
||||||
|
|
||||||
cats_str = self._search_regex(
|
cats_str = self._search_regex(
|
||||||
r'<span>Categories:</span><div>(.+?)</div>', webpage, 'categories', fatal=False)
|
r'<div[^>]+class="categories_list">(.+?)</div>', webpage, 'categories', fatal=False)
|
||||||
categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str)
|
categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -1,8 +1,11 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor, ExtractorError
|
from .common import InfoExtractor
|
||||||
from ..utils import parse_iso8601
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class DRTVIE(InfoExtractor):
|
class DRTVIE(InfoExtractor):
|
||||||
@@ -60,19 +63,31 @@ class DRTVIE(InfoExtractor):
|
|||||||
restricted_to_denmark = asset['RestrictedToDenmark']
|
restricted_to_denmark = asset['RestrictedToDenmark']
|
||||||
spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
|
spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
|
||||||
for link in asset['Links']:
|
for link in asset['Links']:
|
||||||
target = link['Target']
|
|
||||||
uri = link['Uri']
|
uri = link['Uri']
|
||||||
|
target = link['Target']
|
||||||
format_id = target
|
format_id = target
|
||||||
preference = -1 if target == 'HDS' else -2
|
preference = None
|
||||||
if spoken_subtitles:
|
if spoken_subtitles:
|
||||||
preference -= 2
|
preference = -1
|
||||||
format_id += '-spoken-subtitles'
|
format_id += '-spoken-subtitles'
|
||||||
formats.append({
|
if target == 'HDS':
|
||||||
'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri,
|
formats.extend(self._extract_f4m_formats(
|
||||||
'format_id': format_id,
|
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
||||||
'ext': link['FileFormat'],
|
video_id, preference, f4m_id=format_id))
|
||||||
'preference': preference,
|
elif target == 'HLS':
|
||||||
})
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
uri, video_id, 'mp4', preference=preference,
|
||||||
|
m3u8_id=format_id))
|
||||||
|
else:
|
||||||
|
bitrate = link.get('Bitrate')
|
||||||
|
if bitrate:
|
||||||
|
format_id += '-%s' % bitrate
|
||||||
|
formats.append({
|
||||||
|
'url': uri,
|
||||||
|
'format_id': format_id,
|
||||||
|
'tbr': bitrate,
|
||||||
|
'ext': link.get('FileFormat'),
|
||||||
|
})
|
||||||
subtitles_list = asset.get('SubtitlesList')
|
subtitles_list = asset.get('SubtitlesList')
|
||||||
if isinstance(subtitles_list, list):
|
if isinstance(subtitles_list, list):
|
||||||
LANGS = {
|
LANGS = {
|
||||||
|
|||||||
@@ -1,9 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from ..compat import (
|
|
||||||
compat_urllib_parse,
|
|
||||||
)
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse_unquote
|
||||||
|
|
||||||
|
|
||||||
class EHowIE(InfoExtractor):
|
class EHowIE(InfoExtractor):
|
||||||
@@ -26,7 +24,7 @@ class EHowIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_url = self._search_regex(
|
video_url = self._search_regex(
|
||||||
r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
|
r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
|
||||||
final_url = compat_urllib_parse.unquote(video_url)
|
final_url = compat_urllib_parse_unquote(video_url)
|
||||||
uploader = self._html_search_meta('uploader', webpage)
|
uploader = self._html_search_meta('uploader', webpage)
|
||||||
title = self._og_search_title(webpage).replace(' | eHow', '')
|
title = self._og_search_title(webpage).replace(' | eHow', '')
|
||||||
|
|
||||||
|
|||||||
@@ -1,25 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .tnaflix import TNAFlixIE
|
|
||||||
|
|
||||||
|
|
||||||
class EMPFlixIE(TNAFlixIE):
|
|
||||||
_VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<display_id>[0-9a-zA-Z-]+)-(?P<id>[0-9]+)\.html'
|
|
||||||
|
|
||||||
_TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
|
|
||||||
_DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
|
|
||||||
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
|
|
||||||
'md5': 'b1bc15b6412d33902d6e5952035fcabc',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '33051',
|
|
||||||
'display_id': 'Amateur-Finger-Fuck',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Amateur Finger Fuck',
|
|
||||||
'description': 'Amateur solo finger fucking.',
|
|
||||||
'thumbnail': 're:https?://.*\.jpg$',
|
|
||||||
'age_limit': 18,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -9,7 +9,7 @@ from ..compat import (
|
|||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -50,7 +50,10 @@ class FacebookIE(InfoExtractor):
|
|||||||
'id': '274175099429670',
|
'id': '274175099429670',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Facebook video #274175099429670',
|
'title': 'Facebook video #274175099429670',
|
||||||
}
|
},
|
||||||
|
'expected_warnings': [
|
||||||
|
'title'
|
||||||
|
]
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -133,7 +136,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
raise ExtractorError('Cannot parse data')
|
raise ExtractorError('Cannot parse data')
|
||||||
data = dict(json.loads(m.group(1)))
|
data = dict(json.loads(m.group(1)))
|
||||||
params_raw = compat_urllib_parse.unquote(data['params'])
|
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||||
params = json.loads(params_raw)
|
params = json.loads(params_raw)
|
||||||
video_data = params['video_data'][0]
|
video_data = params['video_data'][0]
|
||||||
|
|
||||||
@@ -149,12 +152,12 @@ class FacebookIE(InfoExtractor):
|
|||||||
raise ExtractorError('Cannot find video formats')
|
raise ExtractorError('Cannot find video formats')
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
|
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
|
||||||
fatal=False)
|
default=None)
|
||||||
if not video_title:
|
if not video_title:
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
||||||
webpage, 'alternative title', default=None)
|
webpage, 'alternative title', fatal=False)
|
||||||
video_title = limit_length(video_title, 80)
|
video_title = limit_length(video_title, 80)
|
||||||
if not video_title:
|
if not video_title:
|
||||||
video_title = 'Facebook video #%s' % video_id
|
video_title = 'Facebook video #%s' % video_id
|
||||||
|
|||||||
@@ -6,9 +6,9 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
class FazIE(InfoExtractor):
|
class FazIE(InfoExtractor):
|
||||||
IE_NAME = 'faz.net'
|
IE_NAME = 'faz.net'
|
||||||
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
|
'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '12610585',
|
'id': '12610585',
|
||||||
@@ -16,7 +16,22 @@ class FazIE(InfoExtractor):
|
|||||||
'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
|
'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
|
||||||
'description': 'md5:1453fbf9a0d041d985a47306192ea253',
|
'description': 'md5:1453fbf9a0d041d985a47306192ea253',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.faz.net/-13659345.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.faz.net/aktuell/politik/-13659345.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.faz.net/foobarblafasel-13659345.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|||||||
@@ -1,80 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import (
|
|
||||||
compat_urllib_parse,
|
|
||||||
compat_urllib_request,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FiredriveIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \
|
|
||||||
'(?:file|embed)/(?P<id>[0-9a-zA-Z]+)'
|
|
||||||
_FILE_DELETED_REGEX = r'<div class="removed_file_image">'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.firedrive.com/file/FEB892FA160EBD01',
|
|
||||||
'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'FEB892FA160EBD01',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'bbb_theora_486kbit.flv',
|
|
||||||
'thumbnail': 're:^http://.*\.jpg$',
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
url = 'http://firedrive.com/file/%s' % video_id
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
|
|
||||||
raise ExtractorError('Video %s does not exist' % video_id,
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
fields = dict(re.findall(r'''(?x)<input\s+
|
|
||||||
type="hidden"\s+
|
|
||||||
name="([^"]+)"\s+
|
|
||||||
value="([^"]*)"
|
|
||||||
''', webpage))
|
|
||||||
|
|
||||||
post = compat_urllib_parse.urlencode(fields)
|
|
||||||
req = compat_urllib_request.Request(url, post)
|
|
||||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
|
||||||
|
|
||||||
# Apparently, this header is required for confirmation to work.
|
|
||||||
req.add_header('Host', 'www.firedrive.com')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(req, video_id,
|
|
||||||
'Downloading video page')
|
|
||||||
|
|
||||||
title = self._search_regex(r'class="external_title_left">(.+)</div>',
|
|
||||||
webpage, 'title')
|
|
||||||
thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage,
|
|
||||||
'thumbnail', fatal=False)
|
|
||||||
if thumbnail is not None:
|
|
||||||
thumbnail = 'http:' + thumbnail
|
|
||||||
|
|
||||||
ext = self._search_regex(r'type:\s?\'([^\']+)\',',
|
|
||||||
webpage, 'extension', fatal=False)
|
|
||||||
video_url = self._search_regex(
|
|
||||||
r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url')
|
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'format_id': 'sd',
|
|
||||||
'url': video_url,
|
|
||||||
'ext': ext,
|
|
||||||
}]
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
88
youtube_dl/extractor/fivetv.py
Normal file
88
youtube_dl/extractor/fivetv.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class FiveTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
http://
|
||||||
|
(?:www\.)?5-tv\.ru/
|
||||||
|
(?:
|
||||||
|
(?:[^/]+/)+(?P<id>\d+)|
|
||||||
|
(?P<path>[^/?#]+)(?:[/?#])?
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://5-tv.ru/news/96814/',
|
||||||
|
'md5': 'bbff554ad415ecf5416a2f48c22d9283',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '96814',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Россияне выбрали имя для общенациональной платежной системы',
|
||||||
|
'description': 'md5:a8aa13e2b7ad36789e9f77a74b6de660',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 180,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://5-tv.ru/video/1021729/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1021729',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '3D принтер',
|
||||||
|
'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 180,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.5-tv.ru/glavnoe/#itemDetails',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'glavnoe',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Итоги недели с 8 по 14 июня 2015 года',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://5-tv.ru/films/1507502/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://5-tv.ru/programs/broadcast/508713/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://5-tv.ru/angel/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.5-tv.ru/schedule/?iframe=true&width=900&height=450',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id') or mobj.group('path')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"',
|
||||||
|
webpage, 'video url')
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||||
|
r'<title>([^<]+)</title>', webpage, 'title')
|
||||||
|
duration = int_or_none(self._og_search_property(
|
||||||
|
'video:duration', webpage, 'duration', default=None))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'description': self._og_search_description(webpage, default=None),
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||||
|
'duration': duration,
|
||||||
|
}
|
||||||
@@ -6,18 +6,15 @@ import re
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_urlparse
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
float_or_none,
|
|
||||||
parse_duration,
|
parse_duration,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
)
|
)
|
||||||
|
from .dailymotion import DailymotionCloudIE
|
||||||
|
|
||||||
|
|
||||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||||
@@ -58,12 +55,12 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
|||||||
# See https://github.com/rg3/youtube-dl/issues/3963
|
# See https://github.com/rg3/youtube-dl/issues/3963
|
||||||
# m3u8 urls work fine
|
# m3u8 urls work fine
|
||||||
continue
|
continue
|
||||||
video_url_parsed = compat_urllib_parse_urlparse(video_url)
|
|
||||||
f4m_url = self._download_webpage(
|
f4m_url = self._download_webpage(
|
||||||
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
|
'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
|
||||||
video_id, 'Downloading f4m manifest token', fatal=False)
|
video_id, 'Downloading f4m manifest token', fatal=False)
|
||||||
if f4m_url:
|
if f4m_url:
|
||||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 1, format_id))
|
||||||
elif ext == 'm3u8':
|
elif ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
|
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
|
||||||
elif video_url.startswith('rtmp'):
|
elif video_url.startswith('rtmp'):
|
||||||
@@ -86,7 +83,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
|||||||
'title': info['titre'],
|
'title': info['titre'],
|
||||||
'description': clean_html(info['synopsis']),
|
'description': clean_html(info['synopsis']),
|
||||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||||
'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']),
|
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
||||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
@@ -131,12 +128,26 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
'skip_download': 'HLS (reqires ffmpeg)'
|
'skip_download': 'HLS (reqires ffmpeg)'
|
||||||
},
|
},
|
||||||
'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
|
'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
|
||||||
|
'md5': 'f485bda6e185e7d15dbc69b72bae993e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '556e03339473995ee145930c',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Les entreprises familiales : le secret de la réussite',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
page_title = mobj.group('title')
|
page_title = mobj.group('title')
|
||||||
webpage = self._download_webpage(url, page_title)
|
webpage = self._download_webpage(url, page_title)
|
||||||
|
|
||||||
|
dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
|
||||||
|
if dmcloud_url:
|
||||||
|
return self.url_result(dmcloud_url, 'DailymotionCloud')
|
||||||
|
|
||||||
video_id, catalogue = self._search_regex(
|
video_id, catalogue = self._search_regex(
|
||||||
r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@')
|
r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@')
|
||||||
return self._extract_video(video_id, catalogue)
|
return self._extract_video(video_id, catalogue)
|
||||||
@@ -145,11 +156,21 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||||
IE_NAME = 'francetv'
|
IE_NAME = 'francetv'
|
||||||
IE_DESC = 'France 2, 3, 4, 5 and Ô'
|
IE_DESC = 'France 2, 3, 4, 5 and Ô'
|
||||||
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
|
_VALID_URL = r'''(?x)
|
||||||
(?:
|
https?://
|
||||||
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
|
(?:
|
||||||
| (emissions?|jt)/(?P<key>[^/?]+)
|
(?:www\.)?france[2345o]\.fr/
|
||||||
)'''
|
(?:
|
||||||
|
emissions/[^/]+/(?:videos|diffusions)|
|
||||||
|
emission/[^/]+|
|
||||||
|
videos|
|
||||||
|
jt
|
||||||
|
)
|
||||||
|
/|
|
||||||
|
embed\.francetv\.fr/\?ue=
|
||||||
|
)
|
||||||
|
(?P<id>[^/?]+)
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# france2
|
# france2
|
||||||
@@ -206,24 +227,46 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
|||||||
},
|
},
|
||||||
# franceo
|
# franceo
|
||||||
{
|
{
|
||||||
'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
'url': 'http://www.franceo.fr/jt/info-soir/18-07-2015',
|
||||||
'md5': '52f0bfe202848b15915a2f39aaa8981b',
|
'md5': '47d5816d3b24351cdce512ad7ab31da8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '108634970',
|
'id': '125377621',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Infô Afrique',
|
'title': 'Infô soir',
|
||||||
'description': 'md5:ebf346da789428841bee0fd2a935ea55',
|
'description': 'md5:01b8c6915a3d93d8bbbd692651714309',
|
||||||
'upload_date': '20140915',
|
'upload_date': '20150718',
|
||||||
'timestamp': 1410822000,
|
'timestamp': 1437241200,
|
||||||
|
'duration': 414,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# francetv embed
|
||||||
|
'url': 'http://embed.francetv.fr/?ue=8d7d3da1e3047c42ade5a5d7dfd3fc87',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'EV_30231',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Alcaline, le concert avec Calogero',
|
||||||
|
'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
|
||||||
|
'upload_date': '20150226',
|
||||||
|
'timestamp': 1424989860,
|
||||||
|
'duration': 5400,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.france4.fr/emission/highlander/diffusion-du-17-07-2015-04h05',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.franceo.fr/videos/125377617',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, mobj.group('key') or mobj.group('id'))
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_id, catalogue = self._html_search_regex(
|
video_id, catalogue = self._html_search_regex(
|
||||||
r'href="http://videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
r'href="http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||||
webpage, 'video ID').split('@')
|
webpage, 'video ID').split('@')
|
||||||
return self._extract_video(video_id, catalogue)
|
return self._extract_video(video_id, catalogue)
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import json
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse_unquote,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -75,7 +75,7 @@ class GameSpotIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': data_video['guid'],
|
'id': data_video['guid'],
|
||||||
'display_id': page_id,
|
'display_id': page_id,
|
||||||
'title': compat_urllib_parse.unquote(data_video['title']),
|
'title': compat_urllib_parse_unquote(data_video['title']),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': self._html_search_meta('description', webpage),
|
'description': self._html_search_meta('description', webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
|||||||
@@ -8,7 +8,8 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_xml_parse_error,
|
compat_xml_parse_error,
|
||||||
)
|
)
|
||||||
@@ -32,13 +33,21 @@ from .brightcove import BrightcoveIE
|
|||||||
from .nbc import NBCSportsVPlayerIE
|
from .nbc import NBCSportsVPlayerIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
|
from .tvc import TVCIE
|
||||||
from .sportbox import SportBoxEmbedIE
|
from .sportbox import SportBoxEmbedIE
|
||||||
from .smotri import SmotriIE
|
from .smotri import SmotriIE
|
||||||
|
from .myvi import MyviIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .udn import UDNEmbedIE
|
from .udn import UDNEmbedIE
|
||||||
from .senateisvp import SenateISVPIE
|
from .senateisvp import SenateISVPIE
|
||||||
from .bliptv import BlipTVIE
|
from .bliptv import BlipTVIE
|
||||||
from .svt import SVTIE
|
from .svt import SVTIE
|
||||||
|
from .pornhub import PornHubIE
|
||||||
|
from .xhamster import XHamsterEmbedIE
|
||||||
|
from .vimeo import VimeoIE
|
||||||
|
from .dailymotion import DailymotionCloudIE
|
||||||
|
from .onionstudios import OnionStudiosIE
|
||||||
|
from .snagfilms import SnagFilmsEmbedIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@@ -46,6 +55,97 @@ class GenericIE(InfoExtractor):
|
|||||||
_VALID_URL = r'.*'
|
_VALID_URL = r'.*'
|
||||||
IE_NAME = 'generic'
|
IE_NAME = 'generic'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
|
# Direct link to a video
|
||||||
|
{
|
||||||
|
'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
|
||||||
|
'md5': '67d406c2bcb6af27fa886f31aa934bbe',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'trailer',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'trailer',
|
||||||
|
'upload_date': '20100513',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# Direct link to media delivered compressed (until Accept-Encoding is *)
|
||||||
|
{
|
||||||
|
'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
|
||||||
|
'md5': '128c42e68b13950268b648275386fc74',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'FictionJunction-Parallel_Hearts',
|
||||||
|
'ext': 'flac',
|
||||||
|
'title': 'FictionJunction-Parallel_Hearts',
|
||||||
|
'upload_date': '20140522',
|
||||||
|
},
|
||||||
|
'expected_warnings': [
|
||||||
|
'URL could be a direct video link, returning it as such.'
|
||||||
|
]
|
||||||
|
},
|
||||||
|
# Direct download with broken HEAD
|
||||||
|
{
|
||||||
|
'url': 'http://ai-radio.org:8000/radio.opus',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'radio',
|
||||||
|
'ext': 'opus',
|
||||||
|
'title': 'radio',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # infinite live stream
|
||||||
|
},
|
||||||
|
'expected_warnings': [
|
||||||
|
r'501.*Not Implemented'
|
||||||
|
],
|
||||||
|
},
|
||||||
|
# Direct link with incorrect MIME type
|
||||||
|
{
|
||||||
|
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
|
||||||
|
'md5': '4ccbebe5f36706d85221f204d7eb5913',
|
||||||
|
'info_dict': {
|
||||||
|
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
|
||||||
|
'id': '5_Lennart_Poettering_-_Systemd',
|
||||||
|
'ext': 'webm',
|
||||||
|
'title': '5_Lennart_Poettering_-_Systemd',
|
||||||
|
'upload_date': '20141120',
|
||||||
|
},
|
||||||
|
'expected_warnings': [
|
||||||
|
'URL could be a direct video link, returning it as such.'
|
||||||
|
]
|
||||||
|
},
|
||||||
|
# RSS feed
|
||||||
|
{
|
||||||
|
'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
|
||||||
|
'title': 'Zero Punctuation',
|
||||||
|
'description': 're:.*groundbreaking video review series.*'
|
||||||
|
},
|
||||||
|
'playlist_mincount': 11,
|
||||||
|
},
|
||||||
|
# RSS feed with enclosure
|
||||||
|
{
|
||||||
|
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||||
|
'ext': 'm4v',
|
||||||
|
'upload_date': '20150228',
|
||||||
|
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# google redirect
|
||||||
|
{
|
||||||
|
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'cmQHVoWB5FY',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20130224',
|
||||||
|
'uploader_id': 'TheVerge',
|
||||||
|
'description': 're:^Chris Ziegler takes a look at the\.*',
|
||||||
|
'uploader': 'The Verge',
|
||||||
|
'title': 'First Firefox OS phones side-by-side',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': False,
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
||||||
'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
|
'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
|
||||||
@@ -125,17 +225,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True, # m3u8 download
|
'skip_download': True, # m3u8 download
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# Direct link to a video
|
|
||||||
{
|
|
||||||
'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
|
|
||||||
'md5': '67d406c2bcb6af27fa886f31aa934bbe',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'trailer',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'trailer',
|
|
||||||
'upload_date': '20100513',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
# ooyala video
|
# ooyala video
|
||||||
{
|
{
|
||||||
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
||||||
@@ -160,22 +249,6 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Ooyala'],
|
'add_ie': ['Ooyala'],
|
||||||
},
|
},
|
||||||
# google redirect
|
|
||||||
{
|
|
||||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'cmQHVoWB5FY',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'upload_date': '20130224',
|
|
||||||
'uploader_id': 'TheVerge',
|
|
||||||
'description': 're:^Chris Ziegler takes a look at the\.*',
|
|
||||||
'uploader': 'The Verge',
|
|
||||||
'title': 'First Firefox OS phones side-by-side',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': False,
|
|
||||||
}
|
|
||||||
},
|
|
||||||
# embed.ly video
|
# embed.ly video
|
||||||
{
|
{
|
||||||
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
|
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
|
||||||
@@ -203,14 +276,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# BBC iPlayer embeds
|
|
||||||
{
|
|
||||||
'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'BBC - Blogs - Adam Curtis - BUGGER',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 18,
|
|
||||||
},
|
|
||||||
# RUTV embed
|
# RUTV embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
|
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
|
||||||
@@ -225,6 +290,15 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# TVC embed
|
||||||
|
{
|
||||||
|
'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '55304',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Дошкольное воспитание',
|
||||||
|
},
|
||||||
|
},
|
||||||
# SportBox embed
|
# SportBox embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.vestifinance.ru/articles/25753',
|
'url': 'http://www.vestifinance.ru/articles/25753',
|
||||||
@@ -256,6 +330,26 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# Myvi.ru embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ужастики, русский трейлер (2015)',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 153,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# XHamster embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'showthread',
|
||||||
|
'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 7,
|
||||||
|
},
|
||||||
# Embedded TED video
|
# Embedded TED video
|
||||||
{
|
{
|
||||||
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
|
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
|
||||||
@@ -305,6 +399,26 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': 'Requires rtmpdump'
|
'skip_download': 'Requires rtmpdump'
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
# francetv embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'EV_30231',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Alcaline, le concert avec Calogero',
|
||||||
|
'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
|
||||||
|
'upload_date': '20150226',
|
||||||
|
'timestamp': 1424989860,
|
||||||
|
'duration': 5400,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 downloads
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': [
|
||||||
|
'Forbidden'
|
||||||
|
]
|
||||||
|
},
|
||||||
# Condé Nast embed
|
# Condé Nast embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.wired.com/2014/04/honda-asimo/',
|
'url': 'http://www.wired.com/2014/04/honda-asimo/',
|
||||||
@@ -407,16 +521,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
|
'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# RSS feed
|
|
||||||
{
|
|
||||||
'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
|
|
||||||
'title': 'Zero Punctuation',
|
|
||||||
'description': 're:.*groundbreaking video review series.*'
|
|
||||||
},
|
|
||||||
'playlist_mincount': 11,
|
|
||||||
},
|
|
||||||
# Multiple brightcove videos
|
# Multiple brightcove videos
|
||||||
# https://github.com/rg3/youtube-dl/issues/2283
|
# https://github.com/rg3/youtube-dl/issues/2283
|
||||||
{
|
{
|
||||||
@@ -470,21 +574,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'uploader': 'thoughtworks.wistia.com',
|
'uploader': 'thoughtworks.wistia.com',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# Direct download with broken HEAD
|
|
||||||
{
|
|
||||||
'url': 'http://ai-radio.org:8000/radio.opus',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'radio',
|
|
||||||
'ext': 'opus',
|
|
||||||
'title': 'radio',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True, # infinite live stream
|
|
||||||
},
|
|
||||||
'expected_warnings': [
|
|
||||||
r'501.*Not Implemented'
|
|
||||||
],
|
|
||||||
},
|
|
||||||
# Soundcloud embed
|
# Soundcloud embed
|
||||||
{
|
{
|
||||||
'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
|
'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
|
||||||
@@ -516,21 +605,6 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
},
|
},
|
||||||
# Direct link with incorrect MIME type
|
|
||||||
{
|
|
||||||
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
|
|
||||||
'md5': '4ccbebe5f36706d85221f204d7eb5913',
|
|
||||||
'info_dict': {
|
|
||||||
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
|
|
||||||
'id': '5_Lennart_Poettering_-_Systemd',
|
|
||||||
'ext': 'webm',
|
|
||||||
'title': '5_Lennart_Poettering_-_Systemd',
|
|
||||||
'upload_date': '20141120',
|
|
||||||
},
|
|
||||||
'expected_warnings': [
|
|
||||||
'URL could be a direct video link, returning it as such.'
|
|
||||||
]
|
|
||||||
},
|
|
||||||
# Cinchcast embed
|
# Cinchcast embed
|
||||||
{
|
{
|
||||||
'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
|
'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
|
||||||
@@ -618,6 +692,18 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'John Carlson Postgame 2/25/15',
|
'title': 'John Carlson Postgame 2/25/15',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# Kaltura embed (different embed code)
|
||||||
|
{
|
||||||
|
'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1_a52wc67y',
|
||||||
|
'ext': 'flv',
|
||||||
|
'upload_date': '20150127',
|
||||||
|
'uploader_id': 'PremierMedia',
|
||||||
|
'timestamp': int,
|
||||||
|
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
|
||||||
|
},
|
||||||
|
},
|
||||||
# Eagle.Platform embed (generic URL)
|
# Eagle.Platform embed (generic URL)
|
||||||
{
|
{
|
||||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||||
@@ -689,16 +775,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# RSS feed with enclosure
|
|
||||||
{
|
|
||||||
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
|
||||||
'ext': 'm4v',
|
|
||||||
'upload_date': '20150228',
|
|
||||||
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
# Crooks and Liars embed
|
# Crooks and Liars embed
|
||||||
{
|
{
|
||||||
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
|
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
|
||||||
@@ -773,6 +849,62 @@ class GenericIE(InfoExtractor):
|
|||||||
# rtmpe downloads
|
# rtmpe downloads
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
# Brightcove URL in single quotes
|
||||||
|
{
|
||||||
|
'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
|
||||||
|
'md5': '4ae374f1f8b91c889c4b9203c8c752af',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4255764656001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'SN Presents: Russell Martin, World Citizen',
|
||||||
|
'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
|
||||||
|
'uploader': 'Rogers Sportsnet',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# Dailymotion Cloud video
|
||||||
|
{
|
||||||
|
'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
|
||||||
|
'md5': '49444254273501a64675a7e68c502681',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5585de919473990de4bee11b',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Le débat',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# OnionStudios embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2855',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||||
|
'uploader': 'ClickHole',
|
||||||
|
'uploader_id': 'clickhole',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# SnagFilms embed
|
||||||
|
{
|
||||||
|
'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '74849a00-85a9-11e1-9660-123139220831',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '#whilewewatch',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# AdobeTVVideo embed
|
||||||
|
{
|
||||||
|
'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
|
||||||
|
'md5': '43662b577c018ad707a63766462b1e87',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2456',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'New experience with Acrobat DC',
|
||||||
|
'description': 'New experience with Acrobat DC',
|
||||||
|
'duration': 248.667,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -894,7 +1026,7 @@ class GenericIE(InfoExtractor):
|
|||||||
force_videoid = smuggled_data['force_videoid']
|
force_videoid = smuggled_data['force_videoid']
|
||||||
video_id = force_videoid
|
video_id = force_videoid
|
||||||
else:
|
else:
|
||||||
video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
|
video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
|
||||||
|
|
||||||
self.to_screen('%s: Requesting header' % video_id)
|
self.to_screen('%s: Requesting header' % video_id)
|
||||||
|
|
||||||
@@ -916,7 +1048,9 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
full_response = None
|
full_response = None
|
||||||
if head_response is False:
|
if head_response is False:
|
||||||
full_response = self._request_webpage(url, video_id)
|
request = compat_urllib_request.Request(url)
|
||||||
|
request.add_header('Accept-Encoding', '*')
|
||||||
|
full_response = self._request_webpage(request, video_id)
|
||||||
head_response = full_response
|
head_response = full_response
|
||||||
|
|
||||||
# Check for direct link to a video
|
# Check for direct link to a video
|
||||||
@@ -927,7 +1061,7 @@ class GenericIE(InfoExtractor):
|
|||||||
head_response.headers.get('Last-Modified'))
|
head_response.headers.get('Last-Modified'))
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': os.path.splitext(url_basename(url))[0],
|
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||||
'direct': True,
|
'direct': True,
|
||||||
'formats': [{
|
'formats': [{
|
||||||
'format_id': m.group('format_id'),
|
'format_id': m.group('format_id'),
|
||||||
@@ -938,10 +1072,22 @@ class GenericIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||||
self._downloader.report_warning('Falling back on generic information extractor.')
|
force = self._downloader.params.get('force_generic_extractor', False)
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
|
||||||
|
|
||||||
if not full_response:
|
if not full_response:
|
||||||
full_response = self._request_webpage(url, video_id)
|
request = compat_urllib_request.Request(url)
|
||||||
|
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||||
|
# making it impossible to download only chunk of the file (yet we need only 512kB to
|
||||||
|
# test whether it's HTML or not). According to youtube-dl default Accept-Encoding
|
||||||
|
# that will always result in downloading the whole file that is not desirable.
|
||||||
|
# Therefore for extraction pass we have to override Accept-Encoding to any in order
|
||||||
|
# to accept raw bytes and being able to download only a chunk.
|
||||||
|
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
||||||
|
# after HEAD request finishes, but not sure if we can rely on this.
|
||||||
|
request.add_header('Accept-Encoding', '*')
|
||||||
|
full_response = self._request_webpage(request, video_id)
|
||||||
|
|
||||||
# Maybe it's a direct link to a video?
|
# Maybe it's a direct link to a video?
|
||||||
# Be careful not to download the whole thing!
|
# Be careful not to download the whole thing!
|
||||||
@@ -953,7 +1099,7 @@ class GenericIE(InfoExtractor):
|
|||||||
head_response.headers.get('Last-Modified'))
|
head_response.headers.get('Last-Modified'))
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': os.path.splitext(url_basename(url))[0],
|
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||||
'direct': True,
|
'direct': True,
|
||||||
'url': url,
|
'url': url,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
@@ -980,7 +1126,7 @@ class GenericIE(InfoExtractor):
|
|||||||
# Sometimes embedded video player is hidden behind percent encoding
|
# Sometimes embedded video player is hidden behind percent encoding
|
||||||
# (e.g. https://github.com/rg3/youtube-dl/issues/2448)
|
# (e.g. https://github.com/rg3/youtube-dl/issues/2448)
|
||||||
# Unescaping the whole page allows to handle those cases in a generic way
|
# Unescaping the whole page allows to handle those cases in a generic way
|
||||||
webpage = compat_urllib_parse.unquote(webpage)
|
webpage = compat_urllib_parse_unquote(webpage)
|
||||||
|
|
||||||
# it's tempting to parse this further, but you would
|
# it's tempting to parse this further, but you would
|
||||||
# have to take into account all the variations like
|
# have to take into account all the variations like
|
||||||
@@ -1033,23 +1179,20 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Look for embedded rtl.nl player
|
# Look for embedded rtl.nl player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
|
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
||||||
webpage)
|
webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return _playlist_from_matches(matches, ie='RtlNl')
|
return _playlist_from_matches(matches, ie='RtlNl')
|
||||||
|
|
||||||
# Look for embedded (iframe) Vimeo player
|
vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
|
||||||
mobj = re.search(
|
if vimeo_url is not None:
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
|
return self.url_result(vimeo_url)
|
||||||
if mobj:
|
|
||||||
player_url = unescapeHTML(mobj.group('url'))
|
vid_me_embed_url = self._search_regex(
|
||||||
surl = smuggle_url(player_url, {'Referer': url})
|
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||||
return self.url_result(surl)
|
webpage, 'vid.me embed', default=None)
|
||||||
# Look for embedded (swf embed) Vimeo player
|
if vid_me_embed_url is not None:
|
||||||
mobj = re.search(
|
return self.url_result(vid_me_embed_url, 'Vidme')
|
||||||
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
|
||||||
if mobj:
|
|
||||||
return self.url_result(mobj.group(1))
|
|
||||||
|
|
||||||
# Look for embedded YouTube player
|
# Look for embedded YouTube player
|
||||||
matches = re.findall(r'''(?x)
|
matches = re.findall(r'''(?x)
|
||||||
@@ -1243,7 +1386,7 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(mobj.group('url'))
|
return self.url_result(mobj.group('url'))
|
||||||
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
|
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
|
return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
|
||||||
|
|
||||||
# Look for funnyordie embed
|
# Look for funnyordie embed
|
||||||
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
||||||
@@ -1261,11 +1404,32 @@ class GenericIE(InfoExtractor):
|
|||||||
if rutv_url:
|
if rutv_url:
|
||||||
return self.url_result(rutv_url, 'RUTV')
|
return self.url_result(rutv_url, 'RUTV')
|
||||||
|
|
||||||
|
# Look for embedded TVC player
|
||||||
|
tvc_url = TVCIE._extract_url(webpage)
|
||||||
|
if tvc_url:
|
||||||
|
return self.url_result(tvc_url, 'TVC')
|
||||||
|
|
||||||
# Look for embedded SportBox player
|
# Look for embedded SportBox player
|
||||||
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
|
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
|
||||||
if sportbox_urls:
|
if sportbox_urls:
|
||||||
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
|
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
|
||||||
|
|
||||||
|
# Look for embedded PornHub player
|
||||||
|
pornhub_url = PornHubIE._extract_url(webpage)
|
||||||
|
if pornhub_url:
|
||||||
|
return self.url_result(pornhub_url, 'PornHub')
|
||||||
|
|
||||||
|
# Look for embedded XHamster player
|
||||||
|
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
|
||||||
|
if xhamster_urls:
|
||||||
|
return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
|
||||||
|
|
||||||
|
# Look for embedded Tvigle player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'), 'Tvigle')
|
||||||
|
|
||||||
# Look for embedded TED player
|
# Look for embedded TED player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
|
||||||
@@ -1285,11 +1449,23 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
||||||
|
|
||||||
|
# Look for embedded francetv player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
|
||||||
|
webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
# Look for embedded smotri.com player
|
# Look for embedded smotri.com player
|
||||||
smotri_url = SmotriIE._extract_url(webpage)
|
smotri_url = SmotriIE._extract_url(webpage)
|
||||||
if smotri_url:
|
if smotri_url:
|
||||||
return self.url_result(smotri_url, 'Smotri')
|
return self.url_result(smotri_url, 'Smotri')
|
||||||
|
|
||||||
|
# Look for embedded Myvi.ru player
|
||||||
|
myvi_url = MyviIE._extract_url(webpage)
|
||||||
|
if myvi_url:
|
||||||
|
return self.url_result(myvi_url)
|
||||||
|
|
||||||
# Look for embeded soundcloud player
|
# Look for embeded soundcloud player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
|
r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
|
||||||
@@ -1369,8 +1545,8 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||||
|
|
||||||
# Look for Kaltura embeds
|
# Look for Kaltura embeds
|
||||||
mobj = re.search(
|
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
|
||||||
r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
|
re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
|
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
|
||||||
|
|
||||||
@@ -1427,6 +1603,30 @@ class GenericIE(InfoExtractor):
|
|||||||
if senate_isvp_url:
|
if senate_isvp_url:
|
||||||
return self.url_result(senate_isvp_url, 'SenateISVP')
|
return self.url_result(senate_isvp_url, 'SenateISVP')
|
||||||
|
|
||||||
|
# Look for Dailymotion Cloud videos
|
||||||
|
dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
|
||||||
|
if dmcloud_url:
|
||||||
|
return self.url_result(dmcloud_url, 'DailymotionCloud')
|
||||||
|
|
||||||
|
# Look for OnionStudios embeds
|
||||||
|
onionstudios_url = OnionStudiosIE._extract_url(webpage)
|
||||||
|
if onionstudios_url:
|
||||||
|
return self.url_result(onionstudios_url)
|
||||||
|
|
||||||
|
# Look for SnagFilms embeds
|
||||||
|
snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
|
||||||
|
if snagfilms_url:
|
||||||
|
return self.url_result(snagfilms_url)
|
||||||
|
|
||||||
|
# Look for AdobeTVVideo embeds
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
||||||
|
webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(
|
||||||
|
self._proto_relative_url(unescapeHTML(mobj.group(1))),
|
||||||
|
'AdobeTVVideo')
|
||||||
|
|
||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
if YoutubeIE.suitable(vurl):
|
if YoutubeIE.suitable(vurl):
|
||||||
return True
|
return True
|
||||||
@@ -1494,7 +1694,7 @@ class GenericIE(InfoExtractor):
|
|||||||
if refresh_header:
|
if refresh_header:
|
||||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||||
if found:
|
if found:
|
||||||
new_url = compat_urlparse.urljoin(url, found.group(1))
|
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
|
||||||
self.report_following_redirect(new_url)
|
self.report_following_redirect(new_url)
|
||||||
return {
|
return {
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
@@ -1506,7 +1706,7 @@ class GenericIE(InfoExtractor):
|
|||||||
entries = []
|
entries = []
|
||||||
for video_url in found:
|
for video_url in found:
|
||||||
video_url = compat_urlparse.urljoin(url, video_url)
|
video_url = compat_urlparse.urljoin(url, video_url)
|
||||||
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
|
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
|
||||||
|
|
||||||
# Sometimes, jwplayer extraction will result in a YouTube URL
|
# Sometimes, jwplayer extraction will result in a YouTube URL
|
||||||
if YoutubeIE.suitable(video_url):
|
if YoutubeIE.suitable(video_url):
|
||||||
|
|||||||
@@ -6,12 +6,13 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
qualities,
|
qualities,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class GfycatIE(InfoExtractor):
|
class GfycatIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
|
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'DeadlyDecisiveGermanpinscher',
|
'id': 'DeadlyDecisiveGermanpinscher',
|
||||||
@@ -27,14 +28,33 @@ class GfycatIE(InfoExtractor):
|
|||||||
'categories': list,
|
'categories': list,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'JauntyTimelyAmazontreeboa',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'JauntyTimelyAmazontreeboa',
|
||||||
|
'timestamp': 1411720126,
|
||||||
|
'upload_date': '20140926',
|
||||||
|
'uploader': 'anonymous',
|
||||||
|
'duration': 3.52,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'categories': list,
|
||||||
|
'age_limit': 0,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
gfy = self._download_json(
|
gfy = self._download_json(
|
||||||
'http://gfycat.com/cajax/get/%s' % video_id,
|
'http://gfycat.com/cajax/get/%s' % video_id,
|
||||||
video_id, 'Downloading video info')['gfyItem']
|
video_id, 'Downloading video info')
|
||||||
|
if 'error' in gfy:
|
||||||
|
raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
|
||||||
|
gfy = gfy['gfyItem']
|
||||||
|
|
||||||
title = gfy.get('title') or gfy['gfyName']
|
title = gfy.get('title') or gfy['gfyName']
|
||||||
description = gfy.get('description')
|
description = gfy.get('description')
|
||||||
|
|||||||
@@ -78,12 +78,7 @@ class GorillaVidIE(InfoExtractor):
|
|||||||
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
|
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
|
||||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||||
|
|
||||||
fields = dict(re.findall(r'''(?x)<input\s+
|
fields = self._hidden_inputs(webpage)
|
||||||
type="hidden"\s+
|
|
||||||
name="([^"]+)"\s+
|
|
||||||
(?:id="[^"]+"\s+)?
|
|
||||||
value="([^"]*)"
|
|
||||||
''', webpage))
|
|
||||||
|
|
||||||
if fields['op'] == 'download1':
|
if fields['op'] == 'download1':
|
||||||
countdown = int_or_none(self._search_regex(
|
countdown = int_or_none(self._search_regex(
|
||||||
|
|||||||
@@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@@ -19,20 +17,19 @@ class HentaiStigmaIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<h2 class="posttitle"><a[^>]*>([^<]+)</a>',
|
r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>',
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
wrap_url = self._html_search_regex(
|
wrap_url = self._html_search_regex(
|
||||||
r'<iframe src="([^"]+mp4)"', webpage, 'wrapper url')
|
r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url')
|
||||||
wrap_webpage = self._download_webpage(wrap_url, video_id)
|
wrap_webpage = self._download_webpage(wrap_url, video_id)
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r'clip:\s*{\s*url: "([^"]*)"', wrap_webpage, 'video url')
|
r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|||||||
@@ -58,11 +58,7 @@ class HostingBulkIE(InfoExtractor):
|
|||||||
r'<img src="([^"]+)".+?class="pic"',
|
r'<img src="([^"]+)".+?class="pic"',
|
||||||
webpage, 'thumbnail', fatal=False)
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
fields = dict(re.findall(r'''(?x)<input\s+
|
fields = self._hidden_inputs(webpage)
|
||||||
type="hidden"\s+
|
|
||||||
name="([^"]+)"\s+
|
|
||||||
value="([^"]*)"
|
|
||||||
''', webpage))
|
|
||||||
|
|
||||||
request = compat_urllib_request.Request(url, urlencode_postdata(fields))
|
request = compat_urllib_request.Request(url, urlencode_postdata(fields))
|
||||||
request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||||
|
|||||||
@@ -1,8 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import parse_iso8601
|
||||||
|
|
||||||
|
|
||||||
class HowcastIE(InfoExtractor):
|
class HowcastIE(InfoExtractor):
|
||||||
@@ -13,29 +12,31 @@ class HowcastIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '390161',
|
'id': '390161',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
|
|
||||||
'title': 'How to Tie a Square Knot Properly',
|
'title': 'How to Tie a Square Knot Properly',
|
||||||
}
|
'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',
|
||||||
|
'timestamp': 1276081287,
|
||||||
|
'upload_date': '20100609',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
embed_code = self._search_regex(
|
||||||
|
r'<iframe[^>]+src="[^"]+\bembed_code=([^\b]+)\b',
|
||||||
video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
|
webpage, 'ooyala embed code')
|
||||||
webpage, 'video URL')
|
|
||||||
|
|
||||||
video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
|
|
||||||
webpage, 'description', fatal=False)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': 'Ooyala',
|
||||||
|
'url': 'ooyala:%s' % embed_code,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'timestamp': parse_iso8601(self._html_search_meta(
|
||||||
'title': self._og_search_title(webpage),
|
'article:published_time', webpage, 'timestamp')),
|
||||||
'description': video_description,
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class HowStuffWorksIE(InfoExtractor):
|
class HowStuffWorksIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm'
|
_VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
|
'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
|
||||||
@@ -46,6 +46,10 @@ class HowStuffWorksIE(InfoExtractor):
|
|||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ class ImdbIE(InfoExtractor):
|
|||||||
format_info = info['videoPlayerObject']['video']
|
format_info = info['videoPlayerObject']['video']
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': f_id,
|
'format_id': f_id,
|
||||||
'url': format_info['url'],
|
'url': format_info['videoInfoList'][0]['videoUrl'],
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
@@ -12,7 +13,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class ImgurIE(InfoExtractor):
|
class ImgurIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||||
@@ -34,7 +35,8 @@ class ImgurIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(
|
||||||
|
compat_urlparse.urljoin(url, video_id), video_id)
|
||||||
|
|
||||||
width = int_or_none(self._search_regex(
|
width = int_or_none(self._search_regex(
|
||||||
r'<param name="width" value="([0-9]+)"',
|
r'<param name="width" value="([0-9]+)"',
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class InaIE(InfoExtractor):
|
class InaIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||||
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
||||||
|
|||||||
@@ -4,14 +4,15 @@ import base64
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class InfoQIE(InfoExtractor):
|
class InfoQIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
|
_VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
|
'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
|
||||||
'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
|
'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -20,7 +21,10 @@ class InfoQIE(InfoExtractor):
|
|||||||
'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
|
'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
|
||||||
'title': 'A Few of My Favorite [Python] Things',
|
'title': 'A Few of My Favorite [Python] Things',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@@ -35,14 +39,14 @@ class InfoQIE(InfoExtractor):
|
|||||||
# Extract video URL
|
# Extract video URL
|
||||||
encoded_id = self._search_regex(
|
encoded_id = self._search_regex(
|
||||||
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
|
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
|
||||||
real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
|
real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
|
||||||
playpath = 'mp4:' + real_id
|
playpath = 'mp4:' + real_id
|
||||||
|
|
||||||
video_filename = playpath.split('/')[-1]
|
video_filename = playpath.split('/')[-1]
|
||||||
video_id, extension = video_filename.split('.')
|
video_id, extension = video_filename.split('.')
|
||||||
|
|
||||||
http_base = self._search_regex(
|
http_base = self._search_regex(
|
||||||
r'EXPRESSINSTALL_SWF\s*=\s*"(https?://[^/"]+/)', webpage,
|
r'EXPRESSINSTALL_SWF\s*=\s*[^"]*"((?:https?:)?//[^/"]+/)', webpage,
|
||||||
'HTTP base URL')
|
'HTTP base URL')
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
@@ -52,7 +56,7 @@ class InfoQIE(InfoExtractor):
|
|||||||
'play_path': playpath,
|
'play_path': playpath,
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'http',
|
'format_id': 'http',
|
||||||
'url': http_base + real_id,
|
'url': compat_urlparse.urljoin(url, http_base) + real_id,
|
||||||
}]
|
}]
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
limit_length,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class InstagramIE(InfoExtractor):
|
class InstagramIE(InfoExtractor):
|
||||||
@@ -100,11 +103,13 @@ class InstagramUserIE(InfoExtractor):
|
|||||||
thumbnails_el = it.get('images', {})
|
thumbnails_el = it.get('images', {})
|
||||||
thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
|
thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
|
||||||
|
|
||||||
title = it.get('caption', {}).get('text', it['id'])
|
# In some cases caption is null, which corresponds to None
|
||||||
|
# in python. As a result, it.get('caption', {}) gives None
|
||||||
|
title = (it.get('caption') or {}).get('text', it['id'])
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': it['id'],
|
'id': it['id'],
|
||||||
'title': title,
|
'title': limit_length(title, 80),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'webpage_url': it.get('link'),
|
'webpage_url': it.get('link'),
|
||||||
|
|||||||
@@ -11,11 +11,12 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
remove_end,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class IPrimaIE(InfoExtractor):
|
class IPrimaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://play\.iprima\.cz/[^?#]+/(?P<id>[^?#]+)'
|
_VALID_URL = r'https?://play\.iprima\.cz/(?:[^/]+/)*(?P<id>[^?#]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://play.iprima.cz/particka/particka-92',
|
'url': 'http://play.iprima.cz/particka/particka-92',
|
||||||
@@ -23,7 +24,7 @@ class IPrimaIE(InfoExtractor):
|
|||||||
'id': '39152',
|
'id': '39152',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Partička (92)',
|
'title': 'Partička (92)',
|
||||||
'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6',
|
'description': 'md5:74e9617e51bca67c3ecfb2c6f9766f45',
|
||||||
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
|
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@@ -35,13 +36,14 @@ class IPrimaIE(InfoExtractor):
|
|||||||
'id': '9718337',
|
'id': '9718337',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Tchibo Partička - Jarní móda',
|
'title': 'Tchibo Partička - Jarní móda',
|
||||||
'description': 'md5:589f8f59f414220621ff8882eb3ce7be',
|
|
||||||
'thumbnail': 're:^http:.*\.jpg$',
|
'thumbnail': 're:^http:.*\.jpg$',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires rtmpdump
|
'skip_download': True, # requires rtmpdump
|
||||||
},
|
},
|
||||||
'skip': 'Do not have permission to access this page',
|
}, {
|
||||||
|
'url': 'http://play.iprima.cz/zpravy-ftv-prima-2752015',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -102,8 +104,10 @@ class IPrimaIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': real_id,
|
'id': real_id,
|
||||||
'title': self._og_search_title(webpage),
|
'title': remove_end(self._og_search_title(webpage), ' | Prima PLAY'),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._search_regex(
|
||||||
|
r'<p[^>]+itemprop="description"[^>]*>([^<]+)',
|
||||||
|
webpage, 'description', default=None),
|
||||||
}
|
}
|
||||||
|
|||||||
273
youtube_dl/extractor/iqiyi.py
Normal file
273
youtube_dl/extractor/iqiyi.py
Normal file
@@ -0,0 +1,273 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import math
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class IqiyiIE(InfoExtractor):
|
||||||
|
IE_NAME = 'iqiyi'
|
||||||
|
IE_DESC = '爱奇艺'
|
||||||
|
|
||||||
|
_VALID_URL = r'http://(?:www\.)iqiyi.com/v_.+?\.html'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
|
||||||
|
'md5': '2cb594dc2781e6c941a110d8f358118b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
|
||||||
|
'title': '美国德州空中惊现奇异云团 酷似UFO',
|
||||||
|
'ext': 'f4v',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'e3f585b550a280af23c98b6cb2be19fb',
|
||||||
|
'title': '名侦探柯南第752集',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
|
||||||
|
'ext': 'f4v',
|
||||||
|
'title': '名侦探柯南第752集',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
|
||||||
|
'ext': 'f4v',
|
||||||
|
'title': '名侦探柯南第752集',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
|
||||||
|
'ext': 'f4v',
|
||||||
|
'title': '名侦探柯南第752集',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
|
||||||
|
'ext': 'f4v',
|
||||||
|
'title': '名侦探柯南第752集',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
|
||||||
|
'ext': 'f4v',
|
||||||
|
'title': '名侦探柯南第752集',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
|
||||||
|
'ext': 'f4v',
|
||||||
|
'title': '名侦探柯南第752集',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
|
||||||
|
'ext': 'f4v',
|
||||||
|
'title': '名侦探柯南第752集',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
|
||||||
|
'ext': 'f4v',
|
||||||
|
'title': '名侦探柯南第752集',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
_FORMATS_MAP = [
|
||||||
|
('1', 'h6'),
|
||||||
|
('2', 'h5'),
|
||||||
|
('3', 'h4'),
|
||||||
|
('4', 'h3'),
|
||||||
|
('5', 'h2'),
|
||||||
|
('10', 'h1'),
|
||||||
|
]
|
||||||
|
|
||||||
|
def construct_video_urls(self, data, video_id, _uuid):
|
||||||
|
def do_xor(x, y):
|
||||||
|
a = y % 3
|
||||||
|
if a == 1:
|
||||||
|
return x ^ 121
|
||||||
|
if a == 2:
|
||||||
|
return x ^ 72
|
||||||
|
return x ^ 103
|
||||||
|
|
||||||
|
def get_encode_code(l):
|
||||||
|
a = 0
|
||||||
|
b = l.split('-')
|
||||||
|
c = len(b)
|
||||||
|
s = ''
|
||||||
|
for i in range(c - 1, -1, -1):
|
||||||
|
a = do_xor(int(b[c - i - 1], 16), i)
|
||||||
|
s += chr(a)
|
||||||
|
return s[::-1]
|
||||||
|
|
||||||
|
def get_path_key(x, format_id, segment_index):
|
||||||
|
mg = ')(*&^flash@#$%a'
|
||||||
|
tm = self._download_json(
|
||||||
|
'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id,
|
||||||
|
note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
|
||||||
|
)['t']
|
||||||
|
t = str(int(math.floor(int(tm) / (600.0))))
|
||||||
|
return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()
|
||||||
|
|
||||||
|
video_urls_dict = {}
|
||||||
|
for format_item in data['vp']['tkl'][0]['vs']:
|
||||||
|
if 0 < int(format_item['bid']) <= 10:
|
||||||
|
format_id = self.get_format(format_item['bid'])
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
|
||||||
|
video_urls = []
|
||||||
|
|
||||||
|
video_urls_info = format_item['fs']
|
||||||
|
if not format_item['fs'][0]['l'].startswith('/'):
|
||||||
|
t = get_encode_code(format_item['fs'][0]['l'])
|
||||||
|
if t.endswith('mp4'):
|
||||||
|
video_urls_info = format_item['flvs']
|
||||||
|
|
||||||
|
for segment_index, segment in enumerate(video_urls_info):
|
||||||
|
vl = segment['l']
|
||||||
|
if not vl.startswith('/'):
|
||||||
|
vl = get_encode_code(vl)
|
||||||
|
key = get_path_key(
|
||||||
|
vl.split('/')[-1].split('.')[0], format_id, segment_index)
|
||||||
|
filesize = segment['b']
|
||||||
|
base_url = data['vp']['du'].split('/')
|
||||||
|
base_url.insert(-1, key)
|
||||||
|
base_url = '/'.join(base_url)
|
||||||
|
param = {
|
||||||
|
'su': _uuid,
|
||||||
|
'qyid': uuid.uuid4().hex,
|
||||||
|
'client': '',
|
||||||
|
'z': '',
|
||||||
|
'bt': '',
|
||||||
|
'ct': '',
|
||||||
|
'tn': str(int(time.time()))
|
||||||
|
}
|
||||||
|
api_video_url = base_url + vl + '?' + \
|
||||||
|
compat_urllib_parse.urlencode(param)
|
||||||
|
js = self._download_json(
|
||||||
|
api_video_url, video_id,
|
||||||
|
note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
|
||||||
|
video_url = js['l']
|
||||||
|
video_urls.append(
|
||||||
|
(video_url, filesize))
|
||||||
|
|
||||||
|
video_urls_dict[format_id] = video_urls
|
||||||
|
return video_urls_dict
|
||||||
|
|
||||||
|
def get_format(self, bid):
|
||||||
|
matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)]
|
||||||
|
return matched_format_ids[0] if len(matched_format_ids) else None
|
||||||
|
|
||||||
|
def get_bid(self, format_id):
|
||||||
|
matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id]
|
||||||
|
return matched_bids[0] if len(matched_bids) else None
|
||||||
|
|
||||||
|
def get_raw_data(self, tvid, video_id, enc_key, _uuid):
|
||||||
|
tm = str(int(time.time()))
|
||||||
|
param = {
|
||||||
|
'key': 'fvip',
|
||||||
|
'src': hashlib.md5(b'youtube-dl').hexdigest(),
|
||||||
|
'tvId': tvid,
|
||||||
|
'vid': video_id,
|
||||||
|
'vinfo': 1,
|
||||||
|
'tm': tm,
|
||||||
|
'enc': hashlib.md5(
|
||||||
|
(enc_key + tm + tvid).encode('utf8')).hexdigest(),
|
||||||
|
'qyid': _uuid,
|
||||||
|
'tn': random.random(),
|
||||||
|
'um': 0,
|
||||||
|
'authkey': hashlib.md5(
|
||||||
|
(tm + tvid).encode('utf8')).hexdigest()
|
||||||
|
}
|
||||||
|
|
||||||
|
api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
|
||||||
|
compat_urllib_parse.urlencode(param)
|
||||||
|
raw_data = self._download_json(api_url, video_id)
|
||||||
|
return raw_data
|
||||||
|
|
||||||
|
def get_enc_key(self, swf_url, video_id):
|
||||||
|
enc_key = '8e29ab5666d041c3a1ea76e06dabdffb'
|
||||||
|
return enc_key
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, 'temp_id', note='download video page')
|
||||||
|
tvid = self._search_regex(
|
||||||
|
r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
|
||||||
|
swf_url = self._search_regex(
|
||||||
|
r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL')
|
||||||
|
_uuid = uuid.uuid4().hex
|
||||||
|
|
||||||
|
enc_key = self.get_enc_key(swf_url, video_id)
|
||||||
|
|
||||||
|
raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
|
||||||
|
|
||||||
|
if raw_data['code'] != 'A000000':
|
||||||
|
raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
|
||||||
|
|
||||||
|
if not raw_data['data']['vp']['tkl']:
|
||||||
|
raise ExtractorError('No support iQiqy VIP video')
|
||||||
|
|
||||||
|
data = raw_data['data']
|
||||||
|
|
||||||
|
title = data['vi']['vn']
|
||||||
|
|
||||||
|
# generate video_urls_dict
|
||||||
|
video_urls_dict = self.construct_video_urls(
|
||||||
|
data, video_id, _uuid)
|
||||||
|
|
||||||
|
# construct info
|
||||||
|
entries = []
|
||||||
|
for format_id in video_urls_dict:
|
||||||
|
video_urls = video_urls_dict[format_id]
|
||||||
|
for i, video_url_info in enumerate(video_urls):
|
||||||
|
if len(entries) < i + 1:
|
||||||
|
entries.append({'formats': []})
|
||||||
|
entries[i]['formats'].append(
|
||||||
|
{
|
||||||
|
'url': video_url_info[0],
|
||||||
|
'filesize': video_url_info[-1],
|
||||||
|
'format_id': format_id,
|
||||||
|
'preference': int(self.get_bid(format_id))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
for i in range(len(entries)):
|
||||||
|
self._sort_formats(entries[i]['formats'])
|
||||||
|
entries[i].update(
|
||||||
|
{
|
||||||
|
'id': '%s_part%d' % (video_id, i + 1),
|
||||||
|
'title': title,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
if len(entries) > 1:
|
||||||
|
info = {
|
||||||
|
'_type': 'multi_video',
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'entries': entries,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
info = entries[0]
|
||||||
|
info['id'] = video_id
|
||||||
|
info['title'] = title
|
||||||
|
|
||||||
|
return info
|
||||||
42
youtube_dl/extractor/ir90tv.py
Normal file
42
youtube_dl/extractor/ir90tv.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import remove_start
|
||||||
|
|
||||||
|
|
||||||
|
class Ir90TvIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?90tv\.ir/video/(?P<id>[0-9]+)/.*'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
|
||||||
|
'md5': '411dbd94891381960cb9e13daa47a869',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '95719',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'شایعات نقل و انتقالات مهم فوتبال اروپا 94/02/18',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = remove_start(self._html_search_regex(
|
||||||
|
r'<title>([^<]+)</title>', webpage, 'title'), '90tv.ir :: ')
|
||||||
|
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r'<source[^>]+src="([^"]+)"', webpage, 'video url')
|
||||||
|
|
||||||
|
thumbnail = self._search_regex(r'poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'url': video_url,
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'video_url': video_url,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
||||||
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse_unquote
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
@@ -30,7 +31,7 @@ class IzleseneIE(InfoExtractor):
|
|||||||
'description': 'md5:253753e2655dde93f59f74b572454f6d',
|
'description': 'md5:253753e2655dde93f59f74b572454f6d',
|
||||||
'thumbnail': 're:^http://.*\.jpg',
|
'thumbnail': 're:^http://.*\.jpg',
|
||||||
'uploader_id': 'pelikzzle',
|
'uploader_id': 'pelikzzle',
|
||||||
'timestamp': 1404302298,
|
'timestamp': int,
|
||||||
'upload_date': '20140702',
|
'upload_date': '20140702',
|
||||||
'duration': 95.395,
|
'duration': 95.395,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
@@ -46,7 +47,7 @@ class IzleseneIE(InfoExtractor):
|
|||||||
'description': 'Tarkan Dortmund 2006 Konseri',
|
'description': 'Tarkan Dortmund 2006 Konseri',
|
||||||
'thumbnail': 're:^http://.*\.jpg',
|
'thumbnail': 're:^http://.*\.jpg',
|
||||||
'uploader_id': 'parlayankiz',
|
'uploader_id': 'parlayankiz',
|
||||||
'timestamp': 1163322193,
|
'timestamp': int,
|
||||||
'upload_date': '20061112',
|
'upload_date': '20061112',
|
||||||
'duration': 253.666,
|
'duration': 253.666,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
@@ -67,9 +68,9 @@ class IzleseneIE(InfoExtractor):
|
|||||||
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r"adduserUsername\s*=\s*'([^']+)';",
|
r"adduserUsername\s*=\s*'([^']+)';",
|
||||||
webpage, 'uploader', fatal=False, default='')
|
webpage, 'uploader', fatal=False)
|
||||||
timestamp = parse_iso8601(self._html_search_meta(
|
timestamp = parse_iso8601(self._html_search_meta(
|
||||||
'uploadDate', webpage, 'upload date', fatal=False))
|
'uploadDate', webpage, 'upload date'))
|
||||||
|
|
||||||
duration = float_or_none(self._html_search_regex(
|
duration = float_or_none(self._html_search_regex(
|
||||||
r'"videoduration"\s*:\s*"([^"]+)"',
|
r'"videoduration"\s*:\s*"([^"]+)"',
|
||||||
@@ -86,8 +87,7 @@ class IzleseneIE(InfoExtractor):
|
|||||||
|
|
||||||
# Might be empty for some videos.
|
# Might be empty for some videos.
|
||||||
streams = self._html_search_regex(
|
streams = self._html_search_regex(
|
||||||
r'"qualitylevel"\s*:\s*"([^"]+)"',
|
r'"qualitylevel"\s*:\s*"([^"]+)"', webpage, 'streams', default='')
|
||||||
webpage, 'streams', fatal=False, default='')
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
if streams:
|
if streams:
|
||||||
@@ -95,15 +95,15 @@ class IzleseneIE(InfoExtractor):
|
|||||||
quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
|
quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': '%sp' % quality if quality else 'sd',
|
'format_id': '%sp' % quality if quality else 'sd',
|
||||||
'url': url,
|
'url': compat_urllib_parse_unquote(url),
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
stream_url = self._search_regex(
|
stream_url = self._search_regex(
|
||||||
r'"streamurl"\s?:\s?"([^"]+)"', webpage, 'stream URL')
|
r'"streamurl"\s*:\s*"([^"]+)"', webpage, 'stream URL')
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'sd',
|
'format_id': 'sd',
|
||||||
'url': stream_url,
|
'url': compat_urllib_parse_unquote(stream_url),
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@@ -8,9 +8,9 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class JeuxVideoIE(InfoExtractor):
|
class JeuxVideoIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||||
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
|
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -19,7 +19,10 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||||
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
|
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse
|
from ..compat import compat_urllib_parse_unquote_plus
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
js_to_json,
|
js_to_json,
|
||||||
)
|
)
|
||||||
@@ -24,7 +24,7 @@ class KaraoketvIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
page_video_url = self._og_search_video_url(webpage, video_id)
|
page_video_url = self._og_search_video_url(webpage, video_id)
|
||||||
config_json = compat_urllib_parse.unquote_plus(self._search_regex(
|
config_json = compat_urllib_parse_unquote_plus(self._search_regex(
|
||||||
r'config=(.*)', page_video_url, 'configuration'))
|
r'config=(.*)', page_video_url, 'configuration'))
|
||||||
|
|
||||||
urls_info_json = self._download_json(
|
urls_info_json = self._download_json(
|
||||||
|
|||||||
96
youtube_dl/extractor/karrierevideos.py
Normal file
96
youtube_dl/extractor/karrierevideos.py
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urlparse
|
||||||
|
from ..utils import (
|
||||||
|
fix_xml_ampersands,
|
||||||
|
float_or_none,
|
||||||
|
xpath_with_ns,
|
||||||
|
xpath_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class KarriereVideosIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '32c91',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'AltenpflegerIn',
|
||||||
|
'description': 'md5:dbadd1259fde2159a9b28667cb664ae2',
|
||||||
|
'thumbnail': 're:^http://.*\.png',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# broken ampersands
|
||||||
|
'url': 'http://www.karrierevideos.at/orientierung/vaeterkarenz-und-neue-chancen-fuer-muetter-baby-was-nun',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5sniu',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Väterkarenz und neue Chancen für Mütter - "Baby - was nun?"',
|
||||||
|
'description': 'md5:97092c6ad1fd7d38e9d6a5fdeb2bcc33',
|
||||||
|
'thumbnail': 're:^http://.*\.png',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = (self._html_search_meta('title', webpage, default=None) or
|
||||||
|
self._search_regex(r'<h1 class="title">([^<]+)</h1>'))
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'/config/video/(.+?)\.xml', webpage, 'video id')
|
||||||
|
playlist = self._download_xml(
|
||||||
|
'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
|
||||||
|
video_id, transform_source=fix_xml_ampersands)
|
||||||
|
|
||||||
|
NS_MAP = {
|
||||||
|
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
|
||||||
|
}
|
||||||
|
|
||||||
|
def ns(path):
|
||||||
|
return xpath_with_ns(path, NS_MAP)
|
||||||
|
|
||||||
|
item = playlist.find('./tracklist/item')
|
||||||
|
video_file = xpath_text(
|
||||||
|
item, ns('./jwplayer:file'), 'video url', fatal=True)
|
||||||
|
streamer = xpath_text(
|
||||||
|
item, ns('./jwplayer:streamer'), 'streamer', fatal=True)
|
||||||
|
|
||||||
|
uploader = xpath_text(
|
||||||
|
item, ns('./jwplayer:author'), 'uploader')
|
||||||
|
duration = float_or_none(
|
||||||
|
xpath_text(item, ns('./jwplayer:duration'), 'duration'))
|
||||||
|
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'(?s)<div class="leadtext">(.+?)</div>',
|
||||||
|
webpage, 'description')
|
||||||
|
|
||||||
|
thumbnail = self._html_search_meta(
|
||||||
|
'thumbnail', webpage, 'thumbnail')
|
||||||
|
if thumbnail:
|
||||||
|
thumbnail = compat_urlparse.urljoin(url, thumbnail)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': streamer.replace('rtmpt', 'rtmp'),
|
||||||
|
'play_path': 'mp4:%s' % video_file,
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
||||||
@@ -28,6 +28,14 @@ class KickStarterIE(InfoExtractor):
|
|||||||
'uploader': 'Pebble Technology',
|
'uploader': 'Pebble Technology',
|
||||||
'title': 'Pebble iOS Notifications',
|
'title': 'Pebble iOS Notifications',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.kickstarter.com/projects/1420158244/power-drive-2000/widget/video.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1420158244',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Power Drive 2000',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['OpenGraph description'],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -48,10 +56,15 @@ class KickStarterIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||||
|
if thumbnail is None:
|
||||||
|
thumbnail = self._html_search_regex(
|
||||||
|
r'<img[^>]+class="[^"]+\s*poster\s*[^"]+"[^>]+src="([^"]+)"',
|
||||||
|
webpage, 'thumbnail image', fatal=False)
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
|||||||
314
youtube_dl/extractor/kuwo.py
Normal file
314
youtube_dl/extractor/kuwo.py
Normal file
@@ -0,0 +1,314 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
get_element_by_id,
|
||||||
|
clean_html,
|
||||||
|
ExtractorError,
|
||||||
|
remove_start,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class KuwoBaseIE(InfoExtractor):
|
||||||
|
_FORMATS = [
|
||||||
|
{'format': 'ape', 'ext': 'ape', 'preference': 100},
|
||||||
|
{'format': 'mp3-320', 'ext': 'mp3', 'br': '320kmp3', 'abr': 320, 'preference': 80},
|
||||||
|
{'format': 'mp3-192', 'ext': 'mp3', 'br': '192kmp3', 'abr': 192, 'preference': 70},
|
||||||
|
{'format': 'mp3-128', 'ext': 'mp3', 'br': '128kmp3', 'abr': 128, 'preference': 60},
|
||||||
|
{'format': 'wma', 'ext': 'wma', 'preference': 20},
|
||||||
|
{'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10}
|
||||||
|
]
|
||||||
|
|
||||||
|
def _get_formats(self, song_id):
|
||||||
|
formats = []
|
||||||
|
for file_format in self._FORMATS:
|
||||||
|
song_url = self._download_webpage(
|
||||||
|
'http://antiserver.kuwo.cn/anti.s?format=%s&br=%s&rid=MUSIC_%s&type=convert_url&response=url' %
|
||||||
|
(file_format['ext'], file_format.get('br', ''), song_id),
|
||||||
|
song_id, note='Download %s url info' % file_format['format'],
|
||||||
|
)
|
||||||
|
if song_url.startswith('http://') or song_url.startswith('https://'):
|
||||||
|
formats.append({
|
||||||
|
'url': song_url,
|
||||||
|
'format_id': file_format['format'],
|
||||||
|
'format': file_format['format'],
|
||||||
|
'preference': file_format['preference'],
|
||||||
|
'abr': file_format.get('abr'),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
|
||||||
|
class KuwoIE(KuwoBaseIE):
|
||||||
|
IE_NAME = 'kuwo:song'
|
||||||
|
IE_DESC = '酷我音乐'
|
||||||
|
_VALID_URL = r'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)/'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.kuwo.cn/yinyue/635632/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '635632',
|
||||||
|
'ext': 'ape',
|
||||||
|
'title': '爱我别走',
|
||||||
|
'creator': '张震岳',
|
||||||
|
'upload_date': '20080122',
|
||||||
|
'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c'
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.kuwo.cn/yinyue/6446136/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6446136',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '心',
|
||||||
|
'creator': 'IU',
|
||||||
|
'upload_date': '20150518',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'mp3-320'
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
song_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, song_id, note='Download song detail info',
|
||||||
|
errnote='Unable to get song detail info')
|
||||||
|
|
||||||
|
song_name = self._html_search_regex(
|
||||||
|
r'<h1[^>]+title="([^"]+)">', webpage, 'song name')
|
||||||
|
singer_name = self._html_search_regex(
|
||||||
|
r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"',
|
||||||
|
webpage, 'singer name', fatal=False)
|
||||||
|
lrc_content = clean_html(get_element_by_id('lrcContent', webpage))
|
||||||
|
if lrc_content == '暂无': # indicates no lyrics
|
||||||
|
lrc_content = None
|
||||||
|
|
||||||
|
formats = self._get_formats(song_id)
|
||||||
|
|
||||||
|
album_id = self._html_search_regex(
|
||||||
|
r'<p[^>]+class="album"[^<]+<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
|
||||||
|
webpage, 'album id', fatal=False)
|
||||||
|
|
||||||
|
publish_time = None
|
||||||
|
if album_id is not None:
|
||||||
|
album_info_page = self._download_webpage(
|
||||||
|
'http://www.kuwo.cn/album/%s/' % album_id, song_id,
|
||||||
|
note='Download album detail info',
|
||||||
|
errnote='Unable to get album detail info')
|
||||||
|
|
||||||
|
publish_time = self._html_search_regex(
|
||||||
|
r'发行时间:(\d{4}-\d{2}-\d{2})', album_info_page,
|
||||||
|
'publish time', fatal=False)
|
||||||
|
if publish_time:
|
||||||
|
publish_time = publish_time.replace('-', '')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': song_id,
|
||||||
|
'title': song_name,
|
||||||
|
'creator': singer_name,
|
||||||
|
'upload_date': publish_time,
|
||||||
|
'description': lrc_content,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class KuwoAlbumIE(InfoExtractor):
|
||||||
|
IE_NAME = 'kuwo:album'
|
||||||
|
IE_DESC = '酷我音乐 - 专辑'
|
||||||
|
_VALID_URL = r'http://www\.kuwo\.cn/album/(?P<id>\d+?)/'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.kuwo.cn/album/502294/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '502294',
|
||||||
|
'title': 'M',
|
||||||
|
'description': 'md5:6a7235a84cc6400ec3b38a7bdaf1d60c',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
album_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, album_id, note='Download album info',
|
||||||
|
errnote='Unable to get album info')
|
||||||
|
|
||||||
|
album_name = self._html_search_regex(
|
||||||
|
r'<div[^>]+class="comm"[^<]+<h1[^>]+title="([^"]+)"', webpage,
|
||||||
|
'album name')
|
||||||
|
album_intro = remove_start(
|
||||||
|
clean_html(get_element_by_id('intro', webpage)),
|
||||||
|
'%s简介:' % album_name)
|
||||||
|
|
||||||
|
entries = [
|
||||||
|
self.url_result(song_url, 'Kuwo') for song_url in re.findall(
|
||||||
|
r'<p[^>]+class="listen"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+/)"',
|
||||||
|
webpage)
|
||||||
|
]
|
||||||
|
return self.playlist_result(entries, album_id, album_name, album_intro)
|
||||||
|
|
||||||
|
|
||||||
|
class KuwoChartIE(InfoExtractor):
|
||||||
|
IE_NAME = 'kuwo:chart'
|
||||||
|
IE_DESC = '酷我音乐 - 排行榜'
|
||||||
|
_VALID_URL = r'http://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '香港中文龙虎榜',
|
||||||
|
'title': '香港中文龙虎榜',
|
||||||
|
'description': 're:\d{4}第\d{2}期',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 10,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
chart_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, chart_id, note='Download chart info',
|
||||||
|
errnote='Unable to get chart info')
|
||||||
|
|
||||||
|
chart_name = self._html_search_regex(
|
||||||
|
r'<h1[^>]+class="unDis">([^<]+)</h1>', webpage, 'chart name')
|
||||||
|
|
||||||
|
chart_desc = self._html_search_regex(
|
||||||
|
r'<p[^>]+class="tabDef">(\d{4}第\d{2}期)</p>', webpage, 'chart desc')
|
||||||
|
|
||||||
|
entries = [
|
||||||
|
self.url_result(song_url, 'Kuwo') for song_url in re.findall(
|
||||||
|
r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/"', webpage)
|
||||||
|
]
|
||||||
|
return self.playlist_result(entries, chart_id, chart_name, chart_desc)
|
||||||
|
|
||||||
|
|
||||||
|
class KuwoSingerIE(InfoExtractor):
|
||||||
|
IE_NAME = 'kuwo:singer'
|
||||||
|
IE_DESC = '酷我音乐 - 歌手'
|
||||||
|
_VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'bruno+mars',
|
||||||
|
'title': 'Bruno Mars',
|
||||||
|
},
|
||||||
|
'playlist_count': 10,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Ali',
|
||||||
|
'title': 'Ali',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 95,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
singer_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, singer_id, note='Download singer info',
|
||||||
|
errnote='Unable to get singer info')
|
||||||
|
|
||||||
|
singer_name = self._html_search_regex(
|
||||||
|
r'<div class="title clearfix">\s*<h1>([^<]+)<span', webpage, 'singer name'
|
||||||
|
)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
first_page_only = False if re.search(r'/music(?:_\d+)?\.htm', url) else True
|
||||||
|
for page_num in itertools.count(1):
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (singer_id, page_num),
|
||||||
|
singer_id, note='Download song list page #%d' % page_num,
|
||||||
|
errnote='Unable to get song list page #%d' % page_num)
|
||||||
|
|
||||||
|
entries.extend([
|
||||||
|
self.url_result(song_url, 'Kuwo') for song_url in re.findall(
|
||||||
|
r'<p[^>]+class="m_name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/',
|
||||||
|
webpage)
|
||||||
|
][:10 if first_page_only else None])
|
||||||
|
|
||||||
|
if first_page_only or not re.search(r'<a[^>]+href="[^"]+">下一页</a>', webpage):
|
||||||
|
break
|
||||||
|
|
||||||
|
return self.playlist_result(entries, singer_id, singer_name)
|
||||||
|
|
||||||
|
|
||||||
|
class KuwoCategoryIE(InfoExtractor):
|
||||||
|
IE_NAME = 'kuwo:category'
|
||||||
|
IE_DESC = '酷我音乐 - 分类'
|
||||||
|
_VALID_URL = r'http://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '86375',
|
||||||
|
'title': '八十年代精选',
|
||||||
|
'description': '这些都是属于八十年代的回忆!',
|
||||||
|
},
|
||||||
|
'playlist_count': 30,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
category_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, category_id, note='Download category info',
|
||||||
|
errnote='Unable to get category info')
|
||||||
|
|
||||||
|
category_name = self._html_search_regex(
|
||||||
|
r'<h1[^>]+title="([^<>]+?)">[^<>]+?</h1>', webpage, 'category name')
|
||||||
|
|
||||||
|
category_desc = remove_start(
|
||||||
|
get_element_by_id('intro', webpage).strip(),
|
||||||
|
'%s简介:' % category_name)
|
||||||
|
|
||||||
|
jsonm = self._parse_json(self._html_search_regex(
|
||||||
|
r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id)
|
||||||
|
|
||||||
|
entries = [
|
||||||
|
self.url_result('http://www.kuwo.cn/yinyue/%s/' % song['musicrid'], 'Kuwo')
|
||||||
|
for song in jsonm['musiclist']
|
||||||
|
]
|
||||||
|
return self.playlist_result(entries, category_id, category_name, category_desc)
|
||||||
|
|
||||||
|
|
||||||
|
class KuwoMvIE(KuwoBaseIE):
|
||||||
|
IE_NAME = 'kuwo:mv'
|
||||||
|
IE_DESC = '酷我音乐 - MV'
|
||||||
|
_VALID_URL = r'http://www\.kuwo\.cn/mv/(?P<id>\d+?)/'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.kuwo.cn/mv/6480076/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6480076',
|
||||||
|
'ext': 'mkv',
|
||||||
|
'title': '我们家MV',
|
||||||
|
'creator': '2PM',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
_FORMATS = KuwoBaseIE._FORMATS + [
|
||||||
|
{'format': 'mkv', 'ext': 'mkv', 'preference': 250},
|
||||||
|
{'format': 'mp4', 'ext': 'mp4', 'preference': 200},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
song_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, song_id, note='Download mv detail info: %s' % song_id,
|
||||||
|
errnote='Unable to get mv detail info: %s' % song_id)
|
||||||
|
|
||||||
|
mobj = re.search(
|
||||||
|
r'<h1[^>]+title="(?P<song>[^"]+)">[^<]+<span[^>]+title="(?P<singer>[^"]+)"',
|
||||||
|
webpage)
|
||||||
|
if mobj:
|
||||||
|
song_name = mobj.group('song')
|
||||||
|
singer_name = mobj.group('singer')
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Unable to find song or singer names')
|
||||||
|
|
||||||
|
formats = self._get_formats(song_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': song_id,
|
||||||
|
'title': song_name,
|
||||||
|
'creator': singer_name,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
62
youtube_dl/extractor/lecture2go.py
Normal file
62
youtube_dl/extractor/lecture2go.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
parse_duration,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Lecture2GoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473',
|
||||||
|
'md5': 'ac02b570883020d208d405d5a3fd2f7f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '17473',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': '2 - Endliche Automaten und reguläre Sprachen',
|
||||||
|
'creator': 'Frank Heitmann',
|
||||||
|
'duration': 5220,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for url in set(re.findall(r'"src","([^"]+)"', webpage)):
|
||||||
|
ext = determine_ext(url)
|
||||||
|
if ext == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(url, video_id))
|
||||||
|
elif ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(url, video_id))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': url,
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
creator = self._html_search_regex(
|
||||||
|
r'<div[^>]+id="description">([^<]+)</div>', webpage, 'creator', fatal=False)
|
||||||
|
duration = parse_duration(self._html_search_regex(
|
||||||
|
r'Duration:\s*</em>\s*<em[^>]*>([^<]+)</em>', webpage, 'duration', fatal=False))
|
||||||
|
view_count = int_or_none(self._html_search_regex(
|
||||||
|
r'Views:\s*</em>\s*<em[^>]+>(\d+)</em>', webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'creator': creator,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
}
|
||||||
@@ -15,10 +15,12 @@ from ..utils import (
|
|||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LetvIE(InfoExtractor):
|
class LetvIE(InfoExtractor):
|
||||||
|
IE_DESC = '乐视网'
|
||||||
_VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
|
_VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@@ -133,7 +135,7 @@ class LetvIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
if format_id[-1:] == 'p':
|
if format_id[-1:] == 'p':
|
||||||
url_info_dict['height'] = format_id[:-1]
|
url_info_dict['height'] = int_or_none(format_id[:-1])
|
||||||
|
|
||||||
urls.append(url_info_dict)
|
urls.append(url_info_dict)
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
remove_end,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
@@ -39,7 +40,6 @@ class LifeNewsIE(InfoExtractor):
|
|||||||
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
|
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
|
||||||
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
|
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
|
||||||
'upload_date': '20150402',
|
'upload_date': '20150402',
|
||||||
'uploader': 'embed.life.ru',
|
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://lifenews.ru/news/153461',
|
'url': 'http://lifenews.ru/news/153461',
|
||||||
@@ -50,7 +50,6 @@ class LifeNewsIE(InfoExtractor):
|
|||||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
|
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
|
||||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||||
'upload_date': '20150505',
|
'upload_date': '20150505',
|
||||||
'uploader': 'embed.life.ru',
|
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://lifenews.ru/video/13035',
|
'url': 'http://lifenews.ru/video/13035',
|
||||||
@@ -72,20 +71,20 @@ class LifeNewsIE(InfoExtractor):
|
|||||||
if not videos and not iframe_link:
|
if not videos and not iframe_link:
|
||||||
raise ExtractorError('No media links available for %s' % video_id)
|
raise ExtractorError('No media links available for %s' % video_id)
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = remove_end(
|
||||||
TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
|
self._og_search_title(webpage),
|
||||||
if title.endswith(TITLE_SUFFIX):
|
' - Первый по срочным новостям — LIFE | NEWS')
|
||||||
title = title[:-len(TITLE_SUFFIX)]
|
|
||||||
|
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
|
|
||||||
view_count = self._html_search_regex(
|
view_count = self._html_search_regex(
|
||||||
r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
|
r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
|
||||||
comment_count = self._html_search_regex(
|
comment_count = self._html_search_regex(
|
||||||
r'<div class=\'comments\'>\s*<span class=\'counter\'>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)
|
r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
|
||||||
|
webpage, 'comment count', fatal=False)
|
||||||
|
|
||||||
upload_date = self._html_search_regex(
|
upload_date = self._html_search_regex(
|
||||||
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
|
r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
|
||||||
if upload_date is not None:
|
if upload_date is not None:
|
||||||
upload_date = unified_strdate(upload_date)
|
upload_date = unified_strdate(upload_date)
|
||||||
|
|
||||||
|
|||||||
@@ -40,6 +40,17 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
|
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# Covers https://github.com/rg3/youtube-dl/pull/5983
|
||||||
|
'url': 'http://www.liveleak.com/view?i=801_1409392012',
|
||||||
|
'md5': '0b3bec2d888c20728ca2ad3642f0ef15',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '801_1409392012',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': "Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.",
|
||||||
|
'uploader': 'bony333',
|
||||||
|
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia'
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -85,7 +96,10 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
'url': s['file'],
|
'url': s['file'],
|
||||||
} for i, s in enumerate(sources)]
|
} for i, s in enumerate(sources)]
|
||||||
for i, s in enumerate(sources):
|
for i, s in enumerate(sources):
|
||||||
orig_url = s['file'].replace('.h264_base.mp4', '')
|
# Removing '.h264_*.mp4' gives the raw video, which is essentially
|
||||||
|
# the same video without the LiveLeak logo at the top (see
|
||||||
|
# https://github.com/rg3/youtube-dl/pull/4768)
|
||||||
|
orig_url = re.sub(r'\.h264_.+?\.mp4', '', s['file'])
|
||||||
if s['file'] != orig_url:
|
if s['file'] != orig_url:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'original-%s' % i,
|
'format_id': 'original-%s' % i,
|
||||||
|
|||||||
@@ -30,13 +30,13 @@ class LyndaBaseIE(InfoExtractor):
|
|||||||
return
|
return
|
||||||
|
|
||||||
login_form = {
|
login_form = {
|
||||||
'username': username,
|
'username': username.encode('utf-8'),
|
||||||
'password': password,
|
'password': password.encode('utf-8'),
|
||||||
'remember': 'false',
|
'remember': 'false',
|
||||||
'stayPut': 'false'
|
'stayPut': 'false'
|
||||||
}
|
}
|
||||||
request = compat_urllib_request.Request(
|
request = compat_urllib_request.Request(
|
||||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
request, None, 'Logging in as %s' % username)
|
request, None, 'Logging in as %s' % username)
|
||||||
|
|
||||||
@@ -65,7 +65,7 @@ class LyndaBaseIE(InfoExtractor):
|
|||||||
'stayPut': 'false',
|
'stayPut': 'false',
|
||||||
}
|
}
|
||||||
request = compat_urllib_request.Request(
|
request = compat_urllib_request.Request(
|
||||||
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
|
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
|
||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
request, None,
|
request, None,
|
||||||
'Confirming log in and log out from another device')
|
'Confirming log in and log out from another device')
|
||||||
|
|||||||
@@ -2,9 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_urllib_parse_unquote
|
||||||
compat_urllib_parse,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class MalemotionIE(InfoExtractor):
|
class MalemotionIE(InfoExtractor):
|
||||||
@@ -24,7 +22,7 @@ class MalemotionIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_url = compat_urllib_parse.unquote(self._search_regex(
|
video_url = compat_urllib_parse_unquote(self._search_regex(
|
||||||
r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
|
r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<title>(.*?)</title', webpage, 'title')
|
r'<title>(.*?)</title', webpage, 'title')
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from .common import InfoExtractor
|
|||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -155,7 +156,7 @@ class MetacafeIE(InfoExtractor):
|
|||||||
video_url = None
|
video_url = None
|
||||||
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
|
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
mediaURL = compat_urllib_parse.unquote(mobj.group(1))
|
mediaURL = compat_urllib_parse_unquote(mobj.group(1))
|
||||||
video_ext = mediaURL[-3:]
|
video_ext = mediaURL[-3:]
|
||||||
|
|
||||||
# Extract gdaKey if available
|
# Extract gdaKey if available
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import json
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -20,7 +21,6 @@ class MiTeleIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
||||||
'md5': '6a75fe9d0d3275bead0cb683c616fddb',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0fce117d',
|
'id': '0fce117d',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -29,6 +29,10 @@ class MiTeleIE(InfoExtractor):
|
|||||||
'display_id': 'programa-144',
|
'display_id': 'programa-144',
|
||||||
'duration': 2913,
|
'duration': 2913,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -45,7 +49,7 @@ class MiTeleIE(InfoExtractor):
|
|||||||
domain = 'http://' + domain
|
domain = 'http://' + domain
|
||||||
info_url = compat_urlparse.urljoin(
|
info_url = compat_urlparse.urljoin(
|
||||||
domain,
|
domain,
|
||||||
compat_urllib_parse.unquote(embed_data['flashvars']['host'])
|
compat_urllib_parse_unquote(embed_data['flashvars']['host'])
|
||||||
)
|
)
|
||||||
info_el = self._download_xml(info_url, episode).find('./video/info')
|
info_el = self._download_xml(info_url, episode).find('./video/info')
|
||||||
|
|
||||||
@@ -56,12 +60,14 @@ class MiTeleIE(InfoExtractor):
|
|||||||
episode,
|
episode,
|
||||||
transform_source=strip_jsonp
|
transform_source=strip_jsonp
|
||||||
)
|
)
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
token_info['tokenizedUrl'], episode, ext='mp4')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': embed_data['videoId'],
|
'id': embed_data['videoId'],
|
||||||
'display_id': episode,
|
'display_id': episode,
|
||||||
'title': info_el.find('title').text,
|
'title': info_el.find('title').text,
|
||||||
'url': token_info['tokenizedUrl'],
|
'formats': formats,
|
||||||
'description': get_element_by_attribute('class', 'text', webpage),
|
'description': get_element_by_attribute('class', 'text', webpage),
|
||||||
'thumbnail': info_el.find('thumb').text,
|
'thumbnail': info_el.find('thumb').text,
|
||||||
'duration': parse_duration(info_el.find('duration').text),
|
'duration': parse_duration(info_el.find('duration').text),
|
||||||
|
|||||||
@@ -3,9 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_urllib_parse_unquote
|
||||||
compat_urllib_parse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
@@ -60,7 +58,7 @@ class MixcloudIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
uploader = mobj.group(1)
|
uploader = mobj.group(1)
|
||||||
cloudcast_name = mobj.group(2)
|
cloudcast_name = mobj.group(2)
|
||||||
track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name)))
|
track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name)))
|
||||||
|
|
||||||
webpage = self._download_webpage(url, track_id)
|
webpage = self._download_webpage(url, track_id)
|
||||||
|
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -34,7 +34,7 @@ class MofosexIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
|
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
|
||||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
|
video_url = compat_urllib_parse_unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
extension = os.path.splitext(path)[1][1:]
|
extension = os.path.splitext(path)[1][1:]
|
||||||
format = path.split('/')[5].split('_')[:2]
|
format = path.split('/')[5].split('_')[:2]
|
||||||
|
|||||||
@@ -35,7 +35,8 @@ class MySpassIE(InfoExtractor):
|
|||||||
|
|
||||||
# get metadata
|
# get metadata
|
||||||
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
||||||
metadata = self._download_xml(metadata_url, video_id)
|
metadata = self._download_xml(
|
||||||
|
metadata_url, video_id, transform_source=lambda s: s.strip())
|
||||||
|
|
||||||
# extract values from metadata
|
# extract values from metadata
|
||||||
url_flv_el = metadata.find('url_flv')
|
url_flv_el = metadata.find('url_flv')
|
||||||
|
|||||||
60
youtube_dl/extractor/myvi.py
Normal file
60
youtube_dl/extractor/myvi.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .vimple import SprutoBaseIE
|
||||||
|
|
||||||
|
|
||||||
|
class MyviIE(SprutoBaseIE):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
myvi\.(?:ru/player|tv)/
|
||||||
|
(?:
|
||||||
|
(?:
|
||||||
|
embed/html|
|
||||||
|
flash|
|
||||||
|
api/Video/Get
|
||||||
|
)/|
|
||||||
|
content/preloader\.swf\?.*\bid=
|
||||||
|
)
|
||||||
|
(?P<id>[\da-zA-Z_-]+)
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
|
||||||
|
'md5': '571bbdfba9f9ed229dc6d34cc0f335bf',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'f16b2bbd-cde8-481c-a981-7cd48605df43',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'хозяин жизни',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 25,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://myvi.ru/player/content/preloader.swf?id=oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wOYf1WFpPfc_bWTKGVf_Zafr0',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://myvi.ru/player/api/Video/Get/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://myvi.tv/embed/html/oTGTNWdyz4Zwy_u1nraolwZ1odenTd9WkTnRfIL9y8VOgHYqOHApE575x4_xxS9Vn0?ap=0',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_url(cls, webpage):
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//myvi\.(?:ru/player|tv)/(?:embed/html|flash)/[^"]+)\1', webpage)
|
||||||
|
if mobj:
|
||||||
|
return mobj.group('url')
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
spruto = self._download_json(
|
||||||
|
'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData']
|
||||||
|
|
||||||
|
return self._extract_spruto(spruto, video_id)
|
||||||
@@ -10,6 +10,7 @@ from .common import InfoExtractor
|
|||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_ord,
|
compat_ord,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -107,7 +108,7 @@ class MyVideoIE(InfoExtractor):
|
|||||||
if not a == '_encxml':
|
if not a == '_encxml':
|
||||||
params[a] = b
|
params[a] = b
|
||||||
else:
|
else:
|
||||||
encxml = compat_urllib_parse.unquote(b)
|
encxml = compat_urllib_parse_unquote(b)
|
||||||
if not params.get('domain'):
|
if not params.get('domain'):
|
||||||
params['domain'] = 'www.myvideo.de'
|
params['domain'] = 'www.myvideo.de'
|
||||||
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
|
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
|
||||||
@@ -135,7 +136,7 @@ class MyVideoIE(InfoExtractor):
|
|||||||
video_url = None
|
video_url = None
|
||||||
mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
|
mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
|
||||||
if mobj:
|
if mobj:
|
||||||
video_url = compat_urllib_parse.unquote(mobj.group(1))
|
video_url = compat_urllib_parse_unquote(mobj.group(1))
|
||||||
if 'myvideo2flash' in video_url:
|
if 'myvideo2flash' in video_url:
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'Rewriting URL to use unencrypted rtmp:// ...',
|
'Rewriting URL to use unencrypted rtmp:// ...',
|
||||||
@@ -147,10 +148,10 @@ class MyVideoIE(InfoExtractor):
|
|||||||
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
|
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError('unable to extract url')
|
raise ExtractorError('unable to extract url')
|
||||||
video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
|
video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))
|
||||||
|
|
||||||
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
|
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
|
||||||
video_file = compat_urllib_parse.unquote(video_file)
|
video_file = compat_urllib_parse_unquote(video_file)
|
||||||
|
|
||||||
if not video_file.endswith('f4m'):
|
if not video_file.endswith('f4m'):
|
||||||
ppath, prefix = video_file.split('.')
|
ppath, prefix = video_file.split('.')
|
||||||
@@ -159,7 +160,7 @@ class MyVideoIE(InfoExtractor):
|
|||||||
video_playpath = ''
|
video_playpath = ''
|
||||||
|
|
||||||
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
|
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
|
||||||
video_swfobj = compat_urllib_parse.unquote(video_swfobj)
|
video_swfobj = compat_urllib_parse_unquote(video_swfobj)
|
||||||
|
|
||||||
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
|
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
|
|||||||
@@ -8,25 +8,40 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class NationalGeographicIE(InfoExtractor):
|
class NationalGeographicIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'
|
_VALID_URL = r'http://video\.nationalgeographic\.com/.*?'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
|
{
|
||||||
'info_dict': {
|
'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
|
||||||
'id': '4DmDACA6Qtk_',
|
'info_dict': {
|
||||||
'ext': 'flv',
|
'id': '4DmDACA6Qtk_',
|
||||||
'title': 'Mating Crabs Busted by Sharks',
|
'ext': 'flv',
|
||||||
'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
|
'title': 'Mating Crabs Busted by Sharks',
|
||||||
|
'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
|
||||||
|
},
|
||||||
|
'add_ie': ['ThePlatform'],
|
||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
{
|
||||||
}
|
'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '_JeBD_D7PlS5',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'The Real Jaws',
|
||||||
|
'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
|
||||||
|
},
|
||||||
|
'add_ie': ['ThePlatform'],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
name = url_basename(url)
|
name = url_basename(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
|
feed_url = self._search_regex(
|
||||||
guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')
|
r'data-feed-url="([^"]+)"', webpage, 'feed url')
|
||||||
|
guid = self._search_regex(
|
||||||
|
r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"',
|
||||||
|
webpage, 'guid')
|
||||||
|
|
||||||
feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
|
feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
|
||||||
content = feed.find('.//{http://search.yahoo.com/mrss/}content')
|
content = feed.find('.//{http://search.yahoo.com/mrss/}content')
|
||||||
@@ -34,5 +49,6 @@ class NationalGeographicIE(InfoExtractor):
|
|||||||
|
|
||||||
return self.url_result(smuggle_url(
|
return self.url_result(smuggle_url(
|
||||||
'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
|
'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
|
||||||
# For some reason, the normal links don't work and we must force the use of f4m
|
# For some reason, the normal links don't work and we must force
|
||||||
|
# the use of f4m
|
||||||
{'force_smil_url': True}))
|
{'force_smil_url': True}))
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@@ -16,7 +17,7 @@ from ..utils import (
|
|||||||
class NaverIE(InfoExtractor):
|
class NaverIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://tvcast.naver.com/v/81652',
|
'url': 'http://tvcast.naver.com/v/81652',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '81652',
|
'id': '81652',
|
||||||
@@ -25,7 +26,18 @@ class NaverIE(InfoExtractor):
|
|||||||
'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
|
'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
|
||||||
'upload_date': '20130903',
|
'upload_date': '20130903',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://tvcast.naver.com/v/395837',
|
||||||
|
'md5': '638ed4c12012c458fefcddfd01f173cd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '395837',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
|
||||||
|
'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7',
|
||||||
|
'upload_date': '20150519',
|
||||||
|
},
|
||||||
|
'skip': 'Georestricted',
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@@ -35,7 +47,7 @@ class NaverIE(InfoExtractor):
|
|||||||
webpage)
|
webpage)
|
||||||
if m_id is None:
|
if m_id is None:
|
||||||
m_error = re.search(
|
m_error = re.search(
|
||||||
r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
|
r'(?s)<div class="(?:nation_error|nation_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
|
||||||
webpage)
|
webpage)
|
||||||
if m_error:
|
if m_error:
|
||||||
raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
|
raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
|
||||||
@@ -58,14 +70,18 @@ class NaverIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
||||||
domain = format_el.find('Domain').text
|
domain = format_el.find('Domain').text
|
||||||
|
uri = format_el.find('uri').text
|
||||||
f = {
|
f = {
|
||||||
'url': domain + format_el.find('uri').text,
|
'url': compat_urlparse.urljoin(domain, uri),
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'width': int(format_el.find('width').text),
|
'width': int(format_el.find('width').text),
|
||||||
'height': int(format_el.find('height').text),
|
'height': int(format_el.find('height').text),
|
||||||
}
|
}
|
||||||
if domain.startswith('rtmp'):
|
if domain.startswith('rtmp'):
|
||||||
|
# urlparse does not support custom schemes
|
||||||
|
# https://bugs.python.org/issue18828
|
||||||
f.update({
|
f.update({
|
||||||
|
'url': domain + uri,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'rtmp_protocol': '1', # rtmpt
|
'rtmp_protocol': '1', # rtmpt
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -22,6 +22,18 @@ class NBAIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://watch.nba.com/nba/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0041400301-cle-atl-recap.nba',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'NBA GAME TIME | Video: Hawks vs. Cavaliers Game 1',
|
||||||
|
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
|
||||||
|
'duration': 228,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -35,8 +47,12 @@ class NBAIE(InfoExtractor):
|
|||||||
self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
|
self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
|
||||||
|
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
duration = parse_duration(
|
duration_str = self._html_search_meta(
|
||||||
self._html_search_meta('duration', webpage, 'duration'))
|
'duration', webpage, 'duration', default=None)
|
||||||
|
if not duration_str:
|
||||||
|
duration_str = self._html_search_regex(
|
||||||
|
r'Duration:</b>\s*(\d+:\d+)', webpage, 'duration', fatal=False)
|
||||||
|
duration = parse_duration(duration_str)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': shortened_video_id,
|
'id': shortened_video_id,
|
||||||
|
|||||||
459
youtube_dl/extractor/neteasemusic.py
Normal file
459
youtube_dl/extractor/neteasemusic.py
Normal file
@@ -0,0 +1,459 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from hashlib import md5
|
||||||
|
from base64 import b64encode
|
||||||
|
from datetime import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_str,
|
||||||
|
compat_itertools_count,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NetEaseMusicBaseIE(InfoExtractor):
|
||||||
|
_FORMATS = ['bMusic', 'mMusic', 'hMusic']
|
||||||
|
_NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
|
||||||
|
_API_BASE = 'http://music.163.com/api/'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _encrypt(cls, dfsid):
|
||||||
|
salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
|
||||||
|
string_bytes = bytearray(compat_str(dfsid).encode('ascii'))
|
||||||
|
salt_len = len(salt_bytes)
|
||||||
|
for i in range(len(string_bytes)):
|
||||||
|
string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
|
||||||
|
m = md5()
|
||||||
|
m.update(bytes(string_bytes))
|
||||||
|
result = b64encode(m.digest()).decode('ascii')
|
||||||
|
return result.replace('/', '_').replace('+', '-')
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def extract_formats(cls, info):
|
||||||
|
formats = []
|
||||||
|
for song_format in cls._FORMATS:
|
||||||
|
details = info.get(song_format)
|
||||||
|
if not details:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': 'http://m1.music.126.net/%s/%s.%s' %
|
||||||
|
(cls._encrypt(details['dfsId']), details['dfsId'],
|
||||||
|
details['extension']),
|
||||||
|
'ext': details.get('extension'),
|
||||||
|
'abr': details.get('bitrate', 0) / 1000,
|
||||||
|
'format_id': song_format,
|
||||||
|
'filesize': details.get('size'),
|
||||||
|
'asr': details.get('sr')
|
||||||
|
})
|
||||||
|
return formats
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def convert_milliseconds(cls, ms):
|
||||||
|
return int(round(ms / 1000.0))
|
||||||
|
|
||||||
|
def query_api(self, endpoint, video_id, note):
|
||||||
|
req = compat_urllib_request.Request('%s%s' % (self._API_BASE, endpoint))
|
||||||
|
req.add_header('Referer', self._API_BASE)
|
||||||
|
return self._download_json(req, video_id, note)
|
||||||
|
|
||||||
|
|
||||||
|
class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||||
|
IE_NAME = 'netease:song'
|
||||||
|
IE_DESC = '网易云音乐'
|
||||||
|
_VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://music.163.com/#/song?id=32102397',
|
||||||
|
'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '32102397',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Bad Blood (feat. Kendrick Lamar)',
|
||||||
|
'creator': 'Taylor Swift / Kendrick Lamar',
|
||||||
|
'upload_date': '20150517',
|
||||||
|
'timestamp': 1431878400,
|
||||||
|
'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'note': 'No lyrics translation.',
|
||||||
|
'url': 'http://music.163.com/#/song?id=29822014',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '29822014',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '听见下雨的声音',
|
||||||
|
'creator': '周杰伦',
|
||||||
|
'upload_date': '20141225',
|
||||||
|
'timestamp': 1419523200,
|
||||||
|
'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'note': 'No lyrics.',
|
||||||
|
'url': 'http://music.163.com/song?id=17241424',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '17241424',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Opus 28',
|
||||||
|
'creator': 'Dustin O\'Halloran',
|
||||||
|
'upload_date': '20080211',
|
||||||
|
'timestamp': 1202745600,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'note': 'Has translated name.',
|
||||||
|
'url': 'http://music.163.com/#/song?id=22735043',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '22735043',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '소원을 말해봐 (Genie)',
|
||||||
|
'creator': '少女时代',
|
||||||
|
'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
|
||||||
|
'upload_date': '20100127',
|
||||||
|
'timestamp': 1264608000,
|
||||||
|
'alt_title': '说出愿望吧(Genie)',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _process_lyrics(self, lyrics_info):
|
||||||
|
original = lyrics_info.get('lrc', {}).get('lyric')
|
||||||
|
translated = lyrics_info.get('tlyric', {}).get('lyric')
|
||||||
|
|
||||||
|
if not translated:
|
||||||
|
return original
|
||||||
|
|
||||||
|
lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
|
||||||
|
original_ts_texts = re.findall(lyrics_expr, original)
|
||||||
|
translation_ts_dict = dict(
|
||||||
|
(time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
|
||||||
|
)
|
||||||
|
lyrics = '\n'.join([
|
||||||
|
'%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
|
||||||
|
for time_stamp, text in original_ts_texts
|
||||||
|
])
|
||||||
|
return lyrics
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
song_id = self._match_id(url)
|
||||||
|
|
||||||
|
params = {
|
||||||
|
'id': song_id,
|
||||||
|
'ids': '[%s]' % song_id
|
||||||
|
}
|
||||||
|
info = self.query_api(
|
||||||
|
'song/detail?' + compat_urllib_parse.urlencode(params),
|
||||||
|
song_id, 'Downloading song info')['songs'][0]
|
||||||
|
|
||||||
|
formats = self.extract_formats(info)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
lyrics_info = self.query_api(
|
||||||
|
'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
|
||||||
|
song_id, 'Downloading lyrics data')
|
||||||
|
lyrics = self._process_lyrics(lyrics_info)
|
||||||
|
|
||||||
|
alt_title = None
|
||||||
|
if info.get('transNames'):
|
||||||
|
alt_title = '/'.join(info.get('transNames'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': song_id,
|
||||||
|
'title': info['name'],
|
||||||
|
'alt_title': alt_title,
|
||||||
|
'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
|
||||||
|
'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')),
|
||||||
|
'thumbnail': info.get('album', {}).get('picUrl'),
|
||||||
|
'duration': self.convert_milliseconds(info.get('duration', 0)),
|
||||||
|
'description': lyrics,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
|
||||||
|
IE_NAME = 'netease:album'
|
||||||
|
IE_DESC = '网易云音乐 - 专辑'
|
||||||
|
_VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://music.163.com/#/album?id=220780',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '220780',
|
||||||
|
'title': 'B\'day',
|
||||||
|
},
|
||||||
|
'playlist_count': 23,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
album_id = self._match_id(url)
|
||||||
|
|
||||||
|
info = self.query_api(
|
||||||
|
'album/%s?id=%s' % (album_id, album_id),
|
||||||
|
album_id, 'Downloading album data')['album']
|
||||||
|
|
||||||
|
name = info['name']
|
||||||
|
desc = info.get('description')
|
||||||
|
entries = [
|
||||||
|
self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
|
||||||
|
'NetEaseMusic', song['id'])
|
||||||
|
for song in info['songs']
|
||||||
|
]
|
||||||
|
return self.playlist_result(entries, album_id, name, desc)
|
||||||
|
|
||||||
|
|
||||||
|
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
|
||||||
|
IE_NAME = 'netease:singer'
|
||||||
|
IE_DESC = '网易云音乐 - 歌手'
|
||||||
|
_VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Singer has aliases.',
|
||||||
|
'url': 'http://music.163.com/#/artist?id=10559',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '10559',
|
||||||
|
'title': '张惠妹 - aMEI;阿密特',
|
||||||
|
},
|
||||||
|
'playlist_count': 50,
|
||||||
|
}, {
|
||||||
|
'note': 'Singer has translated name.',
|
||||||
|
'url': 'http://music.163.com/#/artist?id=124098',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '124098',
|
||||||
|
'title': '李昇基 - 이승기',
|
||||||
|
},
|
||||||
|
'playlist_count': 50,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
singer_id = self._match_id(url)
|
||||||
|
|
||||||
|
info = self.query_api(
|
||||||
|
'artist/%s?id=%s' % (singer_id, singer_id),
|
||||||
|
singer_id, 'Downloading singer data')
|
||||||
|
|
||||||
|
name = info['artist']['name']
|
||||||
|
if info['artist']['trans']:
|
||||||
|
name = '%s - %s' % (name, info['artist']['trans'])
|
||||||
|
if info['artist']['alias']:
|
||||||
|
name = '%s - %s' % (name, ';'.join(info['artist']['alias']))
|
||||||
|
|
||||||
|
entries = [
|
||||||
|
self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
|
||||||
|
'NetEaseMusic', song['id'])
|
||||||
|
for song in info['hotSongs']
|
||||||
|
]
|
||||||
|
return self.playlist_result(entries, singer_id, name)
|
||||||
|
|
||||||
|
|
||||||
|
class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||||
|
IE_NAME = 'netease:playlist'
|
||||||
|
IE_DESC = '网易云音乐 - 歌单'
|
||||||
|
_VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://music.163.com/#/playlist?id=79177352',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '79177352',
|
||||||
|
'title': 'Billboard 2007 Top 100',
|
||||||
|
'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
|
||||||
|
},
|
||||||
|
'playlist_count': 99,
|
||||||
|
}, {
|
||||||
|
'note': 'Toplist/Charts sample',
|
||||||
|
'url': 'http://music.163.com/#/discover/toplist?id=3733003',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3733003',
|
||||||
|
'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
|
||||||
|
'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
|
||||||
|
},
|
||||||
|
'playlist_count': 50,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
list_id = self._match_id(url)
|
||||||
|
|
||||||
|
info = self.query_api(
|
||||||
|
'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
|
||||||
|
list_id, 'Downloading playlist data')['result']
|
||||||
|
|
||||||
|
name = info['name']
|
||||||
|
desc = info.get('description')
|
||||||
|
|
||||||
|
if info.get('specialType') == 10: # is a chart/toplist
|
||||||
|
datestamp = datetime.fromtimestamp(
|
||||||
|
self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d')
|
||||||
|
name = '%s %s' % (name, datestamp)
|
||||||
|
|
||||||
|
entries = [
|
||||||
|
self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
|
||||||
|
'NetEaseMusic', song['id'])
|
||||||
|
for song in info['tracks']
|
||||||
|
]
|
||||||
|
return self.playlist_result(entries, list_id, name, desc)
|
||||||
|
|
||||||
|
|
||||||
|
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||||
|
IE_NAME = 'netease:mv'
|
||||||
|
IE_DESC = '网易云音乐 - MV'
|
||||||
|
_VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://music.163.com/#/mv?id=415350',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '415350',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '이럴거면 그러지말지',
|
||||||
|
'description': '白雅言自作曲唱甜蜜爱情',
|
||||||
|
'creator': '白雅言',
|
||||||
|
'upload_date': '20150520',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mv_id = self._match_id(url)
|
||||||
|
|
||||||
|
info = self.query_api(
|
||||||
|
'mv/detail?id=%s&type=mp4' % mv_id,
|
||||||
|
mv_id, 'Downloading mv info')['data']
|
||||||
|
|
||||||
|
formats = [
|
||||||
|
{'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
|
||||||
|
for brs, mv_url in info['brs'].items()
|
||||||
|
]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': mv_id,
|
||||||
|
'title': info['name'],
|
||||||
|
'description': info.get('desc') or info.get('briefDesc'),
|
||||||
|
'creator': info['artistName'],
|
||||||
|
'upload_date': info['publishTime'].replace('-', ''),
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': info.get('cover'),
|
||||||
|
'duration': self.convert_milliseconds(info.get('duration', 0)),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||||
|
IE_NAME = 'netease:program'
|
||||||
|
IE_DESC = '网易云音乐 - 电台节目'
|
||||||
|
_VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://music.163.com/#/program?id=10109055',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '10109055',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '不丹足球背后的故事',
|
||||||
|
'description': '喜马拉雅人的足球梦 ...',
|
||||||
|
'creator': '大话西藏',
|
||||||
|
'timestamp': 1434179342,
|
||||||
|
'upload_date': '20150613',
|
||||||
|
'duration': 900,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'note': 'This program has accompanying songs.',
|
||||||
|
'url': 'http://music.163.com/#/program?id=10141022',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '10141022',
|
||||||
|
'title': '25岁,你是自在如风的少年<27°C>',
|
||||||
|
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
|
||||||
|
},
|
||||||
|
'playlist_count': 4,
|
||||||
|
}, {
|
||||||
|
'note': 'This program has accompanying songs.',
|
||||||
|
'url': 'http://music.163.com/#/program?id=10141022',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '10141022',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '25岁,你是自在如风的少年<27°C>',
|
||||||
|
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
|
||||||
|
'timestamp': 1434450841,
|
||||||
|
'upload_date': '20150616',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'noplaylist': True
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
program_id = self._match_id(url)
|
||||||
|
|
||||||
|
info = self.query_api(
|
||||||
|
'dj/program/detail?id=%s' % program_id,
|
||||||
|
program_id, 'Downloading program info')['program']
|
||||||
|
|
||||||
|
name = info['name']
|
||||||
|
description = info['description']
|
||||||
|
|
||||||
|
if not info['songs'] or self._downloader.params.get('noplaylist'):
|
||||||
|
if info['songs']:
|
||||||
|
self.to_screen(
|
||||||
|
'Downloading just the main audio %s because of --no-playlist'
|
||||||
|
% info['mainSong']['id'])
|
||||||
|
|
||||||
|
formats = self.extract_formats(info['mainSong'])
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': program_id,
|
||||||
|
'title': name,
|
||||||
|
'description': description,
|
||||||
|
'creator': info['dj']['brand'],
|
||||||
|
'timestamp': self.convert_milliseconds(info['createTime']),
|
||||||
|
'thumbnail': info['coverUrl'],
|
||||||
|
'duration': self.convert_milliseconds(info.get('duration', 0)),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
self.to_screen(
|
||||||
|
'Downloading playlist %s - add --no-playlist to just download the main audio %s'
|
||||||
|
% (program_id, info['mainSong']['id']))
|
||||||
|
|
||||||
|
song_ids = [info['mainSong']['id']]
|
||||||
|
song_ids.extend([song['id'] for song in info['songs']])
|
||||||
|
entries = [
|
||||||
|
self.url_result('http://music.163.com/#/song?id=%s' % song_id,
|
||||||
|
'NetEaseMusic', song_id)
|
||||||
|
for song_id in song_ids
|
||||||
|
]
|
||||||
|
return self.playlist_result(entries, program_id, name, description)
|
||||||
|
|
||||||
|
|
||||||
|
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
|
||||||
|
IE_NAME = 'netease:djradio'
|
||||||
|
IE_DESC = '网易云音乐 - 电台'
|
||||||
|
_VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://music.163.com/#/djradio?id=42',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '42',
|
||||||
|
'title': '声音蔓延',
|
||||||
|
'description': 'md5:766220985cbd16fdd552f64c578a6b15'
|
||||||
|
},
|
||||||
|
'playlist_mincount': 40,
|
||||||
|
}
|
||||||
|
_PAGE_SIZE = 1000
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
dj_id = self._match_id(url)
|
||||||
|
|
||||||
|
name = None
|
||||||
|
desc = None
|
||||||
|
entries = []
|
||||||
|
for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
|
||||||
|
info = self.query_api(
|
||||||
|
'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
|
||||||
|
% (self._PAGE_SIZE, dj_id, offset),
|
||||||
|
dj_id, 'Downloading dj programs - %d' % offset)
|
||||||
|
|
||||||
|
entries.extend([
|
||||||
|
self.url_result(
|
||||||
|
'http://music.163.com/#/program?id=%s' % program['id'],
|
||||||
|
'NetEaseMusicProgram', program['id'])
|
||||||
|
for program in info['programs']
|
||||||
|
])
|
||||||
|
|
||||||
|
if name is None:
|
||||||
|
radio = info['programs'][0]['radio']
|
||||||
|
name = radio['name']
|
||||||
|
desc = radio['desc']
|
||||||
|
|
||||||
|
if not info['more']:
|
||||||
|
break
|
||||||
|
|
||||||
|
return self.playlist_result(entries, dj_id, name, desc)
|
||||||
@@ -31,7 +31,7 @@ class NewstubeIE(InfoExtractor):
|
|||||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||||
|
|
||||||
video_guid = self._html_search_regex(
|
video_guid = self._html_search_regex(
|
||||||
r'<meta property="og:video" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
r'<meta property="og:video:url" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||||
page, 'video GUID')
|
page, 'video GUID')
|
||||||
|
|
||||||
player = self._download_xml(
|
player = self._download_xml(
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from ..utils import parse_iso8601
|
|||||||
|
|
||||||
|
|
||||||
class NextMediaIE(InfoExtractor):
|
class NextMediaIE(InfoExtractor):
|
||||||
|
IE_DESC = '蘋果日報'
|
||||||
_VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
|
_VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
|
'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
|
||||||
@@ -66,6 +67,7 @@ class NextMediaIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class NextMediaActionNewsIE(NextMediaIE):
|
class NextMediaActionNewsIE(NextMediaIE):
|
||||||
|
IE_DESC = '蘋果日報 - 動新聞'
|
||||||
_VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
|
_VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
|
'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
|
||||||
@@ -89,8 +91,9 @@ class NextMediaActionNewsIE(NextMediaIE):
|
|||||||
return self._extract_from_nextmedia_page(news_id, url, article_page)
|
return self._extract_from_nextmedia_page(news_id, url, article_page)
|
||||||
|
|
||||||
|
|
||||||
class AppleDailyRealtimeNewsIE(NextMediaIE):
|
class AppleDailyIE(NextMediaIE):
|
||||||
_VALID_URL = r'http://(www|ent).appledaily.com.tw/(realtimenews|enews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
IE_DESC = '臺灣蘋果日報'
|
||||||
|
_VALID_URL = r'http://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
|
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
|
||||||
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
|
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
|
||||||
@@ -99,7 +102,7 @@ class AppleDailyRealtimeNewsIE(NextMediaIE):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
|
'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'description': 'md5:b23787119933404ce515c6356a8c355c',
|
'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4',
|
||||||
'upload_date': '20150128',
|
'upload_date': '20150128',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
@@ -110,26 +113,10 @@ class AppleDailyRealtimeNewsIE(NextMediaIE):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '不滿被踩腳 山東兩大媽一路打下車',
|
'title': '不滿被踩腳 山東兩大媽一路打下車',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'description': 'md5:2648aaf6fc4f401f6de35a91d111aa1d',
|
'description': 'md5:175b4260c1d7c085993474217e4ab1b4',
|
||||||
'upload_date': '20150128',
|
'upload_date': '20150128',
|
||||||
}
|
}
|
||||||
}]
|
}, {
|
||||||
|
|
||||||
_URL_PATTERN = r'\{url: \'(.+)\'\}'
|
|
||||||
|
|
||||||
def _fetch_title(self, page):
|
|
||||||
return self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title')
|
|
||||||
|
|
||||||
def _fetch_thumbnail(self, page):
|
|
||||||
return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
|
|
||||||
|
|
||||||
def _fetch_timestamp(self, page):
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE):
|
|
||||||
_VALID_URL = 'http://www.appledaily.com.tw/animation/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
|
'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
|
||||||
'md5': '03df296d95dedc2d5886debbb80cb43f',
|
'md5': '03df296d95dedc2d5886debbb80cb43f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -154,10 +141,22 @@ class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE):
|
|||||||
'expected_warnings': [
|
'expected_warnings': [
|
||||||
'video thumbnail',
|
'video thumbnail',
|
||||||
]
|
]
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_URL_PATTERN = r'\{url: \'(.+)\'\}'
|
||||||
|
|
||||||
def _fetch_title(self, page):
|
def _fetch_title(self, page):
|
||||||
return self._html_search_meta('description', page, 'news title')
|
return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None) or
|
||||||
|
self._html_search_meta('description', page, 'news title'))
|
||||||
|
|
||||||
|
def _fetch_thumbnail(self, page):
|
||||||
|
return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
|
||||||
|
|
||||||
|
def _fetch_timestamp(self, page):
|
||||||
|
return None
|
||||||
|
|
||||||
def _fetch_description(self, page):
|
def _fetch_description(self, page):
|
||||||
return self._html_search_meta('description', page, 'news description')
|
return self._html_search_meta('description', page, 'news description')
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ class NFLIE(InfoExtractor):
|
|||||||
_VALID_URL = r'''(?x)https?://
|
_VALID_URL = r'''(?x)https?://
|
||||||
(?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/
|
(?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/
|
||||||
(?:.+?/)*
|
(?:.+?/)*
|
||||||
(?P<id>(?:\d[a-z]{2}\d{13}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
|
(?P<id>(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
|
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
|
||||||
@@ -58,6 +58,10 @@ class NFLIE(InfoExtractor):
|
|||||||
'upload_date': '20150202',
|
'upload_date': '20150202',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|||||||
@@ -182,7 +182,6 @@ class NiconicoIE(InfoExtractor):
|
|||||||
extension = xpath_text(video_info, './/movie_type')
|
extension = xpath_text(video_info, './/movie_type')
|
||||||
if not extension:
|
if not extension:
|
||||||
extension = determine_ext(video_real_url)
|
extension = determine_ext(video_real_url)
|
||||||
video_format = extension.upper()
|
|
||||||
|
|
||||||
thumbnail = (
|
thumbnail = (
|
||||||
xpath_text(video_info, './/thumbnail_url') or
|
xpath_text(video_info, './/thumbnail_url') or
|
||||||
@@ -241,7 +240,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'url': video_real_url,
|
'url': video_real_url,
|
||||||
'title': title,
|
'title': title,
|
||||||
'ext': extension,
|
'ext': extension,
|
||||||
'format': video_format,
|
'format_id': 'economy' if video_real_url.endswith('low') else 'normal',
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'description': description,
|
'description': description,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
|
|||||||
@@ -166,6 +166,10 @@ class NocoIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ')
|
timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ')
|
||||||
|
|
||||||
|
if timestamp is not None and timestamp < 0:
|
||||||
|
timestamp = None
|
||||||
|
|
||||||
uploader = show.get('partner_name')
|
uploader = show.get('partner_name')
|
||||||
uploader_id = show.get('partner_key')
|
uploader_id = show.get('partner_key')
|
||||||
duration = float_or_none(show.get('duration_ms'), 1000)
|
duration = float_or_none(show.get('duration_ms'), 1000)
|
||||||
@@ -191,7 +195,7 @@ class NocoIE(InfoExtractor):
|
|||||||
if episode_number:
|
if episode_number:
|
||||||
title += ' #' + compat_str(episode_number)
|
title += ' #' + compat_str(episode_number)
|
||||||
if episode:
|
if episode:
|
||||||
title += ' - ' + episode
|
title += ' - ' + compat_str(episode)
|
||||||
|
|
||||||
description = show.get('show_resume') or show.get('family_resume')
|
description = show.get('show_resume') or show.get('family_resume')
|
||||||
|
|
||||||
|
|||||||
179
youtube_dl/extractor/nova.py
Normal file
179
youtube_dl/extractor/nova.py
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NovaIE(InfoExtractor):
|
||||||
|
IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
|
||||||
|
_VALID_URL = 'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1608920',
|
||||||
|
'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Duel: Michal Hrdlička a Petr Suchoň',
|
||||||
|
'description': 'md5:d0cc509858eee1b1374111c588c6f5d5',
|
||||||
|
'thumbnail': 're:^https?://.*\.(?:jpg)',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
|
||||||
|
'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1757139',
|
||||||
|
'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Podzemní nemocnice v pražské Krči',
|
||||||
|
'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
|
||||||
|
'thumbnail': 're:^https?://.*\.(?:jpg)',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1756825',
|
||||||
|
'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově',
|
||||||
|
'description': 'md5:dc24e50be5908df83348e50d1431295e', # Make sure this description is clean of html tags
|
||||||
|
'thumbnail': 're:^https?://.*\.(?:jpg)',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://novaplus.nova.cz/porad/televizni-noviny/video/5585-televizni-noviny-30-5-2015/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1756858',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Televizní noviny - 30. 5. 2015',
|
||||||
|
'thumbnail': 're:^https?://.*\.(?:jpg)',
|
||||||
|
'upload_date': '20150530',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1753621',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Zaklínač 3: Divoký hon',
|
||||||
|
'description': 're:.*Pokud se stejně jako my nemůžete.*',
|
||||||
|
'thumbnail': 're:https?://.*\.jpg(\?.*)?',
|
||||||
|
'upload_date': '20150521',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://doma.nova.cz/clanek/zdravi/prijdte-se-zapsat-do-registru-kostni-drene-jiz-ve-stredu-3-cervna.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://prask.nova.cz/clanek/novinky/co-si-na-sobe-nase-hvezdy-nechaly-pojistit.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://tv.nova.cz/clanek/novinky/zivot-je-zivot-bondovsky-trailer.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('id')
|
||||||
|
site = mobj.group('site')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
[r"(?:media|video_id)\s*:\s*'(\d+)'",
|
||||||
|
r'media=(\d+)',
|
||||||
|
r'id="article_video_(\d+)"',
|
||||||
|
r'id="player_(\d+)"'],
|
||||||
|
webpage, 'video id')
|
||||||
|
|
||||||
|
config_url = self._search_regex(
|
||||||
|
r'src="(http://tn\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
|
||||||
|
webpage, 'config url', default=None)
|
||||||
|
|
||||||
|
if not config_url:
|
||||||
|
DEFAULT_SITE_ID = '23000'
|
||||||
|
SITES = {
|
||||||
|
'tvnoviny': DEFAULT_SITE_ID,
|
||||||
|
'novaplus': DEFAULT_SITE_ID,
|
||||||
|
'vymena': DEFAULT_SITE_ID,
|
||||||
|
'krasna': DEFAULT_SITE_ID,
|
||||||
|
'fanda': '30',
|
||||||
|
'tn': '30',
|
||||||
|
'doma': '30',
|
||||||
|
}
|
||||||
|
|
||||||
|
site_id = self._search_regex(
|
||||||
|
r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(site, DEFAULT_SITE_ID)
|
||||||
|
|
||||||
|
config_url = ('http://tn.nova.cz/bin/player/videojs/config.php?site=%s&media=%s&jsVar=vjsconfig'
|
||||||
|
% (site_id, video_id))
|
||||||
|
|
||||||
|
config = self._download_json(
|
||||||
|
config_url, display_id,
|
||||||
|
'Downloading config JSON',
|
||||||
|
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
|
||||||
|
|
||||||
|
mediafile = config['mediafile']
|
||||||
|
video_url = mediafile['src']
|
||||||
|
|
||||||
|
m = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+?))/&*(?P<playpath>.+)$', video_url)
|
||||||
|
if m:
|
||||||
|
formats = [{
|
||||||
|
'url': m.group('url'),
|
||||||
|
'app': m.group('app'),
|
||||||
|
'play_path': m.group('playpath'),
|
||||||
|
'player_path': 'http://tvnoviny.nova.cz/static/shared/app/videojs/video-js.swf',
|
||||||
|
'ext': 'flv',
|
||||||
|
}]
|
||||||
|
else:
|
||||||
|
formats = [{
|
||||||
|
'url': video_url,
|
||||||
|
}]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
|
||||||
|
description = clean_html(self._og_search_description(webpage, default=None))
|
||||||
|
thumbnail = config.get('poster')
|
||||||
|
|
||||||
|
if site == 'novaplus':
|
||||||
|
upload_date = unified_strdate(self._search_regex(
|
||||||
|
r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
|
||||||
|
elif site == 'fanda':
|
||||||
|
upload_date = unified_strdate(self._search_regex(
|
||||||
|
r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
|
||||||
|
else:
|
||||||
|
upload_date = None
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
192
youtube_dl/extractor/nowtv.py
Normal file
192
youtube_dl/extractor/nowtv.py
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
parse_duration,
|
||||||
|
remove_start,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NowTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?P<station>rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/player'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
# rtl
|
||||||
|
'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/player',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '203519',
|
||||||
|
'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Die neuen Bauern und eine Hochzeit',
|
||||||
|
'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1432580700,
|
||||||
|
'upload_date': '20150525',
|
||||||
|
'duration': 2786,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# rtl2
|
||||||
|
'url': 'http://www.nowtv.de/rtl2/berlin-tag-nacht/berlin-tag-nacht-folge-934/player',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '203481',
|
||||||
|
'display_id': 'berlin-tag-nacht/berlin-tag-nacht-folge-934',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Berlin - Tag & Nacht (Folge 934)',
|
||||||
|
'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1432666800,
|
||||||
|
'upload_date': '20150526',
|
||||||
|
'duration': 2641,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# rtlnitro
|
||||||
|
'url': 'http://www.nowtv.de/rtlnitro/alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00/player',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '165780',
|
||||||
|
'display_id': 'alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Hals- und Beinbruch',
|
||||||
|
'description': 'md5:b50d248efffe244e6f56737f0911ca57',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1432415400,
|
||||||
|
'upload_date': '20150523',
|
||||||
|
'duration': 2742,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# superrtl
|
||||||
|
'url': 'http://www.nowtv.de/superrtl/medicopter-117/angst/player',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '99205',
|
||||||
|
'display_id': 'medicopter-117/angst',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Angst!',
|
||||||
|
'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1222632900,
|
||||||
|
'upload_date': '20080928',
|
||||||
|
'duration': 3025,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# ntv
|
||||||
|
'url': 'http://www.nowtv.de/ntv/ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch/player',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '203521',
|
||||||
|
'display_id': 'ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Thema u.a.: Der erste Blick: Die Apple Watch',
|
||||||
|
'description': 'md5:4312b6c9d839ffe7d8caf03865a531af',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1432751700,
|
||||||
|
'upload_date': '20150527',
|
||||||
|
'duration': 1083,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# vox
|
||||||
|
'url': 'http://www.nowtv.de/vox/der-hundeprofi/buero-fall-chihuahua-joel/player',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '128953',
|
||||||
|
'display_id': 'der-hundeprofi/buero-fall-chihuahua-joel',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Büro-Fall / Chihuahua 'Joel'",
|
||||||
|
'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1432408200,
|
||||||
|
'upload_date': '20150523',
|
||||||
|
'duration': 3092,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('id')
|
||||||
|
station = mobj.group('station')
|
||||||
|
|
||||||
|
info = self._download_json(
|
||||||
|
'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
|
||||||
|
display_id)
|
||||||
|
|
||||||
|
video_id = compat_str(info['id'])
|
||||||
|
|
||||||
|
files = info['files']
|
||||||
|
if not files:
|
||||||
|
if info.get('geoblocked', False):
|
||||||
|
raise ExtractorError(
|
||||||
|
'Video %s is not available from your location due to geo restriction' % video_id,
|
||||||
|
expected=True)
|
||||||
|
if not info.get('free', True):
|
||||||
|
raise ExtractorError(
|
||||||
|
'Video %s is not available for free' % video_id, expected=True)
|
||||||
|
|
||||||
|
f = info.get('format', {})
|
||||||
|
station = f.get('station') or station
|
||||||
|
|
||||||
|
STATIONS = {
|
||||||
|
'rtl': 'rtlnow',
|
||||||
|
'rtl2': 'rtl2now',
|
||||||
|
'vox': 'voxnow',
|
||||||
|
'nitro': 'rtlnitronow',
|
||||||
|
'ntv': 'n-tvnow',
|
||||||
|
'superrtl': 'superrtlnow'
|
||||||
|
}
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for item in files['items']:
|
||||||
|
item_path = remove_start(item['path'], '/')
|
||||||
|
tbr = int_or_none(item['bitrate'])
|
||||||
|
m3u8_url = 'http://hls.fra.%s.de/hls-vod-enc/%s.m3u8' % (STATIONS[station], item_path)
|
||||||
|
m3u8_url = m3u8_url.replace('now/', 'now/videos/')
|
||||||
|
formats.append({
|
||||||
|
'url': m3u8_url,
|
||||||
|
'format_id': '%s-%sk' % (item['id'], tbr),
|
||||||
|
'ext': 'mp4',
|
||||||
|
'tbr': tbr,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = info['title']
|
||||||
|
description = info.get('articleLong') or info.get('articleShort')
|
||||||
|
timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
|
||||||
|
duration = parse_duration(info.get('duration'))
|
||||||
|
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user