mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-12-09 15:42:43 +01:00
Compare commits
30 Commits
2021.02.10
...
2021.02.22
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0a04e03a02 | ||
|
|
44b2d5f5fc | ||
|
|
aa9118a373 | ||
|
|
36abc16c3c | ||
|
|
919d764600 | ||
|
|
696183e133 | ||
|
|
f90d825a6b | ||
|
|
3037ab00c7 | ||
|
|
21e872b19a | ||
|
|
cf2dbec630 | ||
|
|
b92bb0e02a | ||
|
|
40edffae3d | ||
|
|
9fc5eafb8e | ||
|
|
08c2fbb844 | ||
|
|
3997efb65e | ||
|
|
a7356dffe9 | ||
|
|
e20ec43094 | ||
|
|
70baa7bfae | ||
|
|
8980f53b42 | ||
|
|
a363fb5d28 | ||
|
|
646052e416 | ||
|
|
844e4cbc54 | ||
|
|
56c63c8c02 | ||
|
|
07eb8f1916 | ||
|
|
4b5410c5c8 | ||
|
|
be2e9b76ee | ||
|
|
d8085580f6 | ||
|
|
6d32c6c6d3 | ||
|
|
f94d764993 | ||
|
|
f28f1b4d6e |
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.10. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.10**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.22**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2021.02.10
|
||||
[debug] youtube-dl version 2021.02.22
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
@@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.10. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.10**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.22**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
||||
@@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.10. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.10**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.22**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.10. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.10**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.22**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2021.02.10
|
||||
[debug] youtube-dl version 2021.02.22
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.10. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.10**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.22**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
37
ChangeLog
37
ChangeLog
@@ -1,3 +1,40 @@
|
||||
version 2021.02.22
|
||||
|
||||
Core
|
||||
+ [postprocessor/embedthumbnail] Recognize atomicparsley binary in lowercase
|
||||
(#28112)
|
||||
|
||||
Extractors
|
||||
* [apa] Fix and improve extraction (#27750)
|
||||
+ [youporn] Extract duration (#28019)
|
||||
+ [peertube] Add support for canard.tube (#28190)
|
||||
* [youtube] Fixup m4a_dash formats (#28165)
|
||||
+ [samplefocus] Add support for samplefocus.com (#27763)
|
||||
+ [vimeo] Add support for unlisted video source format extraction
|
||||
* [viki] Improve extraction (#26522, #28203)
|
||||
* Extract uploader URL and episode number
|
||||
* Report login required error
|
||||
+ Extract 480p formats
|
||||
* Fix API v4 calls
|
||||
* [ninegag] Unescape title (#28201)
|
||||
* [youtube] Improve URL regular expression (#28193)
|
||||
+ [youtube] Add support for redirect.invidious.io (#28193)
|
||||
+ [dplay] Add support for de.hgtv.com (#28182)
|
||||
+ [dplay] Add support for discoveryplus.com (#24698)
|
||||
+ [simplecast] Add support for simplecast.com (#24107)
|
||||
* [youtube] Fix uploader extraction in flat playlist mode (#28045)
|
||||
* [yandexmusic:playlist] Request missing tracks in chunks (#27355, #28184)
|
||||
+ [storyfire] Add support for storyfire.com (#25628, #26349)
|
||||
+ [zhihu] Add support for zhihu.com (#28177)
|
||||
* [youtube] Fix controversial videos when authenticated with cookies (#28174)
|
||||
* [ccma] Fix timestamp parsing in python 2
|
||||
+ [videopress] Add support for video.wordpress.com
|
||||
* [kakao] Improve info extraction and detect geo restriction (#26577)
|
||||
* [xboxclips] Fix extraction (#27151)
|
||||
* [ard] Improve formats extraction (#28155)
|
||||
+ [canvas] Add support for dagelijksekost.een.be (#28119)
|
||||
|
||||
|
||||
version 2021.02.10
|
||||
|
||||
Extractors
|
||||
|
||||
@@ -212,6 +212,7 @@
|
||||
- **curiositystream**
|
||||
- **curiositystream:collection**
|
||||
- **CWTV**
|
||||
- **DagelijkseKost**: dagelijksekost.een.be
|
||||
- **DailyMail**
|
||||
- **dailymotion**
|
||||
- **dailymotion:playlist**
|
||||
@@ -233,6 +234,7 @@
|
||||
- **DiscoveryGo**
|
||||
- **DiscoveryGoPlaylist**
|
||||
- **DiscoveryNetworksDe**
|
||||
- **DiscoveryPlus**
|
||||
- **DiscoveryVR**
|
||||
- **Disney**
|
||||
- **dlive:stream**
|
||||
@@ -353,6 +355,7 @@
|
||||
- **HentaiStigma**
|
||||
- **hetklokhuis**
|
||||
- **hgtv.com:show**
|
||||
- **HGTVDe**
|
||||
- **HiDive**
|
||||
- **HistoricFilms**
|
||||
- **history:player**
|
||||
@@ -803,6 +806,7 @@
|
||||
- **safari:course**: safaribooksonline.com online courses
|
||||
- **SAKTV**
|
||||
- **SaltTV**
|
||||
- **SampleFocus**
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
@@ -825,6 +829,9 @@
|
||||
- **ShahidShow**
|
||||
- **Shared**: shared.sx
|
||||
- **ShowRoomLive**
|
||||
- **simplecast**
|
||||
- **simplecast:episode**
|
||||
- **simplecast:podcast**
|
||||
- **Sina**
|
||||
- **sky.it**
|
||||
- **sky:news**
|
||||
@@ -877,6 +884,9 @@
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
- **StitcherShow**
|
||||
- **StoryFire**
|
||||
- **StoryFireSeries**
|
||||
- **StoryFireUser**
|
||||
- **Streamable**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
@@ -1198,5 +1208,6 @@
|
||||
- **ZattooLive**
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **Zhihu**
|
||||
- **zingmp3**: mp3.zing.vn
|
||||
- **Zype**
|
||||
|
||||
@@ -12,6 +12,7 @@ from test.helper import FakeYDL
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
YoutubePlaylistIE,
|
||||
YoutubeTabIE,
|
||||
YoutubeIE,
|
||||
)
|
||||
|
||||
@@ -57,14 +58,22 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
entries = result['entries']
|
||||
self.assertEqual(len(entries), 100)
|
||||
|
||||
def test_youtube_flat_playlist_titles(self):
|
||||
def test_youtube_flat_playlist_extraction(self):
|
||||
dl = FakeYDL()
|
||||
dl.params['extract_flat'] = True
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PL-KKIb8rvtMSrAO9YFbeM6UQrAqoFTUWv')
|
||||
ie = YoutubeTabIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc')
|
||||
self.assertIsPlaylist(result)
|
||||
for entry in result['entries']:
|
||||
self.assertTrue(entry.get('title'))
|
||||
entries = list(result['entries'])
|
||||
self.assertTrue(len(entries) == 1)
|
||||
video = entries[0]
|
||||
self.assertEqual(video['_type'], 'url_transparent')
|
||||
self.assertEqual(video['ie_key'], 'Youtube')
|
||||
self.assertEqual(video['id'], 'BaW_jenozKc')
|
||||
self.assertEqual(video['url'], 'BaW_jenozKc')
|
||||
self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐')
|
||||
self.assertEqual(video['duration'], 10)
|
||||
self.assertEqual(video['uploader'], 'Philipp Hagemeister')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -6,25 +6,21 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class APAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
|
||||
'info_dict': {
|
||||
'id': 'jjv85FdZ',
|
||||
'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||
'ext': 'mp4',
|
||||
'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 254,
|
||||
'timestamp': 1519211149,
|
||||
'upload_date': '20180221',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
|
||||
@@ -46,9 +42,11 @@ class APAIE(InfoExtractor):
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, base_url = mobj.group('id', 'base_url')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
'%s/player/%s' % (base_url, video_id), video_id)
|
||||
|
||||
jwplatform_id = self._search_regex(
|
||||
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
|
||||
@@ -59,16 +57,18 @@ class APAIE(InfoExtractor):
|
||||
'jwplatform:' + jwplatform_id, ie='JWPlatform',
|
||||
video_id=video_id)
|
||||
|
||||
sources = self._parse_json(
|
||||
self._search_regex(
|
||||
r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
|
||||
video_id, transform_source=js_to_json)
|
||||
def extract(field, name=None):
|
||||
return self._search_regex(
|
||||
r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
|
||||
webpage, name or field, default=None, group='value')
|
||||
|
||||
title = extract('title') or video_id
|
||||
description = extract('description')
|
||||
thumbnail = extract('poster', 'thumbnail')
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = url_or_none(source.get('file'))
|
||||
for format_id in ('hls', 'progressive'):
|
||||
source_url = url_or_none(extract(format_id))
|
||||
if not source_url:
|
||||
continue
|
||||
ext = determine_ext(source_url)
|
||||
@@ -77,18 +77,19 @@ class APAIE(InfoExtractor):
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)\.mp4', source_url, 'height', default=None))
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'thumbnail', fatal=False, group='url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -284,20 +284,42 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for a in video_node.findall('.//asset'):
|
||||
file_name = xpath_text(a, './fileName', default=None)
|
||||
if not file_name:
|
||||
continue
|
||||
format_type = a.attrib.get('type')
|
||||
format_url = url_or_none(file_name)
|
||||
if format_url:
|
||||
ext = determine_ext(file_name)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_type or 'hls', fatal=False))
|
||||
continue
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(format_url, {'hdcore': '3.7.0'}),
|
||||
display_id, f4m_id=format_type or 'hds', fatal=False))
|
||||
continue
|
||||
f = {
|
||||
'format_id': a.attrib['type'],
|
||||
'width': int_or_none(a.find('./frameWidth').text),
|
||||
'height': int_or_none(a.find('./frameHeight').text),
|
||||
'vbr': int_or_none(a.find('./bitrateVideo').text),
|
||||
'abr': int_or_none(a.find('./bitrateAudio').text),
|
||||
'vcodec': a.find('./codecVideo').text,
|
||||
'tbr': int_or_none(a.find('./totalBitrate').text),
|
||||
'format_id': format_type,
|
||||
'width': int_or_none(xpath_text(a, './frameWidth')),
|
||||
'height': int_or_none(xpath_text(a, './frameHeight')),
|
||||
'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
|
||||
'abr': int_or_none(xpath_text(a, './bitrateAudio')),
|
||||
'vcodec': xpath_text(a, './codecVideo'),
|
||||
'tbr': int_or_none(xpath_text(a, './totalBitrate')),
|
||||
}
|
||||
if a.find('./serverPrefix').text:
|
||||
f['url'] = a.find('./serverPrefix').text
|
||||
f['playpath'] = a.find('./fileName').text
|
||||
server_prefix = xpath_text(a, './serverPrefix', default=None)
|
||||
if server_prefix:
|
||||
f.update({
|
||||
'url': server_prefix,
|
||||
'playpath': file_name,
|
||||
})
|
||||
else:
|
||||
f['url'] = a.find('./fileName').text
|
||||
if not format_url:
|
||||
continue
|
||||
f['url'] = format_url
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -7,19 +7,21 @@ from .common import InfoExtractor
|
||||
from .gigya import GigyaBaseIE
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
strip_or_none,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'md5': '68993eda72ef62386a15ea2cf3c93107',
|
||||
@@ -332,3 +334,51 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'display_id': display_id,
|
||||
'season_number': int_or_none(page.get('episode_season')),
|
||||
})
|
||||
|
||||
|
||||
class DagelijkseKostIE(InfoExtractor):
|
||||
IE_DESC = 'dagelijksekost.een.be'
|
||||
_VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
|
||||
'md5': '30bfffc323009a3e5f689bef6efa2365',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
|
||||
'display_id': 'hachis-parmentier-met-witloof',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hachis parmentier met witloof',
|
||||
'description': 'md5:9960478392d87f63567b5b117688cdc5',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 283.02,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = strip_or_none(get_element_by_class(
|
||||
'dish-metadata__title', webpage
|
||||
) or self._html_search_meta(
|
||||
'twitter:title', webpage))
|
||||
|
||||
description = clean_html(get_element_by_class(
|
||||
'dish-description', webpage)
|
||||
) or self._html_search_meta(
|
||||
('description', 'twitter:description', 'og:description'),
|
||||
webpage)
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
|
||||
group='id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id,
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_timezone,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
@@ -97,8 +99,9 @@ class CCMAIE(InfoExtractor):
|
||||
timestamp = None
|
||||
data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
|
||||
try:
|
||||
timestamp = datetime.datetime.strptime(
|
||||
data_utc, '%Y-%d-%mT%H:%M:%S%z').timestamp()
|
||||
timezone, data_utc = extract_timezone(data_utc)
|
||||
timestamp = calendar.timegm((datetime.datetime.strptime(
|
||||
data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,11 +11,13 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class DPlayIE(InfoExtractor):
|
||||
_PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<domain>
|
||||
(?:www\.)?(?P<host>d
|
||||
@@ -24,7 +27,7 @@ class DPlayIE(InfoExtractor):
|
||||
)
|
||||
)|
|
||||
(?P<subdomain_country>es|it)\.dplay\.com
|
||||
)/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
|
||||
)/[^/]+''' + _PATH_REGEX
|
||||
|
||||
_TESTS = [{
|
||||
# non geo restricted, via secure api, unsigned download hls URL
|
||||
@@ -151,56 +154,79 @@ class DPlayIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _process_errors(self, e, geo_countries):
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), None)
|
||||
error = info['errors'][0]
|
||||
error_code = error.get('code')
|
||||
if error_code == 'access.denied.geoblocked':
|
||||
self.raise_geo_restricted(countries=geo_countries)
|
||||
elif error_code in ('access.denied.missingpackage', 'invalid.token'):
|
||||
raise ExtractorError(
|
||||
'This video is only available for registered users. You may want to use --cookies.', expected=True)
|
||||
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers['Authorization'] = 'Bearer ' + self._download_json(
|
||||
disco_base + 'token', display_id, 'Downloading token',
|
||||
query={
|
||||
'realm': realm,
|
||||
})['data']['attributes']['token']
|
||||
|
||||
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||
streaming = self._download_json(
|
||||
disco_base + 'playback/videoPlaybackInfo/' + video_id,
|
||||
video_id, headers=headers)['data']['attributes']['streaming']
|
||||
streaming_list = []
|
||||
for format_id, format_dict in streaming.items():
|
||||
streaming_list.append({
|
||||
'type': format_id,
|
||||
'url': format_dict.get('url'),
|
||||
})
|
||||
return streaming_list
|
||||
|
||||
def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
|
||||
geo_countries = [country.upper()]
|
||||
self._initialize_geo_bypass({
|
||||
'countries': geo_countries,
|
||||
})
|
||||
disco_base = 'https://%s/' % disco_host
|
||||
token = self._download_json(
|
||||
disco_base + 'token', display_id, 'Downloading token',
|
||||
query={
|
||||
'realm': realm,
|
||||
})['data']['attributes']['token']
|
||||
headers = {
|
||||
'Referer': url,
|
||||
'Authorization': 'Bearer ' + token,
|
||||
}
|
||||
video = self._download_json(
|
||||
disco_base + 'content/videos/' + display_id, display_id,
|
||||
headers=headers, query={
|
||||
'fields[channel]': 'name',
|
||||
'fields[image]': 'height,src,width',
|
||||
'fields[show]': 'name',
|
||||
'fields[tag]': 'name',
|
||||
'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
|
||||
'include': 'images,primaryChannel,show,tags'
|
||||
})
|
||||
self._update_disco_api_headers(headers, disco_base, display_id, realm)
|
||||
try:
|
||||
video = self._download_json(
|
||||
disco_base + 'content/videos/' + display_id, display_id,
|
||||
headers=headers, query={
|
||||
'fields[channel]': 'name',
|
||||
'fields[image]': 'height,src,width',
|
||||
'fields[show]': 'name',
|
||||
'fields[tag]': 'name',
|
||||
'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
|
||||
'include': 'images,primaryChannel,show,tags'
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
self._process_errors(e, geo_countries)
|
||||
raise
|
||||
video_id = video['data']['id']
|
||||
info = video['data']['attributes']
|
||||
title = info['name'].strip()
|
||||
formats = []
|
||||
try:
|
||||
streaming = self._download_json(
|
||||
disco_base + 'playback/videoPlaybackInfo/' + video_id,
|
||||
display_id, headers=headers)['data']['attributes']['streaming']
|
||||
streaming = self._download_video_playback_info(
|
||||
disco_base, video_id, headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
|
||||
error = info['errors'][0]
|
||||
error_code = error.get('code')
|
||||
if error_code == 'access.denied.geoblocked':
|
||||
self.raise_geo_restricted(countries=geo_countries)
|
||||
elif error_code == 'access.denied.missingpackage':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
||||
self._process_errors(e, geo_countries)
|
||||
raise
|
||||
for format_id, format_dict in streaming.items():
|
||||
for format_dict in streaming:
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
format_id = format_dict.get('type')
|
||||
ext = determine_ext(format_url)
|
||||
if format_id == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
@@ -248,7 +274,7 @@ class DPlayIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'description': strip_or_none(info.get('description')),
|
||||
'duration': float_or_none(info.get('videoDuration'), 1000),
|
||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
||||
'series': series,
|
||||
@@ -268,3 +294,75 @@ class DPlayIE(InfoExtractor):
|
||||
host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, host, 'dplay' + country, country)
|
||||
|
||||
|
||||
class DiscoveryPlusIE(DPlayIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
|
||||
'info_dict': {
|
||||
'id': '1140794',
|
||||
'display_id': 'property-brothers-forever-home/food-and-family',
|
||||
'ext': 'mp4',
|
||||
'title': 'Food and Family',
|
||||
'description': 'The brothers help a Richmond family expand their single-level home.',
|
||||
'duration': 2583.113,
|
||||
'timestamp': 1609304400,
|
||||
'upload_date': '20201230',
|
||||
'creator': 'HGTV',
|
||||
'series': 'Property Brothers: Forever Home',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'skip': 'Available for Premium users',
|
||||
}]
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0'
|
||||
|
||||
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||
return self._download_json(
|
||||
disco_base + 'playback/v3/videoPlaybackInfo',
|
||||
video_id, headers=headers, data=json.dumps({
|
||||
'deviceInfo': {
|
||||
'adBlocker': False,
|
||||
},
|
||||
'videoId': video_id,
|
||||
'wisteriaProperties': {
|
||||
'platform': 'desktop',
|
||||
},
|
||||
}).encode('utf-8'))['data']['attributes']['streaming']
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us')
|
||||
|
||||
|
||||
class HGTVDeIE(DPlayIE):
|
||||
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
|
||||
'info_dict': {
|
||||
'id': '151205',
|
||||
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wer braucht schon eine Toilette',
|
||||
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
|
||||
'duration': 1177.024,
|
||||
'timestamp': 1595705400,
|
||||
'upload_date': '20200725',
|
||||
'creator': 'HGTV',
|
||||
'series': 'Tiny House - klein, aber oho',
|
||||
'season_number': 3,
|
||||
'episode_number': 3,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
|
||||
|
||||
@@ -163,6 +163,7 @@ from .canvas import (
|
||||
CanvasIE,
|
||||
CanvasEenIE,
|
||||
VrtNUIE,
|
||||
DagelijkseKostIE,
|
||||
)
|
||||
from .carambatv import (
|
||||
CarambaTVIE,
|
||||
@@ -287,7 +288,11 @@ from .douyutv import (
|
||||
DouyuShowIE,
|
||||
DouyuTVIE,
|
||||
)
|
||||
from .dplay import DPlayIE
|
||||
from .dplay import (
|
||||
DPlayIE,
|
||||
DiscoveryPlusIE,
|
||||
HGTVDeIE,
|
||||
)
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
from .drtuber import DrTuberIE
|
||||
@@ -1024,6 +1029,7 @@ from .safari import (
|
||||
SafariApiIE,
|
||||
SafariCourseIE,
|
||||
)
|
||||
from .samplefocus import SampleFocusIE
|
||||
from .sapo import SapoIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .sbs import SBSIE
|
||||
@@ -1056,6 +1062,11 @@ from .shared import (
|
||||
VivoIE,
|
||||
)
|
||||
from .showroomlive import ShowRoomLiveIE
|
||||
from .simplecast import (
|
||||
SimplecastIE,
|
||||
SimplecastEpisodeIE,
|
||||
SimplecastPodcastIE,
|
||||
)
|
||||
from .sina import SinaIE
|
||||
from .sixplay import SixPlayIE
|
||||
from .skyit import (
|
||||
@@ -1140,6 +1151,11 @@ from .srgssr import (
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .storyfire import (
|
||||
StoryFireIE,
|
||||
StoryFireUserIE,
|
||||
StoryFireSeriesIE,
|
||||
)
|
||||
from .streamable import StreamableIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
@@ -1602,5 +1618,6 @@ from .zattoo import (
|
||||
ZattooLiveIE,
|
||||
)
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zhihu import ZhihuIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
from .zype import ZypeIE
|
||||
|
||||
@@ -129,6 +129,7 @@ from .odnoklassniki import OdnoklassnikiIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
from .medialaan import MedialaanIE
|
||||
from .simplecast import SimplecastIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -2238,6 +2239,15 @@ class GenericIE(InfoExtractor):
|
||||
'duration': 159,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Simplecast player embed
|
||||
'url': 'https://www.bio.org/podcast',
|
||||
'info_dict': {
|
||||
'id': 'podcast',
|
||||
'title': 'I AM BIO Podcast | BIO',
|
||||
},
|
||||
'playlist_mincount': 52,
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -2792,6 +2802,12 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
|
||||
|
||||
# Look for Simplecast embeds
|
||||
simplecast_urls = SimplecastIE._extract_urls(webpage)
|
||||
if simplecast_urls:
|
||||
return self.playlist_from_matches(
|
||||
simplecast_urls, video_id, video_title)
|
||||
|
||||
# Look for BBC iPlayer embed
|
||||
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
|
||||
if matches:
|
||||
|
||||
@@ -3,10 +3,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
@@ -23,7 +26,7 @@ class KakaoIE(InfoExtractor):
|
||||
'id': '301965083',
|
||||
'ext': 'mp4',
|
||||
'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
|
||||
'uploader_id': 2671005,
|
||||
'uploader_id': '2671005',
|
||||
'uploader': '그랑그랑이',
|
||||
'timestamp': 1488160199,
|
||||
'upload_date': '20170227',
|
||||
@@ -36,11 +39,15 @@ class KakaoIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
|
||||
'uploader_id': 2653210,
|
||||
'uploader_id': '2653210',
|
||||
'uploader': '쇼! 음악중심',
|
||||
'timestamp': 1485684628,
|
||||
'upload_date': '20170129',
|
||||
}
|
||||
}, {
|
||||
# geo restricted
|
||||
'url': 'https://tv.kakao.com/channel/3643855/cliplink/412069491',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -68,8 +75,7 @@ class KakaoIE(InfoExtractor):
|
||||
'fields': ','.join([
|
||||
'-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
|
||||
'description', 'channelId', 'createTime', 'duration', 'playCount',
|
||||
'likeCount', 'commentCount', 'tagList', 'channel', 'name',
|
||||
'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault',
|
||||
'likeCount', 'commentCount', 'tagList', 'channel', 'name', 'thumbnailUrl',
|
||||
'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
|
||||
}
|
||||
|
||||
@@ -82,24 +88,28 @@ class KakaoIE(InfoExtractor):
|
||||
|
||||
title = clip.get('title') or clip_link.get('displayTitle')
|
||||
|
||||
query['tid'] = impress.get('tid', '')
|
||||
query.update({
|
||||
'fields': '-*,code,message,url',
|
||||
'tid': impress.get('tid') or '',
|
||||
})
|
||||
|
||||
formats = []
|
||||
for fmt in clip.get('videoOutputList', []):
|
||||
for fmt in (clip.get('videoOutputList') or []):
|
||||
try:
|
||||
profile_name = fmt['profile']
|
||||
if profile_name == 'AUDIO':
|
||||
continue
|
||||
query.update({
|
||||
'profile': profile_name,
|
||||
'fields': '-*,url',
|
||||
})
|
||||
fmt_url_json = self._download_json(
|
||||
api_base + 'raw/videolocation', display_id,
|
||||
'Downloading video URL for profile %s' % profile_name,
|
||||
query=query, headers=player_header, fatal=False)
|
||||
|
||||
if fmt_url_json is None:
|
||||
query['profile'] = profile_name
|
||||
try:
|
||||
fmt_url_json = self._download_json(
|
||||
api_base + 'raw/videolocation', display_id,
|
||||
'Downloading video URL for profile %s' % profile_name,
|
||||
query=query, headers=player_header)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
resp = self._parse_json(e.cause.read().decode(), video_id)
|
||||
if resp.get('code') == 'GeoBlocked':
|
||||
self.raise_geo_restricted()
|
||||
continue
|
||||
|
||||
fmt_url = fmt_url_json['url']
|
||||
@@ -116,27 +126,13 @@ class KakaoIE(InfoExtractor):
|
||||
pass
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbs = []
|
||||
for thumb in clip.get('clipChapterThumbnailList', []):
|
||||
thumbs.append({
|
||||
'url': thumb.get('thumbnailUrl'),
|
||||
'id': compat_str(thumb.get('timeInSec')),
|
||||
'preference': -1 if thumb.get('isDefault') else 0
|
||||
})
|
||||
top_thumbnail = clip.get('thumbnailUrl')
|
||||
if top_thumbnail:
|
||||
thumbs.append({
|
||||
'url': top_thumbnail,
|
||||
'preference': 10,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'title': title,
|
||||
'description': strip_or_none(clip.get('description')),
|
||||
'uploader': clip_link.get('channel', {}).get('name'),
|
||||
'uploader_id': clip_link.get('channelId'),
|
||||
'thumbnails': thumbs,
|
||||
'uploader': try_get(clip_link, lambda x: x['channel']['name']),
|
||||
'uploader_id': str_or_none(clip_link.get('channelId')),
|
||||
'thumbnail': clip.get('thumbnailUrl'),
|
||||
'timestamp': unified_timestamp(clip_link.get('createTime')),
|
||||
'duration': int_or_none(clip.get('duration')),
|
||||
'view_count': int_or_none(clip.get('playCount')),
|
||||
|
||||
@@ -2,10 +2,11 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -14,7 +15,7 @@ class NineGagIE(InfoExtractor):
|
||||
IE_NAME = '9gag'
|
||||
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://9gag.com/gag/ae5Ag7B',
|
||||
'info_dict': {
|
||||
'id': 'ae5Ag7B',
|
||||
@@ -29,7 +30,11 @@ class NineGagIE(InfoExtractor):
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# HTML escaped title
|
||||
'url': 'https://9gag.com/gag/av5nvyb',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
post_id = self._match_id(url)
|
||||
@@ -43,7 +48,7 @@ class NineGagIE(InfoExtractor):
|
||||
'The given url does not contain a video',
|
||||
expected=True)
|
||||
|
||||
title = post['title']
|
||||
title = unescapeHTML(post['title'])
|
||||
|
||||
duration = None
|
||||
formats = []
|
||||
|
||||
@@ -413,7 +413,8 @@ class PeerTubeIE(InfoExtractor):
|
||||
peertube3\.cpy\.re|
|
||||
peertube2\.cpy\.re|
|
||||
videos\.tcit\.fr|
|
||||
peertube\.cpy\.re
|
||||
peertube\.cpy\.re|
|
||||
canard\.tube
|
||||
)'''
|
||||
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
||||
_API_BASE = 'https://%s/api/v1/videos/%s/%s'
|
||||
|
||||
100
youtube_dl/extractor/samplefocus.py
Normal file
100
youtube_dl/extractor/samplefocus.py
Normal file
@@ -0,0 +1,100 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SampleFocusIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar',
|
||||
'md5': '48c8d62d60be467293912e0e619a5120',
|
||||
'info_dict': {
|
||||
'id': '40316',
|
||||
'display_id': 'lil-peep-sad-emo-guitar',
|
||||
'ext': 'mp3',
|
||||
'title': 'Lil Peep Sad Emo Guitar',
|
||||
'thumbnail': r're:^https?://.+\.png',
|
||||
'license': 'Standard License',
|
||||
'uploader': 'CapsCtrl',
|
||||
'uploader_id': 'capsctrl',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['Samples', 'Guitar', 'Electric guitar'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://samplefocus.com/samples/dababy-style-bass-808',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://samplefocus.com/samples/young-chop-kick',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
sample_id = self._search_regex(
|
||||
r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
|
||||
webpage, 'sample id', group='id')
|
||||
|
||||
title = self._og_search_title(webpage, fatal=False) or self._html_search_regex(
|
||||
r'<h1>(.+?)</h1>', webpage, 'title')
|
||||
|
||||
mp3_url = self._search_regex(
|
||||
r'<input[^>]+id=(["\'])sample_mp3\1[^>]+value=(["\'])(?P<url>(?:(?!\2).)+)',
|
||||
webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex(
|
||||
r'<meta[^>]+itemprop=(["\'])contentUrl\1[^>]*>',
|
||||
webpage, 'mp3 url', group=0))['content']
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex(
|
||||
r'<img[^>]+class=(?:["\'])waveform responsive-img[^>]+src=(["\'])(?P<url>(?:(?!\1).)+)',
|
||||
webpage, 'mp3', fatal=False, group='url')
|
||||
|
||||
comments = []
|
||||
for author_id, author, body in re.findall(r'(?s)<p[^>]+class="comment-author"><a[^>]+href="/users/([^"]+)">([^"]+)</a>.+?<p[^>]+class="comment-body">([^>]+)</p>', webpage):
|
||||
comments.append({
|
||||
'author': author,
|
||||
'author_id': author_id,
|
||||
'text': body,
|
||||
})
|
||||
|
||||
uploader_id = uploader = None
|
||||
mobj = re.search(r'>By <a[^>]+href="/users/([^"]+)"[^>]*>([^<]+)', webpage)
|
||||
if mobj:
|
||||
uploader_id, uploader = mobj.groups()
|
||||
|
||||
breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage)
|
||||
categories = []
|
||||
if breadcrumb:
|
||||
for _, name in re.findall(r'<span[^>]+property=(["\'])name\1[^>]*>([^<]+)', breadcrumb):
|
||||
categories.append(name)
|
||||
|
||||
def extract_count(klass):
|
||||
return int_or_none(self._html_search_regex(
|
||||
r'<span[^>]+class=(?:["\'])?%s-count[^>]*>(\d+)' % klass,
|
||||
webpage, klass, fatal=False))
|
||||
|
||||
return {
|
||||
'id': sample_id,
|
||||
'title': title,
|
||||
'url': mp3_url,
|
||||
'display_id': display_id,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'license': self._html_search_regex(
|
||||
r'<a[^>]+href=(["\'])/license\1[^>]*>(?P<license>[^<]+)<',
|
||||
webpage, 'license', fatal=False, group='license'),
|
||||
'uploader_id': uploader_id,
|
||||
'like_count': extract_count('sample-%s-favorites' % sample_id),
|
||||
'comment_count': extract_count('comments'),
|
||||
'comments': comments,
|
||||
'categories': categories,
|
||||
}
|
||||
160
youtube_dl/extractor/simplecast.py
Normal file
160
youtube_dl/extractor/simplecast.py
Normal file
@@ -0,0 +1,160 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_podcast_url,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class SimplecastBaseIE(InfoExtractor):
|
||||
_UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
|
||||
_API_BASE = 'https://api.simplecast.com/'
|
||||
|
||||
def _call_api(self, path_tmpl, video_id):
|
||||
return self._download_json(
|
||||
self._API_BASE + path_tmpl % video_id, video_id)
|
||||
|
||||
def _call_search_api(self, resource, resource_id, resource_url):
|
||||
return self._download_json(
|
||||
'https://api.simplecast.com/%ss/search' % resource, resource_id,
|
||||
data=urlencode_postdata({'url': resource_url}))
|
||||
|
||||
def _parse_episode(self, episode):
|
||||
episode_id = episode['id']
|
||||
title = episode['title'].strip()
|
||||
audio_file = episode.get('audio_file') or {}
|
||||
audio_file_url = audio_file.get('url') or episode.get('audio_file_url') or episode['enclosure_url']
|
||||
|
||||
season = episode.get('season') or {}
|
||||
season_href = season.get('href')
|
||||
season_id = None
|
||||
if season_href:
|
||||
season_id = self._search_regex(
|
||||
r'https?://api.simplecast.com/seasons/(%s)' % self._UUID_REGEX,
|
||||
season_href, 'season id', default=None)
|
||||
|
||||
webpage_url = episode.get('episode_url')
|
||||
channel_url = None
|
||||
if webpage_url:
|
||||
channel_url = self._search_regex(
|
||||
r'(https?://[^/]+\.simplecast\.com)',
|
||||
webpage_url, 'channel url', default=None)
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'display_id': episode.get('slug'),
|
||||
'title': title,
|
||||
'url': clean_podcast_url(audio_file_url),
|
||||
'webpage_url': webpage_url,
|
||||
'channel_url': channel_url,
|
||||
'series': try_get(episode, lambda x: x['podcast']['title']),
|
||||
'season_number': int_or_none(season.get('number')),
|
||||
'season_id': season_id,
|
||||
'thumbnail': episode.get('image_url'),
|
||||
'episode_id': episode_id,
|
||||
'episode_number': int_or_none(episode.get('number')),
|
||||
'description': strip_or_none(episode.get('description')),
|
||||
'timestamp': parse_iso8601(episode.get('published_at')),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'filesize': int_or_none(audio_file.get('size') or episode.get('audio_file_size')),
|
||||
}
|
||||
|
||||
|
||||
class SimplecastIE(SimplecastBaseIE):
|
||||
IE_NAME = 'simplecast'
|
||||
_VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
|
||||
_COMMON_TEST_INFO = {
|
||||
'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
|
||||
'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
|
||||
'ext': 'mp3',
|
||||
'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
|
||||
'episode_number': 1,
|
||||
'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
|
||||
'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
|
||||
'season_number': 1,
|
||||
'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
|
||||
'series': 'The RE:BIND.io Podcast',
|
||||
'duration': 5343,
|
||||
'timestamp': 1580979475,
|
||||
'upload_date': '20200206',
|
||||
'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
|
||||
'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$',
|
||||
}
|
||||
_TESTS = [{
|
||||
'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876',
|
||||
'md5': '8c93be7be54251bf29ee97464eabe61c',
|
||||
'info_dict': _COMMON_TEST_INFO,
|
||||
}, {
|
||||
'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'''(?x)<iframe[^>]+src=["\']
|
||||
(
|
||||
https?://(?:embed\.simplecast\.com/[0-9a-f]{8}|
|
||||
player\.simplecast\.com/%s
|
||||
))''' % SimplecastBaseIE._UUID_REGEX, webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
episode = self._call_api('episodes/%s', episode_id)
|
||||
return self._parse_episode(episode)
|
||||
|
||||
|
||||
class SimplecastEpisodeIE(SimplecastBaseIE):
|
||||
IE_NAME = 'simplecast:episode'
|
||||
_VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
|
||||
'md5': '8c93be7be54251bf29ee97464eabe61c',
|
||||
'info_dict': SimplecastIE._COMMON_TEST_INFO,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
episode = self._call_search_api(
|
||||
'episode', mobj.group(1), mobj.group(0))
|
||||
return self._parse_episode(episode)
|
||||
|
||||
|
||||
class SimplecastPodcastIE(SimplecastBaseIE):
|
||||
IE_NAME = 'simplecast:podcast'
|
||||
_VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://the-re-bind-io-podcast.simplecast.com',
|
||||
'playlist_mincount': 33,
|
||||
'info_dict': {
|
||||
'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c',
|
||||
'title': 'The RE:BIND.io Podcast',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
subdomain = self._match_id(url)
|
||||
site = self._call_search_api('site', subdomain, url)
|
||||
podcast = site['podcast']
|
||||
podcast_id = podcast['id']
|
||||
podcast_title = podcast.get('title')
|
||||
|
||||
def entries():
|
||||
episodes = self._call_api('podcasts/%s/episodes', podcast_id)
|
||||
for episode in (episodes.get('collection') or []):
|
||||
info = self._parse_episode(episode)
|
||||
info['series'] = podcast_title
|
||||
yield info
|
||||
|
||||
return self.playlist_result(entries(), podcast_id, podcast_title)
|
||||
151
youtube_dl/extractor/storyfire.py
Normal file
151
youtube_dl/extractor/storyfire.py
Normal file
@@ -0,0 +1,151 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
# HEADRequest,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class StoryFireBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?storyfire\.com/'
|
||||
|
||||
def _call_api(self, path, video_id, resource, query=None):
|
||||
return self._download_json(
|
||||
'https://storyfire.com/app/%s/%s' % (path, video_id), video_id,
|
||||
'Downloading %s JSON metadata' % resource, query=query)
|
||||
|
||||
def _parse_video(self, video):
|
||||
title = video['title']
|
||||
vimeo_id = self._search_regex(
|
||||
r'https?://player\.vimeo\.com/external/(\d+)',
|
||||
video['vimeoVideoURL'], 'vimeo id')
|
||||
|
||||
# video_url = self._request_webpage(
|
||||
# HEADRequest(video['vimeoVideoURL']), video_id).geturl()
|
||||
# formats = []
|
||||
# for v_url, suffix in [(video_url, '_sep'), (video_url.replace('/sep/video/', '/video/'), '')]:
|
||||
# formats.extend(self._extract_m3u8_formats(
|
||||
# v_url, video_id, 'mp4', 'm3u8_native',
|
||||
# m3u8_id='hls' + suffix, fatal=False))
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# v_url.replace('.m3u8', '.mpd'), video_id,
|
||||
# mpd_id='dash' + suffix, fatal=False))
|
||||
# self._sort_formats(formats)
|
||||
|
||||
uploader_id = video.get('hostID')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': vimeo_id,
|
||||
'title': title,
|
||||
'description': video.get('description'),
|
||||
'url': smuggle_url(
|
||||
'https://player.vimeo.com/video/' + vimeo_id, {
|
||||
'http_headers': {
|
||||
'Referer': 'https://storyfire.com/',
|
||||
}
|
||||
}),
|
||||
# 'formats': formats,
|
||||
'thumbnail': video.get('storyImage'),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'like_count': int_or_none(video.get('likesCount')),
|
||||
'comment_count': int_or_none(video.get('commentsCount')),
|
||||
'duration': int_or_none(video.get('videoDuration')),
|
||||
'timestamp': int_or_none(video.get('publishDate')),
|
||||
'uploader': video.get('username'),
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': 'https://storyfire.com/user/%s/video' % uploader_id if uploader_id else None,
|
||||
'episode_number': int_or_none(video.get('episodeNumber') or video.get('episode_number')),
|
||||
}
|
||||
|
||||
|
||||
class StoryFireIE(StoryFireBaseIE):
|
||||
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'video-details/(?P<id>[0-9a-f]{24})'
|
||||
_TEST = {
|
||||
'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181',
|
||||
'md5': 'caec54b9e4621186d6079c7ec100c1eb',
|
||||
'info_dict': {
|
||||
'id': '378954662',
|
||||
'ext': 'mp4',
|
||||
'title': 'Buzzfeed Teaches You About Memes',
|
||||
'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
|
||||
'timestamp': 1576129028,
|
||||
'description': 'md5:0b4e28021548e144bed69bb7539e62ea',
|
||||
'uploader': 'whang!',
|
||||
'upload_date': '20191212',
|
||||
'duration': 418,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata']
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video = self._call_api(
|
||||
'generic/video-detail', video_id, 'video')['video']
|
||||
return self._parse_video(video)
|
||||
|
||||
|
||||
class StoryFireUserIE(StoryFireBaseIE):
|
||||
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'user/(?P<id>[^/]+)/video'
|
||||
_TEST = {
|
||||
'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video',
|
||||
'info_dict': {
|
||||
'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2',
|
||||
},
|
||||
'playlist_mincount': 151,
|
||||
}
|
||||
_PAGE_SIZE = 20
|
||||
|
||||
def _fetch_page(self, user_id, page):
|
||||
videos = self._call_api(
|
||||
'publicVideos', user_id, 'page %d' % (page + 1), {
|
||||
'skip': page * self._PAGE_SIZE,
|
||||
})['videos']
|
||||
for video in videos:
|
||||
yield self._parse_video(video)
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, user_id), self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, user_id)
|
||||
|
||||
|
||||
class StoryFireSeriesIE(StoryFireBaseIE):
|
||||
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'write/series/stories/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/',
|
||||
'info_dict': {
|
||||
'id': '-Lq6MsuIHLODO6d2dDkr',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
}, {
|
||||
'url': 'https://storyfire.com/write/series/stories/the_mortal_one/',
|
||||
'info_dict': {
|
||||
'id': 'the_mortal_one',
|
||||
},
|
||||
'playlist_count': 0,
|
||||
}]
|
||||
|
||||
def _extract_videos(self, stories):
|
||||
for story in stories.values():
|
||||
if story.get('hasVideo'):
|
||||
yield self._parse_video(story)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
stories = self._call_api(
|
||||
'seriesStories', series_id, 'series stories')
|
||||
return self.playlist_result(self._extract_videos(stories), series_id)
|
||||
@@ -4,21 +4,22 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
qualities,
|
||||
random_birthday,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class VideoPressIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)'
|
||||
_ID_REGEX = r'[\da-zA-Z]{8}'
|
||||
_PATH_REGEX = r'video(?:\.word)?press\.com/embed/'
|
||||
_VALID_URL = r'https?://%s(?P<id>%s)' % (_PATH_REGEX, _ID_REGEX)
|
||||
_TESTS = [{
|
||||
'url': 'https://videopress.com/embed/kUJmAcSf',
|
||||
'md5': '706956a6c875873d51010921310e4bc6',
|
||||
@@ -36,35 +37,36 @@ class VideoPressIE(InfoExtractor):
|
||||
# 17+, requires birth_* params
|
||||
'url': 'https://videopress.com/embed/iH3gstfZ',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.wordpress.com/embed/kUJmAcSf',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)',
|
||||
r'<iframe[^>]+src=["\']((?:https?://)?%s%s)' % (VideoPressIE._PATH_REGEX, VideoPressIE._ID_REGEX),
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
query = random_birthday('birth_year', 'birth_month', 'birth_day')
|
||||
query['fields'] = 'description,duration,file_url_base,files,height,original,poster,rating,title,upload_date,width'
|
||||
video = self._download_json(
|
||||
'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
|
||||
video_id, query=query)
|
||||
|
||||
title = video['title']
|
||||
|
||||
def base_url(scheme):
|
||||
return try_get(
|
||||
video, lambda x: x['file_url_base'][scheme], compat_str)
|
||||
|
||||
base_url = base_url('https') or base_url('http')
|
||||
file_url_base = video.get('file_url_base') or {}
|
||||
base_url = file_url_base.get('https') or file_url_base.get('http')
|
||||
|
||||
QUALITIES = ('std', 'dvd', 'hd')
|
||||
quality = qualities(QUALITIES)
|
||||
|
||||
formats = []
|
||||
for format_id, f in video['files'].items():
|
||||
for format_id, f in (video.get('files') or {}).items():
|
||||
if not isinstance(f, dict):
|
||||
continue
|
||||
for ext, path in f.items():
|
||||
@@ -75,12 +77,14 @@ class VideoPressIE(InfoExtractor):
|
||||
'ext': determine_ext(path, ext),
|
||||
'quality': quality(format_id),
|
||||
})
|
||||
original_url = try_get(video, lambda x: x['original'], compat_str)
|
||||
original_url = video.get('original')
|
||||
if original_url:
|
||||
formats.append({
|
||||
'url': original_url,
|
||||
'format_id': 'original',
|
||||
'quality': len(QUALITIES),
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,7 +31,7 @@ class VikiBaseIE(InfoExtractor):
|
||||
_API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
|
||||
|
||||
_APP = '100005a'
|
||||
_APP_VERSION = '2.2.5.1428709186'
|
||||
_APP_VERSION = '6.0.0'
|
||||
_APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
|
||||
|
||||
_GEO_BYPASS = False
|
||||
@@ -41,7 +42,7 @@ class VikiBaseIE(InfoExtractor):
|
||||
_ERRORS = {
|
||||
'geo': 'Sorry, this content is not available in your region.',
|
||||
'upcoming': 'Sorry, this content is not yet available.',
|
||||
# 'paywall': 'paywall',
|
||||
'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
|
||||
}
|
||||
|
||||
def _prepare_call(self, path, timestamp=None, post_data=None):
|
||||
@@ -62,7 +63,8 @@ class VikiBaseIE(InfoExtractor):
|
||||
|
||||
def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
|
||||
resp = self._download_json(
|
||||
self._prepare_call(path, timestamp, post_data), video_id, note)
|
||||
self._prepare_call(path, timestamp, post_data), video_id, note,
|
||||
headers={'x-viki-app-ver': self._APP_VERSION})
|
||||
|
||||
error = resp.get('error')
|
||||
if error:
|
||||
@@ -82,11 +84,13 @@ class VikiBaseIE(InfoExtractor):
|
||||
expected=True)
|
||||
|
||||
def _check_errors(self, data):
|
||||
for reason, status in data.get('blocking', {}).items():
|
||||
for reason, status in (data.get('blocking') or {}).items():
|
||||
if status and reason in self._ERRORS:
|
||||
message = self._ERRORS[reason]
|
||||
if reason == 'geo':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
elif reason == 'paywall':
|
||||
self.raise_login_required(message)
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, message), expected=True)
|
||||
|
||||
@@ -131,13 +135,19 @@ class VikiIE(VikiBaseIE):
|
||||
'info_dict': {
|
||||
'id': '1023585v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Heirs Episode 14',
|
||||
'uploader': 'SBS',
|
||||
'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
|
||||
'title': 'Heirs - Episode 14',
|
||||
'uploader': 'SBS Contents Hub',
|
||||
'timestamp': 1385047627,
|
||||
'upload_date': '20131121',
|
||||
'age_limit': 13,
|
||||
'duration': 3570,
|
||||
'episode_number': 14,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
'skip': 'Blocked in the US',
|
||||
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||
}, {
|
||||
# clip
|
||||
'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
|
||||
@@ -153,7 +163,8 @@ class VikiIE(VikiBaseIE):
|
||||
'uploader': 'Arirang TV',
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
}
|
||||
},
|
||||
'skip': 'Sorry. There was an error loading this video',
|
||||
}, {
|
||||
'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
|
||||
'info_dict': {
|
||||
@@ -171,7 +182,7 @@ class VikiIE(VikiBaseIE):
|
||||
}, {
|
||||
# episode
|
||||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
||||
'md5': '94e0e34fd58f169f40c184f232356cfe',
|
||||
'md5': '0a53dc252e6e690feccd756861495a8c',
|
||||
'info_dict': {
|
||||
'id': '44699v',
|
||||
'ext': 'mp4',
|
||||
@@ -183,6 +194,10 @@ class VikiIE(VikiBaseIE):
|
||||
'uploader': 'group8',
|
||||
'like_count': int,
|
||||
'age_limit': 13,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||
}, {
|
||||
@@ -209,7 +224,7 @@ class VikiIE(VikiBaseIE):
|
||||
}, {
|
||||
# non-English description
|
||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||
'md5': 'adf9e321a0ae5d0aace349efaaff7691',
|
||||
'md5': '41faaba0de90483fb4848952af7c7d0d',
|
||||
'info_dict': {
|
||||
'id': '158036v',
|
||||
'ext': 'mp4',
|
||||
@@ -220,6 +235,10 @@ class VikiIE(VikiBaseIE):
|
||||
'title': 'Love In Magic',
|
||||
'age_limit': 13,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -229,36 +248,33 @@ class VikiIE(VikiBaseIE):
|
||||
'https://www.viki.com/api/videos/' + video_id,
|
||||
video_id, 'Downloading video JSON', headers={
|
||||
'x-client-user-agent': std_headers['User-Agent'],
|
||||
'x-viki-app-ver': '4.0.57',
|
||||
'x-viki-app-ver': '3.0.0',
|
||||
})
|
||||
video = resp['video']
|
||||
|
||||
self._check_errors(video)
|
||||
|
||||
title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
|
||||
episode_number = int_or_none(video.get('number'))
|
||||
if not title:
|
||||
title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
|
||||
container_titles = video.get('container', {}).get('titles', {})
|
||||
title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id
|
||||
container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
|
||||
container_title = self.dict_selection(container_titles, 'en')
|
||||
title = '%s - %s' % (container_title, title)
|
||||
|
||||
description = self.dict_selection(video.get('descriptions', {}), 'en')
|
||||
|
||||
duration = int_or_none(video.get('duration'))
|
||||
timestamp = parse_iso8601(video.get('created_at'))
|
||||
uploader = video.get('author')
|
||||
like_count = int_or_none(video.get('likes', {}).get('count'))
|
||||
age_limit = parse_age_limit(video.get('rating'))
|
||||
like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_id, thumbnail in video.get('images', {}).items():
|
||||
for thumbnail_id, thumbnail in (video.get('images') or {}).items():
|
||||
thumbnails.append({
|
||||
'id': thumbnail_id,
|
||||
'url': thumbnail.get('url'),
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
|
||||
for subtitle_lang, _ in (video.get('subtitle_completions') or {}).items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'ext': subtitles_format,
|
||||
'url': self._prepare_call(
|
||||
@@ -269,13 +285,15 @@ class VikiIE(VikiBaseIE):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'timestamp': parse_iso8601(video.get('created_at')),
|
||||
'uploader': video.get('author'),
|
||||
'uploader_url': video.get('author_url'),
|
||||
'like_count': like_count,
|
||||
'age_limit': age_limit,
|
||||
'age_limit': parse_age_limit(video.get('rating')),
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
formats = []
|
||||
@@ -360,7 +378,7 @@ class VikiChannelIE(VikiBaseIE):
|
||||
'info_dict': {
|
||||
'id': '50c',
|
||||
'title': 'Boys Over Flowers',
|
||||
'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
|
||||
'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
|
||||
},
|
||||
'playlist_mincount': 71,
|
||||
}, {
|
||||
@@ -371,6 +389,7 @@ class VikiChannelIE(VikiBaseIE):
|
||||
'description': 'md5:05bf5471385aa8b21c18ad450e350525',
|
||||
},
|
||||
'playlist_count': 127,
|
||||
'skip': 'Page not found',
|
||||
}, {
|
||||
'url': 'http://www.viki.com/news/24569c-showbiz-korea',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -226,10 +226,12 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
def _extract_original_format(self, url, video_id):
|
||||
def _extract_original_format(self, url, video_id, unlisted_hash=None):
|
||||
query = {'action': 'load_download_config'}
|
||||
if unlisted_hash:
|
||||
query['unlisted_hash'] = unlisted_hash
|
||||
download_data = self._download_json(
|
||||
url, video_id, fatal=False,
|
||||
query={'action': 'load_download_config'},
|
||||
url, video_id, fatal=False, query=query,
|
||||
headers={'X-Requested-With': 'XMLHttpRequest'})
|
||||
if download_data:
|
||||
source_file = download_data.get('source_file')
|
||||
@@ -509,6 +511,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
{
|
||||
'url': 'https://vimeo.com/160743502/abd0e13fb4',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# requires passing unlisted_hash(a52724358e) to load_download_config request
|
||||
'url': 'https://vimeo.com/392479337/a52724358e',
|
||||
'only_matching': True,
|
||||
}
|
||||
# https://gettingthingsdone.com/workflowmap/
|
||||
# vimeo embed with check-password page protected by Referer header
|
||||
@@ -673,7 +680,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if config.get('view') == 4:
|
||||
config = self._verify_player_video_password(redirect_url, video_id, headers)
|
||||
|
||||
vod = config.get('video', {}).get('vod', {})
|
||||
video = config.get('video') or {}
|
||||
vod = video.get('vod') or {}
|
||||
|
||||
def is_rented():
|
||||
if '>You rented this title.<' in webpage:
|
||||
@@ -733,7 +741,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
formats = []
|
||||
|
||||
source_format = self._extract_original_format(
|
||||
'https://vimeo.com/' + video_id, video_id)
|
||||
'https://vimeo.com/' + video_id, video_id, video.get('unlisted_hash'))
|
||||
if source_format:
|
||||
formats.append(source_format)
|
||||
|
||||
|
||||
@@ -1,40 +1,55 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
month_by_abbreviation,
|
||||
parse_filesize,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class XboxClipsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?xboxclips\.com/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\w-]{36})'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:xboxclips\.com|gameclips\.io)/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'http://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||
'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
|
||||
'info_dict': {
|
||||
'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||
'ext': 'mp4',
|
||||
'title': 'Iabdulelah playing Titanfall',
|
||||
'title': 'iAbdulElah playing Titanfall',
|
||||
'filesize_approx': 26800000,
|
||||
'upload_date': '20140807',
|
||||
'duration': 56,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://gameclips.io/iAbdulElah/074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if '/video.php' in url:
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
url = 'https://gameclips.io/%s/%s' % (qs['gamertag'][0], qs['vid'][0])
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'>(?:Link|Download): <a[^>]+href="([^"]+)"', webpage, 'video URL')
|
||||
title = self._html_search_regex(
|
||||
r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title')
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False))
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
|
||||
title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
|
||||
upload_date = None
|
||||
mobj = re.search(
|
||||
r'>Recorded: (\d{2})-(Jan|Feb|Mar|Apr|May|Ju[nl]|Aug|Sep|Oct|Nov|Dec)-(\d{4})',
|
||||
webpage)
|
||||
if mobj:
|
||||
upload_date = '%s%.2d%s' % (mobj.group(3), month_by_abbreviation(mobj.group(2)), mobj.group(1))
|
||||
filesize = parse_filesize(self._html_search_regex(
|
||||
r'>Size: ([^<]+)<', webpage, 'file size', fatal=False))
|
||||
duration = int_or_none(self._html_search_regex(
|
||||
@@ -42,12 +57,12 @@ class XboxClipsIE(InfoExtractor):
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'>Views: (\d+)<', webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'upload_date': upload_date,
|
||||
'filesize_approx': filesize,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
}
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
@@ -209,17 +210,27 @@ class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
|
||||
missing_track_ids = [
|
||||
track_id for track_id in track_ids
|
||||
if track_id not in present_track_ids]
|
||||
missing_tracks = self._call_api(
|
||||
'track-entries', tld, url, item_id,
|
||||
'Downloading missing tracks JSON', {
|
||||
'entries': ','.join(missing_track_ids),
|
||||
'lang': tld,
|
||||
'external-domain': 'music.yandex.%s' % tld,
|
||||
'overembed': 'false',
|
||||
'strict': 'true',
|
||||
})
|
||||
if missing_tracks:
|
||||
tracks.extend(missing_tracks)
|
||||
# Request missing tracks in chunks to avoid exceeding max HTTP header size,
|
||||
# see https://github.com/ytdl-org/youtube-dl/issues/27355
|
||||
_TRACKS_PER_CHUNK = 250
|
||||
for chunk_num in itertools.count(0):
|
||||
start = chunk_num * _TRACKS_PER_CHUNK
|
||||
end = start + _TRACKS_PER_CHUNK
|
||||
missing_track_ids_req = missing_track_ids[start:end]
|
||||
assert missing_track_ids_req
|
||||
missing_tracks = self._call_api(
|
||||
'track-entries', tld, url, item_id,
|
||||
'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), {
|
||||
'entries': ','.join(missing_track_ids_req),
|
||||
'lang': tld,
|
||||
'external-domain': 'music.yandex.%s' % tld,
|
||||
'overembed': 'false',
|
||||
'strict': 'true',
|
||||
})
|
||||
if missing_tracks:
|
||||
tracks.extend(missing_tracks)
|
||||
if end >= len(missing_track_ids):
|
||||
break
|
||||
|
||||
return tracks
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ class YouPornIE(InfoExtractor):
|
||||
'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
|
||||
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 210,
|
||||
'uploader': 'Ask Dan And Jennifer',
|
||||
'upload_date': '20101217',
|
||||
'average_rating': int,
|
||||
@@ -54,6 +55,7 @@ class YouPornIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404',
|
||||
}, {
|
||||
'url': 'https://www.youporn.com/embed/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
||||
'only_matching': True,
|
||||
@@ -153,6 +155,8 @@ class YouPornIE(InfoExtractor):
|
||||
thumbnail = self._search_regex(
|
||||
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
|
||||
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
'video:duration', webpage, 'duration', fatal=False))
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
||||
@@ -194,6 +198,7 @@ class YouPornIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'average_rating': average_rating,
|
||||
|
||||
@@ -308,7 +308,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
|
||||
'view count', default=None))
|
||||
uploader = try_get(
|
||||
renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
||||
renderer,
|
||||
(lambda x: x['ownerText']['runs'][0]['text'],
|
||||
lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': YoutubeIE.ie_key(),
|
||||
@@ -324,52 +326,57 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com'
|
||||
_INVIDIOUS_SITES = (
|
||||
# invidious-redirect websites
|
||||
r'(?:www\.)?redirect\.invidious\.io',
|
||||
r'(?:(?:www|dev)\.)?invidio\.us',
|
||||
# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
|
||||
r'(?:(?:www|no)\.)?invidiou\.sh',
|
||||
r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
|
||||
r'(?:www\.)?invidious\.kabi\.tk',
|
||||
r'(?:www\.)?invidious\.13ad\.de',
|
||||
r'(?:www\.)?invidious\.mastodon\.host',
|
||||
r'(?:www\.)?invidious\.zapashcanon\.fr',
|
||||
r'(?:www\.)?invidious\.kavin\.rocks',
|
||||
r'(?:www\.)?invidious\.tube',
|
||||
r'(?:www\.)?invidiou\.site',
|
||||
r'(?:www\.)?invidious\.site',
|
||||
r'(?:www\.)?invidious\.xyz',
|
||||
r'(?:www\.)?invidious\.nixnet\.xyz',
|
||||
r'(?:www\.)?invidious\.drycat\.fr',
|
||||
r'(?:www\.)?tube\.poal\.co',
|
||||
r'(?:www\.)?tube\.connect\.cafe',
|
||||
r'(?:www\.)?vid\.wxzm\.sx',
|
||||
r'(?:www\.)?vid\.mint\.lgbt',
|
||||
r'(?:www\.)?yewtu\.be',
|
||||
r'(?:www\.)?yt\.elukerio\.org',
|
||||
r'(?:www\.)?yt\.lelux\.fi',
|
||||
r'(?:www\.)?invidious\.ggc-project\.de',
|
||||
r'(?:www\.)?yt\.maisputain\.ovh',
|
||||
r'(?:www\.)?invidious\.13ad\.de',
|
||||
r'(?:www\.)?invidious\.toot\.koeln',
|
||||
r'(?:www\.)?invidious\.fdn\.fr',
|
||||
r'(?:www\.)?watch\.nettohikari\.com',
|
||||
r'(?:www\.)?kgg2m7yk5aybusll\.onion',
|
||||
r'(?:www\.)?qklhadlycap4cnod\.onion',
|
||||
r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
|
||||
r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
|
||||
r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
|
||||
r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
|
||||
r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
|
||||
r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
|
||||
)
|
||||
_VALID_URL = r"""(?x)^
|
||||
(
|
||||
(?:https?://|//) # http(s):// or protocol-independent URL
|
||||
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
|
||||
(?:www\.)?deturl\.com/www\.youtube\.com/|
|
||||
(?:www\.)?pwnyoutube\.com/|
|
||||
(?:www\.)?hooktube\.com/|
|
||||
(?:www\.)?yourepeat\.com/|
|
||||
tube\.majestyc\.net/|
|
||||
# Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
|
||||
(?:(?:www|dev)\.)?invidio\.us/|
|
||||
(?:(?:www|no)\.)?invidiou\.sh/|
|
||||
(?:(?:www|fi)\.)?invidious\.snopyta\.org/|
|
||||
(?:www\.)?invidious\.kabi\.tk/|
|
||||
(?:www\.)?invidious\.13ad\.de/|
|
||||
(?:www\.)?invidious\.mastodon\.host/|
|
||||
(?:www\.)?invidious\.zapashcanon\.fr/|
|
||||
(?:www\.)?invidious\.kavin\.rocks/|
|
||||
(?:www\.)?invidious\.tube/|
|
||||
(?:www\.)?invidiou\.site/|
|
||||
(?:www\.)?invidious\.site/|
|
||||
(?:www\.)?invidious\.xyz/|
|
||||
(?:www\.)?invidious\.nixnet\.xyz/|
|
||||
(?:www\.)?invidious\.drycat\.fr/|
|
||||
(?:www\.)?tube\.poal\.co/|
|
||||
(?:www\.)?tube\.connect\.cafe/|
|
||||
(?:www\.)?vid\.wxzm\.sx/|
|
||||
(?:www\.)?vid\.mint\.lgbt/|
|
||||
(?:www\.)?yewtu\.be/|
|
||||
(?:www\.)?yt\.elukerio\.org/|
|
||||
(?:www\.)?yt\.lelux\.fi/|
|
||||
(?:www\.)?invidious\.ggc-project\.de/|
|
||||
(?:www\.)?yt\.maisputain\.ovh/|
|
||||
(?:www\.)?invidious\.13ad\.de/|
|
||||
(?:www\.)?invidious\.toot\.koeln/|
|
||||
(?:www\.)?invidious\.fdn\.fr/|
|
||||
(?:www\.)?watch\.nettohikari\.com/|
|
||||
(?:www\.)?kgg2m7yk5aybusll\.onion/|
|
||||
(?:www\.)?qklhadlycap4cnod\.onion/|
|
||||
(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
|
||||
(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
|
||||
(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
|
||||
(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
|
||||
(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
|
||||
(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
|
||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
|
||||
(?:www\.)?deturl\.com/www\.youtube\.com|
|
||||
(?:www\.)?pwnyoutube\.com|
|
||||
(?:www\.)?hooktube\.com|
|
||||
(?:www\.)?yourepeat\.com|
|
||||
tube\.majestyc\.net|
|
||||
%(invidious)s|
|
||||
youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
|
||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||
(?: # the various things that can precede the ID:
|
||||
(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
|
||||
@@ -384,6 +391,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
youtu\.be| # just youtu.be/xxxx
|
||||
vid\.plus| # or vid.plus/xxxx
|
||||
zwearz\.com/watch| # or zwearz.com/watch/xxxx
|
||||
%(invidious)s
|
||||
)/
|
||||
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
|
||||
)
|
||||
@@ -396,7 +404,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
)
|
||||
)
|
||||
(?(1).+)? # if we found the ID, everything can follow
|
||||
$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
|
||||
$""" % {
|
||||
'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
|
||||
'invidious': '|'.join(_INVIDIOUS_SITES),
|
||||
}
|
||||
_PLAYER_INFO_RE = (
|
||||
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
|
||||
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
||||
@@ -904,6 +915,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'https://invidio.us/watch?v=BaW_jenozKc',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
|
||||
'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# DRM protected
|
||||
'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
|
||||
@@ -1042,6 +1062,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# controversial video, only works with bpctr when authenticated with cookies
|
||||
'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_formats = {
|
||||
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||
@@ -1405,7 +1430,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
base_url = self.http_scheme() + '//www.youtube.com/'
|
||||
webpage_url = base_url + 'watch?v=' + video_id
|
||||
webpage = self._download_webpage(webpage_url, video_id, fatal=False)
|
||||
webpage = self._download_webpage(
|
||||
webpage_url + '&bpctr=9999999999', video_id, fatal=False)
|
||||
|
||||
player_response = None
|
||||
if webpage:
|
||||
@@ -1577,6 +1603,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# Youtube throttles chunks >~10M
|
||||
'http_chunk_size': 10485760,
|
||||
}
|
||||
if dct.get('ext'):
|
||||
dct['container'] = dct['ext'] + '_dash'
|
||||
formats.append(dct)
|
||||
|
||||
hls_manifest_url = streaming_data.get('hlsManifestUrl')
|
||||
|
||||
69
youtube_dl/extractor/zhihu.py
Normal file
69
youtube_dl/extractor/zhihu.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none, int_or_none
|
||||
|
||||
|
||||
class ZhihuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?zhihu\.com/zvideo/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.zhihu.com/zvideo/1342930761977176064',
|
||||
'md5': 'c8d4c9cd72dd58e6f9bc9c2c84266464',
|
||||
'info_dict': {
|
||||
'id': '1342930761977176064',
|
||||
'ext': 'mp4',
|
||||
'title': '写春联也太难了吧!',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': '桥半舫',
|
||||
'timestamp': 1612959715,
|
||||
'upload_date': '20210210',
|
||||
'uploader_id': '244ecb13b0fd7daf92235288c8ca3365',
|
||||
'duration': 146.333,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
zvideo = self._download_json(
|
||||
'https://www.zhihu.com/api/v4/zvideos/' + video_id, video_id)
|
||||
title = zvideo['title']
|
||||
video = zvideo.get('video') or {}
|
||||
|
||||
formats = []
|
||||
for format_id, q in (video.get('playlist') or {}).items():
|
||||
play_url = q.get('url') or q.get('play_url')
|
||||
if not play_url:
|
||||
continue
|
||||
formats.append({
|
||||
'asr': int_or_none(q.get('sample_rate')),
|
||||
'filesize': int_or_none(q.get('size')),
|
||||
'format_id': format_id,
|
||||
'fps': int_or_none(q.get('fps')),
|
||||
'height': int_or_none(q.get('height')),
|
||||
'tbr': float_or_none(q.get('bitrate')),
|
||||
'url': play_url,
|
||||
'width': int_or_none(q.get('width')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
author = zvideo.get('author') or {}
|
||||
url_token = author.get('url_token')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': video.get('thumbnail') or zvideo.get('image_url'),
|
||||
'uploader': author.get('name'),
|
||||
'timestamp': int_or_none(zvideo.get('published_at')),
|
||||
'uploader_id': author.get('id'),
|
||||
'uploader_url': 'https://www.zhihu.com/people/' + url_token if url_token else None,
|
||||
'duration': float_or_none(video.get('duration')),
|
||||
'view_count': int_or_none(zvideo.get('play_count')),
|
||||
'like_count': int_or_none(zvideo.get('liked_count')),
|
||||
'comment_count': int_or_none(zvideo.get('comment_count')),
|
||||
}
|
||||
@@ -89,10 +89,14 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||
|
||||
elif info['ext'] in ['m4a', 'mp4']:
|
||||
if not check_executable('AtomicParsley', ['-v']):
|
||||
atomicparsley = next((x
|
||||
for x in ['AtomicParsley', 'atomicparsley']
|
||||
if check_executable(x, ['-v'])), None)
|
||||
|
||||
if atomicparsley is None:
|
||||
raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
|
||||
|
||||
cmd = [encodeFilename('AtomicParsley', True),
|
||||
cmd = [encodeFilename(atomicparsley, True),
|
||||
encodeFilename(filename, True),
|
||||
encodeArgument('--artwork'),
|
||||
encodeFilename(thumbnail_filename, True),
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2021.02.10'
|
||||
__version__ = '2021.02.22'
|
||||
|
||||
Reference in New Issue
Block a user