mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-12-09 07:32:42 +01:00
Compare commits
39 Commits
2017.06.12
...
2017.06.23
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
170719414d | ||
|
|
38dad4737f | ||
|
|
ddbb4c5c3e | ||
|
|
fa3ea7223a | ||
|
|
0f4a5a73e7 | ||
|
|
18166bb8e8 | ||
|
|
d4893e764b | ||
|
|
97b6e30113 | ||
|
|
9be9ec5980 | ||
|
|
048b55804d | ||
|
|
6ce79d7ac0 | ||
|
|
1641ca402d | ||
|
|
85cbcede5b | ||
|
|
a1de83e5f0 | ||
|
|
fee00b3884 | ||
|
|
2d2132ac6e | ||
|
|
cc2ffe5afe | ||
|
|
560050669b | ||
|
|
eaa006d1bd | ||
|
|
a6f29820c6 | ||
|
|
1433734c35 | ||
|
|
aefce8e6dc | ||
|
|
8b6ac49ecc | ||
|
|
b08e235f09 | ||
|
|
be80986ed9 | ||
|
|
473e87064b | ||
|
|
4f90d2aeac | ||
|
|
b230fefc3c | ||
|
|
96a2daa1ee | ||
|
|
0ea6efbb7a | ||
|
|
6a9cb29509 | ||
|
|
ca27037171 | ||
|
|
0bf4b71b75 | ||
|
|
5215f45327 | ||
|
|
0a268c6e11 | ||
|
|
7dd5415cd0 | ||
|
|
b5dc33daa9 | ||
|
|
97fa1f8dc4 | ||
|
|
b081f53b08 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.12**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.23**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.06.12
|
||||
[debug] youtube-dl version 2017.06.23
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
45
ChangeLog
45
ChangeLog
@@ -1,3 +1,48 @@
|
||||
version 2017.06.23
|
||||
|
||||
Core
|
||||
* [adobepass] Fix extraction on older python 2.6
|
||||
|
||||
Extractors
|
||||
* [youtube] Adapt to new automatic captions rendition (#13467)
|
||||
* [hgtv.com:show] Relax video config regular expression (#13279, #13461)
|
||||
* [drtuber] Fix formats extraction (#12058)
|
||||
* [youporn] Fix upload date extraction
|
||||
* [youporn] Improve formats extraction
|
||||
* [youporn] Fix title extraction (#13456)
|
||||
* [googledrive] Fix formats sorting (#13443)
|
||||
* [watchindianporn] Fix extraction (#13411, #13415)
|
||||
+ [vimeo] Add fallback mp4 extension for original format
|
||||
+ [ruv] Add support for ruv.is (#13396)
|
||||
* [viu] Fix extraction on older python 2.6
|
||||
* [pandora.tv] Fix upload_date extraction (#12846)
|
||||
+ [asiancrush] Add support for asiancrush.com (#13420)
|
||||
|
||||
|
||||
version 2017.06.18
|
||||
|
||||
Core
|
||||
* [downloader/common] Use utils.shell_quote for debug command line
|
||||
* [utils] Use compat_shlex_quote in shell_quote
|
||||
* [postprocessor/execafterdownload] Encode command line (#13407)
|
||||
* [compat] Fix compat_shlex_quote on Windows (#5889, #10254)
|
||||
* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing
|
||||
in --metadata-from-title (#13408)
|
||||
* [extractor/common] Fix json dumping with --geo-bypass
|
||||
+ [extractor/common] Improve jwplayer subtitles extraction
|
||||
+ [extractor/common] Improve jwplayer formats extraction (#13379)
|
||||
|
||||
Extractors
|
||||
* [polskieradio] Fix extraction (#13392)
|
||||
+ [xfileshare] Add support for fastvideo.me (#13385)
|
||||
* [bilibili] Fix extraction of videos with double quotes in titles (#13387)
|
||||
* [4tube] Fix extraction (#13381, #13382)
|
||||
+ [disney] Add support for disneychannel.de (#13383)
|
||||
* [npo] Improve URL regular expression (#13376)
|
||||
+ [corus] Add support for showcase.ca
|
||||
+ [corus] Add support for history.ca (#13359)
|
||||
|
||||
|
||||
version 2017.06.12
|
||||
|
||||
Core
|
||||
|
||||
2
Makefile
2
Makefile
@@ -101,7 +101,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
||||
--exclude '*.pyc' \
|
||||
--exclude '*.pyo' \
|
||||
--exclude '*~' \
|
||||
--exclude '__pycache' \
|
||||
--exclude '__pycache__' \
|
||||
--exclude '.git' \
|
||||
--exclude 'testdata' \
|
||||
--exclude 'docs/_build' \
|
||||
|
||||
@@ -8,7 +8,7 @@ import re
|
||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||
|
||||
PREFIX = '''%YOUTUBE-DL(1)
|
||||
PREFIX = r'''%YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
|
||||
|
||||
@@ -67,6 +67,8 @@
|
||||
- **arte.tv:info**
|
||||
- **arte.tv:magazine**
|
||||
- **arte.tv:playlist**
|
||||
- **AsianCrush**
|
||||
- **AsianCrushPlaylist**
|
||||
- **AtresPlayer**
|
||||
- **ATTTechChannel**
|
||||
- **ATVAt**
|
||||
@@ -686,6 +688,7 @@
|
||||
- **rutube:person**: Rutube person videos
|
||||
- **RUTV**: RUTV.RU
|
||||
- **Ruutu**
|
||||
- **Ruv**
|
||||
- **safari**: safaribooksonline.com online video
|
||||
- **safari:api**
|
||||
- **safari:course**: safaribooksonline.com online courses
|
||||
@@ -975,7 +978,7 @@
|
||||
- **WSJArticle**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **xiami:album**: 虾米音乐 - 专辑
|
||||
|
||||
@@ -2617,14 +2617,22 @@ except ImportError: # Python 2
|
||||
parsed_result[name] = [value]
|
||||
return parsed_result
|
||||
|
||||
try:
|
||||
from shlex import quote as compat_shlex_quote
|
||||
except ImportError: # Python < 3.3
|
||||
|
||||
compat_os_name = os._name if os.name == 'java' else os.name
|
||||
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
def compat_shlex_quote(s):
|
||||
if re.match(r'^[-_\w./]+$', s):
|
||||
return s
|
||||
else:
|
||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
|
||||
else:
|
||||
try:
|
||||
from shlex import quote as compat_shlex_quote
|
||||
except ImportError: # Python < 3.3
|
||||
def compat_shlex_quote(s):
|
||||
if re.match(r'^[-_\w./]+$', s):
|
||||
return s
|
||||
else:
|
||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||
|
||||
|
||||
try:
|
||||
@@ -2649,9 +2657,6 @@ def compat_ord(c):
|
||||
return ord(c)
|
||||
|
||||
|
||||
compat_os_name = os._name if os.name == 'java' else os.name
|
||||
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
compat_getenv = os.getenv
|
||||
compat_expanduser = os.path.expanduser
|
||||
@@ -2895,6 +2900,7 @@ else:
|
||||
|
||||
|
||||
__all__ = [
|
||||
'compat_HTMLParseError',
|
||||
'compat_HTMLParser',
|
||||
'compat_HTTPError',
|
||||
'compat_basestring',
|
||||
|
||||
@@ -8,10 +8,11 @@ import random
|
||||
|
||||
from ..compat import compat_os_name
|
||||
from ..utils import (
|
||||
decodeArgument,
|
||||
encodeFilename,
|
||||
error_to_compat_str,
|
||||
decodeArgument,
|
||||
format_bytes,
|
||||
shell_quote,
|
||||
timeconvert,
|
||||
)
|
||||
|
||||
@@ -381,10 +382,5 @@ class FileDownloader(object):
|
||||
if exe is None:
|
||||
exe = os.path.basename(str_args[0])
|
||||
|
||||
try:
|
||||
import pipes
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||
except ImportError:
|
||||
shell_quote = repr
|
||||
self.to_screen('[debug] %s command line: %s' % (
|
||||
exe, shell_quote(str_args)))
|
||||
|
||||
@@ -22,7 +22,7 @@ class ABCOTVSIE(InfoExtractor):
|
||||
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
|
||||
'ext': 'mp4',
|
||||
'title': 'East Bay museum celebrates vintage synthesizers',
|
||||
'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
|
||||
'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1421123075,
|
||||
'upload_date': '20150113',
|
||||
|
||||
@@ -6,7 +6,10 @@ import time
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_kwargs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
@@ -1317,7 +1320,8 @@ class AdobePassIE(InfoExtractor):
|
||||
headers = kwargs.get('headers', {})
|
||||
headers.update(self.geo_verification_headers())
|
||||
kwargs['headers'] = headers
|
||||
return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs)
|
||||
return super(AdobePassIE, self)._download_webpage_handle(
|
||||
*args, **compat_kwargs(kwargs))
|
||||
|
||||
@staticmethod
|
||||
def _get_mvpd_resource(provider_id, title, guid, rating):
|
||||
|
||||
93
youtube_dl/extractor/asiancrush.py
Normal file
93
youtube_dl/extractor/asiancrush.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
remove_end,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class AsianCrushIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
|
||||
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||
'info_dict': {
|
||||
'id': '1_y4tmjm5r',
|
||||
'ext': 'mp4',
|
||||
'title': 'Women Who Flirt',
|
||||
'description': 'md5:3db14e9186197857e7063522cb89a805',
|
||||
'timestamp': 1496936429,
|
||||
'upload_date': '20170608',
|
||||
'uploader_id': 'craig@crifkin.com',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
|
||||
data=urlencode_postdata({
|
||||
'postid': video_id,
|
||||
'action': 'get_channel_kaltura_vars',
|
||||
}))
|
||||
|
||||
entry_id = data['entry_id']
|
||||
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (data['partner_id'], entry_id),
|
||||
ie=KalturaIE.ie_key(), video_id=entry_id,
|
||||
video_title=data.get('vid_label'))
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
|
||||
_TEST = {
|
||||
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
|
||||
'info_dict': {
|
||||
'id': '12481',
|
||||
'title': 'Scholar Who Walks the Night',
|
||||
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
|
||||
},
|
||||
'playlist_count': 20,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = []
|
||||
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
|
||||
webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
if attrs.get('class') == 'clearfix':
|
||||
entries.append(self.url_result(
|
||||
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
||||
|
||||
title = remove_end(
|
||||
self._html_search_regex(
|
||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
|
||||
' | AsianCrush')
|
||||
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:description', webpage, 'description', fatal=False)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
@@ -54,6 +54,22 @@ class BiliBiliIE(InfoExtractor):
|
||||
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
|
||||
},
|
||||
'skip': 'Geo-restricted to China',
|
||||
}, {
|
||||
# Title with double quotes
|
||||
'url': 'http://www.bilibili.com/video/av8903802/',
|
||||
'info_dict': {
|
||||
'id': '8903802',
|
||||
'ext': 'mp4',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
|
||||
'uploader': '阿滴英文',
|
||||
'uploader_id': '65880958',
|
||||
'timestamp': 1488382620,
|
||||
'upload_date': '20170301',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Test metadata only
|
||||
},
|
||||
}]
|
||||
|
||||
_APP_KEY = '84956560bc028eb7'
|
||||
@@ -135,7 +151,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||
title = self._html_search_regex('<h1[^>]*>([^<]+)</h1>', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
|
||||
|
||||
@@ -420,7 +420,7 @@ class InfoExtractor(object):
|
||||
if country_code:
|
||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self._downloader.to_stdout(
|
||||
self._downloader.to_screen(
|
||||
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
||||
% (self._x_forwarded_for_ip, country_code.upper()))
|
||||
|
||||
@@ -2299,6 +2299,8 @@ class InfoExtractor(object):
|
||||
tracks = video_data.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
if track.get('kind') != 'captions':
|
||||
continue
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
@@ -2328,6 +2330,8 @@ class InfoExtractor(object):
|
||||
urls = []
|
||||
formats = []
|
||||
for source in jwplayer_sources_data:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = self._proto_relative_url(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
|
||||
@@ -8,7 +8,16 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class CorusIE(ThePlatformFeedIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:globaltv|etcanada)\.com|
|
||||
(?:hgtv|foodnetwork|slice|history|showcase)\.ca
|
||||
)
|
||||
/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
|
||||
'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
|
||||
@@ -27,6 +36,12 @@ class CorusIE(ThePlatformFeedIE):
|
||||
}, {
|
||||
'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TP_FEEDS = {
|
||||
@@ -50,6 +65,14 @@ class CorusIE(ThePlatformFeedIE):
|
||||
'feed_id': '5tUJLgV2YNJ5',
|
||||
'account_id': 2414427935,
|
||||
},
|
||||
'history': {
|
||||
'feed_id': 'tQFx_TyyEq4J',
|
||||
'account_id': 2369613659,
|
||||
},
|
||||
'showcase': {
|
||||
'feed_id': '9H6qyshBZU3E',
|
||||
'account_id': 2414426607,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
class DisneyIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
|
||||
https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr|channel\.de)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
|
||||
_TESTS = [{
|
||||
# Disney.EmbedVideo
|
||||
'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
|
||||
@@ -68,6 +68,9 @@ class DisneyIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://disneychannel.de/sehen/soy-luna-folge-118-5518518987ba27f3cc729268',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -44,8 +44,23 @@ class DrTuberIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://www.drtuber.com/video/%s' % video_id, display_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
video_data = self._download_json(
|
||||
'http://www.drtuber.com/player_config_json/', video_id, query={
|
||||
'vid': video_id,
|
||||
'embed': 0,
|
||||
'aid': 0,
|
||||
'domain_id': 0,
|
||||
})
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in video_data['files'].items():
|
||||
if video_url:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': 2 if format_id == 'hq' else 1,
|
||||
'url': video_url
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
|
||||
@@ -75,7 +90,7 @@ class DrTuberIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'like_count': like_count,
|
||||
|
||||
@@ -71,6 +71,10 @@ from .arte import (
|
||||
TheOperaPlatformIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
from .asiancrush import (
|
||||
AsianCrushIE,
|
||||
AsianCrushPlaylistIE,
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
from .atvat import ATVAtIE
|
||||
@@ -871,6 +875,7 @@ from .rutube import (
|
||||
)
|
||||
from .rutv import RUTVIE
|
||||
from .ruutu import RuutuIE
|
||||
from .ruv import RuvIE
|
||||
from .sandia import SandiaIE
|
||||
from .safari import (
|
||||
SafariIE,
|
||||
|
||||
@@ -85,11 +85,11 @@ class FourTubeIE(InfoExtractor):
|
||||
media_id = params[0]
|
||||
sources = ['%s' % p for p in params[2]]
|
||||
|
||||
token_url = 'http://tkn.4tube.com/{0}/desktop/{1}'.format(
|
||||
token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format(
|
||||
media_id, '+'.join(sources))
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
b'Origin': b'http://www.4tube.com',
|
||||
b'Origin': b'https://www.4tube.com',
|
||||
}
|
||||
token_req = sanitized_Request(token_url, b'{}', headers)
|
||||
tokens = self._download_json(token_req, video_id)
|
||||
|
||||
@@ -69,19 +69,32 @@ class GoogleDriveIE(InfoExtractor):
|
||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
|
||||
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
|
||||
|
||||
resolutions = {}
|
||||
for fmt in fmt_list:
|
||||
mobj = re.search(
|
||||
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
|
||||
if mobj:
|
||||
resolutions[mobj.group('format_id')] = (
|
||||
int(mobj.group('width')), int(mobj.group('height')))
|
||||
|
||||
formats = []
|
||||
for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
|
||||
fmt_id, fmt_url = fmt_stream.split('|')
|
||||
resolution = fmt.split('/')[1]
|
||||
width, height = resolution.split('x')
|
||||
formats.append({
|
||||
'url': lowercase_escape(fmt_url),
|
||||
'format_id': fmt_id,
|
||||
'resolution': resolution,
|
||||
'width': int_or_none(width),
|
||||
'height': int_or_none(height),
|
||||
'ext': self._FORMATS_EXT[fmt_id],
|
||||
})
|
||||
for fmt_stream in fmt_stream_map:
|
||||
fmt_stream_split = fmt_stream.split('|')
|
||||
if len(fmt_stream_split) < 2:
|
||||
continue
|
||||
format_id, format_url = fmt_stream_split[:2]
|
||||
f = {
|
||||
'url': lowercase_escape(format_url),
|
||||
'format_id': format_id,
|
||||
'ext': self._FORMATS_EXT[format_id],
|
||||
}
|
||||
resolution = resolutions.get(format_id)
|
||||
if resolution:
|
||||
f.update({
|
||||
'width': resolution[0],
|
||||
'height': resolution[0],
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
||||
@@ -7,14 +7,19 @@ from .common import InfoExtractor
|
||||
class HGTVComShowIE(InfoExtractor):
|
||||
IE_NAME = 'hgtv.com:show'
|
||||
_VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-videos',
|
||||
_TESTS = [{
|
||||
# data-module="video"
|
||||
'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-season-4-videos',
|
||||
'info_dict': {
|
||||
'id': 'flip-or-flop-full-episodes-videos',
|
||||
'id': 'flip-or-flop-full-episodes-season-4-videos',
|
||||
'title': 'Flip or Flop Full Episodes',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}
|
||||
}, {
|
||||
# data-deferred-module="video"
|
||||
'url': 'http://www.hgtv.com/shows/good-bones/episodes/an-old-victorian-house-gets-a-new-facelift',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -23,7 +28,7 @@ class HGTVComShowIE(InfoExtractor):
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)data-module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
|
||||
r'(?s)data-(?:deferred)?-module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
|
||||
webpage, 'video config'),
|
||||
display_id)['channels'][0]
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ class NPOIE(NPOBaseIE):
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
npo\.nl/(?!live|radio)(?:[^/]+/){2}|
|
||||
npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
|
||||
ntr\.nl/(?:[^/]+/){2,}|
|
||||
omroepwnl\.nl/video/fragment/[^/]+__|
|
||||
zapp\.nl/[^/]+/[^/]+/
|
||||
@@ -150,6 +150,9 @@ class NPOIE(NPOBaseIE):
|
||||
# live stream
|
||||
'url': 'npo:LI_NL1_4188102',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -19,7 +19,7 @@ class PandoraTVIE(InfoExtractor):
|
||||
IE_NAME = 'pandora.tv'
|
||||
IE_DESC = '판도라TV'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
|
||||
'info_dict': {
|
||||
'id': '53294230',
|
||||
@@ -34,7 +34,26 @@ class PandoraTVIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://channel.pandora.tv/channel/video.ptv?ch_userid=gogoucc&prgid=54721744',
|
||||
'info_dict': {
|
||||
'id': '54721744',
|
||||
'ext': 'flv',
|
||||
'title': '[HD] JAPAN COUNTDOWN 170423',
|
||||
'description': '[HD] JAPAN COUNTDOWN 170423',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1704.9,
|
||||
'upload_date': '20170423',
|
||||
'uploader': 'GOGO_UCC',
|
||||
'uploader_id': 'gogoucc',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'params': {
|
||||
# Test metadata only
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
@@ -86,7 +105,7 @@ class PandoraTVIE(InfoExtractor):
|
||||
'description': info.get('body'),
|
||||
'thumbnail': info.get('thumbnail') or info.get('poster'),
|
||||
'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')),
|
||||
'upload_date': info['fid'][:8] if isinstance(info.get('fid'), compat_str) else None,
|
||||
'upload_date': info['fid'].split('/')[-1][:8] if isinstance(info.get('fid'), compat_str) else None,
|
||||
'uploader': info.get('nickname'),
|
||||
'uploader_id': info.get('upload_userid'),
|
||||
'view_count': str_to_int(info.get('hit')),
|
||||
|
||||
@@ -65,7 +65,7 @@ class PolskieRadioIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
content = self._search_regex(
|
||||
r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>',
|
||||
r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
|
||||
webpage, 'content')
|
||||
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
|
||||
101
youtube_dl/extractor/ruv.py
Normal file
101
youtube_dl/extractor/ruv.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class RuvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'
|
||||
_TESTS = [{
|
||||
# m3u8
|
||||
'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
|
||||
'md5': '66347652f4e13e71936817102acc1724',
|
||||
'info_dict': {
|
||||
'id': '1144499',
|
||||
'display_id': 'fh-valur/20170516',
|
||||
'ext': 'mp4',
|
||||
'title': 'FH - Valur',
|
||||
'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
|
||||
'timestamp': 1494963600,
|
||||
'upload_date': '20170516',
|
||||
},
|
||||
}, {
|
||||
# mp3
|
||||
'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
|
||||
'md5': '395ea250c8a13e5fdb39d4670ef85378',
|
||||
'info_dict': {
|
||||
'id': '1153630',
|
||||
'display_id': 'morgunutvarpid/20170619',
|
||||
'ext': 'mp3',
|
||||
'title': 'Morgunútvarpið',
|
||||
'description': 'md5:a4cf1202c0a1645ca096b06525915418',
|
||||
'timestamp': 1497855000,
|
||||
'upload_date': '20170619',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ruv.is/node/1151854',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'
|
||||
|
||||
media_url = self._html_search_regex(
|
||||
FIELD_RE % 'src', webpage, 'video URL', group='url')
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
|
||||
webpage, 'video id', default=display_id)
|
||||
|
||||
ext = determine_ext(media_url)
|
||||
|
||||
if ext == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
elif ext == 'mp3':
|
||||
formats = [{
|
||||
'format_id': 'mp3',
|
||||
'url': media_url,
|
||||
'vcodec': 'none',
|
||||
}]
|
||||
else:
|
||||
formats = [{
|
||||
'url': media_url,
|
||||
}]
|
||||
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
thumbnail = self._og_search_thumbnail(
|
||||
webpage, default=None) or self._search_regex(
|
||||
FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False)
|
||||
timestamp = unified_timestamp(self._html_search_meta(
|
||||
'article:published_time', webpage, 'timestamp', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -615,7 +615,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
|
||||
source_name = source_file.get('public_name', 'Original')
|
||||
if self._is_valid_url(download_url, video_id, '%s video' % source_name):
|
||||
ext = source_file.get('extension', determine_ext(download_url)).lower()
|
||||
ext = (try_get(
|
||||
source_file, lambda x: x['extension'],
|
||||
compat_str) or determine_ext(
|
||||
download_url, None) or 'mp4').lower()
|
||||
formats.append({
|
||||
'url': download_url,
|
||||
'ext': ext,
|
||||
|
||||
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_kwargs,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -36,7 +39,8 @@ class ViuBaseIE(InfoExtractor):
|
||||
headers.update(kwargs.get('headers', {}))
|
||||
kwargs['headers'] = headers
|
||||
response = self._download_json(
|
||||
'https://www.viu.com/api/' + path, *args, **kwargs)['response']
|
||||
'https://www.viu.com/api/' + path, *args,
|
||||
**compat_kwargs(kwargs))['response']
|
||||
if response.get('status') != 'success':
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, response['message']), expected=True)
|
||||
|
||||
@@ -4,11 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class WatchIndianPornIE(InfoExtractor):
|
||||
@@ -23,11 +19,8 @@ class WatchIndianPornIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'LoveJay',
|
||||
'upload_date': '20160428',
|
||||
'duration': 226,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
@@ -40,51 +33,36 @@ class WatchIndianPornIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r"url: escape\('([^']+)'\)", webpage, 'url')
|
||||
info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="he2"><span>(.*?)</span>',
|
||||
webpage, 'title')
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<span id="container"><img\s+src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'class="aupa">\s*(.*?)</a>',
|
||||
webpage, 'uploader')
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
|
||||
title = self._html_search_regex((
|
||||
r'<title>(.+?)\s*-\s*Indian\s+Porn</title>',
|
||||
r'<h4>(.+?)</h4>'
|
||||
), webpage, 'title')
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>',
|
||||
r'Time:\s*<strong>\s*(.+?)\s*</strong>',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
|
||||
view_count = int(self._search_regex(
|
||||
r'(?s)Time:\s*<strong>.*?</strong>.*?<strong>\s*(\d+)\s*</strong>',
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
categories = re.findall(
|
||||
r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>',
|
||||
r'<a[^>]+class=[\'"]categories[\'"][^>]*>\s*([^<]+)\s*</a>',
|
||||
webpage)
|
||||
|
||||
return {
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'http_headers': {
|
||||
'Referer': url,
|
||||
},
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'categories': categories,
|
||||
'age_limit': 18,
|
||||
}
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
||||
@@ -30,6 +30,7 @@ class XFileShareIE(InfoExtractor):
|
||||
(r'vidbom\.com', 'VidBom'),
|
||||
(r'vidlo\.us', 'vidlo'),
|
||||
(r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'),
|
||||
(r'fastvideo\.me', 'FastVideo.me'),
|
||||
)
|
||||
|
||||
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
|
||||
@@ -112,6 +113,9 @@ class XFileShareIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.rapidvideo.cool/b667kprndr8w',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
@@ -26,7 +27,7 @@ class YouPornIE(InfoExtractor):
|
||||
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Ask Dan And Jennifer',
|
||||
'upload_date': '20101221',
|
||||
'upload_date': '20101217',
|
||||
'average_rating': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
@@ -45,7 +46,7 @@ class YouPornIE(InfoExtractor):
|
||||
'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Unknown',
|
||||
'upload_date': '20111125',
|
||||
'upload_date': '20110418',
|
||||
'average_rating': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
@@ -68,28 +69,46 @@ class YouPornIE(InfoExtractor):
|
||||
webpage = self._download_webpage(request, display_id)
|
||||
|
||||
title = self._search_regex(
|
||||
[r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>.+?)\1',
|
||||
r'<h1[^>]+class=["\']heading\d?["\'][^>]*>([^<])<'],
|
||||
webpage, 'title', group='title')
|
||||
[r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'],
|
||||
webpage, 'title', group='title',
|
||||
default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'title', webpage, fatal=True)
|
||||
|
||||
links = []
|
||||
|
||||
# Main source
|
||||
definitions = self._parse_json(
|
||||
self._search_regex(
|
||||
r'mediaDefinition\s*=\s*(\[.+?\]);', webpage,
|
||||
'media definitions', default='[]'),
|
||||
video_id, fatal=False)
|
||||
if definitions:
|
||||
for definition in definitions:
|
||||
if not isinstance(definition, dict):
|
||||
continue
|
||||
video_url = definition.get('videoUrl')
|
||||
if isinstance(video_url, compat_str) and video_url:
|
||||
links.append(video_url)
|
||||
|
||||
# Fallback #1, this also contains extra low quality 180p format
|
||||
for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
||||
links.append(link)
|
||||
|
||||
# Fallback #2 (unavailable as at 22.06.2017)
|
||||
sources = self._search_regex(
|
||||
r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
|
||||
if sources:
|
||||
for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
|
||||
links.append(link)
|
||||
|
||||
# Fallback #1
|
||||
# Fallback #3 (unavailable as at 22.06.2017)
|
||||
for _, link in re.findall(
|
||||
r'(?:videoUrl|videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
|
||||
r'(?:videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
|
||||
links.append(link)
|
||||
|
||||
# Fallback #2, this also contains extra low quality 180p format
|
||||
for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
||||
links.append(link)
|
||||
|
||||
# Fallback #3, encrypted links
|
||||
# Fallback #4, encrypted links (unavailable as at 22.06.2017)
|
||||
for _, encrypted_link in re.findall(
|
||||
r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
|
||||
links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
|
||||
@@ -124,7 +143,8 @@ class YouPornIE(InfoExtractor):
|
||||
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>',
|
||||
[r'Date\s+[Aa]dded:\s*<span>([^<]+)',
|
||||
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
@@ -1269,37 +1269,57 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
sub_lang_list[sub_lang] = sub_formats
|
||||
return sub_lang_list
|
||||
|
||||
def make_captions(sub_url, sub_langs):
|
||||
parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
|
||||
caption_qs = compat_parse_qs(parsed_sub_url.query)
|
||||
captions = {}
|
||||
for sub_lang in sub_langs:
|
||||
sub_formats = []
|
||||
for ext in self._SUBTITLE_FORMATS:
|
||||
caption_qs.update({
|
||||
'tlang': [sub_lang],
|
||||
'fmt': [ext],
|
||||
})
|
||||
sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
|
||||
query=compat_urllib_parse_urlencode(caption_qs, True)))
|
||||
sub_formats.append({
|
||||
'url': sub_url,
|
||||
'ext': ext,
|
||||
})
|
||||
captions[sub_lang] = sub_formats
|
||||
return captions
|
||||
|
||||
# New captions format as of 22.06.2017
|
||||
player_response = args.get('player_response')
|
||||
if player_response and isinstance(player_response, compat_str):
|
||||
player_response = self._parse_json(
|
||||
player_response, video_id, fatal=False)
|
||||
if player_response:
|
||||
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
||||
base_url = renderer['captionTracks'][0]['baseUrl']
|
||||
sub_lang_list = []
|
||||
for lang in renderer['translationLanguages']:
|
||||
lang_code = lang.get('languageCode')
|
||||
if lang_code:
|
||||
sub_lang_list.append(lang_code)
|
||||
return make_captions(base_url, sub_lang_list)
|
||||
|
||||
# Some videos don't provide ttsurl but rather caption_tracks and
|
||||
# caption_translation_languages (e.g. 20LmZk1hakA)
|
||||
# Does not used anymore as of 22.06.2017
|
||||
caption_tracks = args['caption_tracks']
|
||||
caption_translation_languages = args['caption_translation_languages']
|
||||
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
|
||||
parsed_caption_url = compat_urllib_parse_urlparse(caption_url)
|
||||
caption_qs = compat_parse_qs(parsed_caption_url.query)
|
||||
|
||||
sub_lang_list = {}
|
||||
sub_lang_list = []
|
||||
for lang in caption_translation_languages.split(','):
|
||||
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
|
||||
sub_lang = lang_qs.get('lc', [None])[0]
|
||||
if not sub_lang:
|
||||
continue
|
||||
sub_formats = []
|
||||
for ext in self._SUBTITLE_FORMATS:
|
||||
caption_qs.update({
|
||||
'tlang': [sub_lang],
|
||||
'fmt': [ext],
|
||||
})
|
||||
sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
|
||||
query=compat_urllib_parse_urlencode(caption_qs, True)))
|
||||
sub_formats.append({
|
||||
'url': sub_url,
|
||||
'ext': ext,
|
||||
})
|
||||
sub_lang_list[sub_lang] = sub_formats
|
||||
return sub_lang_list
|
||||
if sub_lang:
|
||||
sub_lang_list.append(sub_lang)
|
||||
return make_captions(caption_url, sub_lang_list)
|
||||
# An extractor error can be raise by the download process if there are
|
||||
# no automatic captions but there are subtitles
|
||||
except (KeyError, ExtractorError):
|
||||
except (KeyError, IndexError, ExtractorError):
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
|
||||
|
||||
@@ -4,7 +4,10 @@ import subprocess
|
||||
|
||||
from .common import PostProcessor
|
||||
from ..compat import compat_shlex_quote
|
||||
from ..utils import PostProcessingError
|
||||
from ..utils import (
|
||||
encodeArgument,
|
||||
PostProcessingError,
|
||||
)
|
||||
|
||||
|
||||
class ExecAfterDownloadPP(PostProcessor):
|
||||
@@ -20,7 +23,7 @@ class ExecAfterDownloadPP(PostProcessor):
|
||||
cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
|
||||
|
||||
self._downloader.to_screen('[exec] Executing command: %s' % cmd)
|
||||
retCode = subprocess.call(cmd, shell=True)
|
||||
retCode = subprocess.call(encodeArgument(cmd), shell=True)
|
||||
if retCode != 0:
|
||||
raise PostProcessingError(
|
||||
'Command returned error code %d' % retCode)
|
||||
|
||||
@@ -35,11 +35,14 @@ class MetadataFromTitlePP(PostProcessor):
|
||||
title = info['title']
|
||||
match = re.match(self._titleregex, title)
|
||||
if match is None:
|
||||
self._downloader.to_screen('[fromtitle] Could not interpret title of video as "%s"' % self._titleformat)
|
||||
self._downloader.to_screen(
|
||||
'[fromtitle] Could not interpret title of video as "%s"'
|
||||
% self._titleformat)
|
||||
return [], info
|
||||
for attribute, value in match.groupdict().items():
|
||||
value = match.group(attribute)
|
||||
info[attribute] = value
|
||||
self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value)
|
||||
self._downloader.to_screen(
|
||||
'[fromtitle] parsed %s: %s'
|
||||
% (attribute, value if value is not None else 'NA'))
|
||||
|
||||
return [], info
|
||||
|
||||
@@ -22,7 +22,6 @@ import locale
|
||||
import math
|
||||
import operator
|
||||
import os
|
||||
import pipes
|
||||
import platform
|
||||
import random
|
||||
import re
|
||||
@@ -1535,7 +1534,7 @@ def shell_quote(args):
|
||||
if isinstance(a, bytes):
|
||||
# We may get a filename encoded with 'encodeFilename'
|
||||
a = a.decode(encoding)
|
||||
quoted_args.append(pipes.quote(a))
|
||||
quoted_args.append(compat_shlex_quote(a))
|
||||
return ' '.join(quoted_args)
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2017.06.12'
|
||||
__version__ = '2017.06.23'
|
||||
|
||||
Reference in New Issue
Block a user