Compare commits

..

31 Commits

Author SHA1 Message Date
Philipp Hagemeister
4dd09c9add release 2015.07.07 2015-07-07 10:36:07 +02:00
Yen Chi Hsuan
267dc07e6b [gfycat] Catch errors 2015-07-07 14:22:13 +08:00
Yen Chi Hsuan
d7b4d5dd50 [gfycat] Extract id correctly (fixes #6165) 2015-07-07 14:16:56 +08:00
Sergey M․
7f220b2fac [vk] Catch ownership confirmation request 2015-07-07 00:04:19 +06:00
Sergey M․
275c0423aa [vk] Fix extraction (Closes #6153) 2015-07-07 00:02:34 +06:00
Yen Chi Hsuan
d3ee4bbc5a Merge branch 'ping-qqmusic-format-fix' 2015-07-06 17:55:45 +08:00
Yen Chi Hsuan
85a064861f [qqmusic] Use regex for thumbnails in test cases 2015-07-06 17:54:41 +08:00
Yen Chi Hsuan
d0b436bff2 Merge branch 'qqmusic-format-fix' of https://github.com/ping/youtube-dl into ping-qqmusic-format-fix 2015-07-06 17:24:44 +08:00
Yen Chi Hsuan
92b2f18072 Merge branch 'ping-qqmusic-album-fix' 2015-07-06 17:09:56 +08:00
Yen Chi Hsuan
dfc4eca21f [qqmusic:album] Playlist names are optional 2015-07-06 17:09:17 +08:00
Yen Chi Hsuan
fc7ae675e2 [qqmusic:album] Strip description 2015-07-06 17:08:32 +08:00
Yen Chi Hsuan
804ad79985 Merge branch 'qqmusic-album-fix' of https://github.com/ping/youtube-dl into ping-qqmusic-album-fix 2015-07-06 17:01:59 +08:00
Yen Chi Hsuan
da839880e9 Merge branch 'ping-qqmusic-playlist' 2015-07-06 16:20:46 +08:00
Yen Chi Hsuan
e9d33454b5 [qqmusic:playlist] Playlist names are optional 2015-07-06 16:19:49 +08:00
Yen Chi Hsuan
d80891efc4 Merge branch 'qqmusic-playlist' of https://github.com/ping/youtube-dl into ping-qqmusic-playlist 2015-07-06 16:08:30 +08:00
Yen Chi Hsuan
59a83d3e5b [spiegeltv] Skip invalid m3u8 manifests (closes #6157) 2015-07-06 08:41:14 +08:00
Yen Chi Hsuan
13af92fdc4 [common] Add 'fatal' to _extract_m3u8_formats 2015-07-06 08:39:38 +08:00
Sergey M․
0c20ee7d4b [rtlnl] Clarify current adaptive -> flash workaround rationale 2015-07-06 04:16:56 +06:00
Sergey M․
89d42c2c75 [rtlnl] Clarify test 2015-07-06 02:58:02 +06:00
Sergey M․
04611765a4 Merge branch 'corone17-patch-1' 2015-07-05 19:07:51 +06:00
Sergey M․
9dfc4fa1a1 [rtlnl] Add test with encrypted m3u8 streams for reference 2015-07-05 19:07:07 +06:00
Sergey M․
43232d5c14 [rtlnl] Improve 2015-07-05 19:01:07 +06:00
Sergey M․
f7c272d4fa Merge branch 'patch-1' of https://github.com/corone17/youtube-dl into corone17-patch-1 2015-07-05 18:07:39 +06:00
Sergey M․
ede21449c8 [crunchyroll] Fix extraction (Closes #5855, closes #5881) 2015-07-05 06:29:36 +06:00
Sergey M․
d7c9a3e976 Credit @remitamine for snagfilms (#6096) 2015-07-04 17:22:11 +06:00
corone17
bea41c7f3f Update rtlnl.py
Better to extract 'http://manifest.us.rtl.nl' from the json, I'd say. And I think it's better to use the default json-url to make it more futureproof.
Succesfully tested with tarball.
2015-06-29 00:59:18 +02:00
ping
4d58b24c15 [qqmusic] Use _check_formats instead 2015-06-18 23:09:04 +08:00
ping
0392ac98d2 [qqmusic] Fix code formatting 2015-06-18 21:13:03 +08:00
ping
5e3915cbe3 [qqmusic] Fix song extraction when certain formats are unavailable 2015-06-18 21:06:25 +08:00
ping
29b809de68 [qqmusic] Fix album extraction 2015-06-18 15:52:04 +08:00
ping
0d0d5d3717 [qqmusic] Add support for playlists 2015-06-18 13:59:37 +08:00
11 changed files with 163 additions and 36 deletions

View File

@@ -129,3 +129,4 @@ Mister Hat
Peter Ding
jackyzy823
George Brighton
Remita Amine

View File

@@ -384,6 +384,7 @@
- **Pyvideo**
- **qqmusic**
- **qqmusic:album**
- **qqmusic:playlist**
- **qqmusic:singer**
- **qqmusic:toplist**
- **QuickVid**

View File

@@ -432,6 +432,7 @@ from .qqmusic import (
QQMusicSingerIE,
QQMusicAlbumIE,
QQMusicToplistIE,
QQMusicPlaylistIE,
)
from .quickvid import QuickVidIE
from .r7 import R7IE

View File

@@ -846,7 +846,8 @@ class InfoExtractor(object):
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
entry_protocol='m3u8', preference=None,
m3u8_id=None, note=None, errnote=None):
m3u8_id=None, note=None, errnote=None,
fatal=True):
formats = [{
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
@@ -866,7 +867,10 @@ class InfoExtractor(object):
m3u8_doc = self._download_webpage(
m3u8_url, video_id,
note=note or 'Downloading m3u8 information',
errnote=errnote or 'Failed to download m3u8 information')
errnote=errnote or 'Failed to download m3u8 information',
fatal=fatal)
if m3u8_doc is False:
return m3u8_doc
last_info = None
last_media = None
kv_rex = re.compile(

View File

@@ -27,7 +27,7 @@ from ..aes import (
class CrunchyrollIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_NETRC_MACHINE = 'crunchyroll'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
@@ -45,6 +45,22 @@ class CrunchyrollIE(InfoExtractor):
# rtmp
'skip_download': True,
},
}, {
'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
'info_dict': {
'id': '589804',
'ext': 'flv',
'title': 'Culture Japan Episode 1 Rebuilding Japan after the 3.11',
'description': 'md5:fe2743efedb49d279552926d0bd0cd9e',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'Danny Choo Network',
'upload_date': '20120213',
},
'params': {
# rtmp
'skip_download': True,
},
}, {
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
'only_matching': True,
@@ -251,16 +267,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p'
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
# urlencode doesn't work!
streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality + '&media%5Fid=' + stream_id + '&video%5Fformat=' + stream_format
streamdata_req = compat_urllib_request.Request(
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
% (stream_id, stream_format, stream_quality),
compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
streamdata = self._download_xml(
streamdata_req, video_id,
note='Downloading media info for %s' % video_format)
video_url = streamdata.find('./host').text
video_play_path = streamdata.find('./file').text
stream_info = streamdata.find('./{default}preload/stream_info')
video_url = stream_info.find('./host').text
video_play_path = stream_info.find('./file').text
formats.append({
'url': video_url,
'play_path': video_play_path,

View File

@@ -6,12 +6,13 @@ from ..utils import (
int_or_none,
float_or_none,
qualities,
ExtractorError,
)
class GfycatIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
'info_dict': {
'id': 'DeadlyDecisiveGermanpinscher',
@@ -27,14 +28,33 @@ class GfycatIE(InfoExtractor):
'categories': list,
'age_limit': 0,
}
}
}, {
'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
'info_dict': {
'id': 'JauntyTimelyAmazontreeboa',
'ext': 'mp4',
'title': 'JauntyTimelyAmazontreeboa',
'timestamp': 1411720126,
'upload_date': '20140926',
'uploader': 'anonymous',
'duration': 3.52,
'view_count': int,
'like_count': int,
'dislike_count': int,
'categories': list,
'age_limit': 0,
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
gfy = self._download_json(
'http://gfycat.com/cajax/get/%s' % video_id,
video_id, 'Downloading video info')['gfyItem']
video_id, 'Downloading video info')
if 'error' in gfy:
raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
gfy = gfy['gfyItem']
title = gfy.get('title') or gfy['gfyName']
description = gfy.get('description')

View File

@@ -9,6 +9,7 @@ from .common import InfoExtractor
from ..utils import (
strip_jsonp,
unescapeHTML,
clean_html,
)
from ..compat import compat_urllib_request
@@ -26,6 +27,20 @@ class QQMusicIE(InfoExtractor):
'upload_date': '20141227',
'creator': '林俊杰',
'description': 'md5:d327722d0361576fde558f1ac68a7065',
'thumbnail': 're:^https?://.*\.jpg$',
}
}, {
'note': 'There is no mp3-320 version of this song.',
'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV',
'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
'info_dict': {
'id': '004MsGEo3DdNxV',
'ext': 'mp3',
'title': '如果',
'upload_date': '20050626',
'creator': '李季美',
'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
'thumbnail': 're:^https?://.*\.jpg$',
}
}]
@@ -68,6 +83,14 @@ class QQMusicIE(InfoExtractor):
if lrc_content:
lrc_content = lrc_content.replace('\\n', '\n')
thumbnail_url = None
albummid = self._search_regex(
[r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
detail_info_page, 'album mid', default=None)
if albummid:
thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \
% (albummid[-2:-1], albummid[-1], albummid)
guid = self.m_r_get_ruin()
vkey = self._download_json(
@@ -85,6 +108,7 @@ class QQMusicIE(InfoExtractor):
'preference': details['preference'],
'abr': details.get('abr'),
})
self._check_formats(formats, mid)
self._sort_formats(formats)
return {
@@ -94,6 +118,7 @@ class QQMusicIE(InfoExtractor):
'upload_date': publish_time,
'creator': singer,
'description': lrc_content,
'thumbnail': thumbnail_url,
}
@@ -163,31 +188,40 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:album'
_VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
_TEST = {
'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0',
_TESTS = [{
'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1',
'info_dict': {
'id': '000gXCTb2AhRR1',
'title': '我们都是这样长大的',
'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6',
'description': 'md5:179c5dce203a5931970d306aa9607ea6',
},
'playlist_count': 4,
}
}, {
'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3',
'info_dict': {
'id': '002Y5a3b3AlCu3',
'title': '그리고...',
'description': 'md5:a48823755615508a95080e81b51ba729',
},
'playlist_count': 8,
}]
def _real_extract(self, url):
mid = self._match_id(url)
album_page = self._download_webpage(
self.qq_static_url('album', mid), mid, 'Download album page')
album = self._download_json(
'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid,
mid, 'Download album page')['data']
entries = self.get_entries_from_page(album_page)
album_name = self._html_search_regex(
r"albumname\s*:\s*'([^']+)',", album_page, 'album name',
default=None)
album_detail = self._html_search_regex(
r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>',
album_page, 'album details', default=None)
entries = [
self.url_result(
'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
) for song in album['list']
]
album_name = album.get('name')
album_detail = album.get('desc')
if album_detail is not None:
album_detail = album_detail.strip()
return self.playlist_result(entries, mid, album_name, album_detail)
@@ -243,3 +277,36 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
list_name = topinfo.get('ListName')
list_description = topinfo.get('info')
return self.playlist_result(entries, list_id, list_name, list_description)
class QQMusicPlaylistIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:playlist'
_VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://y.qq.com/#type=taoge&id=3462654915',
'info_dict': {
'id': '3462654915',
'title': '韩国5月新歌精选下旬',
'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
},
'playlist_count': 40,
}
def _real_extract(self, url):
list_id = self._match_id(url)
list_json = self._download_json(
'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s'
% list_id, list_id, 'Download list page',
transform_source=strip_jsonp)['cdlist'][0]
entries = [
self.url_result(
'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
) for song in list_json['songlist']
]
list_name = list_json.get('dissname')
list_description = clean_html(unescapeHTML(list_json.get('desc')))
return self.playlist_result(entries, list_id, list_name, list_description)

View File

@@ -43,6 +43,10 @@ class RtlNlIE(InfoExtractor):
'upload_date': '20150215',
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
}
}, {
# encrypted m3u8 streams, georestricted
'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
'only_matching': True,
}, {
'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
'only_matching': True,
@@ -51,7 +55,7 @@ class RtlNlIE(InfoExtractor):
def _real_extract(self, url):
uuid = self._match_id(url)
info = self._download_json(
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
uuid)
material = info['material'][0]
@@ -59,9 +63,14 @@ class RtlNlIE(InfoExtractor):
subtitle = material['title'] or info['episodes'][0]['name']
description = material.get('synopsis') or info['episodes'][0]['synopsis']
meta = info.get('meta', {})
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
videopath = material['videopath'].replace('.f4m', '.m3u8')
m3u8_url = 'http://manifest.us.rtl.nl' + videopath
# NB: nowadays, recent ffmpeg and avconv can handle these encrypted streams, so
# this adaptive -> flash workaround is not required in general, but it also
# allows bypassing georestriction therefore is retained for now.
videopath = material['videopath'].replace('/adaptive/', '/flash/')
m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
@@ -82,7 +91,7 @@ class RtlNlIE(InfoExtractor):
self._sort_formats(formats)
thumbnails = []
meta = info.get('meta', {})
for p in ('poster_base_url', '"thumb_base_url"'):
if not meta.get(p):
continue

View File

@@ -77,11 +77,13 @@ class SpiegeltvIE(InfoExtractor):
'rtmp_live': True,
})
elif determine_ext(endpoint) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
m3u8_formats = self._extract_m3u8_formats(
endpoint.replace('[video]', play_path),
video_id, 'm4v',
preference=1, # Prefer hls since it allows to workaround georestriction
m3u8_id='hls'))
m3u8_id='hls', fatal=False)
if m3u8_formats is not False:
formats.extend(m3u8_formats)
else:
formats.append({
'url': endpoint,

View File

@@ -153,9 +153,14 @@ class VKIE(InfoExtractor):
if not video_id:
video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
info_url = 'http://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
info_page = self._download_webpage(info_url, video_id)
if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
raise ExtractorError(
'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
expected=True)
ERRORS = {
r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
'Video %s has been removed from public access due to rightholder complaint.',

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.07.04'
__version__ = '2015.07.07'