Compare commits

..

9 Commits

Author SHA1 Message Date
Philipp Hagemeister
16e6f396b4 release 2014.09.04.3 2014-09-04 16:20:17 +02:00
Sergey M․
c6ec6b2e8b [arte.tv:+7] Add one another one pattern for json vp url 2014-09-04 20:44:51 +07:00
Sergey M․
7bbc6428b6 [nba] Modernize 2014-09-04 20:06:14 +07:00
Sergey M․
c1a3c9ddb2 [techtalks] Modernize 2014-09-04 19:48:29 +07:00
Sergey M․
feec0f56f5 [toypics:user] Update test playlist count 2014-09-04 19:37:40 +07:00
Sergey M․
8029857d27 [washingtonpost] Add playlist title to test 2014-09-04 19:34:40 +07:00
Philipp Hagemeister
aa61802c1e release 2014.09.04.2 2014-09-04 06:40:44 +02:00
Philipp Hagemeister
f54aee0209 [cliphunter] Add support for more formats 2014-09-04 06:40:15 +02:00
Philipp Hagemeister
5df921b0e3 [test_cache] Add a dot in the file name 2014-09-04 04:51:52 +02:00
8 changed files with 123 additions and 52 deletions

View File

@@ -43,16 +43,16 @@ class TestCache(unittest.TestCase):
})
c = Cache(ydl)
obj = {'x': 1, 'y': ['ä', '\\a', True]}
self.assertEqual(c.load('test_cache', 'k'), None)
c.store('test_cache', 'k', obj)
self.assertEqual(c.load('test_cache', 'k.'), None)
c.store('test_cache', 'k.', obj)
self.assertEqual(c.load('test_cache', 'k2'), None)
self.assertFalse(_is_empty(self.test_dir))
self.assertEqual(c.load('test_cache', 'k'), obj)
self.assertEqual(c.load('test_cache', 'k.'), obj)
self.assertEqual(c.load('test_cache', 'y'), None)
self.assertEqual(c.load('test_cache2', 'k'), None)
self.assertEqual(c.load('test_cache2', 'k.'), None)
c.remove()
self.assertFalse(os.path.exists(self.test_dir))
self.assertEqual(c.load('test_cache', 'k'), None)
self.assertEqual(c.load('test_cache', 'k.'), None)
if __name__ == '__main__':

View File

@@ -78,7 +78,8 @@ class ArteTVPlus7IE(InfoExtractor):
def _extract_from_webpage(self, webpage, video_id, lang):
json_url = self._html_search_regex(
r'arte_vp_url="(.*?)"', webpage, 'json vp url')
[r'arte_vp_url="(.*?)"', r'data-url="([^"]+)"'],
webpage, 'json vp url')
return self._extract_from_json_url(json_url, video_id, lang)
def _extract_from_json_url(self, json_url, video_id, lang):

View File

@@ -1,11 +1,13 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import int_or_none
translation_table = {
_translation_table = {
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
'y': 'l', 'z': 'i',
@@ -13,6 +15,10 @@ translation_table = {
}
def _decode(s):
return ''.join(_translation_table.get(c, c) for c in s)
class CliphunterIE(InfoExtractor):
IE_NAME = 'cliphunter'
@@ -22,10 +28,14 @@ class CliphunterIE(InfoExtractor):
'''
_TEST = {
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
'file': '1012420.flv',
'md5': '15e7740f30428abf70f4223478dc1225',
'md5': 'a2ba71eebf523859fe527a61018f723e',
'info_dict': {
'id': '1012420',
'ext': 'mp4',
'title': 'Fun Jynx Maze solo',
'thumbnail': 're:^https?://.*\.jpg$',
'age_limit': 18,
'duration': 1317,
}
}
@@ -35,22 +45,55 @@ class CliphunterIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_title = self._search_regex(
r'mediaTitle = "([^"]+)"', webpage, 'title')
pl_fiji = self._search_regex(
r'pl_fiji = \'([^\']+)\'', webpage, 'video data')
pl_c_qual = self._search_regex(
r'pl_c_qual = "(.)"', webpage, 'video quality')
video_title = self._search_regex(
r'mediaTitle = "([^"]+)"', webpage, 'title')
video_url = ''.join(translation_table.get(c, c) for c in pl_fiji)
video_url = _decode(pl_fiji)
formats = [{
'url': video_url,
'format_id': pl_c_qual,
'format_id': 'default-%s' % pl_c_qual,
}]
qualities_json = self._search_regex(
r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info')
qualities_data = json.loads(qualities_json)
for i, t in enumerate(
re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)):
quality_id, crypted_url = t
video_url = _decode(crypted_url)
f = {
'format_id': quality_id,
'url': video_url,
'quality': i,
}
if quality_id in qualities_data:
qd = qualities_data[quality_id]
m = re.match(
r'''(?x)<b>(?P<width>[0-9]+)x(?P<height>[0-9]+)<\\/b>
\s*\(\s*(?P<tbr>[0-9]+)\s*kb\\/s''', qd)
if m:
f['width'] = int(m.group('width'))
f['height'] = int(m.group('height'))
f['tbr'] = int(m.group('tbr'))
formats.append(f)
self._sort_formats(formats)
thumbnail = self._search_regex(
r"var\s+mov_thumb\s*=\s*'([^']+)';",
webpage, 'thumbnail', fatal=False)
duration = int_or_none(self._search_regex(
r'pl_dur\s*=\s*([0-9]+)', webpage, 'duration', fatal=False))
return {
'id': video_id,
'title': video_title,
'formats': formats,
'duration': duration,
'age_limit': self._rta_search(webpage),
'thumbnail': thumbnail,
}

View File

@@ -3,18 +3,23 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
remove_end,
parse_duration,
)
class NBAIE(InfoExtractor):
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
_TEST = {
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
'info_dict': {
'id': '0021200253-okc-bkn-recap.nba',
'ext': 'mp4',
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
'title': 'Thunder vs. Nets',
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
'duration': 181,
},
}
@@ -27,13 +32,18 @@ class NBAIE(InfoExtractor):
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
shortened_video_id = video_id.rpartition('/')[2]
title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
title = remove_end(
self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
description = self._og_search_description(webpage)
duration = parse_duration(
self._html_search_meta('duration', webpage, 'duration', fatal=False))
description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
return {
'id': shortened_video_id,
'url': video_url,
'title': title,
'description': description,
'duration': duration,
}

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -11,24 +13,30 @@ class TechTalksIE(InfoExtractor):
_VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
_TEST = {
u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
u'playlist': [
'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
'info_dict': {
'id': '57758',
'title': 'Learning Topic Models --- Going beyond SVD',
},
'playlist': [
{
u'file': u'57758.flv',
u'info_dict': {
u'title': u'Learning Topic Models --- Going beyond SVD',
'info_dict': {
'id': '57758',
'ext': 'flv',
'title': 'Learning Topic Models --- Going beyond SVD',
},
},
{
u'file': u'57758-slides.flv',
u'info_dict': {
u'title': u'Learning Topic Models --- Going beyond SVD',
'info_dict': {
'id': '57758-slides',
'ext': 'flv',
'title': 'Learning Topic Models --- Going beyond SVD',
},
},
],
u'params': {
'params': {
# rtmp download
u'skip_download': True,
'skip_download': True,
},
}
@@ -36,30 +44,36 @@ class TechTalksIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
talk_id = mobj.group('id')
webpage = self._download_webpage(url, talk_id)
rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage,
u'rtmp url')
play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
webpage, u'presenter play path')
rtmp_url = self._search_regex(
r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
play_path = self._search_regex(
r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
webpage, 'presenter play path')
title = clean_html(get_element_by_attribute('class', 'title', webpage))
video_info = {
'id': talk_id,
'title': title,
'url': rtmp_url,
'play_path': play_path,
'ext': 'flv',
}
'id': talk_id,
'title': title,
'url': rtmp_url,
'play_path': play_path,
'ext': 'flv',
}
m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
if m_slides is None:
return video_info
else:
return [
video_info,
# The slides video
{
'id': talk_id + '-slides',
'title': title,
'url': rtmp_url,
'play_path': m_slides.group(1),
'ext': 'flv',
},
]
return {
'_type': 'playlist',
'id': talk_id,
'title': title,
'entries': [
video_info,
# The slides video
{
'id': talk_id + '-slides',
'title': title,
'url': rtmp_url,
'play_path': m_slides.group(1),
'ext': 'flv',
},
],
}

View File

@@ -47,7 +47,7 @@ class ToypicsUserIE(InfoExtractor):
'info_dict': {
'id': 'Mikey',
},
'playlist_mincount': 9917,
'playlist_mincount': 19,
}
def _real_extract(self, url):

View File

@@ -13,6 +13,9 @@ class WashingtonPostIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
_TEST = {
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
'info_dict': {
'title': 'Sinkhole of bureaucracy',
},
'playlist': [{
'md5': 'c3f4b4922ffa259243f68e928db2db8c',
'info_dict': {

View File

@@ -1,2 +1,2 @@
__version__ = '2014.09.04.1'
__version__ = '2014.09.04.3'