release 2014.09.04.3

[arte.tv:+7] Add another pattern for json vp url
[nba] Modernize
2025-12-13 01:22:44 +01:00 · 2014-09-04 16:20:17 +02:00 · 2014-09-04 20:44:51 +07:00 · 2014-09-04 20:06:14 +07:00 · 2014-09-04 19:48:29 +07:00 · 2014-09-04 19:37:40 +07:00
8 changed files with 123 additions and 52 deletions
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -43,16 +43,16 @@ class TestCache(unittest.TestCase):
        })
        c = Cache(ydl)
        obj = {'x': 1, 'y': ['ä', '\\a', True]}
-        self.assertEqual(c.load('test_cache', 'k'), None)
-        c.store('test_cache', 'k', obj)
+        self.assertEqual(c.load('test_cache', 'k.'), None)
+        c.store('test_cache', 'k.', obj)
        self.assertEqual(c.load('test_cache', 'k2'), None)
        self.assertFalse(_is_empty(self.test_dir))
-        self.assertEqual(c.load('test_cache', 'k'), obj)
+        self.assertEqual(c.load('test_cache', 'k.'), obj)
        self.assertEqual(c.load('test_cache', 'y'), None)
-        self.assertEqual(c.load('test_cache2', 'k'), None)
+        self.assertEqual(c.load('test_cache2', 'k.'), None)
        c.remove()
        self.assertFalse(os.path.exists(self.test_dir))
-        self.assertEqual(c.load('test_cache', 'k'), None)
+        self.assertEqual(c.load('test_cache', 'k.'), None)


 if __name__ == '__main__':
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -78,7 +78,8 @@ class ArteTVPlus7IE(InfoExtractor):

    def _extract_from_webpage(self, webpage, video_id, lang):
        json_url = self._html_search_regex(
-            r'arte_vp_url="(.*?)"', webpage, 'json vp url')
+            [r'arte_vp_url="(.*?)"', r'data-url="([^"]+)"'],
+            webpage, 'json vp url')
        return self._extract_from_json_url(json_url, video_id, lang)

    def _extract_from_json_url(self, json_url, video_id, lang):
--- a/youtube_dl/extractor/cliphunter.py
+++ b/youtube_dl/extractor/cliphunter.py
@@ -1,11 +1,13 @@
 from __future__ import unicode_literals

+import json
 import re

 from .common import InfoExtractor
+from ..utils import int_or_none


-translation_table = {
+_translation_table = {
    'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
    'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
    'y': 'l', 'z': 'i',
@@ -13,6 +15,10 @@ translation_table = {
 }


+def _decode(s):
+    return ''.join(_translation_table.get(c, c) for c in s)
+
+
 class CliphunterIE(InfoExtractor):
    IE_NAME = 'cliphunter'

@@ -22,10 +28,14 @@ class CliphunterIE(InfoExtractor):
    '''
    _TEST = {
        'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
-        'file': '1012420.flv',
-        'md5': '15e7740f30428abf70f4223478dc1225',
+        'md5': 'a2ba71eebf523859fe527a61018f723e',
        'info_dict': {
+            'id': '1012420',
+            'ext': 'mp4',
            'title': 'Fun Jynx Maze solo',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'age_limit': 18,
+            'duration': 1317,
        }
    }

@@ -35,22 +45,55 @@ class CliphunterIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

+        video_title = self._search_regex(
+            r'mediaTitle = "([^"]+)"', webpage, 'title')
+
        pl_fiji = self._search_regex(
            r'pl_fiji = \'([^\']+)\'', webpage, 'video data')
        pl_c_qual = self._search_regex(
            r'pl_c_qual = "(.)"', webpage, 'video quality')
-        video_title = self._search_regex(
-            r'mediaTitle = "([^"]+)"', webpage, 'title')
-
-        video_url = ''.join(translation_table.get(c, c) for c in pl_fiji)
-
+        video_url = _decode(pl_fiji)
        formats = [{
            'url': video_url,
-            'format_id': pl_c_qual,
+            'format_id': 'default-%s' % pl_c_qual,
        }]

+        qualities_json = self._search_regex(
+            r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info')
+        qualities_data = json.loads(qualities_json)
+
+        for i, t in enumerate(
+                re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)):
+            quality_id, crypted_url = t
+            video_url = _decode(crypted_url)
+            f = {
+                'format_id': quality_id,
+                'url': video_url,
+                'quality': i,
+            }
+            if quality_id in qualities_data:
+                qd = qualities_data[quality_id]
+                m = re.match(
+                    r'''(?x)<b>(?P<width>[0-9]+)x(?P<height>[0-9]+)<\\/b>
+                        \s*\(\s*(?P<tbr>[0-9]+)\s*kb\\/s''', qd)
+                if m:
+                    f['width'] = int(m.group('width'))
+                    f['height'] = int(m.group('height'))
+                    f['tbr'] = int(m.group('tbr'))
+            formats.append(f)
+        self._sort_formats(formats)
+
+        thumbnail = self._search_regex(
+            r"var\s+mov_thumb\s*=\s*'([^']+)';",
+            webpage, 'thumbnail', fatal=False)
+        duration = int_or_none(self._search_regex(
+            r'pl_dur\s*=\s*([0-9]+)', webpage, 'duration', fatal=False))
+
        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
+            'duration': duration,
+            'age_limit': self._rta_search(webpage),
+            'thumbnail': thumbnail,
        }
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -3,18 +3,23 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..utils import (
+    remove_end,
+    parse_duration,
+)


 class NBAIE(InfoExtractor):
    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
    _TEST = {
        'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
-        'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
+        'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
        'info_dict': {
            'id': '0021200253-okc-bkn-recap.nba',
            'ext': 'mp4',
-            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
            'title': 'Thunder vs. Nets',
+            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
+            'duration': 181,
        },
    }

@@ -27,13 +32,18 @@ class NBAIE(InfoExtractor):
        video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'

        shortened_video_id = video_id.rpartition('/')[2]
-        title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
+        title = remove_end(
+            self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
+
+        description = self._og_search_description(webpage)
+        duration = parse_duration(
+            self._html_search_meta('duration', webpage, 'duration', fatal=False))

-        description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)

        return {
            'id': shortened_video_id,
            'url': video_url,
            'title': title,
            'description': description,
+            'duration': duration,
        }
--- a/youtube_dl/extractor/techtalks.py
+++ b/youtube_dl/extractor/techtalks.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -11,24 +13,30 @@ class TechTalksIE(InfoExtractor):
    _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'

    _TEST = {
-        u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
-        u'playlist': [
+        'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
+        'info_dict': {
+            'id': '57758',
+            'title': 'Learning Topic Models --- Going beyond SVD',
+        },
+        'playlist': [
            {
-                u'file': u'57758.flv',
-                u'info_dict': {
-                    u'title': u'Learning Topic Models --- Going beyond SVD',
+                'info_dict': {
+                    'id': '57758',
+                    'ext': 'flv',
+                    'title': 'Learning Topic Models --- Going beyond SVD',
                },
            },
            {
-                u'file': u'57758-slides.flv',
-                u'info_dict': {
-                    u'title': u'Learning Topic Models --- Going beyond SVD',
+                'info_dict': {
+                    'id': '57758-slides',
+                    'ext': 'flv',
+                    'title': 'Learning Topic Models --- Going beyond SVD',
                },
            },
        ],
-        u'params': {
+        'params': {
            # rtmp download
-            u'skip_download': True,
+            'skip_download': True,
        },
    }

@@ -36,30 +44,36 @@ class TechTalksIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        talk_id = mobj.group('id')
        webpage = self._download_webpage(url, talk_id)
-        rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage,
-            u'rtmp url')
-        play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
-            webpage, u'presenter play path')
+        rtmp_url = self._search_regex(
+            r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
+        play_path = self._search_regex(
+            r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
+            webpage, 'presenter play path')
        title = clean_html(get_element_by_attribute('class', 'title', webpage))
        video_info = {
-                'id': talk_id,
-                'title': title,
-                'url': rtmp_url,
-                'play_path': play_path,
-                'ext': 'flv',
-            }
+            'id': talk_id,
+            'title': title,
+            'url': rtmp_url,
+            'play_path': play_path,
+            'ext': 'flv',
+        }
        m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
        if m_slides is None:
            return video_info
        else:
-            return [
-                video_info,
-                # The slides video
-                {
-                    'id': talk_id + '-slides',
-                    'title': title,
-                    'url': rtmp_url,
-                    'play_path': m_slides.group(1),
-                    'ext': 'flv',
-                },
-            ]
+            return {
+                '_type': 'playlist',
+                'id': talk_id,
+                'title': title,
+                'entries': [
+                    video_info,
+                    # The slides video
+                    {
+                        'id': talk_id + '-slides',
+                        'title': title,
+                        'url': rtmp_url,
+                        'play_path': m_slides.group(1),
+                        'ext': 'flv',
+                    },
+                ],
+            }
--- a/youtube_dl/extractor/toypics.py
+++ b/youtube_dl/extractor/toypics.py
@@ -47,7 +47,7 @@ class ToypicsUserIE(InfoExtractor):
        'info_dict': {
            'id': 'Mikey',
        },
-        'playlist_mincount': 9917,
+        'playlist_mincount': 19,
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/washingtonpost.py
+++ b/youtube_dl/extractor/washingtonpost.py
@@ -13,6 +13,9 @@ class WashingtonPostIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
    _TEST = {
        'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
+        'info_dict': {
+            'title': 'Sinkhole of bureaucracy',
+        },
        'playlist': [{
            'md5': 'c3f4b4922ffa259243f68e928db2db8c',
            'info_dict': {
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.09.04.1'
+__version__ = '2014.09.04.3'
Author	SHA1	Message	Date
Philipp Hagemeister	16e6f396b4	release 2014.09.04.3	2014-09-04 16:20:17 +02:00
Sergey M․	c6ec6b2e8b	[arte.tv:+7] Add another pattern for json vp url	2014-09-04 20:44:51 +07:00
Sergey M․	7bbc6428b6	[nba] Modernize	2014-09-04 20:06:14 +07:00
Sergey M․	c1a3c9ddb2	[techtalks] Modernize	2014-09-04 19:48:29 +07:00
Sergey M․	feec0f56f5	[toypics:user] Update test playlist count	2014-09-04 19:37:40 +07:00
Sergey M․	8029857d27	[washingtonpost] Add playlist title to test	2014-09-04 19:34:40 +07:00
Philipp Hagemeister	aa61802c1e	release 2014.09.04.2	2014-09-04 06:40:44 +02:00
Philipp Hagemeister	f54aee0209	[cliphunter] Add support for more formats	2014-09-04 06:40:15 +02:00
Philipp Hagemeister	5df921b0e3	[test_cache] Add a dot in the file name	2014-09-04 04:51:52 +02:00