release 2014.09.06

Merge branch 'Rudloff-unistra_hd'
[unistra] Modernize
2025-12-12 17:12:43 +01:00 · 2014-09-06 15:26:38 +02:00 · 2014-09-06 15:22:01 +07:00 · 2014-09-06 15:21:27 +07:00 · 2014-09-05 22:26:31 +07:00 · 2014-09-05 22:05:36 +07:00
13 changed files with 256 additions and 74 deletions
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -43,16 +43,16 @@ class TestCache(unittest.TestCase):
        })
        c = Cache(ydl)
        obj = {'x': 1, 'y': ['ä', '\\a', True]}
-        self.assertEqual(c.load('test_cache', 'k'), None)
-        c.store('test_cache', 'k', obj)
+        self.assertEqual(c.load('test_cache', 'k.'), None)
+        c.store('test_cache', 'k.', obj)
        self.assertEqual(c.load('test_cache', 'k2'), None)
        self.assertFalse(_is_empty(self.test_dir))
-        self.assertEqual(c.load('test_cache', 'k'), obj)
+        self.assertEqual(c.load('test_cache', 'k.'), obj)
        self.assertEqual(c.load('test_cache', 'y'), None)
-        self.assertEqual(c.load('test_cache2', 'k'), None)
+        self.assertEqual(c.load('test_cache2', 'k.'), None)
        c.remove()
        self.assertFalse(os.path.exists(self.test_dir))
-        self.assertEqual(c.load('test_cache', 'k'), None)
+        self.assertEqual(c.load('test_cache', 'k.'), None)


 if __name__ == '__main__':
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -74,6 +74,7 @@ __authors__  = (
    'Keith Beckman',
    'Ole Ernst',
    'Aaron McDaniel (mcd1992)',
+    'Magnus Kolstad',
 )

 __license__ = 'Public Domain'
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -193,7 +193,8 @@ class HttpFD(FileDownloader):
            self.to_stderr(u"\n")
            self.report_error(u'Did not get any data blocks')
            return False
-        stream.close()
+        if tmpfilename != u'-':
+            stream.close()
        self.report_finish(data_len_str, (time.time() - start))
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -67,6 +67,7 @@ from .dailymotion import (
    DailymotionUserIE,
 )
 from .daum import DaumIE
+from .dbtv import DBTVIE
 from .dfb import DFBIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -78,7 +78,8 @@ class ArteTVPlus7IE(InfoExtractor):

    def _extract_from_webpage(self, webpage, video_id, lang):
        json_url = self._html_search_regex(
-            r'arte_vp_url="(.*?)"', webpage, 'json vp url')
+            [r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
+            webpage, 'json vp url')
        return self._extract_from_json_url(json_url, video_id, lang)

    def _extract_from_json_url(self, json_url, video_id, lang):
--- a/youtube_dl/extractor/cliphunter.py
+++ b/youtube_dl/extractor/cliphunter.py
@@ -1,11 +1,13 @@
 from __future__ import unicode_literals

+import json
 import re

 from .common import InfoExtractor
+from ..utils import int_or_none


-translation_table = {
+_translation_table = {
    'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
    'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
    'y': 'l', 'z': 'i',
@@ -13,6 +15,10 @@ translation_table = {
 }


+def _decode(s):
+    return ''.join(_translation_table.get(c, c) for c in s)
+
+
 class CliphunterIE(InfoExtractor):
    IE_NAME = 'cliphunter'

@@ -22,10 +28,14 @@ class CliphunterIE(InfoExtractor):
    '''
    _TEST = {
        'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
-        'file': '1012420.flv',
-        'md5': '15e7740f30428abf70f4223478dc1225',
+        'md5': 'a2ba71eebf523859fe527a61018f723e',
        'info_dict': {
+            'id': '1012420',
+            'ext': 'mp4',
            'title': 'Fun Jynx Maze solo',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'age_limit': 18,
+            'duration': 1317,
        }
    }

@@ -35,22 +45,55 @@ class CliphunterIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

+        video_title = self._search_regex(
+            r'mediaTitle = "([^"]+)"', webpage, 'title')
+
        pl_fiji = self._search_regex(
            r'pl_fiji = \'([^\']+)\'', webpage, 'video data')
        pl_c_qual = self._search_regex(
            r'pl_c_qual = "(.)"', webpage, 'video quality')
-        video_title = self._search_regex(
-            r'mediaTitle = "([^"]+)"', webpage, 'title')
-
-        video_url = ''.join(translation_table.get(c, c) for c in pl_fiji)
-
+        video_url = _decode(pl_fiji)
        formats = [{
            'url': video_url,
-            'format_id': pl_c_qual,
+            'format_id': 'default-%s' % pl_c_qual,
        }]

+        qualities_json = self._search_regex(
+            r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info')
+        qualities_data = json.loads(qualities_json)
+
+        for i, t in enumerate(
+                re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)):
+            quality_id, crypted_url = t
+            video_url = _decode(crypted_url)
+            f = {
+                'format_id': quality_id,
+                'url': video_url,
+                'quality': i,
+            }
+            if quality_id in qualities_data:
+                qd = qualities_data[quality_id]
+                m = re.match(
+                    r'''(?x)<b>(?P<width>[0-9]+)x(?P<height>[0-9]+)<\\/b>
+                        \s*\(\s*(?P<tbr>[0-9]+)\s*kb\\/s''', qd)
+                if m:
+                    f['width'] = int(m.group('width'))
+                    f['height'] = int(m.group('height'))
+                    f['tbr'] = int(m.group('tbr'))
+            formats.append(f)
+        self._sort_formats(formats)
+
+        thumbnail = self._search_regex(
+            r"var\s+mov_thumb\s*=\s*'([^']+)';",
+            webpage, 'thumbnail', fatal=False)
+        duration = int_or_none(self._search_regex(
+            r'pl_dur\s*=\s*([0-9]+)', webpage, 'duration', fatal=False))
+
        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
+            'duration': duration,
+            'age_limit': self._rta_search(webpage),
+            'thumbnail': thumbnail,
        }
--- a/youtube_dl/extractor/dbtv.py
+++ b/youtube_dl/extractor/dbtv.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    clean_html,
+)
+
+
+class DBTVIE(InfoExtractor):
+    _VALID_URL = r'http://dbtv\.no/(?P<id>[0-9]+)#(?P<display_id>.+)'
+    _TEST = {
+        'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
+        'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc',
+        'info_dict': {
+            'id': '33100',
+            'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
+            'ext': 'mp4',
+            'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
+            'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'timestamp': 1404039863.438,
+            'upload_date': '20140629',
+            'duration': 69.544,
+            'view_count': int,
+            'categories': list,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        data = self._download_json(
+            'http://api.dbtv.no/discovery/%s' % video_id, display_id)
+
+        video = data['playlist'][0]
+
+        formats = [{
+            'url': f['URL'],
+            'vcodec': f.get('container'),
+            'width': int_or_none(f.get('width')),
+            'height': int_or_none(f.get('height')),
+            'vbr': float_or_none(f.get('rate'), 1000),
+            'filesize': int_or_none(f.get('size')),
+        } for f in video['renditions'] if 'URL' in f]
+
+        if not formats:
+            for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]:
+                if url_key in video:
+                    formats.append({
+                        'url': video[url_key],
+                        'format_id': format_id,
+                    })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video['id'],
+            'display_id': display_id,
+            'title': video['title'],
+            'description': clean_html(video['desc']),
+            'thumbnail': video.get('splash') or video.get('thumb'),
+            'timestamp': float_or_none(video.get('publishedAt'), 1000),
+            'duration': float_or_none(video.get('length'), 1000),
+            'view_count': int_or_none(video.get('views')),
+            'categories': video.get('tags'),
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -3,18 +3,23 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..utils import (
+    remove_end,
+    parse_duration,
+)


 class NBAIE(InfoExtractor):
    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
    _TEST = {
        'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
-        'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
+        'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
        'info_dict': {
            'id': '0021200253-okc-bkn-recap.nba',
            'ext': 'mp4',
-            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
            'title': 'Thunder vs. Nets',
+            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
+            'duration': 181,
        },
    }

@@ -27,13 +32,18 @@ class NBAIE(InfoExtractor):
        video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'

        shortened_video_id = video_id.rpartition('/')[2]
-        title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
+        title = remove_end(
+            self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
+
+        description = self._og_search_description(webpage)
+        duration = parse_duration(
+            self._html_search_meta('duration', webpage, 'duration', fatal=False))

-        description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)

        return {
            'id': shortened_video_id,
            'url': video_url,
            'title': title,
            'description': description,
+            'duration': duration,
        }
--- a/youtube_dl/extractor/techtalks.py
+++ b/youtube_dl/extractor/techtalks.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -11,24 +13,30 @@ class TechTalksIE(InfoExtractor):
    _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'

    _TEST = {
-        u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
-        u'playlist': [
+        'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
+        'info_dict': {
+            'id': '57758',
+            'title': 'Learning Topic Models --- Going beyond SVD',
+        },
+        'playlist': [
            {
-                u'file': u'57758.flv',
-                u'info_dict': {
-                    u'title': u'Learning Topic Models --- Going beyond SVD',
+                'info_dict': {
+                    'id': '57758',
+                    'ext': 'flv',
+                    'title': 'Learning Topic Models --- Going beyond SVD',
                },
            },
            {
-                u'file': u'57758-slides.flv',
-                u'info_dict': {
-                    u'title': u'Learning Topic Models --- Going beyond SVD',
+                'info_dict': {
+                    'id': '57758-slides',
+                    'ext': 'flv',
+                    'title': 'Learning Topic Models --- Going beyond SVD',
                },
            },
        ],
-        u'params': {
+        'params': {
            # rtmp download
-            u'skip_download': True,
+            'skip_download': True,
        },
    }

@@ -36,30 +44,36 @@ class TechTalksIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        talk_id = mobj.group('id')
        webpage = self._download_webpage(url, talk_id)
-        rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage,
-            u'rtmp url')
-        play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
-            webpage, u'presenter play path')
+        rtmp_url = self._search_regex(
+            r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
+        play_path = self._search_regex(
+            r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
+            webpage, 'presenter play path')
        title = clean_html(get_element_by_attribute('class', 'title', webpage))
        video_info = {
-                'id': talk_id,
-                'title': title,
-                'url': rtmp_url,
-                'play_path': play_path,
-                'ext': 'flv',
-            }
+            'id': talk_id,
+            'title': title,
+            'url': rtmp_url,
+            'play_path': play_path,
+            'ext': 'flv',
+        }
        m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
        if m_slides is None:
            return video_info
        else:
-            return [
-                video_info,
-                # The slides video
-                {
-                    'id': talk_id + '-slides',
-                    'title': title,
-                    'url': rtmp_url,
-                    'play_path': m_slides.group(1),
-                    'ext': 'flv',
-                },
-            ]
+            return {
+                '_type': 'playlist',
+                'id': talk_id,
+                'title': title,
+                'entries': [
+                    video_info,
+                    # The slides video
+                    {
+                        'id': talk_id + '-slides',
+                        'title': title,
+                        'url': rtmp_url,
+                        'play_path': m_slides.group(1),
+                        'ext': 'flv',
+                    },
+                ],
+            }
--- a/youtube_dl/extractor/toypics.py
+++ b/youtube_dl/extractor/toypics.py
@@ -47,7 +47,7 @@ class ToypicsUserIE(InfoExtractor):
        'info_dict': {
            'id': 'Mikey',
        },
-        'playlist_mincount': 9917,
+        'playlist_mincount': 19,
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/unistra.py
+++ b/youtube_dl/extractor/unistra.py
@@ -1,32 +1,66 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
+from ..utils import qualities
+

 class UnistraIE(InfoExtractor):
-    _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)'
+    _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'

-    _TEST = {
-        u'url': u'http://utv.unistra.fr/video.php?id_video=154',
-        u'file': u'154.mp4',
-        u'md5': u'736f605cfdc96724d55bb543ab3ced24',
-        u'info_dict': {
-            u'title': u'M!ss Yella',
-            u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
+    _TESTS = [
+        {
+            'url': 'http://utv.unistra.fr/video.php?id_video=154',
+            'md5': '736f605cfdc96724d55bb543ab3ced24',
+            'info_dict': {
+                'id': '154',
+                'ext': 'mp4',
+                'title': 'M!ss Yella',
+                'description': 'md5:104892c71bd48e55d70b902736b81bbf',
+            },
        },
-    }
+        {
+            'url': 'http://utv.unistra.fr/index.php?id_video=437',
+            'md5': '1ddddd6cccaae76f622ce29b8779636d',
+            'info_dict': {
+                'id': '437',
+                'ext': 'mp4',
+                'title': 'Prix Louise Weiss 2014',
+                'description': 'md5:cc3a8735f079f4fb6b0b570fc10c135a',
+            },
+        }
+    ]

    def _real_extract(self, url):
-        id = re.match(self._VALID_URL, url).group(1)
-        webpage = self._download_webpage(url, id)
-        file = re.search(r'file: "(.*?)",', webpage).group(1)
-        title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title')
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')

-        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file
+        webpage = self._download_webpage(url, video_id)

-        return {'id': id,
-                'title': title,
-                'ext': 'mp4',
-                'url': video_url,
-                'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL),
-                'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'),
-                }
+        files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage))
+
+        quality = qualities(['SD', 'HD'])
+        formats = []
+        for file_path in files:
+            format_id = 'HD' if file_path.endswith('-HD.mp4') else 'SD'
+            formats.append({
+                'url': 'http://vod-flash.u-strasbg.fr:8080%s' % file_path,
+                'format_id': format_id,
+                'quality': quality(format_id)
+            })
+
+        title = self._html_search_regex(
+            r'<title>UTV - (.*?)</', webpage, 'title')
+        description = self._html_search_regex(
+            r'<meta name="Description" content="(.*?)"', webpage, 'description', flags=re.DOTALL)
+        thumbnail = self._search_regex(
+            r'image: "(.*?)"', webpage, 'thumbnail')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats
+        }
--- a/youtube_dl/extractor/washingtonpost.py
+++ b/youtube_dl/extractor/washingtonpost.py
@@ -13,6 +13,9 @@ class WashingtonPostIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
    _TEST = {
        'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
+        'info_dict': {
+            'title': 'Sinkhole of bureaucracy',
+        },
        'playlist': [{
            'md5': 'c3f4b4922ffa259243f68e928db2db8c',
            'info_dict': {
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.09.04.1'
+__version__ = '2014.09.06'
Author	SHA1	Message	Date
Philipp Hagemeister	e154762c74	release 2014.09.06	2014-09-06 15:26:38 +02:00
Sergey M․	ba92ab3d05	Merge branch 'Rudloff-unistra_hd'	2014-09-06 15:22:01 +07:00
Sergey M․	a2f0cdc074	[unistra] Modernize	2014-09-06 15:21:27 +07:00
Sergey M․	70a1ecd2c1	Merge branch 'unistra_hd' of https://github.com/Rudloff/youtube-dl into Rudloff-unistra_hd	2014-09-05 22:26:31 +07:00
Sergey M․	88a23aef5a	[http] Avoid closing stdout (Fixes #3686 )	2014-09-05 22:05:36 +07:00
Sergey M․	140d8d77b3	Credit @mrkolby for dbtv.no (#3685 )	2014-09-05 20:00:12 +07:00
Sergey M․	665cd96929	Merge branch 'mrkolby-dbtv'	2014-09-05 19:54:10 +07:00
Sergey M․	4d067a58ca	[dbtv] Simplify, modernize, extract all formats	2014-09-05 19:53:53 +07:00
Sergey M․	1c1cff6a52	Merge branch 'dbtv' of https://github.com/mrkolby/youtube-dl into mrkolby-dbtv Conflicts: youtube_dl/extractor/__init__.py	2014-09-05 19:01:11 +07:00
Magnus Kolstad	f063a04f07	[dbtv] Add new extractor	2014-09-05 11:24:30 +02:00
Pierre Rudloff	af8812bb9b	Add HD detection to Unistra	2014-09-04 22:22:19 +02:00
Sergey M․	f2d9e3a370	[arte.tv:+7] Allow single quotes for json vp url regexes (Closes #3676 )	2014-09-05 01:17:16 +07:00
Philipp Hagemeister	16e6f396b4	release 2014.09.04.3	2014-09-04 16:20:17 +02:00
Sergey M․	c6ec6b2e8b	[arte.tv:+7] Add one another one pattern for json vp url	2014-09-04 20:44:51 +07:00
Sergey M․	7bbc6428b6	[nba] Modernize	2014-09-04 20:06:14 +07:00
Sergey M․	c1a3c9ddb2	[techtalks] Modernize	2014-09-04 19:48:29 +07:00
Sergey M․	feec0f56f5	[toypics:user] Update test playlist count	2014-09-04 19:37:40 +07:00
Sergey M․	8029857d27	[washingtonpost] Add playlist title to test	2014-09-04 19:34:40 +07:00
Philipp Hagemeister	aa61802c1e	release 2014.09.04.2	2014-09-04 06:40:44 +02:00
Philipp Hagemeister	f54aee0209	[cliphunter] Add support for more formats	2014-09-04 06:40:15 +02:00
Philipp Hagemeister	5df921b0e3	[test_cache] Add a dot in the file name	2014-09-04 04:51:52 +02:00