release 2013.08.29

[youtube] update algo for length 84
Signatures of length 84 only appear sporadically; the algorithm is nearly identical to the one for length 86.
2025-12-12 17:12:43 +01:00 · 2013-08-29 23:29:34 +02:00 · 2013-08-29 22:44:29 +02:00 · 2013-08-29 22:33:58 +02:00 · 2013-08-29 21:51:09 +02:00 · 2013-08-29 21:39:36 +02:00
11 changed files with 95 additions and 26 deletions
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -20,15 +20,15 @@ tests = [
    # 87
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
     "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
-    # 86 - vflh9ybst 2013/08/23
+    # 86 - vflg0g8PQ 2013/08/29
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
-     "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
+     ">/?;}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
    # 85
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
     ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
-    # 84 - vflh9ybst 2013/08/23 (sporadic)
+    # 84 - vflg0g8PQ 2013/08/29 (sporadic)
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
-     "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),
+     ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
    # 83
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
     ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -127,12 +127,11 @@ def generator(test_case):
                    info_dict = json.load(infof)
                for (info_field, expected) in tc.get('info_dict', {}).items():
                    if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                        self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
+                        got = 'md5:' + md5(info_dict.get(info_field))
                    else:
                        got = info_dict.get(info_field)
-                        self.assertEqual(
-                            expected, got,
-                            u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+                    self.assertEqual(expected, got,
+                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))

                # If checkable fields are missing from the test case, print the info_dict
                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -59,6 +59,7 @@ from .myvideo import MyVideoIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
 from .ooyala import OoyalaIE
+from .orf import ORFIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .pornotube import PornotubeIE
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -150,7 +150,7 @@ class InfoExtractor(object):
        if m:
            encoding = m.group(1)
        else:
-            m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]',
+            m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
                          webpage_bytes[:1024])
            if m:
                encoding = m.group(1).decode('ascii')
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@@ -13,7 +13,7 @@ class IGNIE(InfoExtractor):
    Some videos of it.ign.com are also supported
    """

-    _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
+    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'
    IE_NAME = u'ign.com'

    _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
@@ -41,7 +41,11 @@ class IGNIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        name_or_id = mobj.group('name_or_id')
+        page_type = mobj.group('type')
        webpage = self._download_webpage(url, name_or_id)
+        if page_type == 'articles':
+            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
+            return self.url_result(video_url, ie='IGN')
        video_id = self._find_video_id(webpage)
        result = self._get_video_info(video_id)
        description = self._html_search_regex(self._DESCRIPTION_RE,
@@ -68,7 +72,7 @@ class IGNIE(InfoExtractor):
 class OneUPIE(IGNIE):
    """Extractor for 1up.com, it uses the ign videos system."""

-    _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
+    _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
    IE_NAME = '1up.com'

    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
--- a/youtube_dl/extractor/mit.py
+++ b/youtube_dl/extractor/mit.py
@@ -25,23 +25,21 @@ class TechTVMITIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        webpage = self._download_webpage(
+        raw_page = self._download_webpage(
            'http://techtv.mit.edu/videos/%s' % video_id, video_id)
-        embed_page = self._download_webpage(
-            'http://techtv.mit.edu/embeds/%s/' % video_id, video_id,
-            note=u'Downloading embed page')
+        clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)

        base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
-            embed_page, u'base url')
-        formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page,
+            raw_page, u'base url')
+        formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
            u'video formats')
        formats = json.loads(formats_json)
        formats = sorted(formats, key=lambda f: f['bitrate'])

-        title = get_element_by_id('edit-title', webpage)
-        description = clean_html(get_element_by_id('edit-description', webpage))
+        title = get_element_by_id('edit-title', clean_page)
+        description = clean_html(get_element_by_id('edit-description', clean_page))
        thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
-            embed_page, u'thumbnail', flags=re.DOTALL)
+            raw_page, u'thumbnail', flags=re.DOTALL)

        return {'id': video_id,
                'title': title,
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+
+import re
+import xml.etree.ElementTree
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    ExtractorError,
+    find_xpath_attr,
+)
+
+class ORFIE(InfoExtractor):
+    _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter',
+        u'file': u'6566957.flv',
+        u'info_dict': {
+            u'title': u'Wetter',
+            u'description': u'Christa Kummer, Marcus Wadsak und Kollegen  präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at',
+        },
+        u'params': {
+            # It uses rtmp
+            u'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        webpage = self._download_webpage(url, playlist_id)
+
+        flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml')
+        flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0]
+        flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))
+        playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"')
+        playlist = json.loads(playlist_json)
+
+        videos = []
+        ns = '{http://tempuri.org/XMLSchema.xsd}'
+        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
+        webpage_description = self._og_search_description(webpage)
+        for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
+            # Get best quality url
+            rtmp_url = None
+            for q in ['Q6A', 'Q4A', 'Q1A']:
+                video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
+                if video_url is not None:
+                    rtmp_url = video_url.text
+                    break
+            if rtmp_url is None:
+                raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
+            description = self._html_search_regex(
+                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
+                u'description', default=webpage_description, flags=re.DOTALL)
+            videos.append({
+                '_type': 'video',
+                'id': info['id'],
+                'title': info['title'],
+                'url': rtmp_url,
+                'ext': 'flv',
+                'description': description,
+                })
+
+        return videos
--- a/youtube_dl/extractor/unistra.py
+++ b/youtube_dl/extractor/unistra.py
@@ -11,7 +11,7 @@ class UnistraIE(InfoExtractor):
        u'md5': u'736f605cfdc96724d55bb543ab3ced24',
        u'info_dict': {
            u'title': u'M!ss Yella',
-            u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc',
+            u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
        },
    }

--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -335,7 +335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
-                u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
+                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
@@ -423,11 +423,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        elif len(s) == 87:
            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
        elif len(s) == 86:
-            return s[5:40] + s[3] + s[41:48] + s[0] + s[49:86]
+            return s[83:36:-1] + s[0] + s[35:2:-1]
        elif len(s) == 85:
            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
        elif len(s) == 84:
-            return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]
+            return s[81:36:-1] + s[0] + s[35:2:-1]
        elif len(s) == 83:
            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
        elif len(s) == 82:
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -213,7 +213,7 @@ if sys.version_info >= (2,7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z]+$', key)
-        assert re.match(r'^[a-zA-Z@\s]*$', val)
+        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
        expr = xpath + u"[@%s='%s']" % (key, val)
        return node.find(expr)
 else:
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.08.28.1'
+__version__ = '2013.08.29'
Author	SHA1	Message	Date
Philipp Hagemeister	3243d0f7b6	release 2013.08.29	2013-08-29 23:29:34 +02:00
Jaime Marquínez Ferrándiz	23b00bc0e4	[youtube] update algo for length 84 Only appears sometimes, nearly identical to length 86.	2013-08-29 22:44:29 +02:00
Jaime Marquínez Ferrándiz	52e1eea18b	[youtube] update algo for length 86 (fixes #1349)	2013-08-29 22:33:58 +02:00
Jaime Marquínez Ferrándiz	ee80d66727	[ign] update 1up extractor to work with the updated IGNIE	2013-08-29 21:51:09 +02:00
Jaime Marquínez Ferrándiz	f1fb2d12b3	[ign] extract videos from articles pages	2013-08-29 21:39:36 +02:00
Jaime Marquínez Ferrándiz	deb2c73212	Merge pull request #1347 from whydoubt/fix_orf_at Fix orf.at extractor by adding file coding mark	2013-08-29 11:05:38 -07:00
Jeff Smith	8928491074	Fix orf.at extractor by adding file coding mark	2013-08-29 12:51:38 -05:00
Jaime Marquínez Ferrándiz	545434670b	Add an extractor for orf.at (closes #1346). Make find_xpath_attr also accept numbers in the value.	2013-08-29 19:16:07 +02:00
Jaime Marquínez Ferrándiz	54fda45bac	Merge pull request #1342 from whydoubt/fix_mit_26 Fix MIT extractor for Python 2.6	2013-08-29 13:42:08 +02:00
Jaime Marquínez Ferrándiz	c7bf7366bc	Update descriptions checksum for some test for Unistra and Youtube	2013-08-29 13:41:59 +02:00
Jaime Marquínez Ferrándiz	b7052e5087	Also print the field that fails if it is a md5 checksum	2013-08-29 12:15:45 +02:00
Jaime Marquínez Ferrándiz	0d75ae2ce3	Fix detection of the webpage charset if it's declared using ' instead of ", as in <meta charset='utf-8'/>	2013-08-29 11:35:15 +02:00
Jeff Smith	b5ba7b9dcf	Fix MIT extractor for Python 2.6. The HTML for the MIT page does not parse cleanly for Python 2.6 due to script tags within an actual script element. The offending piece is inside a comment block, so removing all such comment blocks fixes the parsing.	2013-08-28 14:24:42 -05:00