Compare commits

23 Commits

Author SHA1 Message Date
Philipp Hagemeister
49f3c16543 release 2014.08.23 2014-08-23 15:24:31 +02:00
Philipp Hagemeister
2ef6fcb5d8 [sbs] Add new extractor (Fixes #3566) 2014-08-23 15:20:56 +02:00
Philipp Hagemeister
38fc045253 [rtlnl] Remove unused code 2014-08-23 15:05:21 +02:00
Philipp Hagemeister
af1fd929c6 [patreon] Remove unused import 2014-08-23 15:04:11 +02:00
Philipp Hagemeister
b7b04c9234 [vodlocker] Allow title to end with a <br> 2014-08-23 14:39:47 +02:00
Sergey M․
bc0bb6fd30 [movieclips] Add extractor (Closes #3554) 2014-08-23 17:44:56 +07:00
Philipp Hagemeister
430826c9d4 Merge pull request #3568 from MikeCol/xhamster_load
changed _VALID_URL to allow for country specific subdomains
2014-08-22 22:46:42 +02:00
MikeCol
68909f0c4e changed _VALID_URL to allow for country specific prefixes 2014-08-22 22:17:07 +02:00
Philipp Hagemeister
9d048a17d8 [rtve.es:live] Start supporting the 24h channel 2014-08-22 18:47:49 +02:00
Philipp Hagemeister
492641d10a release 2014.08.22.3 2014-08-22 18:41:43 +02:00
Philipp Hagemeister
2b9faf5542 [rtve] Add support for live stream
At the moment, only RTVE-1 seems to work flawlessly.
-2 seems geoblocked right now.
-TDP doesn't seem to be available outside of Spain.
2014-08-22 18:40:28 +02:00
Philipp Hagemeister
ed2d6a1960 [generic] Simplify playlist support (#2948) 2014-08-22 18:19:56 +02:00
Philipp Hagemeister
be843678b1 [YouTubeDL] Correct handling of age_limit = None in result 2014-08-22 17:46:57 +02:00
Philipp Hagemeister
c71dfccc98 Merge remote-tracking branch 'anovicecodemonkey/generic-data-video-url'
Conflicts:
	youtube_dl/extractor/generic.py
2014-08-22 17:40:36 +02:00
Philipp Hagemeister
1a9ccac7c1 Merge remote-tracking branch 'origin/master' 2014-08-22 17:38:11 +02:00
Philipp Hagemeister
e330d59abb [playfm] Add extractor (Fixes #3538) 2014-08-22 17:38:06 +02:00
Sergey M․
394df6d7d0 [nuvid] Adapt to latest layout changes 2014-08-22 21:41:51 +07:00
Philipp Hagemeister
218f754940 [README] Add thumbnail to _TEST example
While it's not mandatory, extractors are highly encouraged to provide a thumbnail field.
2014-08-22 11:30:49 +02:00
Philipp Hagemeister
a053c3493a [test_YoutubeDL] Reorder formats (#3542) 2014-08-22 03:44:30 +02:00
anovicecodemonkey
37e3cbe22e Move duplicate check to generic.py 2014-06-01 01:16:35 +09:30
anovicecodemonkey
610134730a Add a _TEST_ 2014-05-21 19:25:37 +09:30
anovicecodemonkey
212a5e28ba Add a duplicate check to /extractor/common.py playlist_result function 2014-05-21 19:04:55 +09:30
anovicecodemonkey
3442b30ab2 [generic] Support data-video-url for YouTube embeds (Fixes #2862) 2014-05-18 23:15:09 +09:30
16 changed files with 389 additions and 74 deletions

View File

@@ -429,6 +429,7 @@ If you want to add support for a new site, you can follow this quick list (assum
            'id': '42',
            'ext': 'mp4',
            'title': 'Video title goes here',
            'thumbnail': 're:^https?://.*\.jpg$',
            # TODO more properties, either as:
            # * A value
            # * MD5 checksum; start the string with md5:

View File

@@ -221,7 +221,7 @@ class TestFormatSelection(unittest.TestCase):
            '138', '137', '248', '136', '247', '135', '246',
            '245', '244', '134', '243', '133', '242', '160',
            # Dash audio
            '141', '172', '140', '139', '171',
            '141', '172', '140', '171', '139',
        ]

        for f1id, f2id in zip(order, order[1:]):

View File

@@ -480,7 +480,10 @@ class YoutubeDL(object):
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            if age_limit < info_dict.get('age_limit', 0):
            actual_age_limit = info_dict.get('age_limit')
            if actual_age_limit is None:
                actual_age_limit = 0
            if age_limit < actual_age_limit:
                return 'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
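
The point of the change: dict.get() only falls back to its default when the key is absent, not when it is present with a None value. A minimal stand-alone sketch of the pitfall, with a plain dict standing in for info_dict and made-up values:

    info_dict = {'age_limit': None}  # key present, but the value is None

    # The old check: .get() returns None here, not 0
    print(info_dict.get('age_limit', 0))  # None

    # On Python 3, `18 < None` raises TypeError; on Python 2 it compares arbitrarily.
    # The new code normalizes None to 0 first:
    actual_age_limit = info_dict.get('age_limit')
    if actual_age_limit is None:
        actual_age_limit = 0
    print(18 < actual_age_limit)  # False -- the entry is not skipped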

View File

@@ -194,6 +194,7 @@ from .mooshare import MooshareIE
from .morningstar import MorningstarIE
from .motherless import MotherlessIE
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
from .moviezine import MoviezineIE
from .movshare import MovShareIE
from .mtv import (
@@ -243,6 +244,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .playfm import PlayFMIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
@@ -263,7 +265,7 @@ from .rtbf import RTBFIE
from .rtlnl import RtlXlIE
from .rtlnow import RTLnowIE
from .rts import RTSIE
from .rtve import RTVEALaCartaIE
from .rtve import RTVEALaCartaIE, RTVELiveIE
from .ruhd import RUHDIE
from .rutube import (
    RutubeIE,
@@ -274,6 +276,7 @@ from .rutube import (
from .rutv import RUTVIE
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .servingsys import ServingSysIE

View File

@@ -16,6 +16,7 @@ from ..utils import (
    ExtractorError,
    HEADRequest,
    orderedSet,
    parse_xml,
    smuggle_url,
    unescapeHTML,
@@ -289,6 +290,22 @@ class GenericIE(InfoExtractor):
                'description': 'Mario\'s life in the fast lane has never looked so good.',
            },
        },
        # YouTube embed via <data-embed-url="">
        {
            'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
            'info_dict': {
                'id': 'jpSGZsgga_I',
                'ext': 'mp4',
                'title': 'Asphalt 8: Airborne - Launch Trailer',
                'uploader': 'Gameloft',
                'uploader_id': 'gameloft',
                'upload_date': '20130821',
                'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a',
            },
            'params': {
                'skip_download': True,
            }
        }
    ]

    def report_download_webpage(self, video_id):
@@ -479,6 +496,12 @@ class GenericIE(InfoExtractor):
        video_uploader = self._search_regex(
            r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')

        # Helper method
        def _playlist_from_matches(matches, getter, ie=None):
            urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
            return self.playlist_result(
                urlrs, playlist_id=video_id, playlist_title=video_title)

        # Look for BrightCove:
        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
        if bc_urls:
@@ -514,6 +537,7 @@ class GenericIE(InfoExtractor):
        matches = re.findall(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*
            )
@@ -522,19 +546,15 @@ class GenericIE(InfoExtractor):
                (?:embed|v)/.+?)
            \1''', webpage)
        if matches:
            urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
                     for tuppl in matches]
            return self.playlist_result(
                urlrs, playlist_id=video_id, playlist_title=video_title)
            return _playlist_from_matches(
                matches, lambda m: unescapeHTML(m[1]), ie='Youtube')

        # Look for embedded Dailymotion player
        matches = re.findall(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
        if matches:
            urlrs = [self.url_result(unescapeHTML(tuppl[1]))
                     for tuppl in matches]
            return self.playlist_result(
                urlrs, playlist_id=video_id, playlist_title=video_title)
            return _playlist_from_matches(
                matches, lambda m: unescapeHTML(m[1]))

        # Look for embedded Wistia player
        match = re.search(
@@ -648,10 +668,8 @@ class GenericIE(InfoExtractor):
        # Look for funnyordie embed
        matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
        if matches:
            urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
                     for eurl in matches]
            return self.playlist_result(
                urlrs, playlist_id=video_id, playlist_title=video_title)
            return _playlist_from_matches(
                matches, getter=unescapeHTML, ie='FunnyOrDie')

        # Look for embedded RUTV player
        rutv_url = RUTVIE._extract_url(webpage)
@@ -713,6 +731,13 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Yahoo')

        # Look for embedded sbs.com.au player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'SBS')

        # Start with something easy: JW Player in SWFObject
        found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if not found:
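
For illustration, the kind of markup the new data-video-url branch is aimed at, as a hypothetical page snippet built around the Asphalt 8 test case above; the pattern below is a simplified stand-in for the full regex in the diff:

    import re

    webpage = '<div class="video" data-video-url="//www.youtube.com/embed/jpSGZsgga_I"></div>'
    m = re.search(
        r'data-video-url=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1',
        webpage)
    print(m.group('url'))  # //www.youtube.com/embed/jpSGZsgga_I

The new _playlist_from_matches helper then wraps such matches in a playlist, deduplicating the URLs via orderedSet before calling playlist_result.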

View File

@@ -0,0 +1,78 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    compat_str,
    clean_html,
)


class MovieClipsIE(InfoExtractor):
    _VALID_URL = r'https?://movieclips\.com/(?P<id>[\da-zA-Z]+)(?:-(?P<display_id>[\da-z-]+))?'
    _TEST = {
        'url': 'http://movieclips.com/Wy7ZU-my-week-with-marilyn-movie-do-you-love-me/',
        'info_dict': {
            'id': 'Wy7ZU',
            'display_id': 'my-week-with-marilyn-movie-do-you-love-me',
            'ext': 'mp4',
            'title': 'My Week with Marilyn - Do You Love Me?',
            'description': 'md5:e86795bd332fe3cff461e7c8dc542acb',
            'thumbnail': 're:^https?://.*\.jpg$',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')
        show_id = display_id or video_id

        config = self._download_xml(
            'http://config.movieclips.com/player/config/%s' % video_id,
            show_id, 'Downloading player config')

        if config.find('./country-region').text == 'false':
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, config.find('./region_alert').text), expected=True)

        properties = config.find('./video/properties')
        smil_file = properties.attrib['smil_file']

        smil = self._download_xml(smil_file, show_id, 'Downloading SMIL')
        base_url = smil.find('./head/meta').attrib['base']

        formats = []
        for video in smil.findall('./body/switch/video'):
            vbr = int(video.attrib['system-bitrate']) / 1000
            src = video.attrib['src']
            formats.append({
                'url': base_url,
                'play_path': src,
                'ext': src.split(':')[0],
                'vbr': vbr,
                'format_id': '%dk' % vbr,
            })

        self._sort_formats(formats)

        title = '%s - %s' % (properties.attrib['clip_movie_title'], properties.attrib['clip_title'])
        description = clean_html(compat_str(properties.attrib['clip_description']))
        thumbnail = properties.attrib['image']
        categories = properties.attrib['clip_categories'].split(',')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
        }
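
A rough sketch of the SMIL shape the formats loop above expects; the element names and attributes come from the XPaths in the extractor, while the base URL, paths and bitrates here are made up:

    import xml.etree.ElementTree as ET

    smil = ET.fromstring('''
    <smil>
      <head><meta base="rtmp://example-cdn.invalid/ondemand/"/></head>
      <body>
        <switch>
          <video system-bitrate="1000000" src="mp4:clips/Wy7ZU_1000.mp4"/>
          <video system-bitrate="500000" src="mp4:clips/Wy7ZU_500.mp4"/>
        </switch>
      </body>
    </smil>
    ''')

    base_url = smil.find('./head/meta').attrib['base']
    for video in smil.findall('./body/switch/video'):
        vbr = int(video.attrib['system-bitrate']) / 1000
        src = video.attrib['src']
        # rtmp-style split: 'url' is the base/app, 'play_path' the mp4: path
        print(base_url, src, '%dk' % vbr)  # ... 1000k, 500k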

View File

@@ -38,7 +38,7 @@ class NuvidIE(InfoExtractor):
            webpage = self._download_webpage(
                request, video_id, 'Downloading %s page' % format_id)
            video_url = self._html_search_regex(
                r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
                r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False)
            if not video_url:
                continue
            formats.append({
@@ -49,19 +49,24 @@ class NuvidIE(InfoExtractor):
        webpage = self._download_webpage(
            'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')

        title = self._html_search_regex(
            r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
        thumbnail = self._html_search_regex(
            r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
            webpage, 'thumbnail URL', fatal=False)
            [r'<span title="([^"]+)">',
             r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>'], webpage, 'title').strip()

        thumbnails = [
            {
                'url': thumb_url,
            } for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
        ]
        thumbnail = thumbnails[0]['url'] if thumbnails else None

        duration = parse_duration(self._html_search_regex(
            r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False))
            r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False))
        upload_date = unified_strdate(self._html_search_regex(
            r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))
            r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': 'http://m.nuvid.com%s' % thumbnail,
            'thumbnails': thumbnails,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'age_limit': 18,

View File

@@ -6,7 +6,6 @@ import re
from .common import InfoExtractor
from ..utils import (
    compat_urlparse,
    js_to_json,
)

View File

@@ -0,0 +1,82 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
    ExtractorError,
    float_or_none,
    int_or_none,
)


class PlayFMIE(InfoExtractor):
    IE_NAME = 'play.fm'
    _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'

    _TEST = {
        'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
        'md5': 'c505f8307825a245d0c7ad1850001f22',
        'info_dict': {
            'id': '137220',
            'ext': 'mp3',
            'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
            'uploader': 'Sven Tasnadi',
            'uploader_id': 'sventasnadi',
            'duration': 5627.428,
            'upload_date': '20140712',
            'view_count': int,
            'thumbnail': 're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        upload_date = mobj.group('upload_date')

        rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
        req = compat_urllib_request.Request(
            'http://www.play.fm/flexRead/recording', data=rec_data)
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        rec_doc = self._download_xml(req, video_id)

        error_node = rec_doc.find('./error')
        if error_node is not None:
            raise ExtractorError('An error occured: %s (code %s)' % (
                error_node.text, rec_doc.find('./status').text))

        recording = rec_doc.find('./recording')
        title = recording.find('./title').text
        view_count = int_or_none(recording.find('./stats/playcount').text)
        duration = float_or_none(recording.find('./duration').text, scale=1000)
        thumbnail = recording.find('./image').text

        artist = recording.find('./artists/artist')
        uploader = artist.find('./name').text
        uploader_id = artist.find('./slug').text

        video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
            'http:', recording.find('./url').text,
            recording.find('./_class').text, recording.find('./file_id').text,
            rec_doc.find('./uuid').text, video_id,
            rec_doc.find('./jingle/file_id').text,
            'http%3A%2F%2Fwww.play.fm%2Fplayer',
        )

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp3',
            'filesize': int_or_none(recording.find('./size').text),
            'title': title,
            'upload_date': upload_date,
            'view_count': view_count,
            'duration': duration,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
        }
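
Illustration of the stream URL that the string formatting above produces, with made-up values for the XML fields (the real values come from the flexRead/recording response):

    fields = ('stream.play.fm', 'recordings', 'abc123',      # url, _class, file_id
              'deadbeef-uuid', '137220', 'jingle42')          # uuid, rec_id, jingle file_id
    video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
        ('http:',) + fields + ('http%3A%2F%2Fwww.play.fm%2Fplayer',))
    print(video_url)
    # http://stream.play.fm/recordings/abc123/offset/0/sh/deadbeef-uuid/rec/137220/jingle/jingle42/loc/http%3A%2F%2Fwww.play.fm%2Fplayer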

View File

@@ -35,7 +35,6 @@ class RtlXlIE(InfoExtractor):
        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
            uuid)

        meta = info['meta']
        material = info['material'][0]
        episode_info = info['episodes'][0]

View File

@@ -1,21 +1,66 @@
# encoding: utf-8
from __future__ import unicode_literals

import re
import base64
import re
import time

from .common import InfoExtractor
from ..utils import (
    struct_unpack,
    remove_end,
)


def _decrypt_url(png):
    encrypted_data = base64.b64decode(png)
    text_index = encrypted_data.find(b'tEXt')
    text_chunk = encrypted_data[text_index - 4:]
    length = struct_unpack('!I', text_chunk[:4])[0]
    # Use bytearray to get integers when iterating in both python 2.x and 3.x
    data = bytearray(text_chunk[8:8 + length])
    data = [chr(b) for b in data if b != 0]
    hash_index = data.index('#')
    alphabet_data = data[:hash_index]
    url_data = data[hash_index + 1:]

    alphabet = []
    e = 0
    d = 0
    for l in alphabet_data:
        if d == 0:
            alphabet.append(l)
            d = e = (e + 1) % 4
        else:
            d -= 1
    url = ''
    f = 0
    e = 3
    b = 1
    for letter in url_data:
        if f == 0:
            l = int(letter) * 10
            f = 1
        else:
            if e == 0:
                l += int(letter)
                url += alphabet[l]
                e = (b + 3) % 4
                f = 0
                b += 1
            else:
                e -= 1
    return url


class RTVEALaCartaIE(InfoExtractor):
    IE_NAME = 'rtve.es:alacarta'
    IE_DESC = 'RTVE a la carta'
    _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'

    _TEST = {
    _TESTS = [{
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
        'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
        'info_dict': {
@@ -23,48 +68,15 @@ class RTVEALaCartaIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
        },
    }

    def _decrypt_url(self, png):
        encrypted_data = base64.b64decode(png)
        text_index = encrypted_data.find(b'tEXt')
        text_chunk = encrypted_data[text_index-4:]
        length = struct_unpack('!I', text_chunk[:4])[0]
        # Use bytearray to get integers when iterating in both python 2.x and 3.x
        data = bytearray(text_chunk[8:8+length])
        data = [chr(b) for b in data if b != 0]
        hash_index = data.index('#')
        alphabet_data = data[:hash_index]
        url_data = data[hash_index+1:]

        alphabet = []
        e = 0
        d = 0
        for l in alphabet_data:
            if d == 0:
                alphabet.append(l)
                d = e = (e + 1) % 4
            else:
                d -= 1
        url = ''
        f = 0
        e = 3
        b = 1
        for letter in url_data:
            if f == 0:
                l = int(letter)*10
                f = 1
            else:
                if e == 0:
                    l += int(letter)
                    url += alphabet[l]
                    e = (b + 3) % 4
                    f = 0
                    b += 1
                else:
                    e -= 1
        return url
    }, {
        'note': 'Live stream',
        'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
        'info_dict': {
            'id': '1694255',
            'ext': 'flv',
            'title': 'TODO',
        }
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -74,11 +86,57 @@ class RTVEALaCartaIE(InfoExtractor):
            video_id)['page']['items'][0]

        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
        png = self._download_webpage(png_url, video_id, 'Downloading url information')
        video_url = self._decrypt_url(png)
        video_url = _decrypt_url(png)

        return {
            'id': video_id,
            'title': info['title'],
            'url': video_url,
            'thumbnail': info['image'],
            'thumbnail': info.get('image'),
            'page_url': url,
        }


class RTVELiveIE(InfoExtractor):
    IE_NAME = 'rtve.es:live'
    IE_DESC = 'RTVE.es live streams'
    _VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias|television)/(?P<id>[a-zA-Z0-9-]+)'

    _TESTS = [{
        'url': 'http://www.rtve.es/noticias/directo-la-1/',
        'info_dict': {
            'id': 'directo-la-1',
            'ext': 'flv',
            'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
        },
        'params': {
            'skip_download': 'live stream',
        }
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        start_time = time.gmtime()
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        player_url = self._search_regex(
            r'<param name="movie" value="([^"]+)"/>', webpage, 'player URL')
        title = remove_end(self._og_search_title(webpage), ' en directo')
        title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)

        vidplayer_id = self._search_regex(
            r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
        png = self._download_webpage(png_url, video_id, 'Downloading url information')
        video_url = _decrypt_url(png)

        return {
            'id': video_id,
            'ext': 'flv',
            'title': title,
            'url': video_url,
            'app': 'rtve-live-live?ovpfv=2.1.2',
            'player_url': player_url,
            'rtmp_live': True,
        }

View File

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    js_to_json,
    remove_end,
)


class SBSIE(InfoExtractor):
    IE_DESC = 'sbs.com.au'
    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/'

    _TESTS = [{
        # Original URL is handled by the generic IE which finds the iframe:
        # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
        'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
        'md5': '3150cf278965eeabb5b4cea1c963fe0a',
        'info_dict': {
            'id': '320403011771',
            'ext': 'flv',
            'title': 'Dingo Conservation',
            'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
            'thumbnail': 're:http://.*\.jpg',
        },
        'add_ies': ['generic'],
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        release_urls_json = js_to_json(self._search_regex(
            r'(?s)playerParams\.releaseUrls\s*=\s*(\{.*?\n\});\n',
            webpage, ''))
        release_urls = json.loads(release_urls_json)
        theplatform_url = (
            release_urls.get('progressive') or release_urls.get('standard'))

        title = remove_end(self._og_search_title(webpage), ' (The Feed)')
        description = self._html_search_meta('description', webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': theplatform_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
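
A hedged sketch of the kind of inline JS block the regex above captures (the page assigns playerParams.releaseUrls = {...}); js_to_json is there to tolerate JS-style object literals, so this illustration just uses plain JSON with the keys the extractor reads, and made-up theplatform URLs:

    import json

    raw = '''{
        "progressive": "http://link.theplatform.com/s/example/media.mp4",
        "standard": "http://link.theplatform.com/s/example/media.smil"
    }'''
    release_urls = json.loads(raw)  # in the extractor, js_to_json() normalizes the captured text first
    print(release_urls.get('progressive') or release_urls.get('standard'))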

View File

@@ -44,7 +44,7 @@ class VodlockerIE(InfoExtractor):
            req, video_id, 'Downloading video page')

        title = self._search_regex(
            r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title')
            r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
        thumbnail = self._search_regex(
            r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
        url = self._search_regex(
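
What the loosened pattern buys, on a hypothetical page where the title is followed by <br> rather than <span>:

    import re

    html = '<h1 id="file_title">Some Movie (2014)<br></h1>'
    # the old pattern required a following <span> and finds nothing here
    print(re.search(r'id="file_title".*?>\s*(.*?)\s*<span', html))  # None
    print(re.search(r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', html).group(1))  # Some Movie (2014)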

View File

@@ -14,7 +14,7 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
    _VALID_URL = r'http://(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
    _VALID_URL = r'http://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
    _TESTS = [
        {
            'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
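
Effect of the relaxed pattern, against a hypothetical country-specific mirror of the test URL above:

    import re

    _VALID_URL = r'http://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
    url = 'http://de.xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html'
    print(re.match(_VALID_URL, url).group('id'))  # 1509445 (the old www.-only pattern rejects this URL)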

View File

@@ -1285,6 +1285,12 @@ def remove_start(s, start):
    return s


def remove_end(s, end):
    if s.endswith(end):
        return s[:-len(end)]
    return s


def url_basename(url):
    path = compat_urlparse.urlparse(url).path
    return path.strip(u'/').split(u'/')[-1]
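
Usage as in the new rtve.es:live extractor above, which strips a fixed suffix from the page title (illustrative values):

    from youtube_dl.utils import remove_end  # the helper added above

    print(remove_end('La 1 de TVE en directo', ' en directo'))  # La 1 de TVE
    print(remove_end('La 1 de TVE', ' en directo'))             # La 1 de TVE (no trailing match, unchanged)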

View File

@@ -1,2 +1,2 @@
__version__ = '2014.08.22.2'
__version__ = '2014.08.23'