Compare commits


72 Commits

Author SHA1 Message Date
Philipp Hagemeister
dc1eed93be release 2015.05.04 2015-05-04 15:12:48 +02:00
Sergey M․
b2f82360d7 [escapist] Add uploader to tests 2015-05-04 19:06:07 +06:00
Sergey M․
782e0568ef [escapist] Modernize 2015-05-04 19:04:49 +06:00
Sergey M․
90b4b0eabe [escapist] Improve _VALID_URL 2015-05-04 19:01:08 +06:00
Sergey M․
cec04ef3a6 [escapist] Update tests' checksums 2015-05-04 19:00:34 +06:00
Sergey M․
71fa56b887 [escapist] Fix formats extraction 2015-05-04 18:59:22 +06:00
Yen Chi Hsuan
b9b3ab45ea [NBC] Enhance extraction of ThePlatform URL (fixes #5470) 2015-05-04 19:09:18 +08:00
Philipp Hagemeister
957b794c26 release 2015.05.03 2015-05-03 22:31:39 +02:00
Yen Chi Hsuan
8001607e90 [generic] Detect more MLB videos (fixes #5443) 2015-05-04 02:20:07 +08:00
Yen Chi Hsuan
3e7202c1bc [MLB] Extend _VALID_URL (#5443) 2015-05-04 01:59:26 +08:00
Yen Chi Hsuan
848edeab89 [lifenews] Detect <iframe> (fixes #5346) 2015-05-04 01:24:19 +08:00
Yen Chi Hsuan
1748d67aea [lifenews] Fix view count and comment count 2015-05-04 01:11:23 +08:00
Jaime Marquínez Ferrándiz
5477ca8239 [dailymotion] Use https urls
The video url still redirects to an http url, but it doesn't explicitly contain the video id.
2015-05-03 16:59:14 +02:00
Sergey M․
d0fd305023 [rutv] Add test for #5584 2015-05-03 10:00:34 +06:00
Sergey M․
8dab1e9072 [rutv] Recognize live streams (#5584) 2015-05-03 09:56:03 +06:00
Sergey M․
963aea5279 [baiduvideo] Improve _VALID_URL 2015-05-03 07:45:15 +06:00
Sergey M․
0a64aa7355 [vgtv] Fix _VALID_URL (Closes #5578) 2015-05-03 00:58:42 +06:00
Sergey M․
0669c89c55 [options] Clarify --write-annotations help 2015-05-02 23:38:30 +06:00
Sergey M․
2699da8041 [YoutubeDL] Improve description file naming 2015-05-02 23:36:55 +06:00
Sergey M․
98727e123f [YoutubeDL] Improve annotations file naming 2015-05-02 23:35:18 +06:00
Sergey M․
b29e0000e6 [YoutubeDL] Improve JSON info file naming 2015-05-02 23:23:44 +06:00
Sergey M․
b3ed15b760 [utils] Add replace_extension 2015-05-02 23:23:06 +06:00
Sergey M․
666a9a2b95 [YoutubeDL] Improve audio/video-only file naming 2015-05-02 23:11:34 +06:00
Sergey M․
a4bcaad773 [test_utils] Add tests for prepend_extension 2015-05-02 23:10:48 +06:00
Sergey M․
e65e4c8874 [utils] Improve prepend_extension
Now `ext` is appended to filename if real extension != expected extension.
2015-05-02 23:06:01 +06:00
Yen Chi Hsuan
21f6330274 [baiduvideo] Add new extractor (closes #4563) 2015-05-03 00:53:24 +08:00
Sergey M․
38c6902b90 [YoutubeDL] Ensure correct extension is always present for a merged file (Closes #5535) 2015-05-02 22:52:21 +06:00
Jaime Marquínez Ferrándiz
2ddcd88129 Remove code that was only used by the Grooveshark extractor 2015-05-02 17:29:56 +02:00
Yen Chi Hsuan
dd8920653c [Grooveshark] Remove the extractor
grooveshark.com was shut down on 2015/04/30
2015-05-02 21:46:33 +08:00
Sergey M․
c938c35f95 [iconosquare] Fix extraction 2015-05-02 07:18:22 +06:00
Yen Chi Hsuan
2eb0192155 [viki] Remove clean_html call 2015-05-02 01:35:46 +08:00
Yen Chi Hsuan
d948e09b61 [viki] Extract m3u8 videos (#4855) 2015-05-02 01:20:16 +08:00
Yen Chi Hsuan
89966a5aea [viki] Enhance error message handling (#3774) 2015-05-02 01:20:15 +08:00
Yen Chi Hsuan
8e3df9dfee [viki] Fix extractor and add a globally available test case 2015-05-02 01:20:15 +08:00
Sergey M․
5890eef6b0 [pbs] Add support for HD (Closes #3564, closes #5390) 2015-05-01 17:43:06 +06:00
Nikoli
083c1bb960 Add ability to embed subtitles in mkv files (closes #5434) 2015-05-01 11:54:40 +02:00
Yen Chi Hsuan
861e65eb05 [yahoo] Extend _VALID_URL 2015-05-01 12:32:24 +08:00
Sergey M․
650cfd0cb0 [bbccouk] Mute thumbnail 2015-05-01 04:07:30 +06:00
Sergey M․
e68ae99a41 [bbccouk] Add test for #5530 2015-05-01 04:02:56 +06:00
Sergey M․
8683b4d8d9 [bbccouk] Improve extraction (Closes #5530) 2015-05-01 03:59:13 +06:00
Sergey M․
1dbd717eb4 [theplatform] Fix FutureWarning 2015-05-01 02:51:55 +06:00
Sergey M․
6a8422b942 [foxsports] Add extractor (Closes #5517) 2015-05-01 02:49:06 +06:00
Sergey M․
cb202fd286 [YoutubeDL] Filter requested info fields on --load-info as well
In order to properly handle JSON info files generated by youtube-dl versions prior to 4070b458ec
2015-05-01 00:44:34 +06:00
Naglis Jonaitis
67fc8ecd53 [dreisat] Extend _VALID_URL (Closes #5548) 2015-04-30 21:28:08 +03:00
Jaime Marquínez Ferrándiz
df8301fef5 [YoutubeDL] pep8: use 'k not in' instead of 'not k in' 2015-04-30 20:18:42 +02:00
Sergey M․
4070b458ec [YoutubeDL] Do not write requested info in info JSON file (Closes #5562, closes #5564) 2015-04-30 23:55:05 +06:00
Yen Chi Hsuan
ffbc3901d2 Merge remote-tracking branch 'upstream/master' 2015-04-30 23:33:49 +08:00
Sergey M․
7a03280df4 [vporn] More metadata extraction fixes and tests update (#5560) 2015-04-30 21:31:38 +06:00
Yen Chi Hsuan
482a1258de [VeeHD] Replace the third test case due to copyright issues 2015-04-30 23:27:07 +08:00
Sergey M․
cd298882cd [vporn] Fix metadata extraction (#5560) 2015-04-30 21:25:17 +06:00
Sergey M․
e01c56f9e1 [YoutubeDL] Generalize best/worst format match behavior 2015-04-30 21:06:51 +06:00
Sergey M.
4d72df4031 Merge pull request #5556 from jaimeMF/best-format-nodash
Make 'best' format only match non-DASH formats (closes #5554)
2015-04-30 19:57:02 +05:00
Yen Chi Hsuan
f7f1df1d82 [VeeHD] Enhance extraction and fix tests (fixes #4965) 2015-04-30 22:37:41 +08:00
Yen Chi Hsuan
c4a21bc9db [bilibili] Extract multipart videos (closes #3250) 2015-04-30 18:26:08 +08:00
Yen Chi Hsuan
621ffe7bf4 [niconico] Fix so* video extraction (fixes #4874) (#2087) 2015-04-30 17:05:02 +08:00
Jaime Marquínez Ferrándiz
8dd5418803 Make 'best' format only match non-DASH formats (closes #5554)
Otherwise it's impossible to only download non-DASH formats, for example `best[height=?480]/best` would download a DASH video if it's the only one with height=480, instead of falling back to the second format specifier.
For audio-only URLs (soundcloud, bandcamp, ...), the best audio will be downloaded as before.
2015-04-29 22:53:18 +02:00
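
A minimal sketch of the rule this commit message describes, assuming formats arrive sorted worst-to-best as youtube-dl keeps them (the function name is illustrative; the actual change lands in the YoutubeDL.py hunk further down):

def select_format(format_spec, available_formats):
    if format_spec in ('best', 'worst', None):
        format_idx = 0 if format_spec == 'worst' else -1
        audiovideo_formats = [
            f for f in available_formats
            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
        if audiovideo_formats:
            return audiovideo_formats[format_idx]
        # Audio-only sites (soundcloud, bandcamp, ...): keep old behavior.
        if all(f.get('acodec') != 'none' for f in available_formats):
            return available_formats[format_idx]
    return None

muxed = {'format_id': '22', 'vcodec': 'h264', 'acodec': 'aac'}
dash_video = {'format_id': '137', 'vcodec': 'h264', 'acodec': 'none'}
assert select_format('best', [muxed, dash_video]) is muxed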
Jaime Marquínez Ferrándiz
965cb8d530 [escapist] pep8 fixes 2015-04-29 22:46:19 +02:00
Yen Chi Hsuan
b2e8e7dab5 [niconico] Try to extract all optional fields from various sources 2015-04-30 02:24:05 +08:00
Yen Chi Hsuan
59d814f793 [niconico] Remove credentials from tests and enhance title extraction
All test videos can be downloaded without username and password now.
2015-04-30 00:50:48 +08:00
Yen Chi Hsuan
bb865f3a5e [niconico] Fix extraction and update tests (closes #5511) 2015-04-30 00:50:48 +08:00
Yen Chi Hsuan
9ee53a49f0 [YouPorn] Fix extractor 2015-04-30 00:50:48 +08:00
Sergey M.
79adb09baa Merge pull request #5553 from zouhair/master
Typo: twice "the the" to "the"
2015-04-29 20:05:48 +05:00
zouhair
cf0649f8b7 Typo: twice "the the" to "the" 2015-04-29 11:03:10 -04:00
Sergey M.
f8690631e2 Merge pull request #5552 from zouhair/master
Typo "incompatible" instead of "uncompatible"
2015-04-29 19:09:47 +05:00
zouhair
5456d78f0c Typo "incompatible" instead of "uncompatible" 2015-04-29 10:07:49 -04:00
Yen Chi Hsuan
cbbece96a2 [yourupload] Simplify 2015-04-29 04:05:14 +08:00
Yen Chi Hsuan
9d8ba307ef [yourupload] Fix extraction 2015-04-29 04:03:07 +08:00
Yen Chi Hsuan
ec7c1e85e0 [testtube] Fix test case 1
Seems the site now provides webm with higher bitrates
2015-04-29 00:24:58 +08:00
Yen Chi Hsuan
e70c7568c0 [testtube] Detect Youtube iframes (fixes #4867) 2015-04-29 00:22:17 +08:00
Yen Chi Hsuan
39b62db116 [youtube] Catch more alert messages (closes #5074) 2015-04-28 23:07:56 +08:00
Jaime Marquínez Ferrándiz
2edce52584 [vimeo] Fix password protected videos again (#5082)
Since they have changed again to the previous format, I've modified the regex to match both formats.
2015-04-28 15:06:08 +02:00
pulpe
10831b5ec9 [vimeo] Fix redirection 2015-04-28 14:56:48 +02:00
40 changed files with 772 additions and 444 deletions

View File: README.md

@@ -133,7 +133,7 @@ which means you can modify it, redistribute it or use it however you like.
--no-mtime Do not use the Last-modified header to set the file modification time
--write-description Write video description to a .description file
--write-info-json Write video metadata to a .info.json file
--write-annotations Write video annotations to a .annotation file
--write-annotations Write video annotations to a .annotations.xml file
--load-info FILE JSON file containing the video information (created with the "--write-info-json" option)
--cookies FILE File to read cookies from and dump cookie jar in
--cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
@@ -216,7 +216,7 @@ which means you can modify it, redistribute it or use it however you like.
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
-k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default
--no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default
--embed-subs Embed subtitles in the video (only for mp4 videos)
--embed-subs Embed subtitles in the video (only for mkv and mp4 videos)
--embed-thumbnail Embed thumbnail in the audio as cover art
--add-metadata Write metadata to the video file
--metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed

View File: docs/supportedsites.md

@@ -44,6 +44,7 @@
- **audiomack**
- **audiomack:album**
- **Azubu**
- **BaiduVideo**
- **bambuser**
- **bambuser:channel**
- **Bandcamp**
@@ -155,6 +156,7 @@
- **FootyRoom**
- **Foxgay**
- **FoxNews**
- **FoxSports**
- **france2.fr:generation-quoi**
- **FranceCulture**
- **FranceInter**
@@ -184,7 +186,6 @@
- **Golem**
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net
- **Goshgay**
- **Grooveshark**
- **Groupon**
- **Hark**
- **HearThisAt**

View File: test/test_YoutubeDL.py

@@ -237,7 +237,7 @@ class TestFormatSelection(unittest.TestCase):
f2['url'] = 'url:' + f2id
info_dict = _make_result([f1, f2], extractor='youtube')
ydl = YDL()
ydl = YDL({'format': 'best/bestvideo'})
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
@@ -245,7 +245,7 @@ class TestFormatSelection(unittest.TestCase):
self.assertEqual(downloaded['format_id'], f1id)
info_dict = _make_result([f2, f1], extractor='youtube')
ydl = YDL()
ydl = YDL({'format': 'best/bestvideo'})
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)

View File: test/test_utils.py

@@ -41,6 +41,8 @@ from youtube_dl.utils import (
sanitize_filename,
sanitize_path,
sanitize_url_path_consecutive_slashes,
prepend_extension,
replace_extension,
shell_quote,
smuggle_url,
str_to_int,
@@ -193,6 +195,22 @@ class TestUtil(unittest.TestCase):
sanitize_url_path_consecutive_slashes('http://hostname/abc//'),
'http://hostname/abc/')
def test_prepend_extension(self):
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
self.assertEqual(prepend_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
self.assertEqual(prepend_extension('abc', 'temp'), 'abc.temp')
self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
def test_replace_extension(self):
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
self.assertEqual(replace_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
self.assertEqual(replace_extension('abc', 'temp'), 'abc.temp')
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
def test_ordered_set(self):
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
self.assertEqual(orderedSet([]), [])
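
These assertions pin down the semantics; one implementation consistent with them (a sketch, not necessarily the exact utils code) looks like:

import os.path

def prepend_extension(filename, ext, expected_real_ext=None):
    name, real_ext = os.path.splitext(filename)
    # Insert ext before the real extension only when the real extension
    # is the expected one; otherwise append ext after the full name.
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '%s.%s%s' % (name, ext, real_ext)
    return '%s.%s' % (filename, ext)

def replace_extension(filename, ext, expected_real_ext=None):
    name, real_ext = os.path.splitext(filename)
    # Replace the real extension only when it matches the expected one;
    # otherwise keep the unexpected "extension" as part of the name.
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '%s.%s' % (name, ext)
    return '%s.%s' % (filename, ext)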

View File: youtube_dl/YoutubeDL.py

@@ -71,6 +71,7 @@ from .utils import (
write_string,
YoutubeDLHandler,
prepend_extension,
replace_extension,
args_to_str,
age_restricted,
)
@@ -914,15 +915,16 @@ class YoutubeDL(object):
if not available_formats:
return None
if format_spec == 'best' or format_spec is None:
return available_formats[-1]
elif format_spec == 'worst':
if format_spec in ['best', 'worst', None]:
format_idx = 0 if format_spec == 'worst' else -1
audiovideo_formats = [
f for f in available_formats
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
if audiovideo_formats:
return audiovideo_formats[0]
return available_formats[0]
return audiovideo_formats[format_idx]
# for audio only urls, select the best/worst audio format
elif all(f.get('acodec') != 'none' for f in available_formats):
return available_formats[format_idx]
elif format_spec == 'bestaudio':
audio_formats = [
f for f in available_formats
@@ -1269,7 +1271,7 @@ class YoutubeDL(object):
return
if self.params.get('writedescription', False):
descfn = filename + '.description'
descfn = replace_extension(filename, 'description', info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
self.to_screen('[info] Video description is already present')
elif info_dict.get('description') is None:
@@ -1284,7 +1286,7 @@ class YoutubeDL(object):
return
if self.params.get('writeannotations', False):
annofn = filename + '.annotations.xml'
annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
self.to_screen('[info] Video annotations are already present')
else:
@@ -1331,13 +1333,13 @@ class YoutubeDL(object):
return
if self.params.get('writeinfojson', False):
infofn = os.path.splitext(filename)[0] + '.info.json'
infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
self.to_screen('[info] Video description metadata is already present')
else:
self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
try:
write_json_file(info_dict, infofn)
write_json_file(self.filter_requested_info(info_dict), infofn)
except (OSError, IOError):
self.report_error('Cannot write metadata to JSON file ' + infofn)
return
@@ -1381,11 +1383,18 @@ class YoutubeDL(object):
# TODO: Check acodec/vcodec
return False
filename_real_ext = os.path.splitext(filename)[1][1:]
filename_wo_ext = (
os.path.splitext(filename)[0]
if filename_real_ext == info_dict['ext']
else filename)
requested_formats = info_dict['requested_formats']
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
filename = os.path.splitext(filename)[0] + '.mkv'
self.report_warning('You have requested formats uncompatible for merge. '
info_dict['ext'] = 'mkv'
self.report_warning('You have requested formats incompatible for merge. '
'The formats will be merged into mkv')
# Ensure filename always has a correct extension for successful merge
filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
if os.path.exists(encodeFilename(filename)):
self.to_screen(
'[download] %s has already been downloaded and '
@@ -1395,7 +1404,7 @@ class YoutubeDL(object):
new_info = dict(info_dict)
new_info.update(f)
fname = self.prepare_filename(new_info)
fname = prepend_extension(fname, 'f%s' % f['format_id'])
fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
downloaded.append(fname)
partial_success = dl(fname, new_info)
success = success and partial_success
@@ -1487,7 +1496,7 @@ class YoutubeDL(object):
[info_filename], mode='r',
openhook=fileinput.hook_encoded('utf-8'))) as f:
# FileInput doesn't have a read method, we can't call json.load
info = json.loads('\n'.join(f))
info = self.filter_requested_info(json.loads('\n'.join(f)))
try:
self.process_ie_result(info, download=True)
except DownloadError:
@@ -1499,6 +1508,12 @@ class YoutubeDL(object):
raise
return self._download_retcode
@staticmethod
def filter_requested_info(info_dict):
return dict(
(k, v) for k, v in info_dict.items()
if k not in ['requested_formats', 'requested_subtitles'])
def post_process(self, filename, ie_info):
"""Run all the postprocessors on the given file."""
info = dict(ie_info)
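
A tiny round-trip demo of the filtering introduced in this file (the sample dict is made up):

import json

def filter_requested_info(info_dict):
    # Same filter as the staticmethod above: strip per-run bookkeeping
    # before metadata is written out or re-loaded via --load-info.
    return dict(
        (k, v) for k, v in info_dict.items()
        if k not in ['requested_formats', 'requested_subtitles'])

info = {'id': '42', 'title': 'clip',
        'requested_formats': [{'format_id': '137+140'}]}
restored = json.loads(json.dumps(filter_requested_info(info)))
assert 'requested_formats' not in restored and restored['id'] == '42'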

View File: youtube_dl/compat.py

@@ -46,11 +46,6 @@ try:
except ImportError: # Python 2
import htmlentitydefs as compat_html_entities
try:
import html.parser as compat_html_parser
except ImportError: # Python 2
import HTMLParser as compat_html_parser
try:
import http.client as compat_http_client
except ImportError: # Python 2
@@ -404,7 +399,6 @@ __all__ = [
'compat_getenv',
'compat_getpass',
'compat_html_entities',
'compat_html_parser',
'compat_http_client',
'compat_http_server',
'compat_kwargs',

View File: youtube_dl/downloader/http.py

@@ -28,13 +28,8 @@ class HttpFD(FileDownloader):
add_headers = info_dict.get('http_headers')
if add_headers:
headers.update(add_headers)
data = info_dict.get('http_post_data')
http_method = info_dict.get('http_method')
basic_request = compat_urllib_request.Request(url, data, headers)
request = compat_urllib_request.Request(url, data, headers)
if http_method is not None:
basic_request.get_method = lambda: http_method
request.get_method = lambda: http_method
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
is_test = self.params.get('test', False)

View File: youtube_dl/extractor/__init__.py

@@ -32,6 +32,7 @@ from .atresplayer import AtresPlayerIE
from .atttechchannel import ATTTechChannelIE
from .audiomack import AudiomackIE, AudiomackAlbumIE
from .azubu import AzubuIE
from .baidu import BaiduVideoIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
@@ -161,6 +162,7 @@ from .footyroom import FootyRoomIE
from .fourtube import FourTubeIE
from .foxgay import FoxgayIE
from .foxnews import FoxNewsIE
from .foxsports import FoxSportsIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE
from .francetv import (
@@ -198,7 +200,6 @@ from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE
from .goshgay import GoshgayIE
from .grooveshark import GroovesharkIE
from .groupon import GrouponIE
from .hark import HarkIE
from .hearthisat import HearThisAtIE

View File: youtube_dl/extractor/baidu.py

@@ -0,0 +1,68 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
class BaiduVideoIE(InfoExtractor):
_VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
_TESTS = [{
'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
'info_dict': {
'id': '1069',
'title': '中华小当家 TV版 (全52集)',
'description': 'md5:395a419e41215e531c857bb037bbaf80',
},
'playlist_count': 52,
}, {
'url': 'http://v.baidu.com/show/11595.htm?frp=bdbrand',
'info_dict': {
'id': '11595',
'title': 're:^奔跑吧兄弟',
'description': 'md5:1bf88bad6d850930f542d51547c089b8',
},
'playlist_mincount': 3,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
category = category2 = mobj.group('type')
if category == 'show':
category2 = 'tvshow'
webpage = self._download_webpage(url, playlist_id)
playlist_title = self._html_search_regex(
r'title\s*:\s*(["\'])(?P<title>[^\']+)\1', webpage,
'playlist title', group='title')
playlist_description = self._html_search_regex(
r'<input[^>]+class="j-data-intro"[^>]+value="([^"]+)"/>', webpage,
playlist_id, 'playlist description')
site = self._html_search_regex(
r'filterSite\s*:\s*["\']([^"]*)["\']', webpage,
'primary provider site')
api_result = self._download_json(
'http://v.baidu.com/%s_intro/?dtype=%sPlayUrl&id=%s&site=%s' % (
category, category2, playlist_id, site),
playlist_id, 'Get playlist links')
entries = []
for episode in api_result[0]['episodes']:
episode_id = '%s_%s' % (playlist_id, episode['episode'])
redirect_page = self._download_webpage(
compat_urlparse.urljoin(url, episode['url']), episode_id,
note='Download Baidu redirect page')
real_url = self._html_search_regex(
r'location\.replace\("([^"]+)"\)', redirect_page, 'real URL')
entries.append(self.url_result(
real_url, video_title=episode['single_title']))
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
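
A quick sanity check of the 'real URL' regex used above, against a hypothetical redirect page:

import re

redirect_page = '<script>location.replace("http://example.com/ep1.mp4")</script>'
real_url = re.search(
    r'location\.replace\("([^"]+)"\)', redirect_page).group(1)
assert real_url == 'http://example.com/ep1.mp4'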

View File: youtube_dl/extractor/bbccouk.py

@@ -3,7 +3,10 @@ from __future__ import unicode_literals
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils import (
ExtractorError,
int_or_none,
)
from ..compat import compat_HTTPError
@@ -112,6 +115,20 @@ class BBCCoUkIE(InfoExtractor):
# rtmp download
'skip_download': True,
}
}, {
'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
'info_dict': {
'id': 'p02n76xf',
'ext': 'flv',
'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
'duration': 3540,
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'geolocation',
}, {
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
'only_matching': True,
@@ -326,16 +343,27 @@ class BBCCoUkIE(InfoExtractor):
webpage = self._download_webpage(url, group_id, 'Downloading video page')
programme_id = self._search_regex(
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
programme_id = None
tviplayer = self._search_regex(
r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
webpage, 'player', default=None)
if tviplayer:
player = self._parse_json(tviplayer, group_id).get('player', {})
duration = int_or_none(player.get('duration'))
programme_id = player.get('vpid')
if not programme_id:
programme_id = self._search_regex(
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
if programme_id:
player = self._download_json(
'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
group_id)['jsConf']['player']
title = player['title']
description = player['subtitle']
duration = player['duration']
formats, subtitles = self._download_media_selector(programme_id)
title = self._og_search_title(webpage)
description = self._search_regex(
r'<p class="medium-description">([^<]+)</p>',
webpage, 'description', fatal=False)
else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
@@ -345,6 +373,7 @@ class BBCCoUkIE(InfoExtractor):
'id': programme_id,
'title': title,
'description': description,
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'duration': duration,
'formats': formats,
'subtitles': subtitles,

View File: youtube_dl/extractor/bilibili.py

@@ -2,6 +2,7 @@
from __future__ import unicode_literals
import re
import itertools
from .common import InfoExtractor
from ..utils import (
@@ -14,18 +15,25 @@ from ..utils import (
class BiliBiliIE(InfoExtractor):
_VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/'
_TEST = {
_TESTS = [{
'url': 'http://www.bilibili.tv/video/av1074402/',
'md5': '2c301e4dab317596e837c3e7633e7d86',
'info_dict': {
'id': '1074402',
'id': '1074402_part1',
'ext': 'flv',
'title': '【金坷垃】金泡沫',
'duration': 308,
'upload_date': '20140420',
'thumbnail': 're:^https?://.+\.jpg',
},
}
}, {
'url': 'http://www.bilibili.com/video/av1041170/',
'info_dict': {
'id': '1041170',
'title': '【BD1080P】刀语【诸神&异域】',
},
'playlist_count': 9,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -57,19 +65,14 @@ class BiliBiliIE(InfoExtractor):
cid = self._search_regex(r'cid=(\d+)', webpage, 'cid')
entries = []
lq_doc = self._download_xml(
'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
video_id,
note='Downloading LQ video info'
)
lq_durl = lq_doc.find('./durl')
formats = [{
'format_id': 'lq',
'quality': 1,
'url': lq_durl.find('./url').text,
'filesize': int_or_none(
lq_durl.find('./size'), get_attr='text'),
}]
lq_durls = lq_doc.findall('./durl')
hq_doc = self._download_xml(
'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid,
@@ -77,23 +80,44 @@ class BiliBiliIE(InfoExtractor):
note='Downloading HQ video info',
fatal=False,
)
if hq_doc is not False:
hq_durl = hq_doc.find('./durl')
formats.append({
'format_id': 'hq',
'quality': 2,
'ext': 'flv',
'url': hq_durl.find('./url').text,
hq_durls = hq_doc.findall('./durl') if hq_doc is not False else itertools.repeat(None)
assert len(lq_durls) == len(hq_durls)
i = 1
for lq_durl, hq_durl in zip(lq_durls, hq_durls):
formats = [{
'format_id': 'lq',
'quality': 1,
'url': lq_durl.find('./url').text,
'filesize': int_or_none(
hq_durl.find('./size'), get_attr='text'),
lq_durl.find('./size'), get_attr='text'),
}]
if hq_durl:
formats.append({
'format_id': 'hq',
'quality': 2,
'ext': 'flv',
'url': hq_durl.find('./url').text,
'filesize': int_or_none(
hq_durl.find('./size'), get_attr='text'),
})
self._sort_formats(formats)
entries.append({
'id': '%s_part%d' % (video_id, i),
'title': title,
'formats': formats,
'duration': duration,
'upload_date': upload_date,
'thumbnail': thumbnail,
})
self._sort_formats(formats)
i += 1
return {
'_type': 'multi_video',
'entries': entries,
'id': video_id,
'title': title,
'formats': formats,
'duration': duration,
'upload_date': upload_date,
'thumbnail': thumbnail,
'title': title
}
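
A condensed sketch of the multi_video result shape the reworked extractor returns (helper name and sample data are illustrative):

def make_multipart_result(video_id, title, per_part_formats):
    # One entry per <durl> part, with ids suffixed _part1, _part2, ...
    entries = [{
        'id': '%s_part%d' % (video_id, i + 1),
        'title': title,
        'formats': formats,
    } for i, formats in enumerate(per_part_formats)]
    return {
        '_type': 'multi_video',
        'id': video_id,
        'title': title,
        'entries': entries,
    }

result = make_multipart_result(
    '1041170', 'sample', [[{'url': 'http://example.com/p1.flv'}]])
assert result['entries'][0]['id'] == '1041170_part1'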

View File: youtube_dl/extractor/common.py

@@ -47,7 +47,7 @@ class InfoExtractor(object):
information possibly downloading the video to the file system, among
other possible outcomes.
The type field determines the the type of the result.
The type field determines the type of the result.
By far the most common value (and the default if _type is missing) is
"video", which indicates a single video.
@@ -111,11 +111,8 @@ class InfoExtractor(object):
(quality takes higher priority)
-1 for default (order by other properties),
-2 or smaller for less than default.
* http_method HTTP method to use for the download.
* http_headers A dictionary of additional HTTP headers
to add to the request.
* http_post_data Additional data to send with a POST
request.
* stretched_ratio If given and not 1, indicates that the
video's pixels are not square.
width : height ratio as float.
@@ -572,7 +569,7 @@ class InfoExtractor(object):
def _get_login_info(self):
"""
Get the the login info as (username, password)
Get the login info as (username, password)
It will look in the netrc file using the _NETRC_MACHINE value
If there's no info available, return (None, None)
"""

View File: youtube_dl/extractor/dailymotion.py

@@ -85,7 +85,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
url = 'http://www.dailymotion.com/video/%s' % video_id
url = 'https://www.dailymotion.com/video/%s' % video_id
# Retrieve video webpage to extract further information
request = self._build_request(url)
@@ -110,7 +110,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
if mobj is not None:
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id
embed_request = self._build_request(embed_url)
embed_page = self._download_webpage(
embed_request, video_id, 'Downloading embed page')

View File: youtube_dl/extractor/dreisat.py

@@ -11,19 +11,25 @@ from ..utils import (
class DreiSatIE(InfoExtractor):
IE_NAME = '3sat'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TEST = {
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
'md5': 'be37228896d30a88f315b638900a026e',
'info_dict': {
'id': '45918',
'ext': 'mp4',
'title': 'Waidmannsheil',
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
'uploader': '3sat',
'upload_date': '20140913'
}
}
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TESTS = [
{
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
'md5': 'be37228896d30a88f315b638900a026e',
'info_dict': {
'id': '45918',
'ext': 'mp4',
'title': 'Waidmannsheil',
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
'uploader': '3sat',
'upload_date': '20140913'
}
},
{
'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
'only_matching': True,
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File: youtube_dl/extractor/escapist.py

@@ -8,7 +8,8 @@ from ..compat import compat_urllib_request
from ..utils import (
determine_ext,
clean_html,
qualities,
int_or_none,
float_or_none,
)
@@ -36,10 +37,10 @@ def _decrypt_config(key, string):
class EscapistIE(InfoExtractor):
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
_VALID_URL = r'https?://?(?:www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
_TESTS = [{
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'c6793dbda81388f4264c1ba18684a74d',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
'info_dict': {
'id': '6618',
'ext': 'mp4',
@@ -47,10 +48,11 @@ class EscapistIE(InfoExtractor):
'title': "Breaking Down Baldur's Gate",
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 264,
'uploader': 'The Escapist',
}
}, {
'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer',
'md5': 'cf8842a8a46444d241f9a9980d7874f2',
'md5': '9e8c437b0dbb0387d3bd3255ca77f6bf',
'info_dict': {
'id': '10044',
'ext': 'mp4',
@@ -58,6 +60,7 @@ class EscapistIE(InfoExtractor):
'title': 'Evolve - One vs Multiplayer',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 304,
'uploader': 'The Escapist',
}
}]
@@ -65,35 +68,33 @@ class EscapistIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
imsVideo = self._parse_json(
ims_video = self._parse_json(
self._search_regex(
r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'),
video_id)
video_id = imsVideo['videoID']
key = imsVideo['hash']
video_id = ims_video['videoID']
key = ims_video['hash']
quality = qualities(['lq', 'hq', 'hd'])
config_req = compat_urllib_request.Request(
'http://www.escapistmagazine.com/videos/'
'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
config_req.add_header('Referer', url)
config = self._download_webpage(config_req, video_id, 'Downloading video config')
formats = []
for q in ['lq', 'hq', 'hd']:
config_req = compat_urllib_request.Request('http://www.escapistmagazine.com/videos/'
'vidconfig.php?videoID=%s&hash=%s&quality=%s' % (video_id, key, 'mp4_' + q))
config_req.add_header('Referer', url)
config = self._download_webpage(config_req, video_id, 'Downloading video config ' + q.upper())
data = json.loads(_decrypt_config(key, config))
data = json.loads(_decrypt_config(key, config))
video_data = data['videoData']
title = clean_html(data['videoData']['title'])
duration = data['videoData']['duration'] / 1000
for i, v in enumerate(data['files']['videos']):
formats.append({
'url': v,
'format_id': determine_ext(v) + '_' + q + str(i),
'quality': quality(q),
})
title = clean_html(video_data['title'])
duration = float_or_none(video_data.get('duration'), 1000)
uploader = video_data.get('publisher')
formats = [{
'url': video['src'],
'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']),
'height': int_or_none(video.get('res')),
} for video in data['files']['videos']]
self._sort_formats(formats)
return {
'id': video_id,
@@ -102,4 +103,5 @@ class EscapistIE(InfoExtractor):
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
'duration': duration,
'uploader': uploader,
}
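
A sketch of the reworked format mapping: one decrypted vidconfig now lists every rendition, replacing the old one-request-per-quality loop (os.path.splitext and int stand in here for the real determine_ext and int_or_none helpers):

import os.path

def build_formats(videos):
    return [{
        'url': video['src'],
        'format_id': '%s-%sp' % (
            os.path.splitext(video['src'])[1][1:], video['res']),
        'height': int(video['res']),
    } for video in videos]

formats = build_formats([
    {'src': 'http://example.com/clip-480.mp4', 'res': '480'},
    {'src': 'http://example.com/clip-720.mp4', 'res': '720'},
])
assert formats[1]['format_id'] == 'mp4-720p'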

View File: youtube_dl/extractor/foxsports.py

@@ -0,0 +1,32 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import smuggle_url
class FoxSportsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/video\?vid=(?P<id>\d+)'
_TEST = {
'url': 'http://www.foxsports.com/video?vid=432609859715',
'info_dict': {
'id': 'gA0bHB3Ladz3',
'ext': 'flv',
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
'description': 'Courtney Lee talks about Memphis being focused.',
},
'add_ie': ['ThePlatform'],
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
config = self._parse_json(
self._search_regex(
r"data-player-config='([^']+)'", webpage, 'data player config'),
video_id)
return self.url_result(smuggle_url(
config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True}))
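
smuggle_url packs extra hints into the URL so the receiving extractor (ThePlatform here) can recover them from the URL alone; a minimal stand-in consistent with that behavior, though the real utils helper may encode the payload differently:

import json
try:
    from urllib.parse import quote, unquote
except ImportError:  # Python 2
    from urllib import quote, unquote

def smuggle_url(url, data):
    # Carry extractor-to-extractor hints in the fragment.
    return url + '#__youtubedl_smuggle=' + quote(json.dumps(data))

def unsmuggle_url(smug_url):
    url, _, payload = smug_url.partition('#__youtubedl_smuggle=')
    return url, json.loads(unquote(payload))

url, data = unsmuggle_url(smuggle_url(
    'http://link.theplatform.com/s/x/y?mbr=true&manifest=f4m',
    {'force_smil_url': True}))
assert data['force_smil_url'] is True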

View File: youtube_dl/extractor/generic.py

@@ -413,6 +413,19 @@ class GenericIE(InfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$',
},
},
# MLB articles
{
'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
'md5': 'b190e70141fb9a1552a85426b4da1b5d',
'info_dict': {
'id': '75609783',
'ext': 'mp4',
'title': 'Must C: Pillar climbs for catch',
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
'timestamp': 1429124820,
'upload_date': '20150415',
}
},
# Wistia embed
{
'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
@@ -1289,6 +1302,10 @@ class GenericIE(InfoExtractor):
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
webpage)
if not mobj:
mobj = re.search(
r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'MLB')
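
A quick check of the data-video-link fallback added above, using a hypothetical article snippet (dots are escaped here for strictness):

import re

snippet = '<div data-video-link="http://m.mlb.com/video/v75609783/">'
mobj = re.search(
    r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
    snippet)
assert mobj.group('url') == 'http://m.mlb.com/video/v75609783/'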

View File: youtube_dl/extractor/grooveshark.py

@@ -1,191 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import time
import math
import os.path
import re
from .common import InfoExtractor
from ..compat import (
compat_html_parser,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import ExtractorError
class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
def __init__(self):
self._current_object = None
self.objects = []
compat_html_parser.HTMLParser.__init__(self)
def handle_starttag(self, tag, attrs):
attrs = dict((k, v) for k, v in attrs)
if tag == 'object':
self._current_object = {'attrs': attrs, 'params': []}
elif tag == 'param':
self._current_object['params'].append(attrs)
def handle_endtag(self, tag):
if tag == 'object':
self.objects.append(self._current_object)
self._current_object = None
@classmethod
def extract_object_tags(cls, html):
p = cls()
p.feed(html)
p.close()
return p.objects
class GroovesharkIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)'
_TEST = {
'url': 'http://grooveshark.com/#!/s/Jolene+Tenth+Key+Remix+Ft+Will+Sessions/6SS1DW?src=5',
'md5': '7ecf8aefa59d6b2098517e1baa530023',
'info_dict': {
'id': '6SS1DW',
'title': 'Jolene (Tenth Key Remix ft. Will Sessions)',
'ext': 'mp3',
'duration': 227,
}
}
do_playerpage_request = True
do_bootstrap_request = True
def _parse_target(self, target):
uri = compat_urlparse.urlparse(target)
hash = uri.fragment[1:].split('?')[0]
token = os.path.basename(hash.rstrip('/'))
return (uri, hash, token)
def _build_bootstrap_url(self, target):
(uri, hash, token) = self._parse_target(target)
query = 'getCommunicationToken=1&hash=%s&%d' % (compat_urllib_parse.quote(hash, safe=''), self.ts)
return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token)
def _build_meta_url(self, target):
(uri, hash, token) = self._parse_target(target)
query = 'hash=%s&%d' % (compat_urllib_parse.quote(hash, safe=''), self.ts)
return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token)
def _build_stream_url(self, meta):
return compat_urlparse.urlunparse(('http', meta['streamKey']['ip'], '/stream.php', None, None, None))
def _build_swf_referer(self, target, obj):
(uri, _, _) = self._parse_target(target)
return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))
def _transform_bootstrap(self, js):
return re.split('(?m)^\s*try\s*\{', js)[0] \
.split(' = ', 1)[1].strip().rstrip(';')
def _transform_meta(self, js):
return js.split('\n')[0].split('=')[1].rstrip(';')
def _get_meta(self, target):
(meta_url, token) = self._build_meta_url(target)
self.to_screen('Metadata URL: %s' % meta_url)
headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
req = compat_urllib_request.Request(meta_url, headers=headers)
res = self._download_json(req, token,
transform_source=self._transform_meta)
if 'getStreamKeyWithSong' not in res:
raise ExtractorError(
'Metadata not found. URL may be malformed, or Grooveshark API may have changed.')
if res['getStreamKeyWithSong'] is None:
raise ExtractorError(
'Metadata download failed, probably due to Grooveshark anti-abuse throttling. Wait at least an hour before retrying from this IP.',
expected=True)
return res['getStreamKeyWithSong']
def _get_bootstrap(self, target):
(bootstrap_url, token) = self._build_bootstrap_url(target)
headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
req = compat_urllib_request.Request(bootstrap_url, headers=headers)
res = self._download_json(req, token, fatal=False,
note='Downloading player bootstrap data',
errnote='Unable to download player bootstrap data',
transform_source=self._transform_bootstrap)
return res
def _get_playerpage(self, target):
(_, _, token) = self._parse_target(target)
webpage = self._download_webpage(
target, token,
note='Downloading player page',
errnote='Unable to download player page',
fatal=False)
if webpage is not None:
# Search (for example German) error message
error_msg = self._html_search_regex(
r'<div id="content">\s*<h2>(.*?)</h2>', webpage,
'error message', default=None)
if error_msg is not None:
error_msg = error_msg.replace('\n', ' ')
raise ExtractorError('Grooveshark said: %s' % error_msg)
if webpage is not None:
o = GroovesharkHtmlParser.extract_object_tags(webpage)
return webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed']
return webpage, None
def _real_initialize(self):
self.ts = int(time.time() * 1000) # timestamp in millis
def _real_extract(self, url):
(target_uri, _, token) = self._parse_target(url)
# 1. Fill cookiejar by making a request to the player page
swf_referer = None
if self.do_playerpage_request:
(_, player_objs) = self._get_playerpage(url)
if player_objs:
swf_referer = self._build_swf_referer(url, player_objs[0])
self.to_screen('SWF Referer: %s' % swf_referer)
# 2. Ask preload.php for swf bootstrap data to better mimic webapp
if self.do_bootstrap_request:
bootstrap = self._get_bootstrap(url)
self.to_screen('CommunicationToken: %s' % bootstrap['getCommunicationToken'])
# 3. Ask preload.php for track metadata.
meta = self._get_meta(url)
# 4. Construct stream request for track.
stream_url = self._build_stream_url(meta)
duration = int(math.ceil(float(meta['streamKey']['uSecs']) / 1000000))
post_dict = {'streamKey': meta['streamKey']['streamKey']}
post_data = compat_urllib_parse.urlencode(post_dict).encode('utf-8')
headers = {
'Content-Length': len(post_data),
'Content-Type': 'application/x-www-form-urlencoded'
}
if swf_referer is not None:
headers['Referer'] = swf_referer
return {
'id': token,
'title': meta['song']['Name'],
'http_method': 'POST',
'url': stream_url,
'ext': 'mp3',
'format': 'mp3 audio',
'duration': duration,
'http_post_data': post_data,
'http_headers': headers,
}

View File: youtube_dl/extractor/iconosquare.py

@@ -1,36 +1,75 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import int_or_none
class IconosquareIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
_TEST = {
'url': 'http://statigr.am/p/522207370455279102_24101272',
'md5': '6eb93b882a3ded7c378ee1d6884b1814',
'info_dict': {
'id': '522207370455279102_24101272',
'ext': 'mp4',
'uploader_id': 'aguynamedpatrick',
'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
'title': 'Instagram media by @aguynamedpatrick (Patrick Janelle)',
'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
'timestamp': 1376471991,
'upload_date': '20130814',
'uploader': 'aguynamedpatrick',
'uploader_id': '24101272',
'comment_count': int,
'like_count': int,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
media = self._parse_json(
self._search_regex(
r'window\.media\s*=\s*({.+?});\n', webpage, 'media'),
video_id)
formats = [{
'url': f['url'],
'format_id': format_id,
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height'))
} for format_id, f in media['videos'].items()]
self._sort_formats(formats)
title = self._html_search_regex(
r'<title>(.+?)(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)</title>',
webpage, 'title')
uploader_id = self._html_search_regex(
r'@([^ ]+)', title, 'uploader name', fatal=False)
timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time'))
description = media.get('caption', {}).get('text')
uploader = media.get('user', {}).get('username')
uploader_id = media.get('user', {}).get('id')
comment_count = int_or_none(media.get('comments', {}).get('count'))
like_count = int_or_none(media.get('likes', {}).get('count'))
thumbnails = [{
'url': t['url'],
'id': thumbnail_id,
'width': int_or_none(t.get('width')),
'height': int_or_none(t.get('height'))
} for thumbnail_id, t in media.get('images', {}).items()]
return {
'id': video_id,
'url': self._og_search_video_url(webpage),
'title': title,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader_id': uploader_id
'description': description,
'thumbnails': thumbnails,
'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
'comment_count': comment_count,
'like_count': like_count,
'formats': formats,
}

View File: youtube_dl/extractor/lifenews.py

@@ -16,7 +16,7 @@ class LifeNewsIE(InfoExtractor):
IE_DESC = 'LIFE | NEWS'
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
_TEST = {
_TESTS = [{
'url': 'http://lifenews.ru/news/126342',
'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
'info_dict': {
@@ -27,7 +27,19 @@ class LifeNewsIE(InfoExtractor):
'thumbnail': 're:http://.*\.jpg',
'upload_date': '20140130',
}
}
}, {
# video in <iframe>
'url': 'http://lifenews.ru/news/152125',
'md5': '77d19a6f0886cd76bdbf44b4d971a273',
'info_dict': {
'id': '152125',
'ext': 'mp4',
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
'upload_date': '20150402',
'uploader': 'embed.life.ru',
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -36,7 +48,9 @@ class LifeNewsIE(InfoExtractor):
webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')
videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
if not videos:
iframe_link = self._html_search_regex(
'<iframe[^>]+src="([^"]+)', webpage, 'iframe link', default=None)
if not videos and not iframe_link:
raise ExtractorError('No media links available for %s' % video_id)
title = self._og_search_title(webpage)
@@ -47,26 +61,41 @@ class LifeNewsIE(InfoExtractor):
description = self._og_search_description(webpage)
view_count = self._html_search_regex(
r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False)
r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
comment_count = self._html_search_regex(
r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False)
r'<div class=\'comments\'>\s*<span class=\'counter\'>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)
upload_date = self._html_search_regex(
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
if upload_date is not None:
upload_date = unified_strdate(upload_date)
common_info = {
'description': description,
'view_count': int_or_none(view_count),
'comment_count': int_or_none(comment_count),
'upload_date': upload_date,
}
def make_entry(video_id, media, video_number=None):
return {
cur_info = dict(common_info)
cur_info.update({
'id': video_id,
'url': media[1],
'thumbnail': media[0],
'title': title if video_number is None else '%s-video%s' % (title, video_number),
'description': description,
'view_count': int_or_none(view_count),
'comment_count': int_or_none(comment_count),
'upload_date': upload_date,
}
})
return cur_info
if iframe_link:
cur_info = dict(common_info)
cur_info.update({
'_type': 'url_transparent',
'id': video_id,
'title': title,
'url': iframe_link,
})
return cur_info
if len(videos) == 1:
return make_entry(video_id, videos[0])
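
The iframe branch returns a 'url_transparent' result: extraction of the iframe URL is delegated to whichever extractor matches it, while the metadata scraped from this page (counts, upload date) is merged into that result. A self-contained sketch, with a hypothetical embed URL:

def make_iframe_result(video_id, title, iframe_link, common_info):
    result = dict(common_info)
    result.update({
        '_type': 'url_transparent',
        'id': video_id,
        'title': title,
        'url': iframe_link,
    })
    return result

result = make_iframe_result(
    '152125', 'sample title', 'http://embed.life.ru/embed/xyz',
    {'view_count': 100, 'upload_date': '20150402'})
assert result['_type'] == 'url_transparent' and result['view_count'] == 100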

View File: youtube_dl/extractor/mlb.py

@@ -10,7 +10,7 @@ from ..utils import (
class MLBIE(InfoExtractor):
_VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
_VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/(?:embed|m-internal-embed)\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
_TESTS = [
{
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
@@ -83,6 +83,11 @@ class MLBIE(InfoExtractor):
{
'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
'only_matching': True,
},
{
# From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer
'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
'only_matching': True,
}
]

View File: youtube_dl/extractor/nbc.py

@@ -37,13 +37,26 @@ class NBCIE(InfoExtractor):
},
'skip': 'Only works from US',
},
{
'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
'info_dict': {
'id': '8iUuyzWDdYUZ',
'ext': 'flv',
'title': 'Star Wars Teaser',
'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
},
'skip': 'Only works from US',
}
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
theplatform_url = self._search_regex(
'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
[
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
r'"embedURL"\s*:\s*"([^"]+)"'
],
webpage, 'theplatform url').replace('_no_endcard', '')
if theplatform_url.startswith('//'):
theplatform_url = 'http:' + theplatform_url
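
The final lines handle protocol-relative embed URLs; the pattern in isolation:

def normalize_protocol_relative(url, scheme='http'):
    # '//host/path' inherits the page's scheme, so prefix one explicitly.
    return scheme + ':' + url if url.startswith('//') else url

assert (normalize_protocol_relative('//player.theplatform.com/p/x') ==
        'http://player.theplatform.com/p/x')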

View File: youtube_dl/extractor/niconico.py

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re
import json
import datetime
from .common import InfoExtractor
from ..compat import (
@@ -14,7 +15,9 @@ from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
unified_strdate,
parse_iso8601,
xpath_text,
determine_ext,
)
@@ -32,30 +35,50 @@ class NiconicoIE(InfoExtractor):
'uploader': 'takuya0301',
'uploader_id': '2698420',
'upload_date': '20131123',
'timestamp': 1385182762,
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'duration': 33,
},
'params': {
'username': 'ydl.niconico@gmail.com',
'password': 'youtube-dl',
},
}, {
# File downloaded with and without credentials are different, so omit
# the md5 field
'url': 'http://www.nicovideo.jp/watch/nm14296458',
'md5': '8db08e0158457cf852a31519fceea5bc',
'info_dict': {
'id': 'nm14296458',
'ext': 'swf',
'title': '【鏡音リン】Dance on media【オリジナル】take2!',
'description': 'md5:',
'description': 'md5:689f066d74610b3b22e0f1739add0f58',
'uploader': 'りょうた',
'uploader_id': '18822557',
'upload_date': '20110429',
'timestamp': 1304065916,
'duration': 209,
},
'params': {
'username': 'ydl.niconico@gmail.com',
'password': 'youtube-dl',
}, {
# 'video exists but is marked as "deleted"
# md5 is unstable
'url': 'http://www.nicovideo.jp/watch/sm10000',
'info_dict': {
'id': 'sm10000',
'ext': 'unknown_video',
'description': 'deleted',
'title': 'ドラえもんエターナル第3話「決戦第3新東京市」前編',
'upload_date': '20071224',
'timestamp': 1198527840, # timestamp field has different value if logged in
'duration': 304,
},
}, {
'url': 'http://www.nicovideo.jp/watch/so22543406',
'info_dict': {
'id': '1388129933',
'ext': 'mp4',
'title': '【第1回】RADIOアニメロミックス ラブライブのぞえりRadio Garden',
'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
'timestamp': 1388851200,
'upload_date': '20140104',
'uploader': 'アニメロチャンネル',
'uploader_id': '312',
}
}]
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
@@ -95,9 +118,13 @@ class NiconicoIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
# Get video webpage. We are not actually interested in it, but need
# the cookies in order to be able to download the info webpage
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
# Get video webpage. We are not actually interested in it for normal
# cases, but need the cookies in order to be able to download the
# info webpage
webpage, handle = self._download_webpage_handle(
'http://www.nicovideo.jp/watch/' + video_id, video_id)
if video_id.startswith('so'):
video_id = self._match_id(handle.geturl())
video_info = self._download_xml(
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
@@ -127,22 +154,78 @@ class NiconicoIE(InfoExtractor):
flv_info_request, video_id,
note='Downloading flv info', errnote='Unable to download flv info')
if 'deleted=' in flv_info_webpage:
raise ExtractorError('The video has been deleted.',
expected=True)
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
flv_info = compat_urlparse.parse_qs(flv_info_webpage)
if 'url' not in flv_info:
if 'deleted' in flv_info:
raise ExtractorError('The video has been deleted.',
expected=True)
else:
raise ExtractorError('Unable to find video URL')
video_real_url = flv_info['url'][0]
# Start extracting information
title = video_info.find('.//title').text
extension = video_info.find('.//movie_type').text
title = xpath_text(video_info, './/title')
if not title:
title = self._og_search_title(webpage, default=None)
if not title:
title = self._html_search_regex(
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
webpage, 'video title')
watch_api_data_string = self._html_search_regex(
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
webpage, 'watch api data', default=None)
watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
video_detail = watch_api_data.get('videoDetail', {})
extension = xpath_text(video_info, './/movie_type')
if not extension:
extension = determine_ext(video_real_url)
video_format = extension.upper()
thumbnail = video_info.find('.//thumbnail_url').text
description = video_info.find('.//description').text
upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
view_count = int_or_none(video_info.find('.//view_counter').text)
comment_count = int_or_none(video_info.find('.//comment_num').text)
duration = parse_duration(video_info.find('.//length').text)
webpage_url = video_info.find('.//watch_url').text
thumbnail = (
xpath_text(video_info, './/thumbnail_url') or
self._html_search_meta('image', webpage, 'thumbnail', default=None) or
video_detail.get('thumbnail'))
description = xpath_text(video_info, './/description')
timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve'))
if not timestamp:
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
if match:
timestamp = parse_iso8601(match.replace('+', ':00+'))
if not timestamp and video_detail.get('postedAt'):
timestamp = parse_iso8601(
video_detail['postedAt'].replace('/', '-'),
delimiter=' ', timezone=datetime.timedelta(hours=9))
view_count = int_or_none(xpath_text(video_info, './/view_counter'))
if not view_count:
match = self._html_search_regex(
r'>Views: <strong[^>]*>([^<]+)</strong>',
webpage, 'view count', default=None)
if match:
view_count = int_or_none(match.replace(',', ''))
view_count = view_count or video_detail.get('viewCount')
comment_count = int_or_none(xpath_text(video_info, './/comment_num'))
if not comment_count:
match = self._html_search_regex(
r'>Comments: <strong[^>]*>([^<]+)</strong>',
webpage, 'comment count', default=None)
if match:
comment_count = int_or_none(match.replace(',', ''))
comment_count = comment_count or video_detail.get('commentCount')
duration = (parse_duration(
xpath_text(video_info, './/length') or
self._html_search_meta(
'video:duration', webpage, 'video duration', default=None)) or
video_detail.get('length'))
webpage_url = xpath_text(video_info, './/watch_url') or url
if video_info.find('.//ch_id') is not None:
uploader_id = video_info.find('.//ch_id').text
@@ -162,7 +245,7 @@ class NiconicoIE(InfoExtractor):
'thumbnail': thumbnail,
'description': description,
'uploader': uploader,
'upload_date': upload_date,
'timestamp': timestamp,
'uploader_id': uploader_id,
'view_count': view_count,
'comment_count': comment_count,
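
Much of this rework is a layered-fallback pattern: try the getthumbinfo XML first, then page markup, then the watchAPIDataContainer JSON. The kernel of it, with hypothetical stand-in values:

def first_of(*candidates):
    # Mirrors the `a or b or c` fallback chains used above.
    for value in candidates:
        if value:
            return value
    return None

assert first_of(None, '', 'http://example.com/thumb.jpg') == \
    'http://example.com/thumb.jpg'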

View File: youtube_dl/extractor/pbs.py

@@ -5,6 +5,8 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
unified_strdate,
US_RATINGS,
)
@@ -149,21 +151,44 @@ class PBSIE(InfoExtractor):
for vid_id in video_id]
return self.playlist_result(entries, display_id)
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info = self._download_json(info_url, display_id)
info = self._download_json(
'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
display_id)
redirect_url = info['alternate_encoding']['url']
redirect_info = self._download_json(
redirect_url + '?format=json', display_id,
'Downloading video url info')
if redirect_info['status'] == 'error':
if redirect_info['http_code'] == 403:
message = (
'The video is not available in your region due to '
'right restrictions')
formats = []
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
redirect = info.get(encoding_name)
if not redirect:
continue
redirect_url = redirect.get('url')
if not redirect_url:
continue
redirect_info = self._download_json(
redirect_url + '?format=json', display_id,
'Downloading %s video url info' % encoding_name)
if redirect_info['status'] == 'error':
if redirect_info['http_code'] == 403:
message = (
'The video is not available in your region due to '
'right restrictions')
else:
message = redirect_info['message']
raise ExtractorError(message, expected=True)
format_url = redirect_info.get('url')
if not format_url:
continue
if determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, display_id, 'mp4', preference=1, m3u8_id='hls'))
else:
message = redirect_info['message']
raise ExtractorError(message, expected=True)
formats.append({
'url': format_url,
})
self._sort_formats(formats)
rating_str = info.get('rating')
if rating_str is not None:
@@ -174,11 +199,10 @@ class PBSIE(InfoExtractor):
'id': video_id,
'display_id': display_id,
'title': info['title'],
'url': redirect_info['url'],
'ext': 'mp4',
'description': info['program'].get('description'),
'thumbnail': info.get('image_url'),
'duration': info.get('duration'),
'duration': int_or_none(info.get('duration')),
'age_limit': age_limit,
'upload_date': upload_date,
'formats': formats,
}
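
A compressed sketch of the new encoding loop; the two callables stand in for the real _download_json and _extract_m3u8_formats helpers, and the actual code also inspects the redirect status and uses determine_ext:

def collect_formats(info, download_redirect, extract_m3u8_formats):
    formats = []
    for name in ('recommended_encoding', 'alternate_encoding'):
        redirect_url = (info.get(name) or {}).get('url')
        if not redirect_url:
            continue
        format_url = download_redirect(redirect_url).get('url')
        if not format_url:
            continue
        if format_url.endswith('.m3u8'):
            # HLS manifests expand into one format per bitrate.
            formats.extend(extract_m3u8_formats(format_url))
        else:
            formats.append({'url': format_url})
    return formats

fmts = collect_formats(
    {'recommended_encoding': {'url': 'http://example.com/redir'}},
    lambda u: {'url': 'http://example.com/master.m3u8'},
    lambda u: [{'url': u + '/hi'}, {'url': u + '/lo'}])
assert len(fmts) == 2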

View File: youtube_dl/extractor/rutv.py

@@ -84,11 +84,20 @@ class RUTVIE(InfoExtractor):
'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
},
'skip': 'Translation has finished',
},
{
'url': 'http://live.russia.tv/index/index/channel_id/3',
'info_dict': {
'id': '21',
'ext': 'mp4',
'title': 're:^Россия 24. Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': True,
},
'params': {
# rtmp download
# m3u8 download
'skip_download': True,
},
'skip': 'Translation has finished',
},
]
@@ -181,12 +190,15 @@ class RUTVIE(InfoExtractor):
self._sort_formats(formats)
is_live = video_type == 'live'
return {
'id': video_id,
'title': title,
'title': self._live_title(title) if is_live else title,
'description': description,
'thumbnail': thumbnail,
'view_count': view_count,
'duration': duration,
'formats': formats,
'is_live': is_live,
}

View File: youtube_dl/extractor/testtube.py

@@ -15,19 +15,37 @@ class TestTubeIE(InfoExtractor):
'id': '60163',
'display_id': '5-weird-ways-plants-can-eat-animals',
'duration': 275,
'ext': 'mp4',
'ext': 'webm',
'title': '5 Weird Ways Plants Can Eat Animals',
'description': 'Why have some plants evolved to eat meat?',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'DNews',
'uploader_id': 'dnews',
},
}, {
'url': 'https://testtube.com/iflscience/insane-jet-ski-flipping',
'info_dict': {
'id': 'fAGfJ4YjVus',
'ext': 'mp4',
'title': 'Flipping Jet-Ski Skills | Outrageous Acts of Science',
'uploader': 'Science Channel',
'uploader_id': 'ScienceChannel',
'upload_date': '20150203',
'description': 'md5:e61374030015bae1d2e22f096d4769d6',
}
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
youtube_url = self._html_search_regex(
r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
webpage, 'youtube iframe', default=None)
if youtube_url:
return self.url_result(youtube_url, 'Youtube', video_id=display_id)
video_id = self._search_regex(
r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);",
webpage, 'video ID')

View File

@@ -129,7 +129,9 @@ class ThePlatformIE(InfoExtractor):
head = meta.find(_x('smil:head'))
body = meta.find(_x('smil:body'))
f4m_node = body.find(_x('smil:seq//smil:video')) or body.find(_x('smil:seq/smil:video'))
f4m_node = body.find(_x('smil:seq//smil:video'))
if f4m_node is None:
f4m_node = body.find(_x('smil:seq/smil:video'))
if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
f4m_url = f4m_node.attrib['src']
if 'manifest.f4m?' not in f4m_url:
@@ -142,7 +144,9 @@ class ThePlatformIE(InfoExtractor):
formats = []
switch = body.find(_x('smil:switch'))
if switch is None:
switch = body.find(_x('smil:par//smil:switch')) or body.find(_x('smil:par/smil:switch'))
switch = body.find(_x('smil:par//smil:switch'))
if switch is None:
switch = body.find(_x('smil:par/smil:switch'))
if switch is None:
switch = body.find(_x('smil:par'))
if switch is not None:
@@ -163,7 +167,9 @@ class ThePlatformIE(InfoExtractor):
'vbr': vbr,
})
else:
switch = body.find(_x('smil:seq//smil:switch')) or body.find(_x('smil:seq/smil:switch'))
switch = body.find(_x('smil:seq//smil:switch'))
if switch is None:
switch = body.find(_x('smil:seq/smil:switch'))
for f in switch.findall(_x('smil:video')):
attr = f.attrib
vbr = int_or_none(attr.get('system-bitrate'), 1000)
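The reason the `find(a) or find(b)` pattern had to go: an ElementTree Element with no children evaluates as false, so a node that was found but happens to be empty would wrongly fall through to the second lookup. A minimal demonstration:

    import xml.etree.ElementTree as etree

    root = etree.fromstring('<seq><video src="clip.f4m"/></seq>')
    video = root.find('video')

    assert video is not None   # the node was found...
    assert len(video) == 0     # ...but it is childless, so `if video:` (and
                               # therefore `find(a) or find(b)`) treats it as false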

View File

@@ -17,7 +17,9 @@ from ..utils import (
class VeeHDIE(InfoExtractor):
_VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
_TEST = {
# VeeHD videos seem to have multiple copies on several servers, all of
# which have different MD5 checksums, so the md5 field is omitted in all tests
_TESTS = [{
'url': 'http://veehd.com/video/4639434_Solar-Sinter',
'info_dict': {
'id': '4639434',
@@ -26,7 +28,26 @@ class VeeHDIE(InfoExtractor):
'uploader_id': 'VideoEyes',
'description': 'md5:46a840e8692ddbaffb5f81d9885cb457',
},
}
'skip': 'Video deleted',
}, {
'url': 'http://veehd.com/video/4905758_Elysian-Fields-Channeling',
'info_dict': {
'id': '4905758',
'ext': 'mp4',
'title': 'Elysian Fields - Channeling',
'description': 'md5:360e4e95fdab58aefbea0f2a19e5604b',
'uploader_id': 'spotted',
}
}, {
'url': 'http://veehd.com/video/2046729_2012-2009-DivX-Trailer',
'info_dict': {
'id': '2046729',
'ext': 'avi',
'title': '2012 (2009) DivX Trailer',
'description': 'md5:75435ee95255e6a9838ac6f6f3a2396b',
'uploader_id': 'Movie_Trailers',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -48,13 +69,21 @@ class VeeHDIE(InfoExtractor):
player_page = self._download_webpage(
player_url, video_id, 'Downloading player page')
video_url = None
config_json = self._search_regex(
r'value=\'config=({.+?})\'', player_page, 'config json', default=None)
if config_json:
config = json.loads(config_json)
video_url = compat_urlparse.unquote(config['clip']['url'])
else:
if not video_url:
video_url = self._html_search_regex(
r'<embed[^>]+type="video/divx"[^>]+src="([^"]+)"',
player_page, 'video url', default=None)
if not video_url:
iframe_src = self._search_regex(
r'<iframe[^>]+src="/?([^"]+)"', player_page, 'iframe url')
iframe_url = 'http://veehd.com/%s' % iframe_src
@@ -82,7 +111,6 @@ class VeeHDIE(InfoExtractor):
'id': video_id,
'title': title,
'url': video_url,
'ext': 'mp4',
'uploader_id': uploader_id,
'thumbnail': thumbnail,
'description': description,

View File

@@ -8,7 +8,7 @@ from ..utils import float_or_none
class VGTVIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/(?:.*)/(?P<id>[0-9]+)'
_VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/[^/]+/(?P<id>[0-9]+)'
_TESTS = [
{
# streamType: vod

View File

@@ -2,12 +2,17 @@ from __future__ import unicode_literals
import re
from ..compat import compat_urlparse
from ..compat import (
compat_urlparse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
unescapeHTML,
unified_strdate,
US_RATINGS,
determine_ext,
mimetype2ext,
)
from .common import InfoExtractor
@@ -15,8 +20,11 @@ from .common import InfoExtractor
class VikiIE(InfoExtractor):
IE_NAME = 'viki'
# iPad2
_USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5'
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
_TEST = {
_TESTS = [{
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
'info_dict': {
'id': '1023585v',
@@ -28,7 +36,30 @@ class VikiIE(InfoExtractor):
'age_limit': 13,
},
'skip': 'Blocked in the US',
}
}, {
'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c',
'info_dict': {
'id': '1067139v',
'ext': 'mp4',
'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
'upload_date': '20150430',
'title': '\'The Avengers: Age of Ultron\' Press Conference',
}
}, {
'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
'info_dict': {
'id': '1048879v',
'ext': 'mp4',
'upload_date': '20140820',
'description': 'md5:54ff56d51bdfc7a30441ec967394e91c',
'title': 'Ankhon Dekhi',
},
'params': {
# requires ffmpeg
'skip_download': True,
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -50,15 +81,34 @@ class VikiIE(InfoExtractor):
'rating information', default='').strip()
age_limit = US_RATINGS.get(rating_str)
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
req = compat_urllib_request.Request(
'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id)
req.add_header('User-Agent', self._USER_AGENT)
info_webpage = self._download_webpage(
info_url, video_id, note='Downloading info page')
if re.match(r'\s*<div\s+class="video-error', info_webpage):
raise ExtractorError(
'Video %s is blocked from your location.' % video_id,
expected=True)
video_url = self._html_search_regex(
r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')
req, video_id, note='Downloading info page')
err_msg = self._html_search_regex(
r'<div[^>]+class="video-error[^>]+>(.+)</div>',
info_webpage, 'error message', default=None)
if err_msg:
if 'not available in your region' in err_msg:
raise ExtractorError(
'Video %s is blocked from your location.' % video_id,
expected=True)
else:
raise ExtractorError('Viki said: ' + err_msg)
mobj = re.search(
r'<source[^>]+type="(?P<mime_type>[^"]+)"[^>]+src="(?P<url>[^"]+)"', info_webpage)
if not mobj:
raise ExtractorError('Unable to find video URL')
video_url = unescapeHTML(mobj.group('url'))
video_ext = mimetype2ext(mobj.group('mime_type'))
if determine_ext(video_url) == 'm3u8':
formats = self._extract_m3u8_formats(
video_url, video_id, ext=video_ext)
else:
formats = [{
'url': video_url,
'ext': video_ext,
}]
upload_date_str = self._html_search_regex(
r'"created_at":"([^"]+)"', info_webpage, 'upload date')
@@ -74,7 +124,7 @@ class VikiIE(InfoExtractor):
return {
'id': video_id,
'title': title,
'url': video_url,
'formats': formats,
'description': description,
'thumbnail': thumbnail,
'age_limit': age_limit,
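The determine_ext branch above decides between expanding an HLS manifest and emitting a single direct format; a quick sketch with illustrative URLs (these are not real Viki streams, and the youtube_dl package is assumed to be importable):

    from youtube_dl.utils import determine_ext

    assert determine_ext('http://example.com/hls/index.m3u8') == 'm3u8'  # -> _extract_m3u8_formats
    assert determine_ext('http://example.com/clips/video.mp4') == 'mp4'  # -> single direct format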

View File

@@ -177,7 +177,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
password = self._downloader.params.get('videopassword', None)
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token')
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
data = urlencode_postdata({
'password': password,
'token': token,
@@ -223,6 +223,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
orig_url = url
if mobj.group('pro') or mobj.group('player'):
url = 'https://player.vimeo.com/video/' + video_id
else:
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers)
@@ -439,7 +441,7 @@ class VimeoChannelIE(InfoExtractor):
name="([^"]+)"\s+
value="([^"]*)"
''', login_form))
token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token')
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
fields['token'] = token
fields['password'] = password
post = urlencode_postdata(fields)
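The loosened token pattern tolerates both the JSON-style and assignment-style markup variants; the snippets below are illustrative examples, not captured pages:

    import re

    TOKEN_RE = r'xsrft[\s=:"\']+([^"\']+)'

    assert re.search(TOKEN_RE, '{"xsrft":"abc123"}').group(1) == 'abc123'
    assert re.search(TOKEN_RE, "xsrft = 'abc123';").group(1) == 'abc123'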

View File

@@ -27,9 +27,6 @@ class VpornIE(InfoExtractor):
'duration': 393,
'age_limit': 18,
'view_count': int,
'like_count': int,
'dislike_count': int,
'comment_count': int,
}
},
{
@@ -47,9 +44,6 @@ class VpornIE(InfoExtractor):
'duration': 588,
'age_limit': 18,
'view_count': int,
'like_count': int,
'dislike_count': int,
'comment_count': int,
}
},
]
@@ -64,29 +58,29 @@ class VpornIE(InfoExtractor):
title = self._html_search_regex(
r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
description = self._html_search_regex(
r'<div class="description_txt">(.*?)</div>', webpage, 'description', fatal=False)
r'class="(?:descr|description_txt)">(.*?)</div>',
webpage, 'description', fatal=False)
thumbnail = self._html_search_regex(
r'flashvars\.imageUrl\s*=\s*"([^"]+)"',
webpage, 'thumbnail', fatal=False, default=None)
if thumbnail:
thumbnail = 'http://www.vporn.com' + thumbnail
uploader = self._html_search_regex(
r'(?s)UPLOADED BY.*?<a href="/user/[^"]+">([^<]+)</a>',
r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',
webpage, 'uploader', fatal=False)
categories = re.findall(r'<a href="/cat/[^"]+">([^<]+)</a>', webpage)
categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage)
duration = parse_duration(self._search_regex(
r'duration (\d+ min \d+ sec)', webpage, 'duration', fatal=False))
r'Runtime:\s*</span>\s*(\d+ min \d+ sec)',
webpage, 'duration', fatal=False))
view_count = str_to_int(self._html_search_regex(
r'<span>([\d,\.]+) VIEWS</span>', webpage, 'view count', fatal=False))
like_count = str_to_int(self._html_search_regex(
r'<span id="like" class="n">([\d,\.]+)</span>', webpage, 'like count', fatal=False))
dislike_count = str_to_int(self._html_search_regex(
r'<span id="dislike" class="n">([\d,\.]+)</span>', webpage, 'dislike count', fatal=False))
view_count = str_to_int(self._search_regex(
r'class="views">([\d,\.]+) [Vv]iews<',
webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex(
r'<h4>Comments \(<b>([\d,\.]+)</b>\)</h4>', webpage, 'comment count', fatal=False))
r"'Comments \(([\d,\.]+)\)'",
webpage, 'comment count', default=None))
formats = []
@@ -117,8 +111,6 @@ class VpornIE(InfoExtractor):
'categories': categories,
'duration': duration,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'comment_count': comment_count,
'age_limit': 18,
'formats': formats,

View File

@@ -22,7 +22,7 @@ from .nbc import NBCSportsVPlayerIE
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
_VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+?)-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
_VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
_TESTS = [
{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@@ -140,12 +140,15 @@ class YahooIE(InfoExtractor):
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
}
}, {
'url': 'https://tw.news.yahoo.com/-100120367.html',
'only_matching': True,
}
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
display_id = mobj.group('display_id') or self._match_id(url)
page_id = mobj.group('id')
url = mobj.group('url')
host = mobj.group('host')
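Making display_id optional is what lets the bare tw.news.yahoo.com URL above match; when the group is absent, extraction falls back to the numeric id. A quick check against the new pattern:

    import re

    _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'

    m = re.match(_VALID_URL, 'https://tw.news.yahoo.com/-100120367.html')
    assert m.group('id') == '100120367'
    assert m.group('display_id') is None   # hence `or self._match_id(url)`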

View File

@@ -47,7 +47,7 @@ class YouPornIE(InfoExtractor):
# Get JSON parameters
json_params = self._search_regex(
[r'var\s+videoJa?son\s*=\s*({.+?});',
[r'videoJa?son\s*=\s*({.+})',
r'var\s+currentVideo\s*=\s*new\s+Video\((.+?)\)[,;]'],
webpage, 'JSON parameters')
try:

View File

@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -16,7 +14,7 @@ class YourUploadIE(InfoExtractor):
_TESTS = [
{
'url': 'http://yourupload.com/watch/14i14h',
'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
'md5': '5e2c63385454c557f97c4c4131a393cd',
'info_dict': {
'id': '14i14h',
'ext': 'mp4',
@@ -35,24 +33,21 @@ class YourUploadIE(InfoExtractor):
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
url = 'http://embed.yucache.net/{0:}'.format(video_id)
webpage = self._download_webpage(url, video_id)
embed_url = 'http://embed.yucache.net/{0:}'.format(video_id)
webpage = self._download_webpage(embed_url, video_id)
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
url = self._og_search_video_url(webpage)
formats = [{
'format_id': 'sd',
'url': url,
}]
video_url = self._og_search_video_url(webpage)
thumbnail = self._og_search_thumbnail(webpage, default=None)
return {
'id': video_id,
'title': title,
'formats': formats,
'url': video_url,
'thumbnail': thumbnail,
'http_headers': {
'Referer': embed_url,
},
}

View File

@@ -1291,12 +1291,22 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
page = self._download_webpage(url, playlist_id)
more_widget_html = content_html = page
# Check if the playlist exists or is private
if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
raise ExtractorError(
'The playlist doesn\'t exist or is private, use --username or '
'--netrc to access it.',
expected=True)
for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
match = match.strip()
# Check if the playlist exists or is private
if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
raise ExtractorError(
'The playlist doesn\'t exist or is private, use --username or '
'--netrc to access it.',
expected=True)
elif re.match(r'[^<]*Invalid parameters[^<]*', match):
raise ExtractorError(
'Invalid parameters. Maybe URL is incorrect.',
expected=True)
elif re.match(r'[^<]*Choose your language[^<]*', match):
continue
else:
self.report_warning('YouTube gave an alert message: ' + match)
# Extract the video ids from the playlist pages
ids = []
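The alert loop now distinguishes four cases: two fatal errors, one ignorable language prompt, and a catch-all warning. A stand-alone sketch of that triage (the helper name is hypothetical):

    import re

    def classify_alert(message):
        # Hypothetical helper mirroring the branches above
        if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', message):
            return 'missing-or-private'   # fatal; suggests --username/--netrc
        elif re.match(r'[^<]*Invalid parameters[^<]*', message):
            return 'bad-url'              # fatal; the URL is probably wrong
        elif re.match(r'[^<]*Choose your language[^<]*', message):
            return 'ignore'               # harmless interstitial
        return 'warn'                     # anything else is surfaced as a warning

    assert classify_alert('This playlist is private') == 'missing-or-private'
    assert classify_alert('Choose your language') == 'ignore'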

View File

@@ -637,7 +637,7 @@ def parseOpts(overrideArguments=None):
filesystem.add_option(
'--write-annotations',
action='store_true', dest='writeannotations', default=False,
help='Write video annotations to a .annotation file')
help='Write video annotations to a .annotations.xml file')
filesystem.add_option(
'--load-info',
dest='load_info_filename', metavar='FILE',
@@ -698,7 +698,7 @@ def parseOpts(overrideArguments=None):
postproc.add_option(
'--embed-subs',
action='store_true', dest='embedsubtitles', default=False,
help='Embed subtitles in the video (only for mp4 videos)')
help='Embed subtitles in the video (only for mkv and mp4 videos)')
postproc.add_option(
'--embed-thumbnail',
action='store_true', dest='embedthumbnail', default=False,

View File

@@ -501,8 +501,8 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
return cls._lang_map.get(code[:2])
def run(self, information):
if information['ext'] != 'mp4':
self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
if information['ext'] not in ['mp4', 'mkv']:
self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files')
return [], information
subtitles = information.get('requested_subtitles')
if not subtitles:
@@ -520,8 +520,9 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
# Don't copy the existing subtitles, we may be running the
# postprocessor a second time
'-map', '-0:s',
'-c:s', 'mov_text',
]
if information['ext'] == 'mp4':
opts += ['-c:s', 'mov_text']
for (i, lang) in enumerate(sub_langs):
opts.extend(['-map', '%d:0' % (i + 1)])
lang_code = self._conver_lang_code(lang)
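The container now dictates the subtitle codec: mp4 only carries mov_text, while for mkv no codec override is needed since ffmpeg's defaults handle text subtitle streams. A simplified sketch of just that decision (the real postprocessor also maps inputs and sets language tags):

    def subtitle_codec_opts(ext):
        # Simplified: mirrors only the codec choice from the diff above
        opts = ['-map', '-0:s']            # drop subtitles already in the file
        if ext == 'mp4':
            opts += ['-c:s', 'mov_text']   # mp4 requires mov_text subtitles
        return opts                        # mkv: leave the codec to ffmpeg

    assert subtitle_codec_opts('mp4') == ['-map', '-0:s', '-c:s', 'mov_text']
    assert subtitle_codec_opts('mkv') == ['-map', '-0:s']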

View File

@@ -1349,9 +1349,19 @@ def parse_duration(s):
return res
def prepend_extension(filename, ext):
def prepend_extension(filename, ext, expected_real_ext=None):
name, real_ext = os.path.splitext(filename)
return '{0}.{1}{2}'.format(name, ext, real_ext)
return (
'{0}.{1}{2}'.format(name, ext, real_ext)
if not expected_real_ext or real_ext[1:] == expected_real_ext
else '{0}.{1}'.format(filename, ext))
def replace_extension(filename, ext, expected_real_ext=None):
name, real_ext = os.path.splitext(filename)
return '{0}.{1}'.format(
name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
ext)
def check_executable(exe, args=[]):
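How the expected_real_ext guard behaves in both helpers, as a doctest-style sketch (assuming they are importable from youtube_dl.utils):

    from youtube_dl.utils import prepend_extension, replace_extension

    # Real extension matches (or no expectation): splice the new part in front of it
    assert prepend_extension('part.mp4', 'temp') == 'part.temp.mp4'
    assert prepend_extension('part.mp4', 'temp', 'mp4') == 'part.temp.mp4'
    # Mismatch: keep the filename intact and append, so dotted names aren't mangled
    assert prepend_extension('part.unknown_video', 'temp', 'mp4') == 'part.unknown_video.temp'

    # replace_extension applies the same rule, but swaps instead of prepending
    assert replace_extension('info.mp4', 'json', 'mp4') == 'info.json'
    assert replace_extension('info.unknown_video', 'json', 'mp4') == 'info.unknown_video.json'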

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.04.28'
__version__ = '2015.05.04'