Compare commits

..

38 Commits

Author SHA1 Message Date
Philipp Hagemeister
b2e6a1c14c release 2014.10.27 2014-10-27 02:44:07 +01:00
Philipp Hagemeister
8cc3eba79a [phoenix] Add new extractor (Fixes #4036) 2014-10-27 02:43:59 +01:00
Philipp Hagemeister
b0fb6d4db1 [ku6] Modernize 2014-10-27 02:32:44 +01:00
Philipp Hagemeister
81515ad9f6 [extractor/common] Improve m3u8 output 2014-10-27 02:28:37 +01:00
Philipp Hagemeister
8112d4b284 [lrt] Modernize 2014-10-27 02:27:49 +01:00
Philipp Hagemeister
bf7aa6301b [fktv] Modernize 2014-10-27 02:26:05 +01:00
Philipp Hagemeister
aea856621f [zdf] Simplify 2014-10-27 02:14:07 +01:00
Philipp Hagemeister
f24a5a2faa Merge remote-tracking branch 'olebowle/ard' 2014-10-27 01:36:50 +01:00
Philipp Hagemeister
ecfe623422 [heise] Fix extraction
Now they use an XML format instead of JSON.
2014-10-27 01:33:51 +01:00
Philipp Hagemeister
4a6c94288a [kickstarter] Simplify and fix test case 2014-10-27 01:16:18 +01:00
Philipp Hagemeister
10e3d73472 [nbc] Fix ThePlatform embedded videos 2014-10-27 01:14:17 +01:00
Philipp Hagemeister
15956b5aa1 [promptfile] Fix check for deleted videos 2014-10-27 00:50:22 +01:00
Philipp Hagemeister
586f7082ef [francetv] Remove changing md5sum 2014-10-27 00:46:34 +01:00
Philipp Hagemeister
d6d9186f0d [generic] Fix test title 2014-10-27 00:45:15 +01:00
Philipp Hagemeister
2e9ff8f362 [gorillavid] Fix test title 2014-10-27 00:44:27 +01:00
Philipp Hagemeister
6407432333 [Makefile] remove temporary files in clean target 2014-10-27 00:40:07 +01:00
Philipp Hagemeister
f744c0f398 [test_download] Improve error message 2014-10-27 00:39:39 +01:00
Philipp Hagemeister
249efaf44b [pornhub] Modernize and fix test definition 2014-10-27 00:33:35 +01:00
Philipp Hagemeister
8d32abff9e [ruhd] Simplify 2014-10-27 00:20:54 +01:00
Philipp Hagemeister
94f052cbf4 [syfy] Remove test checksum
We have the minsize test now.
2014-10-27 00:19:15 +01:00
Philipp Hagemeister
446a03bd96 [ustream:channel] Change test playlist size (Seems to have been limited that way on the website as well) 2014-10-27 00:18:10 +01:00
Philipp Hagemeister
6009b69f81 [vgtv] Fix test title 2014-10-27 00:16:01 +01:00
Philipp Hagemeister
3d6047113c [vgtv] Simplify 2014-10-27 00:14:52 +01:00
Philipp Hagemeister
9dec99303d [vimeo:review] Fix test title 2014-10-27 00:13:40 +01:00
Philipp Hagemeister
7706927370 [vine:user] Adapt test to changed list size 2014-10-27 00:11:34 +01:00
Philipp Hagemeister
3adba6fa2a [xtube] Fix test description 2014-10-27 00:08:37 +01:00
Philipp Hagemeister
f46a8702cc [youtube:playlist] Fix test title 2014-10-27 00:06:47 +01:00
Philipp Hagemeister
8d11b59bbb [ynet] Remove test md5sums
These fluctuate regularly.
2014-10-27 00:06:00 +01:00
Philipp Hagemeister
cf501a23d2 [srmediathek] Correct IE_NAME/IE_DESC 2014-10-26 23:23:53 +01:00
Philipp Hagemeister
2bcae58d46 [srmediathek] New extractor 2014-10-26 23:23:10 +01:00
Philipp Hagemeister
c9f08154a3 Remove unused imports 2014-10-26 23:13:42 +01:00
Philipp Hagemeister
526b276fd7 [faz] Modernize 2014-10-26 23:11:15 +01:00
Philipp Hagemeister
77ec444d9a release 2014.10.26.2 2014-10-26 21:49:52 +01:00
Philipp Hagemeister
bfc2bedcfc [youtube] Make confirm_age non-fatal (#4042) 2014-10-26 21:49:29 +01:00
Philipp Hagemeister
83855f3a1f [livestream:original] Fix RTMP parameters (Fixes #4040) 2014-10-26 21:44:29 +01:00
Philipp Hagemeister
50b51830fb [ffmpeg] Fix typo 2014-10-26 21:31:51 +01:00
Ole Ernst
bfd91588f3 [ard] make rss match more universal 2014-10-22 14:24:53 +02:00
Ole Ernst
3741302a10 [ard] Add rss support 2014-10-10 20:35:34 +02:00
39 changed files with 301 additions and 243 deletions

View File

@@ -1,7 +1,7 @@
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part
cleanall: clean
rm -f youtube-dl youtube-dl.exe

View File

@@ -185,7 +185,9 @@ def generator(test_case):
md5_for_file = _file_md5(tc_filename)
self.assertEqual(md5_for_file, tc['md5'])
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
self.assertTrue(os.path.exists(info_json_fn))
self.assertTrue(
os.path.exists(info_json_fn),
'Missing info file %s' % info_json_fn)
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)

View File

@@ -280,6 +280,7 @@ from .orf import (
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .planetaplay import PlanetaPlayIE
from .played import PlayedIE
@@ -355,6 +356,7 @@ from .spike import SpikeIE
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
from .srmediathek import SRMediathekIE
from .stanfordoc import StanfordOpenClassroomIE
from .steam import SteamIE
from .streamcloud import StreamcloudIE

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .generic import GenericIE
from ..utils import (
determine_ext,
ExtractorError,
@@ -12,6 +13,7 @@ from ..utils import (
parse_duration,
unified_strdate,
xpath_text,
parse_xml,
)
@@ -54,6 +56,11 @@ class ARDMediathekIE(InfoExtractor):
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
if re.search(r'[\?&]rss($|[=&])', url):
doc = parse_xml(webpage)
if doc.tag == 'rss':
return GenericIE()._extract_rss(url, video_id, doc)
title = self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms.title" content="(.*?)"/>',

View File

@@ -4,7 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from .soundcloud import SoundcloudIE
from ..utils import ExtractorError
import datetime
import time

View File

@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,

View File

@@ -4,7 +4,6 @@ import json
import re
from .common import InfoExtractor
from ..utils import int_or_none
_translation_table = {
@@ -39,9 +38,7 @@ class CliphunterIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_title = self._search_regex(

View File

@@ -689,7 +689,10 @@ class InfoExtractor(object):
if re.match(r'^https?://', u)
else compat_urlparse.urljoin(m3u8_url, u))
m3u8_doc = self._download_webpage(m3u8_url, video_id)
m3u8_doc = self._download_webpage(
m3u8_url, video_id,
note='Downloading m3u8 information',
errnote='Failed to download m3u8 information')
last_info = None
kv_rex = re.compile(
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')

View File

@@ -1,49 +1,48 @@
# encoding: utf-8
import re
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
)
class FazIE(InfoExtractor):
IE_NAME = u'faz.net'
IE_NAME = 'faz.net'
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
_TEST = {
u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
u'file': u'12610585.mp4',
u'info_dict': {
u'title': u'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
u'description': u'md5:1453fbf9a0d041d985a47306192ea253',
'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
'info_dict': {
'id': '12610585',
'ext': 'mp4',
'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
'description': 'md5:1453fbf9a0d041d985a47306192ea253',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
self.to_screen(video_id)
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
u'config xml url')
config = self._download_xml(config_xml_url, video_id,
u'Downloading config xml')
config_xml_url = self._search_regex(
r'writeFLV\(\'(.+?)\',', webpage, 'config xml url')
config = self._download_xml(
config_xml_url, video_id, 'Downloading config xml')
encodings = config.find('ENCODINGS')
formats = []
for code in ['LOW', 'HIGH', 'HQ']:
for pref, code in enumerate(['LOW', 'HIGH', 'HQ']):
encoding = encodings.find(code)
if encoding is None:
continue
encoding_url = encoding.find('FILENAME').text
formats.append({
'url': encoding_url,
'ext': determine_ext(encoding_url),
'format_id': code.lower(),
'quality': pref,
})
self._sort_formats(formats)
descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
descr = self._html_search_regex(
r'<p class="Content Copy">(.*?)</p>', webpage, 'description', fatal=False)
return {
'id': video_id,
'title': self._og_search_title(webpage),

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
import random
import json
@@ -11,15 +13,16 @@ from ..utils import (
class FKTVIE(InfoExtractor):
IE_NAME = u'fernsehkritik.tv'
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
IE_NAME = 'fernsehkritik.tv'
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
_TEST = {
u'url': u'http://fernsehkritik.tv/folge-1',
u'file': u'00011.flv',
u'info_dict': {
u'title': u'Folge 1 vom 10. April 2007',
u'description': u'md5:fb4818139c7cfe6907d4b83412a6864f',
'url': 'http://fernsehkritik.tv/folge-1',
'info_dict': {
'id': '00011',
'ext': 'flv',
'title': 'Folge 1 vom 10. April 2007',
'description': 'md5:fb4818139c7cfe6907d4b83412a6864f',
},
}
@@ -32,7 +35,7 @@ class FKTVIE(InfoExtractor):
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
episode)
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
u'playlist', flags=re.DOTALL)
'playlist', flags=re.DOTALL)
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
# TODO: return a single multipart video
videos = []
@@ -42,7 +45,6 @@ class FKTVIE(InfoExtractor):
videos.append({
'id': video_id,
'url': video_url,
'ext': determine_ext(video_url),
'title': clean_html(get_element_by_id('eptitle', start_webpage)),
'description': clean_html(get_element_by_id('contentlist', start_webpage)),
'thumbnail': video_thumbnail
@@ -51,14 +53,15 @@ class FKTVIE(InfoExtractor):
class FKTVPosteckeIE(InfoExtractor):
IE_NAME = u'fernsehkritik.tv:postecke'
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
IE_NAME = 'fernsehkritik.tv:postecke'
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
_TEST = {
u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
u'file': u'0120.flv',
u'md5': u'262f0adbac80317412f7e57b4808e5c4',
u'info_dict': {
u"title": u"Postecke 120"
'url': 'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
'md5': '262f0adbac80317412f7e57b4808e5c4',
'info_dict': {
'id': '0120',
'ext': 'flv',
'title': 'Postecke 120',
}
}
@@ -71,8 +74,7 @@ class FKTVPosteckeIE(InfoExtractor):
video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode)
video_title = 'Postecke %d' % episode
return {
'id': video_id,
'url': video_url,
'ext': determine_ext(video_url),
'title': video_title,
'id': video_id,
'url': video_url,
'title': video_title,
}

View File

@@ -93,7 +93,6 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
_TESTS = [{
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
'md5': '9cecf35f99c4079c199e9817882a9a1c',
'info_dict': {
'id': '84981923',
'ext': 'flv',

View File

@@ -325,7 +325,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'age_limit': 18,
'uploader': 'www.handjobhub.com',
'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
}
},
# RSS feed

View File

@@ -46,9 +46,9 @@ class GorillaVidIE(InfoExtractor):
'info_dict': {
'id': '3rso4kdn6f9m',
'ext': 'mp4',
'title': 'Micro Pig piglets ready on 16th July 2009',
'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
'thumbnail': 're:http://.*\.jpg',
},
}
}, {
'url': 'http://movpod.in/0wguyyxi1yca',
'only_matching': True,

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
get_meta_content,
int_or_none,
parse_iso8601,
)
@@ -28,20 +29,26 @@ class HeiseIE(InfoExtractor):
'timestamp': 1411812600,
'upload_date': '20140927',
'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
'thumbnail': 're:https?://.*\.jpg$',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
json_url = self._search_regex(
r'json_url:\s*"([^"]+)"', webpage, 'json URL')
config = self._download_json(json_url, video_id)
container_id = self._search_regex(
r'<div class="videoplayerjw".*?data-container="([0-9]+)"',
webpage, 'container ID')
sequenz_id = self._search_regex(
r'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"',
webpage, 'sequenz ID')
data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id)
doc = self._download_xml(data_url, video_id)
info = {
'id': video_id,
'thumbnail': config.get('poster'),
'thumbnail': self._og_search_thumbnail(webpage),
'timestamp': parse_iso8601(get_meta_content('date', webpage)),
'description': self._og_search_description(webpage),
}
@@ -49,32 +56,19 @@ class HeiseIE(InfoExtractor):
title = get_meta_content('fulltitle', webpage)
if title:
info['title'] = title
elif config.get('title'):
info['title'] = config['title']
else:
info['title'] = self._og_search_title(webpage)
formats = []
for t, rs in config['formats'].items():
if not rs or not hasattr(rs, 'items'):
self._downloader.report_warning(
'formats: {0}: no resolutions'.format(t))
continue
for height_str, obj in rs.items():
format_id = '{0}_{1}'.format(t, height_str)
if not obj or not obj.get('url'):
self._downloader.report_warning(
'formats: {0}: no url'.format(format_id))
continue
formats.append({
'url': obj['url'],
'format_id': format_id,
'height': self._int(height_str, 'height'),
})
for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):
label = source_node.attrib['label']
height = int_or_none(self._search_regex(
r'^(.*?_)?([0-9]+)p$', label, 'height', default=None))
formats.append({
'url': source_node.attrib['file'],
'format_note': label,
'height': height,
})
self._sort_formats(formats)
info['formats'] = formats

View File

@@ -1,8 +1,6 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -21,22 +19,17 @@ class KickStarterIE(InfoExtractor):
}, {
'note': 'Embedded video (not using the native kickstarter video service)',
'url': 'https://www.kickstarter.com/projects/597507018/pebble-e-paper-watch-for-iphone-and-android/posts/659178',
'playlist': [
{
'info_dict': {
'id': '78704821',
'ext': 'mp4',
'uploader_id': 'pebble',
'uploader': 'Pebble Technology',
'title': 'Pebble iOS Notifications',
}
}
],
'info_dict': {
'id': '78704821',
'ext': 'mp4',
'uploader_id': 'pebble',
'uploader': 'Pebble Technology',
'title': 'Pebble iOS Notifications',
}
}]
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(

View File

@@ -1,7 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -18,11 +16,11 @@ class Ku6IE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._search_regex(r'<h1 title=.*>(.*?)</h1>', webpage, 'title')
title = self._html_search_regex(
r'<h1 title=.*>(.*?)</h1>', webpage, 'title')
dataUrl = 'http://v.ku6.com/fetchVideo4Player/%s.html' % video_id
jsonData = self._download_json(dataUrl, video_id)
downloadUrl = jsonData['data']['f']

View File

@@ -190,7 +190,8 @@ class LivestreamOriginalIE(InfoExtractor):
'id': video_id,
'title': item.find('title').text,
'url': 'rtmp://extondemand.livestream.com/ondemand',
'play_path': 'mp4:trans/dv15/mogulus-{0}.mp4'.format(path),
'play_path': 'trans/dv15/mogulus-{0}'.format(path),
'player_url': 'http://static.livestream.com/chromelessPlayer/v21/playerapi.swf?hash=5uetk&v=0803&classid=D27CDB6E-AE6D-11cf-96B8-444553540000&jsEnabled=false&wmode=opaque',
'ext': 'flv',
'thumbnail': thumbnail_url,
}

View File

@@ -32,9 +32,7 @@ class LRTIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = remove_end(self._og_search_title(webpage), ' - LRT')

View File

@@ -26,8 +26,7 @@ class NBCIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
theplatform_url = self._search_regex('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage, 'theplatform url')
if theplatform_url.startswith('//'):
@@ -57,7 +56,7 @@ class NBCNewsIE(InfoExtractor):
'md5': 'b2421750c9f260783721d898f4c42063',
'info_dict': {
'id': 'I1wpAI_zmhsQ',
'ext': 'flv',
'ext': 'mp4',
'title': 'How Twitter Reacted To The Snowden Interview',
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
},
@@ -97,6 +96,8 @@ class NBCNewsIE(InfoExtractor):
]
for base_url in base_urls:
if not base_url:
continue
playlist_url = base_url + '?form=MPXNBCNewsAPI'
all_videos = self._download_json(playlist_url, title)['videos']

View File

@@ -7,7 +7,6 @@ from .common import InfoExtractor
from ..utils import (
compat_urlparse,
compat_urllib_parse,
determine_ext,
unified_strdate,
)

View File

@@ -0,0 +1,31 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .zdf import extract_from_xml_url
class PhoenixIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.phoenix.de/content/884301',
'md5': 'ed249f045256150c92e72dbb70eadec6',
'info_dict': {
'id': '884301',
'ext': 'mp4',
'title': 'Michael Krons mit Hans-Werner Sinn',
'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
'upload_date': '20141025',
'uploader': 'Im Dialog',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
internal_id = self._search_regex(
r'<div class="phx_vod" id="phx_vod_([0-9]+)"',
webpage, 'internal video ID')
api_url = 'http://www.phoenix.de/php/zdfplayer-v1.3/data/beitragsDetails.php?ak=web&id=%s' % internal_id
return extract_from_xml_url(self, video_id, api_url)

View File

@@ -16,13 +16,14 @@ from ..aes import (
class PornHubIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))'
_VALID_URL = r'^https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
_TEST = {
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'file': '648719015.mp4',
'md5': '882f488fa1f0026f023f33576004a2ed',
'info_dict': {
"uploader": "BABES-COM",
'id': '648719015',
'ext': 'mp4',
"uploader": "Babes",
"title": "Seductive Indian beauty strips down and fingers her pink pussy",
"age_limit": 18
}
@@ -35,9 +36,7 @@ class PornHubIE(InfoExtractor):
return count
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
video_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
@@ -45,7 +44,7 @@ class PornHubIE(InfoExtractor):
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
webpage, 'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
if thumbnail:

View File

@@ -14,7 +14,6 @@ from ..utils import (
class PromptFileIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
_FILE_NOT_FOUND_REGEX = r'<div.+id="not_found_msg".+>.+</div>[^-]'
_TEST = {
'url': 'http://www.promptfile.com/l/D21B4746E9-F01462F0FF',
'md5': 'd1451b6302da7215485837aaea882c4c',
@@ -27,11 +26,10 @@ class PromptFileIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
if re.search(r'<div.+id="not_found_msg".+>(?!We are).+</div>[^-]', webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id,
expected=True)

View File

@@ -81,7 +81,7 @@ class RTLnowIE(InfoExtractor):
'id': '99205',
'ext': 'flv',
'title': 'Medicopter 117 - Angst!',
'description': 'md5:895b1df01639b5f61a04fc305a5cb94d',
'description': 're:^Im Therapiezentrum \'Sonnalm\' kommen durch eine Unachtsamkeit die für die B.handlung mit Phobikern gehaltenen Voglespinnen frei\. Eine Ausreißerin',
'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg',
'upload_date': '20080928',
'duration': 2691,

View File

@@ -1,8 +1,6 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -21,19 +19,20 @@ class RUHDIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(
r'<param name="src" value="([^"]+)"', webpage, 'video url')
title = self._html_search_regex(
r'<title>([^<]+)&nbsp;&nbsp; RUHD.ru - Видео Высокого качества №1 в России!</title>', webpage, 'title')
r'<title>([^<]+)&nbsp;&nbsp; RUHD.ru - Видео Высокого качества №1 в России!</title>',
webpage, 'title')
description = self._html_search_regex(
r'(?s)<div id="longdesc">(.+?)<span id="showlink">', webpage, 'description', fatal=False)
r'(?s)<div id="longdesc">(.+?)<span id="showlink">',
webpage, 'description', fatal=False)
thumbnail = self._html_search_regex(
r'<param name="previewImage" value="([^"]+)"', webpage, 'thumbnail', fatal=False)
r'<param name="previewImage" value="([^"]+)"',
webpage, 'thumbnail', fatal=False)
if thumbnail:
thumbnail = 'http://www.ruhd.ru' + thumbnail

View File

@@ -7,7 +7,6 @@ from .common import InfoExtractor
from ..utils import (
parse_duration,
parse_iso8601,
int_or_none,
)

View File

@@ -0,0 +1,43 @@
# encoding: utf-8
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import js_to_json
class SRMediathekIE(InfoExtractor):
IE_DESC = 'Süddeutscher Rundfunk'
_VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
'info_dict': {
'id': '28455',
'ext': 'mp4',
'title': 'sportarena (26.10.2014)',
'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ',
'thumbnail': 're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
urls = json.loads(js_to_json(self._search_regex(
r'var mediaURLs\s*=\s*(.*?);\n', webpage, 'video URLs')))
formats = [{'url': url} for url in urls]
self._sort_formats(formats)
title = json.loads(js_to_json(self._search_regex(
r'var mediaTitles\s*=\s*(.*?);\n', webpage, 'title')))[0]
return {
'id': video_id,
'title': title,
'formats': formats,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}

View File

@@ -10,7 +10,6 @@ class SyfyIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458',
'md5': 'e07de1d52c7278adbb9b9b1c93a66849',
'info_dict': {
'id': 'NmqMrGnXvmO1',
'ext': 'flv',

View File

@@ -6,6 +6,7 @@ import json
from .common import InfoExtractor
from ..utils import (
compat_str,
determine_ext,
ExtractorError,
xpath_with_ns,
)
@@ -34,10 +35,21 @@ class ThePlatformIE(InfoExtractor):
'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
if mobj.group('config'):
config_url = url+ '&form=json'
config_url = config_url.replace('swf/', 'config/')
config_url = config_url.replace('onsite/', 'onsite/config/')
config = self._download_json(config_url, video_id, 'Downloading config')
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
else:
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
'format=smil&mbr=true'.format(video_id))
def _get_info(self, video_id, smil_url):
meta = self._download_xml(smil_url, video_id)
try:
error_msg = next(
n.attrib['abstract']
@@ -89,10 +101,14 @@ class ThePlatformIE(InfoExtractor):
for f in switch.findall(_x('smil:video')):
attr = f.attrib
vbr = int(attr['system-bitrate']) // 1000
ext = determine_ext(attr['src'])
if ext == 'once':
ext = 'mp4'
formats.append({
'format_id': compat_str(vbr),
'url': attr['src'],
'vbr': vbr,
'ext': ext,
})
self._sort_formats(formats)
@@ -104,17 +120,3 @@ class ThePlatformIE(InfoExtractor):
'thumbnail': info['defaultThumbnailUrl'],
'duration': info['duration']//1000,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
if mobj.group('config'):
config_url = url+ '&form=json'
config_url = config_url.replace('swf/', 'config/')
config_url = config_url.replace('onsite/', 'onsite/config/')
config = self._download_json(config_url, video_id, 'Downloading config')
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
else:
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
'format=smil&mbr=true'.format(video_id))
return self._get_info(video_id, smil_url)

View File

@@ -72,7 +72,7 @@ class UstreamChannelIE(InfoExtractor):
'info_dict': {
'id': '10874166',
},
'playlist_mincount': 54,
'playlist_mincount': 17,
}
def _real_extract(self, url):

View File

@@ -17,7 +17,7 @@ class VGTVIE(InfoExtractor):
'info_dict': {
'id': '84196',
'ext': 'mp4',
'title': 'Hevnen er søt episode 10: Abu',
'title': 'Hevnen er søt episode 1:10 - Abu',
'description': 'md5:e25e4badb5f544b04341e14abdc72234',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 648.000,
@@ -67,9 +67,7 @@ class VGTVIE(InfoExtractor):
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
data = self._download_json(
'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id,
video_id, 'Downloading media JSON')

View File

@@ -8,13 +8,11 @@ import itertools
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
clean_html,
compat_HTTPError,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
ExtractorError,
get_element_by_attribute,
InAdvancePagedList,
int_or_none,
RegexNotFoundError,
@@ -514,7 +512,7 @@ class VimeoReviewIE(InfoExtractor):
'info_dict': {
'id': '91613211',
'ext': 'mp4',
'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn',
'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',
'uploader': 'DevWeek Events',
'duration': 2773,
'thumbnail': 're:^https?://.*\.jpg$',

View File

@@ -70,7 +70,7 @@ class VineUserIE(InfoExtractor):
'info_dict': {
'id': 'Visa',
},
'playlist_mincount': 47,
'playlist_mincount': 46,
}
def _real_extract(self, url):

View File

@@ -20,7 +20,7 @@ class XTubeIE(InfoExtractor):
'id': 'kVTUy_G222_',
'ext': 'mp4',
'title': 'strange erotica',
'description': 'surreal gay themed erotica...almost an ET kind of thing',
'description': 'http://www.xtube.com an ET kind of thing',
'uploader': 'greenshowers',
'duration': 450,
'age_limit': 18,

View File

@@ -13,7 +13,6 @@ class YnetIE(InfoExtractor):
_TESTS = [
{
'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html',
'md5': '4b29cb57c3dddd57642b3f051f535b07',
'info_dict': {
'id': 'L-11659-99244',
'ext': 'flv',
@@ -22,7 +21,6 @@ class YnetIE(InfoExtractor):
}
}, {
'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html',
'md5': '8194c2ea221e9a639cac96b6b0753dc5',
'info_dict': {
'id': 'L-8859-84418',
'ext': 'flv',

View File

@@ -185,8 +185,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self._download_webpage(
req, None,
note='Confirming age', errnote='Unable to confirm age')
return True
note='Confirming age', errnote='Unable to confirm age',
fatal=False)
def _real_initialize(self):
if self._downloader is None:
@@ -1057,7 +1057,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'note': 'issue #673',
'url': 'PLBB231211A4F62143',
'info_dict': {
'title': 'Team Fortress 2 (Class-based LP)',
'title': '[OLD]Team Fortress 2 (Class-based LP)',
},
'playlist_mincount': 26,
}, {

View File

@@ -10,8 +10,84 @@ from ..utils import (
)
def extract_from_xml_url(ie, video_id, xml_url):
doc = ie._download_xml(
xml_url, video_id,
note='Downloading video info',
errnote='Failed to download video info')
title = doc.find('.//information/title').text
description = doc.find('.//information/detail').text
duration = int(doc.find('.//details/lengthSec').text)
uploader_node = doc.find('.//details/originChannelTitle')
uploader = None if uploader_node is None else uploader_node.text
uploader_id_node = doc.find('.//details/originChannelId')
uploader_id = None if uploader_id_node is None else uploader_id_node.text
upload_date = unified_strdate(doc.find('.//details/airtime').text)
def xml_to_format(fnode):
video_url = fnode.find('url').text
is_available = 'http://www.metafilegenerator' not in video_url
format_id = fnode.attrib['basetype']
format_m = re.match(r'''(?x)
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
''', format_id)
ext = format_m.group('container')
proto = format_m.group('proto').lower()
quality = fnode.find('./quality').text
abr = int(fnode.find('./audioBitrate').text) // 1000
vbr_node = fnode.find('./videoBitrate')
vbr = None if vbr_node is None else int(vbr_node.text) // 1000
width_node = fnode.find('./width')
width = None if width_node is None else int_or_none(width_node.text)
height_node = fnode.find('./height')
height = None if height_node is None else int_or_none(height_node.text)
format_note = ''
if not format_note:
format_note = None
return {
'format_id': format_id + '-' + quality,
'url': video_url,
'ext': ext,
'acodec': format_m.group('acodec'),
'vcodec': format_m.group('vcodec'),
'abr': abr,
'vbr': vbr,
'width': width,
'height': height,
'filesize': int_or_none(fnode.find('./filesize').text),
'format_note': format_note,
'protocol': proto,
'_available': is_available,
}
format_nodes = doc.findall('.//formitaeten/formitaet')
formats = list(filter(
lambda f: f['_available'],
map(xml_to_format, format_nodes)))
ie._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
'upload_date': upload_date,
'formats': formats,
}
class ZDFIE(InfoExtractor):
_VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
_VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
_TEST = {
'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
@@ -29,81 +105,7 @@ class ZDFIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
video_id = self._match_id(url)
xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
doc = self._download_xml(
xml_url, video_id,
note='Downloading video info',
errnote='Failed to download video info')
title = doc.find('.//information/title').text
description = doc.find('.//information/detail').text
duration = int(doc.find('.//details/lengthSec').text)
uploader_node = doc.find('.//details/originChannelTitle')
uploader = None if uploader_node is None else uploader_node.text
uploader_id_node = doc.find('.//details/originChannelId')
uploader_id = None if uploader_id_node is None else uploader_id_node.text
upload_date = unified_strdate(doc.find('.//details/airtime').text)
def xml_to_format(fnode):
video_url = fnode.find('url').text
is_available = 'http://www.metafilegenerator' not in video_url
format_id = fnode.attrib['basetype']
format_m = re.match(r'''(?x)
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
''', format_id)
ext = format_m.group('container')
proto = format_m.group('proto').lower()
quality = fnode.find('./quality').text
abr = int(fnode.find('./audioBitrate').text) // 1000
vbr_node = fnode.find('./videoBitrate')
vbr = None if vbr_node is None else int(vbr_node.text) // 1000
width_node = fnode.find('./width')
width = None if width_node is None else int_or_none(width_node.text)
height_node = fnode.find('./height')
height = None if height_node is None else int_or_none(height_node.text)
format_note = ''
if not format_note:
format_note = None
return {
'format_id': format_id + '-' + quality,
'url': video_url,
'ext': ext,
'acodec': format_m.group('acodec'),
'vcodec': format_m.group('vcodec'),
'abr': abr,
'vbr': vbr,
'width': width,
'height': height,
'filesize': int_or_none(fnode.find('./filesize').text),
'format_note': format_note,
'protocol': proto,
'_available': is_available,
}
format_nodes = doc.findall('.//formitaeten/formitaet')
formats = list(filter(
lambda f: f['_available'],
map(xml_to_format, format_nodes)))
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
'upload_date': upload_date,
'formats': formats,
}
return extract_from_xml_url(self, video_id, xml_url)

View File

@@ -8,7 +8,6 @@ import time
from .common import AudioConversionError, PostProcessor
from ..utils import (
check_executable,
compat_subprocess_get_DEVNULL,
encodeArgument,
encodeFilename,
@@ -78,7 +77,7 @@ class FFmpegPostProcessor(PostProcessor):
@property
def _probe_executable(self):
if self._downloader.params.get('prefer_ffmpeg', False):
prefs = ('ffproe', 'avprobe')
prefs = ('ffprobe', 'avprobe')
else:
prefs = ('avprobe', 'ffprobe')
for p in prefs:

View File

@@ -1,2 +1,2 @@
__version__ = '2014.10.26.1'
__version__ = '2014.10.27'