[ie/xhamster] Fix extractor (#15252)

Closes #15239
Authored by: 0xvd
This commit is contained in:
0x∅
2025-12-07 03:42:38 +05:30
committed by GitHub
parent c70b57c03e
commit 29e2570378

View File

@@ -1,8 +1,6 @@
import base64
import codecs
import itertools import itertools
import re import re
import string import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -16,7 +14,6 @@ from ..utils import (
join_nonempty, join_nonempty,
parse_duration, parse_duration,
str_or_none, str_or_none,
try_call,
try_get, try_get,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
@@ -32,7 +29,7 @@ class _ByteGenerator:
try: try:
self._algorithm = getattr(self, f'_algo{algo_id}') self._algorithm = getattr(self, f'_algo{algo_id}')
except AttributeError: except AttributeError:
raise ExtractorError(f'Unknown algorithm ID: {algo_id}') raise ExtractorError(f'Unknown algorithm ID "{algo_id}"')
self._s = to_signed_32(seed) self._s = to_signed_32(seed)
def _algo1(self, s): def _algo1(self, s):
@@ -216,32 +213,28 @@ class XHamsterIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
_XOR_KEY = b'xh7999'
def _decipher_format_url(self, format_url, format_id): def _decipher_format_url(self, format_url, format_id):
if all(char in string.hexdigits for char in format_url): parsed_url = urllib.parse.urlparse(format_url)
byte_data = bytes.fromhex(format_url)
seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True)
byte_gen = _ByteGenerator(byte_data[0], seed)
return bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1')
cipher_type, _, ciphertext = try_call( hex_string, path_remainder = self._search_regex(
lambda: base64.b64decode(format_url).decode().partition('_')) or [None] * 3 r'^/(?P<hex>[0-9a-fA-F]{12,})(?P<rem>[/,].+)$', parsed_url.path, 'url components',
default=(None, None), group=('hex', 'rem'))
if not cipher_type or not ciphertext: if not hex_string:
self.report_warning(f'Skipping format "{format_id}": failed to decipher URL') self.report_warning(f'Skipping format "{format_id}": unsupported URL format')
return None return None
if cipher_type == 'xor': byte_data = bytes.fromhex(hex_string)
return bytes( seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True)
a ^ b for a, b in
zip(ciphertext.encode(), itertools.cycle(self._XOR_KEY))).decode()
if cipher_type == 'rot13': try:
return codecs.decode(ciphertext, cipher_type) byte_gen = _ByteGenerator(byte_data[0], seed)
except ExtractorError as e:
self.report_warning(f'Skipping format "{format_id}": {e.msg}')
return None
self.report_warning(f'Skipping format "{format_id}": unsupported cipher type "{cipher_type}"') deciphered = bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1')
return None
return parsed_url._replace(path=f'/{deciphered}{path_remainder}').geturl()
def _fixup_formats(self, formats): def _fixup_formats(self, formats):
for f in formats: for f in formats:
@@ -364,8 +357,11 @@ class XHamsterIE(InfoExtractor):
'height': get_height(quality), 'height': get_height(quality),
'filesize': format_sizes.get(quality), 'filesize': format_sizes.get(quality),
'http_headers': { 'http_headers': {
'Referer': standard_url, 'Referer': urlh.url,
}, },
# HTTP formats return "Wrong key" error even when deciphered by site JS
# TODO: Remove this when resolved on the site's end
'__needs_testing': True,
}) })
categories_list = video.get('categories') categories_list = video.get('categories')
@@ -402,7 +398,8 @@ class XHamsterIE(InfoExtractor):
'age_limit': age_limit if age_limit is not None else 18, 'age_limit': age_limit if age_limit is not None else 18,
'categories': categories, 'categories': categories,
'formats': self._fixup_formats(formats), 'formats': self._fixup_formats(formats),
'_format_sort_fields': ('res', 'proto', 'tbr'), # TODO: Revert to ('res', 'proto', 'tbr') when HTTP formats problem is resolved
'_format_sort_fields': ('res', 'proto:m3u8', 'tbr'),
} }
# Old layout fallback # Old layout fallback