mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-08 07:11:39 +01:00
@@ -1,8 +1,6 @@
|
|||||||
import base64
|
|
||||||
import codecs
|
|
||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
import string
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -16,7 +14,6 @@ from ..utils import (
|
|||||||
join_nonempty,
|
join_nonempty,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_call,
|
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
@@ -32,7 +29,7 @@ class _ByteGenerator:
|
|||||||
try:
|
try:
|
||||||
self._algorithm = getattr(self, f'_algo{algo_id}')
|
self._algorithm = getattr(self, f'_algo{algo_id}')
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise ExtractorError(f'Unknown algorithm ID: {algo_id}')
|
raise ExtractorError(f'Unknown algorithm ID "{algo_id}"')
|
||||||
self._s = to_signed_32(seed)
|
self._s = to_signed_32(seed)
|
||||||
|
|
||||||
def _algo1(self, s):
|
def _algo1(self, s):
|
||||||
@@ -216,32 +213,28 @@ class XHamsterIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_XOR_KEY = b'xh7999'
|
|
||||||
|
|
||||||
def _decipher_format_url(self, format_url, format_id):
|
def _decipher_format_url(self, format_url, format_id):
|
||||||
if all(char in string.hexdigits for char in format_url):
|
parsed_url = urllib.parse.urlparse(format_url)
|
||||||
byte_data = bytes.fromhex(format_url)
|
|
||||||
seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True)
|
|
||||||
byte_gen = _ByteGenerator(byte_data[0], seed)
|
|
||||||
return bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1')
|
|
||||||
|
|
||||||
cipher_type, _, ciphertext = try_call(
|
hex_string, path_remainder = self._search_regex(
|
||||||
lambda: base64.b64decode(format_url).decode().partition('_')) or [None] * 3
|
r'^/(?P<hex>[0-9a-fA-F]{12,})(?P<rem>[/,].+)$', parsed_url.path, 'url components',
|
||||||
|
default=(None, None), group=('hex', 'rem'))
|
||||||
if not cipher_type or not ciphertext:
|
if not hex_string:
|
||||||
self.report_warning(f'Skipping format "{format_id}": failed to decipher URL')
|
self.report_warning(f'Skipping format "{format_id}": unsupported URL format')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if cipher_type == 'xor':
|
byte_data = bytes.fromhex(hex_string)
|
||||||
return bytes(
|
seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True)
|
||||||
a ^ b for a, b in
|
|
||||||
zip(ciphertext.encode(), itertools.cycle(self._XOR_KEY))).decode()
|
|
||||||
|
|
||||||
if cipher_type == 'rot13':
|
try:
|
||||||
return codecs.decode(ciphertext, cipher_type)
|
byte_gen = _ByteGenerator(byte_data[0], seed)
|
||||||
|
except ExtractorError as e:
|
||||||
|
self.report_warning(f'Skipping format "{format_id}": {e.msg}')
|
||||||
|
return None
|
||||||
|
|
||||||
self.report_warning(f'Skipping format "{format_id}": unsupported cipher type "{cipher_type}"')
|
deciphered = bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1')
|
||||||
return None
|
|
||||||
|
return parsed_url._replace(path=f'/{deciphered}{path_remainder}').geturl()
|
||||||
|
|
||||||
def _fixup_formats(self, formats):
|
def _fixup_formats(self, formats):
|
||||||
for f in formats:
|
for f in formats:
|
||||||
@@ -364,8 +357,11 @@ class XHamsterIE(InfoExtractor):
|
|||||||
'height': get_height(quality),
|
'height': get_height(quality),
|
||||||
'filesize': format_sizes.get(quality),
|
'filesize': format_sizes.get(quality),
|
||||||
'http_headers': {
|
'http_headers': {
|
||||||
'Referer': standard_url,
|
'Referer': urlh.url,
|
||||||
},
|
},
|
||||||
|
# HTTP formats return "Wrong key" error even when deciphered by site JS
|
||||||
|
# TODO: Remove this when resolved on the site's end
|
||||||
|
'__needs_testing': True,
|
||||||
})
|
})
|
||||||
|
|
||||||
categories_list = video.get('categories')
|
categories_list = video.get('categories')
|
||||||
@@ -402,7 +398,8 @@ class XHamsterIE(InfoExtractor):
|
|||||||
'age_limit': age_limit if age_limit is not None else 18,
|
'age_limit': age_limit if age_limit is not None else 18,
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
'formats': self._fixup_formats(formats),
|
'formats': self._fixup_formats(formats),
|
||||||
'_format_sort_fields': ('res', 'proto', 'tbr'),
|
# TODO: Revert to ('res', 'proto', 'tbr') when HTTP formats problem is resolved
|
||||||
|
'_format_sort_fields': ('res', 'proto:m3u8', 'tbr'),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Old layout fallback
|
# Old layout fallback
|
||||||
|
|||||||
Reference in New Issue
Block a user