[networking] Rewrite architecture (#2861)

New networking interface consists of a `RequestDirector` that directs
each `Request` to appropriate `RequestHandler` and returns the
`Response` or raises `RequestError`. The handlers define adapters to
transform its internal Request/Response/Errors to our interfaces.

User-facing changes:
- Fix issues with per request proxies on redirects for urllib
- Support for `ALL_PROXY` environment variable for proxy setting
- Support for `socks5h` proxy
   - Closes https://github.com/yt-dlp/yt-dlp/issues/6325, https://github.com/ytdl-org/youtube-dl/issues/22618, https://github.com/ytdl-org/youtube-dl/pull/28093
- Raise error when using `https` proxy instead of silently converting it to `http`

Authored by: coletdjnz
This commit is contained in:
coletdjnz
2023-07-15 15:55:23 +05:30
committed by pukkandan
parent c365dba843
commit 227bf1a33b
16 changed files with 2586 additions and 474 deletions

View File

@@ -10,16 +10,16 @@ del passthrough_module
from ._utils import preferredencoding
from ..networking._urllib import HTTPHandler
# isort: split
from .networking import random_user_agent, std_headers # noqa: F401
from ..networking._urllib import PUTRequest # noqa: F401
from ..networking._urllib import SUPPORTED_ENCODINGS, HEADRequest # noqa: F401
from ..networking._urllib import HTTPHandler as YoutubeDLHandler # noqa: F401
from ..networking._urllib import ProxyHandler as PerRequestProxyHandler # noqa: F401
from ..networking._urllib import RedirectHandler as YoutubeDLRedirectHandler # noqa: F401
from ..networking._urllib import make_socks_conn_class, update_Request # noqa: F401
from ..networking.exceptions import network_exceptions # noqa: F401
from .networking import random_user_agent, std_headers # noqa: F401
def encodeFilename(s, for_subprocess=False):
@@ -47,3 +47,12 @@ def decodeOption(optval):
def error_to_compat_str(err):
return str(err)
class YoutubeDLHandler(HTTPHandler):
def __init__(self, params, *args, **kwargs):
self._params = params
super().__init__(*args, **kwargs)
YoutubeDLHTTPSHandler = YoutubeDLHandler

View File

@@ -15,8 +15,6 @@ import hashlib
import hmac
import html.entities
import html.parser
import http.client
import http.cookiejar
import inspect
import io
import itertools
@@ -897,6 +895,7 @@ def formatSeconds(secs, delim=':', msec=False):
def make_HTTPS_handler(params, **kwargs):
from ._deprecated import YoutubeDLHTTPSHandler
from ..networking._helper import make_ssl_context
return YoutubeDLHTTPSHandler(params, context=make_ssl_context(
verify=not params.get('nocheckcertificate'),
@@ -1140,38 +1139,6 @@ class XAttrUnavailableError(YoutubeDLError):
pass
class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
def __init__(self, params, https_conn_class=None, *args, **kwargs):
urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
self._https_conn_class = https_conn_class or http.client.HTTPSConnection
self._params = params
def https_open(self, req):
kwargs = {}
conn_class = self._https_conn_class
if hasattr(self, '_context'): # python > 2.6
kwargs['context'] = self._context
if hasattr(self, '_check_hostname'): # python 3.x
kwargs['check_hostname'] = self._check_hostname
socks_proxy = req.headers.get('Ytdl-socks-proxy')
if socks_proxy:
from ..networking._urllib import make_socks_conn_class
conn_class = make_socks_conn_class(conn_class, socks_proxy)
del req.headers['Ytdl-socks-proxy']
from ..networking._urllib import _create_http_connection
try:
return self.do_open(
functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
except urllib.error.URLError as e:
if (isinstance(e.reason, ssl.SSLError)
and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
raise
def is_path_like(f):
return isinstance(f, (str, bytes, os.PathLike))

View File

@@ -1,4 +1,9 @@
import collections
import random
import urllib.parse
import urllib.request
from ._utils import remove_start
def random_user_agent():
@@ -46,15 +51,67 @@ def random_user_agent():
return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
std_headers = {
class HTTPHeaderDict(collections.UserDict, dict):
"""
Store and access keys case-insensitively.
The constructor can take multiple dicts, in which keys in the latter are prioritised.
"""
def __init__(self, *args, **kwargs):
super().__init__()
for dct in args:
if dct is not None:
self.update(dct)
self.update(kwargs)
def __setitem__(self, key, value):
super().__setitem__(key.title(), str(value))
def __getitem__(self, key):
return super().__getitem__(key.title())
def __delitem__(self, key):
super().__delitem__(key.title())
def __contains__(self, key):
return super().__contains__(key.title() if isinstance(key, str) else key)
std_headers = HTTPHeaderDict({
'User-Agent': random_user_agent(),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-us,en;q=0.5',
'Sec-Fetch-Mode': 'navigate',
}
})
def clean_headers(headers):
if 'Youtubedl-no-compression' in headers: # compat
del headers['Youtubedl-no-compression']
def clean_proxies(proxies: dict, headers: HTTPHeaderDict):
req_proxy = headers.pop('Ytdl-Request-Proxy', None)
if req_proxy:
proxies.clear() # XXX: compat: Ytdl-Request-Proxy takes preference over everything, including NO_PROXY
proxies['all'] = req_proxy
for proxy_key, proxy_url in proxies.items():
if proxy_url == '__noproxy__':
proxies[proxy_key] = None
continue
if proxy_key == 'no': # special case
continue
if proxy_url is not None:
# Ensure proxies without a scheme are http.
proxy_scheme = urllib.request._parse_proxy(proxy_url)[0]
if proxy_scheme is None:
proxies[proxy_key] = 'http://' + remove_start(proxy_url, '//')
replace_scheme = {
'socks5': 'socks5h', # compat: socks5 was treated as socks5h
'socks': 'socks4' # compat: non-standard
}
if proxy_scheme in replace_scheme:
proxies[proxy_key] = urllib.parse.urlunparse(
urllib.parse.urlparse(proxy_url)._replace(scheme=replace_scheme[proxy_scheme]))
def clean_headers(headers: HTTPHeaderDict):
if 'Youtubedl-No-Compression' in headers: # compat
del headers['Youtubedl-No-Compression']
headers['Accept-Encoding'] = 'identity'