mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-08 07:11:39 +01:00
Compare commits
3 Commits
6842620d56
...
715af0c636
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
715af0c636 | ||
|
|
0c696239ef | ||
|
|
3cb5e4db54 |
@@ -1870,7 +1870,6 @@ The following extractors use this feature:
|
||||
* `po_token`: Proof of Origin (PO) Token(s) to use. Comma-separated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player+XXX,web_safari.gvs+YYY`. Context can be any of `gvs` (Google Video Server URLs), `player` (Innertube player request) or `subs` (Subtitles)
|
||||
* `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default)
|
||||
* `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try to fetch a PO Token regardless of whether the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context)
|
||||
* `playback_wait`: Duration (in seconds) to wait in between the extraction and download stages in order to ensure the formats are available. The default is `6` seconds
|
||||
* `jsc_trace`: Enable debug logging for JS Challenge fetching. Either `true` or `false` (default)
|
||||
|
||||
#### youtube-ejs
|
||||
|
||||
@@ -75,6 +75,7 @@ from .afreecatv import (
|
||||
AfreecaTVLiveIE,
|
||||
AfreecaTVUserIE,
|
||||
)
|
||||
from .agalega import AGalegaIE
|
||||
from .agora import (
|
||||
TokFMAuditionIE,
|
||||
TokFMPodcastIE,
|
||||
|
||||
91
yt_dlp/extractor/agalega.py
Normal file
91
yt_dlp/extractor/agalega.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import json
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import jwt_decode_hs256, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AGalegaBaseIE(InfoExtractor):
    """Base extractor for agalega.gal: obtains a JWT access token and performs API calls."""

    # Cached on the base class (not the instance) so all subclasses/instances share one token
    _access_token = None

    @staticmethod
    def _jwt_is_expired(token):
        """Return True when the `exp` claim of `token` is less than 120 seconds away (or past)."""
        return jwt_decode_hs256(token)['exp'] - time.time() < 120

    def _refresh_access_token(self, video_id):
        """Request a new access token and store it in `AGalegaBaseIE._access_token`.

        `video_id` is only forwarded to `_download_json` alongside the request.
        The token is requested with null username and password for the `crtvg` client.
        """
        AGalegaBaseIE._access_token = self._download_json(
            'https://www.agalega.gal/api/fetch-api/jwt/token', video_id,
            note='Downloading access token',
            data=json.dumps({
                'username': None,
                'password': None,
                'client': 'crtvg',
                'checkExistsCookies': False,
            }).encode())['access']

    def _call_api(self, endpoint, display_id, note, fatal=True, query=None):
        """Download JSON from the contents API `endpoint`, authorizing with the cached token.

        The token is refreshed first if none is cached or the cached one is about to expire.
        `display_id`, `note`, `fatal` and `query` are passed through to `_download_json`.
        """
        if not AGalegaBaseIE._access_token or self._jwt_is_expired(AGalegaBaseIE._access_token):
            # Attribute the token request to the item being extracted rather than
            # to the API endpoint path
            self._refresh_access_token(display_id)
        return self._download_json(
            f'https://api-agalega.interactvty.com/api/2.0/contents/{endpoint}', display_id,
            note=note, fatal=fatal, query=query,
            headers={'Authorization': f'jwtok {AGalegaBaseIE._access_token}'})
|
||||
|
||||
|
||||
class AGalegaIE(AGalegaBaseIE):
    """Extractor for single videos hosted at agalega.gal/videos/."""

    IE_NAME = 'agalega:videos'
    _VALID_URL = r'https?://(?:www\.)?agalega\.gal/videos/(?:detail/)?(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.agalega.gal/videos/288664-lr-ninguencheconta',
        'md5': '04533a66c5f863d08dd9724b11d1c223',
        'info_dict': {
            'id': '288664',
            'title': 'Roberto e Ángel Martín atenden consultas dos espectadores',
            'description': 'O cómico ademais fai un repaso dalgúns momentos da súa traxectoria profesional',
            'thumbnail': 'https://crtvg-bucket.flumotion.cloud/content_cards/2ef32c3b9f6249d9868fd8f11d389d3d.png',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.agalega.gal/videos/detail/296152-pulso-activo-7',
        'md5': '26df7fdcf859f38ad92d837279d6b56d',
        'info_dict': {
            'id': '296152',
            'title': 'Pulso activo | 18-11-2025',
            'description': 'Anxo, Noemí, Silvia e Estrella comparten as sensacións da clase de Eddy.',
            'thumbnail': 'https://crtvg-bucket.flumotion.cloud/content_cards/a6bb7da6c8994b82bf961ac6cad1707b.png',
            'ext': 'mp4',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict from the content (metadata) and content_resources (media) endpoints."""
        video_id = self._match_id(url)

        # Metadata is requested non-fatally; the resource listing below is requested fatally
        metadata = self._call_api(
            f'content/{video_id}/', video_id, note='Downloading content data', fatal=False,
            query={'optional_fields': 'image,is_premium,short_description,has_subtitle'})
        resources = self._call_api(
            f'content_resources/{video_id}/', video_id, note='Downloading resource data',
            query={'optional_fields': 'media_url'})

        formats, subtitles = [], {}
        media_urls = traverse_obj(resources, ('results', ..., 'media_url', {url_or_none}))
        for media_url in media_urls:
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                media_url, video_id, ext='mp4', m3u8_id='hls')
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        info.update(traverse_obj(metadata, {
            'title': ('name', {str}),
            'description': (('description', 'short_description'), {str}, any),
            'thumbnail': ('image', {url_or_none}),
        }))
        return info
|
||||
@@ -339,11 +339,20 @@ class WistiaChannelIE(WistiaBaseIE):
|
||||
'title': 'The Roof S2: The Modern CRO',
|
||||
'thumbnail': r're:https?://embed(?:-ssl)?\.wistia\.com/.+\.(?:jpg|png)',
|
||||
'duration': 86.487,
|
||||
'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season.\n',
|
||||
'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season. ',
|
||||
'timestamp': 1619790290,
|
||||
'upload_date': '20210430',
|
||||
},
|
||||
'params': {'noplaylist': True, 'skip_download': True},
|
||||
}, {
|
||||
# Channel with episodes structure instead of videos
|
||||
'url': 'https://fast.wistia.net/embed/channel/sapab9p6qd',
|
||||
'info_dict': {
|
||||
'id': 'sapab9p6qd',
|
||||
'title': 'Credo: An RCIA Program',
|
||||
'description': '\n',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.profitwell.com/recur/boxed-out',
|
||||
@@ -399,8 +408,7 @@ class WistiaChannelIE(WistiaBaseIE):
|
||||
|
||||
entries = [
|
||||
self.url_result(f'wistia:{video["hashedId"]}', WistiaIE, title=video.get('name'))
|
||||
for video in traverse_obj(series, ('sections', ..., 'videos', ...)) or []
|
||||
if video.get('hashedId')
|
||||
for video in traverse_obj(series, ('sections', ..., ('videos', 'episodes'), lambda _, v: v['hashedId']))
|
||||
]
|
||||
|
||||
return self.playlist_result(
|
||||
|
||||
@@ -76,7 +76,7 @@ STREAMING_DATA_FETCH_GVS_PO_TOKEN = '__yt_dlp_fetch_gvs_po_token'
|
||||
STREAMING_DATA_PLAYER_TOKEN_PROVIDED = '__yt_dlp_player_token_provided'
|
||||
STREAMING_DATA_INNERTUBE_CONTEXT = '__yt_dlp_innertube_context'
|
||||
STREAMING_DATA_IS_PREMIUM_SUBSCRIBER = '__yt_dlp_is_premium_subscriber'
|
||||
STREAMING_DATA_FETCHED_TIMESTAMP = '__yt_dlp_fetched_timestamp'
|
||||
STREAMING_DATA_AVAILABLE_AT_TIMESTAMP = '__yt_dlp_available_at_timestamp'
|
||||
|
||||
PO_TOKEN_GUIDE_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide'
|
||||
|
||||
@@ -3032,7 +3032,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
elif pr:
|
||||
# Save client details for introspection later
|
||||
innertube_context = traverse_obj(player_ytcfg or self._get_default_ytcfg(client), 'INNERTUBE_CONTEXT')
|
||||
fetched_timestamp = int(time.time())
|
||||
sd = pr.setdefault('streamingData', {})
|
||||
sd[STREAMING_DATA_CLIENT_NAME] = client
|
||||
sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
|
||||
@@ -3040,7 +3039,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
|
||||
sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
|
||||
sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
|
||||
sd[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp
|
||||
sd[STREAMING_DATA_AVAILABLE_AT_TIMESTAMP] = self._get_available_at_timestamp(pr, video_id, client)
|
||||
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||||
f[STREAMING_DATA_CLIENT_NAME] = client
|
||||
f[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
|
||||
@@ -3172,9 +3171,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# save pots per client to avoid fetching again
|
||||
gvs_pots = {}
|
||||
|
||||
# For handling potential pre-playback required waiting period
|
||||
playback_wait = int_or_none(self._configuration_arg('playback_wait', [None])[0], default=6)
|
||||
|
||||
def get_language_code_and_preference(fmt_stream):
|
||||
audio_track = fmt_stream.get('audioTrack') or {}
|
||||
display_name = audio_track.get('displayName') or ''
|
||||
@@ -3199,7 +3195,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
is_premium_subscriber = streaming_data[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
|
||||
player_token_provided = streaming_data[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
|
||||
client_name = streaming_data.get(STREAMING_DATA_CLIENT_NAME)
|
||||
available_at = streaming_data[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait
|
||||
available_at = streaming_data[STREAMING_DATA_AVAILABLE_AT_TIMESTAMP]
|
||||
streaming_formats = traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ...))
|
||||
|
||||
def get_stream_id(fmt_stream):
|
||||
@@ -3653,6 +3649,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
}))
|
||||
return webpage
|
||||
|
||||
def _get_available_at_timestamp(self, player_response, video_id, client):
    """Return the current time plus the summed wait implied by before-content ads.

    Every `instreamVideoAdRenderer` found under an ad slot whose trigger event is
    `SLOT_TRIGGER_EVENT_BEFORE_CONTENT` contributes its skip offset when one is
    present, otherwise its `length_seconds`. With no contribution, the current
    time truncated to an int is returned.
    """
    now = time.time()

    ad_renderers = traverse_obj(player_response, (
        'adSlots', lambda _, v: v['adSlotRenderer']['adSlotMetadata']['triggerEvent'] == 'SLOT_TRIGGER_EVENT_BEFORE_CONTENT',
        'adSlotRenderer', 'fulfillmentContent', 'fulfilledLayout', 'playerBytesAdLayoutRenderer', 'renderingContent', (
            None,
            ('playerBytesSequentialLayoutRenderer', 'sequentialLayouts', ..., 'playerBytesAdLayoutRenderer', 'renderingContent'),
        ), 'instreamVideoAdRenderer', {dict},
    ))

    total_wait = 0
    for ad_renderer in ad_renderers:
        ad_length = traverse_obj(ad_renderer, ('playerVars', {urllib.parse.parse_qs}, 'length_seconds', -1, {int_or_none}))
        ad_desc = f'a {ad_length}s ad' if ad_length is not None else 'an ad'

        skip_after = traverse_obj(ad_renderer, ('skipOffsetMilliseconds', {float_or_none(scale=1000)}))
        if skip_after is not None:
            # The ad is skippable: count only the time until it can be skipped
            if not skip_after % 1:
                # Whole number of seconds; use an int so it is not printed as e.g. "5.0s"
                skip_after = int(skip_after)
            ad_desc += f' skippable after {skip_after}s'
            ad_length = skip_after

        if ad_length is None:
            continue
        self.write_debug(f'{video_id}: Detected {ad_desc} for {client}')
        total_wait += ad_length

    if not total_wait:
        return int(now)
    return math.ceil(now) + total_wait
|
||||
|
||||
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
|
||||
live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
|
||||
is_live = get_first(video_details, 'isLive')
|
||||
|
||||
Reference in New Issue
Block a user