[ie/youtube] Determine wait time from player response (#14646)

Closes #14645. Authored by: WhatAmISupposedToPutHere, bashonly. Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
[ie/WistiaChannel] Fix extractor (#14218)
2025-12-08 07:11:39 +01:00 · 2025-11-23 00:49:36 +00:00 · 2025-11-21 23:08:20 +00:00 · 2025-11-21 20:07:07 +01:00
5 changed files with 136 additions and 11 deletions
--- a/README.md
+++ b/README.md
@@ -1870,7 +1870,6 @@ The following extractors use this feature:
 * `po_token`:  Proof of Origin (PO) Token(s) to use. Comma seperated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player=XXX,web_safari.gvs+YYY`. Context can be any of `gvs` (Google Video Server URLs), `player` (Innertube player request) or `subs` (Subtitles)
 * `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default)
 * `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try fetch a PO Token regardless if the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context)
-* `playback_wait`: Duration (in seconds) to wait inbetween the extraction and download stages in order to ensure the formats are available. The default is `6` seconds
 * `jsc_trace`: Enable debug logging for JS Challenge fetching. Either `true` or `false` (default)

 #### youtube-ejs
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -75,6 +75,7 @@ from .afreecatv import (
    AfreecaTVLiveIE,
    AfreecaTVUserIE,
 )
+from .agalega import AGalegaIE
 from .agora import (
    TokFMAuditionIE,
    TokFMPodcastIE,
--- a/yt_dlp/extractor/agalega.py
+++ b/yt_dlp/extractor/agalega.py
@@ -0,0 +1,91 @@
+import json
+import time
+
+from .common import InfoExtractor
+from ..utils import jwt_decode_hs256, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class AGalegaBaseIE(InfoExtractor):
+    _access_token = None
+
+    @staticmethod
+    def _jwt_is_expired(token):
+        return jwt_decode_hs256(token)['exp'] - time.time() < 120
+
+    def _refresh_access_token(self, video_id):
+        AGalegaBaseIE._access_token = self._download_json(
+            'https://www.agalega.gal/api/fetch-api/jwt/token', video_id,
+            note='Downloading access token',
+            data=json.dumps({
+                'username': None,
+                'password': None,
+                'client': 'crtvg',
+                'checkExistsCookies': False,
+            }).encode())['access']
+
+    def _call_api(self, endpoint, display_id, note, fatal=True, query=None):
+        if not AGalegaBaseIE._access_token or self._jwt_is_expired(AGalegaBaseIE._access_token):
+            self._refresh_access_token(endpoint)
+        return self._download_json(
+            f'https://api-agalega.interactvty.com/api/2.0/contents/{endpoint}', display_id,
+            note=note, fatal=fatal, query=query,
+            headers={'Authorization': f'jwtok {AGalegaBaseIE._access_token}'})
+
+
+class AGalegaIE(AGalegaBaseIE):
+    IE_NAME = 'agalega:videos'
+    _VALID_URL = r'https?://(?:www\.)?agalega\.gal/videos/(?:detail/)?(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://www.agalega.gal/videos/288664-lr-ninguencheconta',
+        'md5': '04533a66c5f863d08dd9724b11d1c223',
+        'info_dict': {
+            'id': '288664',
+            'title': 'Roberto e Ángel Martín atenden consultas dos espectadores',
+            'description': 'O cómico ademais fai un repaso dalgúns momentos da súa traxectoria profesional',
+            'thumbnail': 'https://crtvg-bucket.flumotion.cloud/content_cards/2ef32c3b9f6249d9868fd8f11d389d3d.png',
+            'ext': 'mp4',
+        },
+    }, {
+        'url': 'https://www.agalega.gal/videos/detail/296152-pulso-activo-7',
+        'md5': '26df7fdcf859f38ad92d837279d6b56d',
+        'info_dict': {
+            'id': '296152',
+            'title': 'Pulso activo | 18-11-2025',
+            'description': 'Anxo, Noemí, Silvia e Estrella  comparten as sensacións da clase de Eddy.',
+            'thumbnail': 'https://crtvg-bucket.flumotion.cloud/content_cards/a6bb7da6c8994b82bf961ac6cad1707b.png',
+            'ext': 'mp4',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        content_data = self._call_api(
+            f'content/{video_id}/', video_id, note='Downloading content data', fatal=False,
+            query={
+                'optional_fields': 'image,is_premium,short_description,has_subtitle',
+            })
+        resource_data = self._call_api(
+            f'content_resources/{video_id}/', video_id, note='Downloading resource data',
+            query={
+                'optional_fields': 'media_url',
+            })
+
+        formats = []
+        subtitles = {}
+        for m3u8_url in traverse_obj(resource_data, ('results', ..., 'media_url', {url_or_none})):
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                m3u8_url, video_id, ext='mp4', m3u8_id='hls')
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(content_data, {
+                'title': ('name', {str}),
+                'description': (('description', 'short_description'), {str}, any),
+                'thumbnail': ('image', {url_or_none}),
+            }),
+        }
--- a/yt_dlp/extractor/wistia.py
+++ b/yt_dlp/extractor/wistia.py
@@ -339,11 +339,20 @@ class WistiaChannelIE(WistiaBaseIE):
            'title': 'The Roof S2: The Modern CRO',
            'thumbnail': r're:https?://embed(?:-ssl)?\.wistia\.com/.+\.(?:jpg|png)',
            'duration': 86.487,
-            'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season.\n',
+            'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season. ',
            'timestamp': 1619790290,
            'upload_date': '20210430',
        },
        'params': {'noplaylist': True, 'skip_download': True},
+    }, {
+        # Channel with episodes structure instead of videos
+        'url': 'https://fast.wistia.net/embed/channel/sapab9p6qd',
+        'info_dict': {
+            'id': 'sapab9p6qd',
+            'title': 'Credo: An RCIA Program',
+            'description': '\n',
+        },
+        'playlist_mincount': 80,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.profitwell.com/recur/boxed-out',
@@ -399,8 +408,7 @@ class WistiaChannelIE(WistiaBaseIE):

        entries = [
            self.url_result(f'wistia:{video["hashedId"]}', WistiaIE, title=video.get('name'))
-            for video in traverse_obj(series, ('sections', ..., 'videos', ...)) or []
-            if video.get('hashedId')
+            for video in traverse_obj(series, ('sections', ..., ('videos', 'episodes'), lambda _, v: v['hashedId']))
        ]

        return self.playlist_result(
--- a/yt_dlp/extractor/youtube/_video.py
+++ b/yt_dlp/extractor/youtube/_video.py
@@ -76,7 +76,7 @@ STREAMING_DATA_FETCH_GVS_PO_TOKEN = '__yt_dlp_fetch_gvs_po_token'
 STREAMING_DATA_PLAYER_TOKEN_PROVIDED = '__yt_dlp_player_token_provided'
 STREAMING_DATA_INNERTUBE_CONTEXT = '__yt_dlp_innertube_context'
 STREAMING_DATA_IS_PREMIUM_SUBSCRIBER = '__yt_dlp_is_premium_subscriber'
-STREAMING_DATA_FETCHED_TIMESTAMP = '__yt_dlp_fetched_timestamp'
+STREAMING_DATA_AVAILABLE_AT_TIMESTAMP = '__yt_dlp_available_at_timestamp'

 PO_TOKEN_GUIDE_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide'

@@ -3032,7 +3032,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            elif pr:
                # Save client details for introspection later
                innertube_context = traverse_obj(player_ytcfg or self._get_default_ytcfg(client), 'INNERTUBE_CONTEXT')
-                fetched_timestamp = int(time.time())
                sd = pr.setdefault('streamingData', {})
                sd[STREAMING_DATA_CLIENT_NAME] = client
                sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
@@ -3040,7 +3039,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
                sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
                sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
-                sd[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp
+                sd[STREAMING_DATA_AVAILABLE_AT_TIMESTAMP] = self._get_available_at_timestamp(pr, video_id, client)
                for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
                    f[STREAMING_DATA_CLIENT_NAME] = client
                    f[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
@@ -3172,9 +3171,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        # save pots per client to avoid fetching again
        gvs_pots = {}

-        # For handling potential pre-playback required waiting period
-        playback_wait = int_or_none(self._configuration_arg('playback_wait', [None])[0], default=6)
-
        def get_language_code_and_preference(fmt_stream):
            audio_track = fmt_stream.get('audioTrack') or {}
            display_name = audio_track.get('displayName') or ''
@@ -3199,7 +3195,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            is_premium_subscriber = streaming_data[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
            player_token_provided = streaming_data[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
            client_name = streaming_data.get(STREAMING_DATA_CLIENT_NAME)
-            available_at = streaming_data[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait
+            available_at = streaming_data[STREAMING_DATA_AVAILABLE_AT_TIMESTAMP]
            streaming_formats = traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ...))

            def get_stream_id(fmt_stream):
@@ -3653,6 +3649,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                }))
        return webpage

+    def _get_available_at_timestamp(self, player_response, video_id, client):
+        now = time.time()
+        wait_seconds = 0
+
+        for renderer in traverse_obj(player_response, (
+            'adSlots', lambda _, v: v['adSlotRenderer']['adSlotMetadata']['triggerEvent'] == 'SLOT_TRIGGER_EVENT_BEFORE_CONTENT',
+            'adSlotRenderer', 'fulfillmentContent', 'fulfilledLayout', 'playerBytesAdLayoutRenderer', 'renderingContent', (
+                None,
+                ('playerBytesSequentialLayoutRenderer', 'sequentialLayouts', ..., 'playerBytesAdLayoutRenderer', 'renderingContent'),
+            ), 'instreamVideoAdRenderer', {dict},
+        )):
+            duration = traverse_obj(renderer, ('playerVars', {urllib.parse.parse_qs}, 'length_seconds', -1, {int_or_none}))
+            ad = 'an ad' if duration is None else f'a {duration}s ad'
+
+            skip_time = traverse_obj(renderer, ('skipOffsetMilliseconds', {float_or_none(scale=1000)}))
+            if skip_time is not None:
+                # YT allows skipping this ad; use the wait-until-skip time instead of full ad duration
+                skip_time = skip_time if skip_time % 1 else int(skip_time)
+                ad += f' skippable after {skip_time}s'
+                duration = skip_time
+
+            if duration is not None:
+                self.write_debug(f'{video_id}: Detected {ad} for {client}')
+                wait_seconds += duration
+
+        if wait_seconds:
+            return math.ceil(now) + wait_seconds
+
+        return int(now)
+
    def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
        live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
        is_live = get_first(video_details, 'isLive')
Author	SHA1	Message	Date
WhatAmISupposedToPutHere	715af0c636	[ie/youtube] Determine wait time from player response (#14646). Closes #14645. Authored by: WhatAmISupposedToPutHere, bashonly. Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>	2025-11-23 00:49:36 +00:00
Sojiroh	0c696239ef	[ie/WistiaChannel] Fix extractor (#14218). Closes #14204. Authored by: Sojiroh	2025-11-21 23:08:20 +00:00
putridambassador121	3cb5e4db54	[ie/AGalega] Add extractor (#15105). Closes #14758. Authored by: putridambassador121	2025-11-21 20:07:07 +01:00