mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-12-08 07:11:35 +01:00
[utils] Add subs_list_to_dict() traversal helper
Thx: yt-dlp/yt-dlp#10653, etc
This commit is contained in:
@@ -16,6 +16,7 @@ from youtube_dl.traversal import (
|
||||
dict_get,
|
||||
get_first,
|
||||
require,
|
||||
subs_list_to_dict,
|
||||
T,
|
||||
traverse_obj,
|
||||
unpack,
|
||||
@@ -30,6 +31,7 @@ from youtube_dl.compat import (
|
||||
compat_zip as zip,
|
||||
)
|
||||
from youtube_dl.utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
@@ -495,6 +497,105 @@ class TestTraversalHelpers(_TestCase):
|
||||
traverse_obj(_TEST_DATA, ('str', T(require('value')))), 'str',
|
||||
'`require` should pass through non-`None` values')
|
||||
|
||||
def test_subs_list_to_dict(self):
    """Exercise `subs_list_to_dict()` as the final step of a traversal path."""

    def convert(items, fields, converter):
        # All cases share one path shape: `Ellipsis`, a field-mapping dict,
        # `all`, and finally the helper under test wrapped in `T()`
        return traverse_obj(items, [Ellipsis, fields, all, T(converter)])

    # Case 1: helper passed undecorated, every item has an id and a URL
    actual = convert([
        {'name': 'de', 'url': 'https://example.com/subs/de.vtt'},
        {'name': 'en', 'url': 'https://example.com/subs/en1.ass'},
        {'name': 'en', 'url': 'https://example.com/subs/en2.ass'},
    ], {
        'id': 'name',
        'url': 'url',
    }, subs_list_to_dict)
    expected = {
        'de': [{'url': 'https://example.com/subs/de.vtt'}],
        'en': [
            {'url': 'https://example.com/subs/en1.ass'},
            {'url': 'https://example.com/subs/en2.ass'},
        ],
    }
    self.assertEqual(
        actual, expected,
        'function should build subtitle dict from list of subtitles')

    # Case 2: no default language, items lacking url/data or id are dropped
    actual = convert([
        {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
        {'name': 'de'},
        {'name': 'en', 'content': 'content'},
        {'url': 'https://example.com/subs/en'},
    ], {
        'id': 'name',
        'data': 'content',
        'url': 'url',
    }, subs_list_to_dict(lang=None))
    expected = {
        'de': [{'url': 'https://example.com/subs/de.ass'}],
        'en': [{'data': 'content'}],
    }
    self.assertEqual(
        actual, expected,
        'subs with mandatory items missing should be filtered')

    # Case 3: default `ext` only fills in where no ext was determined
    actual = convert([
        {'url': 'https://example.com/subs/de.ass', 'name': 'de'},
        {'url': 'https://example.com/subs/en', 'name': 'en'},
    ], {
        'id': 'name',
        'ext': ['url', T(determine_ext(default_ext=None))],
        'url': 'url',
    }, subs_list_to_dict(ext='ext'))
    expected = {
        'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
        'en': [{'url': 'https://example.com/subs/en', 'ext': 'ext'}],
    }
    self.assertEqual(
        actual, expected,
        '`ext` should set default ext but leave existing value untouched')

    # Case 4: `quality` determines the order and is absent from the output
    actual = convert([
        {'name': 'en', 'url': 'https://example.com/subs/en2', 'prio': True},
        {'name': 'en', 'url': 'https://example.com/subs/en1', 'prio': False},
    ], {
        'id': 'name',
        'quality': ['prio', T(int)],
        'url': 'url',
    }, subs_list_to_dict(ext='ext'))
    expected = {'en': [
        {'url': 'https://example.com/subs/en1', 'ext': 'ext'},
        {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
    ]}
    self.assertEqual(
        actual, expected,
        '`quality` key should sort subtitle list accordingly')

    # Case 5: explicit default language picks up the item without an id
    actual = convert([
        {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
        {'name': 'de'},
        {'name': 'en', 'content': 'content'},
        {'url': 'https://example.com/subs/en'},
    ], {
        'id': 'name',
        'url': 'url',
        'data': 'content',
    }, subs_list_to_dict(lang='en'))
    expected = {
        'de': [{'url': 'https://example.com/subs/de.ass'}],
        'en': [
            {'data': 'content'},
            {'url': 'https://example.com/subs/en'},
        ],
    }
    self.assertEqual(
        actual, expected,
        'optionally provided lang should be used if no id available')

    # Case 6: non-string id/ext values with no default language
    actual = convert([
        {'name': 1, 'url': 'https://example.com/subs/de1'},
        {'name': {}, 'url': 'https://example.com/subs/de2'},
        {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
        {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
    ], {
        'id': 'name',
        'url': 'url',
        'ext': 'ext',
    }, subs_list_to_dict(lang=None))
    expected = {
        'de': [
            {'url': 'https://example.com/subs/de3'},
            {'url': 'https://example.com/subs/de4'},
        ],
    }
    self.assertEqual(
        actual, expected,
        'non str types should be ignored for id and ext')

    # Case 7: same input, but a default language replaces the bad ids
    actual = convert([
        {'name': 1, 'url': 'https://example.com/subs/de1'},
        {'name': {}, 'url': 'https://example.com/subs/de2'},
        {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
        {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
    ], {
        'id': 'name',
        'url': 'url',
        'ext': 'ext',
    }, subs_list_to_dict(lang='de'))
    expected = {
        'de': [
            {'url': 'https://example.com/subs/de1'},
            {'url': 'https://example.com/subs/de2'},
            {'url': 'https://example.com/subs/de3'},
            {'url': 'https://example.com/subs/de4'},
        ],
    }
    self.assertEqual(
        actual, expected,
        'non str types should be replaced by default id')
|
||||
|
||||
def test_unpack(self):
|
||||
self.assertEqual(
|
||||
unpack(lambda *x: ''.join(map(compat_str, x)))([1, 2, 3]), '123')
|
||||
|
||||
@@ -6,6 +6,7 @@ from .utils import (
|
||||
dict_get,
|
||||
get_first,
|
||||
require,
|
||||
subs_list_to_dict,
|
||||
T,
|
||||
traverse_obj,
|
||||
unpack,
|
||||
|
||||
@@ -6599,6 +6599,51 @@ class require(ExtractorError):
|
||||
return value
|
||||
|
||||
|
||||
@partial_application
# typing: (subs: list[dict], /, *, lang='und', ext=None) -> dict[str, list[dict]]
def subs_list_to_dict(subs, lang='und', ext=None):
    """
    Group a list of subtitle dicts from a traversal into a subtitle dict.
    The path should have an `all` immediately before this function.

    Arguments:
    `lang`      The default language tag for subtitle dicts with no
                usable (string) `id` (`und`: undefined); if falsy,
                such subtitle dicts are skipped
    `ext`       The default value for `ext` in subtitle dicts whose
                own `ext` is missing or not a string; if falsy, a
                non-string `ext` is removed instead

    Each subtitle dict may carry these additional items, which are
    consumed here and do not appear in the result:
    `id`        The language tag under which the subtitle dict is filed
    `quality`   The sort key (ascending) within each language's list;
                missing or falsy values count as 0

    Subtitle dicts are skipped when `url_or_none()` rejects their `url`
    and they have no truthy `data`.
    The passed subtitle dicts are modified in place.
    """

    def quality_key(item):
        # `quality` is only a sorting hint: take it out of the output dict
        return item.pop('quality', 0) or 0

    by_lang = collections.defaultdict(list)

    for sub in subs:
        # the URL is always removed, and only restored once validated
        checked_url = url_or_none(sub.pop('url', None))
        if not checked_url and not sub.get('data'):
            continue
        if checked_url:
            sub['url'] = checked_url

        lang_tag = sub.pop('id', None)
        if not isinstance(lang_tag, compat_str):
            lang_tag = lang
            if not lang_tag:
                # no usable id and no default to fall back on
                continue

        if not isinstance(sub.get('ext'), compat_str):
            if ext:
                sub['ext'] = ext
            else:
                sub.pop('ext', None)

        by_lang[lang_tag].append(sub)

    for entries in by_lang.values():
        entries.sort(key=quality_key)

    # hand back a plain dict, not the defaultdict used for grouping
    return dict(by_lang)
|
||||
|
||||
|
||||
def unpack(func, **kwargs):
|
||||
"""Make a function that applies `partial(func, **kwargs)` to its argument as *args"""
|
||||
@functools.wraps(func)
|
||||
|
||||
Reference in New Issue
Block a user