mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-12-08 07:11:35 +01:00
[utils] Add subs_list_to_dict() traversal helper
Thx: yt-dlp/yt-dlp#10653, etc
This commit is contained in:
@@ -16,6 +16,7 @@ from youtube_dl.traversal import (
|
|||||||
dict_get,
|
dict_get,
|
||||||
get_first,
|
get_first,
|
||||||
require,
|
require,
|
||||||
|
subs_list_to_dict,
|
||||||
T,
|
T,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unpack,
|
unpack,
|
||||||
@@ -30,6 +31,7 @@ from youtube_dl.compat import (
|
|||||||
compat_zip as zip,
|
compat_zip as zip,
|
||||||
)
|
)
|
||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
@@ -495,6 +497,105 @@ class TestTraversalHelpers(_TestCase):
|
|||||||
traverse_obj(_TEST_DATA, ('str', T(require('value')))), 'str',
|
traverse_obj(_TEST_DATA, ('str', T(require('value')))), 'str',
|
||||||
'`require` should pass through non-`None` values')
|
'`require` should pass through non-`None` values')
|
||||||
|
|
||||||
|
def test_subs_list_to_dict(self):
    """Check `subs_list_to_dict` when used as the final step of a traversal path"""

    def collect(items, fields, converter):
        # Build one subtitle dict per item from `fields`, gather the
        # results with `all` and pass that list on to `converter`
        return traverse_obj(items, [Ellipsis, fields, all, T(converter)])

    # entries sharing an `id` end up in the same list
    actual = collect(
        [
            {'name': 'de', 'url': 'https://example.com/subs/de.vtt'},
            {'name': 'en', 'url': 'https://example.com/subs/en1.ass'},
            {'name': 'en', 'url': 'https://example.com/subs/en2.ass'},
        ],
        {
            'id': 'name',
            'url': 'url',
        },
        subs_list_to_dict)
    expected = {
        'de': [{'url': 'https://example.com/subs/de.vtt'}],
        'en': [
            {'url': 'https://example.com/subs/en1.ass'},
            {'url': 'https://example.com/subs/en2.ass'},
        ],
    }
    self.assertEqual(
        actual, expected,
        'function should build subtitle dict from list of subtitles')

    # with no default lang, entries lacking an `id`, or lacking both
    # `url` and `data`, are expected to be dropped
    actual = collect(
        [
            {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
            {'name': 'de'},
            {'name': 'en', 'content': 'content'},
            {'url': 'https://example.com/subs/en'},
        ],
        {
            'id': 'name',
            'data': 'content',
            'url': 'url',
        },
        subs_list_to_dict(lang=None))
    expected = {
        'de': [{'url': 'https://example.com/subs/de.ass'}],
        'en': [{'data': 'content'}],
    }
    self.assertEqual(
        actual, expected,
        'subs with mandatory items missing should be filtered')

    # the default `ext` only fills in where no ext was determined
    actual = collect(
        [
            {'url': 'https://example.com/subs/de.ass', 'name': 'de'},
            {'url': 'https://example.com/subs/en', 'name': 'en'},
        ],
        {
            'id': 'name',
            'ext': ['url', T(determine_ext(default_ext=None))],
            'url': 'url',
        },
        subs_list_to_dict(ext='ext'))
    expected = {
        'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
        'en': [{'url': 'https://example.com/subs/en', 'ext': 'ext'}],
    }
    self.assertEqual(
        actual, expected,
        '`ext` should set default ext but leave existing value untouched')

    # `quality` orders the list (ascending) and is absent from the output
    actual = collect(
        [
            {'name': 'en', 'url': 'https://example.com/subs/en2', 'prio': True},
            {'name': 'en', 'url': 'https://example.com/subs/en1', 'prio': False},
        ],
        {
            'id': 'name',
            'quality': ['prio', T(int)],
            'url': 'url',
        },
        subs_list_to_dict(ext='ext'))
    expected = {'en': [
        {'url': 'https://example.com/subs/en1', 'ext': 'ext'},
        {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
    ]}
    self.assertEqual(
        actual, expected,
        '`quality` key should sort subtitle list accordingly')

    # an explicit default lang collects the entry that has no `id`
    actual = collect(
        [
            {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
            {'name': 'de'},
            {'name': 'en', 'content': 'content'},
            {'url': 'https://example.com/subs/en'},
        ],
        {
            'id': 'name',
            'url': 'url',
            'data': 'content',
        },
        subs_list_to_dict(lang='en'))
    expected = {
        'de': [{'url': 'https://example.com/subs/de.ass'}],
        'en': [
            {'data': 'content'},
            {'url': 'https://example.com/subs/en'},
        ],
    }
    self.assertEqual(
        actual, expected,
        'optionally provided lang should be used if no id available')

    # non-string `id`/`ext` values and no default lang
    actual = collect(
        [
            {'name': 1, 'url': 'https://example.com/subs/de1'},
            {'name': {}, 'url': 'https://example.com/subs/de2'},
            {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
            {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
        ],
        {
            'id': 'name',
            'url': 'url',
            'ext': 'ext',
        },
        subs_list_to_dict(lang=None))
    expected = {
        'de': [
            {'url': 'https://example.com/subs/de3'},
            {'url': 'https://example.com/subs/de4'},
        ],
    }
    self.assertEqual(
        actual, expected,
        'non str types should be ignored for id and ext')

    # same input, but a default lang stands in for the non-string ids
    actual = collect(
        [
            {'name': 1, 'url': 'https://example.com/subs/de1'},
            {'name': {}, 'url': 'https://example.com/subs/de2'},
            {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
            {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
        ],
        {
            'id': 'name',
            'url': 'url',
            'ext': 'ext',
        },
        subs_list_to_dict(lang='de'))
    expected = {
        'de': [
            {'url': 'https://example.com/subs/de1'},
            {'url': 'https://example.com/subs/de2'},
            {'url': 'https://example.com/subs/de3'},
            {'url': 'https://example.com/subs/de4'},
        ],
    }
    self.assertEqual(
        actual, expected,
        'non str types should be replaced by default id')
|
||||||
|
|
||||||
def test_unpack(self):
|
def test_unpack(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
unpack(lambda *x: ''.join(map(compat_str, x)))([1, 2, 3]), '123')
|
unpack(lambda *x: ''.join(map(compat_str, x)))([1, 2, 3]), '123')
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from .utils import (
|
|||||||
dict_get,
|
dict_get,
|
||||||
get_first,
|
get_first,
|
||||||
require,
|
require,
|
||||||
|
subs_list_to_dict,
|
||||||
T,
|
T,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unpack,
|
unpack,
|
||||||
|
|||||||
@@ -6599,6 +6599,51 @@ class require(ExtractorError):
|
|||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
@partial_application
# typing: (subs: list[dict], /, *, lang='und', ext=None) -> dict[str, list[dict]]
def subs_list_to_dict(subs, lang='und', ext=None):
    """
    Convert subtitles from a traversal into a subtitle dict.
    The path should have an `all` immediately before this function.

    Arguments:
    `subs`     The list of subtitle dicts to group; the dicts are
               modified in place and re-used in the result
    `lang`     The default language tag for subtitle dicts with no
               string `id` (`und`: undefined); if falsy, such dicts
               are dropped instead
    `ext`      The default value for `ext` in subtitle dicts whose
               `ext` is not a string; if falsy, a non-string `ext`
               is removed instead

    In the dict you can set the following additional items:
    `id`       The language tag to which the subtitle dict should be added
    `quality`  The sort order for each subtitle dict

    A subtitle dict is skipped unless it has a `url` accepted by
    `url_or_none()` or a truthy `data` item.

    Returns a dict mapping each language tag to its list of subtitle
    dicts, sorted by ascending `quality` (missing or falsy counts as 0);
    `id` and `quality` are removed from the returned subtitle dicts.
    """

    by_lang = collections.defaultdict(list)

    for entry in subs:
        # keep `url` only in the form returned by url_or_none()
        checked_url = url_or_none(entry.pop('url', None))
        if checked_url:
            entry['url'] = checked_url
        elif not entry.get('data'):
            # neither a usable URL nor inline data
            continue

        tag = entry.pop('id', None)
        if not isinstance(tag, compat_str):
            tag = lang
            if not tag:
                # no language tag and no default to fall back on
                continue

        if not isinstance(entry.get('ext'), compat_str):
            if ext:
                entry['ext'] = ext
            else:
                entry.pop('ext', None)

        by_lang[tag].append(entry)

    def quality_key(item):
        # also strips the helper-only `quality` item from the output;
        # relies on list.sort() computing each key exactly once
        return item.pop('quality', 0) or 0

    grouped = dict(by_lang)
    for tracks in grouped.values():
        tracks.sort(key=quality_key)

    return grouped
|
||||||
|
|
||||||
|
|
||||||
def unpack(func, **kwargs):
|
def unpack(func, **kwargs):
|
||||||
"""Make a function that applies `partial(func, **kwargs)` to its argument as *args"""
|
"""Make a function that applies `partial(func, **kwargs)` to its argument as *args"""
|
||||||
@functools.wraps(func)
|
@functools.wraps(func)
|
||||||
|
|||||||
Reference in New Issue
Block a user