[YouTube] Improve n-sig function extraction for player aa3fc80b

Resolves #33123
This commit is contained in:
dirkf 2025-05-02 13:49:05 +01:00
parent 4a31290ae1
commit 680069a149
2 changed files with 26 additions and 12 deletions

View File

@ -346,6 +346,10 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js', 'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE', 'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
), ),
(
'https://www.youtube.com/s/player/aa3fc80b/player_ias.vflset/en_US/base.js',
'0qY9dal2uzOnOGwa-48hha', 'VSh1KDfQMk-eag',
),
] ]

View File

@ -1878,6 +1878,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode): def _extract_n_function_name(self, jscode):
func_name, idx = None, None func_name, idx = None, None
def generic_n_function_search(func_name=None):
    """Find the n-sig function in the player JS by the shape of its body.

    Closes over `self` and `jscode` from the enclosing
    `_extract_n_function_name(self, jscode)`.

    The pattern looks for `<name> = function(<arg>) {` whose body, before
    the first `};`, contains either a quoted string starting with
    `enhanced_except_` or `return "<...>_w8_" + <identifier>`.

    func_name: when given, only an assignment to exactly this name is
        accepted and `default=None` is passed to `_search_regex`, so the
        call acts as a check on an already-found candidate. When omitted,
        any identifier is accepted and `NO_DEFAULT` is passed instead
        (presumably making a failed search an error -- confirm against
        `_search_regex`).

    Returns whatever `_search_regex` yields for the `name` group.
    """
    # JS identifiers may contain `$`, which is a regex metacharacter (an
    # end-of-string anchor): a caller-supplied name must be escaped, or a
    # name such as `$a` could never be confirmed by this search.
    name_pattern = (
        re.escape(func_name) if func_name
        else r'(?!\d)[a-zA-Z\d_$]+')
    return self._search_regex(
        r'''(?xs)
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
(?P<name>%s)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
\s*\{(?:(?!};).)+?(?:
["']enhanced_except_ |
return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[\w$]+
)
''' % (name_pattern,), jscode,
        'Initial JS player n function name', group='name',
        default=None if func_name else NO_DEFAULT)
# these special cases are redundant and probably obsolete (2025-04): # these special cases are redundant and probably obsolete (2025-04):
# they make the tests run ~10% faster without fallback warnings # they make the tests run ~10% faster without fallback warnings
r""" r"""
@ -1918,26 +1932,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?(idx)|\[\s*)(?P<nfunc>(?!\d)[\w$]+)(?(idx)|\s*\]) (?(idx)|\[\s*)(?P<nfunc>(?!\d)[\w$]+)(?(idx)|\s*\])
\s*?[;\n] \s*?[;\n]
''', jscode): ''', jscode):
func_name = self._search_regex( fn = self._search_regex(
r'[;,]\s*(function\s+)?({0})(?(1)|\s*=\s*function)\s*\((?!\d)[\w$]+\)\s*\{1}(?!\s*return\s)'.format( r'[;,]\s*(function\s+)?({0})(?(1)|\s*=\s*function)\s*\((?!\d)[\w$]+\)\s*\{1}(?!\s*return\s)'.format(
re.escape(m.group('nfunc')), '{'), re.escape(m.group('nfunc')), '{'),
jscode, 'Initial JS player n function name (2)', group=2, default=None) jscode, 'Initial JS player n function name (2)', group=2, default=None)
if func_name: if fn:
func_name = fn
idx = m.group('idx') idx = m.group('idx')
break if generic_n_function_search(func_name):
# don't look any further
break
# thx bashonly: yt-dlp/yt-dlp/pull/10611 # thx bashonly: yt-dlp/yt-dlp/pull/10611
if not func_name: if not func_name:
self.report_warning('Falling back to generic n function search', only_once=True) self.report_warning('Falling back to generic n function search', only_once=True)
return self._search_regex( return generic_n_function_search()
r'''(?xs)
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
\s*\{(?:(?!};).)+?(?:
["']enhanced_except_ |
return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[\w$]+
)
''', jscode, 'Initial JS player n function name', group='name')
if not idx: if not idx:
return func_name return func_name