diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index a2d3a41ae..98221b9c2 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -346,6 +346,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js', 'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE', ), + ( + 'https://www.youtube.com/s/player/aa3fc80b/player_ias.vflset/en_US/base.js', + '0qY9dal2uzOnOGwa-48hha', 'VSh1KDfQMk-eag', + ), ] diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0d6ffa3f2..7290ae813 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1878,6 +1878,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_n_function_name(self, jscode): func_name, idx = None, None + + def generic_n_function_search(func_name=None): + return self._search_regex( + r'''(?xs) + (?:(?<=[^\w$])|^) # instead of \b, which ignores $ + (?P<name>%s)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\) + \s*\{(?:(?!};).)+?(?: + ["']enhanced_except_ | + return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[\w$]+ + ) + ''' % (func_name or r'(?!\d)[a-zA-Z\d_$]+',), jscode, + 'Initial JS player n function name', group='name', + default=None if func_name else NO_DEFAULT) + # these special cases are redundant and probably obsolete (2025-04): # they make the tests run ~10% faster without fallback warnings r""" @@ -1918,26 +1932,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?(idx)|\[\s*)(?P<nfunc>(?!\d)[\w$]+)(?(idx)|\s*\]) \s*?[;\n] ''', jscode): - func_name = self._search_regex( + fn = self._search_regex( r'[;,]\s*(function\s+)?({0})(?(1)|\s*=\s*function)\s*\((?!\d)[\w$]+\)\s*\{1}(?!\s*return\s)'.format( re.escape(m.group('nfunc')), '{'), jscode, 'Initial JS player n function name (2)', group=2, default=None) - if func_name: + if fn: + func_name = fn idx = m.group('idx') - break + if generic_n_function_search(func_name): + # don't look any further + break # thx bashonly: yt-dlp/yt-dlp/pull/10611 
if not func_name: self.report_warning('Falling back to generic n function search', only_once=True) - return self._search_regex( - r'''(?xs) - (?:(?<=[^\w$])|^) # instead of \b, which ignores $ - (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\) - \s*\{(?:(?!};).)+?(?: - ["']enhanced_except_ | - return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[\w$]+ - ) - ''', jscode, 'Initial JS player n function name', group='name') + return generic_n_function_search() + if not idx: return func_name