1
0
mirror of https://github.com/yt-dlp/yt-dlp synced 2025-04-03 13:40:16 -05:00

[ie/youtube] Make signature and nsig extraction more robust (#12761)

Authored by: bashonly, seproDev

Co-authored-by: sepro <sepro@sepr0.com>
This commit is contained in:
bashonly 2025-03-27 17:31:01 -05:00 committed by GitHub
parent a8b9ff3c2a
commit 48be862b32
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 136 additions and 29 deletions

View File

@ -88,11 +88,51 @@ _SIG_TESTS = [
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
),
(
'https://www.youtube.com/s/player/363db69b/player_ias_tce.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
),
(
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
),
(
'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
),
(
'https://www.youtube.com/s/player/20830619/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
),
(
'https://www.youtube.com/s/player/20830619/player_ias_tce.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
),
(
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
),
(
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
),
(
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
),
(
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
),
]
_NSIG_TESTS = [
@ -252,6 +292,30 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
),
(
'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
),
(
'https://www.youtube.com/s/player/20830619/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
),
(
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
),
(
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
),
(
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
),
(
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
),
]
@ -302,33 +366,33 @@ def t_factory(name, sig_func, url_pattern):
test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id'))
def test_func(self):
basename = f'player-{name}-{test_id}.js'
basename = f'player-{test_id}.js'
fn = os.path.join(self.TESTDATA_DIR, basename)
if not os.path.exists(fn):
urllib.request.urlretrieve(url, fn)
with open(fn, encoding='utf-8') as testf:
jscode = testf.read()
self.assertEqual(sig_func(jscode, sig_input), expected_sig)
self.assertEqual(sig_func(jscode, sig_input, url), expected_sig)
test_func.__name__ = f'test_{name}_js_{test_id}'
setattr(TestSignature, test_func.__name__, test_func)
return make_tfunc
def signature(jscode, sig_input):
func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
def signature(jscode, sig_input, player_url):
func = YoutubeIE(FakeYDL())._parse_sig_js(jscode, player_url)
src_sig = (
str(string.printable[:sig_input])
if isinstance(sig_input, int) else sig_input)
return func(src_sig)
def n_sig(jscode, sig_input):
def n_sig(jscode, sig_input, player_url):
ie = YoutubeIE(FakeYDL())
funcname = ie._extract_n_function_name(jscode)
funcname = ie._extract_n_function_name(jscode, player_url=player_url)
jsi = JSInterpreter(jscode)
func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname), jscode))
func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname), jscode, player_url))
return func([sig_input])

View File

@ -34,6 +34,7 @@ from ...utils import (
clean_html,
datetime_from_str,
filesize_from_tbr,
filter_dict,
float_or_none,
format_field,
get_first,
@ -1986,12 +1987,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
assert os.path.basename(func_id) == func_id
self.write_debug(f'Extracting signature function {func_id}')
cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.03.27'), None
if not cache_spec:
code = self._load_player(video_id, player_url)
if code:
res = self._parse_sig_js(code)
res = self._parse_sig_js(code, player_url)
test_string = ''.join(map(chr, range(len(example_sig))))
cache_spec = [ord(c) for c in res(test_string)]
self.cache.store('youtube-sigfuncs', func_id, cache_spec)
@ -2039,7 +2040,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f' return {expr_code}\n')
self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
def _parse_sig_js(self, jscode, player_url):
# Examples where `sig` is funcname:
# sig=function(a){a=a.split(""); ... ;return a.join("")};
# ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
@ -2063,12 +2064,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig')
varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
jsi = JSInterpreter(jscode)
global_var_map = {}
_, varname, value = self._extract_player_js_global_var(jscode)
if varname:
global_var_map[varname] = jsi.interpret_expression(value, {}, allow_recursion=100)
initial_function = jsi.extract_function(funcname, global_var_map)
initial_function = jsi.extract_function(funcname, filter_dict({varname: global_list}))
return lambda s: initial_function([s])
def _cached(self, func, *cache_id):
@ -2093,7 +2091,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if func_code := self._player_cache.get(cache_id):
return func_code
func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.03.26')
func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.03.27')
if func_code:
self._player_cache[cache_id] = func_code
@ -2150,6 +2148,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return ret
def _extract_n_function_name(self, jscode, player_url=None):
varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('_w8_'), any)):
funcname = self._search_regex(
r'''(?xs)
[;\n](?:
(?P<f>function\s+)|
(?:var\s+)?
)(?P<funcname>[a-zA-Z0-9_$]+)\s*(?(f)|=\s*function\s*)
\((?P<argname>[a-zA-Z0-9_$]+)\)\s*\{
(?:(?!\}[;\n]).)+
\}\s*catch\(\s*[a-zA-Z0-9_$]+\s*\)\s*
\{\s*return\s+%s\[%d\]\s*\+\s*(?P=argname)\s*\}\s*return\s+[^}]+\}[;\n]
''' % (re.escape(varname), global_list.index(debug_str)),
jscode, 'nsig function name', group='funcname', default=None)
if funcname:
return funcname
self.write_debug(join_nonempty(
'Initial search was unable to find nsig function name',
player_url and f' player = {player_url}', delim='\n'), only_once=True)
# Examples (with placeholders nfunc, narray, idx):
# * .get("n"))&&(b=nfunc(b)
# * .get("n"))&&(b=narray[idx](b)
@ -2179,7 +2197,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not funcname:
self.report_warning(join_nonempty(
'Falling back to generic n function search',
player_url and f' player = {player_url}', delim='\n'))
player_url and f' player = {player_url}', delim='\n'), only_once=True)
return self._search_regex(
r'''(?xs)
;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
@ -2192,9 +2210,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
def _extract_player_js_global_var(self, jscode):
def _extract_player_js_global_var(self, jscode, player_url):
"""Returns tuple of strings: variable assignment code, variable name, variable value code"""
return self._search_regex(
extract_global_var = self._cached(self._search_regex, 'js global array', player_url)
varcode, varname, varvalue = extract_global_var(
r'''(?x)
(?P<q1>["\'])use\s+strict(?P=q1);\s*
(?P<code>
@ -2206,17 +2225,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
)
)[;,]
''', jscode, 'global variable', group=('code', 'name', 'value'), default=(None, None, None))
if not varcode:
self.write_debug(join_nonempty(
'No global array variable found in player JS',
player_url and f' player = {player_url}', delim='\n'), only_once=True)
return varcode, varname, varvalue
def _fixup_n_function_code(self, argnames, code, full_code):
global_var, varname, _ = self._extract_player_js_global_var(full_code)
if global_var:
self.write_debug(f'Prepending n function code with global array variable "{varname}"')
code = global_var + '; ' + code
def _interpret_player_js_global_var(self, jscode, player_url):
"""Returns tuple of: variable name string, variable value list"""
_, varname, array_code = self._extract_player_js_global_var(jscode, player_url)
jsi = JSInterpreter(array_code)
interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url)
return varname, interpret_global_var(array_code, {}, allow_recursion=10)
def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
varcode, varname, _ = self._extract_player_js_global_var(jscode, player_url)
if varcode and varname:
nsig_code = varcode + '; ' + nsig_code
_, global_list = self._interpret_player_js_global_var(jscode, player_url)
else:
self.write_debug('No global array variable found in player JS')
return argnames, re.sub(
rf';\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:(["\'])undefined\1|{varname}\[\d+\])\s*\)\s*return\s+{argnames[0]};',
';', code)
varname = 'dlp_wins'
global_list = []
undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
fixed_code = re.sub(
rf'''(?x)
;\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:
(["\'])undefined\1|
{re.escape(varname)}\[{undefined_idx}\]
)\s*\)\s*return\s+{re.escape(argnames[0])};
''', ';', nsig_code)
if fixed_code == nsig_code:
self.write_debug(join_nonempty(
'No typeof statement found in nsig function code',
player_url and f' player = {player_url}', delim='\n'), only_once=True)
return argnames, fixed_code
def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
@ -2230,7 +2273,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
func_name = self._extract_n_function_name(jscode, player_url=player_url)
# XXX: Workaround for the global array variable and lack of `typeof` implementation
func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode)
func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url)
return jsi, player_id, func_code