mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-04-03 13:40:16 -05:00
[ie/youtube] Make signature and nsig extraction more robust (#12761)
Authored by: bashonly, seproDev Co-authored-by: sepro <sepro@sepr0.com>
This commit is contained in:
parent
a8b9ff3c2a
commit
48be862b32
@ -88,11 +88,51 @@ _SIG_TESTS = [
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/363db69b/player_ias_tce.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player_ias_tce.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
|
||||
),
|
||||
]
|
||||
|
||||
_NSIG_TESTS = [
|
||||
@ -252,6 +292,30 @@ _NSIG_TESTS = [
|
||||
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
|
||||
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
|
||||
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
|
||||
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
|
||||
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
|
||||
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
|
||||
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@ -302,33 +366,33 @@ def t_factory(name, sig_func, url_pattern):
|
||||
test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id'))
|
||||
|
||||
def test_func(self):
|
||||
basename = f'player-{name}-{test_id}.js'
|
||||
basename = f'player-{test_id}.js'
|
||||
fn = os.path.join(self.TESTDATA_DIR, basename)
|
||||
|
||||
if not os.path.exists(fn):
|
||||
urllib.request.urlretrieve(url, fn)
|
||||
with open(fn, encoding='utf-8') as testf:
|
||||
jscode = testf.read()
|
||||
self.assertEqual(sig_func(jscode, sig_input), expected_sig)
|
||||
self.assertEqual(sig_func(jscode, sig_input, url), expected_sig)
|
||||
|
||||
test_func.__name__ = f'test_{name}_js_{test_id}'
|
||||
setattr(TestSignature, test_func.__name__, test_func)
|
||||
return make_tfunc
|
||||
|
||||
|
||||
def signature(jscode, sig_input):
|
||||
func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
|
||||
def signature(jscode, sig_input, player_url):
|
||||
func = YoutubeIE(FakeYDL())._parse_sig_js(jscode, player_url)
|
||||
src_sig = (
|
||||
str(string.printable[:sig_input])
|
||||
if isinstance(sig_input, int) else sig_input)
|
||||
return func(src_sig)
|
||||
|
||||
|
||||
def n_sig(jscode, sig_input):
|
||||
def n_sig(jscode, sig_input, player_url):
|
||||
ie = YoutubeIE(FakeYDL())
|
||||
funcname = ie._extract_n_function_name(jscode)
|
||||
funcname = ie._extract_n_function_name(jscode, player_url=player_url)
|
||||
jsi = JSInterpreter(jscode)
|
||||
func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname), jscode))
|
||||
func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname), jscode, player_url))
|
||||
return func([sig_input])
|
||||
|
||||
|
||||
|
@ -34,6 +34,7 @@ from ...utils import (
|
||||
clean_html,
|
||||
datetime_from_str,
|
||||
filesize_from_tbr,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_field,
|
||||
get_first,
|
||||
@ -1986,12 +1987,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
assert os.path.basename(func_id) == func_id
|
||||
|
||||
self.write_debug(f'Extracting signature function {func_id}')
|
||||
cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
|
||||
cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.03.27'), None
|
||||
|
||||
if not cache_spec:
|
||||
code = self._load_player(video_id, player_url)
|
||||
if code:
|
||||
res = self._parse_sig_js(code)
|
||||
res = self._parse_sig_js(code, player_url)
|
||||
test_string = ''.join(map(chr, range(len(example_sig))))
|
||||
cache_spec = [ord(c) for c in res(test_string)]
|
||||
self.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
||||
@ -2039,7 +2040,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
f' return {expr_code}\n')
|
||||
self.to_screen('Extracted signature function:\n' + code)
|
||||
|
||||
def _parse_sig_js(self, jscode):
|
||||
def _parse_sig_js(self, jscode, player_url):
|
||||
# Examples where `sig` is funcname:
|
||||
# sig=function(a){a=a.split(""); ... ;return a.join("")};
|
||||
# ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
|
||||
@ -2063,12 +2064,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
||||
jscode, 'Initial JS player signature function name', group='sig')
|
||||
|
||||
varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
|
||||
jsi = JSInterpreter(jscode)
|
||||
global_var_map = {}
|
||||
_, varname, value = self._extract_player_js_global_var(jscode)
|
||||
if varname:
|
||||
global_var_map[varname] = jsi.interpret_expression(value, {}, allow_recursion=100)
|
||||
initial_function = jsi.extract_function(funcname, global_var_map)
|
||||
initial_function = jsi.extract_function(funcname, filter_dict({varname: global_list}))
|
||||
return lambda s: initial_function([s])
|
||||
|
||||
def _cached(self, func, *cache_id):
|
||||
@ -2093,7 +2091,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if func_code := self._player_cache.get(cache_id):
|
||||
return func_code
|
||||
|
||||
func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.03.26')
|
||||
func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.03.27')
|
||||
if func_code:
|
||||
self._player_cache[cache_id] = func_code
|
||||
|
||||
@ -2150,6 +2148,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return ret
|
||||
|
||||
def _extract_n_function_name(self, jscode, player_url=None):
|
||||
varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
|
||||
if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('_w8_'), any)):
|
||||
funcname = self._search_regex(
|
||||
r'''(?xs)
|
||||
[;\n](?:
|
||||
(?P<f>function\s+)|
|
||||
(?:var\s+)?
|
||||
)(?P<funcname>[a-zA-Z0-9_$]+)\s*(?(f)|=\s*function\s*)
|
||||
\((?P<argname>[a-zA-Z0-9_$]+)\)\s*\{
|
||||
(?:(?!\}[;\n]).)+
|
||||
\}\s*catch\(\s*[a-zA-Z0-9_$]+\s*\)\s*
|
||||
\{\s*return\s+%s\[%d\]\s*\+\s*(?P=argname)\s*\}\s*return\s+[^}]+\}[;\n]
|
||||
''' % (re.escape(varname), global_list.index(debug_str)),
|
||||
jscode, 'nsig function name', group='funcname', default=None)
|
||||
if funcname:
|
||||
return funcname
|
||||
self.write_debug(join_nonempty(
|
||||
'Initial search was unable to find nsig function name',
|
||||
player_url and f' player = {player_url}', delim='\n'), only_once=True)
|
||||
|
||||
# Examples (with placeholders nfunc, narray, idx):
|
||||
# * .get("n"))&&(b=nfunc(b)
|
||||
# * .get("n"))&&(b=narray[idx](b)
|
||||
@ -2179,7 +2197,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if not funcname:
|
||||
self.report_warning(join_nonempty(
|
||||
'Falling back to generic n function search',
|
||||
player_url and f' player = {player_url}', delim='\n'))
|
||||
player_url and f' player = {player_url}', delim='\n'), only_once=True)
|
||||
return self._search_regex(
|
||||
r'''(?xs)
|
||||
;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
|
||||
@ -2192,9 +2210,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
|
||||
f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
|
||||
|
||||
def _extract_player_js_global_var(self, jscode):
|
||||
def _extract_player_js_global_var(self, jscode, player_url):
|
||||
"""Returns tuple of strings: variable assignment code, variable name, variable value code"""
|
||||
return self._search_regex(
|
||||
extract_global_var = self._cached(self._search_regex, 'js global array', player_url)
|
||||
varcode, varname, varvalue = extract_global_var(
|
||||
r'''(?x)
|
||||
(?P<q1>["\'])use\s+strict(?P=q1);\s*
|
||||
(?P<code>
|
||||
@ -2206,17 +2225,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
)
|
||||
)[;,]
|
||||
''', jscode, 'global variable', group=('code', 'name', 'value'), default=(None, None, None))
|
||||
if not varcode:
|
||||
self.write_debug(join_nonempty(
|
||||
'No global array variable found in player JS',
|
||||
player_url and f' player = {player_url}', delim='\n'), only_once=True)
|
||||
return varcode, varname, varvalue
|
||||
|
||||
def _fixup_n_function_code(self, argnames, code, full_code):
|
||||
global_var, varname, _ = self._extract_player_js_global_var(full_code)
|
||||
if global_var:
|
||||
self.write_debug(f'Prepending n function code with global array variable "{varname}"')
|
||||
code = global_var + '; ' + code
|
||||
def _interpret_player_js_global_var(self, jscode, player_url):
|
||||
"""Returns tuple of: variable name string, variable value list"""
|
||||
_, varname, array_code = self._extract_player_js_global_var(jscode, player_url)
|
||||
jsi = JSInterpreter(array_code)
|
||||
interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url)
|
||||
return varname, interpret_global_var(array_code, {}, allow_recursion=10)
|
||||
|
||||
def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
|
||||
varcode, varname, _ = self._extract_player_js_global_var(jscode, player_url)
|
||||
if varcode and varname:
|
||||
nsig_code = varcode + '; ' + nsig_code
|
||||
_, global_list = self._interpret_player_js_global_var(jscode, player_url)
|
||||
else:
|
||||
self.write_debug('No global array variable found in player JS')
|
||||
return argnames, re.sub(
|
||||
rf';\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:(["\'])undefined\1|{varname}\[\d+\])\s*\)\s*return\s+{argnames[0]};',
|
||||
';', code)
|
||||
varname = 'dlp_wins'
|
||||
global_list = []
|
||||
|
||||
undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
|
||||
fixed_code = re.sub(
|
||||
rf'''(?x)
|
||||
;\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:
|
||||
(["\'])undefined\1|
|
||||
{re.escape(varname)}\[{undefined_idx}\]
|
||||
)\s*\)\s*return\s+{re.escape(argnames[0])};
|
||||
''', ';', nsig_code)
|
||||
if fixed_code == nsig_code:
|
||||
self.write_debug(join_nonempty(
|
||||
'No typeof statement found in nsig function code',
|
||||
player_url and f' player = {player_url}', delim='\n'), only_once=True)
|
||||
return argnames, fixed_code
|
||||
|
||||
def _extract_n_function_code(self, video_id, player_url):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
@ -2230,7 +2273,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
func_name = self._extract_n_function_name(jscode, player_url=player_url)
|
||||
|
||||
# XXX: Workaround for the global array variable and lack of `typeof` implementation
|
||||
func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode)
|
||||
func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url)
|
||||
|
||||
return jsi, player_id, func_code
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user