From bdf7f73d3db1bff40607266f98e8f10a26f00a52 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Wed, 19 Apr 2023 14:21:51 +0100 Subject: [PATCH] split_into_argv: stop using isspace(). I checked exhaustively today and found that the only characters (even in Unicode) that Windows's default argv splitter will recognise as word separators are the space and tab characters. So I think it's a mistake to use <ctype.h> functions to identify word separators; we should use that fixed character pair, and then we know we're getting the right ones only. (cherry picked from commit 9adfa797677ba5cc5a5c9db45e593a9e4ab293aa) --- windows/utils/split_into_argv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/windows/utils/split_into_argv.c b/windows/utils/split_into_argv.c index fe6bdbf7..e65ae0a2 100644 --- a/windows/utils/split_into_argv.c +++ b/windows/utils/split_into_argv.c @@ -161,6 +161,11 @@ #define MOD3 0 #endif +static inline bool is_word_sep(char c) +{ + return c == ' ' || c == '\t'; +} + void split_into_argv(char *cmdline, int *argc, char ***argv, char ***argstart) { @@ -173,7 +178,7 @@ void split_into_argv(char *cmdline, int *argc, char ***argv, * First deal with the simplest of all special cases: if there * aren't any arguments, return 0,NULL,NULL. */ - while (*cmdline && isspace((unsigned char)*cmdline)) cmdline++; + while (*cmdline && is_word_sep(*cmdline)) cmdline++; if (!*cmdline) { if (argc) *argc = 0; if (argv) *argv = NULL; @@ -195,7 +200,7 @@ void split_into_argv(char *cmdline, int *argc, char ***argv, bool quote; /* Skip whitespace searching for start of argument. */ - while (*p && isspace((unsigned char)*p)) p++; + while (*p && is_word_sep(*p)) p++; if (!*p) break; /* We have an argument; start it. */ @@ -206,7 +211,7 @@ void split_into_argv(char *cmdline, int *argc, char ***argv, /* Copy data into the argument until it's finished. 
*/ while (*p) { - if (!quote && isspace((unsigned char)*p)) + if (!quote && is_word_sep(*p)) break; /* argument is finished */ if (*p == '"' || *p == '\\') {