mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-06-30 19:12:48 -05:00
Function to check a UTF-8 string for unknown characters.
So we can reject things we don't know how to NFC yet.
This commit is contained in:
@ -65,6 +65,7 @@ add_sources_from_current_dir(utils
|
||||
stripctrl.c
|
||||
tempseat.c
|
||||
tree234.c
|
||||
unicode-known.c
|
||||
unicode-norm.c
|
||||
validate_manual_hostkey.c
|
||||
version.c
|
||||
|
53
utils/unicode-known.c
Normal file
53
utils/unicode-known.c
Normal file
@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Check a UTF-8 string to ensure every character in it is part of the
|
||||
* version of Unicode that we understand.
|
||||
*
|
||||
* (If it isn't, then we don't know what combining properties it has,
|
||||
* so we can't safely NFC it and rely on the result not changing when
|
||||
* we later update our Unicode version.)
|
||||
*/
|
||||
|
||||
#include "misc.h"
|
||||
#include "unicode/version.h"
|
||||
|
||||
/*
 * Report whether code point c falls inside one of the ranges listed
 * in unicode/known_chars.h.
 *
 * Each table entry is a {start, end} pair, and both ends are treated
 * as inclusive: c matches an entry when start <= c <= end.
 *
 * The lookup is a binary search, so it only gives correct answers if
 * the included table is sorted in ascending order and its ranges do
 * not overlap. (NOTE(review): that property belongs to the generated
 * header and cannot be checked from this file - confirm it there.)
 */
static bool known(unsigned c)
{
    struct range {
        unsigned start, end;
    };
    static const struct range ranges[] = {
        #include "unicode/known_chars.h"
    };

    /* Search the half-open interval [start, end) of table entries. */
    const struct range *start = ranges, *end = start + lenof(ranges);

    while (end > start) {
        const struct range *curr = start + (end-start) / 2;
        if (c < curr->start)
            end = curr;                /* c can only be in an earlier entry */
        else if (c > curr->end)
            start = curr + 1;          /* c can only be in a later entry */
        else
            return true;               /* curr->start <= c <= curr->end */
    }

    /* Interval shrank to nothing without finding a containing range. */
    return false;
}
|
||||
|
||||
/*
 * Scan a UTF-8 string for code points that known() does not
 * recognise.
 *
 * Returns NULL if every code point decoded from the input is known.
 * Otherwise returns a message, built by dupprintf(), describing the
 * first unknown code point: its value, its index counted in
 * characters, and the byte offset at which it starts. (The message is
 * presumably dynamically allocated and owned by the caller - confirm
 * against dupprintf's contract.)
 */
char *utf8_unknown_char(ptrlen input)
{
    BinarySource src[1];
    BinarySource_BARE_INIT_PL(src, input);

    for (size_t nchars = 0; get_avail(src); nchars++) {
        /*
         * Record where this character starts before decoding it.
         * decode_utf8() consumes the character's bytes, so reading
         * src->pos afterwards would give the offset of the _next_
         * character, which disagrees with nchars (the index of the
         * offending character itself).
         */
        size_t char_start = src->pos;
        unsigned c = decode_utf8(src);
        if (!known(c))
            return dupprintf(
                "cannot stably normalise this string: code point %04X "
                "(at character position %"SIZEu", byte position %"SIZEu") "
                "is not in Unicode %s", c, nchars, char_start,
                UNICODE_VERSION_SHORT);
    }

    return NULL;
}
|
Reference in New Issue
Block a user