1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-09 09:27:59 +00:00

A better solution to the problem of duplicated positions in
CS_ISO8859_1_X11: where two SBCS positions map to the same Unicode
code point, we now have a `sortpriority' hint which can tell
sbcsgen.pl which one it should preferentially generate when
converting back to SBCS.

[originally from svn r2427]
This commit is contained in:
Simon Tatham 2003-01-02 16:56:29 +00:00
parent 8de5682450
commit a2afc03bdb
2 changed files with 30 additions and 14 deletions

View File

@ -306,13 +306,15 @@ charset CS_ISO8859_16
appear from positions 0x5F to 0x7E inclusive. Here is the modified
ISO8859-1 code table.
Note that position 0 is still 0000, not 0020 as it might plausibly
be, because I didn't like the idea that converting several words
in Unicode through this table would produce NULs in place of all
the spaces! In principle that works fine, but it makes me uneasy.
Since this table contains a few duplicated positions, we use the
`sortpriority' hint to indicate that things in the main part of
the code table (0x20-0xFF) should be generated preferentially when
converting _from_ Unicode. Hence, U+00b0 (for example) will yield
0xb0 rather than 0x07.
charset CS_ISO8859_1_X11
0000 2666 2592 2409 240c 240d 240a 00b0 00b1 2424 240b 2518 2510 250c 2514 253c
sortpriority 00-1F -1
0020 2666 2592 2409 240c 240d 240a 00b0 00b1 2424 240b 2518 2510 250c 2514 253c
23ba 23bb 2500 23bc 23bd 251c 2524 2534 252c 2502 2264 2265 03c0 2260 00a3 00b7
0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002a 002b 002c 002d 002e 002f
0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003a 003b 003c 003d 003e 003f

View File

@ -27,21 +27,28 @@ my $charsetname = undef;
# Parser state for the charset currently being read:
#   @vals          code points collected so far (-1 marks an "XXXX" entry)
#   @charsetnames  names of every charset that has been completed and emitted
#   @sortpriority  per-position priority values, indexed by position 0..255
my @vals = ();
my @charsetnames = ();
my @sortpriority = ();
# Read the input one line at a time from the FOO filehandle and dispatch on
# the form of each line.
while (<FOO>) {
chomp;
if (/^charset (.*)$/) {
# "charset NAME": start a new charset; discard any partially collected
# values and reset all 256 priorities to 0.
$charsetname = $1;
@vals = ();
@sortpriority = map { 0 } 0..255;
} elsif (/^sortpriority ([^-]*)-([^-]*) (.*)$/) {
# "sortpriority LO-HI N": LO and HI are parsed as hex; N is added to the
# priority of every position from LO to HI inclusive.
for ($i = hex $1; $i <= hex $2; $i++) {
$sortpriority[$i] += $3;
}
} elsif (/^[0-9a-fA-FX]/) {
# Any other line starting with a hex digit or 'X' is treated as a row of
# space-separated table entries: "XXXX" becomes -1, everything else is
# parsed as hex and appended to @vals.
push @vals, map { $_ eq "XXXX" ? -1 : hex $_ } split / +/, $_;
if (scalar @vals > 256) {
# $. is the current input line number.
die "$infile:$.: charset $charsetname has more than 256 values\n";
} elsif (scalar @vals == 256) {
# Exactly 256 entries collected: emit the charset, remember its name,
# and reset the state for the next one.
# NOTE(review): two outcharset calls appear here, one passing @vals as a
# flat list and one passing array references plus the priorities. This
# looks like the removed and added lines of a diff shown together --
# confirm which single call is meant to be live.
&outcharset($charsetname, @vals);
&outcharset($charsetname, \@vals, \@sortpriority);
push @charsetnames, $charsetname;
$charsetname = undef;
@vals = ();
@sortpriority = map { 0 } 0..255;
}
}
}
@ -56,8 +63,8 @@ foreach $i (@charsetnames) {
print "\n";
print "#endif /* ENUM_CHARSETS */\n";
sub outcharset($@) {
my ($name, @vals) = @_;
sub outcharset($$$) {
my ($name, $vals, $sortpriority) = @_;
my ($prefix, $i, @sorted);
print "static const sbcs_data data_$name = {\n";
@ -65,11 +72,12 @@ sub outcharset($@) {
$prefix = " ";
@sorted = ();
for ($i = 0; $i < 256; $i++) {
if ($vals[$i] < 0) {
if ($vals->[$i] < 0) {
printf "%sERROR ", $prefix;
} else {
printf "%s0x%04x", $prefix, $vals[$i];
push @sorted, [$i, $vals[$i]];
printf "%s0x%04x", $prefix, $vals->[$i];
die "ooh? $i\n" unless defined $sortpriority->[$i];
push @sorted, [$i, $vals->[$i], 0+$sortpriority->[$i]];
}
if ($i % 8 == 7) {
$prefix = ",\n ";
@ -78,15 +86,21 @@ sub outcharset($@) {
}
}
print "\n },\n {\n";
@sorted = sort { $a->[1] <=> $b->[1] } @sorted;
@sorted = sort { $a->[1] == $b->[1] ?
$b->[2] <=> $a->[2] :
$a->[1] <=> $b->[1] } @sorted;
$prefix = " ";
for ($i = 0; $i < scalar @sorted; $i++) {
$uval = -1;
for ($i = $j = 0; $i < scalar @sorted; $i++) {
next if ($uval == $sorted[$i]->[1]); # low-priority alternative
$uval = $sorted[$i]->[1];
printf "%s0x%02x", $prefix, $sorted[$i]->[0];
if ($i % 8 == 7) {
if ($j % 8 == 7) {
$prefix = ",\n ";
} else {
$prefix = ", ";
}
$j++;
}
printf "\n },\n %d\n", scalar @sorted;
print "};\n";