mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-01-25 09:12:24 +00:00
Polish the output of read_ucd.py.
The initial outputs were all deliberately inconsistent with each other, so that each one exactly matched the existing table I was trying to replace. Now I've done that check, I can clean them up. Normalised spacing and case to be consistent; removed pointless indentation (these are now include files, so they don't have to be indented to the same level as the array declaration surrounding each one's #include); added a header comment in each autogenerated file, saying that it's autogenerated, what it's for, and who it's used by. The currently supported version number of Unicode is also exposed in a header file, so that I can put it in diagnostics.
This commit is contained in:
parent
b72c9aba28
commit
430af47a38
@ -13,6 +13,7 @@ import argparse
|
|||||||
import collections
|
import collections
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
|
import string
|
||||||
import sys
|
import sys
|
||||||
import zipfile
|
import zipfile
|
||||||
|
|
||||||
@ -90,6 +91,10 @@ class Main:
|
|||||||
self.open_ucd_file = lambda filename: (
|
self.open_ucd_file = lambda filename: (
|
||||||
io.TextIOWrapper(ucd_zip.open(filename)))
|
io.TextIOWrapper(ucd_zip.open(filename)))
|
||||||
|
|
||||||
|
self.find_unicode_version()
|
||||||
|
|
||||||
|
with open("version.h", "w") as fh:
|
||||||
|
self.write_version_header(fh)
|
||||||
with open("bidi_type.h", "w") as fh:
|
with open("bidi_type.h", "w") as fh:
|
||||||
self.write_bidi_type_table(fh)
|
self.write_bidi_type_table(fh)
|
||||||
with open("bidi_mirror.h", "w") as fh:
|
with open("bidi_mirror.h", "w") as fh:
|
||||||
@ -103,6 +108,22 @@ class Main:
|
|||||||
with open("ambiguous_wide_chars.h", "w") as fh:
|
with open("ambiguous_wide_chars.h", "w") as fh:
|
||||||
self.write_ambiguous_wide_chars_list(fh)
|
self.write_ambiguous_wide_chars_list(fh)
|
||||||
|
|
||||||
|
def find_unicode_version(self):
    """Find out the version of Unicode.

    This is read from the top of NamesList.txt, which has the
    closest thing to a machine-readable statement of the version
    number that I found in the whole collection of files.

    On success, two attributes are set:

      unicode_version_full   everything after the leading "@@@<TAB>"
                             marker on the first line that has one
      unicode_version_short  the space-separated words of the full
                             string that contain at least one ASCII
                             digit, rejoined with single spaces

    Raises ValueError if NamesList.txt has no "@@@<TAB>" line, rather
    than returning with the attributes unset and letting a later
    output step fail with an unrelated-looking AttributeError.
    """
    marker = "@@@\t"
    with self.open_ucd_file("NamesList.txt") as fh:
        # NOTE(review): lines() is a helper defined elsewhere in this
        # file; presumably it yields one cleaned-up line of text at a
        # time -- confirm against its definition.
        for line in lines(fh):
            if not line.startswith(marker):
                continue
            full = line[len(marker):]
            self.unicode_version_full = full
            # Keep only the words that look like part of a version
            # number, i.e. that contain a digit somewhere.
            self.unicode_version_short = " ".join(
                word for word in full.split(" ")
                if any(c in string.digits for c in word))
            return
    raise ValueError(
        "NamesList.txt contains no '@@@<TAB>' title line, so the "
        "Unicode version cannot be determined")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def UnicodeData(self):
|
def UnicodeData(self):
|
||||||
"""Records from UnicodeData.txt.
|
"""Records from UnicodeData.txt.
|
||||||
@ -210,7 +231,41 @@ class Main:
|
|||||||
for c in cs:
|
for c in cs:
|
||||||
yield c, fields[1]
|
yield c, fields[1]
|
||||||
|
|
||||||
|
def write_file_header_comment(self, fh, description):
    """Write the standard C comment that heads every generated file.

    The comment states that the file was autogenerated by read_ucd.py
    and from which Unicode version (self.unicode_version_full), then
    reproduces 'description' one line per comment line. Newlines at
    the very start and end of 'description' are discarded first; an
    empty line inside it becomes a bare " *" with no trailing space.
    A blank line is written after the closing " */".

    fh is the text file object to write to.
    """
    provenance = " * Autogenerated by read_ucd.py from {}".format(
        self.unicode_version_full)

    # Empty description lines get no trailing space after the star.
    body = [
        " * " + text if text != "" else " *"
        for text in description.strip("\n").split("\n")
    ]

    for output_line in ["/*", provenance, " *"] + body + [" */", ""]:
        print(output_line, file=fh)
|
||||||
|
|
||||||
|
def write_version_header(self, fh):
    """Write the header file exposing the supported Unicode version.

    After the standard autogenerated-file comment, two lines are
    written to fh:

      #define UNICODE_VERSION_FULL "<self.unicode_version_full>"
      #define UNICODE_VERSION_SHORT "<self.unicode_version_short>"

    Raises ValueError if either string contains a character that
    cannot be pasted verbatim between C double quotes: anything
    outside printable ASCII, a double quote, or a backslash (which
    would start an escape sequence, or swallow the closing quote).
    This is an explicit raise rather than an assert so that the check
    still happens when Python is run with -O.
    """
    # Validate before writing anything, so a bad version string never
    # produces a half-written header.
    for which, text in (("full", self.unicode_version_full),
                        ("short", self.unicode_version_short)):
        for c in text:
            if not (0x20 <= ord(c) < 0x7F) or c in '"\\':
                raise ValueError(
                    f"Unicode {which} version string {text!r} contains "
                    f"{c!r}, which is not safe in a C string literal")

    self.write_file_header_comment(fh, """

String literals giving the currently supported version of Unicode.
Useful for error messages and 'about' boxes.

""")

    print("#define UNICODE_VERSION_FULL \"{}\"".format(
        self.unicode_version_full), file=fh)
    print("#define UNICODE_VERSION_SHORT \"{}\"".format(
        self.unicode_version_short), file=fh)
|
||||||
|
|
||||||
def write_bidi_type_table(self, fh):
|
def write_bidi_type_table(self, fh):
|
||||||
|
self.write_file_header_comment(fh, """
|
||||||
|
|
||||||
|
Bidirectional type of every Unicode character, excluding those with
|
||||||
|
type ON.
|
||||||
|
|
||||||
|
Used by terminal/bidi.c, whose associated lookup function returns ON
|
||||||
|
by default for anything not in this list.
|
||||||
|
|
||||||
|
""")
|
||||||
types = {}
|
types = {}
|
||||||
|
|
||||||
for rec in self.UnicodeData:
|
for rec in self.UnicodeData:
|
||||||
@ -221,6 +276,14 @@ class Main:
|
|||||||
print(f"{{0x{start:04x}, 0x{end:04x}, {t}}},", file=fh)
|
print(f"{{0x{start:04x}, 0x{end:04x}, {t}}},", file=fh)
|
||||||
|
|
||||||
def write_bidi_mirroring_table(self, fh):
|
def write_bidi_mirroring_table(self, fh):
|
||||||
|
self.write_file_header_comment(fh, """
|
||||||
|
|
||||||
|
Map each Unicode character to its mirrored form when printing right to
|
||||||
|
left.
|
||||||
|
|
||||||
|
Used by terminal/bidi.c.
|
||||||
|
|
||||||
|
""")
|
||||||
bidi_mirror = {}
|
bidi_mirror = {}
|
||||||
for c1, c2 in self.BidiMirroring:
|
for c1, c2 in self.BidiMirroring:
|
||||||
assert bidi_mirror.get(c1, c2) == c2, f"Clash at {c1:%04X}"
|
assert bidi_mirror.get(c1, c2) == c2, f"Clash at {c1:%04X}"
|
||||||
@ -232,6 +295,16 @@ class Main:
|
|||||||
print("{{0x{:04x}, 0x{:04x}}},".format(c1, c2), file=fh)
|
print("{{0x{:04x}, 0x{:04x}}},".format(c1, c2), file=fh)
|
||||||
|
|
||||||
def write_bidi_brackets_table(self, fh):
|
def write_bidi_brackets_table(self, fh):
|
||||||
|
self.write_file_header_comment(fh, """
|
||||||
|
|
||||||
|
Identify Unicode characters that count as brackets for the purposes of
|
||||||
|
bidirectional text layout. For each one, indicate whether it's an open
|
||||||
|
or closed bracket, and identify up to two characters that can act as
|
||||||
|
its counterpart.
|
||||||
|
|
||||||
|
Used by terminal/bidi.c.
|
||||||
|
|
||||||
|
""")
|
||||||
bracket_map = {}
|
bracket_map = {}
|
||||||
for c1, c2, kind in self.BidiBrackets:
|
for c1, c2, kind in self.BidiBrackets:
|
||||||
bracket_map[c1] = kind, c2
|
bracket_map[c1] = kind, c2
|
||||||
@ -253,6 +326,14 @@ class Main:
|
|||||||
src, dst, dsteq, enumval), file=fh)
|
src, dst, dsteq, enumval), file=fh)
|
||||||
|
|
||||||
def write_nonspacing_chars_list(self, fh):
|
def write_nonspacing_chars_list(self, fh):
|
||||||
|
self.write_file_header_comment(fh, """
|
||||||
|
|
||||||
|
Identify Unicode characters that occupy no character cells of a
|
||||||
|
terminal.
|
||||||
|
|
||||||
|
Used by utils/wcwidth.c.
|
||||||
|
|
||||||
|
""")
|
||||||
cs = set()
|
cs = set()
|
||||||
|
|
||||||
for rec in self.UnicodeData:
|
for rec in self.UnicodeData:
|
||||||
@ -273,7 +354,7 @@ class Main:
|
|||||||
cs.add(rec.c)
|
cs.add(rec.c)
|
||||||
|
|
||||||
for start, end in set_to_ranges(cs):
|
for start, end in set_to_ranges(cs):
|
||||||
print(f" {{ 0x{start:04X}, 0x{end:04X} }},", file=fh)
|
print(f"{{0x{start:04x}, 0x{end:04x}}},", file=fh)
|
||||||
|
|
||||||
def write_width_table(self, fh, accept):
|
def write_width_table(self, fh, accept):
|
||||||
cs = set()
|
cs = set()
|
||||||
@ -283,12 +364,29 @@ class Main:
|
|||||||
cs.add(c)
|
cs.add(c)
|
||||||
|
|
||||||
for start, end in set_to_ranges(cs):
|
for start, end in set_to_ranges(cs):
|
||||||
print(f" {{0x{start:04X}, 0x{end:04X}}},", file=fh)
|
print(f"{{0x{start:04x}, 0x{end:04x}}},", file=fh)
|
||||||
|
|
||||||
def write_wide_chars_list(self, fh):
    """Write the list of characters that are two cells wide.

    Emits the standard autogenerated-file comment to fh, then hands
    over to write_width_table, accepting the values 'W' and 'F'
    (presumably East Asian Width classes -- confirm against
    write_width_table).
    """
    description = """

Identify Unicode characters that occupy two adjacent character cells
in a terminal.

Used by utils/wcwidth.c.

"""
    self.write_file_header_comment(fh, description)

    accepted = {'W', 'F'}
    self.write_width_table(fh, accepted)
|
||||||
|
|
||||||
def write_ambiguous_wide_chars_list(self, fh):
    """Write the list of characters whose width is ambiguous.

    Emits the standard autogenerated-file comment to fh, then hands
    over to write_width_table, accepting only the value 'A'
    (presumably the 'ambiguous' East Asian Width class -- confirm
    against write_width_table).
    """
    description = """

Identify Unicode characters that are width-ambiguous: some regimes
regard them as occupying two adjacent character cells in a terminal,
and others do not.

Used by utils/wcwidth.c.

"""
    self.write_file_header_comment(fh, description)

    accepted = {'A'}
    self.write_width_table(fh, accepted)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
Reference in New Issue
Block a user