diff --git a/unicode/read_ucd.py b/unicode/read_ucd.py
new file mode 100755
index 00000000..51828ceb
--- /dev/null
+++ b/unicode/read_ucd.py
@@ -0,0 +1,295 @@
+#!/usr/bin/env python3
+
+# Tool to read various files from the Unicode character database and
+# generate headers containing derived arrays and lookup tables needed
+# by PuTTY.
+#
+# The aim is to have this be a single tool which you can easily re-run
+# against a new version of Unicode, simply by pointing it at an
+# appropriate UCD.zip or a directory containing the same files
+# unpacked.
+
+import argparse
+import collections
+import io
+import os
+import sys
+import zipfile
+
+# Record type for one code point of UnicodeData.txt: 'c' is the integer
+# code point, the other fields are named after the properties they hold.
+UCDRecord = collections.namedtuple(
+    "UCDRecord",
+    ("c", "General_Category", "Bidi_Class", "Decomposition_Mapping"),
+)
+
+def to_ranges(iterable):
+    """Merge runs of consecutive keys sharing a value into ranges.
+
+    Takes (key, value) pairs whose integer keys arrive in ascending
+    order, and yields ((first, last), value) tuples. Each tuple says
+    that every key from first to last inclusive goes with value; a
+    new tuple begins whenever a key is skipped or the value changes.
+    """
+    pairs = iter(iterable)
+    try:
+        first, value = next(pairs)
+    except StopIteration:
+        return  # empty input: there are no ranges to report
+    last = first
+    for key, item in pairs:
+        if (key - 1, item) == (last, value):
+            last = key
+            continue
+        yield (first, last), value
+        first, last, value = key, key, item
+    yield (first, last), value
+
+def map_to_ranges(m):
+    """Yield ((start, end), value) runs covering the integer-keyed map m."""
+    yield from to_ranges((key, m[key]) for key in sorted(m))
+
+def set_to_ranges(s):
+    """Yield (start, end) for each run of consecutive integers in set s."""
+    tagged = ((member, None) for member in sorted(s))
+    yield from (span for span, _ in to_ranges(tagged))
+
+def lines(iterable, keep_comments=False):
+    """Yield the meaningful lines of a Unicode data file.
+
+    iterable supplies the raw lines, e.g. an open file handle. Each
+    line loses its line ending, then (unless keep_comments is true)
+    everything from the first '#' onwards, then trailing spaces and
+    tabs. Lines left empty by this are skipped.
+    """
+    for raw in iterable:
+        text = raw.rstrip("\r\n")
+        # A '#' starts a comment that runs to the end of the line.
+        if not keep_comments:
+            text, _, _ = text.partition("#")
+        text = text.rstrip(" \t")
+        if text:
+            yield text
+
+class Main:
+    """Command-line driver turning UCD files into PuTTY's table headers."""
+
+    def run(self):
+        """Parse arguments and generate all the output files."""
+
+        parser = argparse.ArgumentParser(
+            description='Build UCD-derived source files.')
+        parser.add_argument("ucd", help="UCD to work from, either UCD.zip or "
+                            "a directory full of unpacked files.")
+        args = parser.parse_args()
+
+        # UCD files are UTF-8: decode explicitly, not via the locale.
+        if os.path.isdir(args.ucd):
+            ucd_dir = args.ucd
+            self.open_ucd_file = lambda filename: (
+                open(os.path.join(ucd_dir, filename), encoding="utf-8"))
+        else:
+            ucd_zip = zipfile.ZipFile(args.ucd)
+            self.open_ucd_file = lambda filename: (
+                io.TextIOWrapper(ucd_zip.open(filename), encoding="utf-8"))
+
+        # The headers are created in the current working directory.
+        with open("bidi_type.h", "w") as fh:
+            self.write_bidi_type_table(fh)
+        with open("bidi_mirror.h", "w") as fh:
+            self.write_bidi_mirroring_table(fh)
+        with open("bidi_brackets.h", "w") as fh:
+            self.write_bidi_brackets_table(fh)
+        with open("nonspacing_chars.h", "w") as fh:
+            self.write_nonspacing_chars_list(fh)
+        with open("wide_chars.h", "w") as fh:
+            self.write_wide_chars_list(fh)
+        with open("ambiguous_wide_chars.h", "w") as fh:
+            self.write_ambiguous_wide_chars_list(fh)
+
+    @property
+    def UnicodeData(self):
+        """Records from UnicodeData.txt.
+
+        Each yielded item is a UCDRecord tuple. A pair of lines named
+        <Foo, First> and <Foo, Last> is expanded into one record per
+        code point in the inclusive range between them.
+        """
+        with self.open_ucd_file("UnicodeData.txt") as fh:
+            # Initialised so a stray <Foo, Last> fails the assert below.
+            prev_line_was_first = False
+            for line in lines(fh):
+                # Split up the line into its raw fields.
+                (
+                    codepoint, name, category, cclass, bidiclass, decomp,
+                    num6, num7, num8, bidimirrored, obsolete_unicode1_name,
+                    obsolete_comment, uppercase, lowercase, titlecase,
+                ) = line.split(";")
+
+                # By default, this record describes just one code point.
+                codepoints = [int(codepoint, 16)]
+
+                # Spot the special markers where consecutive lines say
+                # <Foo, First> and <Foo, Last>, indicating that the
+                # entire range of code points in between are treated
+                # the same: then 'codepoints' becomes a range object.
+                if "<" in name:
+                    assert name.startswith("<") and name.endswith(">"), (
+                        "Confusing < in character name: {!r}".format(line))
+                    name_pieces = [piece.strip(" \t") for piece in
+                                   name.lstrip("<").rstrip(">").split(",")]
+                    if "First" in name_pieces:
+                        prev_line_was_first = True
+                        prev_codepoint = codepoints[0]
+                        continue
+                    elif "Last" in name_pieces:
+                        assert prev_line_was_first
+                        codepoints = range(prev_codepoint, codepoints[0]+1)
+                        del prev_codepoint
+                prev_line_was_first = False
+
+                # For the moment, we only care about decomposition
+                # mappings that consist of a single hex number (i.e.
+                # are singletons and not compatibility mappings)
+                try:
+                    dm = [int(decomp, 16)]
+                except ValueError:
+                    dm = []
+
+                # Yield a UCDRecord for each code point in our range.
+                for codepoint in codepoints:
+                    yield UCDRecord(
+                        c=codepoint, General_Category=category,
+                        Bidi_Class=bidiclass, Decomposition_Mapping=dm)
+
+    @property
+    def BidiMirroring(self):
+        """Parsed character pairs from BidiMirroring.txt.
+
+        Each yielded tuple is a pair of Unicode code points.
+        """
+        with self.open_ucd_file("BidiMirroring.txt") as fh:
+            for line in lines(fh):
+                cs1, cs2 = line.split(";")
+                yield int(cs1, 16), int(cs2, 16)
+
+    @property
+    def BidiBrackets(self):
+        """Bracket pairs from BidiBrackets.txt.
+
+        Each yielded tuple is a pair of Unicode code points, followed
+        by either 'o', 'c' or 'n' to indicate whether the first one is
+        an open or closing parenthesis or neither.
+        """
+        with self.open_ucd_file("BidiBrackets.txt") as fh:
+            for line in lines(fh):
+                cs1, cs2, kind = line.split(";")
+                yield int(cs1, 16), int(cs2, 16), kind.strip(" \t")
+
+    @property
+    def EastAsianWidth(self):
+        """East Asian width types from EastAsianWidth.txt.
+
+        Each yielded tuple is (code point, width type); a first..last
+        range line yields one tuple per code point it covers.
+        """
+        with self.open_ucd_file("EastAsianWidth.txt") as fh:
+            for line in lines(fh):
+                fields = line.split(";")
+                if ".." in fields[0]:
+                    start, end = [int(s, 16) for s in fields[0].split("..")]
+                    cs = range(start, end+1)
+                else:
+                    cs = [int(fields[0], 16)]
+                # Strip padding around the ';' so width types match exactly.
+                for c in cs:
+                    yield c, fields[1].strip(" \t")
+
+    def write_bidi_type_table(self, fh):
+        """Write a {start, end, class} row per run of a non-ON Bidi_Class."""
+        types = {}
+        for rec in self.UnicodeData:
+            if rec.Bidi_Class != "ON":
+                types[rec.c] = rec.Bidi_Class
+
+        for (start, end), t in map_to_ranges(types):
+            print(f"        {{0x{start:04x}, 0x{end:04x}, {t}}},", file=fh)
+
+    def write_bidi_mirroring_table(self, fh):
+        """Write a {char, mirror} row, in both directions, per mirror pair."""
+        bidi_mirror = {}
+        for c1, c2 in self.BidiMirroring:
+            assert bidi_mirror.get(c1, c2) == c2, f"Clash at {c1:04X}"
+            bidi_mirror[c1] = c2
+            assert bidi_mirror.get(c2, c1) == c1, f"Clash at {c2:04X}"
+            bidi_mirror[c2] = c1
+
+        for c1, c2 in sorted(bidi_mirror.items()):
+            print("        {{0x{:04x}, 0x{:04x}}},".format(c1, c2), file=fh)
+
+    def write_bidi_brackets_table(self, fh):
+        """Write a {char, {partner, equivalent, kind}} row per bracket."""
+        bracket_map = {}
+        for c1, c2, kind in self.BidiBrackets:
+            bracket_map[c1] = kind, c2
+
+        # Pair up code points related by a decomposition mapping that
+        # is a single code point, so lookups work in either direction.
+        equivalents = {}
+        for rec in self.UnicodeData:
+            if len(rec.Decomposition_Mapping) == 1:
+                c2 = rec.Decomposition_Mapping[0]
+                equivalents[rec.c] = c2
+                equivalents[c2] = rec.c
+
+        for src, (kind, dst) in sorted(bracket_map.items()):
+            dsteq = equivalents.get(dst, 0)
+            # UCD claims there's an 'n' kind possible, but as of UCD
+            # 14, no instances of it exist
+            enumval = {'o': 'BT_OPEN', 'c': 'BT_CLOSE'}[kind]
+            print("        {{0x{:04x}, {{0x{:04x}, 0x{:04x}, {}}}}},".format(
+                src, dst, dsteq, enumval), file=fh)
+
+    def write_nonspacing_chars_list(self, fh):
+        """Write a { start, end } row per run of non-spacing characters."""
+        cs = set()
+        for rec in self.UnicodeData:
+            nonspacing = rec.General_Category in {"Me", "Mn", "Cf"}
+            if rec.c == 0xAD:
+                # In typography this is a SOFT HYPHEN and counts as
+                # discardable. But it's also an ISO 8859-1 printing
+                # character, and all of those occupy one character
+                # cell in a terminal.
+                nonspacing = False
+            if 0x1160 <= rec.c <= 0x11FF:
+                # Medial (vowel) and final (consonant) jamo for
+                # decomposed Hangul characters. These are regarded as
+                # non-spacing on the grounds that they compose with
+                # the preceding initial consonant.
+                nonspacing = True
+            if nonspacing:
+                cs.add(rec.c)
+
+        for start, end in set_to_ranges(cs):
+            print(f"    {{ 0x{start:04X}, 0x{end:04X} }},", file=fh)
+
+    def write_width_table(self, fh, accept):
+        """Write {start, end} rows for chars whose width type is in accept."""
+        cs = set()
+        for c, wid in self.EastAsianWidth:
+            if wid in accept:
+                cs.add(c)
+
+        for start, end in set_to_ranges(cs):
+            print(f"    {{0x{start:04X}, 0x{end:04X}}},", file=fh)
+
+    def write_wide_chars_list(self, fh):
+        """Write the ranges of wide ('W') and fullwidth ('F') characters."""
+        self.write_width_table(fh, {'W', 'F'})
+
+    def write_ambiguous_wide_chars_list(self, fh):
+        """Write the ranges of ambiguous-width ('A') characters."""
+        self.write_width_table(fh, {'A'})
+
+if __name__ == '__main__':
+    Main().run()  # script entry point: parse argv, write every header