diff --git a/CMakeLists.txt b/CMakeLists.txt index 0eb4cf1c..deba83f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,6 +94,11 @@ add_executable(test_wildcard target_compile_definitions(test_wildcard PRIVATE TEST) target_link_libraries(test_wildcard utils ${platform_libraries}) +add_executable(test_cert_expr + utils/cert-expr.c) +target_compile_definitions(test_cert_expr PRIVATE TEST) +target_link_libraries(test_cert_expr utils ${platform_libraries}) + add_executable(bidi_gettype terminal/bidi_gettype.c) target_link_libraries(bidi_gettype guiterminal utils ${platform_libraries}) diff --git a/defs.h b/defs.h index 14e378ab..48cbaf23 100644 --- a/defs.h +++ b/defs.h @@ -143,6 +143,8 @@ typedef struct Channel Channel; typedef struct SshChannel SshChannel; typedef struct mainchan mainchan; +typedef struct CertExprBuilder CertExprBuilder; + typedef struct ssh_sharing_state ssh_sharing_state; typedef struct ssh_sharing_connstate ssh_sharing_connstate; typedef struct share_channel share_channel; diff --git a/misc.h b/misc.h index 9cf656fd..bdfa8c2e 100644 --- a/misc.h +++ b/misc.h @@ -507,4 +507,18 @@ static inline ptrlen ptrlen_from_lf(LoadedFile *lf) * is made to handle difficult overlap cases. */ void memxor(uint8_t *out, const uint8_t *in1, const uint8_t *in2, size_t size); +/* Boolean expressions used in OpenSSH certificate configuration */ +bool cert_expr_valid(const char *expression, + char **error_msg, ptrlen *error_loc); +bool cert_expr_match_str(const char *expression, + const char *hostname, unsigned port); +/* Build a certificate expression out of hostname wildcards. Required + * to handle legacy configuration from early in development, when + * multiple wildcards were stored separately in config, implicitly + * ORed together. 
*/ +CertExprBuilder *cert_expr_builder_new(void); +void cert_expr_builder_free(CertExprBuilder *eb); +void cert_expr_builder_add(CertExprBuilder *eb, const char *wildcard); +char *cert_expr_expression(CertExprBuilder *eb); + #endif diff --git a/ssh/ca-config.c b/ssh/ca-config.c index 6c84b68b..350613df 100644 --- a/ssh/ca-config.c +++ b/ssh/ca-config.c @@ -18,12 +18,10 @@ struct ca_state { dlgcontrol *ca_reclist; dlgcontrol *ca_pubkey_edit; dlgcontrol *ca_pubkey_info; - dlgcontrol *ca_wclist; - dlgcontrol *ca_wc_edit; + dlgcontrol *ca_validity_edit; dlgcontrol *rsa_type_checkboxes[NRSATYPES]; - char *name, *pubkey, *wc; + char *name, *pubkey, *validity; tree234 *ca_names; /* stores plain 'char *' */ - tree234 *host_wcs; /* stores plain 'char *' */ ca_options opts; strbuf *ca_pubkey_blob; }; @@ -45,10 +43,8 @@ static void ca_state_free(void *vctx) struct ca_state *st = (struct ca_state *)vctx; clear_string_tree(st->ca_names); freetree234(st->ca_names); - clear_string_tree(st->host_wcs); - freetree234(st->host_wcs); sfree(st->name); - sfree(st->wc); + sfree(st->validity); sfree(st); } @@ -86,13 +82,9 @@ static void set_from_hca(struct ca_state *st, host_ca *hca) else st->pubkey = dupstr(""); - clear_string_tree(st->host_wcs); - for (size_t i = 0; i < hca->n_hostname_wildcards; i++) { - char *name = dupstr(hca->hostname_wildcards[i]); - char *added = add234(st->host_wcs, name); - if (added != name) - sfree(name); /* de-duplicate, just in case */ - } + sfree(st->validity); /* release the previously held expression */ + st->validity = dupstr(hca->validity_expression ?
+ hca->validity_expression : ""); st->opts = hca->opts; /* structure copy */ } @@ -194,7 +185,7 @@ static void ca_load_selected_record(struct ca_state *st, dlgparam *dp) dlg_refresh(st->ca_name_edit, dp); dlg_refresh(st->ca_pubkey_edit, dp); - dlg_refresh(st->ca_wclist, dp); + dlg_refresh(st->ca_validity_edit, dp); for (size_t i = 0; i < NRSATYPES; i++) dlg_refresh(st->rsa_type_checkboxes[i], dp); ca_refresh_pubkey_info(st, dp); @@ -259,8 +250,23 @@ static void ca_save_handler(dlgcontrol *ctrl, dlgparam *dp, { struct ca_state *st = (struct ca_state *)ctrl->context.p; if (event == EVENT_ACTION) { - if (!count234(st->host_wcs)) { - dlg_error_msg(dp, "No hostnames configured for this key"); + if (!*st->validity) { + dlg_error_msg(dp, "No validity expression configured " + "for this key"); + return; + } + + char *error_msg; + ptrlen error_loc; + if (!cert_expr_valid(st->validity, &error_msg, &error_loc)) { + char *error_full = dupprintf("Error in expression: %s", error_msg); + dlg_error_msg(dp, error_full); + dlg_set_focus(st->ca_validity_edit, dp); + dlg_editbox_select_range( + st->ca_validity_edit, dp, + (const char *)error_loc.ptr - st->validity, error_loc.len); + sfree(error_msg); + sfree(error_full); return; } @@ -274,10 +280,7 @@ static void ca_save_handler(dlgcontrol *ctrl, dlgparam *dp, hca->name = dupstr(st->name); hca->ca_public_key = strbuf_dup(ptrlen_from_strbuf( st->ca_pubkey_blob)); - hca->n_hostname_wildcards = count234(st->host_wcs); - hca->hostname_wildcards = snewn(hca->n_hostname_wildcards, char *); - for (size_t i = 0; i < hca->n_hostname_wildcards; i++) - hca->hostname_wildcards[i] = dupstr(index234(st->host_wcs, i)); + hca->validity_expression = dupstr(st->validity); hca->opts = st->opts; /* structure copy */ char *error = host_ca_save(hca); @@ -360,73 +363,15 @@ static void ca_pubkey_file_handler(dlgcontrol *ctrl, dlgparam *dp, } } -static void ca_wclist_handler(dlgcontrol *ctrl, dlgparam *dp, - void *data, int event) +static void 
ca_validity_handler(dlgcontrol *ctrl, dlgparam *dp, + void *data, int event) { struct ca_state *st = (struct ca_state *)ctrl->context.p; if (event == EVENT_REFRESH) { - dlg_update_start(ctrl, dp); - dlg_listbox_clear(ctrl, dp); - const char *name; - for (int i = 0; (name = index234(st->host_wcs, i)) != NULL; i++) - dlg_listbox_add(ctrl, dp, name); - dlg_update_done(ctrl, dp); - } -} - -static void ca_wc_edit_handler(dlgcontrol *ctrl, dlgparam *dp, - void *data, int event) -{ - struct ca_state *st = (struct ca_state *)ctrl->context.p; - if (event == EVENT_REFRESH) { - dlg_editbox_set(ctrl, dp, st->wc); + dlg_editbox_set(ctrl, dp, st->validity); } else if (event == EVENT_VALCHANGE) { - sfree(st->wc); - st->wc = dlg_editbox_get(ctrl, dp); - } -} - -static void ca_wc_add_handler(dlgcontrol *ctrl, dlgparam *dp, - void *data, int event) -{ - struct ca_state *st = (struct ca_state *)ctrl->context.p; - if (event == EVENT_ACTION) { - if (!st->wc) { - dlg_beep(dp); - return; - } - - if (add234(st->host_wcs, st->wc) == st->wc) { - dlg_refresh(st->ca_wclist, dp); - } else { - sfree(st->wc); - } - - st->wc = dupstr(""); - dlg_refresh(st->ca_wc_edit, dp); - } -} - -static void ca_wc_rem_handler(dlgcontrol *ctrl, dlgparam *dp, - void *data, int event) -{ - struct ca_state *st = (struct ca_state *)ctrl->context.p; - if (event == EVENT_ACTION) { - int i = dlg_listbox_index(st->ca_wclist, dp); - if (i < 0) { - dlg_beep(dp); - return; - } - char *wc = delpos234(st->host_wcs, i); - if (!wc) { - dlg_beep(dp); - return; - } - - sfree(st->wc); - st->wc = wc; - dlg_refresh(st->ca_wclist, dp); - dlg_refresh(st->ca_wc_edit, dp); + sfree(st->validity); + st->validity = dlg_editbox_get(ctrl, dp); } } @@ -454,8 +399,7 @@ void setup_ca_config_box(struct controlbox *b) b, sizeof(struct ca_state), ca_state_free); memset(st, 0, sizeof(*st)); st->ca_names = newtree234(ca_name_compare); - st->host_wcs = newtree234(ca_name_compare); - st->wc = dupstr(""); + st->validity = dupstr(""); 
ca_refresh_name_list(st); /* Initialise the settings to a default blank host_ca */ @@ -520,25 +464,9 @@ void setup_ca_config_box(struct controlbox *b) s = ctrl_getset(b, "Main", "options", "What this CA is trusted to do"); - c = ctrl_listbox(s, "Hostname patterns this key is trusted to certify", - NO_SHORTCUT, HELPCTX(no_help), ca_wclist_handler, P(st)); - c->listbox.height = 3; - st->ca_wclist = c; - - ctrl_columns(s, 3, 70, 15, 15); - c = ctrl_editbox(s, "Hostname pattern to add", 'h', 100, - HELPCTX(no_help), ca_wc_edit_handler, P(st), P(NULL)); - c->column = 0; - st->ca_wc_edit = c; - c = ctrl_pushbutton(s, "Add", NO_SHORTCUT, HELPCTX(no_help), - ca_wc_add_handler, P(st)); - c->align_next_to = st->ca_wc_edit; - c->column = 1; - c = ctrl_pushbutton(s, "Remove", NO_SHORTCUT, HELPCTX(no_help), - ca_wc_rem_handler, P(st)); - c->align_next_to = st->ca_wc_edit; - c->column = 2; - ctrl_columns(s, 1, 100); + c = ctrl_editbox(s, "Valid hosts this key is trusted to certify", 'h', 100, + HELPCTX(no_help), ca_validity_handler, P(st), P(NULL)); + st->ca_validity_edit = c; ctrl_columns(s, 4, 44, 18, 18, 18); c = ctrl_text(s, "Signature types (RSA keys only):", HELPCTX(no_help)); diff --git a/ssh/transport2.c b/ssh/transport2.c index 1d30f240..4383bb98 100644 --- a/ssh/transport2.c +++ b/ssh/transport2.c @@ -690,16 +690,9 @@ static void ssh2_write_kexinit_lists( if (!hca) continue; - bool match = false; - for (size_t i = 0, e = hca->n_hostname_wildcards; - i < e; i++) { - if (wc_match(hca->hostname_wildcards[i], hk_host)) { - match = true; - break; - } - } - - if (match && hca->ca_public_key) { + if (hca->ca_public_key && + cert_expr_match_str(hca->validity_expression, + hk_host, hk_port)) { accept_certs = true; add234(host_cas, hca); } else { diff --git a/storage.h b/storage.h index f581256e..e9138f40 100644 --- a/storage.h +++ b/storage.h @@ -103,8 +103,7 @@ void store_host_key(const char *hostname, int port, struct host_ca { char *name; strbuf *ca_public_key; - char 
**hostname_wildcards; - size_t n_hostname_wildcards; + char *validity_expression; ca_options opts; }; diff --git a/unix/storage.c b/unix/storage.c index b18c5166..83e1c19c 100644 --- a/unix/storage.c +++ b/unix/storage.c @@ -646,8 +646,8 @@ host_ca *host_ca_load(const char *name) host_ca *hca = host_ca_new(); hca->name = dupstr(name); - size_t wcsize = 0; char *line; + CertExprBuilder *eb = NULL; while ( (line = fgetline(fp)) ) { char *value = strchr(line, '='); @@ -662,10 +662,12 @@ host_ca *host_ca_load(const char *name) if (!strcmp(line, "PublicKey")) { hca->ca_public_key = base64_decode_sb(ptrlen_from_asciz(value)); } else if (!strcmp(line, "MatchHosts")) { - sgrowarray(hca->hostname_wildcards, wcsize, - hca->n_hostname_wildcards); - hca->hostname_wildcards[hca->n_hostname_wildcards++] = - dupstr(value); + if (!eb) + eb = cert_expr_builder_new(); + cert_expr_builder_add(eb, value); + } else if (!strcmp(line, "Validity")) { + hca->validity_expression = strbuf_to_str( + percent_decode_sb(ptrlen_from_asciz(value))); } else if (!strcmp(line, "PermitRSASHA1")) { hca->opts.permit_rsa_sha1 = atoi(value); } else if (!strcmp(line, "PermitRSASHA256")) { @@ -677,6 +679,13 @@ host_ca *host_ca_load(const char *name) sfree(line); } + if (eb) { + if (!hca->validity_expression) { + hca->validity_expression = cert_expr_expression(eb); + } + cert_expr_builder_free(eb); + } + return hca; } @@ -694,8 +703,9 @@ char *host_ca_save(host_ca *hca) base64_encode_fp(fp, ptrlen_from_strbuf(hca->ca_public_key), 0); fprintf(fp, "\n"); - for (size_t i = 0; i < hca->n_hostname_wildcards; i++) - fprintf(fp, "MatchHosts=%s\n", hca->hostname_wildcards[i]); + fprintf(fp, "Validity="); + percent_encode_fp(fp, ptrlen_from_asciz(hca->validity_expression), NULL); + fprintf(fp, "\n"); fprintf(fp, "PermitRSASHA1=%d\n", (int)hca->opts.permit_rsa_sha1); fprintf(fp, "PermitRSASHA256=%d\n", (int)hca->opts.permit_rsa_sha256); diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 56cc3c1b..4f5479a6 
100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -9,6 +9,7 @@ add_sources_from_current_dir(utils bufchain.c buildinfo.c burnstr.c + cert-expr.c chomp.c cmdline_get_passwd_input_state_new.c conf.c diff --git a/utils/cert-expr.c b/utils/cert-expr.c new file mode 100644 index 00000000..8fe8df7c --- /dev/null +++ b/utils/cert-expr.c @@ -0,0 +1,967 @@ +/* + * Parser for the boolean expression language used to configure what + * host names an OpenSSH certificate will be trusted to sign for. + */ + +/* + +Language specification +====================== + +Outer lexical layer: the input expression is broken up into tokens, +with any whitespace between them discarded and ignored. The following +tokens are special: + + ( ) && || ! + +and the remaining token type is an 'atom', which is any non-empty +sequence of characters from the following set: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ + abcdefghijklmnopqrstuvwxyz + 0123456789 + .-_*?[]/: + +Inner lexical layer: once the boundaries of an 'atom' token have been +determined by the outer lex layer, each atom is further classified +into one of the following subtypes: + + - If it contains no ':' or '/', it's taken to be a wildcard matching + hostnames, e.g. "*.example.com". + + - If it begins with 'port:' followed by digits, it's taken to be a + single port number specification, e.g. "port:22". + + - If it begins with 'port:' followed by two digit sequences separated + by '-', it's taken to be a port number range, e.g. "port:0-1023". + + - Any other atom is reserved for future expansion. (See Rationale.) + +Syntax layer: all of those types of atom are interpreted as predicates +applied to the (hostname, port) data configured for the SSH connection +for which the certificate is being validated. + +Wildcards are handled using the syntax in wildcard.c. 
+ +More complex boolean expressions can be made by combining those +predicates using the boolean operators and parentheses, in the obvious +way: && and || are infix operators representing logical AND and OR, ! +is a prefix operator representing logical NOT, and parentheses +indicate grouping. + +Each of && and || can associate freely with itself (that is, you can +write "a && b && c" without having to parenthesise one or the other +subexpression). But they are forbidden to associate with _each other_. +That is, if you write "a && b || c" or "a || b && c", it's a syntax +error, and you must add parentheses to indicate which operator was +intended to have the higher priority. + +Rationale +========= + +Atoms: restrictions +------------------- + +The characters permitted in the 'atom' token don't include \, even +though it's a special character defined by wildcard.c. That's because +in this restricted context wildcards will never need it: no hostname +contains a literal \, and neither does any hostname contain a literal +instance of any of the wildcard characters that wildcard.c allows you +to use \ to escape. + +Atoms: future extension +----------------------- + +The specification of the 'atom' token is intended to leave space for +more than one kind of future extension. + +Most obviously, additional special predicates similar to "port:", with +different disambiguating prefixes. I don't know what things of that +kind we might need, but space is left for them just in case. + +Also, the unused '/' in the permitted-characters spec is intended to +leave open the possibility of allowing certificate acceptance to be +based on IP address, because the usual CIDR syntax for specifying IP +ranges (e.g. "192.168.1.0/24" or "2345:6789:abcd:ef01::/128") would be +lexed as a single atom under these rules. + +For the moment, certificate acceptance rules based on IP address are +not supported, because it's not clear what the semantics ought to be. 
+There are two problems with using IP addresses for this purpose: + + 1. Sometimes they come from the DNS, which means you can't trust + them. The whole idea of SSH is to end-to-end authenticate the host + key against only the input given _by the user_ to the client. Any + additional data provided by the network, such as the result of a + DNS lookup, is suspect. + + On the other hand, sometimes the IP address *is* part of the user + input, because the user can provide an IP address rather than a + hostname as the intended connection destination. So there are two + kinds of IP address, and they should very likely be treated + differently. + + 2. Sometimes the server's IP address is not even *known* by the + client, if you're connecting via a proxy and leaving DNS lookups + to the proxy. + +So, what should a boolean expression do if it's asked to accept or +reject based on an IP address, and the IP address is unknown or +untrustworthy? I'm not sure, and therefore, in the initial version of +this expression system, I haven't implemented them at all. + +But the syntax is still available for a future extension to use, if we +come up with good answers to these questions. + +(One possibility would be to evaluate the whole expression in Kleene +three-valued logic, so that every subexpression has the possible +answers TRUE, FALSE and UNKNOWN. If a definite IP address is not +available, IP address predicates evaluate to UNKNOWN. Then, once the +expression as a whole is evaluated, fail closed, by interpreting +UNKNOWN as 'reject'. The effect would be that a positive _or_ negative +constraint on the IP address would cause rejection if the IP address +is not reliably known, because once the predicate itself has returned +UNKNOWN, negating it still gives UNKNOWN. The only way you could still +accept a certificate in that situation would be if the overall +structure of the expression meant that the test of the IP address +couldn't affect the result anyway, e.g. 
if it was ANDed with another +subexpression that definitely evaluated to FALSE, or ORed with one +that evaluated to TRUE. This system seems conceptually elegant to me, +but the argument against it is that it's complicated and +counterintuitive, which is not a property you want in something a user +is writing for security purposes!) + +Operator precedence +------------------- + +Why did I choose to make && and || refuse to associate with each +other, instead of applying the usual C precedence rule that && beats +||? Because I think the C precedence rule is essentially arbitrary, in +the sense that when people are writing boolean expressions in practice +based on predicates from the rest of their program, it's about equally +common to want to nest an && within an || and vice versa. So the +default precedence rule only gives the user what they actually wanted +about 50% of the time, and leads to absent-minded errors about as +often as it conveniently allows you to omit a pair of parens. + +With my mathematician hat on, it's not so arbitrary. I agree that if +you're *going* to give || and && a relative priority then it makes +more sense to make && the higher-priority one, because if you're +thinking algebraically, && is more multiplicative and || is more +additive. But the pure-maths contexts in which that's convenient have +nothing to do with general boolean expressions in if statements. + +This boolean syntax is still close enough to that of C and its +derivatives to allow easy enough expression interchange (not counting +the fact that atoms would need rewriting). Any boolean expression +structure accepted by this syntax is also legal C and means the same +thing; any expression structure accepted by C is either legal and +equivalent in this syntax, or will fail with an error. In no case is +anything accepted but mapped to a different meaning. 
+ + */ + +#include "putty.h" + +typedef enum Token { + TOK_LPAR, TOK_RPAR, + TOK_AND, TOK_OR, TOK_NOT, + TOK_ATOM, + TOK_END, TOK_ERROR +} Token; + +static inline bool is_space(char c) +{ + return (c == ' ' || c == '\n' || c == '\r' || c == '\t' || + c == '\f' || c == '\v'); +} + +static inline bool is_operator_char(char c) +{ + return (c == '(' || c == ')' || c == '&' || c == '|' || c == '!'); +} + +static inline bool is_atom_char(char c) +{ + return (('A' <= c && c <= 'Z') || + ('a' <= c && c <= 'z') || + ('0' <= c && c <= '9') || + c == '.' || c == '-' || c == '_' || c == '*' || c == '?' || + c == '[' || c == ']' || c == '/' || c == ':'); +} + +static Token lex(ptrlen *text, ptrlen *token, char **err) +{ + const char *p = text->ptr, *e = p + text->len; + Token type = TOK_ERROR; + + /* Skip whitespace */ + while (p < e && is_space(*p)) + p++; + + const char *start = p; + + if (!(p < e)) { + type = TOK_END; + goto out; + } + + if (is_operator_char(*p)) { + /* Match boolean-expression tokens */ + static const struct operator { + ptrlen text; + Token type; + } operators[] = { + {PTRLEN_DECL_LITERAL("("), TOK_LPAR}, + {PTRLEN_DECL_LITERAL(")"), TOK_RPAR}, + {PTRLEN_DECL_LITERAL("&&"), TOK_AND}, + {PTRLEN_DECL_LITERAL("||"), TOK_OR}, + {PTRLEN_DECL_LITERAL("!"), TOK_NOT}, + }; + + for (size_t i = 0; i < lenof(operators); i++) { + const struct operator *op = &operators[i]; + if (e - p >= op->text.len && + ptrlen_eq_ptrlen(op->text, make_ptrlen(p, op->text.len))) { + p += op->text.len; + type = op->type; + goto out; + } + } + + /* + * Report an error if one of the operator characters is used + * in a way that doesn't match something in that table (e.g. a + * single &). 
+ */ + p++; + type = TOK_ERROR; + *err = dupstr("unrecognised boolean operator"); + goto out; + } else if (is_atom_char(*p)) { + /* + * Match an 'atom' token, which is any non-empty sequence of + * characters from the combined set that allows hostname + * wildcards, IP address ranges and special predicates like + * port numbers. + */ + do { + p++; + } while (p < e && is_atom_char(*p)); + + type = TOK_ATOM; + goto out; + } else { + /* + * Otherwise, report an error. + */ + p++; + type = TOK_ERROR; + *err = dupstr("unexpected character in expression"); + goto out; + } + + out: + *token = make_ptrlen(start, p - start); + text->ptr = p; + text->len = e - p; + return type; +} + +typedef enum Operator { + OP_AND, OP_OR, OP_NOT, + OP_HOSTNAME_WC, OP_PORT_RANGE +} Operator; + +typedef struct ExprNode ExprNode; +struct ExprNode { + Operator op; + ptrlen text; + union { + struct { + /* OP_AND, OP_OR */ + ExprNode **subexprs; + size_t nsubexprs; + }; + struct { + /* OP_NOT */ + ExprNode *subexpr; + }; + struct { + /* OP_HOSTNAME_WC */ + char *wc; + }; + struct { + /* OP_PORT_RANGE */ + unsigned lo, hi; /* both inclusive */ + }; + }; +}; + +static ExprNode *exprnode_new(Operator op, ptrlen text) +{ + ExprNode *en = snew(ExprNode); + memset(en, 0, sizeof(*en)); + en->op = op; + en->text = text; + return en; +} + +static void exprnode_free(ExprNode *en) +{ + switch (en->op) { + case OP_AND: + case OP_OR: + for (size_t i = 0; i < en->nsubexprs; i++) + exprnode_free(en->subexprs[i]); + sfree(en->subexprs); + break; + case OP_NOT: + exprnode_free(en->subexpr); + break; + case OP_HOSTNAME_WC: + sfree(en->wc); + break; + case OP_PORT_RANGE: + break; + default: + unreachable("unhandled node type in exprnode_free"); + } + + sfree(en); +} + +static unsigned ptrlen_to_port_number(ptrlen input) +{ + unsigned val = 0; + for (const char *p = input.ptr, *end = p + input.len; p < end; p++) { + assert('0' <= *p && *p <= '9'); /* expect parser to have checked */ + val = 10 * val + (*p - '0'); + 
if (val >= 65536) + val = 65536; /* normalise 'too large' to avoid integer overflow */ + } + return val; +} + +typedef struct ParserState ParserState; +struct ParserState { + ptrlen currtext; + Token tok; + ptrlen toktext, lasttoktext; + char *err; + ptrlen errloc; +}; + +static void error(ParserState *ps, char *errtext, ptrlen errloc) +{ + if (!ps->err) { + ps->err = errtext; + ps->errloc = errloc; + } else { + sfree(errtext); + } +} + +static void advance(ParserState *ps) +{ + char *err = NULL; + ps->lasttoktext = ps->toktext; + ps->tok = lex(&ps->currtext, &ps->toktext, &err); + if (ps->tok == TOK_ERROR) + error(ps, err, ps->toktext); +} + +static ExprNode *parse_atom(ParserState *ps); +static ExprNode *parse_expr(ParserState *ps); + +static bool atom_is_hostname_wc(ptrlen toktext) +{ + return !ptrlen_contains(toktext, ":/"); +} + +static ExprNode *parse_atom(ParserState *ps) +{ + if (ps->tok == TOK_LPAR) { + ptrlen openpar = ps->toktext; + advance(ps); /* eat the ( */ + + ExprNode *subexpr = parse_expr(ps); + if (!subexpr) + return NULL; + + if (ps->tok != TOK_RPAR) { + error(ps, dupstr("expected ')' after parenthesised subexpression"), + subexpr->text); + exprnode_free(subexpr); + return NULL; + } + + ptrlen closepar = ps->toktext; + advance(ps); /* eat the ) */ + + /* We can reuse the existing AST node, but we need to extend + * its bounds within the input expression to include the + * parentheses */ + subexpr->text = make_ptrlen_startend( + openpar.ptr, ptrlen_end(closepar)); + return subexpr; + } + + if (ps->tok == TOK_NOT) { + ptrlen notloc = ps->toktext; + advance(ps); /* eat the ! */ + + ExprNode *subexpr = parse_atom(ps); + if (!subexpr) + return NULL; + + ExprNode *en = exprnode_new( + OP_NOT, make_ptrlen_startend( + notloc.ptr, ptrlen_end(subexpr->text))); + en->subexpr = subexpr; + return en; + } + + if (ps->tok == TOK_ATOM) { + if (atom_is_hostname_wc(ps->toktext)) { + /* Hostname wildcard. 
*/ + ExprNode *en = exprnode_new(OP_HOSTNAME_WC, ps->toktext); + en->wc = mkstr(ps->toktext); + advance(ps); + return en; + } + + ptrlen tail; + if (ptrlen_startswith(ps->toktext, PTRLEN_LITERAL("port:"), &tail)) { + /* Port number (single or range). */ + unsigned lo, hi; + char *minus; + static const char DIGITS[] = "0123456789\0"; + bool parse_ok = false; + + if (tail.len > 0 && ptrlen_contains_only(tail, DIGITS)) { + lo = ptrlen_to_port_number(tail); + if (lo >= 65536) { + error(ps, dupstr("port number too large"), tail); + return NULL; + } + hi = lo; + parse_ok = true; + } else if ((minus = memchr(tail.ptr, '-', tail.len)) != NULL) { + ptrlen pl_lo = make_ptrlen_startend(tail.ptr, minus); + ptrlen pl_hi = make_ptrlen_startend(minus+1, ptrlen_end(tail)); + if (pl_lo.len > 0 && ptrlen_contains_only(pl_lo, DIGITS) && + pl_hi.len > 0 && ptrlen_contains_only(pl_hi, DIGITS)) { + + lo = ptrlen_to_port_number(pl_lo); + if (lo >= 65536) { + error(ps, dupstr("port number too large"), pl_lo); + return NULL; + } + + hi = ptrlen_to_port_number(pl_hi); + if (hi >= 65536) { + error(ps, dupstr("port number too large"), pl_hi); + return NULL; + } + + if (hi < lo) { + error(ps, dupstr("port number range is backwards"), + make_ptrlen_startend(pl_lo.ptr, + ptrlen_end(pl_hi))); + return NULL; + } + + parse_ok = true; + } + } + + if (!parse_ok) { + error(ps, dupstr("unable to parse port number specification"), + ps->toktext); + return NULL; + } + + + ExprNode *en = exprnode_new(OP_PORT_RANGE, ps->toktext); + en->lo = lo; + en->hi = hi; + advance(ps); + return en; + } + } + + error(ps, dupstr("expected a predicate or a parenthesised subexpression"), + ps->toktext); + return NULL; +} + +static ExprNode *parse_expr(ParserState *ps) +{ + ExprNode *subexpr = parse_atom(ps); + if (!subexpr) + return NULL; + + if (ps->tok != TOK_AND && ps->tok != TOK_OR) + return subexpr; + + Token operator = ps->tok; + ExprNode *en = exprnode_new(ps->tok == TOK_AND ? 
OP_AND : OP_OR, + subexpr->text); + size_t subexprs_size = 0; + + sgrowarray(en->subexprs, subexprs_size, en->nsubexprs); + en->subexprs[en->nsubexprs++] = subexpr; + + while (true) { + advance(ps); /* eat the operator */ + + subexpr = parse_atom(ps); + if (!subexpr) { + exprnode_free(en); + return NULL; + } + sgrowarray(en->subexprs, subexprs_size, en->nsubexprs); + en->subexprs[en->nsubexprs++] = subexpr; + en->text = make_ptrlen_startend( + en->text.ptr, ptrlen_end(subexpr->text)); + + if (ps->tok != TOK_AND && ps->tok != TOK_OR) + return en; + + if (ps->tok != operator) { + error(ps, dupstr("expected parentheses to disambiguate && and || " + "on either side of expression"), subexpr->text); + exprnode_free(en); + return NULL; + } + } +} + +static ExprNode *parse(ptrlen expr, char **error_msg, ptrlen *error_loc) +{ + ParserState ps[1]; + ps->currtext = expr; + ps->lasttoktext = make_ptrlen(ps->currtext.ptr, 0); + ps->err = NULL; + advance(ps); + + ExprNode *en = parse_expr(ps); + if (en && ps->tok != TOK_END) { + error(ps, dupstr("unexpected text at end of expression"), + make_ptrlen_startend(ps->toktext.ptr, ptrlen_end(expr))); + exprnode_free(en); + en = NULL; + } + + if (!en) { + if (error_msg) + *error_msg = ps->err; + else + sfree(ps->err); + if (error_loc) + *error_loc = ps->errloc; + return NULL; + } + + return en; +} + +static bool eval(ExprNode *en, const char *hostname, unsigned port) +{ + switch (en->op) { + case OP_AND: + for (size_t i = 0; i < en->nsubexprs; i++) + if (!eval(en->subexprs[i], hostname, port)) + return false; + return true; + + case OP_OR: + for (size_t i = 0; i < en->nsubexprs; i++) + if (eval(en->subexprs[i], hostname, port)) + return true; + return false; + + case OP_NOT: + return !eval(en->subexpr, hostname, port); + + case OP_HOSTNAME_WC: + return wc_match(en->wc, hostname); + + case OP_PORT_RANGE: + return en->lo <= port && port <= en->hi; + + default: + unreachable("unhandled node type in eval"); + } +} + +bool 
cert_expr_match_str(const char *expression, + const char *hostname, unsigned port) +{ + /* + * Decide whether 'expression' accepts this (hostname, port) pair. + * An expression that is absent (NULL) or that fails to parse + * matches nothing, so a CA record without a usable validity + * expression is never trusted (fail closed), and NULL is never + * passed on to ptrlen_from_asciz. + */ + if (!expression) + return false; + + ExprNode *en = parse(ptrlen_from_asciz(expression), NULL, NULL); + if (!en) + return false; + + bool matched = eval(en, hostname, port); + exprnode_free(en); + return matched; +} + +bool cert_expr_valid(const char *expression, + char **error_msg, ptrlen *error_loc) +{ + ExprNode *en = parse(ptrlen_from_asciz(expression), error_msg, error_loc); + if (en) { + exprnode_free(en); + return true; + } else { + return false; + } +} + +struct CertExprBuilder { + char **wcs; + size_t nwcs, wcsize; +}; + +CertExprBuilder *cert_expr_builder_new(void) +{ + CertExprBuilder *eb = snew(CertExprBuilder); + eb->wcs = NULL; + eb->nwcs = eb->wcsize = 0; + return eb; +} + +void cert_expr_builder_free(CertExprBuilder *eb) +{ + for (size_t i = 0; i < eb->nwcs; i++) + sfree(eb->wcs[i]); + sfree(eb->wcs); + sfree(eb); +} + +void cert_expr_builder_add(CertExprBuilder *eb, const char *wildcard) +{ + /* Check this wildcard is lexically valid as an atom */ + ptrlen orig = ptrlen_from_asciz(wildcard), pl = orig; + ptrlen toktext; + char *err; + Token tok = lex(&pl, &toktext, &err); + if (!(tok == TOK_ATOM && + toktext.ptr == orig.ptr && + toktext.len == orig.len && + atom_is_hostname_wc(toktext))) { + if (tok == TOK_ERROR) + sfree(err); + return; + } + + sgrowarray(eb->wcs, eb->wcsize, eb->nwcs); + eb->wcs[eb->nwcs++] = mkstr(orig); +} + +char *cert_expr_expression(CertExprBuilder *eb) +{ + strbuf *sb = strbuf_new(); + for (size_t i = 0; i < eb->nwcs; i++) { + if (i) + put_dataz(sb, " || "); + put_dataz(sb, eb->wcs[i]); + } + return strbuf_to_str(sb); +} + +#ifdef TEST + +void out_of_memory(void) { fprintf(stderr, "out of memory\n"); abort(); } + +static void exprnode_dump(BinarySink *bs, ExprNode *en, const char *origtext) +{ + put_fmt(bs, "(%zu:%zu ", + (size_t)((const char *)en->text.ptr - origtext), + (size_t)((const char *)ptrlen_end(en->text) - origtext)); + switch (en->op) { + case OP_AND: + case
OP_OR: + put_dataz(bs, en->op == OP_AND ? "and" : "or"); + for (size_t i = 0; i < en->nsubexprs; i++) { + put_byte(bs, ' '); + exprnode_dump(bs, en->subexprs[i], origtext); + } + break; + case OP_NOT: + put_dataz(bs, "not "); + exprnode_dump(bs, en->subexpr, origtext); + break; + case OP_HOSTNAME_WC: + put_dataz(bs, "host-wc '"); + put_dataz(bs, en->wc); + put_byte(bs, '\''); + break; + case OP_PORT_RANGE: + put_fmt(bs, "port-range %u %u", en->lo, en->hi); + break; + default: + unreachable("unhandled node type in exprnode_dump"); + } + put_byte(bs, ')'); +} + /* Parser test table. Each entry records the file and line where it was written, an input expression, and the expected text: either the exprnode_dump output for a successful parse, or an ERR:start:end:message string for a parse error. main() builds the actual text the same way and compares the two. */ +static const struct ParseTest { + const char *file; + int line; + const char *expr, *output; +} parsetests[] = { +#define T(expr_, output_) { \ + .file=__FILE__, .line=__LINE__, .expr=expr_, .output=output_} + + T("*.example.com", "(0:13 host-wc '*.example.com')"), + T("port:0", "(0:6 port-range 0 0)"), + T("port:22", "(0:7 port-range 22 22)"), + T("port:22-22", "(0:10 port-range 22 22)"), + T("port:65535", "(0:10 port-range 65535 65535)"), + T("port:0-1023", "(0:11 port-range 0 1023)"), + + T("&", "ERR:0:1:unrecognised boolean operator"), + T("|", "ERR:0:1:unrecognised boolean operator"), + T(";", "ERR:0:1:unexpected character in expression"), + T("port:", "ERR:0:5:unable to parse port number specification"), + T("port:abc", "ERR:0:8:unable to parse port number specification"), + T("port:65536", "ERR:5:10:port number too large"), + T("port:65536-65537", "ERR:5:10:port number too large"), + T("port:0-65536", "ERR:7:12:port number too large"), + T("port:23-22", "ERR:5:10:port number range is backwards"), + + T("a", "(0:1 host-wc 'a')"), + T("(a)", "(0:3 host-wc 'a')"), + T("((a))", "(0:5 host-wc 'a')"), + T(" (\n(\ra\t)\f)\v", "(1:10 host-wc 'a')"), + T("a&&b", "(0:4 and (0:1 host-wc 'a') (3:4 host-wc 'b'))"), + T("a||b", "(0:4 or (0:1 host-wc 'a') (3:4 host-wc 'b'))"), + T("a&&b&&c", "(0:7 and (0:1 host-wc 'a') (3:4 host-wc 'b') (6:7 host-wc 'c'))"), + T("a||b||c", "(0:7 or (0:1 host-wc 'a') (3:4 host-wc 'b') 
(6:7 host-wc 'c'))"), + T("a&&(b||c)", "(0:9 and (0:1 host-wc 'a') (3:9 or (4:5 host-wc 'b') (7:8 host-wc 'c')))"), + T("a||(b&&c)", "(0:9 or (0:1 host-wc 'a') (3:9 and (4:5 host-wc 'b') (7:8 host-wc 'c')))"), + T("(a&&b)||c", "(0:9 or (0:6 and (1:2 host-wc 'a') (4:5 host-wc 'b')) (8:9 host-wc 'c'))"), + T("(a||b)&&c", "(0:9 and (0:6 or (1:2 host-wc 'a') (4:5 host-wc 'b')) (8:9 host-wc 'c'))"), + T("!a&&b", "(0:5 and (0:2 not (1:2 host-wc 'a')) (4:5 host-wc 'b'))"), + T("a&&!b&&c", "(0:8 and (0:1 host-wc 'a') (3:5 not (4:5 host-wc 'b')) (7:8 host-wc 'c'))"), + T("!a||b", "(0:5 or (0:2 not (1:2 host-wc 'a')) (4:5 host-wc 'b'))"), + T("a||!b||c", "(0:8 or (0:1 host-wc 'a') (3:5 not (4:5 host-wc 'b')) (7:8 host-wc 'c'))"), + + T("", "ERR:0:0:expected a predicate or a parenthesised subexpression"), + T("a &&", "ERR:4:4:expected a predicate or a parenthesised subexpression"), + T("a ||", "ERR:4:4:expected a predicate or a parenthesised subexpression"), + T("a b c d", "ERR:2:7:unexpected text at end of expression"), + T("(", "ERR:1:1:expected a predicate or a parenthesised subexpression"), + T("(a", "ERR:1:2:expected ')' after parenthesised subexpression"), + T("(a b", "ERR:1:2:expected ')' after parenthesised subexpression"), + T("a&&b&&c||d||e", "ERR:6:7:expected parentheses to disambiguate && and || on either side of expression"), + T("a||b||c&&d&&e", "ERR:6:7:expected parentheses to disambiguate && and || on either side of expression"), + T("!", "ERR:1:1:expected a predicate or a parenthesised subexpression"), + + T("!a", "(0:2 not (1:2 host-wc 'a'))"), + +#undef T +}; + /* Evaluation test table. Each entry gives an expression, a host name and port to evaluate it against, and the expected boolean result; main() reports true as accept and false as reject. */ +static const struct EvalTest { + const char *file; + int line; + const char *expr; + const char *host; + unsigned port; + bool output; +} evaltests[] = { +#define T(expr_, host_, port_, output_) { \ + .file=__FILE__, .line=__LINE__, \ + .expr=expr_, .host=host_, .port=port_, .output=output_} + + T("*.example.com", "hostname.example.com", 22, true), + T("*.example.com", "hostname.example.org", 22, 
false), + T("*.example.com && port:22", "hostname.example.com", 21, false), + T("*.example.com && port:22", "hostname.example.com", 22, true), + T("*.example.com && port:22", "hostname.example.com", 23, false), + T("*.example.com && port:22-24", "hostname.example.com", 21, false), + T("*.example.com && port:22-24", "hostname.example.com", 22, true), + T("*.example.com && port:22-24", "hostname.example.com", 23, true), + T("*.example.com && port:22-24", "hostname.example.com", 24, true), + T("*.example.com && port:22-24", "hostname.example.com", 25, false), + + T("*a* && *b* && *c*", "", 22, false), + T("*a* && *b* && *c*", "a", 22, false), + T("*a* && *b* && *c*", "b", 22, false), + T("*a* && *b* && *c*", "c", 22, false), + T("*a* && *b* && *c*", "ab", 22, false), + T("*a* && *b* && *c*", "ac", 22, false), + T("*a* && *b* && *c*", "bc", 22, false), + T("*a* && *b* && *c*", "abc", 22, true), + + T("*a* || *b* || *c*", "", 22, false), + T("*a* || *b* || *c*", "a", 22, true), + T("*a* || *b* || *c*", "b", 22, true), + T("*a* || *b* || *c*", "c", 22, true), + T("*a* || *b* || *c*", "ab", 22, true), + T("*a* || *b* || *c*", "ac", 22, true), + T("*a* || *b* || *c*", "bc", 22, true), + T("*a* || *b* || *c*", "abc", 22, true), + + T("*a* && !*b* && *c*", "", 22, false), + T("*a* && !*b* && *c*", "a", 22, false), + T("*a* && !*b* && *c*", "b", 22, false), + T("*a* && !*b* && *c*", "c", 22, false), + T("*a* && !*b* && *c*", "ab", 22, false), + T("*a* && !*b* && *c*", "ac", 22, true), + T("*a* && !*b* && *c*", "bc", 22, false), + T("*a* && !*b* && *c*", "abc", 22, false), + + T("*a* || !*b* || *c*", "", 22, true), + T("*a* || !*b* || *c*", "a", 22, true), + T("*a* || !*b* || *c*", "b", 22, false), + T("*a* || !*b* || *c*", "c", 22, true), + T("*a* || !*b* || *c*", "ab", 22, true), + T("*a* || !*b* || *c*", "ac", 22, true), + T("*a* || !*b* || *c*", "bc", 22, true), + T("*a* || !*b* || *c*", "abc", 22, true), + +#undef T +}; + /* Test program entry point. With at least one command-line argument it parses argv[1], reporting a parse error on stderr; otherwise it either evaluates the expression against host argv[2] and port argv[3] (22 when argc is not above 3), printing accept or reject, or dumps the parse tree to stdout. With no arguments it runs every parsetests and evaltests entry, prints the pass, fail and total counts to stderr, and returns nonzero if fail is nonzero. */ +int main(int argc, char **argv) +{ + if (argc > 
1) { + /* + * Parse an expression from the command line. + */ + + ptrlen expr = ptrlen_from_asciz(argv[1]); + char *error_msg; + ptrlen error_loc; + ExprNode *en = parse(expr, &error_msg, &error_loc); + if (!en) { + fprintf(stderr, "ERR:%zu:%zu:%s\n", + (size_t)((const char *)error_loc.ptr - argv[1]), + (size_t)((const char *)ptrlen_end(error_loc) - argv[1]), + error_msg); + fprintf(stderr, "%.*s\n", PTRLEN_PRINTF(expr)); + for (const char *p = expr.ptr, *e = error_loc.ptr; p /* NOTE(review): text appears to be missing at this point (the remainder of this loop, the end of the error branch and the start of the next condition) - presumably lost in extraction; restore from the upstream file before applying - TODO confirm */ 2) { + /* + * Test-evaluate against a host/port pair given on the + * command line. + */ + const char *host = argv[2]; + unsigned port = (argc > 3 ? strtoul(argv[3], NULL, 0) : 22); + bool result = eval(en, host, port); + printf("%s\n", result ? "accept" : "reject"); + } else { + /* + * Just dump the result of parsing the expression. + */ + stdio_sink ss[1]; + stdio_sink_init(ss, stdout); + exprnode_dump(BinarySink_UPCAST(ss), en, expr.ptr); + put_byte(ss, '\n'); + } + + exprnode_free(en); + + return 0; + } else { + /* + * Run our automated tests. 
+ */ + size_t pass = 0, fail = 0; + + for (size_t i = 0; i < lenof(parsetests); i++) { + const struct ParseTest *test = &parsetests[i]; + + ptrlen expr = ptrlen_from_asciz(test->expr); + char *error_msg; + ptrlen error_loc; + ExprNode *en = parse(expr, &error_msg, &error_loc); + + strbuf *output = strbuf_new(); + if (!en) { + put_fmt(output, "ERR:%zu:%zu:%s", + (size_t)((const char *)error_loc.ptr - test->expr), + (size_t)((const char *)ptrlen_end(error_loc) - + test->expr), + error_msg); + sfree(error_msg); + } else { + exprnode_dump(BinarySink_UPCAST(output), en, expr.ptr); + exprnode_free(en); + } + + if (ptrlen_eq_ptrlen(ptrlen_from_strbuf(output), + ptrlen_from_asciz(test->output))) { + pass++; + } else { + fprintf(stderr, "FAIL: parsetests[%zu] @ %s:%d:\n" + " expression: %s\n" + " expected: %s\n" + " actual: %s\n", + i, test->file, test->line, test->expr, + test->output, output->s); + fail++; + } + + strbuf_free(output); + } + + for (size_t i = 0; i < lenof(evaltests); i++) { + const struct EvalTest *test = &evaltests[i]; + + ptrlen expr = ptrlen_from_asciz(test->expr); + char *error_msg; + ptrlen error_loc; + ExprNode *en = parse(expr, &error_msg, &error_loc); + + if (!en) { + fprintf(stderr, "FAIL: evaltests[%zu] @ %s:%d:\n" + " expression: %s\n" + " parse error: %zu:%zu:%s\n", + i, test->file, test->line, test->expr, + (size_t)((const char *)error_loc.ptr - test->expr), + (size_t)((const char *)ptrlen_end(error_loc) - + test->expr), + error_msg); + sfree(error_msg); /* NOTE(review): this branch prints FAIL but increments neither pass nor fail, so a parse error in an evaltests entry does not change the exit status - confirm whether fail++ is intended here */ + } else { + bool output = eval(en, test->host, test->port); + if (output == test->output) { + pass++; + } else { + fprintf(stderr, "FAIL: evaltests[%zu] @ %s:%d:\n" + " expression: %s\n" + " host: %s\n" + " port: %u\n" + " expected: %s\n" + " actual: %s\n", + i, test->file, test->line, test->expr, + test->host, test->port, + test->output ? "accept" : "reject", + output ? 
"accept" : "reject"); + fail++; + } + exprnode_free(en); + } + } + + fprintf(stderr, "pass %zu fail %zu total %zu\n", + pass, fail, pass+fail); + return fail != 0; + } +} + +#endif // TEST diff --git a/utils/host_ca_new_free.c b/utils/host_ca_new_free.c index 8ae158ba..4c91c320 100644 --- a/utils/host_ca_new_free.c +++ b/utils/host_ca_new_free.c @@ -15,10 +15,8 @@ host_ca *host_ca_new(void) void host_ca_free(host_ca *hca) { sfree(hca->name); + sfree(hca->validity_expression); if (hca->ca_public_key) strbuf_free(hca->ca_public_key); - for (size_t i = 0; i < hca->n_hostname_wildcards; i++) - sfree(hca->hostname_wildcards[i]); - sfree(hca->hostname_wildcards); sfree(hca); } diff --git a/windows/storage.c b/windows/storage.c index 6a48aee6..30147a45 100644 --- a/windows/storage.c +++ b/windows/storage.c @@ -432,19 +432,20 @@ host_ca *host_ca_load(const char *name) if ((s = get_reg_sz(rkey, "PublicKey")) != NULL) hca->ca_public_key = base64_decode_sb(ptrlen_from_asciz(s)); - if ((sb = get_reg_multi_sz(rkey, "MatchHosts")) != NULL) { + if ((s = get_reg_sz(rkey, "Validity")) != NULL) { + hca->validity_expression = strbuf_to_str( + percent_decode_sb(ptrlen_from_asciz(s))); + } else if ((sb = get_reg_multi_sz(rkey, "MatchHosts")) != NULL) { BinarySource src[1]; BinarySource_BARE_INIT_PL(src, ptrlen_from_strbuf(sb)); + CertExprBuilder *eb = cert_expr_builder_new(); const char *wc; - size_t wcsize = 0; - while (wc = get_asciz(src), !get_err(src)) { - sgrowarray(hca->hostname_wildcards, wcsize, - hca->n_hostname_wildcards); - hca->hostname_wildcards[hca->n_hostname_wildcards++] = dupstr(wc); - } + while (wc = get_asciz(src), !get_err(src)) + cert_expr_builder_add(eb, wc); - strbuf_free(sb); + hca->validity_expression = cert_expr_expression(eb); + cert_expr_builder_free(eb); /* NOTE(review): this hunk removes strbuf_free(sb) and nothing visible here frees sb afterwards - confirm it is released elsewhere */ } if (get_reg_dword(rkey, "PermitRSASHA1", &val)) @@ -479,11 +480,10 @@ char *host_ca_save(host_ca *hca) put_reg_sz(rkey, "PublicKey", base64_pubkey->s); strbuf_free(base64_pubkey); - strbuf *wcs = 
strbuf_new(); - for (size_t i = 0; i < hca->n_hostname_wildcards; i++) - put_asciz(wcs, hca->hostname_wildcards[i]); - put_reg_multi_sz(rkey, "MatchHosts", wcs); - strbuf_free(wcs); + strbuf *validity = percent_encode_sb( + ptrlen_from_asciz(hca->validity_expression), NULL); /* NOTE(review): assumes hca->validity_expression is non-NULL here; other code in this change tests it for NULL before use - confirm */ + put_reg_sz(rkey, "Validity", validity->s); + strbuf_free(validity); put_reg_dword(rkey, "PermitRSASHA1", hca->opts.permit_rsa_sha1); put_reg_dword(rkey, "PermitRSASHA256", hca->opts.permit_rsa_sha256);