2000-11-01 21:34:21 +00:00
|
|
|
/*
|
|
|
|
* Zlib (RFC1950 / RFC1951) compression for PuTTY.
|
|
|
|
*
|
|
|
|
* There will no doubt be criticism of my decision to reimplement
|
|
|
|
* Zlib compression from scratch instead of using the existing zlib
|
|
|
|
* code. People will cry `reinventing the wheel'; they'll claim
|
|
|
|
* that the `fundamental basis of OSS' is code reuse; they'll want
|
|
|
|
* to see a really good reason for me having chosen not to use the
|
|
|
|
* existing code.
|
|
|
|
*
|
|
|
|
* Well, here are my reasons. Firstly, I don't want to link the
|
|
|
|
* whole of zlib into the PuTTY binary; PuTTY is justifiably proud
|
|
|
|
* of its small size and I think zlib contains a lot of unnecessary
|
|
|
|
* baggage for the kind of compression that SSH requires.
|
|
|
|
*
|
|
|
|
* Secondly, I also don't like the alternative of using zlib.dll.
|
|
|
|
* Another thing PuTTY is justifiably proud of is its ease of
|
|
|
|
* installation, and the last thing I want to do is to start
|
|
|
|
* mandating DLLs. Not only that, but there are two _kinds_ of
|
|
|
|
* zlib.dll kicking around, one with C calling conventions on the
|
|
|
|
* exported functions and another with WINAPI conventions, and
|
|
|
|
* there would be a significant danger of getting the wrong one.
|
|
|
|
*
|
|
|
|
* Thirdly, there seems to be a difference of opinion on the IETF
|
|
|
|
* secsh mailing list about the correct way to round off a
|
|
|
|
* compressed packet and start the next. In particular, there's
|
|
|
|
* some talk of switching to a mechanism zlib isn't currently
|
|
|
|
* capable of supporting (see below for an explanation). Given that
|
|
|
|
* sort of uncertainty, I thought it might be better to have code
|
|
|
|
* that will support even the zlib-incompatible worst case.
|
|
|
|
*
|
|
|
|
* Fourthly, it's a _second implementation_. Second implementations
|
|
|
|
* are fundamentally a Good Thing in standardisation efforts. The
|
|
|
|
* difference of opinion mentioned above has arisen _precisely_
|
|
|
|
* because there has been only one zlib implementation and
|
|
|
|
* everybody has used it. I don't intend that this should happen
|
|
|
|
* again.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
2012-06-01 19:43:05 +00:00
|
|
|
#include <string.h>
|
2000-11-01 21:34:21 +00:00
|
|
|
#include <assert.h>
|
|
|
|
|
2018-05-26 06:19:18 +00:00
|
|
|
#include "defs.h"
|
|
|
|
#include "ssh.h"
|
2001-09-07 22:49:17 +00:00
|
|
|
|
2000-11-01 21:34:21 +00:00
|
|
|
/* ----------------------------------------------------------------------
|
|
|
|
* Basic LZ77 code. This bit is designed modularly, so it could be
|
|
|
|
* ripped out and used in a different LZ77 compressor. Go to it,
|
|
|
|
* and good luck :-)
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Private matcher state; the full definition appears later in this file. */
struct LZ77InternalContext;

/*
 * Handle on one LZ77 compression stream.
 *
 * `ictx' is the private half and is filled in by lz77_init(). The
 * remaining fields are public and must be set by the user:
 * `userdata' is an arbitrary pointer for the callbacks' own use, and
 * `literal' / `match' are the output callbacks that lz77_compress()
 * invokes.
 */
struct LZ77Context {
    struct LZ77InternalContext *ictx;
    void *userdata;

    /* Output one byte that is not part of any back-reference. */
    void (*literal)(struct LZ77Context *ctx, unsigned char c);

    /* Output a back-reference of `len' bytes starting `distance' back. */
    void (*match)(struct LZ77Context *ctx, int distance, int len);
};
|
|
|
|
|
|
|
|
/*
 * Allocate and initialise the private state of an LZ77Context,
 * storing it in ctx->ictx. The public fields (userdata, literal,
 * match) are not touched; setting them is the caller's job.
 *
 * Returns 1 on success, or 0 if the state could not be allocated.
 */
static int lz77_init(struct LZ77Context *ctx);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Supply data to be compressed. Will update the private fields of
|
|
|
|
* the LZ77Context, and will call literal() and match() to output.
|
2018-10-29 19:50:29 +00:00
|
|
|
* There is no uncompressed mode: the function takes no `compress' flag
* and always looks for matches, falling back to literal() otherwise.
|
2000-11-01 21:34:21 +00:00
|
|
|
*/
|
|
|
|
static void lz77_compress(struct LZ77Context *ctx,  /* stream being fed */
                          const unsigned char *data, /* input bytes */
                          int len);                  /* number of input bytes */
|
2000-11-01 21:34:21 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Modifiable parameters.
|
|
|
|
*/
|
|
|
|
/* Sliding-window size in bytes. Must be a power of 2 (it is used as a mask). */
#define WINSIZE 32768
/* Number of hash buckets: hash values lie in 0..HASHMAX-1. */
#define HASHMAX 2039
/* Upper limit on the candidate matches collected at one position. */
#define MAXMATCH 32
/* Number of input bytes that go into each hash. */
#define HASHCHARS 3
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This compressor takes a less slapdash approach than the
|
|
|
|
* gzip/zlib one. Rather than allowing our hash chains to fall into
|
|
|
|
* disuse near the far end, we keep them doubly linked so we can
|
|
|
|
* _find_ the far end, and then every time we add a new byte to the
|
|
|
|
* window (thus rolling round by one and removing the previous
|
|
|
|
* byte), we can carefully remove the hash chain entry.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define INVALID (-1)                   /* "no hash value" and "no window offset" alike */
|
|
|
|
/*
 * Bookkeeping for one position of the sliding window. Each position is
 * a node in the doubly linked chain of positions sharing a hash value.
 * lz77_init() sets all three fields to INVALID.
 */
struct WindowEntry {
    short next;                        /* window index of the next (older) chain node */
    short prev;                        /* window index of the previous (newer) chain node */
    short hashval;                     /* hash bucket this position is chained under */
};
|
|
|
|
|
|
|
|
/* One hash bucket: the entry point into that hash value's chain. */
struct HashEntry {
    short first;                       /* window index at the head of the chain, or INVALID */
};
|
|
|
|
|
|
|
|
/* A candidate back-reference found by the matcher. */
struct Match {
    int distance;                      /* how far back the matched data starts */
    int len;                           /* number of matching bytes */
};
|
|
|
|
|
|
|
|
struct LZ77InternalContext {
|
|
|
|
struct WindowEntry win[WINSIZE];
|
|
|
|
unsigned char data[WINSIZE];
|
|
|
|
int winpos;
|
|
|
|
struct HashEntry hashtab[HASHMAX];
|
|
|
|
unsigned char pending[HASHCHARS];
|
|
|
|
int npending;
|
|
|
|
};
|
|
|
|
|
2018-11-27 19:22:02 +00:00
|
|
|
static int lz77_hash(const unsigned char *data)
|
2001-05-06 14:35:20 +00:00
|
|
|
{
|
|
|
|
return (257 * data[0] + 263 * data[1] + 269 * data[2]) % HASHMAX;
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
|
2001-05-06 14:35:20 +00:00
|
|
|
static int lz77_init(struct LZ77Context *ctx)
|
|
|
|
{
|
2000-11-01 21:34:21 +00:00
|
|
|
struct LZ77InternalContext *st;
|
|
|
|
int i;
|
|
|
|
|
2003-03-29 16:14:26 +00:00
|
|
|
st = snew(struct LZ77InternalContext);
|
2000-11-01 21:34:21 +00:00
|
|
|
if (!st)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
ctx->ictx = st;
|
|
|
|
|
|
|
|
for (i = 0; i < WINSIZE; i++)
|
|
|
|
st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID;
|
|
|
|
for (i = 0; i < HASHMAX; i++)
|
|
|
|
st->hashtab[i].first = INVALID;
|
|
|
|
st->winpos = 0;
|
|
|
|
|
|
|
|
st->npending = 0;
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void lz77_advance(struct LZ77InternalContext *st,
|
2001-05-06 14:35:20 +00:00
|
|
|
unsigned char c, int hash)
|
|
|
|
{
|
2000-11-01 21:34:21 +00:00
|
|
|
int off;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remove the hash entry at winpos from the tail of its chain,
|
|
|
|
* or empty the chain if it's the only thing on the chain.
|
|
|
|
*/
|
|
|
|
if (st->win[st->winpos].prev != INVALID) {
|
|
|
|
st->win[st->win[st->winpos].prev].next = INVALID;
|
|
|
|
} else if (st->win[st->winpos].hashval != INVALID) {
|
|
|
|
st->hashtab[st->win[st->winpos].hashval].first = INVALID;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a new entry at winpos and add it to the head of its
|
|
|
|
* hash chain.
|
|
|
|
*/
|
|
|
|
st->win[st->winpos].hashval = hash;
|
|
|
|
st->win[st->winpos].prev = INVALID;
|
|
|
|
off = st->win[st->winpos].next = st->hashtab[hash].first;
|
|
|
|
st->hashtab[hash].first = st->winpos;
|
|
|
|
if (off != INVALID)
|
|
|
|
st->win[off].prev = st->winpos;
|
|
|
|
st->data[st->winpos] = c;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Advance the window pointer.
|
|
|
|
*/
|
2001-05-06 14:35:20 +00:00
|
|
|
st->winpos = (st->winpos + 1) & (WINSIZE - 1);
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Fetch the byte at offset k relative to the current input position.
 * k >= 0 indexes the caller's `data' buffer; k < 0 reaches back into
 * the sliding window, k == -1 being the byte most recently added.
 *
 * Relies on variables named `st' and `data' being in scope where the
 * macro is used. The argument is expanded more than once, so it must
 * have no side effects; it is parenthesised at every use so that any
 * expression may be passed safely.
 */
#define CHARAT(k) ( (k) < 0 ? st->data[(st->winpos + (k)) & (WINSIZE - 1)] : data[(k)] )
|
|
|
|
|
|
|
|
static void lz77_compress(struct LZ77Context *ctx,
|
2018-11-27 19:23:15 +00:00
|
|
|
const unsigned char *data, int len)
|
2001-05-06 14:35:20 +00:00
|
|
|
{
|
2000-11-01 21:34:21 +00:00
|
|
|
struct LZ77InternalContext *st = ctx->ictx;
|
2017-02-14 22:13:24 +00:00
|
|
|
int i, distance, off, nmatch, matchlen, advance;
|
2000-11-01 21:34:21 +00:00
|
|
|
struct Match defermatch, matches[MAXMATCH];
|
|
|
|
int deferchr;
|
|
|
|
|
2014-02-22 18:02:14 +00:00
|
|
|
assert(st->npending <= HASHCHARS);
|
|
|
|
|
2000-11-01 21:34:21 +00:00
|
|
|
/*
|
|
|
|
* Add any pending characters from last time to the window. (We
|
|
|
|
* might not be able to.)
|
2014-02-22 18:02:14 +00:00
|
|
|
*
|
|
|
|
* This leaves st->pending empty in the usual case (when len >=
|
|
|
|
* HASHCHARS); otherwise it leaves st->pending empty enough that
|
|
|
|
* adding all the remaining 'len' characters will not push it past
|
|
|
|
* HASHCHARS in size.
|
2000-11-01 21:34:21 +00:00
|
|
|
*/
|
|
|
|
for (i = 0; i < st->npending; i++) {
|
|
|
|
unsigned char foo[HASHCHARS];
|
|
|
|
int j;
|
|
|
|
if (len + st->npending - i < HASHCHARS) {
|
|
|
|
/* Update the pending array. */
|
|
|
|
for (j = i; j < st->npending; j++)
|
2001-05-06 14:35:20 +00:00
|
|
|
st->pending[j - i] = st->pending[j];
|
2000-11-01 21:34:21 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
for (j = 0; j < HASHCHARS; j++)
|
2001-05-06 14:35:20 +00:00
|
|
|
foo[j] = (i + j < st->npending ? st->pending[i + j] :
|
2000-11-01 21:34:21 +00:00
|
|
|
data[i + j - st->npending]);
|
|
|
|
lz77_advance(st, foo[0], lz77_hash(foo));
|
|
|
|
}
|
|
|
|
st->npending -= i;
|
|
|
|
|
2006-12-30 23:00:14 +00:00
|
|
|
defermatch.distance = 0; /* appease compiler */
|
2000-11-01 21:34:21 +00:00
|
|
|
defermatch.len = 0;
|
2001-05-13 14:02:28 +00:00
|
|
|
deferchr = '\0';
|
2000-11-01 21:34:21 +00:00
|
|
|
while (len > 0) {
|
|
|
|
|
2018-11-27 19:23:15 +00:00
|
|
|
if (len >= HASHCHARS) {
|
2001-05-06 14:35:20 +00:00
|
|
|
/*
|
|
|
|
* Hash the next few characters.
|
|
|
|
*/
|
2017-02-14 22:13:24 +00:00
|
|
|
int hash = lz77_hash(data);
|
2001-05-06 14:35:20 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Look the hash up in the corresponding hash chain and see
|
|
|
|
* what we can find.
|
|
|
|
*/
|
|
|
|
nmatch = 0;
|
|
|
|
for (off = st->hashtab[hash].first;
|
|
|
|
off != INVALID; off = st->win[off].next) {
|
|
|
|
/* distance = 1 if off == st->winpos-1 */
|
|
|
|
/* distance = WINSIZE if off == st->winpos */
|
|
|
|
distance =
|
|
|
|
WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE;
|
|
|
|
for (i = 0; i < HASHCHARS; i++)
|
|
|
|
if (CHARAT(i) != CHARAT(i - distance))
|
|
|
|
break;
|
|
|
|
if (i == HASHCHARS) {
|
|
|
|
matches[nmatch].distance = distance;
|
|
|
|
matches[nmatch].len = 3;
|
|
|
|
if (++nmatch >= MAXMATCH)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
nmatch = 0;
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (nmatch > 0) {
|
|
|
|
/*
|
|
|
|
* We've now filled up matches[] with nmatch potential
|
|
|
|
* matches. Follow them down to find the longest. (We
|
|
|
|
* assume here that it's always worth favouring a
|
|
|
|
* longer match over a shorter one.)
|
|
|
|
*/
|
|
|
|
matchlen = HASHCHARS;
|
|
|
|
while (matchlen < len) {
|
|
|
|
int j;
|
|
|
|
for (i = j = 0; i < nmatch; i++) {
|
|
|
|
if (CHARAT(matchlen) ==
|
|
|
|
CHARAT(matchlen - matches[i].distance)) {
|
|
|
|
matches[j++] = matches[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (j == 0)
|
|
|
|
break;
|
|
|
|
matchlen++;
|
|
|
|
nmatch = j;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We've now got all the longest matches. We favour the
|
|
|
|
* shorter distances, which means we go with matches[0].
|
|
|
|
* So see if we want to defer it or throw it away.
|
|
|
|
*/
|
|
|
|
matches[0].len = matchlen;
|
|
|
|
if (defermatch.len > 0) {
|
|
|
|
if (matches[0].len > defermatch.len + 1) {
|
|
|
|
/* We have a better match. Emit the deferred char,
|
|
|
|
* and defer this match. */
|
2001-05-06 14:35:20 +00:00
|
|
|
ctx->literal(ctx, (unsigned char) deferchr);
|
2000-11-01 21:34:21 +00:00
|
|
|
defermatch = matches[0];
|
|
|
|
deferchr = data[0];
|
|
|
|
advance = 1;
|
|
|
|
} else {
|
|
|
|
/* We don't have a better match. Do the deferred one. */
|
|
|
|
ctx->match(ctx, defermatch.distance, defermatch.len);
|
|
|
|
advance = defermatch.len - 1;
|
|
|
|
defermatch.len = 0;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* There was no deferred match. Defer this one. */
|
|
|
|
defermatch = matches[0];
|
|
|
|
deferchr = data[0];
|
|
|
|
advance = 1;
|
2001-05-06 14:35:20 +00:00
|
|
|
}
|
2000-11-01 21:34:21 +00:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* We found no matches. Emit the deferred match, if
|
|
|
|
* any; otherwise emit a literal.
|
|
|
|
*/
|
|
|
|
if (defermatch.len > 0) {
|
|
|
|
ctx->match(ctx, defermatch.distance, defermatch.len);
|
|
|
|
advance = defermatch.len - 1;
|
|
|
|
defermatch.len = 0;
|
|
|
|
} else {
|
|
|
|
ctx->literal(ctx, data[0]);
|
|
|
|
advance = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now advance the position by `advance' characters,
|
|
|
|
* keeping the window and hash chains consistent.
|
|
|
|
*/
|
|
|
|
while (advance > 0) {
|
|
|
|
if (len >= HASHCHARS) {
|
|
|
|
lz77_advance(st, *data, lz77_hash(data));
|
|
|
|
} else {
|
2014-02-22 18:02:14 +00:00
|
|
|
assert(st->npending < HASHCHARS);
|
2000-11-01 21:34:21 +00:00
|
|
|
st->pending[st->npending++] = *data;
|
|
|
|
}
|
|
|
|
data++;
|
|
|
|
len--;
|
|
|
|
advance--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------------
|
|
|
|
* Zlib compression. We always use the static Huffman tree option.
|
|
|
|
* Mostly this is because it's hard to scan a block in advance to
|
|
|
|
* work out better trees; dynamic trees are great when you're
|
|
|
|
* compressing a large file under no significant time constraint,
|
|
|
|
* but when you're compressing little bits in real time, things get
|
|
|
|
* hairier.
|
|
|
|
*
|
|
|
|
* I suppose it's possible that I could compute Huffman trees based
|
|
|
|
* on the frequencies in the _previous_ block, as a sort of
|
|
|
|
* heuristic, but I'm not confident that the gain would balance out
|
|
|
|
* having to transmit the trees.
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct Outbuf {
|
2019-02-11 06:58:07 +00:00
|
|
|
strbuf *outbuf;
|
2000-11-01 21:34:21 +00:00
|
|
|
unsigned long outbits;
|
|
|
|
int noutbits;
|
Convert a lot of 'int' variables to 'bool'.
My normal habit these days, in new code, is to treat int and bool as
_almost_ completely separate types. I'm still willing to use C's
implicit test for zero on an integer (e.g. 'if (!blob.len)' is fine,
no need to spell it out as blob.len != 0), but generally, if a
variable is going to be conceptually a boolean, I like to declare it
bool and assign to it using 'true' or 'false' rather than 0 or 1.
PuTTY is an exception, because it predates the C99 bool, and I've
stuck to its existing coding style even when adding new code to it.
But it's been annoying me more and more, so now that I've decided C99
bool is an acceptable thing to require from our toolchain in the first
place, here's a quite thorough trawl through the source doing
'boolification'. Many variables and function parameters are now typed
as bool rather than int; many assignments of 0 or 1 to those variables
are now spelled 'true' or 'false'.
I managed this thorough conversion with the help of a custom clang
plugin that I wrote to trawl the AST and apply heuristics to point out
where things might want changing. So I've even managed to do a decent
job on parts of the code I haven't looked at in years!
To make the plugin's work easier, I pushed platform front ends
generally in the direction of using standard 'bool' in preference to
platform-specific boolean types like Windows BOOL or GTK's gboolean;
I've left the platform booleans in places they _have_ to be for the
platform APIs to work right, but variables only used by my own code
have been converted wherever I found them.
In a few places there are int values that look very like booleans in
_most_ of the places they're used, but have a rarely-used third value,
or a distinction between different nonzero values that most users
don't care about. In these cases, I've _removed_ uses of 'true' and
'false' for the return values, to emphasise that there's something
more subtle going on than a simple boolean answer:
- the 'multisel' field in dialog.h's list box structure, for which
the GTK front end in particular recognises a difference between 1
and 2 but nearly everything else treats as boolean
- the 'urgent' parameter to plug_receive, where 1 vs 2 tells you
something about the specific location of the urgent pointer, but
most clients only care about 0 vs 'something nonzero'
- the return value of wc_match, where -1 indicates a syntax error in
the wildcard.
- the return values from SSH-1 RSA-key loading functions, which use
-1 for 'wrong passphrase' and 0 for all other failures (so any
caller which already knows it's not loading an _encrypted private_
key can treat them as boolean)
- term->esc_query, and the 'query' parameter in toggle_mode in
terminal.c, which _usually_ hold 0 for ESC[123h or 1 for ESC[?123h,
but can also hold -1 for some other intervening character that we
don't support.
In a few places there's an integer that I haven't turned into a bool
even though it really _can_ only take values 0 or 1 (and, as above,
tried to make the call sites consistent in not calling those values
true and false), on the grounds that I thought it would make it more
confusing to imply that the 0 value was in some sense 'negative' or
bad and the 1 positive or good:
- the return value of plug_accepting uses the POSIXish convention of
0=success and nonzero=error; I think if I made it bool then I'd
also want to reverse its sense, and that's a job for a separate
piece of work.
- the 'screen' parameter to lineptr() in terminal.c, where 0 and 1
represent the default and alternate screens. There's no obvious
reason why one of those should be considered 'true' or 'positive'
or 'success' - they're just indices - so I've left it as int.
ssh_scp_recv had particularly confusing semantics for its previous int
return value: its call sites used '<= 0' to check for error, but it
never actually returned a negative number, just 0 or 1. Now the
function and its call sites agree that it's a bool.
In a couple of places I've renamed variables called 'ret', because I
don't like that name any more - it's unclear whether it means the
return value (in preparation) for the _containing_ function or the
return value received from a subroutine call, and occasionally I've
accidentally used the same variable for both and introduced a bug. So
where one of those got in my way, I've renamed it to 'toret' or 'retd'
(the latter short for 'returned') in line with my usual modern
practice, but I haven't done a thorough job of finding all of them.
Finally, one amusing side effect of doing this is that I've had to
separate quite a few chained assignments. It used to be perfectly fine
to write 'a = b = c = TRUE' when a,b,c were int and TRUE was just a
the 'true' defined by stdbool.h, that idiom provokes a warning from
gcc: 'suggest parentheses around assignment used as truth value'!
2018-11-02 19:23:19 +00:00
|
|
|
bool firstblock;
|
2000-11-01 21:34:21 +00:00
|
|
|
};
|
|
|
|
|
2001-05-06 14:35:20 +00:00
|
|
|
static void outbits(struct Outbuf *out, unsigned long bits, int nbits)
|
|
|
|
{
|
2000-11-01 21:34:21 +00:00
|
|
|
assert(out->noutbits + nbits <= 32);
|
|
|
|
out->outbits |= bits << out->noutbits;
|
|
|
|
out->noutbits += nbits;
|
|
|
|
while (out->noutbits >= 8) {
|
2019-02-11 06:58:07 +00:00
|
|
|
put_byte(out->outbuf, out->outbits & 0xFF);
|
2001-05-06 14:35:20 +00:00
|
|
|
out->outbits >>= 8;
|
|
|
|
out->noutbits -= 8;
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Bit-reversal table: mirrorbytes[b] is byte b with its eight bits in
 * the opposite order. Huffman codes are defined most significant bit
 * first but outbits() sends the least significant bit first, so codes
 * are looked up here before being output.
 */
static const unsigned char mirrorbytes[256] = {
    /* 0x00 */ 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
    /* 0x08 */ 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
    /* 0x10 */ 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
    /* 0x18 */ 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
    /* 0x20 */ 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
    /* 0x28 */ 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
    /* 0x30 */ 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
    /* 0x38 */ 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
    /* 0x40 */ 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
    /* 0x48 */ 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
    /* 0x50 */ 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
    /* 0x58 */ 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
    /* 0x60 */ 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
    /* 0x68 */ 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
    /* 0x70 */ 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
    /* 0x78 */ 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
    /* 0x80 */ 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
    /* 0x88 */ 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
    /* 0x90 */ 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
    /* 0x98 */ 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
    /* 0xa0 */ 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
    /* 0xa8 */ 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
    /* 0xb0 */ 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
    /* 0xb8 */ 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
    /* 0xc0 */ 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
    /* 0xc8 */ 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
    /* 0xd0 */ 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
    /* 0xd8 */ 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
    /* 0xe0 */ 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
    /* 0xe8 */ 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
    /* 0xf0 */ 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
    /* 0xf8 */ 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
};
|
|
|
|
|
|
|
|
/*
 * One row of a Deflate length or distance table: the symbol `code'
 * stands for every value from `min' to `max' inclusive, and is
 * followed by `extrabits' further bits selecting the value in that
 * range.
 */
typedef struct {
    short code;
    short extrabits;
    int min;
    int max;
} coderecord;
|
|
|
|
|
|
|
|
/*
 * Match-length symbols, in increasing order of length so that the
 * table can be binary-searched. Columns: code, extra bits, min, max.
 */
static const coderecord lencodes[] = {
    /* no extra bits */
    {257, 0, 3, 3},     {258, 0, 4, 4},     {259, 0, 5, 5},     {260, 0, 6, 6},
    {261, 0, 7, 7},     {262, 0, 8, 8},     {263, 0, 9, 9},     {264, 0, 10, 10},
    /* 1 extra bit */
    {265, 1, 11, 12},   {266, 1, 13, 14},   {267, 1, 15, 16},   {268, 1, 17, 18},
    /* 2 extra bits */
    {269, 2, 19, 22},   {270, 2, 23, 26},   {271, 2, 27, 30},   {272, 2, 31, 34},
    /* 3 extra bits */
    {273, 3, 35, 42},   {274, 3, 43, 50},   {275, 3, 51, 58},   {276, 3, 59, 66},
    /* 4 extra bits */
    {277, 4, 67, 82},   {278, 4, 83, 98},   {279, 4, 99, 114},  {280, 4, 115, 130},
    /* 5 extra bits */
    {281, 5, 131, 162}, {282, 5, 163, 194}, {283, 5, 195, 226}, {284, 5, 227, 257},
    /* the maximum length has a symbol of its own */
    {285, 0, 258, 258},
};
|
|
|
|
|
|
|
|
/*
 * Match-distance symbols, in increasing order of distance so that the
 * table can be binary-searched. Columns: code, extra bits, min, max.
 */
static const coderecord distcodes[] = {
    {0, 0, 1, 1},             {1, 0, 2, 2},
    {2, 0, 3, 3},             {3, 0, 4, 4},
    {4, 1, 5, 6},             {5, 1, 7, 8},
    {6, 2, 9, 12},            {7, 2, 13, 16},
    {8, 3, 17, 24},           {9, 3, 25, 32},
    {10, 4, 33, 48},          {11, 4, 49, 64},
    {12, 5, 65, 96},          {13, 5, 97, 128},
    {14, 6, 129, 192},        {15, 6, 193, 256},
    {16, 7, 257, 384},        {17, 7, 385, 512},
    {18, 8, 513, 768},        {19, 8, 769, 1024},
    {20, 9, 1025, 1536},      {21, 9, 1537, 2048},
    {22, 10, 2049, 3072},     {23, 10, 3073, 4096},
    {24, 11, 4097, 6144},     {25, 11, 6145, 8192},
    {26, 12, 8193, 12288},    {27, 12, 12289, 16384},
    {28, 13, 16385, 24576},   {29, 13, 24577, 32768},
};
|
|
|
|
|
2001-05-06 14:35:20 +00:00
|
|
|
static void zlib_literal(struct LZ77Context *ectx, unsigned char c)
|
|
|
|
{
|
|
|
|
struct Outbuf *out = (struct Outbuf *) ectx->userdata;
|
2000-11-01 21:34:21 +00:00
|
|
|
|
|
|
|
if (c <= 143) {
|
2001-05-06 14:35:20 +00:00
|
|
|
/* 0 through 143 are 8 bits long starting at 00110000. */
|
|
|
|
outbits(out, mirrorbytes[0x30 + c], 8);
|
2000-11-01 21:34:21 +00:00
|
|
|
} else {
|
2001-05-06 14:35:20 +00:00
|
|
|
/* 144 through 255 are 9 bits long starting at 110010000. */
|
|
|
|
outbits(out, 1 + 2 * mirrorbytes[0x90 - 144 + c], 9);
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2001-05-06 14:35:20 +00:00
|
|
|
static void zlib_match(struct LZ77Context *ectx, int distance, int len)
|
|
|
|
{
|
2000-11-01 21:34:21 +00:00
|
|
|
const coderecord *d, *l;
|
|
|
|
int i, j, k;
|
2001-05-06 14:35:20 +00:00
|
|
|
struct Outbuf *out = (struct Outbuf *) ectx->userdata;
|
2001-03-05 16:38:42 +00:00
|
|
|
|
2000-11-01 21:34:21 +00:00
|
|
|
while (len > 0) {
|
2001-05-06 14:35:20 +00:00
|
|
|
int thislen;
|
|
|
|
|
2000-11-01 21:34:21 +00:00
|
|
|
/*
|
|
|
|
* We can transmit matches of lengths 3 through 258
|
|
|
|
* inclusive. So if len exceeds 258, we must transmit in
|
|
|
|
* several steps, with 258 or less in each step.
|
|
|
|
*
|
|
|
|
* Specifically: if len >= 261, we can transmit 258 and be
|
|
|
|
* sure of having at least 3 left for the next step. And if
|
|
|
|
* len <= 258, we can just transmit len. But if len == 259
|
|
|
|
* or 260, we must transmit len-3.
|
|
|
|
*/
|
2001-05-06 14:35:20 +00:00
|
|
|
thislen = (len > 260 ? 258 : len <= 258 ? len : len - 3);
|
|
|
|
len -= thislen;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Binary-search to find which length code we're
|
|
|
|
* transmitting.
|
|
|
|
*/
|
|
|
|
i = -1;
|
2019-01-04 07:13:08 +00:00
|
|
|
j = lenof(lencodes);
|
2001-05-13 14:02:28 +00:00
|
|
|
while (1) {
|
|
|
|
assert(j - i >= 2);
|
2001-05-06 14:35:20 +00:00
|
|
|
k = (j + i) / 2;
|
|
|
|
if (thislen < lencodes[k].min)
|
|
|
|
j = k;
|
|
|
|
else if (thislen > lencodes[k].max)
|
|
|
|
i = k;
|
|
|
|
else {
|
|
|
|
l = &lencodes[k];
|
|
|
|
break; /* found it! */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Transmit the length code. 256-279 are seven bits
|
|
|
|
* starting at 0000000; 280-287 are eight bits starting at
|
|
|
|
* 11000000.
|
|
|
|
*/
|
|
|
|
if (l->code <= 279) {
|
|
|
|
outbits(out, mirrorbytes[(l->code - 256) * 2], 7);
|
|
|
|
} else {
|
|
|
|
outbits(out, mirrorbytes[0xc0 - 280 + l->code], 8);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Transmit the extra bits.
|
|
|
|
*/
|
|
|
|
if (l->extrabits)
|
|
|
|
outbits(out, thislen - l->min, l->extrabits);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Binary-search to find which distance code we're
|
|
|
|
* transmitting.
|
|
|
|
*/
|
|
|
|
i = -1;
|
2019-01-04 07:13:08 +00:00
|
|
|
j = lenof(distcodes);
|
2001-05-13 14:02:28 +00:00
|
|
|
while (1) {
|
|
|
|
assert(j - i >= 2);
|
2001-05-06 14:35:20 +00:00
|
|
|
k = (j + i) / 2;
|
|
|
|
if (distance < distcodes[k].min)
|
|
|
|
j = k;
|
|
|
|
else if (distance > distcodes[k].max)
|
|
|
|
i = k;
|
|
|
|
else {
|
|
|
|
d = &distcodes[k];
|
|
|
|
break; /* found it! */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Transmit the distance code. Five bits starting at 00000.
|
|
|
|
*/
|
|
|
|
outbits(out, mirrorbytes[d->code * 8], 5);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Transmit the extra bits.
|
|
|
|
*/
|
|
|
|
if (d->extrabits)
|
|
|
|
outbits(out, distance - d->min, d->extrabits);
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-14 08:16:41 +00:00
|
|
|
struct ssh_zlib_compressor {
|
|
|
|
struct LZ77Context ectx;
|
|
|
|
ssh_compressor sc;
|
|
|
|
};
|
|
|
|
|
|
|
|
ssh_compressor *zlib_compress_init(void)
|
2001-05-06 14:35:20 +00:00
|
|
|
{
|
2000-11-01 21:34:21 +00:00
|
|
|
struct Outbuf *out;
|
2018-09-14 08:16:41 +00:00
|
|
|
struct ssh_zlib_compressor *comp = snew(struct ssh_zlib_compressor);
|
2000-11-01 21:34:21 +00:00
|
|
|
|
2018-09-14 08:16:41 +00:00
|
|
|
lz77_init(&comp->ectx);
|
|
|
|
comp->sc.vt = &ssh_zlib;
|
|
|
|
comp->ectx.literal = zlib_literal;
|
|
|
|
comp->ectx.match = zlib_match;
|
2000-11-01 21:34:21 +00:00
|
|
|
|
2003-03-29 16:14:26 +00:00
|
|
|
out = snew(struct Outbuf);
|
2019-02-11 06:58:07 +00:00
|
|
|
out->outbuf = NULL;
|
2000-11-01 21:34:21 +00:00
|
|
|
out->outbits = out->noutbits = 0;
|
Convert a lot of 'int' variables to 'bool'.
My normal habit these days, in new code, is to treat int and bool as
_almost_ completely separate types. I'm still willing to use C's
implicit test for zero on an integer (e.g. 'if (!blob.len)' is fine,
no need to spell it out as blob.len != 0), but generally, if a
variable is going to be conceptually a boolean, I like to declare it
bool and assign to it using 'true' or 'false' rather than 0 or 1.
PuTTY is an exception, because it predates the C99 bool, and I've
stuck to its existing coding style even when adding new code to it.
But it's been annoying me more and more, so now that I've decided C99
bool is an acceptable thing to require from our toolchain in the first
place, here's a quite thorough trawl through the source doing
'boolification'. Many variables and function parameters are now typed
as bool rather than int; many assignments of 0 or 1 to those variables
are now spelled 'true' or 'false'.
I managed this thorough conversion with the help of a custom clang
plugin that I wrote to trawl the AST and apply heuristics to point out
where things might want changing. So I've even managed to do a decent
job on parts of the code I haven't looked at in years!
To make the plugin's work easier, I pushed platform front ends
generally in the direction of using standard 'bool' in preference to
platform-specific boolean types like Windows BOOL or GTK's gboolean;
I've left the platform booleans in places they _have_ to be for the
platform APIs to work right, but variables only used by my own code
have been converted wherever I found them.
In a few places there are int values that look very like booleans in
_most_ of the places they're used, but have a rarely-used third value,
or a distinction between different nonzero values that most users
don't care about. In these cases, I've _removed_ uses of 'true' and
'false' for the return values, to emphasise that there's something
more subtle going on than a simple boolean answer:
- the 'multisel' field in dialog.h's list box structure, for which
the GTK front end in particular recognises a difference between 1
and 2 but nearly everything else treats as boolean
- the 'urgent' parameter to plug_receive, where 1 vs 2 tells you
something about the specific location of the urgent pointer, but
most clients only care about 0 vs 'something nonzero'
- the return value of wc_match, where -1 indicates a syntax error in
the wildcard.
- the return values from SSH-1 RSA-key loading functions, which use
-1 for 'wrong passphrase' and 0 for all other failures (so any
caller which already knows it's not loading an _encrypted private_
key can treat them as boolean)
- term->esc_query, and the 'query' parameter in toggle_mode in
terminal.c, which _usually_ hold 0 for ESC[123h or 1 for ESC[?123h,
but can also hold -1 for some other intervening character that we
don't support.
In a few places there's an integer that I haven't turned into a bool
even though it really _can_ only take values 0 or 1 (and, as above,
tried to make the call sites consistent in not calling those values
true and false), on the grounds that I thought it would make it more
confusing to imply that the 0 value was in some sense 'negative' or
bad and the 1 positive or good:
- the return value of plug_accepting uses the POSIXish convention of
0=success and nonzero=error; I think if I made it bool then I'd
also want to reverse its sense, and that's a job for a separate
piece of work.
- the 'screen' parameter to lineptr() in terminal.c, where 0 and 1
represent the default and alternate screens. There's no obvious
reason why one of those should be considered 'true' or 'positive'
or 'success' - they're just indices - so I've left it as int.
ssh_scp_recv had particularly confusing semantics for its previous int
return value: its call sites used '<= 0' to check for error, but it
never actually returned a negative number, just 0 or 1. Now the
function and its call sites agree that it's a bool.
In a couple of places I've renamed variables called 'ret', because I
don't like that name any more - it's unclear whether it means the
return value (in preparation) for the _containing_ function or the
return value received from a subroutine call, and occasionally I've
accidentally used the same variable for both and introduced a bug. So
where one of those got in my way, I've renamed it to 'toret' or 'retd'
(the latter short for 'returned') in line with my usual modern
practice, but I haven't done a thorough job of finding all of them.
Finally, one amusing side effect of doing this is that I've had to
separate quite a few chained assignments. It used to be perfectly fine
to write 'a = b = c = TRUE' when a,b,c were int and TRUE was just a
macro for 1; but now that the value being assigned is
the 'true' defined by stdbool.h, that idiom provokes a warning from
gcc: 'suggest parentheses around assignment used as truth value'!
2018-11-02 19:23:19 +00:00
|
|
|
out->firstblock = true;
|
2018-09-14 08:16:41 +00:00
|
|
|
comp->ectx.userdata = out;
|
2002-10-25 13:26:33 +00:00
|
|
|
|
2018-09-14 08:16:41 +00:00
|
|
|
return &comp->sc;
|
2002-10-25 13:26:33 +00:00
|
|
|
}
|
2000-11-01 21:34:21 +00:00
|
|
|
|
2018-09-14 08:16:41 +00:00
|
|
|
/*
 * Destroy a compressor: release its output-buffer state, the LZ77
 * context hanging off ectx.ictx, and finally the compressor
 * structure itself.
 *
 * 'sc' must be the ssh_compressor embedded in a struct
 * ssh_zlib_compressor; it is invalid after this call.
 */
void zlib_compress_cleanup(ssh_compressor *sc)
{
    struct ssh_zlib_compressor *zc =
        container_of(sc, struct ssh_zlib_compressor, sc);
    struct Outbuf *ob = (struct Outbuf *)zc->ectx.userdata;

    /*
     * zlib_compress_block() resets outbuf to NULL once it has handed
     * the data to its caller, so there is only something to free
     * here if a buffer is still attached.
     */
    if (ob->outbuf != NULL)
        strbuf_free(ob->outbuf);
    sfree(ob);

    sfree(zc->ectx.ictx);
    sfree(zc);
}
|
|
|
|
|
2018-11-27 19:22:02 +00:00
|
|
|
/*
 * Compress one block of data and return the resulting zlib output.
 *
 *  - sc:        the ssh_compressor embedded in a struct
 *               ssh_zlib_compressor.
 *  - block/len: the input data to compress.
 *  - outblock:  receives the output bytes, extracted from the
 *               internal strbuf with strbuf_to_str(). The buffer is
 *               detached from the compressor (outbuf is reset to
 *               NULL), so presumably the caller owns it afterwards.
 *  - outlen:    receives the number of output bytes.
 *  - minlen:    the output is padded with empty static blocks until
 *               it is at least this many bytes long.
 */
void zlib_compress_block(ssh_compressor *sc,
                         const unsigned char *block, int len,
                         unsigned char **outblock, int *outlen,
                         int minlen)
{
    struct ssh_zlib_compressor *zc =
        container_of(sc, struct ssh_zlib_compressor, sc);
    struct Outbuf *ob = (struct Outbuf *)zc->ectx.userdata;

    /* No output buffer may be left over from a previous call. */
    assert(!ob->outbuf);
    ob->outbuf = strbuf_new_nm();

    if (ob->firstblock) {
        /*
         * Very first call on this compressor: emit the Zlib
         * (RFC1950) header bytes 78 9C (Deflate compression, 32K
         * window size, default algorithm).
         */
        outbits(ob, 0x9C78, 16);
        ob->firstblock = false;

        /*
         * Nothing has opened a Deflate block yet, so start a
         * Deflate (RFC1951) fixed-trees block: a zero bit
         * (BFINAL=0) followed by a zero bit and a one bit
         * (BTYPE=01), which as a 3-bit value reads 'backwards'
         * as 2.
         *
         * On every later call this is unnecessary, because the
         * previous call finished by opening a fresh block.
         */
        outbits(ob, 2, 3);
    }

    /*
     * Do the compression.
     */
    lz77_compress(&zc->ectx, block, len);

    /*
     * End the block (code 256, which is 0000000 in fixed-tree mode)
     * and emit enough further bits to guarantee that the byte
     * holding the last piece of genuine data has been written out.
     * There are three ways to do that:
     *
     *  - Minimal flush: end-of-block, then open a new static block.
     *    That is 9 bits, which is certain to flush the last genuine
     *    code of the closed block; but allegedly zlib can't handle
     *    it.
     *
     *  - Zlib partial flush: end-of-block, open and close an empty
     *    static block, and only _then_ open the new block. This is
     *    the best zlib can handle.
     *
     *  - Zlib sync flush: end-of-block, then an empty
     *    _uncompressed_ block (000, sync to a byte boundary, then
     *    bytes 00 00 FF FF), then open the new block.
     *
     * For the moment, we use Zlib partial flush.
     */
    outbits(ob, 0, 7);                 /* close block */
    outbits(ob, 2, 3 + 7);             /* empty static block */
    outbits(ob, 2, 3);                 /* open new block */

    /*
     * If a minimum output length was requested, reach it by
     * emitting further empty static blocks.
     */
    while (ob->outbuf->len < minlen) {
        outbits(ob, 0, 7);             /* close block */
        outbits(ob, 2, 3);             /* open new static block */
    }

    /* Hand the finished data over and detach it from our state. */
    *outlen = ob->outbuf->len;
    *outblock = (unsigned char *)strbuf_to_str(ob->outbuf);
    ob->outbuf = NULL;
}
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------------
|
|
|
|
* Zlib decompression. Of course, even though our compressor always
|
|
|
|
* uses static trees, our _decompressor_ has to be capable of
|
|
|
|
* handling dynamic trees if it sees them.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The way we work the Huffman decode is to have a table lookup on
|
|
|
|
* the first N bits of the input stream (in the order they arrive,
|
|
|
|
* of course, i.e. the first bit of the Huffman code is in bit 0).
|
|
|
|
* Each table entry lists the number of bits to consume, plus
|
|
|
|
* either an output code or a pointer to a secondary table.
|
|
|
|
*/
|
|
|
|
struct zlib_table;
struct zlib_tableentry;

/*
 * One slot of a Huffman decode table.
 */
struct zlib_tableentry {
    /* Number of input bits to consume when this slot is selected. */
    unsigned char nbits;

    /*
     * Decoded symbol number, or -1 if this slot yields no symbol
     * (either it is unused, or decoding continues in 'nexttable').
     */
    short code;

    /* Secondary table to continue the lookup in, or NULL if none. */
    struct zlib_table *nexttable;
};
|
|
|
|
|
|
|
|
/*
 * One level of a Huffman decode table: an array of mask+1 entries,
 * indexed by the next bits of the input stream after masking.
 */
struct zlib_table {
    /* Mask applied to the input bit stream to form the index. */
    int mask;

    /* The entries themselves; valid indices are 0..mask inclusive. */
    struct zlib_tableentry *table;
};
|
|
|
|
|
|
|
|
/*
 * Size of the per-code-length arrays in zlib_mktable(), which are
 * indexed directly by code length; every code length handled there
 * must therefore be smaller than this value.
 */
#define MAXCODELEN 16
|
|
|
|
/*
 * Size of the per-symbol code array in zlib_mktable(), i.e. the
 * largest number of code lengths it can be given in one call.
 */
#define MAXSYMS 288
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Build a single-level decode table for elements
|
|
|
|
* [minlength,maxlength) of the provided code/length tables, and
|
|
|
|
* recurse to build subtables.
|
|
|
|
*/
|
|
|
|
static struct zlib_table *zlib_mkonetab(int *codes, unsigned char *lengths,
|
2001-05-06 14:35:20 +00:00
|
|
|
int nsyms,
|
|
|
|
int pfx, int pfxbits, int bits)
|
|
|
|
{
|
2003-03-29 16:14:26 +00:00
|
|
|
struct zlib_table *tab = snew(struct zlib_table);
|
2000-11-01 21:34:21 +00:00
|
|
|
int pfxmask = (1 << pfxbits) - 1;
|
|
|
|
int nbits, i, j, code;
|
|
|
|
|
2003-03-29 16:14:26 +00:00
|
|
|
tab->table = snewn(1 << bits, struct zlib_tableentry);
|
2000-11-01 21:34:21 +00:00
|
|
|
tab->mask = (1 << bits) - 1;
|
|
|
|
|
|
|
|
for (code = 0; code <= tab->mask; code++) {
|
2001-05-06 14:35:20 +00:00
|
|
|
tab->table[code].code = -1;
|
|
|
|
tab->table[code].nbits = 0;
|
|
|
|
tab->table[code].nexttable = NULL;
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < nsyms; i++) {
|
2001-05-06 14:35:20 +00:00
|
|
|
if (lengths[i] <= pfxbits || (codes[i] & pfxmask) != pfx)
|
|
|
|
continue;
|
|
|
|
code = (codes[i] >> pfxbits) & tab->mask;
|
|
|
|
for (j = code; j <= tab->mask; j += 1 << (lengths[i] - pfxbits)) {
|
|
|
|
tab->table[j].code = i;
|
|
|
|
nbits = lengths[i] - pfxbits;
|
|
|
|
if (tab->table[j].nbits < nbits)
|
|
|
|
tab->table[j].nbits = nbits;
|
|
|
|
}
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
for (code = 0; code <= tab->mask; code++) {
|
2001-05-06 14:35:20 +00:00
|
|
|
if (tab->table[code].nbits <= bits)
|
|
|
|
continue;
|
|
|
|
/* Generate a subtable. */
|
|
|
|
tab->table[code].code = -1;
|
|
|
|
nbits = tab->table[code].nbits - bits;
|
|
|
|
if (nbits > 7)
|
|
|
|
nbits = 7;
|
|
|
|
tab->table[code].nbits = bits;
|
|
|
|
tab->table[code].nexttable = zlib_mkonetab(codes, lengths, nsyms,
|
|
|
|
pfx | (code << pfxbits),
|
|
|
|
pfxbits + bits, nbits);
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return tab;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Build a decode table, given a set of Huffman tree lengths.
|
|
|
|
*/
|
2001-05-06 14:35:20 +00:00
|
|
|
static struct zlib_table *zlib_mktable(unsigned char *lengths,
|
|
|
|
int nlengths)
|
|
|
|
{
|
2000-11-01 21:34:21 +00:00
|
|
|
int count[MAXCODELEN], startcode[MAXCODELEN], codes[MAXSYMS];
|
|
|
|
int code, maxlen;
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
/* Count the codes of each length. */
|
|
|
|
maxlen = 0;
|
2001-05-06 14:35:20 +00:00
|
|
|
for (i = 1; i < MAXCODELEN; i++)
|
|
|
|
count[i] = 0;
|
2000-11-01 21:34:21 +00:00
|
|
|
for (i = 0; i < nlengths; i++) {
|
2001-05-06 14:35:20 +00:00
|
|
|
count[lengths[i]]++;
|
|
|
|
if (maxlen < lengths[i])
|
|
|
|
maxlen = lengths[i];
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
/* Determine the starting code for each length block. */
|
|
|
|
code = 0;
|
|
|
|
for (i = 1; i < MAXCODELEN; i++) {
|
2001-05-06 14:35:20 +00:00
|
|
|
startcode[i] = code;
|
|
|
|
code += count[i];
|
|
|
|
code <<= 1;
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
/* Determine the code for each symbol. Mirrored, of course. */
|
|
|
|
for (i = 0; i < nlengths; i++) {
|
2001-05-06 14:35:20 +00:00
|
|
|
code = startcode[lengths[i]]++;
|
|
|
|
codes[i] = 0;
|
|
|
|
for (j = 0; j < lengths[i]; j++) {
|
|
|
|
codes[i] = (codes[i] << 1) | (code & 1);
|
|
|
|
code >>= 1;
|
|
|
|
}
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now we have the complete list of Huffman codes. Build a
|
|
|
|
* table.
|
|
|
|
*/
|
|
|
|
return zlib_mkonetab(codes, lengths, nlengths, 0, 0,
|
2001-05-06 14:35:20 +00:00
|
|
|
maxlen < 9 ? maxlen : 9);
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
|
2001-05-06 14:35:20 +00:00
|
|
|
static int zlib_freetable(struct zlib_table **ztab)
|
|
|
|
{
|
2001-01-31 09:10:18 +00:00
|
|
|
struct zlib_table *tab;
|
|
|
|
int code;
|
|
|
|
|
|
|
|
if (ztab == NULL)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (*ztab == NULL)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
tab = *ztab;
|
|
|
|
|
|
|
|
for (code = 0; code <= tab->mask; code++)
|
|
|
|
if (tab->table[code].nexttable != NULL)
|
|
|
|
zlib_freetable(&tab->table[code].nexttable);
|
|
|
|
|
|
|
|
sfree(tab->table);
|
|
|
|
tab->table = NULL;
|
|
|
|
|
|
|
|
sfree(tab);
|
|
|
|
*ztab = NULL;
|
|
|
|
|
2001-05-06 14:35:20 +00:00
|
|
|
return (0);
|
2001-01-31 09:10:18 +00:00
|
|
|
}
|
|
|
|
|
2002-10-25 13:26:33 +00:00
|
|
|
struct zlib_decompress_ctx {
|
2000-11-01 21:34:21 +00:00
|
|
|
struct zlib_table *staticlentable, *staticdisttable;
|
|
|
|
struct zlib_table *currlentable, *currdisttable, *lenlentable;
|
|
|
|
enum {
|
2001-05-06 14:35:20 +00:00
|
|
|
START, OUTSIDEBLK,
|
|
|
|
TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP,
|
|
|
|
INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM,
|
|
|
|
UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA
|
2000-11-01 21:34:21 +00:00
|
|
|
} state;
|
2001-05-06 14:35:20 +00:00
|
|
|
int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len,
|
|
|
|
lenrep;
|
2000-11-01 21:34:21 +00:00
|
|
|
int uncomplen;
|
|
|
|
unsigned char lenlen[19];
|
2019-04-28 08:59:28 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Array that accumulates the code lengths sent in the header of a
|
|
|
|
* dynamic-Huffman-tree block.
|
|
|
|
*
|
|
|
|
* There are 286 actual symbols in the literal/length alphabet
|
|
|
|
* (256 literals plus 20 length categories), and 30 symbols in the
|
|
|
|
* distance alphabet. However, the block header transmits the
|
|
|
|
* number of code lengths for the former alphabet as a 5-bit value
|
|
|
|
* HLIT to be added to 257, and the latter as a 5-bit value HDIST
|
|
|
|
* to be added to 1. This means that the number of _code lengths_
|
|
|
|
* can go as high as 288 for the symbol alphabet and 32 for the
|
|
|
|
* distance alphabet - each of those values being 2 more than the
|
|
|
|
* maximum number of actual symbols.
|
|
|
|
*
|
|
|
|
* It's tempting to rule that sending out-of-range HLIT or HDIST
|
|
|
|
* is therefore just illegal, and to fault it when we initially
|
|
|
|
* receive that header. But instead I've chosen to permit the
|
|
|
|
* Huffman-code definition to include code length entries for
|
|
|
|
* those unused symbols; if a header of that form is transmitted,
|
|
|
|
* then the effect will be that in the main body of the block,
|
|
|
|
* some bit sequence(s) will generate an illegal symbol number,
|
|
|
|
* and _that_ will be faulted as a decoding error.
|
|
|
|
*
|
|
|
|
* Rationale: this can already happen! The standard Huffman code
|
|
|
|
* used in a _static_ block for the literal/length alphabet is
|
|
|
|
* defined in such a way that it includes codes for symbols 287
|
|
|
|
* and 288, which are then never actually sent in the body of the
|
|
|
|
* block. And I think that if the standard static tree definition
|
|
|
|
* is willing to include Huffman codes that don't correspond to a
|
|
|
|
* symbol, then it's an excessive restriction on dynamic tables
|
|
|
|
* not to permit them to do the same. In particular, it would be
|
|
|
|
* strange for a dynamic block not to be able to exactly mimic
|
|
|
|
* either or both of the Huffman codes used by a static block for
|
|
|
|
* the corresponding alphabet.
|
|
|
|
*
|
|
|
|
* So we place no constraint on HLIT or HDIST during code
|
|
|
|
* construction, and we make this array large enough to include
|
|
|
|
* the maximum number of code lengths that can possibly arise as a
|
|
|
|
* result. It's only trying to _use_ the junk Huffman codes after
|
|
|
|
* table construction is completed that will provoke a decode
|
|
|
|
* error.
|
|
|
|
*/
|
|
|
|
unsigned char lengths[288 + 32];
|
|
|
|
|
2000-11-01 21:34:21 +00:00
|
|
|
unsigned long bits;
|
|
|
|
int nbits;
|
|
|
|
unsigned char window[WINSIZE];
|
|
|
|
int winpos;
|
2019-02-11 06:58:07 +00:00
|
|
|
strbuf *outblk;
|
2018-09-14 08:16:41 +00:00
|
|
|
|
|
|
|
ssh_decompressor dc;
|
2002-10-25 13:26:33 +00:00
|
|
|
};
|
2000-11-01 21:34:21 +00:00
|
|
|
|
2018-09-14 08:16:41 +00:00
|
|
|
ssh_decompressor *zlib_decompress_init(void)
|
2001-05-06 14:35:20 +00:00
|
|
|
{
|
2003-03-29 16:14:26 +00:00
|
|
|
struct zlib_decompress_ctx *dctx = snew(struct zlib_decompress_ctx);
|
2000-11-01 21:34:21 +00:00
|
|
|
unsigned char lengths[288];
|
2002-10-25 13:26:33 +00:00
|
|
|
|
2000-11-01 21:34:21 +00:00
|
|
|
memset(lengths, 8, 144);
|
2001-05-06 14:35:20 +00:00
|
|
|
memset(lengths + 144, 9, 256 - 144);
|
|
|
|
memset(lengths + 256, 7, 280 - 256);
|
|
|
|
memset(lengths + 280, 8, 288 - 280);
|
2002-10-25 13:26:33 +00:00
|
|
|
dctx->staticlentable = zlib_mktable(lengths, 288);
|
2000-11-01 21:34:21 +00:00
|
|
|
memset(lengths, 5, 32);
|
2002-10-25 13:26:33 +00:00
|
|
|
dctx->staticdisttable = zlib_mktable(lengths, 32);
|
|
|
|
dctx->state = START; /* even before header */
|
|
|
|
dctx->currlentable = dctx->currdisttable = dctx->lenlentable = NULL;
|
|
|
|
dctx->bits = 0;
|
|
|
|
dctx->nbits = 0;
|
2002-10-26 12:58:13 +00:00
|
|
|
dctx->winpos = 0;
|
2019-02-11 06:58:07 +00:00
|
|
|
dctx->outblk = NULL;
|
2002-10-25 13:26:33 +00:00
|
|
|
|
2018-09-14 08:16:41 +00:00
|
|
|
dctx->dc.vt = &ssh_zlib;
|
|
|
|
return &dctx->dc;
|
2002-10-25 13:26:33 +00:00
|
|
|
}
|
|
|
|
|
2018-09-14 08:16:41 +00:00
|
|
|
/*
 * Destroy a decompression context created by zlib_decompress_init().
 *
 * Frees any per-block Huffman tables still held, then the two static
 * tables, then any pending output buffer, and finally the context
 * itself.
 */
void zlib_decompress_cleanup(ssh_decompressor *dc)
{
    struct zlib_decompress_ctx *dctx =
        container_of(dc, struct zlib_decompress_ctx, dc);

    /*
     * The current tables may simply alias the static ones (while
     * inside a static block); in that case they must not be freed
     * here, because the static tables are freed below.
     */
    if (dctx->currlentable != dctx->staticlentable &&
        dctx->currlentable != NULL) {
        zlib_freetable(&dctx->currlentable);
    }
    if (dctx->currdisttable != dctx->staticdisttable &&
        dctx->currdisttable != NULL) {
        zlib_freetable(&dctx->currdisttable);
    }

    /* Code-length table of a dynamic header that was still in progress. */
    if (dctx->lenlentable != NULL) {
        zlib_freetable(&dctx->lenlentable);
    }

    zlib_freetable(&dctx->staticlentable);
    zlib_freetable(&dctx->staticdisttable);

    if (dctx->outblk != NULL) {
        strbuf_free(dctx->outblk);
    }

    sfree(dctx);
}
|
|
|
|
|
2003-01-05 23:36:53 +00:00
|
|
|
static int zlib_huflookup(unsigned long *bitsp, int *nbitsp,
|
2001-05-06 14:35:20 +00:00
|
|
|
struct zlib_table *tab)
|
|
|
|
{
|
2000-11-01 21:34:21 +00:00
|
|
|
unsigned long bits = *bitsp;
|
|
|
|
int nbits = *nbitsp;
|
|
|
|
while (1) {
|
2001-05-06 14:35:20 +00:00
|
|
|
struct zlib_tableentry *ent;
|
|
|
|
ent = &tab->table[bits & tab->mask];
|
|
|
|
if (ent->nbits > nbits)
|
|
|
|
return -1; /* not enough data */
|
|
|
|
bits >>= ent->nbits;
|
|
|
|
nbits -= ent->nbits;
|
|
|
|
if (ent->code == -1)
|
|
|
|
tab = ent->nexttable;
|
|
|
|
else {
|
|
|
|
*bitsp = bits;
|
|
|
|
*nbitsp = nbits;
|
|
|
|
return ent->code;
|
|
|
|
}
|
2003-06-26 13:41:30 +00:00
|
|
|
|
|
|
|
if (!tab) {
|
|
|
|
/*
|
|
|
|
* There was a missing entry in the table, presumably
|
|
|
|
* due to an invalid Huffman table description, and the
|
|
|
|
* subsequent data has attempted to use the missing
|
|
|
|
* entry. Return a decoding failure.
|
|
|
|
*/
|
|
|
|
return -2;
|
|
|
|
}
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-10-25 13:26:33 +00:00
|
|
|
static void zlib_emit_char(struct zlib_decompress_ctx *dctx, int c)
|
2001-05-06 14:35:20 +00:00
|
|
|
{
|
2002-10-25 13:26:33 +00:00
|
|
|
dctx->window[dctx->winpos] = c;
|
|
|
|
dctx->winpos = (dctx->winpos + 1) & (WINSIZE - 1);
|
2019-02-11 06:58:07 +00:00
|
|
|
put_byte(dctx->outblk, c);
|
2000-11-01 21:34:21 +00:00
|
|
|
}
|
|
|
|
|
2002-10-25 13:26:33 +00:00
|
|
|
/*
 * Consume n bits from the input bit buffer: reduce the count of valid
 * bits by n and shift the accumulator right by n. Expands to an
 * expression that refers to a variable named 'dctx', which must be in
 * scope at the point of use. The argument is evaluated twice, so it
 * must not have side effects.
 */
#define EATBITS(n) ( dctx->nbits -= (n), dctx->bits >>= (n) )
|
2000-11-01 21:34:21 +00:00
|
|
|
|
2018-11-27 19:22:02 +00:00
|
|
|
/*
 * Feed a chunk of zlib-compressed input to the decompressor.
 *
 * dc        decompressor returned by zlib_decompress_init().
 * block,len the compressed input bytes for this call.
 * outblock  receives a newly allocated buffer holding everything that
 *           could be decompressed from the input seen so far (taken
 *           from the output strbuf via strbuf_to_str(); presumably the
 *           caller owns and frees it - confirm against callers).
 * outlen    receives the number of bytes in *outblock.
 *
 * All decoder state (bit buffer, state-machine position, Huffman
 * tables, sliding window) lives in the context, so the compressed
 * stream may be split across calls at any point: when a state needs
 * more bits than are available the function stops and resumes from
 * the same state on the next call.
 *
 * Returns true on success. Returns false on a decoding error, in which
 * case *outblock is NULL, *outlen is 0 and any partial output from this
 * call is discarded.
 */
bool zlib_decompress_block(ssh_decompressor *dc,
                           const unsigned char *block, int len,
                           unsigned char **outblock, int *outlen)
{
    struct zlib_decompress_ctx *dctx =
        container_of(dc, struct zlib_decompress_ctx, dc);
    const coderecord *rec;
    int code, blktype, rep, dist, nlen, header;
    /*
     * The i-th code-length-code length transmitted in a dynamic block
     * header belongs to symbol lenlenmap[i].
     */
    static const unsigned char lenlenmap[] = {
        16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
    };

    assert(!dctx->outblk);
    dctx->outblk = strbuf_new_nm();

    while (len > 0 || dctx->nbits > 0) {
        /*
         * Top up the bit buffer a byte at a time, least significant
         * bits first. The cast keeps the shift in unsigned arithmetic.
         */
        while (dctx->nbits < 24 && len > 0) {
            dctx->bits |= (unsigned long)(*block++) << dctx->nbits;
            dctx->nbits += 8;
            len--;
        }

        switch (dctx->state) {
          case START:
            /* Expect 16-bit zlib header. */
            if (dctx->nbits < 16)
                goto finished;         /* done all we can */

            /*
             * The header is stored as a big-endian 16-bit integer,
             * in contrast to the general little-endian policy in
             * the rest of the format :-(
             */
            header = (((dctx->bits & 0xFF00) >> 8) |
                      ((dctx->bits & 0x00FF) << 8));
            EATBITS(16);

            /*
             * Check the header:
             *
             *  - bits 8-11 should be 1000 (Deflate/RFC1951)
             *  - bits 12-15 should be at most 0111 (window size)
             *  - bit 5 should be zero (no dictionary present)
             *  - we don't care about bits 6-7 (compression rate)
             *  - bits 0-4 should be set up to make the whole thing
             *    a multiple of 31 (checksum).
             */
            if ((header & 0x0F00) != 0x0800 ||
                (header & 0xF000) >  0x7000 ||
                (header & 0x0020) != 0x0000 ||
                (header % 31) != 0)
                goto decode_error;

            dctx->state = OUTSIDEBLK;
            break;

          case OUTSIDEBLK:
            /* Expect 3-bit block header. */
            if (dctx->nbits < 3)
                goto finished;         /* done all we can */
            EATBITS(1);                /* 'final block' flag: not used */
            blktype = dctx->bits & 3;
            EATBITS(2);
            if (blktype == 0) {
                /* Stored (uncompressed) block. */
                int to_eat = dctx->nbits & 7;
                dctx->state = UNCOMP_LEN;
                EATBITS(to_eat);       /* align to byte boundary */
            } else if (blktype == 1) {
                /* Block using the static Huffman codes. */
                dctx->currlentable = dctx->staticlentable;
                dctx->currdisttable = dctx->staticdisttable;
                dctx->state = INBLK;
            } else if (blktype == 2) {
                /* Block with its own Huffman codes in a header. */
                dctx->state = TREES_HDR;
            } else {
                /*
                 * Block type 3 is reserved and must be treated as an
                 * error, rather than being skipped and the following
                 * bits misread as further block headers.
                 */
                goto decode_error;
            }
            break;

          case TREES_HDR:
            /*
             * Dynamic block header. Five bits of HLIT, five of
             * HDIST, four of HCLEN.
             */
            if (dctx->nbits < 5 + 5 + 4)
                goto finished;         /* done all we can */
            dctx->hlit = 257 + (dctx->bits & 31);
            EATBITS(5);
            dctx->hdist = 1 + (dctx->bits & 31);
            EATBITS(5);
            dctx->hclen = 4 + (dctx->bits & 15);
            EATBITS(4);
            dctx->lenptr = 0;
            dctx->state = TREES_LENLEN;
            memset(dctx->lenlen, 0, sizeof(dctx->lenlen));
            break;

          case TREES_LENLEN:
            /* Read HCLEN three-bit lengths for the code-length code. */
            if (dctx->nbits < 3)
                goto finished;
            while (dctx->lenptr < dctx->hclen && dctx->nbits >= 3) {
                dctx->lenlen[lenlenmap[dctx->lenptr++]] =
                    (unsigned char) (dctx->bits & 7);
                EATBITS(3);
            }
            if (dctx->lenptr == dctx->hclen) {
                dctx->lenlentable = zlib_mktable(dctx->lenlen, 19);
                dctx->state = TREES_LEN;
                dctx->lenptr = 0;
            }
            break;

          case TREES_LEN:
            if (dctx->lenptr >= dctx->hlit + dctx->hdist) {
                /*
                 * All code lengths have been read: build the
                 * literal/length and distance tables for this block
                 * and discard the code-length table.
                 */
                dctx->currlentable = zlib_mktable(dctx->lengths, dctx->hlit);
                dctx->currdisttable = zlib_mktable(dctx->lengths + dctx->hlit,
                                                  dctx->hdist);
                zlib_freetable(&dctx->lenlentable);
                dctx->lenlentable = NULL;
                dctx->state = INBLK;
                break;
            }
            code =
                zlib_huflookup(&dctx->bits, &dctx->nbits, dctx->lenlentable);
            if (code == -1)
                goto finished;
            if (code == -2)
                goto decode_error;
            if (code < 16) {
                /* A literal code length. */
                dctx->lengths[dctx->lenptr++] = code;
            } else {
                /*
                 * A repeat code: 16 repeats the previous length,
                 * 17 and 18 repeat zero. Record how many extra bits
                 * give the repeat count and what to add to them.
                 */
                dctx->lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7);
                dctx->lenaddon = (code == 18 ? 11 : 3);
                dctx->lenrep = (code == 16 && dctx->lenptr > 0 ?
                                dctx->lengths[dctx->lenptr - 1] : 0);
                dctx->state = TREES_LENREP;
            }
            break;

          case TREES_LENREP:
            if (dctx->nbits < dctx->lenextrabits)
                goto finished;
            rep =
                dctx->lenaddon +
                (dctx->bits & ((1 << dctx->lenextrabits) - 1));
            EATBITS(dctx->lenextrabits);
            /* Never write more lengths than the header announced. */
            while (rep > 0 && dctx->lenptr < dctx->hlit + dctx->hdist) {
                dctx->lengths[dctx->lenptr] = dctx->lenrep;
                dctx->lenptr++;
                rep--;
            }
            dctx->state = TREES_LEN;
            break;

          case INBLK:
            code =
                zlib_huflookup(&dctx->bits, &dctx->nbits, dctx->currlentable);
            if (code == -1)
                goto finished;
            if (code == -2)
                goto decode_error;
            if (code < 256) {
                zlib_emit_char(dctx, code);
            } else if (code == 256) {
                /*
                 * End of block. Free the tables unless they are the
                 * shared static ones.
                 */
                dctx->state = OUTSIDEBLK;
                if (dctx->currlentable != dctx->staticlentable) {
                    zlib_freetable(&dctx->currlentable);
                    dctx->currlentable = NULL;
                }
                if (dctx->currdisttable != dctx->staticdisttable) {
                    zlib_freetable(&dctx->currdisttable);
                    dctx->currdisttable = NULL;
                }
            } else if (code < 286) {
                dctx->state = GOTLENSYM;
                dctx->sym = code;
            } else {
                /* literal/length symbols 286 and 287 are invalid */
                goto decode_error;
            }
            break;

          case GOTLENSYM:
            /* Read the extra bits that complete the match length. */
            rec = &lencodes[dctx->sym - 257];
            if (dctx->nbits < rec->extrabits)
                goto finished;
            dctx->len =
                rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
            EATBITS(rec->extrabits);
            dctx->state = GOTLEN;
            break;

          case GOTLEN:
            code =
                zlib_huflookup(&dctx->bits, &dctx->nbits,
                               dctx->currdisttable);
            if (code == -1)
                goto finished;
            if (code == -2)
                goto decode_error;
            if (code >= 30)            /* dist symbols 30 and 31 are invalid */
                goto decode_error;
            dctx->state = GOTDISTSYM;
            dctx->sym = code;
            break;

          case GOTDISTSYM:
            /* Read the extra bits that complete the distance, then copy. */
            rec = &distcodes[dctx->sym];
            if (dctx->nbits < rec->extrabits)
                goto finished;
            dist = rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
            EATBITS(rec->extrabits);
            dctx->state = INBLK;
            /*
             * Copy byte by byte: each emitted byte advances winpos, so
             * an overlapping match (dist < len) re-reads bytes written
             * earlier in this same loop.
             */
            while (dctx->len--)
                zlib_emit_char(dctx, dctx->window[(dctx->winpos - dist) &
                                                  (WINSIZE - 1)]);
            break;

          case UNCOMP_LEN:
            /*
             * Uncompressed block. We expect to see a 16-bit LEN.
             */
            if (dctx->nbits < 16)
                goto finished;
            dctx->uncomplen = dctx->bits & 0xFFFF;
            EATBITS(16);
            dctx->state = UNCOMP_NLEN;
            break;

          case UNCOMP_NLEN:
            /*
             * Uncompressed block. We expect to see a 16-bit NLEN,
             * which should be the one's complement of the previous
             * LEN.
             */
            if (dctx->nbits < 16)
                goto finished;
            nlen = dctx->bits & 0xFFFF;
            EATBITS(16);
            if (dctx->uncomplen != (nlen ^ 0xFFFF))
                goto decode_error;
            if (dctx->uncomplen == 0)
                dctx->state = OUTSIDEBLK;   /* block is empty */
            else
                dctx->state = UNCOMP_DATA;
            break;

          case UNCOMP_DATA:
            if (dctx->nbits < 8)
                goto finished;
            zlib_emit_char(dctx, dctx->bits & 0xFF);
            EATBITS(8);
            if (--dctx->uncomplen == 0)
                dctx->state = OUTSIDEBLK;   /* end of uncompressed block */
            break;
        }
    }

  finished:
    /* Hand the accumulated output to the caller and detach the buffer. */
    *outlen = dctx->outblk->len;
    *outblock = (unsigned char *)strbuf_to_str(dctx->outblk);
    dctx->outblk = NULL;
    return true;

  decode_error:
    /*
     * Discard the partial output now rather than leaving it attached
     * to the context, so that the assertion at the top of this
     * function still holds if the caller invokes it again.
     */
    strbuf_free(dctx->outblk);
    dctx->outblk = NULL;
    *outblock = NULL;
    *outlen = 0;
    return false;
}
|
|
|
|
|
2019-01-04 06:51:44 +00:00
|
|
|
const ssh_compression_alg ssh_zlib = {
|
2000-11-01 21:34:21 +00:00
|
|
|
"zlib",
|
2011-03-04 22:34:47 +00:00
|
|
|
"zlib@openssh.com", /* delayed version */
|
2000-11-01 21:34:21 +00:00
|
|
|
zlib_compress_init,
|
2002-10-25 13:26:33 +00:00
|
|
|
zlib_compress_cleanup,
|
2000-11-01 21:34:21 +00:00
|
|
|
zlib_compress_block,
|
|
|
|
zlib_decompress_init,
|
2002-10-25 13:26:33 +00:00
|
|
|
zlib_decompress_cleanup,
|
2001-03-05 16:38:42 +00:00
|
|
|
zlib_decompress_block,
|
2002-10-25 13:26:33 +00:00
|
|
|
"zlib (RFC1950)"
|
2000-11-01 21:34:21 +00:00
|
|
|
};
|