Initial script (text) format support

See #37 for details.
This commit is contained in:
Michał Trojnara 2024-02-12 10:54:18 +01:00
parent 1bdcad619e
commit 4576895718
19 changed files with 1366 additions and 36 deletions

View File

@ -48,7 +48,7 @@ configure_file(Config.h.in config.h)
target_compile_definitions(osslsigncode PRIVATE HAVE_CONFIG_H=1)
# set sources
target_sources(osslsigncode PRIVATE osslsigncode.c helpers.c msi.c pe.c cab.c cat.c appx.c)
target_sources(osslsigncode PRIVATE osslsigncode.c helpers.c utf.c msi.c pe.c cab.c cat.c appx.c script.c)
if(NOT UNIX)
target_sources(osslsigncode PRIVATE applink.c)
endif(NOT UNIX)

8
appx.c
View File

@ -547,6 +547,8 @@ static PKCS7 *appx_pkcs7_extract(FILE_FORMAT_CTX *ctx)
*/
static int appx_remove_pkcs7(FILE_FORMAT_CTX *ctx, BIO *hash, BIO *outdata)
{
uint8_t *data = NULL;
size_t dataSize;
uint64_t cdOffset, noEntries = 0;
ZIP_FILE *zip = ctx->appx_ctx->zip;
ZIP_CENTRAL_DIRECTORY_ENTRY *entry = zipGetCDEntryByName(zip, CONTENT_TYPES_FILENAME);
@ -558,6 +560,12 @@ static int appx_remove_pkcs7(FILE_FORMAT_CTX *ctx, BIO *hash, BIO *outdata)
printf("Not a valid .appx file: content types file missing\n");
return 1; /* FAILED */
}
/* read signature data */
dataSize = zipReadFileDataByName(&data, ctx->appx_ctx->zip, APP_SIGNATURE_FILENAME);
if (dataSize <= 0) {
return 1; /* FAILED, no signature */
}
OPENSSL_free(data);
if (!appx_remove_ct_signature_entry(zip, entry)) {
printf("Failed to remove signature entry\n");
return 1; /* FAILED */

7
cab.c
View File

@ -427,11 +427,16 @@ static int cab_remove_pkcs7(FILE_FORMAT_CTX *ctx, BIO *hash, BIO *outdata)
size_t i, written, len;
uint32_t tmp;
uint16_t nfolders, flags;
char *buf = OPENSSL_malloc(SIZE_64K);
char *buf;
/* squash the unused parameter warning */
(void)hash;
if (ctx->cab_ctx->sigpos == 0 || ctx->cab_ctx->siglen == 0
|| ctx->cab_ctx->sigpos > ctx->cab_ctx->fileend) {
return 1; /* FAILED, no signature */
}
buf = OPENSSL_malloc(SIZE_64K);
/*
* u1 signature[4] 4643534D MSCF: 0-3
* u4 reserved1 00000000: 4-7

View File

@ -129,9 +129,9 @@ string(SUBSTRING ${sha256sum} 0 64 leafhash)
enable_testing()
set(extensions_all "exe" "ex_" "msi" "256appx" "512appx" "cat")
set(extensions_nocat "exe" "ex_" "msi" "256appx" "512appx")
set(extensions_nocatappx "exe" "ex_" "msi")
set(extensions_all "exe" "ex_" "msi" "256appx" "512appx" "cat" "ps1" "psc1" "mof")
set(extensions_nocat "exe" "ex_" "msi" "256appx" "512appx" "ps1" "psc1" "mof")
set(extensions_nocatappx "exe" "ex_" "msi" "ps1" "psc1" "mof")
set(formats "pem" "der")
# Test 1

View File

@ -595,6 +595,9 @@ static int spc_indirect_data_content_create(u_char **blob, int *len, FILE_FORMAT
void *hash;
SpcIndirectDataContent *idc = SpcIndirectDataContent_new();
if (!ctx->format->data_blob_get || !ctx->format->hash_length_get) {
return 0; /* FAILED */
}
if (ctx->format->md_get) {
/* APPX file specific - use a hash algorithm specified in the AppxBlockMap.xml file */
mdtype = EVP_MD_nid(ctx->format->md_get(ctx));

38
msi.c
View File

@ -100,30 +100,6 @@ static const u_char msi_zeroes[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
typedef struct {
ASN1_INTEGER *a;
ASN1_OCTET_STRING *string;
ASN1_INTEGER *b;
ASN1_INTEGER *c;
ASN1_INTEGER *d;
ASN1_INTEGER *e;
ASN1_INTEGER *f;
} SpcSipInfo;
DECLARE_ASN1_FUNCTIONS(SpcSipInfo)
ASN1_SEQUENCE(SpcSipInfo) = {
ASN1_SIMPLE(SpcSipInfo, a, ASN1_INTEGER),
ASN1_SIMPLE(SpcSipInfo, string, ASN1_OCTET_STRING),
ASN1_SIMPLE(SpcSipInfo, b, ASN1_INTEGER),
ASN1_SIMPLE(SpcSipInfo, c, ASN1_INTEGER),
ASN1_SIMPLE(SpcSipInfo, d, ASN1_INTEGER),
ASN1_SIMPLE(SpcSipInfo, e, ASN1_INTEGER),
ASN1_SIMPLE(SpcSipInfo, f, ASN1_INTEGER),
} ASN1_SEQUENCE_END(SpcSipInfo)
IMPLEMENT_ASN1_FUNCTIONS(SpcSipInfo)
typedef struct {
u_char signature[8]; /* 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 */
u_char unused_clsid[16]; /* reserved and unused */
@ -321,6 +297,10 @@ static FILE_FORMAT_CTX *msi_ctx_new(GLOBAL_OPTIONS *options, BIO *hash, BIO *out
/*
* Allocate and return SpcSipInfo object.
* Subject Interface Package (SIP) is an internal Microsoft API for
* transforming arbitrary files into a digestible stream.
* These ClassIDs are found in the indirect data section and identify
* the type of processor needed to validate the signature.
* [out] p: SpcSipInfo data
* [out] plen: SpcSipInfo data length
* [in] ctx: structure holds input and output data (unused)
@ -328,7 +308,7 @@ static FILE_FORMAT_CTX *msi_ctx_new(GLOBAL_OPTIONS *options, BIO *hash, BIO *out
*/
static ASN1_OBJECT *msi_spc_sip_info_get(u_char **p, int *plen, FILE_FORMAT_CTX *ctx)
{
const u_char msistr[] = {
const u_char SpcUUIDSipInfoMsi[] = {
0xf1, 0x10, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00,
0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46
};
@ -344,7 +324,7 @@ static ASN1_OBJECT *msi_spc_sip_info_get(u_char **p, int *plen, FILE_FORMAT_CTX
ASN1_INTEGER_set(si->d, 0);
ASN1_INTEGER_set(si->e, 0);
ASN1_INTEGER_set(si->f, 0);
ASN1_OCTET_STRING_set(si->string, msistr, sizeof msistr);
ASN1_OCTET_STRING_set(si->string, SpcUUIDSipInfoMsi, sizeof SpcUUIDSipInfoMsi);
*plen = i2d_SpcSipInfo(si, NULL);
*p = OPENSSL_malloc((size_t)*plen);
i2d_SpcSipInfo(si, p);
@ -638,9 +618,15 @@ static PKCS7 *msi_pkcs7_extract_to_nest(FILE_FORMAT_CTX *ctx)
*/
static int msi_remove_pkcs7(FILE_FORMAT_CTX *ctx, BIO *hash, BIO *outdata)
{
MSI_ENTRY *ds;
/* squash the unused parameter warning */
(void)hash;
ds = msi_signatures_get(ctx->msi_ctx->dirent, NULL);
if (!ds) {
return 1; /* FAILED, no signature */
}
if (!msi_dirent_delete(ctx->msi_ctx->dirent, digital_signature_ex,
sizeof digital_signature_ex)) {
return 1; /* FAILED */

View File

@ -123,6 +123,18 @@ ASN1_SEQUENCE(SpcSpOpusInfo) = {
IMPLEMENT_ASN1_FUNCTIONS(SpcSpOpusInfo)
ASN1_SEQUENCE(SpcSipInfo) = {
ASN1_SIMPLE(SpcSipInfo, a, ASN1_INTEGER),
ASN1_SIMPLE(SpcSipInfo, string, ASN1_OCTET_STRING),
ASN1_SIMPLE(SpcSipInfo, b, ASN1_INTEGER),
ASN1_SIMPLE(SpcSipInfo, c, ASN1_INTEGER),
ASN1_SIMPLE(SpcSipInfo, d, ASN1_INTEGER),
ASN1_SIMPLE(SpcSipInfo, e, ASN1_INTEGER),
ASN1_SIMPLE(SpcSipInfo, f, ASN1_INTEGER),
} ASN1_SEQUENCE_END(SpcSipInfo)
IMPLEMENT_ASN1_FUNCTIONS(SpcSipInfo)
ASN1_SEQUENCE(SpcAttributeTypeAndOptionalValue) = {
ASN1_SIMPLE(SpcAttributeTypeAndOptionalValue, type, ASN1_OBJECT),
ASN1_OPT(SpcAttributeTypeAndOptionalValue, value, ASN1_ANY)
@ -2844,6 +2856,8 @@ static int check_attached_data(GLOBAL_OPTIONS *options)
tmp_options->infile = options->outfile;
tmp_options->cmd = CMD_VERIFY;
ctx = file_format_script.ctx_new(tmp_options, NULL, NULL);
if (!ctx)
ctx = file_format_msi.ctx_new(tmp_options, NULL, NULL);
if (!ctx)
ctx = file_format_pe.ctx_new(tmp_options, NULL, NULL);
@ -4317,6 +4331,8 @@ int main(int argc, char **argv)
DO_EXIT_1("Failed to create file: %s\n", options.outfile);
}
}
ctx = file_format_script.ctx_new(&options, hash, outdata);
if (!ctx)
ctx = file_format_msi.ctx_new(&options, hash, outdata);
if (!ctx)
ctx = file_format_pe.ctx_new(&options, hash, outdata);
@ -4366,6 +4382,9 @@ int main(int argc, char **argv)
DO_EXIT_0("Unsupported command: remove-signature\n");
}
ret = ctx->format->remove_pkcs7(ctx, hash, outdata);
if (ret) {
DO_EXIT_0("Unable to remove existing signature\n");
}
if (ctx->format->update_data_size) {
ctx->format->update_data_size(ctx, outdata, NULL);
}

View File

@ -82,6 +82,10 @@
#define PROVIDE_ASKPASS 1
#endif
#ifdef _MSC_VER
/* not WIN32, because strcasecmp exists in MinGW */
#define strcasecmp _stricmp
#endif
#ifdef WIN32
#define remove_file(filename) _unlink(filename)
@ -332,6 +336,18 @@ typedef struct {
DECLARE_ASN1_FUNCTIONS(SpcSpOpusInfo)
typedef struct {
ASN1_INTEGER *a;
ASN1_OCTET_STRING *string;
ASN1_INTEGER *b;
ASN1_INTEGER *c;
ASN1_INTEGER *d;
ASN1_INTEGER *e;
ASN1_INTEGER *f;
} SpcSipInfo;
DECLARE_ASN1_FUNCTIONS(SpcSipInfo)
typedef struct {
ASN1_OBJECT *type;
ASN1_TYPE *value;
@ -468,6 +484,8 @@ typedef struct {
DECLARE_ASN1_FUNCTIONS(MsCtlContent)
typedef struct file_format_st FILE_FORMAT;
typedef struct script_ctx_st SCRIPT_CTX;
typedef struct msi_ctx_st MSI_CTX;
typedef struct pe_ctx_st PE_CTX;
typedef struct cab_ctx_st CAB_CTX;
@ -478,6 +496,7 @@ typedef struct {
FILE_FORMAT *format;
GLOBAL_OPTIONS *options;
union {
SCRIPT_CTX *script_ctx;
MSI_CTX *msi_ctx;
PE_CTX *pe_ctx;
CAB_CTX *cab_ctx;
@ -486,6 +505,7 @@ typedef struct {
};
} FILE_FORMAT_CTX;
extern FILE_FORMAT file_format_script;
extern FILE_FORMAT file_format_msi;
extern FILE_FORMAT file_format_pe;
extern FILE_FORMAT file_format_cab;

6
pe.c
View File

@ -404,9 +404,9 @@ static PKCS7 *pe_pkcs7_extract_to_nest(FILE_FORMAT_CTX *ctx)
*/
static int pe_remove_pkcs7(FILE_FORMAT_CTX *ctx, BIO *hash, BIO *outdata)
{
if (ctx->pe_ctx->sigpos == 0) {
printf("PE file does not have any signature\n");
return 1; /* FAILED */
if (ctx->pe_ctx->sigpos == 0 || ctx->pe_ctx->siglen == 0
|| ctx->pe_ctx->sigpos > ctx->pe_ctx->fileend) {
return 1; /* FAILED, no signature */
}
/* Strip current signature */
ctx->pe_ctx->fileend = ctx->pe_ctx->sigpos;

865
script.c Normal file
View File

@ -0,0 +1,865 @@
/*
* Script file support library
*
* Copyright (C) 2021-2024 Michał Trojnara <Michal.Trojnara@stunnel.org>
*/
#include "osslsigncode.h"
#include "helpers.h"
#include "utf.h"
typedef enum {comment_hash, comment_xml, comment_c, comment_not_found} comment_style;
typedef struct {
const char *extension;
comment_style comment;
} SCRIPT_FORMAT;
const SCRIPT_FORMAT supported_formats[] = {
{".ps1", comment_hash},
{".ps1xml", comment_xml},
{".psc1", comment_xml},
{".psd1", comment_hash},
{".psm1", comment_hash},
{".cdxml", comment_xml},
{".mof", comment_c},
{NULL, comment_not_found},
};
const char *signature_begin = "SIG # Begin signature block";
const char *signature_end = "SIG # End signature block";
typedef struct {
const char *open;
const char *close;
} SCRIPT_COMMENT;
const SCRIPT_COMMENT comment_text[] = {
[comment_hash] = {"# ", ""},
[comment_xml] = {"<!-- ", " -->"},
[comment_c] = {"/* ", " */"}
};
struct script_ctx_st {
const SCRIPT_COMMENT *comment_text;
int utf;
uint32_t sigpos;
uint32_t fileend;
};
#define LINE_MAX_LEN 100
/* FILE_FORMAT method prototypes */
static FILE_FORMAT_CTX *script_ctx_new(GLOBAL_OPTIONS *options, BIO *hash, BIO *outdata);
static ASN1_OBJECT *script_spc_sip_info_get(u_char **p, int *plen, FILE_FORMAT_CTX *ctx);
static PKCS7 *script_pkcs7_contents_get(FILE_FORMAT_CTX *ctx, BIO *hash, const EVP_MD *md);
static int script_hash_length_get(FILE_FORMAT_CTX *ctx);
static int script_check_file(FILE_FORMAT_CTX *ctx, int detached);
static u_char *script_digest_calc(FILE_FORMAT_CTX *ctx, const EVP_MD *md);
static int script_verify_digests(FILE_FORMAT_CTX *ctx, PKCS7 *p7);
static PKCS7 *script_pkcs7_extract(FILE_FORMAT_CTX *ctx);
static PKCS7 *script_pkcs7_extract_to_nest(FILE_FORMAT_CTX *ctx);
static int script_remove_pkcs7(FILE_FORMAT_CTX *ctx, BIO *hash, BIO *outdata);
static int script_process_data(FILE_FORMAT_CTX *ctx, BIO *hash, BIO *outdata);
static PKCS7 *script_pkcs7_signature_new(FILE_FORMAT_CTX *ctx, BIO *hash);
static int script_append_pkcs7(FILE_FORMAT_CTX *ctx, BIO *outdata, PKCS7 *p7);
static BIO *script_bio_free(BIO *hash, BIO *outdata);
static void script_ctx_cleanup(FILE_FORMAT_CTX *ctx, BIO *hash, BIO *outdata);
FILE_FORMAT file_format_script = {
.ctx_new = script_ctx_new,
.data_blob_get = script_spc_sip_info_get,
.pkcs7_contents_get = script_pkcs7_contents_get,
.hash_length_get = script_hash_length_get,
.check_file = script_check_file,
.digest_calc = script_digest_calc,
.verify_digests = script_verify_digests,
.pkcs7_extract = script_pkcs7_extract,
.pkcs7_extract_to_nest = script_pkcs7_extract_to_nest,
.remove_pkcs7 = script_remove_pkcs7,
.process_data = script_process_data,
.pkcs7_signature_new = script_pkcs7_signature_new,
.append_pkcs7 = script_append_pkcs7,
.bio_free = script_bio_free,
.ctx_cleanup = script_ctx_cleanup,
};
/* helper functions */
static SCRIPT_CTX *script_ctx_get(char *indata, uint32_t filesize, const SCRIPT_COMMENT *comment, int utf);
static void write_commented(FILE_FORMAT_CTX *ctx, BIO *outdata, const char *data, size_t length);
static void write_in_encoding(FILE_FORMAT_CTX *ctx, BIO *outdata, const char *line, size_t length);
static size_t utf8_to_utf16(const char *data, size_t len, uint16_t **out_utf16);
static size_t utf16_to_utf8(const uint16_t *data, size_t len, char **out_utf8);
static BIO *script_digest_calc_bio(FILE_FORMAT_CTX *ctx, const EVP_MD *md);
static int script_digest_convert(BIO *hash, FILE_FORMAT_CTX *ctx, size_t len);
static int script_write_bio(BIO *data, char *indata, size_t len);
/*
* Allocate and return a script file format context.
* [in, out] options: structure holds the input data
* [out] hash: message digest BIO
* [in] outdata: outdata file BIO (unused)
* [returns] pointer to script file format context
*/
static FILE_FORMAT_CTX *script_ctx_new(GLOBAL_OPTIONS *options, BIO *hash, BIO *outdata)
{
FILE_FORMAT_CTX *ctx;
SCRIPT_CTX *script_ctx;
const SCRIPT_FORMAT *fmt;
uint32_t filesize;
const uint8_t utf16_bom[] = {0xff, 0xfe};
size_t name_len;
int utf;
/* squash the unused parameter warning */
(void)outdata;
/* find out whether our format is supported */
name_len = strlen(options->infile);
for (fmt = supported_formats; fmt->comment != comment_not_found; fmt++) {
size_t ext_len = strlen(fmt->extension);
if(name_len > ext_len && !strcasecmp(options->infile + name_len - ext_len, fmt->extension))
break;
}
if (fmt->comment == comment_not_found)
return NULL;
printf("Script file format: %s\n", fmt->extension);
filesize = get_file_size(options->infile);
if (filesize == 0)
return NULL; /* FAILED */
options->indata = map_file(options->infile, filesize);
if (!options->indata) {
return NULL; /* FAILED */
}
utf = memcmp(options->indata, utf16_bom, sizeof utf16_bom) ? 8 : 16;
/* initialize script context */
script_ctx = script_ctx_get(options->indata, filesize, comment_text + fmt->comment, utf);
if (!script_ctx) {
unmap_file(options->indata, filesize);
return NULL; /* FAILED */
}
/* initialize file format context */
ctx = OPENSSL_malloc(sizeof(FILE_FORMAT_CTX));
memset(ctx, 0, sizeof(FILE_FORMAT_CTX));
ctx->format = &file_format_script;
ctx->options = options;
ctx->script_ctx = script_ctx;
if (hash)
BIO_push(hash, BIO_new(BIO_s_null()));
/* FIXME: user interface logic belongs to osslsigncode.c */
if (options->pagehash == 1)
printf("Warning: -ph option is only valid for PE files\n");
if (options->jp >= 0)
printf("Warning: -jp option is only valid for CAB files\n");
return ctx;
}
/*
* Allocate and return SpcSipInfo object.
* Subject Interface Package (SIP) is an internal Microsoft API for
* transforming arbitrary files into a digestible stream.
* These ClassIDs are found in the indirect data section and identify
* the type of processor needed to validate the signature.
* https://github.com/sassoftware/relic/blob/620d0b75ec67c0158a8a9120950abe04327d922f/lib/authenticode/structs.go#L154
* [out] p: SpcSipInfo data
* [out] plen: SpcSipInfo data length
* [in] ctx: structure holds input and output data
* [returns] pointer to ASN1_OBJECT structure corresponding to SPC_SIPINFO_OBJID
*/
static ASN1_OBJECT *script_spc_sip_info_get(u_char **p, int *plen, FILE_FORMAT_CTX *ctx)
{
const u_char SpcUUIDSipInfoPs[] = {
0x1f, 0xcc, 0x3b, 0x60, 0x59, 0x4b, 0x08, 0x4e,
0xb7, 0x24, 0xd2, 0xc6, 0x29, 0x7e, 0xf3, 0x51
};
ASN1_OBJECT *dtype;
SpcSipInfo *si = SpcSipInfo_new();
/* squash the unused parameter warning */
(void)ctx;
ASN1_INTEGER_set(si->a, 65536);
ASN1_INTEGER_set(si->b, 0);
ASN1_INTEGER_set(si->c, 0);
ASN1_INTEGER_set(si->d, 0);
ASN1_INTEGER_set(si->e, 0);
ASN1_INTEGER_set(si->f, 0);
ASN1_OCTET_STRING_set(si->string, SpcUUIDSipInfoPs, sizeof SpcUUIDSipInfoPs);
*plen = i2d_SpcSipInfo(si, NULL);
*p = OPENSSL_malloc((size_t)*plen);
i2d_SpcSipInfo(si, p);
*p -= *plen;
dtype = OBJ_txt2obj(SPC_SIPINFO_OBJID, 1);
SpcSipInfo_free(si);
return dtype; /* OK */
}
/*
* Allocate and return a data content to be signed.
* [in] ctx: structure holds input and output data
* [in] hash: message digest BIO
* [in] md: message digest algorithm
* [returns] data content
*/
static PKCS7 *script_pkcs7_contents_get(FILE_FORMAT_CTX *ctx, BIO *hash, const EVP_MD *md)
{
ASN1_OCTET_STRING *content;
BIO *bhash;
/* squash the unused parameter warning */
(void)hash;
bhash = script_digest_calc_bio(ctx, md);
if (!bhash) {
return NULL; /* FAILED */
}
content = spc_indirect_data_content_get(bhash, ctx);
BIO_free_all(bhash);
return pkcs7_set_content(content);
}
static int script_hash_length_get(FILE_FORMAT_CTX *ctx)
{
return EVP_MD_size(ctx->options->md);
}
/*
* Check if the signature exists.
* FIXME: check it in pkcs7_extract()
* [in, out] ctx: structure holds input and output data
* [in] detached: embedded/detached PKCS#7 signature switch
* [returns] 0 on error or 1 on success
*/
static int script_check_file(FILE_FORMAT_CTX *ctx, int detached)
{
if (!ctx) {
printf("Init error\n\n");
return 0; /* FAILED */
}
if (detached) {
printf("Checking the specified catalog file\n\n");
return 1; /* OK */
}
if (ctx->script_ctx->sigpos == 0
|| ctx->script_ctx->sigpos > ctx->script_ctx->fileend) {
printf("No signature found\n\n");
return 0; /* FAILED */
}
return 1; /* OK */
}
/*
* Compute a simple sha1/sha256 message digest of the MSI file
* for use with a catalog file.
* [in] ctx: structure holds input and output data
* [in] md: message digest algorithm
* [returns] pointer to calculated message digest
*/
static u_char *script_digest_calc(FILE_FORMAT_CTX *ctx, const EVP_MD *md)
{
u_char *mdbuf;
BIO *hash = BIO_new(BIO_f_md());
if (!BIO_set_md(hash, md)) {
printf("Unable to set the message digest of BIO\n");
BIO_free_all(hash);
return NULL; /* FAILED */
}
BIO_push(hash, BIO_new(BIO_s_null()));
if (!script_write_bio(hash, ctx->options->indata, ctx->script_ctx->fileend)) {
BIO_free_all(hash);
return NULL; /* FAILED */
}
mdbuf = OPENSSL_malloc((size_t)EVP_MD_size(md));
BIO_gets(hash, (char*)mdbuf, EVP_MD_size(md));
BIO_free_all(hash);
return mdbuf; /* OK */
}
/*
* Calculate the hash and compare to PKCS#7 signedData.
* [in] ctx: structure holds input and output data
* [in] p7: PKCS#7 signature
* [returns] 0 on error or 1 on success
*/
static int script_verify_digests(FILE_FORMAT_CTX *ctx, PKCS7 *p7)
{
int mdtype = -1;
u_char mdbuf[EVP_MAX_MD_SIZE];
u_char *cmdbuf = NULL;
const EVP_MD *md;
BIO *bhash;
/* FIXME: this shared code most likely belongs in osslsigncode.c */
if (is_content_type(p7, SPC_INDIRECT_DATA_OBJID)) {
ASN1_STRING *content_val = p7->d.sign->contents->d.other->value.sequence;
const u_char *p = content_val->data;
SpcIndirectDataContent *idc = d2i_SpcIndirectDataContent(NULL, &p, content_val->length);
if (idc) {
if (idc->messageDigest && idc->messageDigest->digest && idc->messageDigest->digestAlgorithm) {
mdtype = OBJ_obj2nid(idc->messageDigest->digestAlgorithm->algorithm);
memcpy(mdbuf, idc->messageDigest->digest->data, (size_t)idc->messageDigest->digest->length);
}
SpcIndirectDataContent_free(idc);
}
}
if (mdtype == -1) {
printf("Failed to extract current message digest\n\n");
return 0; /* FAILED */
}
md = EVP_get_digestbynid(mdtype);
bhash = script_digest_calc_bio(ctx, md);
if (!bhash)
return 0; /* FAILED */
cmdbuf = OPENSSL_malloc((size_t)EVP_MD_size(md));
BIO_gets(bhash, (char*)cmdbuf, EVP_MD_size(md));
BIO_free_all(bhash);
if (!compare_digests(mdbuf, cmdbuf, mdtype)) {
printf("Signature verification: failed\n\n");
OPENSSL_free(cmdbuf);
return 0; /* FAILED */
}
OPENSSL_free(cmdbuf);
return 1; /* OK */
}
/*
* Extract existing signature in DER format.
* [in] ctx: structure holds input and output data
* [returns] pointer to PKCS#7 structure
*/
static PKCS7 *script_pkcs7_extract(FILE_FORMAT_CTX *ctx)
{
const char *signature_data = ctx->options->indata + ctx->script_ctx->sigpos;
size_t signature_len = ctx->script_ctx->fileend - ctx->script_ctx->sigpos;
size_t base64_len, der_max_length, der_length;
char *ptr;
BIO *bio_mem, *bio_b64 = NULL;
char *base64_data = NULL;
char *der_data = NULL;
const char *der_tmp;
char *clean_base64 = NULL;
int clean_base64_len = 0;
const char *open_tag = ctx->script_ctx->comment_text->open;
const char *close_tag = ctx->script_ctx->comment_text->close;
size_t open_tag_len = strlen(open_tag);
size_t close_tag_len = strlen(close_tag);
size_t signature_begin_len = strlen(signature_begin);
size_t signature_end_len = strlen(signature_end);
PKCS7 *retval = NULL;
/* extract Base64 signature */
if (ctx->script_ctx->utf == 8) {
base64_len = signature_len;
base64_data = OPENSSL_malloc(base64_len);
memcpy(base64_data, signature_data, base64_len);
} else {
base64_len = utf16_to_utf8((const void *)signature_data,
signature_len, &base64_data);
}
/* allocate memory for cleaned Base64 */
clean_base64 = OPENSSL_malloc(base64_len);
if (!clean_base64) {
printf("Malloc failed\n");
goto cleanup;
}
/* copy clean Base64 data */
for (ptr = base64_data;;) {
/* find the opening tag */
for(;;) {
if (ptr + open_tag_len >= base64_data + base64_len) {
printf("Signature line too long\n");
goto cleanup;
}
if (!memcmp(ptr, open_tag, (size_t)open_tag_len)) {
ptr += open_tag_len;
break;
}
ptr++;
}
/* process signature_begin and signature_end */
if (ptr + signature_begin_len < base64_data + base64_len &&
!memcmp(ptr, signature_begin, signature_begin_len))
ptr += signature_begin_len;
if (ptr + signature_end_len <= base64_data + base64_len &&
!memcmp(ptr, signature_end, signature_end_len))
break; /* success */
/* copy until the closing tag */
for(;;) {
if (ptr + close_tag_len >= base64_data + base64_len) {
printf("Signature line too long\n");
goto cleanup;
}
if (close_tag_len) {
if (!memcmp(ptr, close_tag, (size_t)close_tag_len)) {
ptr += close_tag_len;
break;
}
}
if (*ptr == '\r') {
ptr++;
} else if (*ptr == '\n') {
ptr++;
break;
} else {
clean_base64[clean_base64_len++] = *ptr++;
}
}
}
/* prepare for Base64 decoding */
bio_mem = BIO_new_mem_buf(clean_base64, clean_base64_len);
bio_b64 = BIO_new(BIO_f_base64());
BIO_push(bio_b64, bio_mem);
BIO_set_flags(bio_b64, BIO_FLAGS_BASE64_NO_NL);
/* allocate memory for DER output */
der_max_length = BIO_ctrl_pending(bio_b64);
der_data = OPENSSL_malloc(der_max_length);
if (!der_data)
goto cleanup;
/* decode Base64 to DER */
if (!BIO_read_ex(bio_b64, der_data, der_max_length, &der_length))
goto cleanup;
if (der_length <= 0)
goto cleanup;
/* decode DER */
der_tmp = der_data;
retval = d2i_PKCS7(NULL, (const unsigned char **)&der_tmp, (int)der_length);
cleanup:
OPENSSL_free(base64_data);
OPENSSL_free(clean_base64);
OPENSSL_free(der_data);
BIO_free_all(bio_b64);
return retval;
}
/*
* Extract existing signature in DER format.
* [in] ctx: structure holds input and output data
* [returns] pointer to PKCS#7 structure
*/
static PKCS7 *script_pkcs7_extract_to_nest(FILE_FORMAT_CTX *ctx)
{
return script_pkcs7_extract(ctx);
}
/*
* Remove existing signature.
* [in, out] ctx: structure holds input and output data
* [out] hash: message digest BIO
* [out] outdata: outdata file BIO
* [returns] 1 on error or 0 on success
*/
static int script_remove_pkcs7(FILE_FORMAT_CTX *ctx, BIO *hash, BIO *outdata)
{
/* squash the unused parameter warning */
(void)hash;
if (ctx->script_ctx->sigpos == 0
|| ctx->script_ctx->sigpos > ctx->script_ctx->fileend) {
return 1; /* FAILED, no signature */
}
if (!script_write_bio(outdata, ctx->options->indata, ctx->script_ctx->sigpos)) {
return 1; /* FAILED */
}
return 0; /* OK */
}
/*
* Initialize outdata file and calculate a hash (message digest) of data.
* [in, out] ctx: structure holds input and output data
* [out] hash: message digest BIO
* [out] outdata: outdata file BIO
* [returns] 1 on error or 0 on success
*/
static int script_process_data(FILE_FORMAT_CTX *ctx, BIO *hash, BIO *outdata)
{
if (ctx->script_ctx->sigpos > 0) {
/* Strip current signature */
ctx->script_ctx->fileend = ctx->script_ctx->sigpos;
}
if (!script_write_bio(outdata, ctx->options->indata, ctx->script_ctx->fileend))
return 1; /* FAILED */
if (!script_digest_convert(hash, ctx, ctx->script_ctx->fileend))
return 1; /* FAILED */
return 0; /* OK */
}
/*
* Create a new PKCS#7 signature.
* [in, out] ctx: structure holds input and output data
* [out] hash: message digest BIO
* [returns] pointer to PKCS#7 structure
*/
static PKCS7 *script_pkcs7_signature_new(FILE_FORMAT_CTX *ctx, BIO *hash)
{
ASN1_OCTET_STRING *content;
PKCS7 *p7 = pkcs7_create(ctx);
if (!p7) {
printf("Creating a new signature failed\n");
return NULL; /* FAILED */
}
if (!add_indirect_data_object(p7)) {
printf("Adding SPC_INDIRECT_DATA_OBJID failed\n");
PKCS7_free(p7);
return NULL; /* FAILED */
}
content = spc_indirect_data_content_get(hash, ctx);
if (!content) {
printf("Failed to get spcIndirectDataContent\n");
return NULL; /* FAILED */
}
if (!sign_spc_indirect_data_content(p7, content)) {
printf("Failed to set signed content\n");
PKCS7_free(p7);
ASN1_OCTET_STRING_free(content);
return NULL; /* FAILED */
}
ASN1_OCTET_STRING_free(content);
return p7;
}
/*
* Append signature to the outfile.
* [in, out] ctx: structure holds input and output data
* [out] outdata: outdata file BIO
* [in] p7: PKCS#7 signature
* [returns] 1 on error or 0 on success
*/
static int script_append_pkcs7(FILE_FORMAT_CTX *ctx, BIO *outdata, PKCS7 *p7)
{
BIO *bio, *b64;
BUF_MEM *buffer;
size_t i;
static const char crlf[] = {0x0d, 0x0a};
/* convert to BASE64 */
b64 = BIO_new(BIO_f_base64()); /* BIO for base64 encoding */
if (!b64)
return 1; /* FAILED */
BIO_set_flags(b64, BIO_FLAGS_BASE64_NO_NL);
bio = BIO_new(BIO_s_mem()); /* BIO to hold the base64 data */
if (!bio) {
BIO_free(b64);
return 1; /* FAILED */
}
bio = BIO_push(b64, bio); /* chain base64 BIO onto memory BIO */
if (!i2d_PKCS7_bio(bio, p7)) {
BIO_free_all(bio);
return 1; /* FAILED */
}
(void)BIO_flush(bio);
BIO_get_mem_ptr(bio, &buffer);
(void)BIO_set_close(bio, BIO_NOCLOSE);
/* split to individual lines and write to outdata */
write_commented(ctx, outdata, signature_begin, strlen(signature_begin));
for (i = 0; i < buffer->length; i += 64) {
write_commented(ctx, outdata, buffer->data + i,
buffer->length - i < 64 ? buffer->length - i : 64);
}
write_commented(ctx, outdata, signature_end, strlen(signature_end));
/* signtool expects CRLF terminator at the end of the text file */
write_in_encoding(ctx, outdata, crlf, sizeof crlf);
BUF_MEM_free(buffer);
BIO_free_all(bio);
return 0; /* OK */
}
/*
* Free up an entire outdata BIO chain.
* [out] hash: message digest BIO
* [out] outdata: outdata file BIO
* [returns] none
*/
static BIO *script_bio_free(BIO *hash, BIO *outdata)
{
BIO_free_all(hash);
BIO_free_all(outdata);
/* FIXME: why doesn't the function return void instead of BIO *? */
return NULL;
}
/*
* Deallocate a FILE_FORMAT_CTX structure and script format specific structures.
* [in, out] ctx: structure holds input and output data
* [out] hash: message digest BIO
* [out] outdata: outdata file BIO
* [returns] none
*/
static void script_ctx_cleanup(FILE_FORMAT_CTX *ctx, BIO *hash, BIO *outdata)
{
if (outdata) {
BIO_free_all(hash);
BIO_free_all(outdata);
}
unmap_file(ctx->options->indata, ctx->script_ctx->fileend);
OPENSSL_free(ctx->script_ctx);
OPENSSL_free(ctx);
}
/*
* Script helper functions
*/
static SCRIPT_CTX *script_ctx_get(char *indata, uint32_t filesize, const SCRIPT_COMMENT *comment, int utf)
{
SCRIPT_CTX *script_ctx;
const char *input_pos, *signature_pos, *ptr;
uint32_t line[LINE_MAX_LEN], sig_start[40], cr, lf;
size_t sig_pos = 0, line_pos = 0, sig_start_pos = 0;
utf8DecodeRune("\r", 1, &cr);
utf8DecodeRune("\n", 1, &lf);
/* compute runes for the beginning of the signature */
for (ptr = comment->open; *ptr; sig_start_pos++)
ptr = utf8DecodeRune(ptr, 1, sig_start + sig_start_pos);
for (ptr = signature_begin; *ptr; sig_start_pos++)
ptr = utf8DecodeRune(ptr, 1, sig_start + sig_start_pos);
for (ptr = comment->close; *ptr; sig_start_pos++)
ptr = utf8DecodeRune(ptr, 1, sig_start + sig_start_pos);
/* find the beginning of the signature */
for (signature_pos = input_pos = indata; input_pos < indata + filesize; ) {
const char *input_prev = input_pos;
input_pos = utf == 8 ?
utf8DecodeRune(input_pos,
(size_t)(indata + filesize - input_pos),
line + line_pos) :
(const char *)utf16DecodeRune((const void *)input_pos,
(size_t)(indata + filesize - input_pos)/2,
line + line_pos);
if (!memcmp(line + line_pos, &lf, sizeof lf)) {
if (line_pos >= sig_start_pos &&
!memcmp(line, sig_start, sig_start_pos * sizeof(uint32_t))) {
sig_pos = (size_t)(signature_pos - indata);
if (!memcmp(line + line_pos - 1, &cr, sizeof cr))
sig_pos -= (size_t)utf / 8;
break; /* SUCCEEDED */
}
line_pos = 0;
signature_pos = input_prev; /* previous line */
} else if (line_pos < LINE_MAX_LEN - 1) {
line_pos++; /* we can ignore lines longer than our buffer */
}
}
printf("Signature position: %ld\n", sig_pos);
script_ctx = OPENSSL_malloc(sizeof(SCRIPT_CTX));
script_ctx->comment_text = comment;
script_ctx->utf = utf;
script_ctx->fileend = filesize;
script_ctx->sigpos = (uint32_t)sig_pos;
return script_ctx; /* OK */
}
/* write a commented line to the bio:
* - prepend with CRLF ("\r\n")
* - add opening/closing comment tags
* - adjust encoding if needed */
static void write_commented(FILE_FORMAT_CTX *ctx, BIO *outdata, const char *data, size_t length)
{
const char *open_tag = ctx->script_ctx->comment_text->open;
const char *close_tag = ctx->script_ctx->comment_text->close;
size_t open_tag_len = strlen(open_tag);
size_t close_tag_len = strlen(close_tag);
char *line;
/* the buffer needs to be long enough for:
* - CRLF ("\r\n")
* - opening tag
* - up to 64 bytes of data
* - closing tag
* - trailing NUL ("\0") */
line = OPENSSL_malloc(2 + open_tag_len + length + close_tag_len + 1);
strcpy(line, "\r\n");
strcat(line, open_tag);
memcpy(line + 2 + open_tag_len, data, length);
line[2 + open_tag_len + length] = '\0';
strcat(line, close_tag);
/* adjust encoding */
write_in_encoding(ctx, outdata, line, strlen(line));
OPENSSL_free(line);
}
/* adjust encoding if needed */
static void write_in_encoding(FILE_FORMAT_CTX *ctx, BIO *outdata, const char *line, size_t length)
{
size_t written;
if (ctx->script_ctx->utf == 8) {
BIO_write_ex(outdata, line, length, &written);
} else {
uint16_t *utf16_data = NULL;
size_t utf16_len = utf8_to_utf16(line, length, &utf16_data);
BIO_write_ex(outdata, utf16_data, utf16_len, &written);
OPENSSL_free(utf16_data);
}
}
/* convert len bytes of UTF-8 to UTF-16
* return the number of output bytes
*/
static size_t utf8_to_utf16(const char *data, size_t len, uint16_t **out_utf16)
{
size_t utf16_len = utf8UTF16Count(data, len);
*out_utf16 = OPENSSL_malloc(utf16_len * sizeof(uint16_t));
if (!*out_utf16)
return 0; /* memory allocation failed */
const char *s = data;
uint16_t *d = *out_utf16;
uint32_t rune;
size_t remaining_len = len;
while (remaining_len > 0) {
s = utf8DecodeRune(s, remaining_len, &rune);
if (!s || s < data)
break; /* invalid UTF-8 sequence */
size_t consumed = (size_t)(s - data);
remaining_len -= consumed;
data = s;
d += utf16EncodeRune(rune, d);
}
return (size_t)(2 * (d - *out_utf16));
}
/* convert len bytes of UTF-16 to UTF-8
* return the number of output bytes
*/
static size_t utf16_to_utf8(const uint16_t *data, size_t len, char **out_utf8)
{
size_t utf8_len = utf16UTF8Count(data, len/2);
*out_utf8 = OPENSSL_malloc(utf8_len);
if (!*out_utf8)
return 0; /* memory allocation failed */
const uint16_t *s = data;
char *d = *out_utf8;
uint32_t rune;
size_t remaining_len = len/2;
while (remaining_len > 0) {
s = utf16DecodeRune(s, remaining_len, &rune);
if (!s || s < data)
break; /* invalid UTF-16 sequence */
size_t consumed = (size_t)(s - data);
remaining_len -= consumed;
data = s;
d += utf8EncodeRune(rune, d);
}
return (size_t)(d - *out_utf8);
}
/*
* Compute a message digest value of a signed or unsigned script file.
* [in] ctx: structure holds input and output data
* [in] md: message digest algorithm
* [returns] calculated message digest BIO
*/
static BIO *script_digest_calc_bio(FILE_FORMAT_CTX *ctx, const EVP_MD *md)
{
size_t fileend;
BIO *hash = BIO_new(BIO_f_md());
if (ctx->script_ctx->sigpos)
fileend = ctx->script_ctx->sigpos;
else
fileend = ctx->script_ctx->fileend;
if (!BIO_set_md(hash, md)) {
printf("Unable to set the message digest of BIO\n");
BIO_free_all(hash);
return NULL; /* FAILED */
}
BIO_push(hash, BIO_new(BIO_s_null()));
if (!script_digest_convert(hash, ctx, fileend)) {
printf("Unable calc a message digest value\n");
BIO_free_all(hash);
return NULL; /* FAILED */
}
return hash;
}
/*
* Compute a message digest value
* [in, out] hash: message digest BIO
* [in] ctx: structure holds input and output data
* [in] len: mapped file length
* [returns] 0 on error or 1 on success
*/
static int script_digest_convert(BIO *hash, FILE_FORMAT_CTX *ctx, size_t len)
{
if (ctx->script_ctx->utf == 8) { /* need to convert to UTF-16 */
uint16_t *utf16_data = NULL;
size_t utf16_len = utf8_to_utf16(ctx->options->indata,
len, &utf16_data);
if (!script_write_bio(hash, (char *)utf16_data, utf16_len)) {
OPENSSL_free(utf16_data);
return 0; /* FAILED */
}
OPENSSL_free(utf16_data);
} else { /* already UTF-16 -> no need to convert */
if (!script_write_bio(hash, ctx->options->indata, len)) {
return 0; /* FAILED */
}
}
return 1; /* OK */
}
/*
* Write len bytes from data to BIO
* [in, out] bio: message digest or outdata BIO
* [in] indata: mapped file
* [in] len: indata length
* [returns] 0 on error or 1 on success
*/
static int script_write_bio(BIO *bio, char *indata, size_t len)
{
size_t i = 0, written;
while (len > 0) {
if (!BIO_write_ex(bio, indata + i, len, &written))
return 0; /* FAILED */
len -= written;
i += written;
}
return 1; /* OK */
}
/*
Local Variables:
c-basic-offset: 4
tab-width: 4
indent-tabs-mode: nil
End:
vim: set ts=4 expandtab:
*/

0
tests/files/unsigned.256appx Executable file → Normal file
View File

0
tests/files/unsigned.512appx Executable file → Normal file
View File

Binary file not shown.

BIN
tests/files/unsigned.mof Normal file

Binary file not shown.

2
tests/files/unsigned.ps1 Normal file
View File

@ -0,0 +1,2 @@
cls
Write-Host "żółć"

View File

@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<PSConsoleFile ConsoleSchemaVersion="1.0">
<PSVersion>5.1.19041.3930</PSVersion>
<PSSnapIns />
</PSConsoleFile>

View File

@ -46,3 +46,12 @@ CATATTR1=0x11010001:OSAttr:2:6.0
<HASH>CABfile=..\files\unsigned.ex_
<HASH>CABfileATTR1=0x11010001:File:unsigned.ex_
<HASH>PS1file=..\files\unsigned.ps1
<HASH>PS1fileATTR1=0x11010001:File:unsigned.ps1
<HASH>PSC1file=..\files\unsigned.psc1
<HASH>PSC1fileATTR1=0x11010001:File:unsigned.psc1
<HASH>MOFfile=..\files\unsigned.mof
<HASH>MOFfileATTR1=0x11010001:File:unsigned.mof

347
utf.c Normal file
View File

@ -0,0 +1,347 @@
// utf by pietro gagliardi (andlabs) — https://github.com/andlabs/utf/
// 10 november 2016
#include "utf.h"
// this code imitates Go's unicode/utf8 and unicode/utf16
// the biggest difference is that a rune is unsigned instead of signed (because Go guarantees what a right shift on a signed number will do, whereas C does not)
// it is also an imitation so we can license it under looser terms than the Go source
#define badrune 0xFFFD
// encoded must be at most 4 bytes
// TODO clean this code up somehow
size_t utf8EncodeRune(uint32_t rune, char *encoded)
{
uint8_t b, c, d, e;
size_t n;
// not in the valid range for Unicode
if (rune > 0x10FFFF)
rune = badrune;
// surrogate runes cannot be encoded
if (rune >= 0xD800 && rune < 0xE000)
rune = badrune;
if (rune < 0x80) { // ASCII bytes represent themselves
b = (uint8_t) (rune & 0xFF);
n = 1;
goto done;
}
if (rune < 0x800) { // two-byte encoding
c = (uint8_t) (rune & 0x3F);
c |= 0x80;
rune >>= 6;
b = (uint8_t) (rune & 0x1F);
b |= 0xC0;
n = 2;
goto done;
}
if (rune < 0x10000) { // three-byte encoding
d = (uint8_t) (rune & 0x3F);
d |= 0x80;
rune >>= 6;
c = (uint8_t) (rune & 0x3F);
c |= 0x80;
rune >>= 6;
b = (uint8_t) (rune & 0x0F);
b |= 0xE0;
n = 3;
goto done;
}
// otherwise use a four-byte encoding
e = (uint8_t) (rune & 0x3F);
e |= 0x80;
rune >>= 6;
d = (uint8_t) (rune & 0x3F);
d |= 0x80;
rune >>= 6;
c = (uint8_t) (rune & 0x3F);
c |= 0x80;
rune >>= 6;
b = (uint8_t) (rune & 0x07);
b |= 0xF0;
n = 4;
done:
encoded[0] = (char)b;
if (n > 1)
encoded[1] = (char)c;
if (n > 2)
encoded[2] = (char)d;
if (n > 3)
encoded[3] = (char)e;
return n;
}
const char *utf8DecodeRune(const char *s, size_t nElem, uint32_t *rune)
{
uint8_t b, c;
uint8_t lowestAllowed, highestAllowed;
size_t i, expected;
int bad;
b = (uint8_t) (*s);
if (b < 0x80) { // ASCII bytes represent themselves
*rune = b;
s++;
return s;
}
// 0xC0 and 0xC1 cover 2-byte overlong equivalents
// 0xF5 to 0xFD cover values > 0x10FFFF
// 0xFE and 0xFF were never defined (always illegal)
if (b < 0xC2 || b > 0xF4) { // invalid
*rune = badrune;
s++;
return s;
}
// this determines the range of allowed first continuation bytes
lowestAllowed = 0x80;
highestAllowed = 0xBF;
switch (b) {
case 0xE0:
// disallow 3-byte overlong equivalents
lowestAllowed = 0xA0;
break;
case 0xED:
// disallow surrogate characters
highestAllowed = 0x9F;
break;
case 0xF0:
// disallow 4-byte overlong equivalents
lowestAllowed = 0x90;
break;
case 0xF4:
// disallow values > 0x10FFFF
highestAllowed = 0x8F;
break;
}
// and this determines how many continuation bytes are expected
expected = 1;
if (b >= 0xE0)
expected++;
if (b >= 0xF0)
expected++;
if (nElem != 0) { // are there enough bytes?
nElem--;
if (nElem < expected) { // nope
*rune = badrune;
s++;
return s;
}
}
// ensure that everything is correct
// if not, **only** consume the initial byte
bad = 0;
for (i = 0; i < expected; i++) {
c = (uint8_t) (s[1 + i]);
if (c < lowestAllowed || c > highestAllowed) {
bad = 1;
break;
}
// the old lowestAllowed and highestAllowed is only for the first continuation byte
lowestAllowed = 0x80;
highestAllowed = 0xBF;
}
if (bad) {
*rune = badrune;
s++;
return s;
}
// now do the topmost bits
if (b < 0xE0)
*rune = b & 0x1F;
else if (b < 0xF0)
*rune = b & 0x0F;
else
*rune = b & 0x07;
s++; // we can finally move on
// now do the continuation bytes
for (; expected; expected--) {
c = (uint8_t) (*s);
s++;
c &= 0x3F; // strip continuation bits
*rune <<= 6;
*rune |= c;
}
return s;
}
// encoded must have at most 2 elements
size_t utf16EncodeRune(uint32_t rune, uint16_t *encoded)
{
uint16_t low, high;
// not in the valid range for Unicode
if (rune > 0x10FFFF)
rune = badrune;
// surrogate runes cannot be encoded
if (rune >= 0xD800 && rune < 0xE000)
rune = badrune;
if (rune < 0x10000) {
encoded[0] = (uint16_t) rune;
return 1;
}
rune -= 0x10000;
low = (uint16_t) (rune & 0x3FF);
rune >>= 10;
high = (uint16_t) (rune & 0x3FF);
encoded[0] = high | 0xD800;
encoded[1] = low | 0xDC00;
return 2;
}
// TODO see if this can be cleaned up somehow
const uint16_t *utf16DecodeRune(const uint16_t *s, size_t nElem, uint32_t *rune)
{
uint16_t high, low;
if (*s < 0xD800 || *s >= 0xE000) {
// self-representing character
*rune = *s;
s++;
return s;
}
if (*s >= 0xDC00) {
// out-of-order surrogates
*rune = badrune;
s++;
return s;
}
if (nElem == 1) { // not enough elements
*rune = badrune;
s++;
return s;
}
high = *s;
high &= 0x3FF;
if (s[1] < 0xDC00 || s[1] >= 0xE000) {
// bad surrogate pair
*rune = badrune;
s++;
return s;
}
s++;
low = *s;
s++;
low &= 0x3FF;
*rune = high;
*rune <<= 10;
*rune |= low;
*rune += 0x10000;
return s;
}
// TODO find a way to reduce the code in all of these somehow
// TODO find a way to remove u as well
size_t utf8RuneCount(const char *s, size_t nElem)
{
size_t len;
uint32_t rune;
if (nElem != 0) {
const char *t, *u;
len = 0;
t = s;
while (nElem != 0) {
u = utf8DecodeRune(t, nElem, &rune);
len++;
nElem -= (size_t)(u - t);
t = u;
}
return len;
}
len = 0;
while (*s) {
s = utf8DecodeRune(s, nElem, &rune);
len++;
}
return len;
}
size_t utf8UTF16Count(const char *s, size_t nElem)
{
size_t len;
uint32_t rune;
uint16_t encoded[2];
if (nElem != 0) {
const char *t, *u;
len = 0;
t = s;
while (nElem != 0) {
u = utf8DecodeRune(t, nElem, &rune);
len += utf16EncodeRune(rune, encoded);
nElem -= (size_t)(u - t);
t = u;
}
return len;
}
len = 0;
while (*s) {
s = utf8DecodeRune(s, nElem, &rune);
len += utf16EncodeRune(rune, encoded);
}
return len;
}
size_t utf16RuneCount(const uint16_t *s, size_t nElem)
{
size_t len;
uint32_t rune;
if (nElem != 0) {
const uint16_t *t, *u;
len = 0;
t = s;
while (nElem != 0) {
u = utf16DecodeRune(t, nElem, &rune);
len++;
nElem -= (size_t)(u - t);
t = u;
}
return len;
}
len = 0;
while (*s) {
s = utf16DecodeRune(s, nElem, &rune);
len++;
}
return len;
}
size_t utf16UTF8Count(const uint16_t *s, size_t nElem)
{
size_t len;
uint32_t rune;
char encoded[4];
if (nElem != 0) {
const uint16_t *t, *u;
len = 0;
t = s;
while (nElem != 0) {
u = utf16DecodeRune(t, nElem, &rune);
len += utf8EncodeRune(rune, encoded);
nElem -= (size_t)(u - t);
t = u;
}
return len;
}
len = 0;
while (*s) {
s = utf16DecodeRune(s, nElem, &rune);
len += utf8EncodeRune(rune, encoded);
}
return len;
}

61
utf.h Normal file
View File

@ -0,0 +1,61 @@
// utf by pietro gagliardi (andlabs) — https://github.com/andlabs/utf/
// 10 november 2016
#ifdef __cplusplus
extern "C" {
#endif
#include <stddef.h>
#include <stdint.h>
// if nElem == 0, assume the buffer has no upper limit and is '\0' terminated
// otherwise, assume buffer is NOT '\0' terminated but is bounded by nElem *elements*
extern size_t utf8EncodeRune(uint32_t rune, char *encoded);
extern const char *utf8DecodeRune(const char *s, size_t nElem, uint32_t *rune);
extern size_t utf16EncodeRune(uint32_t rune, uint16_t *encoded);
extern const uint16_t *utf16DecodeRune(const uint16_t *s, size_t nElem, uint32_t *rune);
extern size_t utf8RuneCount(const char *s, size_t nElem);
extern size_t utf8UTF16Count(const char *s, size_t nElem);
extern size_t utf16RuneCount(const uint16_t *s, size_t nElem);
extern size_t utf16UTF8Count(const uint16_t *s, size_t nElem);
#ifdef __cplusplus
}
// Provide overloads on Windows for using these functions with wchar_t and WCHAR when wchar_t is a keyword in C++ mode (the default).
// Otherwise, you'd need to cast to pass a wchar_t pointer, WCHAR pointer, or equivalent to these functions.
// We use __wchar_t to be independent of the setting; see https://blogs.msdn.microsoft.com/oldnewthing/20161201-00/?p=94836 (ironically posted one day after I initially wrote this code!).
// TODO check this on MinGW-w64
// TODO check this under /Wall
// TODO C-style casts enough? or will that fail in /Wall?
// TODO same for UniChar/unichar on Mac? if both are unsigned then we have nothing to worry about
#if defined(_MSC_VER)
inline size_t utf16EncodeRune(uint32_t rune, __wchar_t *encoded)
{
return utf16EncodeRune(rune, reinterpret_cast<uint16_t *>(encoded));
}
inline const __wchar_t *utf16DecodeRune(const __wchar_t *s, size_t nElem, uint32_t *rune)
{
const uint16_t *ret;
ret = utf16DecodeRune(reinterpret_cast<const uint16_t *>(s), nElem, rune);
return reinterpret_cast<const __wchar_t *>(ret);
}
inline size_t utf16RuneCount(const __wchar_t *s, size_t nElem)
{
return utf16RuneCount(reinterpret_cast<const uint16_t *>(s), nElem);
}
inline size_t utf16UTF8Count(const __wchar_t *s, size_t nElem)
{
return utf16UTF8Count(reinterpret_cast<const uint16_t *>(s), nElem);
}
#endif
#endif