/*- * Copyright (c)2015 Takehiko NOZAKI, * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include "citrus_namespace.h" #include "citrus_bcs.h" #include "citrus_module.h" #include "citrus_hash.h" #include "citrus_iconv.h" #include "citrus_iconv_u8u32.h" _CITRUS_ICONV_DECLS(iconv_u8u32); _CITRUS_ICONV_DEF_OPS(iconv_u8u32); #define _ENDIAN_UNKNOWN 0 #define _ENDIAN_BIG 1 #define _ENDIAN_LITTLE 2 struct bom { int byteorder; char ch[4]; int order[4]; }; struct _citrus_iconv_u8u32_shared { int byteorder; }; struct _citrus_iconv_u8u32_context { int byteorder; }; static const char utf8len[] = { /* 0x00 - 0x0f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10 - 0x1f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20 - 0x2f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30 - 0x3f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 - 0x4f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x50 - 0x5f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 - 0x6f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x70 - 0x7f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 - 0x8f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90 - 0x9f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0 - 0xaf */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0 - 0xbf */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0 - 0xcf */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xd0 - 0xdf */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xe0 - 0xef */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xf0 - 0xff */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0, }; static const char utf8mask[] = { 0x00, /* dummy */ 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 }; static const struct bom big = { _ENDIAN_BIG, { 0x00, 0x00, 0xfe, 0xff }, { 3, 2, 1, 0 } }; static const struct bom little = { _ENDIAN_LITTLE, { 0xff, 0xfe, 0x00, 0x00 }, { 0, 1, 2, 3 } }; static const struct bom *bom[] = { #if BYTE_ORDER == BIG_ENDIAN &big, #else &little, #endif &big, &little }; static __inline size_t ucs4len(int32_t ucs4) { if ((ucs4 & ~0x7f) == 0) { return 1; } else if ((ucs4 & ~0x7ff) == 0) { return 2; } else if ((ucs4 & ~0xffff) == 0) { if (ucs4 < 0xd800 || ucs4 > 0xdfff) return 3; } else if ((ucs4 & ~0x1fffff) == 0) { return 4; } else if ((ucs4 & ~0x3ffffff) == 0) { return 5; } else if ((ucs4 & ~0x7fffffff) == 0) { return 6; } return 0; } int _citrus_iconv_u8u32_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops, uint32_t expected_version) { if (expected_version < _CITRUS_ICONV_ABI_VERSION || lenops < sizeof(*ops)) return EINVAL; memcpy(ops, &_citrus_iconv_u8u32_iconv_ops, sizeof(_citrus_iconv_u8u32_iconv_ops)); return 0; } /* ---------------------------------------------------------------------- */ static int /*ARGSUSED*/ _citrus_iconv_u8u32_iconv_init_shared(struct _citrus_iconv_shared *ci, const char * __restrict curdir, const char * __restrict src, const char * __restrict dst, const void * __restrict var, size_t lenvar) { struct _citrus_iconv_u8u32_shared *shared; const char *p; shared = malloc(sizeof(*shared)); if (shared == NULL) return errno; shared->byteorder = _ENDIAN_UNKNOWN; if (lenvar > 0) { p = (const char *)var; if (!_bcs_strncasecmp(p, "UTF32BE", lenvar)) shared->byteorder = _ENDIAN_BIG; else if (!_bcs_strncasecmp(p, "UTF32LE", lenvar)) shared->byteorder = _ENDIAN_LITTLE; } ci->ci_closure = (void *)shared; return 0; } static void _citrus_iconv_u8u32_iconv_uninit_shared(struct _citrus_iconv_shared *ci) { free(ci->ci_closure); } static int _citrus_iconv_u8u32_iconv_init_context(struct _citrus_iconv *cv) { const struct _citrus_iconv_u8u32_shared *shared; struct _citrus_iconv_u8u32_context *ctx; shared = cv->cv_shared->ci_closure; ctx = malloc(sizeof(*ctx)); if (ctx == NULL) return errno; ctx->byteorder = shared->byteorder; cv->cv_closure = (void *)ctx; return 0; } static void _citrus_iconv_u8u32_iconv_uninit_context(struct _citrus_iconv *cv) { free(cv->cv_closure); } static int _citrus_iconv_u8u32_iconv_convert(struct _citrus_iconv * __restrict cv, const char * __restrict * __restrict inp, size_t * __restrict inbytesp, char * __restrict * __restrict outp, size_t * __restrict outbytesp, uint32_t flags, size_t * __restrict invalids) { const struct _citrus_iconv_u8u32_shared *shared; struct _citrus_iconv_u8u32_context *ctx; const char *in; char *out; size_t inbytes, outbytes, len, i; const struct bom *bp; int32_t ucs4; int ret; shared = cv->cv_shared->ci_closure; ctx = cv->cv_closure; if (inp == NULL || *inp == NULL) { if (outp == NULL || *outp == NULL) ctx->byteorder = shared->byteorder; *invalids = 0; return 0; } in = *inp; inbytes = *inbytesp; out = *outp; outbytes = *outbytesp; switch (ctx->byteorder) { case _ENDIAN_UNKNOWN: bp = bom[ctx->byteorder]; if (outbytes < 4) { ret = E2BIG; goto fatal; } memcpy(out, bp->ch, 4); outbytes -=4, out += 4; ctx->byteorder = bp->byteorder; break; case _ENDIAN_BIG: case _ENDIAN_LITTLE: bp = bom[ctx->byteorder]; break; default: ret = EILSEQ; goto fatal; } for (;;) { if (inbytes == 0) break; if (outbytes < 4) { ret = E2BIG; goto fatal; } len = utf8len[*in & 0xff]; if (len == 0) { ret = EILSEQ; goto fatal; } if (inbytes < len) { ret = EINVAL; goto fatal; } ucs4 = in[0] & utf8mask[len]; for (i = 1; i < len; ++i) { if ((in[i] & 0xc0) != 0x80) { ret = EILSEQ; goto fatal; } ucs4 <<= 6; ucs4 |= in[i] & 0x3f; } if (ucs4len(ucs4) != len) { ret = EILSEQ; goto fatal; } out[bp->order[0]] = ucs4 & 0xff; out[bp->order[1]] = (ucs4 >> 8) & 0xff; out[bp->order[2]] = (ucs4 >> 16) & 0xff; out[bp->order[3]] = (ucs4 >> 24) & 0xff; inbytes -= len, in += len; outbytes -= 4, out += 4; } ret = 0; fatal: *inp = in; *inbytesp = inbytes; *outp = out; *outbytesp = outbytes; *invalids = 0; return ret; }