aboutsummaryrefslogtreecommitdiff
path: root/arch/all/iconv/src/conv/utf8_to_wchar.c
blob: 22654ef96f8e3783d0ad10a339629aafcfafb9a1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/* ****************************************************************************
 * conv/utf8_to_wchar.c -- UTF-8 to `wchar_t`.
 * Copyright (C) 2017 Thomas "Cakeisalie5" Touhey <thomas@touhey.fr>
 *
 * This file is part of the 'all/iconv' module in libcarrot, an experimental
 * modular libc project.
 *
 * This file is governed by the CeCILL-C license under French law and abiding
 * by the rules of distribution of free software. You can use, modify and/or
 * redistribute it under the terms of the CeCILL-C license as circulated by
 * CEA, CNRS and INRIA at the following URL: http://www.cecill.info
 *
 * As a counterpart to the access to the source code and rights to copy, modify
 * and redistribute granted by the license, users are provided only with a
 * limited warranty and the software's author, the holder of the economic
 * rights, and the successive licensors have only limited liability.
 *
 * In this respect, the user's attention is drawn to the risks associated with
 * loading, using, modifying and/or developing and reproducing the software by
 * the user in light of its specific status of free software, that may mean
 * that it is complicated to manipulate, and that also therefore means that it
 * is reserved for developers and experienced professionals having in-depth
 * computer knowledge. Users are therefore encouraged to load and test the
 * software's suitability as regards their requirements in conditions enabling
 * the security of their systems and/or data to be ensured and, more generally,
 * to use and operate it in the same conditions as regards security.
 *
 * The fact that you are presently reading this means you have had knowledge of
 * the CeCILL-C license and that you accept its terms.
 * ************************************************************************* */
#include "../iconv.h"
/* Record the decoding state derived from a UTF-8 lead byte: stores `c_count`
 * into the local `count` and `c_ini` into the local `val` of the scope where
 * the macro is expanded (both must already be declared there).
 *
 * The expansion is a bare brace block, not a `do { } while (0)` statement:
 * an invocation placed directly before an `else` must therefore NOT be
 * followed by a semicolon. */
#define set_utf8_cookie(c_count, c_ini) { \
	count = (c_count); \
	val   = (c_ini); }

/**
 *	__iconv_utf8_to_wchar:
 *	Decode one UTF-8 sequence from the step input into a wchar_t.
 *
 *	The decoded value is stored in the step output buffer or, when a next
 *	step is chained, in a temporary that is handed to that step as its input
 *	(along with this step's output buffer) before `step->next_func` is called.
 *
 *	Lead bytes announcing one to five continuation bytes are accepted.
 *	Overlong encodings and out-of-range code points are not rejected.
 *
 *	On success with a multi-byte sequence, `step->in` and `step->inleft` are
 *	moved by the number of continuation bytes only; the lead byte itself is
 *	not accounted for here (presumably the caller consumes it -- to be
 *	confirmed against the caller). On error they are left untouched.
 *
 *	NOTE(review): the first input byte is read without checking
 *	`step->inleft`; this assumes the caller provides at least one byte.
 *
 *	@arg	step		the step data.
 *	@return				0 if ok and no next step, the next step's return
 *						value if one is chained, I_E2BIG if the output has
 *						no room for a wchar_t, I_EILSEQ on an invalid lead
 *						or continuation byte, I_EINVAL if the sequence is
 *						truncated.
 */

int __iconv_utf8_to_wchar(struct __iconv_step *step)
{
	/* Unsigned view of the input so byte values never depend on the
	 * signedness of plain `char`. */
	unsigned char *in = (void*)step->in;
	int byte = in[0];
	wchar_t *out; wchar_t val;
	int outleft;

	/* Initialize the output. */
	if (step->next) {
		/* Next step input buffer: it reads the decoded character. */
		step->next->in = (void*)&val;
		step->next->inleft = sizeof(wchar_t);
		outleft = sizeof(wchar_t);
		out = &val;

		/* Next step output buffer. */
		step->next->out     = step->out;
		step->next->outleft = step->outleft;
	} else {
		out = (wchar_t*)step->out;
		outleft = step->outleft;
	}

	/* Check if too big. */
	if (outleft < (int)sizeof(wchar_t))
		return (I_E2BIG);

	/* Check if it's special (i.e. not plain ASCII). */
	if (byte & 0x80) {
		int count; /* number of continuation bytes announced */
		int i;

		if (~byte & 0x40) {
			/* error: not a starting byte! */
			return (I_EILSEQ);
		}

		/* Make the cookie: continuation count and lead-byte payload.
		 * The macro expands to a brace block, hence no semicolons. */
		if (~byte & 0x20)
			set_utf8_cookie(1, byte & 0x1F)
		else if (~byte & 0x10)
			set_utf8_cookie(2, byte & 0x0F)
		else if (~byte & 0x08)
			set_utf8_cookie(3, byte & 0x07)
		else if (~byte & 0x04)
			set_utf8_cookie(4, byte & 0x03)
		else if (~byte & 0x02)
			set_utf8_cookie(5, byte & 0x01)
		else {
			/* error: not a valid starting byte */
			return (I_EILSEQ);
		}

		/* The lead byte plus `count` continuation bytes must be available.
		 * Written without a subtraction so that it cannot wrap around
		 * should `inleft` be an unsigned type. */
		if (step->inleft <= count)
			return (I_EINVAL);

		/* Fold in every one of the `count` continuation bytes. */
		for (i = 1; i <= count; i++) {
			int cont = in[i];

			if ((cont & 0xC0) != 0x80) {
				/* not a valid continuation byte! */
				return (I_EILSEQ);
			}

			val = (val << 6) | (cont & 0x3F);
		}

		/* Advance only once the whole sequence has been validated, so that
		 * a rejected sequence leaves the input position untouched. */
		step->in     += count;
		step->inleft -= count;
	} else val = byte;

	/* Put into the final thing. */
	*out = val;

	/* Call next step, end otherwise. */
	if (step->next)
		return ((*step->next_func)(step->next));
	return (0);
}