Go to the source code of this file.
|
#define | isutf(c) (((c)&0xC0)!=0x80) |
|
|
int | switch_u8_toucs (uint32_t *dest, int sz, char *src, int srcsz) |
|
int | switch_u8_toutf8 (char *dest, int sz, uint32_t *src, int srcsz) |
|
int | switch_u8_wc_toutf8 (char *dest, uint32_t ch) |
|
int | switch_u8_offset (char *str, int charnum) |
|
int | switch_u8_charnum (char *s, int offset) |
|
uint32_t | switch_u8_nextchar (char *s, int *i) |
|
void | switch_u8_inc (char *s, int *i) |
|
void | switch_u8_dec (char *s, int *i) |
|
int | switch_u8_seqlen (char *s) |
|
int | switch_u8_read_escape_sequence (char *src, uint32_t *dest) |
|
int | switch_u8_escape_wchar (char *buf, int sz, uint32_t ch) |
|
int | switch_u8_unescape (char *buf, int sz, char *src) |
|
int | switch_u8_escape (char *buf, int sz, char *src, int escape_quotes) |
|
int | octal_digit (char c) |
|
int | hex_digit (char c) |
|
char * | switch_u8_strchr (char *s, uint32_t ch, int *charn) |
|
char * | switch_u8_memchr (char *s, uint32_t ch, size_t sz, int *charn) |
|
int | switch_u8_strlen (char *s) |
|
int | switch_u8_is_locale_utf8 (char *locale) |
|
uint32_t | switch_u8_get_char (char *s, int *i) |
|
#define isutf(c) (((c)&0xC0)!=0x80)
int octal_digit(char c)
int switch_u8_charnum(char *s, int offset)
Definition at line 207 of file switch_utf8.c.
References isutf.
209 int charnum = 0, offs=0;
211 while (offs < offset && s[offs]) {
212 (void)(isutf(s[++offs]) || isutf(s[++offs]) ||
213 isutf(s[++offs]) || ++offs);
void switch_u8_dec(char *s, int *i)
int switch_u8_escape(char *buf, int sz, char *src, int escape_quotes)
Definition at line 385 of file switch_utf8.c.
References switch_u8_escape_wchar(), and switch_u8_nextchar().
389 while (src[i] && c < sz) {
390 if (escape_quotes && src[i] == '"') {
391 amt = snprintf(buf, sz - c, "\\\"");
switch_byte_t switch_byte_t * buf
int switch_u8_escape_wchar(char *buf, int sz, uint32_t ch)
uint32_t switch_u8_nextchar(char *s, int *i)
int switch_u8_escape_wchar(char *buf, int sz, uint32_t ch)
Definition at line 357 of file switch_utf8.c.
Referenced by switch_u8_escape().
360 return snprintf(buf, sz, "\\n");
361 else if (ch == L'\t')
362 return snprintf(buf, sz, "\\t");
363 else if (ch == L'\r')
364 return snprintf(buf, sz, "\\r");
365 else if (ch == L'\b')
366 return snprintf(buf, sz, "\\b");
367 else if (ch == L'\f')
368 return snprintf(buf, sz, "\\f");
369 else if (ch == L'\v')
370 return snprintf(buf, sz, "\\v");
371 else if (ch == L'\a')
372 return snprintf(buf, sz, "\\a");
373 else if (ch == L'\\')
374 return snprintf(buf, sz, "\\\\");
375 else if (ch < 32 || ch == 0x7f)
376 return snprintf(buf, sz, "\\x%hhX", (unsigned char)ch);
377 else if (ch > 0xFFFF)
378 return snprintf(buf, sz, "\\U%.8X", (uint32_t)ch);
379 else if (ch >= 0x80 && ch <= 0xFFFF)
380 return snprintf(buf, sz, "\\u%.4hX", (unsigned short)ch);
382 return snprintf(buf, sz, "%c", (char)ch);
switch_byte_t switch_byte_t * buf
uint32_t switch_u8_get_char(char *s, int *i)
void switch_u8_inc(char *s, int *i)
int switch_u8_is_locale_utf8(char *locale)
Definition at line 447 of file switch_utf8.c.
450 const char* cp = locale;
452 for (; *cp != '\0' && *cp != '@' && *cp != '+' && *cp != ','; cp++) {
454 const char* encoding = ++cp;
455 for (; *cp != '\0' && *cp != '@' && *cp != '+' && *cp != ','; cp++)
457 if ((cp-encoding == 5 && !strncmp(encoding, "UTF-8", 5))
458 || (cp-encoding == 4 && !strncmp(encoding, "utf8", 4)))
char* switch_u8_memchr(char *s, uint32_t ch, size_t sz, int *charn)
uint32_t switch_u8_nextchar(char *s, int *i)
int switch_u8_offset(char *str, int charnum)
Definition at line 194 of file switch_utf8.c.
References isutf.
198 while (charnum > 0 && str[offs]) {
199 (void)(isutf(str[++offs]) || isutf(str[++offs]) ||
200 isutf(str[++offs]) || ++offs);
int switch_u8_read_escape_sequence(char *src, uint32_t *dest)
Definition at line 273 of file switch_utf8.c.
References hex_digit(), and octal_digit().
Referenced by switch_u8_unescape().
276 char digs[9]="\0\0\0\0\0\0\0\0";
279 ch = (uint32_t)str[0];
282 else if (str[0] == 't')
284 else if (str[0] == 'r')
286 else if (str[0] == 'b')
288 else if (str[0] == 'f')
290 else if (str[0] == 'v')
292 else if (str[0] == 'a')
297 digs[dno++] = str[i++];
299 ch = strtol(digs, NULL, 8);
301 else if (str[0] == 'x') {
303 digs[dno++] = str[i++];
306 ch = strtol(digs, NULL, 16);
308 else if (str[0] == 'u') {
310 digs[dno++] = str[i++];
313 ch = strtol(digs, NULL, 16);
315 else if (str[0] == 'U') {
317 digs[dno++] = str[i++];
320 ch = strtol(digs, NULL, 16);
int switch_u8_seqlen(char *s)
char* switch_u8_strchr(char *s, uint32_t ch, int *charn)
int switch_u8_strlen(char *s)
int switch_u8_toucs(uint32_t *dest, int sz, char *src, int srcsz)
Definition at line 80 of file switch_utf8.c.
References offsetsFromUTF8, and trailingBytesForUTF8.
83 char *src_end = src + srcsz;
94 if (src + nb >= src_end)
100 case 3: ch += (unsigned char)*src++; ch <<= 6;
101 case 2: ch += (unsigned char)*src++; ch <<= 6;
102 case 1: ch += (unsigned char)*src++; ch <<= 6;
103 case 0: ch += (unsigned char)*src++;
static const uint32_t offsetsFromUTF8[6]
static const char trailingBytesForUTF8[256]
int switch_u8_toutf8(char *dest, int sz, uint32_t *src, int srcsz)
Definition at line 125 of file switch_utf8.c.
129 char *dest_end = dest + sz;
131 while (srcsz<0 ? src[i]!=0 : i < srcsz) {
134 if (dest >= dest_end)
138 else if (ch < 0x800) {
139 if (dest >= dest_end-1)
141 *dest++ = (ch>>6) | 0xC0;
142 *dest++ = (ch & 0x3F) | 0x80;
144 else if (ch < 0x10000) {
145 if (dest >= dest_end-2)
147 *dest++ = (ch>>12) | 0xE0;
148 *dest++ = ((ch>>6) & 0x3F) | 0x80;
149 *dest++ = (ch & 0x3F) | 0x80;
151 else if (ch < 0x110000) {
152 if (dest >= dest_end-3)
154 *dest++ = (ch>>18) | 0xF0;
155 *dest++ = ((ch>>12) & 0x3F) | 0x80;
156 *dest++ = ((ch>>6) & 0x3F) | 0x80;
157 *dest++ = (ch & 0x3F) | 0x80;
int switch_u8_unescape(char *buf, int sz, char *src)
int switch_u8_wc_toutf8(char *dest, uint32_t ch)
Definition at line 166 of file switch_utf8.c.
Referenced by switch_u8_unescape().
173 dest[0] = (ch>>6) | 0xC0;
174 dest[1] = (ch & 0x3F) | 0x80;
178 dest[0] = (ch>>12) | 0xE0;
179 dest[1] = ((ch>>6) & 0x3F) | 0x80;
180 dest[2] = (ch & 0x3F) | 0x80;
184 dest[0] = (ch>>18) | 0xF0;
185 dest[1] = ((ch>>12) & 0x3F) | 0x80;
186 dest[2] = ((ch>>6) & 0x3F) | 0x80;
187 dest[3] = (ch & 0x3F) | 0x80;