aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--common/meson.build1
-rw-r--r--common/unicode.c101
-rw-r--r--include/unicode.h33
3 files changed, 0 insertions, 135 deletions
diff --git a/common/meson.build b/common/meson.build
index 4ad872d1..3af1f1d5 100644
--- a/common/meson.build
+++ b/common/meson.build
@@ -9,7 +9,6 @@ lib_sway_common = static_library(
9 'list.c', 9 'list.c',
10 'pango.c', 10 'pango.c',
11 'stringop.c', 11 'stringop.c',
12 'unicode.c',
13 'util.c' 12 'util.c'
14 ), 13 ),
15 dependencies: [ 14 dependencies: [
diff --git a/common/unicode.c b/common/unicode.c
deleted file mode 100644
index 5070e083..00000000
--- a/common/unicode.c
+++ /dev/null
@@ -1,101 +0,0 @@
1#include <stdint.h>
2#include <stddef.h>
3#include "unicode.h"
4
/**
 * Returns the number of bytes needed to encode code point `ch` as UTF-8.
 *
 * The result is 1 for values below 0x80, 2 below 0x800, 3 below 0x10000 and
 * 4 for everything else; no upper bound is enforced on `ch`.
 */
size_t utf8_chsize(uint32_t ch) {
	// Start at one byte and add one for every encoding threshold reached.
	size_t nbytes = 1;
	nbytes += (ch >= 0x80);
	nbytes += (ch >= 0x800);
	nbytes += (ch >= 0x10000);
	return nbytes;
}
15
16static const uint8_t masks[] = {
17 0x7F,
18 0x1F,
19 0x0F,
20 0x07,
21 0x03,
22 0x01
23};
24
25uint32_t utf8_decode(const char **char_str) {
26 uint8_t **s = (uint8_t **)char_str;
27
28 uint32_t cp = 0;
29 if (**s < 128) {
30 // shortcut
31 cp = **s;
32 ++*s;
33 return cp;
34 }
35 int size = utf8_size((char *)*s);
36 if (size == -1) {
37 ++*s;
38 return UTF8_INVALID;
39 }
40 uint8_t mask = masks[size - 1];
41 cp = **s & mask;
42 ++*s;
43 while (--size) {
44 cp <<= 6;
45 cp |= **s & 0x3f;
46 ++*s;
47 }
48 return cp;
49}
50
/**
 * Writes the UTF-8 encoding of code point `ch` into `str` and returns the
 * number of bytes written (1 to 4).
 *
 * No NUL terminator is appended and `ch` is not range-checked: every value
 * of 0x10000 or above is emitted as a 4-byte sequence. `str` must have room
 * for the bytes written.
 */
size_t utf8_encode(char *str, uint32_t ch) {
	// Exclusive upper bound of each short form and the marker bits of its
	// lead byte; anything past the last row uses the 4-byte form.
	static const struct {
		uint32_t limit;
		uint8_t lead;
	} forms[] = {
		{ 0x80, 0x00 },
		{ 0x800, 0xc0 },
		{ 0x10000, 0xe0 },
	};

	size_t nbytes = 4;
	uint8_t lead = 0xf0;
	for (size_t k = 0; k < sizeof(forms) / sizeof(*forms); ++k) {
		if (ch < forms[k].limit) {
			nbytes = k + 1;
			lead = forms[k].lead;
			break;
		}
	}

	// Fill the continuation bytes from the last one backwards, peeling six
	// payload bits off the code point each time.
	char *out = str + nbytes;
	while (--out > str) {
		*out = (ch & 0x3f) | 0x80;
		ch >>= 6;
	}

	// Whatever is left of the code point goes into the lead byte.
	str[0] = ch | lead;
	return nbytes;
}
77
78
/*
 * Lead-byte classification table. A byte `c` matches a row when
 * (c & mask) == result; rows are tried in order and the first match wins.
 * The final row catches continuation bytes (10xxxxxx) and any other byte
 * with the high bit set that no earlier row claimed (0xFE, 0xFF).
 */
static const struct {
	uint8_t mask;
	uint8_t result;
	int octets;
} sizes[] = {
	{ 0x80, 0x00, 1 },  // 0xxxxxxx
	{ 0xE0, 0xC0, 2 },  // 110xxxxx
	{ 0xF0, 0xE0, 3 },  // 1110xxxx
	{ 0xF8, 0xF0, 4 },  // 11110xxx
	{ 0xFC, 0xF8, 5 },  // 111110xx
	// 1111110x. The pattern used to be 0xF8, which the 5-byte row already
	// matches, so this row was unreachable and 0xFC/0xFD came back as -1.
	{ 0xFE, 0xFC, 6 },
	{ 0x80, 0x80, -1 },
};

/**
 * Returns the length in bytes of the UTF-8 sequence introduced by the first
 * byte of `s` (1 to 6), or -1 if that byte cannot start a sequence.
 *
 * Only the first byte is inspected; the following bytes are neither read
 * nor validated.
 */
int utf8_size(const char *s) {
	uint8_t c = (uint8_t)*s;
	for (size_t i = 0; i < sizeof(sizes) / sizeof(*sizes); ++i) {
		if ((c & sizes[i].mask) == sizes[i].result) {
			return sizes[i].octets;
		}
	}
	return -1;
}
diff --git a/include/unicode.h b/include/unicode.h
deleted file mode 100644
index e2ee9588..00000000
--- a/include/unicode.h
+++ /dev/null
@@ -1,33 +0,0 @@
// Include guard. The name deliberately has no leading underscore:
// identifiers that start with an underscore followed by an uppercase letter
// are reserved for the C implementation.
#ifndef SWAY_UNICODE_H
#define SWAY_UNICODE_H
#include <stddef.h>
#include <stdint.h>

// The original UTF-8 definition allowed sequences of up to 6 bytes, but
// Unicode code points (at most U+10FFFF) never need more than 4.
#define UTF8_MAX_SIZE 4

// Value returned by utf8_decode() for input it does not recognize.
// NOTE(review): 0x80 is also what a well-formed encoding of U+0080
// ("\xC2\x80") decodes to, so callers cannot tell the two cases apart.
#define UTF8_INVALID 0x80

/**
 * Decodes the UTF-8 character at *str and advances *str past it.
 *
 * Returns the decoded code point, or UTF8_INVALID if the first byte is not
 * recognized as the start of a UTF-8 sequence.
 */
uint32_t utf8_decode(const char **str);

/**
 * Encodes the code point `ch` as UTF-8 into `str` and returns the number of
 * bytes written (1 to 4). No NUL terminator is appended.
 */
size_t utf8_encode(char *str, uint32_t ch);

/**
 * Returns the length in bytes of the UTF-8 character starting at `str`, as
 * indicated by its first byte, or -1 if that byte is not recognized as the
 * start of a sequence.
 */
int utf8_size(const char *str);

/**
 * Returns the number of bytes (1 to 4) needed to encode the code point `ch`
 * as UTF-8.
 */
size_t utf8_chsize(uint32_t ch);

#endif
33