From ddbaff53da5b99563fa371db0b09544e139fdabb Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sat, 27 Feb 2021 14:08:37 +0100 Subject: lib/charset: utf8_get() should return error utf8_get() should return an error if hitting an illegal UTF-8 sequence and not silently convert the input to a question mark. Correct utf_8() and the its unit test. console_read_unicode() now will ignore illegal UTF-8 sequences. Signed-off-by: Heinrich Schuchardt --- lib/charset.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'lib/charset.c') diff --git a/lib/charset.c b/lib/charset.c index 1345c8f9f0..946d5ee23e 100644 --- a/lib/charset.c +++ b/lib/charset.c @@ -32,7 +32,7 @@ static struct capitalization_table capitalization_table[] = * * @read_u8: - stream reader * @src: - string buffer passed to stream reader, optional - * Return: - Unicode code point + * Return: - Unicode code point, or -1 */ static int get_code(u8 (*read_u8)(void *data), void *data) { @@ -78,7 +78,7 @@ static int get_code(u8 (*read_u8)(void *data), void *data) } return ch; error: - return '?'; + return -1; } /** @@ -120,14 +120,21 @@ static u8 read_console(void *data) int console_read_unicode(s32 *code) { - if (!tstc()) { - /* No input available */ - return 1; - } + for (;;) { + s32 c; - /* Read Unicode code */ - *code = get_code(read_console, NULL); - return 0; + if (!tstc()) { + /* No input available */ + return 1; + } + + /* Read Unicode code */ + c = get_code(read_console, NULL); + if (c > 0) { + *code = c; + return 0; + } + } } s32 utf8_get(const char **src) -- cgit v1.2.3