Mercurial > libjeffpc
changeset 813:d563f6d72c1e
unicode: make utf8_is_valid_str return the number of codepoints found
The returned count can be used as a (hacky) estimate of how many characters
will be printed. (It is hacky because it assumes that combining characters do
not exist and that every displayed character is exactly one codepoint.)
Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
author | Josef 'Jeff' Sipek <jeffpc@josefsipek.net> |
---|---|
date | Tue, 25 Aug 2020 19:57:25 -0400 |
parents | 59a473863eaa |
children | be934c5f8a42 |
files | include/jeffpc/unicode.h mapfile-vers unicode.c |
diffstat | 3 files changed, 9 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/include/jeffpc/unicode.h Sat Jul 18 10:16:00 2020 -0400 +++ b/include/jeffpc/unicode.h Tue Aug 25 19:57:25 2020 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Josef 'Jeff' Sipek <jeffpc@josefsipek.net> + * Copyright (c) 2018-2020 Josef 'Jeff' Sipek <jeffpc@josefsipek.net> * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -31,7 +31,8 @@ extern size_t utf8_to_utf32(const char *in, size_t inlen, uint32_t *out); extern ssize_t utf32_to_utf8(uint32_t cp, char *buf, size_t buflen); -extern int utf8_is_valid_str(const char *src, size_t slen); +/* returns the number of codepoints found, or negated errno */ +extern ssize_t utf8_is_valid_str(const char *src, size_t slen); static inline bool utf32_is_valid(uint32_t cp) {
--- a/mapfile-vers Sat Jul 18 10:16:00 2020 -0400 +++ b/mapfile-vers Tue Aug 25 19:57:25 2020 -0400 @@ -295,6 +295,7 @@ tree_swap; # unicode + utf8_is_valid_str; utf8_to_utf32; utf32_to_utf8;
--- a/unicode.c Sat Jul 18 10:16:00 2020 -0400 +++ b/unicode.c Tue Aug 25 19:57:25 2020 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Josef 'Jeff' Sipek <jeffpc@josefsipek.net> + * Copyright (c) 2018-2020 Josef 'Jeff' Sipek <jeffpc@josefsipek.net> * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -137,9 +137,10 @@ return len; } -int utf8_is_valid_str(const char *src, size_t slen) +ssize_t utf8_is_valid_str(const char *src, size_t slen) { size_t i = 0; + ssize_t ret = 0; while (i < slen) { uint32_t cp; @@ -150,7 +151,8 @@ return -EILSEQ; i += cplen; + ret++; } - return 0; + return ret; }