changeset 813:d563f6d72c1e

unicode: make utf8_is_valid_str return the number of codepoints found. This can be used as a (hacky) estimate for how many characters will be printed. (It is hacky because it assumes that combining characters do not exist and that all languages have discrete characters.) Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
author Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
date Tue, 25 Aug 2020 19:57:25 -0400
parents 59a473863eaa
children be934c5f8a42
files include/jeffpc/unicode.h mapfile-vers unicode.c
diffstat 3 files changed, 9 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/include/jeffpc/unicode.h	Sat Jul 18 10:16:00 2020 -0400
+++ b/include/jeffpc/unicode.h	Tue Aug 25 19:57:25 2020 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
+ * Copyright (c) 2018-2020 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -31,7 +31,8 @@
 extern size_t utf8_to_utf32(const char *in, size_t inlen, uint32_t *out);
 extern ssize_t utf32_to_utf8(uint32_t cp, char *buf, size_t buflen);
 
-extern int utf8_is_valid_str(const char *src, size_t slen);
+/* returns the number of codepoints found, or negated errno */
+extern ssize_t utf8_is_valid_str(const char *src, size_t slen);
 
 static inline bool utf32_is_valid(uint32_t cp)
 {
--- a/mapfile-vers	Sat Jul 18 10:16:00 2020 -0400
+++ b/mapfile-vers	Tue Aug 25 19:57:25 2020 -0400
@@ -295,6 +295,7 @@
 		tree_swap;
 
 		# unicode
+		utf8_is_valid_str;
 		utf8_to_utf32;
 		utf32_to_utf8;
 
--- a/unicode.c	Sat Jul 18 10:16:00 2020 -0400
+++ b/unicode.c	Tue Aug 25 19:57:25 2020 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
+ * Copyright (c) 2018-2020 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -137,9 +137,10 @@
 	return len;
 }
 
-int utf8_is_valid_str(const char *src, size_t slen)
+ssize_t utf8_is_valid_str(const char *src, size_t slen)
 {
 	size_t i = 0;
+	ssize_t ret = 0;
 
 	while (i < slen) {
 		uint32_t cp;
@@ -150,7 +151,8 @@
 			return -EILSEQ;
 
 		i += cplen;
+		ret++;
 	}
 
-	return 0;
+	return ret;
 }