Mercurial > blahgd
changeset 945:fd7503352fd4
post fmt3: simplify UTF8 char handling
Instead of relying on the grammar to reassemble UTF-8 codepoints, do it in
the lexer. Not only should this be more efficient, it will allow us to do
more advanced Unicode handling in libjeffpc.
Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
author | Josef 'Jeff' Sipek <jeffpc@josefsipek.net> |
---|---|
date | Mon, 26 Mar 2018 22:30:51 -0400 |
parents | 5b0116b502d1 |
children | b2025204a62d |
files | post_fmt3.l post_fmt3.y |
diffstat | 2 files changed, 4 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/post_fmt3.l Mon Mar 26 22:31:44 2018 -0400
+++ b/post_fmt3.l Mon Mar 26 22:30:51 2018 -0400
@@ -106,9 +106,8 @@
 '{1,2} { yylval->ptr = STR_DUP(yytext); return CQUOT; }
 [.,()/=!:;\+?@*#|] { yylval->ptr = STR_DUP(yytext); return CHAR; }
 ["<>] { yylval->ptr = STR_DUP(yytext); return SCHAR; }
-[\xe0-\xef] { yylval->ptr = STR_DUP(yytext); return UTF8FIRST3; }
-[\xc0-\xdf] { yylval->ptr = STR_DUP(yytext); return UTF8FIRST2; }
-[\x80-\xbf] { yylval->ptr = STR_DUP(yytext); return UTF8REST; }
+[\xe0-\xef][\x80-\xbf][\x80-\xbf] { yylval->ptr = STR_DUP(yytext); return UTF8CHAR; }
+[\xc0-\xdf][\x80-\xbf] { yylval->ptr = STR_DUP(yytext); return UTF8CHAR; }
 [A-Za-z0-9]+ { yylval->ptr = STR_DUP(yytext); return WORD; }
 . { fmt3_error2("post text contains invalid characters", yytext); yyterminate(); }
 %%
--- a/post_fmt3.y Mon Mar 26 22:31:44 2018 -0400
+++ b/post_fmt3.y Mon Mar 26 22:30:51 2018 -0400
@@ -140,7 +140,7 @@
 /* generic tokens */
 %token <ptr> WSPACE
 %token <ptr> DASH OQUOT CQUOT SCHAR CHAR
-%token <ptr> UTF8FIRST3 UTF8FIRST2 UTF8REST WORD
+%token <ptr> UTF8CHAR WORD
 %token PERCENT ELLIPSIS
 %token PAREND
 
@@ -180,8 +180,7 @@
  ;
 
 thing : WORD { $$ = $1; }
- | UTF8FIRST2 UTF8REST { $$ = str_cat(2, $1, $2); }
- | UTF8FIRST3 UTF8REST UTF8REST { $$ = str_cat(3, $1, $2, $3); }
+ | UTF8CHAR { $$ = $1; }
  | '\n' { $$ = data->texttt_nesting ? STATIC_STR("\n") : STATIC_STR(" "); }
  | WSPACE { $$ = $1; }
  | DASH { $$ = dash($1); }