Mercurial > illumos > illumos-gate
changeset 13399:a1d28d03839f
992 towlower/towupper are broken
Reviewed by: Garrett D'Amore <garrett@nexenta.com>
Approved by: Gordon Ross <gwr@nexenta.com>
author | Yuri Pankov <yuri.pankov@gmail.com> |
---|---|
date | Thu, 12 May 2011 03:21:34 +0400 |
parents | fa0b6e3a91f5 |
children | 71e59c2d8715 |
files | usr/src/cmd/localedef/Makefile usr/src/cmd/localedef/ctype.c usr/src/cmd/localedef/data/ctype.sh |
diffstat | 3 files changed, 61 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/cmd/localedef/Makefile	Mon Jul 11 16:32:19 2011 -0400
+++ b/usr/src/cmd/localedef/Makefile	Thu May 12 03:21:34 2011 +0400
@@ -31,7 +31,7 @@
 YFLAGS = -d -b parser
 CLEANFILES = $(OBJS) parser.tab.c parser.tab.h
 CLEANFILES += \
-	UTF-8.cm \
+	UTF-8.cm UTF-8.ct \
 	8859-1.cm 8859-2.cm 8859-3.cm 8859-4.cm \
 	8859-5.cm 8859-5.cm 8859-6.cm 8859-7.cm \
 	8859-8.cm 8859-9.cm 8859-9.cm 8859-10.cm \
@@ -179,6 +179,8 @@
 	vi_VN \
 	zh_CN zh_HK zh_MO zh_SG zh_TW
 
+UTF8SRCS = $(UTF_8_LOCALES:%=data/%.UTF-8.src)
+
 LOCNAMES = \
 	$(ISO8859_1_LOCALES:%=%.ISO8859-1) \
 	$(ISO8859_2_LOCALES:%=%.ISO8859-2) \
@@ -251,8 +253,12 @@
 
 include ../Makefile.targ
 
-locale/%.UTF-8/stamp: data/%.UTF-8.src UTF-8.cm locale $(PROG)
-	./$(PROG) -U -i $< -f UTF-8.cm $(@D)
+# Strip LC_CTYPE contents for UTF-8 locales and replace them
+# with UTF-8.ct we compiled
+locale/%.UTF-8/stamp: data/%.UTF-8.src UTF-8.cm \
+			UTF-8.ct locale $(PROG)
+	$(SED) '/^LC_CTYPE/,/^END LC_CTYPE/d;$$r UTF-8.ct' $< | \
+	./$(PROG) -U -f UTF-8.cm $(@D)
 	$(TOUCH) $@
 locale/%.ISO8859-1/stamp: data/%.UTF-8.src 8859-1.cm locale $(PROG)
 	./$(PROG) -U -i $< -f 8859-1.cm $(@D)
@@ -295,6 +301,9 @@
 UTF-8.cm: data/UTF-8.cm
 	$(LN) -sf data/UTF-8.cm $@
 
+UTF-8.ct: $(UTF8SRCS)
+	$(SH) data/ctype.sh $(UTF8SRCS) > $@
+
 %.cm: data/%.TXT UTF-8.cm
 	$(RM) $@
 	$(PERL) data/convert_map.pl $< > $@
--- a/usr/src/cmd/localedef/ctype.c	Mon Jul 11 16:32:19 2011 -0400
+++ b/usr/src/cmd/localedef/ctype.c	Thu May 12 03:21:34 2011 +0400
@@ -10,7 +10,7 @@
  */
 
 /*
- * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2010,2011 Nexenta Systems, Inc. All rights reserved.
  */
 
 /*
@@ -321,8 +321,8 @@
 			ct[rl.runetype_ext_nranges - 1].map = ctn->ctype;
 			last_ct = ctn;
 		}
-		if (ctn->toupper == 0) {
-			last_up = NULL;
+		if (ctn->tolower == 0) {
+			last_lo = NULL;
 		} else if ((last_lo != NULL) &&
 		    (last_lo->tolower + 1 == ctn->tolower)) {
 			lo[rl.maplower_ext_nranges-1].max = wc;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/localedef/data/ctype.sh	Thu May 12 03:21:34 2011 +0400
@@ -0,0 +1,46 @@
+#! /usr/bin/sh
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+#
+
+# Combine LC_CTYPE classes from all .UTF-8.src files to be compiled by localedef
+# into one LC_CTYPE/LCL_DATA used by all locales, so we have the same case
+# mapping tables, character classes, etc. for all of them. This is not general
+# purpose parser but is good enough for the stock files supplied with CLDR.
+
+printf "\nLC_CTYPE\n"
+
+for i in upper lower alpha space cntrl graph print punct digit xdigit blank \
+    toupper tolower; do
+	# sed can't match both range patterns on the same line so we just make
+	# it look like valid multiline class by duplicating the definition
+	sed -E "/^$i.*>$/ {
+		s,$,;/,
+		h
+		s,^$i(.*>);/$,\1,
+		H
+		x
+	}" $@ |\
+	sed -E -n "/^$i/,/(>|\))$/ {
+		s,^$i,,
+		s,(>|\))$,\1;/,
+		/^$/d
+		p
+	}" |\
+	sort -u |\
+	sed -E "1 s,^,$i,;$ s,(>|\));/,\1,"
+done
+
+printf "\nEND LC_CTYPE\n"