annotate src/lib/unicodemap.pl @ 23007:36e01285b5b8

lib: buffer - Improve header comment for buffer_insert() and buffer_delete().
author Stephan Bosch <stephan.bosch@dovecot.fi>
date Mon, 18 Mar 2019 00:52:37 +0100
parents 4c9420265987
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6129
04b9eb27283c Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1 #!/usr/bin/env perl
04b9eb27283c Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2 use strict;
04b9eb27283c Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3
10811
23858ce6422e unichar: Optimized 8bit character conversions.
Timo Sirainen <tss@iki.fi>
parents: 10270
diff changeset
# Lookup tables filled from the input records. Mappings are split by the
# width of the source code point: 8bit keys are stored in hashes (later
# written out as directly indexed 256 entry arrays), 16bit and 32bit keys
# are stored as parallel key/value lists.
my (%titlecase8, %uni8_decomp);
my (@titlecase16_keys, @titlecase16_values);
my (@titlecase32_keys, @titlecase32_values);
my (@uni16_decomp_keys, @uni16_decomp_values);
my (@uni32_decomp_keys, @uni32_decomp_values);
# Multi-character decompositions: $multidecomp_offsets[i] is the index in
# @multidecomp_values where the 0-terminated decomposition of
# $multidecomp_keys[i] begins.
my (@multidecomp_keys, @multidecomp_offsets, @multidecomp_values);

# Read the semicolon separated records from the files named on the command
# line (or from stdin when there are none).
while (<>) {
  chomp $_;
  # Tolerate CRLF line endings: a stray CR would make an empty last field
  # look like a (bogus) titlecase mapping.
  s/\r\z//;
  # Blank lines carry no mapping.
  next if ($_ eq "");

  # Limit -1 keeps trailing empty fields instead of silently dropping them.
  my @arr = split(/;/, $_, -1);
  # hex() instead of string eval: the input is data and must never be
  # executed as Perl code.
  my $code = hex($arr[0]);
  my $decomp = defined($arr[5]) ? $arr[5] : "";
  my $titlecode = defined($arr[14]) ? $arr[14] : "";

  if ($titlecode ne "") {
    # titlecase mapping. Note that the decomposition of such a character is
    # intentionally not recorded (see the NOTE in the generated header).
    my $value = hex($titlecode);
    if ($value == $code) {
      # the same character, ignore
    } elsif ($code <= 0xff) {
      die "Error: We've assumed 8bit keys have max. 16bit values" if ($value > 0xffff);
      $titlecase8{$code} = $value;
    } elsif ($code <= 0xffff) {
      die "Error: We've assumed 16bit keys have max. 16bit values" if ($value > 0xffff);
      push @titlecase16_keys, $code;
      push @titlecase16_values, $value;
    } else {
      push @titlecase32_keys, $code;
      push @titlecase32_values, $value;
    }
  } elsif ($decomp =~ /(?:\<[^>]*> )?(.+)/) {
    # decompositions; an optional leading "<tag> " is skipped
    my $decomp_codes = $1;
    if ($decomp_codes =~ /^([0-9A-Z]*)$/i) {
      # unicharacter decomposition. use separate lists for this
      my $value = hex($1);
      if ($value > 0xffffffff) {
        print STDERR "Error: We've assumed decomposition codes are max. 32bit\n";
        exit 1;
      }
      if ($code <= 0xff) {
        # uni8_decomp_map is written out as uint16_t, so a wider value would
        # be silently truncated by the C compiler.
        die "Error: We've assumed 8bit decomposition keys have max. 16bit values" if ($value > 0xffff);
        $uni8_decomp{$code} = $value;
      } elsif ($code <= 0xffff) {
        push @uni16_decomp_keys, $code;
        push @uni16_decomp_values, $value;
      } else {
        push @uni32_decomp_keys, $code;
        push @uni32_decomp_values, $value;
      }
    } else {
      # multicharacter decomposition.
      if ($code > 0xffffffff) {
        print STDERR "Error: We've assumed multi-decomposition key codes are max. 32bit\n";
        exit 1;
      }

      # multidecomp_offsets is written out as uint16_t; refuse to generate a
      # table whose offsets no longer fit.
      my $offset = scalar(@multidecomp_values);
      if ($offset > 0xffff) {
        print STDERR "Error: We've assumed multi-decomposition offsets are max. 16bit\n";
        exit 1;
      }

      push @multidecomp_keys, $code;
      push @multidecomp_offsets, $offset;

      foreach my $dcode (split(" ", $decomp_codes)) {
        my $value = hex($dcode);
        if ($value > 0xffffffff) {
          print STDERR "Error: We've assumed decomposition codes are max. 32bit\n";
          exit 1;
        }
        push @multidecomp_values, $value;
      }
      # 0 terminates this key's decomposition sequence
      push @multidecomp_values, 0;
    }
  }
}
04b9eb27283c Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
74
04b9eb27283c Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
# Print the numbers of the referenced array to the currently selected
# output handle as C initializer elements: each value is formatted as
# "0x%05x", values are separated by ", ", and after every 8th value the
# line is broken with ",\n\t". Nothing is printed after the final value.
#
#   $_[0] - reference to the array of numbers to print
sub print_list {
  my ($items) = @_;

  my @cells = map { sprintf("0x%05x", $_) } @{$items};

  # Group the formatted values into rows of at most 8 cells.
  my @rows;
  while (my @row = splice(@cells, 0, 8)) {
    push @rows, join(", ", @row);
  }
  print join(",\n\t", @rows);
}
04b9eb27283c Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
93
04b9eb27283c Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
# Opening comment of the generated C file.
print <<'END_OF_HEADER';
/* This file is automatically generated by unicodemap.pl from UnicodeData.txt

NOTE: decompositions for characters having titlecase characters
are not included, because we first translate everything to titlecase */
END_OF_HEADER
04b9eb27283c Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
98
10811
23858ce6422e unichar: Optimized 8bit character conversions.
Timo Sirainen <tss@iki.fi>
parents: 10270
diff changeset
# Print a complete 256 entry table for the code points 0x00..0xff using
# print_list(). A code point with a defined entry in the referenced hash
# is printed as its mapped value, every other code point maps to itself.
#
#   $_[0] - reference to a hash of code point => mapped value
sub print_map8 {
  my ($map) = @_;

  my @table = map { defined($map->{$_}) ? $map->{$_} : $_ } (0 .. 0xff);
  print_list(\@table);
}
23858ce6422e unichar: Optimized 8bit character conversions.
Timo Sirainen <tss@iki.fi>
parents: 10270
diff changeset
111
23858ce6422e unichar: Optimized 8bit character conversions.
Timo Sirainen <tss@iki.fi>
parents: 10270
diff changeset
# Write out every lookup table as a "static const" C array. Each entry
# holds the C element type, the array declarator, the sub that prints the
# initializer values and a reference to the data handed to that sub. The
# order of the entries is the order of the tables in the output.
my @c_tables = (
  [ "uint16_t", "titlecase8_map[256]",   \&print_map8, \%titlecase8 ],
  [ "uint16_t", "titlecase16_keys[]",    \&print_list, \@titlecase16_keys ],
  [ "uint16_t", "titlecase16_values[]",  \&print_list, \@titlecase16_values ],
  [ "uint32_t", "titlecase32_keys[]",    \&print_list, \@titlecase32_keys ],
  [ "uint32_t", "titlecase32_values[]",  \&print_list, \@titlecase32_values ],
  [ "uint16_t", "uni8_decomp_map[256]",  \&print_map8, \%uni8_decomp ],
  [ "uint16_t", "uni16_decomp_keys[]",   \&print_list, \@uni16_decomp_keys ],
  [ "uint32_t", "uni16_decomp_values[]", \&print_list, \@uni16_decomp_values ],
  [ "uint32_t", "uni32_decomp_keys[]",   \&print_list, \@uni32_decomp_keys ],
  [ "uint32_t", "uni32_decomp_values[]", \&print_list, \@uni32_decomp_values ],
  [ "uint32_t", "multidecomp_keys[]",    \&print_list, \@multidecomp_keys ],
  [ "uint16_t", "multidecomp_offsets[]", \&print_list, \@multidecomp_offsets ],
  [ "uint32_t", "multidecomp_values[]",  \&print_list, \@multidecomp_values ],
);

foreach my $table (@c_tables) {
  my ($ctype, $declarator, $printer, $data) = @{$table};

  print "static const " . $ctype . " " . $declarator . " = {\n\t";
  $printer->($data);
  print "\n};\n";
}