view src/lib/unicodemap.pl @ 23007:36e01285b5b8

lib: buffer - Improve header comment for buffer_insert() and buffer_delete().
author Stephan Bosch <stephan.bosch@dovecot.fi>
date Mon, 18 Mar 2019 00:52:37 +0100
parents 4c9420265987
children
line wrap: on
line source

#!/usr/bin/env perl
use strict;

# Lookup tables filled while reading UnicodeData.txt and printed as C arrays
# by the code further down in this file.
my (%titlecase8, %uni8_decomp);
my (@titlecase16_keys, @titlecase16_values);
my (@titlecase32_keys, @titlecase32_values);
my (@uni16_decomp_keys, @uni16_decomp_values);
my (@uni32_decomp_keys, @uni32_decomp_values);
my (@multidecomp_keys, @multidecomp_offsets, @multidecomp_values);

# Convert one hexadecimal field of the input into a number.
#   $field - the field text (may be undef when the field is missing)
#   $what  - short description of the field, used in the error message
# Dies when the field is not purely hexadecimal: a silently mis-parsed value
# would otherwise end up in the generated tables. hex() is used instead of a
# string eval so that input text is never executed as Perl code.
sub parse_hex {
  my ($field, $what) = @_;
  $field = "" if (!defined($field));
  die "Error: Invalid $what '$field' on input line $.\n"
    if ($field !~ /\A[0-9A-Fa-f]+\z/);
  return hex($field);
}

# Read the semicolon separated records. Field 0 is the code point, field 5
# the decomposition mapping and field 14 the titlecase mapping. A character
# with a titlecase mapping gets only that mapping recorded; its
# decomposition is ignored.
while (my $line = <>) {
  # strip the line terminator, including the CR of CRLF files
  $line =~ s/[\r\n]+\z//;
  # skip blank lines and comment lines
  next if ($line =~ /\A\s*(?:#.*)?\z/);

  my @arr = split(/;/, $line);
  my $code = parse_hex($arr[0], "code point");
  # split() drops trailing empty fields, so these may be undefined
  my $decomp = defined($arr[5]) ? $arr[5] : "";
  my $titlecode = defined($arr[14]) ? $arr[14] : "";

  if ($titlecode ne "") {
    # titlecase mapping
    my $value = parse_hex($titlecode, "titlecase code");
    if ($value == $code) {
      # the same character, ignore
    } elsif ($code <= 0xff) {
      die "Error: We've assumed 8bit keys have max. 16bit values" if ($value > 0xffff);
      $titlecase8{$code} = $value;
    } elsif ($code <= 0xffff) {
      die "Error: We've assumed 16bit keys have max. 16bit values" if ($value > 0xffff);
      push @titlecase16_keys, $code;
      push @titlecase16_values, $value;
    } else {
      push @titlecase32_keys, $code;
      push @titlecase32_values, $value;
    }
  } elsif ($decomp =~ /(?:\<[^>]*> )?(.+)/) {
    # decompositions; an optional leading "<tag> " is dropped
    my $decomp_codes = $1;
    if ($decomp_codes =~ /\A[0-9A-Fa-f]+\z/) {
      # unicharacter decomposition. use separate lists for this
      my $value = parse_hex($decomp_codes, "decomposition code");
      if ($value > 0xffffffff) {
        print STDERR "Error: We've assumed decomposition codes are max. 32bit\n";
        exit 1;
      }
      if ($code <= 0xff) {
        # uni8_decomp_map is emitted as a uint16_t array
        die "Error: We've assumed 8bit keys have max. 16bit decomposition values"
          if ($value > 0xffff);
        $uni8_decomp{$code} = $value;
      } elsif ($code <= 0xffff) {
        push @uni16_decomp_keys, $code;
        push @uni16_decomp_values, $value;
      } else {
        push @uni32_decomp_keys, $code;
        push @uni32_decomp_values, $value;
      }
    } else {
      # multicharacter decomposition.
      if ($code > 0xffffffff) {
        print STDERR "Error: We've assumed multi-decomposition key codes are max. 32bit\n";
        exit 1;
      }

      # multidecomp_offsets is emitted as a uint16_t array
      my $offset = scalar(@multidecomp_values);
      die "Error: We've assumed multi-decomposition offsets are max. 16bit"
        if ($offset > 0xffff);

      push @multidecomp_keys, $code;
      push @multidecomp_offsets, $offset;

      foreach my $dcode (split(" ", $decomp_codes)) {
        my $value = parse_hex($dcode, "decomposition code");
        if ($value > 0xffffffff) {
          print STDERR "Error: We've assumed decomposition codes are max. 32bit\n";
          exit 1;
        }
        push @multidecomp_values, $value;
      }
      # each value run is terminated by a 0 entry
      push @multidecomp_values, 0;
    }
  }
}

# Print the numbers of the referenced array as a comma separated list of
# "0x%05x" formatted values, eight per line. Continuation lines start with a
# tab. Nothing is printed after the last value.
#   $_[0] - reference to the array of numbers
sub print_list {
  my @values = @{$_[0]};

  my $text = "";
  for my $idx (0 .. $#values) {
    if ($idx > 0) {
      # wrap after every eighth value, otherwise separate with a space
      $text .= ($idx % 8 == 0) ? ",\n\t" : ", ";
    }
    $text .= sprintf("0x%05x", $values[$idx]);
  }
  print $text;
}

# Banner comment written at the top of the generated C source.
print "/* This file is automatically generated by unicodemap.pl from UnicodeData.txt\n" .
  "\n" .
  "   NOTE: decompositions for characters having titlecase characters\n" .
  "   are not included, because we first translate everything to titlecase */\n";

# Print a full 256 entry table for the keys 0..0xff via print_list().
# Keys without a defined value in the referenced hash map to themselves.
#   $_[0] - reference to a hash of key => mapped value
sub print_map8 {
  my ($map) = @_;

  my @table = map { defined($map->{$_}) ? $map->{$_} : $_ } (0 .. 0xff);
  print_list(\@table);
}

# Emit every lookup table as a static const C array, in this fixed order.
# Each entry is: C element type, array declarator, printer sub, data ref.
my @tables = (
  [ "uint16_t", "titlecase8_map[256]",   \&print_map8, \%titlecase8 ],
  [ "uint16_t", "titlecase16_keys[]",    \&print_list, \@titlecase16_keys ],
  [ "uint16_t", "titlecase16_values[]",  \&print_list, \@titlecase16_values ],
  [ "uint32_t", "titlecase32_keys[]",    \&print_list, \@titlecase32_keys ],
  [ "uint32_t", "titlecase32_values[]",  \&print_list, \@titlecase32_values ],
  [ "uint16_t", "uni8_decomp_map[256]",  \&print_map8, \%uni8_decomp ],
  [ "uint16_t", "uni16_decomp_keys[]",   \&print_list, \@uni16_decomp_keys ],
  [ "uint32_t", "uni16_decomp_values[]", \&print_list, \@uni16_decomp_values ],
  [ "uint32_t", "uni32_decomp_keys[]",   \&print_list, \@uni32_decomp_keys ],
  [ "uint32_t", "uni32_decomp_values[]", \&print_list, \@uni32_decomp_values ],
  [ "uint32_t", "multidecomp_keys[]",    \&print_list, \@multidecomp_keys ],
  [ "uint16_t", "multidecomp_offsets[]", \&print_list, \@multidecomp_offsets ],
  [ "uint32_t", "multidecomp_values[]",  \&print_list, \@multidecomp_values ],
);

foreach my $table (@tables) {
  my ($ctype, $declarator, $printer, $data) = @{$table};

  print "static const " . $ctype . " " . $declarator . " = {\n\t";
  $printer->($data);
  print "\n};\n";
}