view usr/src/cmd/localedef/data/convert_map.pl @ 13222:02526851ba75

357 fix license on strptime.c 358 the prototype CDDL header needs minor grammar fixes Reviewed by: trisk@opensolaris.org Reviewed by: gwr@nexenta.com Reviewed by: gber@openindiana.org
author Garrett D'Amore <garrett@nexenta.com>
date Thu, 21 Oct 2010 14:44:37 -0700
parents e35262a09b82
children
line wrap: on
line source

#! /usr/perl5/bin/perl
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet
# at http://www.illumos.org/license/CDDL.
#

#
# Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
#

# This converts MAPPING files to localedef character maps
# suitable for use with the UTF-8 derived localedef data.

#
# Convert a UCS code point (an integer) into its UTF-8 encoding, returned
# as a string of escapes with literal backslashes and upper-case hex
# digits, e.g. 0x20AC yields the text \xE2\x82\xAC.
#
# The original variable-length UTF-8 scheme is used, so code points above
# 0x1FFFFF are still encoded, as 5- or 6-byte sequences.
#
sub ucs_to_utf8
{
    my $ucs = shift;
    my $utf8 = "";
    my ($ntrail, $lead);

    # Choose the number of continuation bytes and the marker bits of the
    # lead byte from the magnitude of the code point.
    if ($ucs <= 0x7f) {
	($ntrail, $lead) = (0, 0x00);
    } elsif ($ucs <= 0x7ff) {
	($ntrail, $lead) = (1, 0xc0);
    } elsif ($ucs <= 0xffff) {
	($ntrail, $lead) = (2, 0xe0);
    } elsif ($ucs <= 0x1fffff) {
	($ntrail, $lead) = (3, 0xf0);
    } elsif ($ucs <= 0x03ffffff) {
	($ntrail, $lead) = (4, 0xf8);
    } else {
	# The 6-byte lead marker is 0xfc (1111110x); 0xf8 would make the
	# sequence look like a 5-byte one.
	($ntrail, $lead) = (5, 0xfc);
    }

    # Continuation bytes carry six payload bits each and are produced
    # from the least significant end, so each is prepended to the result.
    while ($ntrail-- > 0) {
	$utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
	$ucs >>= 6;
    }

    # Whatever bits remain belong in the lead byte.
    return (sprintf("\\x%02X", $ucs | $lead).$utf8);
}

my %unames;
my %uvalues;

#
# Load the character map in the named file into the file-level %unames
# and %uvalues tables.  Every data line supplies a symbolic name followed
# by its encoding.  %unames maps an encoding to its name(s), separated by
# newlines when several names share one encoding; %uvalues maps a name to
# its encoding.  Dies if the file cannot be opened.
#
# This is not a general purpose Character Map parser, but it's good enough
# for the stock one supplied with CLDR.
#
sub load_utf8_cm
{
    my $file = shift;

    # Three-argument open, so the file name can never be mistaken for a
    # mode, and a message that says what failed and why.
    open(my $fh, '<', $file) or die "open $file: $!";

    while (my $line = <$fh>) {
	next if ($line =~ /^#/);
	next if ($line =~ /^\s*$/);
	next if ($line =~ /^\s*CHARMAP\s*$/);
	next if ($line =~ /^\s*END\s*CHARMAP\s*$/);
	chomp($line);

	# split ' ' discards leading whitespace, so an indented line
	# does not produce an empty name.
	my ($name, $utf8val) = split(' ', $line);

	# A line with a single field has no encoding to record.
	next unless (defined($utf8val));

	if (defined($unames{$utf8val})) {
	    $unames{$utf8val} .= "\n" .$name;
	} else {
	    $unames{$utf8val} = $name;
	}
	$uvalues{$name} = $utf8val;
    }
    close($fh);
}

my %map;

#
# Load the MAPPING file named by the argument into the file-level %map
# table.  Each data line supplies a code value in the source encoding
# followed by the UCS value it maps to (hexadecimal).  %map is keyed by
# the code value text; its value is the escaped UTF-8 form produced by
# ucs_to_utf8(), with several space-separated entries when the same code
# value appears on more than one line.  Dies if the file cannot be opened.
#
sub load_map
{
    my $file = shift;

    # Three-argument open on a lexical handle, with a message that names
    # the file and the system error.
    open(my $fh, '<', $file) or die "open $file: $!";

    while (my $line = <$fh>) {
	next if ($line =~ /^#/);
	next if ($line =~ /^\s*$/);
	chomp($line);

	# split ' ' discards leading whitespace, so an indented line
	# does not produce an empty code value.
	my ($val, $ucs) = split(' ', $line);

	# A missing UCS column, or a comment standing where it should be,
	# would make hex() return 0 and so map the code value to U+0000;
	# such lines carry no mapping and are skipped.
	next if (!defined($ucs) || $ucs =~ /^#/);

	$ucs =~ s/^\\x[0]*//;
	my $utf8 = ucs_to_utf8(hex($ucs));
	if (defined($map{$val})) {
	    $map{$val} .= " ".$utf8;
	} else {
	    $map{$val} = $utf8;
	}
    }

    # The handle was previously left open.
    close($fh);
}

#
# Render a hexadecimal string (such as "0x8140") as a run of byte escapes
# with literal backslashes and lower-case hex digits, most significant
# byte first.  Leading zero bytes are dropped, but the value zero still
# produces a single \x00.
#
sub mb_str
{
    my $num = hex(shift);
    my @bytes;

    # Zero has no significant bytes, yet must still emit one.
    return ("\\x00") unless ($num);

    # Take bytes from the low end; each goes in front of those already
    # collected so the final order is big-endian.
    for (; $num; $num >>= 8) {
	unshift(@bytes, sprintf("\\x%02x", $num & 0xff));
    }

    return (join("", @bytes));
}

#
# Main program.  Takes the MAPPING file to convert as its only argument,
# reads the name table from UTF-8.cm in the current directory, and writes
# the resulting character map to standard output.
#
my $mf = shift(@ARGV);
defined($mf) or die "Usage: $0 <mapping-file>\n";

load_utf8_cm("UTF-8.cm");
load_map($mf);


print("CHARMAP\n");
foreach my $val (sort (keys (%map))) {
    foreach my $utf8 (split / /, $map{$val}) {
	# Encodings with no entry in the name table produce no output.
	my $ref = $unames{$utf8};
	next unless (defined($ref));

	foreach my $name (sort (split /\n/, $ref)) {
	    # Pad with tabs so the value starts at column 64 when the
	    # name is short enough.  Always emit at least one tab: the
	    # computed count is zero for names of 64 to 78 characters
	    # (name and value would run together) and negative for
	    # longer ones (a count-down loop would never finish).
	    my $nt = int((64 - length($name) + 7) / 8);
	    $nt = 1 if ($nt < 1);

	    print($name . ("\t" x $nt) . mb_str($val) . "\n");
	}
    }
}
print "END CHARMAP\n";