The conversion tools have been further improved and some erroneous
conversions have been detected and fixed.

It is now possible to add options after the encoding in the parameter
list for convert-keymap.pl. This is currently used to selectively
enable interpretation of the ISO8859-1 currency symbol as the Euro
sign found in ISO8859-15, or to add a Yen symbol in place of '\' for
specific Japanese keyboards. The options are appended to the parameter
list, as in e.g. "convert-keymap.pl german.iso.kbd ISO8859-1 EURO".

The options are appended to the encoding in the form "+EURO" or "+YEN"
in KBDFILES.map, to keep the meaning of the columns intact.

MFC after:	3 days
This commit is contained in:
Stefan Eßer 2014-08-20 17:07:41 +00:00
parent 81ade99c2c
commit 5a0fb8c8d6
3 changed files with 93 additions and 52 deletions

View File

@ -1,7 +1,15 @@
# $FreeBSD$
ISO8859-15 be.iso.kbd be.kbd
ISO8859-15 be.iso.acc.kbd be.acc.kbd
#
# The Files are converted by "convert-keymaps.pl" from the given encoding to UCS.
#
# An additional "+EURO" causes the translation of the generic currency symbol to
# a Euro symbol, even if the source locale does not support a Euro symbol.
# This conversion is only performed for the "E" key (not e.g. on Shift-4, which
# still generates the currency symbol).
#
# Encoding syscons file name newcons (vt) file name
ISO8859-1+EURO be.iso.kbd be.kbd
ISO8859-1+EURO be.iso.acc.kbd be.acc.kbd
ISO8859-5 bg.bds.ctrlcaps.kbd bg.bds.kbd
ISO8859-5 bg.phonetic.ctrlcaps.kbd bg.bds.ctrlcaps.kbd
@ -14,15 +22,15 @@ CP850 br275.cp850.kbd br.noacc.kbd
#CP1251 by.cp1251.kbd by.kbd.from-cp1251 (result identical to CP1251)
ISO8859-5 by.iso5.kbd by.kbd
ISO8859-2 ce.iso2.kbd centraleuropean.kbd
ISO8859-2 ce.iso2.kbd centraleuropean.qwerty.kbd
ISO8859-1 colemak.iso15.acc.kbd colemak.kbd
ISO8859-2 cs.latin2.qwertz.kbd cz.kbd
ISO8859-2 cz.iso2.kbd cz.kbd.from-ce
ISO8859-2 cz.iso2.kbd cz.qwerty.kbd.from-ce
ISO8859-15 danish.iso.kbd dk.kbd
ISO8859-15 danish.iso.acc.kbd dk.acc.kbd
ISO8859-1+EURO danish.iso.kbd dk.kbd
ISO8859-1+EURO danish.iso.acc.kbd dk.acc.kbd
CP865 danish.cp865.kbd dk.kbd.from-cp865
ISO8859-1 danish.iso.macbook.kbd dk.macbook.kbd
@ -36,19 +44,19 @@ ISO8859-1 estonian.iso.kbd ee.kbd.from-iso1
ISO8859-15 estonian.iso15.kbd ee.kbd
CP850 estonian.cp850.kbd ee.kbd.from-cp850
ISO8859-15 finnish.iso.kbd fi.kbd
ISO8859-1+EURO finnish.iso.kbd fi.kbd
CP850 finnish.cp850.kbd fi.kbd.from-cp850
ISO8859-15 fr.iso.kbd fr.kbd
ISO8859-15 fr.iso.acc.kbd fr.acc.kbd
ISO8859-15 fr.macbook.acc.kbd fr.macbook.kbd
ISO8859-1 fr.dvorak.kbd fr.dvorak.kbd
ISO8859-15 fr.dvorak.acc.kbd fr.dvorak.acc.kbd
ISO8859-1+EURO fr.iso.kbd fr.kbd
ISO8859-1+EURO fr.iso.acc.kbd fr.acc.kbd
ISO8859-1+EURO fr.macbook.acc.kbd fr.macbook.kbd
ISO8859-1+EURO fr.dvorak.kbd fr.dvorak.kbd
ISO8859-1 fr.dvorak.acc.kbd fr.dvorak.acc.kbd
ISO8859-15 fr_CA.iso.acc.kbd ca-fr.kbd
ISO8859-1+EURO fr_CA.iso.acc.kbd ca-fr.kbd
ISO8859-15 german.iso.kbd de.kbd
ISO8859-15 german.iso.acc.kbd de.acc.kbd
ISO8859-1+EURO german.iso.kbd de.noacc.kbd
ISO8859-1+EURO german.iso.acc.kbd de.acc.kbd
CP850 german.cp850.kbd de.kbd.from-cp850
ISO8859-7 gr.elot.acc.kbd gr.elot.acc.kbd
@ -66,12 +74,12 @@ ARMSCII-8 hy.armscii-8.kbd am.kbd
ISO8859-1 icelandic.iso.kbd is.kbd
ISO8859-1 icelandic.iso.acc.kbd is.acc.kbd
ISO8859-15 it.iso.kbd it.kbd
ISO8859-1+EURO it.iso.kbd it.kbd
ISO8859-1 jp.106.kbd jp.kbd
ISO8859-1 jp.106x.kbd jp.capsctrl.kbd
ISO8859-1 jp.pc98.kbd jp.pc98.kbd
ISO8859-1 jp.pc98.iso.kbd jp.pc98.iso.kbd
ISO8859-1+YEN jp.106.kbd jp.kbd
ISO8859-1+YEN jp.106x.kbd jp.capsctrl.kbd
ISO8859-1+YEN jp.pc98.kbd jp.pc98.kbd
ISO8859-1+YEN jp.pc98.iso.kbd jp.pc98.iso.kbd
PT154 kk.pt154.kst.kbd kz.kst.kbd
PT154 kk.pt154.io.kbd kz.io.kbd
@ -87,8 +95,8 @@ ISO8859-1 norwegian.dvorak.kbd no.dvorak.kbd
ISO8859-2 pl_PL.ISO8859-2.kbd pl.kbd
ISO8859-2 pl_PL.dvorak.kbd pl.dvorak.kbd
ISO8859-15 pt.iso.kbd pt.kbd
ISO8859-15 pt.iso.acc.kbd pt.acc.kbd
ISO8859-1+EURO pt.iso.kbd pt.kbd
ISO8859-1+EURO pt.iso.acc.kbd pt.acc.kbd
CP866 ru.cp866.kbd ru.kbd.from-cp866
ISO8859-5 ru.iso5.kbd ru.kbd.from-iso5
@ -96,31 +104,31 @@ KOI8-R ru.koi8-r.kbd ru.kbd
KOI8-R ru.koi8-r.shift.kbd ru.shift.kbd
KOI8-R ru.koi8-r.win.kbd ru.win.kbd
ISO8859-15 spanish.dvorak.kbd es.dvorak.kbd
ISO8859-1 spanish.iso.kbd es.kbd.from-iso1
ISO8859-1 spanish.iso.acc.kbd es.acc.kbd
ISO8859-15 spanish.iso15.acc.kbd es.kbd
ISO8859-1+EURO spanish.dvorak.kbd es.dvorak.kbd
ISO8859-1+EURO spanish.iso.kbd es.kbd.from-iso1
ISO8859-1+EURO spanish.iso.acc.kbd es.acc.kbd
ISO8859-1+EURO spanish.iso15.acc.kbd es.kbd
ISO8859-2 si.iso.kbd si.kbd
ISO8859-2 sk.iso2.kbd sk.kbd
ISO8859-1 swedish.iso.kbd se.kbd
ISO8859-1+EURO swedish.iso.kbd se.kbd
CP850 swedish.cp850.kbd se.kbd.from-cp850
ISO8859-1 swissfrench.iso.kbd ch-fr.kbd
ISO8859-1 swissfrench.iso.acc.kbd ch-fr.acc.kbd
ISO8859-1+EURO swissfrench.iso.kbd ch-fr.kbd
ISO8859-1+EURO swissfrench.iso.acc.kbd ch-fr.acc.kbd
CP850 swissfrench.cp850.kbd ch-fr.kbd.from-cp850
ISO8859-1 swissgerman.iso.kbd ch.kbd
ISO8859-1 swissgerman.iso.acc.kbd ch.acc.kbd
ISO8859-1+EURO swissgerman.iso.kbd ch.kbd
ISO8859-1+EURO swissgerman.iso.acc.kbd ch.acc.kbd
CP850 swissgerman.cp850.kbd ch.kbd.from-cp850
ISO8859-1 swissgerman.macbook.acc.kbd ch.macbook.acc.kbd
ISO8859-1+EURO swissgerman.macbook.acc.kbd ch.macbook.acc.kbd
ISO8859-9 tr.iso9.q.kbd tr.kbd
ISO8859-15 uk.iso.kbd uk.kbd
ISO8859-15 uk.iso-ctrl.kbd uk.capsctrl.kbd
ISO8859-1+EURO uk.iso.kbd uk.kbd
ISO8859-1+EURO uk.iso-ctrl.kbd uk.capsctrl.kbd
#CP850 uk.cp850.kbd uk.kbd.from-cp850 (no ¤ and different Alt/Alt-Shift encodings)
#CP850 uk.cp850-ctrl.kbd uk.capsctrl.kbd.from-cp850 (no ¤ and different Alt/Alt-Shift encodings)
ISO8859-15 uk.dvorak.kbd uk.dvorak.kbd

View File

@ -6,9 +6,26 @@ use Encode;
use strict;
use utf8;
die "Usage: $0 filename.kbd CHARSET" unless ($ARGV[1]);
my $converter = Text::Iconv->new($ARGV[1], "UTF-8");
# command line parsing
# Expected arguments: input .kbd file, source charset, then zero or more
# option words.
# NOTE(review): the usage text below only mentions EURO, but the option loop
# further down also accepts YEN -- consider updating the message.
die "Usage: $0 filename.kbd CHARSET [EURO]"
unless ($ARGV[1]);
my $inputfile = shift; # first command argument
my $converter = Text::Iconv->new(shift, "UTF-8"); # second argument
# Option flags; they stay undefined unless the matching option word is given.
my $use_euro;
my $use_yen;
# Conversion state shared between the main loop (which resets it) and
# local_to_UCS_code (which reads it and fills in $current_char).
my $current_char;
my $current_scancode;
# Consume the remaining arguments as option words: "EURO" and "YEN" set the
# corresponding flag, any other word aborts with an error. The loop ends at
# the first argument that is false in boolean context (normally: no more
# arguments).
while (my $arg = shift) {
$use_euro = 1, next
if $arg eq "EURO";
$use_yen = 1, next
if $arg eq "YEN";
die "Unknown encoding option '$arg'\n";
}
# converter functions
sub local_to_UCS_string
{
my ($string) = @_;
@ -18,21 +35,35 @@ sub local_to_UCS_string
# Format a numeric character code as a keymap token:
#   - codes 32..126 are returned as the quoted ASCII character, e.g. 'a'
#   - codes up to 255 are returned as a two-digit hex number, e.g. 0xa4
#   - anything larger is returned as a four-digit hex number, e.g. 0x20ac
# NOTE(review): the argument is unpacked twice below (as $code and as
# $ucs_char) and the $code statements come first, so the $ucs_char
# statements are never reached as written. This looks like the pre-rename
# and post-rename versions of the same body shown together -- confirm
# against the committed file.
sub prettyprint_token
{
my ($code) = @_;
my ($ucs_char) = @_;
return "'" . chr($code) . "'"
if 32 <= $code and $code <= 126; # print as ASCII if possible
# return sprintf "%d", $code; # <---- temporary decimal
return sprintf "0x%02x", $code
if $code <= 255; # print as hex number, else
return sprintf "0x%04x", $code;
return "'" . chr($ucs_char) . "'"
if 32 <= $ucs_char and $ucs_char <= 126; # print as ASCII if possible
# return sprintf "%d", $ucs_char; # <---- temporary decimal
return sprintf "0x%02x", $ucs_char
if $ucs_char <= 255; # print as hex number, else
return sprintf "0x%04x", $ucs_char;
}
# Convert one character given in the source charset into a keymap token.
# The character is converted to UTF-8 via local_to_UCS_string, decoded to its
# code point, optionally replaced according to the EURO/YEN options, and
# finally formatted by prettyprint_token.
sub local_to_UCS_code
{
my ($char) = @_;
# NOTE(review): this unconditional return comes before the option handling
# below, so none of the following statements run as written. It looks like
# the pre-change one-line body shown together with its replacement --
# confirm against the committed file.
return prettyprint_token(ord(Encode::decode("UTF-8", local_to_UCS_string($char))));
my $ucs_char = ord(Encode::decode("UTF-8", local_to_UCS_string($char)));
# Record the lower-cased first character converted since $current_char was
# last cleared; it is compared against "e" for the Euro substitution below.
# NOTE(review): the print("SETCUR: ...") writes to the default output handle,
# the same one the converted keymap is printed to; it looks like leftover
# debug output -- confirm it is intended.
$current_char = lc(chr($ucs_char)), print("SETCUR: $ucs_char\n")
if $current_char eq "";
# EURO option: the generic currency sign (0xa4) becomes the Euro sign, but
# only when the recorded current character is "e".
$ucs_char = 0x20ac # replace with Euro character
if $ucs_char == 0xa4 and $use_euro and $current_char eq "e";
# YEN option: a backslash becomes the Yen sign (0xa5), but only on the two
# scancodes checked here (125 and 13).
$ucs_char = 0xa5 # replace with Jap. Yen character on PC kbd
if $ucs_char == ord('\\') and $use_yen and $current_scancode == 125;
$ucs_char = 0xa5 # replace with Jap. Yen character on PC98x1 kbd
if $ucs_char == ord('\\') and $use_yen and $current_scancode == 13;
return prettyprint_token($ucs_char);
}
sub malformed_to_UCS_code
@ -62,7 +93,6 @@ sub convert_token
sub tokenize { # split on white space and parentheses (but not within token)
my ($line) = @_;
#print "<< $line";
$line =~ s/'\('/ _lpar_ /g; # prevent splitting of '('
$line =~ s/'\)'/ _rpar_ /g; # prevent splitting of ')'
$line =~ s/'''/'_squote_'/g; # remove quoted single quotes from matches below
@ -70,7 +100,6 @@ sub tokenize { # split on white space and parentheses (but not within token)
my $matches;
do {
$matches = ($line =~ s/^([^']*)'([^']+)'/$1_squoteL_$2_squoteR_/g);
# print "-> $line<> $matches: ('$1','$2')\n";
} while $matches;
$line =~ s/_squoteL_ _squoteR_/ _spc_ /g; # prevent splitting of ' '
my @KEYTOKEN = split (" ", $line);
@ -78,12 +107,11 @@ sub tokenize { # split on white space and parentheses (but not within token)
grep(s/_spc_/' '/, @KEYTOKEN);
grep(s/_lpar_/'('/, @KEYTOKEN);
grep(s/_rpar_/')'/, @KEYTOKEN);
#printf ">> $line%s\n", join('|', @KEYTOKEN);
return @KEYTOKEN;
}
# main program
open FH, "<$ARGV[0]";
open FH, "<$inputfile";
while (<FH>) {
if (m/^#/) {
print local_to_UCS_string($_);
@ -95,7 +123,10 @@ while (<FH>) {
my $C;
foreach $C (@KEYTOKEN) {
if ($at_bol) {
$current_char = "";
$current_scancode = -1;
if ($C =~ m/^\s*\d/) { # line begins with key code number
$current_scancode = $C;
printf " %03d ", $C;
} elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren
printf " %-4s ", $C; # accent name starts accent definition
@ -109,6 +140,7 @@ while (<FH>) {
if ($C =~ m/^([BCNO])$/) {
print " $1"; # special case: effect of Caps Lock/Num Lock
} elsif ($C eq "(") {
$current_char = "";
print " ( ";
} elsif ($C eq ")") {
print " )";

View File

@ -83,12 +83,13 @@ my $kbdfile;
foreach $kbdfile (glob("$dir_keymaps_syscons/*.kbd")) {
my $basename;
($basename = $kbdfile) =~ s:.*/::;
my $encoding = $ENCODING{$basename};
my ($encoding) = $ENCODING{$basename};
$encoding =~ s/\+/ /g; # e.g. "ISO8859-1+EURO" -> "ISO8859-1 EURO"
my $outfile = $FILE_NEW{$basename};
if ($encoding and $outfile) {
if (-r $kbdfile) {
print "converting from '$basename' ($encoding) to '$outfile' (Unicode)\n";
my $cmdline = "$dir_convtool/convert-keymap.pl $kbdfile $ENCODING{$basename} > $dir_keymaps_output/$outfile";
print "converting from '$basename' ($encoding) to '$outfile' (UCS)\n";
my $cmdline = "$dir_convtool/convert-keymap.pl $kbdfile $encoding > $dir_keymaps_output/$outfile";
system "$cmdline";
} else {
print "$kbdfile not found\n";