The conversion tools have been further improved and some erroneous
conversions have been detected and fixed. It is now possible to add options after the encoding in the parameter list for convert-keymap.pl. This is currently used to selectively enable interpretation of the ISO8859-1 currency symbol as the Euro sign found in ISO8859-15, or to add a Yen symbol in place of '\' for specific Japanese keyboards. On the command line, the options are appended to the parameter list, as in e.g. "convert-keymap.pl german.iso.kbd ISO8859-1 EURO". In KBDFILES.map, the options are instead appended to the encoding in the form "+EURO" or "+YEN", to keep the meaning of the columns intact. MFC after: 3 days
This commit is contained in:
parent
81ade99c2c
commit
5a0fb8c8d6
@ -1,7 +1,15 @@
|
||||
# $FreeBSD$
|
||||
|
||||
ISO8859-15 be.iso.kbd be.kbd
|
||||
ISO8859-15 be.iso.acc.kbd be.acc.kbd
|
||||
#
|
||||
# The files are converted by "convert-keymaps.pl" from the given encoding to UCS.
|
||||
#
|
||||
# An additional "+EURO" causes the translation of the generic currency symbol to
|
||||
# a Euro symbol, even if the source locale does not support a Euro symbol.
|
||||
# This conversion is only performed for the "E" key (not e.g. on Shift-4, which
|
||||
# still generates the currency symbol).
|
||||
#
|
||||
# Encoding syscons file name newcons (vt) file name
|
||||
ISO8859-1+EURO be.iso.kbd be.kbd
|
||||
ISO8859-1+EURO be.iso.acc.kbd be.acc.kbd
|
||||
|
||||
ISO8859-5 bg.bds.ctrlcaps.kbd bg.bds.kbd
|
||||
ISO8859-5 bg.phonetic.ctrlcaps.kbd bg.bds.ctrlcaps.kbd
|
||||
@ -14,15 +22,15 @@ CP850 br275.cp850.kbd br.noacc.kbd
|
||||
#CP1251 by.cp1251.kbd by.kbd.from-cp1251 (result identical to CP1251)
|
||||
ISO8859-5 by.iso5.kbd by.kbd
|
||||
|
||||
ISO8859-2 ce.iso2.kbd centraleuropean.kbd
|
||||
ISO8859-2 ce.iso2.kbd centraleuropean.qwerty.kbd
|
||||
|
||||
ISO8859-1 colemak.iso15.acc.kbd colemak.kbd
|
||||
|
||||
ISO8859-2 cs.latin2.qwertz.kbd cz.kbd
|
||||
ISO8859-2 cz.iso2.kbd cz.kbd.from-ce
|
||||
ISO8859-2 cz.iso2.kbd cz.qwerty.kbd.from-ce
|
||||
|
||||
ISO8859-15 danish.iso.kbd dk.kbd
|
||||
ISO8859-15 danish.iso.acc.kbd dk.acc.kbd
|
||||
ISO8859-1+EURO danish.iso.kbd dk.kbd
|
||||
ISO8859-1+EURO danish.iso.acc.kbd dk.acc.kbd
|
||||
CP865 danish.cp865.kbd dk.kbd.from-cp865
|
||||
ISO8859-1 danish.iso.macbook.kbd dk.macbook.kbd
|
||||
|
||||
@ -36,19 +44,19 @@ ISO8859-1 estonian.iso.kbd ee.kbd.from-iso1
|
||||
ISO8859-15 estonian.iso15.kbd ee.kbd
|
||||
CP850 estonian.cp850.kbd ee.kbd.from-cp850
|
||||
|
||||
ISO8859-15 finnish.iso.kbd fi.kbd
|
||||
ISO8859-1+EURO finnish.iso.kbd fi.kbd
|
||||
CP850 finnish.cp850.kbd fi.kbd.from-cp850
|
||||
|
||||
ISO8859-15 fr.iso.kbd fr.kbd
|
||||
ISO8859-15 fr.iso.acc.kbd fr.acc.kbd
|
||||
ISO8859-15 fr.macbook.acc.kbd fr.macbook.kbd
|
||||
ISO8859-1 fr.dvorak.kbd fr.dvorak.kbd
|
||||
ISO8859-15 fr.dvorak.acc.kbd fr.dvorak.acc.kbd
|
||||
ISO8859-1+EURO fr.iso.kbd fr.kbd
|
||||
ISO8859-1+EURO fr.iso.acc.kbd fr.acc.kbd
|
||||
ISO8859-1+EURO fr.macbook.acc.kbd fr.macbook.kbd
|
||||
ISO8859-1+EURO fr.dvorak.kbd fr.dvorak.kbd
|
||||
ISO8859-1 fr.dvorak.acc.kbd fr.dvorak.acc.kbd
|
||||
|
||||
ISO8859-15 fr_CA.iso.acc.kbd ca-fr.kbd
|
||||
ISO8859-1+EURO fr_CA.iso.acc.kbd ca-fr.kbd
|
||||
|
||||
ISO8859-15 german.iso.kbd de.kbd
|
||||
ISO8859-15 german.iso.acc.kbd de.acc.kbd
|
||||
ISO8859-1+EURO german.iso.kbd de.noacc.kbd
|
||||
ISO8859-1+EURO german.iso.acc.kbd de.acc.kbd
|
||||
CP850 german.cp850.kbd de.kbd.from-cp850
|
||||
|
||||
ISO8859-7 gr.elot.acc.kbd gr.elot.acc.kbd
|
||||
@ -66,12 +74,12 @@ ARMSCII-8 hy.armscii-8.kbd am.kbd
|
||||
ISO8859-1 icelandic.iso.kbd is.kbd
|
||||
ISO8859-1 icelandic.iso.acc.kbd is.acc.kbd
|
||||
|
||||
ISO8859-15 it.iso.kbd it.kbd
|
||||
ISO8859-1+EURO it.iso.kbd it.kbd
|
||||
|
||||
ISO8859-1 jp.106.kbd jp.kbd
|
||||
ISO8859-1 jp.106x.kbd jp.capsctrl.kbd
|
||||
ISO8859-1 jp.pc98.kbd jp.pc98.kbd
|
||||
ISO8859-1 jp.pc98.iso.kbd jp.pc98.iso.kbd
|
||||
ISO8859-1+YEN jp.106.kbd jp.kbd
|
||||
ISO8859-1+YEN jp.106x.kbd jp.capsctrl.kbd
|
||||
ISO8859-1+YEN jp.pc98.kbd jp.pc98.kbd
|
||||
ISO8859-1+YEN jp.pc98.iso.kbd jp.pc98.iso.kbd
|
||||
|
||||
PT154 kk.pt154.kst.kbd kz.kst.kbd
|
||||
PT154 kk.pt154.io.kbd kz.io.kbd
|
||||
@ -87,8 +95,8 @@ ISO8859-1 norwegian.dvorak.kbd no.dvorak.kbd
|
||||
ISO8859-2 pl_PL.ISO8859-2.kbd pl.kbd
|
||||
ISO8859-2 pl_PL.dvorak.kbd pl.dvorak.kbd
|
||||
|
||||
ISO8859-15 pt.iso.kbd pt.kbd
|
||||
ISO8859-15 pt.iso.acc.kbd pt.acc.kbd
|
||||
ISO8859-1+EURO pt.iso.kbd pt.kbd
|
||||
ISO8859-1+EURO pt.iso.acc.kbd pt.acc.kbd
|
||||
|
||||
CP866 ru.cp866.kbd ru.kbd.from-cp866
|
||||
ISO8859-5 ru.iso5.kbd ru.kbd.from-iso5
|
||||
@ -96,31 +104,31 @@ KOI8-R ru.koi8-r.kbd ru.kbd
|
||||
KOI8-R ru.koi8-r.shift.kbd ru.shift.kbd
|
||||
KOI8-R ru.koi8-r.win.kbd ru.win.kbd
|
||||
|
||||
ISO8859-15 spanish.dvorak.kbd es.dvorak.kbd
|
||||
ISO8859-1 spanish.iso.kbd es.kbd.from-iso1
|
||||
ISO8859-1 spanish.iso.acc.kbd es.acc.kbd
|
||||
ISO8859-15 spanish.iso15.acc.kbd es.kbd
|
||||
ISO8859-1+EURO spanish.dvorak.kbd es.dvorak.kbd
|
||||
ISO8859-1+EURO spanish.iso.kbd es.kbd.from-iso1
|
||||
ISO8859-1+EURO spanish.iso.acc.kbd es.acc.kbd
|
||||
ISO8859-1+EURO spanish.iso15.acc.kbd es.kbd
|
||||
|
||||
ISO8859-2 si.iso.kbd si.kbd
|
||||
|
||||
ISO8859-2 sk.iso2.kbd sk.kbd
|
||||
|
||||
ISO8859-1 swedish.iso.kbd se.kbd
|
||||
ISO8859-1+EURO swedish.iso.kbd se.kbd
|
||||
CP850 swedish.cp850.kbd se.kbd.from-cp850
|
||||
|
||||
ISO8859-1 swissfrench.iso.kbd ch-fr.kbd
|
||||
ISO8859-1 swissfrench.iso.acc.kbd ch-fr.acc.kbd
|
||||
ISO8859-1+EURO swissfrench.iso.kbd ch-fr.kbd
|
||||
ISO8859-1+EURO swissfrench.iso.acc.kbd ch-fr.acc.kbd
|
||||
CP850 swissfrench.cp850.kbd ch-fr.kbd.from-cp850
|
||||
|
||||
ISO8859-1 swissgerman.iso.kbd ch.kbd
|
||||
ISO8859-1 swissgerman.iso.acc.kbd ch.acc.kbd
|
||||
ISO8859-1+EURO swissgerman.iso.kbd ch.kbd
|
||||
ISO8859-1+EURO swissgerman.iso.acc.kbd ch.acc.kbd
|
||||
CP850 swissgerman.cp850.kbd ch.kbd.from-cp850
|
||||
ISO8859-1 swissgerman.macbook.acc.kbd ch.macbook.acc.kbd
|
||||
ISO8859-1+EURO swissgerman.macbook.acc.kbd ch.macbook.acc.kbd
|
||||
|
||||
ISO8859-9 tr.iso9.q.kbd tr.kbd
|
||||
|
||||
ISO8859-15 uk.iso.kbd uk.kbd
|
||||
ISO8859-15 uk.iso-ctrl.kbd uk.capsctrl.kbd
|
||||
ISO8859-1+EURO uk.iso.kbd uk.kbd
|
||||
ISO8859-1+EURO uk.iso-ctrl.kbd uk.capsctrl.kbd
|
||||
#CP850 uk.cp850.kbd uk.kbd.from-cp850 (no ¤ and different Alt/Alt-Shift encodings)
|
||||
#CP850 uk.cp850-ctrl.kbd uk.capsctrl.kbd.from-cp850 (no ¤ and different Alt/Alt-Shift encodings)
|
||||
ISO8859-15 uk.dvorak.kbd uk.dvorak.kbd
|
||||
|
@ -6,9 +6,26 @@ use Encode;
|
||||
use strict;
|
||||
use utf8;
|
||||
|
||||
die "Usage: $0 filename.kbd CHARSET" unless ($ARGV[1]);
|
||||
my $converter = Text::Iconv->new($ARGV[1], "UTF-8");
|
||||
# command line parsing
|
||||
die "Usage: $0 filename.kbd CHARSET [EURO]"
|
||||
unless ($ARGV[1]);
|
||||
|
||||
my $inputfile = shift; # first command argument
|
||||
my $converter = Text::Iconv->new(shift, "UTF-8"); # second argument
|
||||
my $use_euro;
|
||||
my $use_yen;
|
||||
my $current_char;
|
||||
my $current_scancode;
|
||||
|
||||
while (my $arg = shift) {
|
||||
$use_euro = 1, next
|
||||
if $arg eq "EURO";
|
||||
$use_yen = 1, next
|
||||
if $arg eq "YEN";
|
||||
die "Unknown encoding option '$arg'\n";
|
||||
}
|
||||
|
||||
# converter functions
|
||||
sub local_to_UCS_string
|
||||
{
|
||||
my ($string) = @_;
|
||||
@ -18,21 +35,35 @@ sub local_to_UCS_string
|
||||
|
||||
sub prettyprint_token
|
||||
{
|
||||
my ($code) = @_;
|
||||
my ($ucs_char) = @_;
|
||||
|
||||
return "'" . chr($code) . "'"
|
||||
if 32 <= $code and $code <= 126; # print as ASCII if possible
|
||||
# return sprintf "%d", $code; # <---- temporary decimal
|
||||
return sprintf "0x%02x", $code
|
||||
if $code <= 255; # print as hex number, else
|
||||
return sprintf "0x%04x", $code;
|
||||
return "'" . chr($ucs_char) . "'"
|
||||
if 32 <= $ucs_char and $ucs_char <= 126; # print as ASCII if possible
|
||||
# return sprintf "%d", $ucs_char; # <---- temporary decimal
|
||||
return sprintf "0x%02x", $ucs_char
|
||||
if $ucs_char <= 255; # print as hex number, else
|
||||
return sprintf "0x%04x", $ucs_char;
|
||||
}
|
||||
|
||||
sub local_to_UCS_code
|
||||
{
|
||||
my ($char) = @_;
|
||||
|
||||
return prettyprint_token(ord(Encode::decode("UTF-8", local_to_UCS_string($char))));
|
||||
my $ucs_char = ord(Encode::decode("UTF-8", local_to_UCS_string($char)));
|
||||
|
||||
$current_char = lc(chr($ucs_char)), print("SETCUR: $ucs_char\n")
|
||||
if $current_char eq "";
|
||||
|
||||
$ucs_char = 0x20ac # replace with Euro character
|
||||
if $ucs_char == 0xa4 and $use_euro and $current_char eq "e";
|
||||
|
||||
$ucs_char = 0xa5 # replace with Jap. Yen character on PC kbd
|
||||
if $ucs_char == ord('\\') and $use_yen and $current_scancode == 125;
|
||||
|
||||
$ucs_char = 0xa5 # replace with Jap. Yen character on PC98x1 kbd
|
||||
if $ucs_char == ord('\\') and $use_yen and $current_scancode == 13;
|
||||
|
||||
return prettyprint_token($ucs_char);
|
||||
}
|
||||
|
||||
sub malformed_to_UCS_code
|
||||
@ -62,7 +93,6 @@ sub convert_token
|
||||
sub tokenize { # split on white space and parentheses (but not within token)
|
||||
my ($line) = @_;
|
||||
|
||||
#print "<< $line";
|
||||
$line =~ s/'\('/ _lpar_ /g; # prevent splitting of '('
|
||||
$line =~ s/'\)'/ _rpar_ /g; # prevent splitting of ')'
|
||||
$line =~ s/'''/'_squote_'/g; # remove quoted single quotes from matches below
|
||||
@ -70,7 +100,6 @@ sub tokenize { # split on white space and parentheses (but not within token)
|
||||
my $matches;
|
||||
do {
|
||||
$matches = ($line =~ s/^([^']*)'([^']+)'/$1_squoteL_$2_squoteR_/g);
|
||||
# print "-> $line<> $matches: ('$1','$2')\n";
|
||||
} while $matches;
|
||||
$line =~ s/_squoteL_ _squoteR_/ _spc_ /g; # prevent splitting of ' '
|
||||
my @KEYTOKEN = split (" ", $line);
|
||||
@ -78,12 +107,11 @@ sub tokenize { # split on white space and parentheses (but not within token)
|
||||
grep(s/_spc_/' '/, @KEYTOKEN);
|
||||
grep(s/_lpar_/'('/, @KEYTOKEN);
|
||||
grep(s/_rpar_/')'/, @KEYTOKEN);
|
||||
#printf ">> $line%s\n", join('|', @KEYTOKEN);
|
||||
return @KEYTOKEN;
|
||||
}
|
||||
|
||||
# main program
|
||||
open FH, "<$ARGV[0]";
|
||||
open FH, "<$inputfile";
|
||||
while (<FH>) {
|
||||
if (m/^#/) {
|
||||
print local_to_UCS_string($_);
|
||||
@ -95,7 +123,10 @@ while (<FH>) {
|
||||
my $C;
|
||||
foreach $C (@KEYTOKEN) {
|
||||
if ($at_bol) {
|
||||
$current_char = "";
|
||||
$current_scancode = -1;
|
||||
if ($C =~ m/^\s*\d/) { # line begins with key code number
|
||||
$current_scancode = $C;
|
||||
printf " %03d ", $C;
|
||||
} elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren
|
||||
printf " %-4s ", $C; # accent name starts accent definition
|
||||
@ -109,6 +140,7 @@ while (<FH>) {
|
||||
if ($C =~ m/^([BCNO])$/) {
|
||||
print " $1"; # special case: effect of Caps Lock/Num Lock
|
||||
} elsif ($C eq "(") {
|
||||
$current_char = "";
|
||||
print " ( ";
|
||||
} elsif ($C eq ")") {
|
||||
print " )";
|
||||
|
@ -83,12 +83,13 @@ my $kbdfile;
|
||||
foreach $kbdfile (glob("$dir_keymaps_syscons/*.kbd")) {
|
||||
my $basename;
|
||||
($basename = $kbdfile) =~ s:.*/::;
|
||||
my $encoding = $ENCODING{$basename};
|
||||
my ($encoding) = $ENCODING{$basename};
|
||||
$encoding =~ s/\+/ /g; # e.g. "ISO8859-1+EURO" -> "ISO8859-1 EURO"
|
||||
my $outfile = $FILE_NEW{$basename};
|
||||
if ($encoding and $outfile) {
|
||||
if (-r $kbdfile) {
|
||||
print "converting from '$basename' ($encoding) to '$outfile' (Unicode)\n";
|
||||
my $cmdline = "$dir_convtool/convert-keymap.pl $kbdfile $ENCODING{$basename} > $dir_keymaps_output/$outfile";
|
||||
print "converting from '$basename' ($encoding) to '$outfile' (UCS)\n";
|
||||
my $cmdline = "$dir_convtool/convert-keymap.pl $kbdfile $encoding > $dir_keymaps_output/$outfile";
|
||||
system "$cmdline";
|
||||
} else {
|
||||
print "$kbdfile not found\n";
|
||||
|
Loading…
Reference in New Issue
Block a user