The conversion tools have been further improved and some erroneous conversions have been detected and fixed.

It is now possible to add options after the encoding in the parameter list for convert-keymap.pl. This is currently used to selectively enable interpretation of the ISO8859-1 currency symbol as the Euro sign found in ISO8859-15, or to add a Yen symbol in place of '\' for specific Japanese keyboards. On the command line, the options are appended to the parameter list, e.g. "convert-keymap.pl german.iso.kbd ISO8859-1 EURO". In KBDFILES.map, the options are appended to the encoding in the form "+EURO" or "+YEN", to keep the meaning of the columns intact.

MFC after: 3 days
svn path=/head/; revision=270232
2014-08-20 17:07:41 +00:00 · 2014-08-20 17:07:41 +00:00 · 5a0fb8c8d6 · 2020-12-20 02:59:44 +00:00
commit 5a0fb8c8d6
parent 81ade99c2c
3 changed files with 93 additions and 52 deletions
--- a/tools/tools/vt/keymaps/KBDFILES.map
+++ b/tools/tools/vt/keymaps/KBDFILES.map
@ -1,7 +1,15 @@
 # $FreeBSD$
-
-ISO8859-15	be.iso.kbd			be.kbd
-ISO8859-15	be.iso.acc.kbd			be.acc.kbd
+#
+# The files are converted by "convert-keymaps.pl" from the given encoding to UCS.
+#
+# An additional "+EURO" causes the translation of the generic currency symbol to
+# a Euro symbol, even if the source locale does not support a Euro symbol.
+# This conversion is only performed for the "E" key (not e.g. on Shift-4, which 
+# still generates the currency symbol).
+#
+# Encoding      syscons file name               newcons (vt) file name
+ISO8859-1+EURO	be.iso.kbd			be.kbd
+ISO8859-1+EURO	be.iso.acc.kbd			be.acc.kbd

 ISO8859-5	bg.bds.ctrlcaps.kbd		bg.bds.kbd
 ISO8859-5	bg.phonetic.ctrlcaps.kbd	bg.bds.ctrlcaps.kbd
@ -14,15 +22,15 @@ CP850		br275.cp850.kbd			br.noacc.kbd
 #CP1251		by.cp1251.kbd			by.kbd.from-cp1251	(result identical to CP1251)
 ISO8859-5	by.iso5.kbd			by.kbd

-ISO8859-2	ce.iso2.kbd			centraleuropean.kbd
+ISO8859-2	ce.iso2.kbd			centraleuropean.qwerty.kbd

 ISO8859-1	colemak.iso15.acc.kbd		colemak.kbd

 ISO8859-2	cs.latin2.qwertz.kbd		cz.kbd
-ISO8859-2	cz.iso2.kbd			cz.kbd.from-ce
+ISO8859-2	cz.iso2.kbd			cz.qwerty.kbd.from-ce

-ISO8859-15	danish.iso.kbd			dk.kbd
-ISO8859-15	danish.iso.acc.kbd		dk.acc.kbd
+ISO8859-1+EURO	danish.iso.kbd			dk.kbd
+ISO8859-1+EURO	danish.iso.acc.kbd		dk.acc.kbd
 CP865		danish.cp865.kbd		dk.kbd.from-cp865
 ISO8859-1	danish.iso.macbook.kbd		dk.macbook.kbd

@ -36,19 +44,19 @@ ISO8859-1	estonian.iso.kbd		ee.kbd.from-iso1
 ISO8859-15	estonian.iso15.kbd		ee.kbd
 CP850		estonian.cp850.kbd		ee.kbd.from-cp850

-ISO8859-15	finnish.iso.kbd			fi.kbd
+ISO8859-1+EURO	finnish.iso.kbd			fi.kbd
 CP850		finnish.cp850.kbd		fi.kbd.from-cp850

-ISO8859-15	fr.iso.kbd			fr.kbd
-ISO8859-15	fr.iso.acc.kbd			fr.acc.kbd
-ISO8859-15	fr.macbook.acc.kbd		fr.macbook.kbd
-ISO8859-1	fr.dvorak.kbd			fr.dvorak.kbd
-ISO8859-15	fr.dvorak.acc.kbd		fr.dvorak.acc.kbd
+ISO8859-1+EURO	fr.iso.kbd			fr.kbd
+ISO8859-1+EURO	fr.iso.acc.kbd			fr.acc.kbd
+ISO8859-1+EURO	fr.macbook.acc.kbd		fr.macbook.kbd
+ISO8859-1+EURO	fr.dvorak.kbd			fr.dvorak.kbd
+ISO8859-1	fr.dvorak.acc.kbd		fr.dvorak.acc.kbd

-ISO8859-15	fr_CA.iso.acc.kbd		ca-fr.kbd
+ISO8859-1+EURO	fr_CA.iso.acc.kbd		ca-fr.kbd

-ISO8859-15	german.iso.kbd			de.kbd
-ISO8859-15	german.iso.acc.kbd		de.acc.kbd
+ISO8859-1+EURO	german.iso.kbd			de.noacc.kbd
+ISO8859-1+EURO	german.iso.acc.kbd		de.acc.kbd
 CP850		german.cp850.kbd		de.kbd.from-cp850

 ISO8859-7	gr.elot.acc.kbd			gr.elot.acc.kbd
@ -66,12 +74,12 @@ ARMSCII-8	hy.armscii-8.kbd		am.kbd
 ISO8859-1	icelandic.iso.kbd		is.kbd
 ISO8859-1	icelandic.iso.acc.kbd		is.acc.kbd

-ISO8859-15	it.iso.kbd			it.kbd
+ISO8859-1+EURO	it.iso.kbd			it.kbd

-ISO8859-1	jp.106.kbd			jp.kbd
-ISO8859-1	jp.106x.kbd			jp.capsctrl.kbd
-ISO8859-1	jp.pc98.kbd			jp.pc98.kbd
-ISO8859-1	jp.pc98.iso.kbd			jp.pc98.iso.kbd
+ISO8859-1+YEN	jp.106.kbd			jp.kbd
+ISO8859-1+YEN	jp.106x.kbd			jp.capsctrl.kbd
+ISO8859-1+YEN	jp.pc98.kbd			jp.pc98.kbd
+ISO8859-1+YEN	jp.pc98.iso.kbd			jp.pc98.iso.kbd

 PT154		kk.pt154.kst.kbd		kz.kst.kbd
 PT154		kk.pt154.io.kbd			kz.io.kbd
@ -87,8 +95,8 @@ ISO8859-1	norwegian.dvorak.kbd		no.dvorak.kbd
 ISO8859-2	pl_PL.ISO8859-2.kbd		pl.kbd
 ISO8859-2	pl_PL.dvorak.kbd		pl.dvorak.kbd

-ISO8859-15	pt.iso.kbd			pt.kbd
-ISO8859-15	pt.iso.acc.kbd			pt.acc.kbd
+ISO8859-1+EURO	pt.iso.kbd			pt.kbd
+ISO8859-1+EURO	pt.iso.acc.kbd			pt.acc.kbd

 CP866		ru.cp866.kbd			ru.kbd.from-cp866
 ISO8859-5	ru.iso5.kbd			ru.kbd.from-iso5
@ -96,31 +104,31 @@ KOI8-R		ru.koi8-r.kbd			ru.kbd
 KOI8-R		ru.koi8-r.shift.kbd		ru.shift.kbd
 KOI8-R		ru.koi8-r.win.kbd		ru.win.kbd

-ISO8859-15	spanish.dvorak.kbd		es.dvorak.kbd
-ISO8859-1	spanish.iso.kbd			es.kbd.from-iso1
-ISO8859-1	spanish.iso.acc.kbd		es.acc.kbd
-ISO8859-15	spanish.iso15.acc.kbd		es.kbd
+ISO8859-1+EURO	spanish.dvorak.kbd		es.dvorak.kbd
+ISO8859-1+EURO	spanish.iso.kbd			es.kbd.from-iso1
+ISO8859-1+EURO	spanish.iso.acc.kbd		es.acc.kbd
+ISO8859-1+EURO	spanish.iso15.acc.kbd		es.kbd

 ISO8859-2	si.iso.kbd			si.kbd

 ISO8859-2	sk.iso2.kbd			sk.kbd

-ISO8859-1	swedish.iso.kbd			se.kbd
+ISO8859-1+EURO	swedish.iso.kbd			se.kbd
 CP850		swedish.cp850.kbd		se.kbd.from-cp850

-ISO8859-1	swissfrench.iso.kbd		ch-fr.kbd
-ISO8859-1	swissfrench.iso.acc.kbd		ch-fr.acc.kbd
+ISO8859-1+EURO	swissfrench.iso.kbd		ch-fr.kbd
+ISO8859-1+EURO	swissfrench.iso.acc.kbd		ch-fr.acc.kbd
 CP850		swissfrench.cp850.kbd		ch-fr.kbd.from-cp850

-ISO8859-1	swissgerman.iso.kbd		ch.kbd
-ISO8859-1	swissgerman.iso.acc.kbd		ch.acc.kbd
+ISO8859-1+EURO	swissgerman.iso.kbd		ch.kbd
+ISO8859-1+EURO	swissgerman.iso.acc.kbd		ch.acc.kbd
 CP850		swissgerman.cp850.kbd		ch.kbd.from-cp850
-ISO8859-1	swissgerman.macbook.acc.kbd	ch.macbook.acc.kbd
+ISO8859-1+EURO	swissgerman.macbook.acc.kbd	ch.macbook.acc.kbd

 ISO8859-9	tr.iso9.q.kbd			tr.kbd

-ISO8859-15	uk.iso.kbd			uk.kbd
-ISO8859-15	uk.iso-ctrl.kbd			uk.capsctrl.kbd
+ISO8859-1+EURO	uk.iso.kbd			uk.kbd
+ISO8859-1+EURO	uk.iso-ctrl.kbd			uk.capsctrl.kbd
 #CP850		uk.cp850.kbd			uk.kbd.from-cp850		(no ¤ and different Alt/Alt-Shift encodings)
 #CP850		uk.cp850-ctrl.kbd		uk.capsctrl.kbd.from-cp850	(no ¤ and different Alt/Alt-Shift encodings)
 ISO8859-15	uk.dvorak.kbd			uk.dvorak.kbd
--- a/tools/tools/vt/keymaps/convert-keymap.pl
+++ b/tools/tools/vt/keymaps/convert-keymap.pl
@ -6,9 +6,26 @@ use Encode;
 use strict;
 use utf8;

-die "Usage: $0 filename.kbd CHARSET" unless ($ARGV[1]);
-my $converter = Text::Iconv->new($ARGV[1], "UTF-8");
+# command line parsing
+die "Usage: $0 filename.kbd CHARSET [EURO]"
+    unless ($ARGV[1]);

+my $inputfile = shift;					# first command argument
+my $converter = Text::Iconv->new(shift, "UTF-8");	# second argument
+my $use_euro;
+my $use_yen;
+my $current_char;
+my $current_scancode;
+
+while (my $arg = shift) {
+    $use_euro = 1, next
+	if $arg eq "EURO";
+    $use_yen = 1, next
+	if $arg eq "YEN";
+    die "Unknown encoding option '$arg'\n";
+}
+
+# converter functions
 sub local_to_UCS_string
 {
    my ($string) = @_;
@ -18,21 +35,35 @@ sub local_to_UCS_string

 sub prettyprint_token
 {
-    my ($code) = @_;
+    my ($ucs_char) = @_;

-    return "'" . chr($code) . "'"
-        if 32 <= $code and $code <= 126; # print as ASCII if possible
-#    return sprintf "%d", $code; # <---- temporary decimal
-    return sprintf "0x%02x", $code
-        if $code <= 255;        # print as hex number, else
-    return sprintf "0x%04x", $code;
+    return "'" . chr($ucs_char) . "'"
+        if 32 <= $ucs_char and $ucs_char <= 126; # print as ASCII if possible
+#    return sprintf "%d", $ucs_char; # <---- temporary decimal
+    return sprintf "0x%02x", $ucs_char
+        if $ucs_char <= 255;        # print as hex number, else
+    return sprintf "0x%04x", $ucs_char;
 }

 sub local_to_UCS_code
 {
    my ($char) = @_;

-    return prettyprint_token(ord(Encode::decode("UTF-8", local_to_UCS_string($char))));
+    my $ucs_char = ord(Encode::decode("UTF-8", local_to_UCS_string($char)));
+
+    $current_char = lc(chr($ucs_char)), print("SETCUR: $ucs_char\n")
+	if $current_char eq "";
+
+    $ucs_char = 0x20ac	# replace with Euro character
+	if $ucs_char == 0xa4 and $use_euro and $current_char eq "e";
+
+    $ucs_char = 0xa5	# replace with Jap. Yen character on PC kbd
+	if $ucs_char == ord('\\') and $use_yen and $current_scancode == 125;
+
+    $ucs_char = 0xa5	# replace with Jap. Yen character on PC98x1 kbd
+	if $ucs_char == ord('\\') and $use_yen and $current_scancode == 13;
+
+    return prettyprint_token($ucs_char);
 }

 sub malformed_to_UCS_code
@ -62,7 +93,6 @@ sub convert_token
 sub tokenize { # split on white space and parentheses (but not within token)
    my ($line) = @_;

-#print "<< $line";
    $line =~ s/'\('/ _lpar_ /g; # prevent splitting of '('
    $line =~ s/'\)'/ _rpar_ /g; # prevent splitting of ')'
    $line =~ s/'''/'_squote_'/g; # remove quoted single quotes from matches below
@ -70,7 +100,6 @@ sub tokenize { # split on white space and parentheses (but not within token)
    my $matches;
    do {
 	$matches = ($line =~ s/^([^']*)'([^']+)'/$1_squoteL_$2_squoteR_/g);
-#	print "-> $line<> $matches: ('$1','$2')\n";
    } while $matches;
    $line =~ s/_squoteL_ _squoteR_/ _spc_ /g; # prevent splitting of ' '
    my @KEYTOKEN = split (" ", $line);
@ -78,12 +107,11 @@ sub tokenize { # split on white space and parentheses (but not within token)
    grep(s/_spc_/' '/, @KEYTOKEN);
    grep(s/_lpar_/'('/, @KEYTOKEN);
    grep(s/_rpar_/')'/, @KEYTOKEN);
-#printf ">> $line%s\n", join('|', @KEYTOKEN);
    return @KEYTOKEN;
 }

 # main program
-open FH, "<$ARGV[0]";
+open FH, "<$inputfile";
 while (<FH>) {
    if (m/^#/) {
 	print local_to_UCS_string($_);
@ -95,7 +123,10 @@ while (<FH>) {
 	my $C;
 	foreach $C (@KEYTOKEN) {
 	    if ($at_bol) {
+		$current_char = "";
+		$current_scancode = -1;
 		if ($C =~ m/^\s*\d/) { # line begins with key code number
+		    $current_scancode = $C;
 		    printf "  %03d   ", $C;
 		} elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren
 		    printf "  %-4s ", $C; # accent name starts accent definition
@ -109,6 +140,7 @@ while (<FH>) {
 		if ($C =~ m/^([BCNO])$/) {
 		    print " $1"; # special case: effect of Caps Lock/Num Lock
 		} elsif ($C eq "(") {
+		    $current_char = "";
 		    print " ( ";
 		} elsif ($C eq ")") {
 		    print " )";
--- a/tools/tools/vt/keymaps/convert-keymaps.pl
+++ b/tools/tools/vt/keymaps/convert-keymaps.pl
@ -83,12 +83,13 @@ my $kbdfile;
 foreach $kbdfile (glob("$dir_keymaps_syscons/*.kbd")) {
    my $basename;
    ($basename = $kbdfile) =~ s:.*/::;
-    my $encoding = $ENCODING{$basename};
+    my ($encoding) = $ENCODING{$basename};
+    $encoding =~ s/\+/ /g;		# e.g. "ISO8859-1+EURO" -> "ISO8859-1 EURO"
    my $outfile = $FILE_NEW{$basename};
    if ($encoding and $outfile) {
 	if (-r $kbdfile) {
-	    print "converting from '$basename' ($encoding) to '$outfile' (Unicode)\n";
-	    my $cmdline = "$dir_convtool/convert-keymap.pl $kbdfile $ENCODING{$basename} > $dir_keymaps_output/$outfile";
+	    print "converting from '$basename' ($encoding) to '$outfile' (UCS)\n";
+	    my $cmdline = "$dir_convtool/convert-keymap.pl $kbdfile $encoding > $dir_keymaps_output/$outfile";
 	    system "$cmdline";
 	} else {
 	    print "$kbdfile not found\n";