Import the tools used to convert the keymap files from SYSCONS (in
locale-dependent encoding) to NEWCONS (Unicode).

The file "LANG.map" is used to convert INDEX.keymaps. It has 3 columns:
- the language ID as used in the source file
- the language ID to be used in the generated file (e.g. "iw" -> "he")
- the encoding of the menu texts for this language
The conversion result is written to STDOUT.

The file "KBDFILES.map" is used to batch convert keymap files. Its
columns are:
- the encoding used for the keymap source file
- the name of the source file
- the name of the generated file
The output files are created in the OUTPUT sub-directory of the vt keymap
directory, in order to preserve (possibly uncommitted) keymap files in
/usr/src/share/vt/keymaps.

The convert-keymap.pl script can be directly executed by passing the
source file name and the encoding on the command line. It writes to
STDOUT and generates hex Unicode codepoints by default. (This can be
changed to decimal in the script.)

Although they were written for the one-time conversion of the SYSCONS
keymaps into the format required for NEWCONS, I think these tools may
also be useful for converting any further SYSCONS keymap files that have
not been committed to the source tree.
This commit is contained in:
Stefan Eßer 2014-08-18 09:40:19 +00:00
parent 7d6fa255bf
commit ca1351acba
5 changed files with 469 additions and 0 deletions

View File

@ -0,0 +1,141 @@
# $FreeBSD$
ISO8859-15 be.iso.kbd be.kbd
ISO8859-15 be.iso.acc.kbd be.acc.kbd
ISO8859-5 bg.bds.ctrlcaps.kbd bg.bds.kbd
ISO8859-5 bg.phonetic.ctrlcaps.kbd bg.phonetic.kbd
ISO8859-1 br275.iso.kbd br.kbd
ISO8859-1 br275.iso.acc.kbd br.acc.kbd
CP850 br275.cp850.kbd br.kbd.from-cp850
CP1131 by.cp1131.kbd by.kbd.from-cp1131
CP1251 by.cp1251.kbd by.kbd.from-cp1251
ISO8859-5 by.iso5.kbd by.kbd.from-iso5
ISO8859-2 ce.iso2.kbd centraleuropean.kbd
ISO8859-1 colemak.iso15.acc.kbd colemak.kbd
ISO8859-2 cs.latin2.qwertz.kbd cz.kbd
ISO8859-2 cz.iso2.kbd cz.kbd.from-ce
ISO8859-15 danish.iso.kbd dk.kbd
ISO8859-15 danish.iso.acc.kbd dk.acc.kbd
CP865 danish.cp865.kbd dk.kbd.from-cp865
ISO8859-1 danish.iso.macbook.kbd dk.macbook.kbd
ISO8859-1 dutch.iso.acc.kbd nl.kbd
ISO8859-15 eee_nordic.kbd nordic.asus-eee.kbd
ISO8859-7 el.iso07.kbd gr.kbd
ISO8859-1 estonian.iso.kbd ee.kbd.from-iso1
ISO8859-15 estonian.iso15.kbd ee.kbd
CP850 estonian.cp850.kbd ee.kbd.from-cp850
ISO8859-15 finnish.iso.kbd fi.kbd
CP850 finnish.cp850.kbd fi.kbd.from-cp850
ISO8859-15 fr.iso.kbd fr.kbd
ISO8859-15 fr.iso.acc.kbd fr.acc.kbd
ISO8859-15 fr.macbook.acc.kbd fr.macbook.kbd
ISO8859-1 fr.dvorak.kbd fr.dvorak.kbd
ISO8859-15 fr.dvorak.acc.kbd fr.dvorak.acc.kbd
ISO8859-15 fr_CA.iso.acc.kbd ca-fr.kbd
ISO8859-15 german.iso.kbd de.kbd
ISO8859-15 german.iso.acc.kbd de.acc.kbd
CP850 german.cp850.kbd de.kbd.from-cp850
ISO8859-7 gr.elot.acc.kbd gr.elot.acc.kbd
ISO8859-7 gr.us101.acc.kbd gr.101.acc.kbd
ISO8859-8 iw.iso8.kbd il.kbd
ISO8859-2 hr.iso.kbd hr.kbd
ISO8859-2 hu.iso2.101keys.kbd hu.101.kbd
ISO8859-2 hu.iso2.102keys.kbd hu.102.kbd
ARMSCII-8 hy.armscii-8.kbd am.kbd
ISO8859-1 icelandic.iso.kbd is.kbd
ISO8859-1 icelandic.iso.acc.kbd is.acc.kbd
ISO8859-15 it.iso.kbd it.kbd
ISO8859-1 jp.106.kbd jp.kbd
ISO8859-1 jp.106x.kbd jp.capsctrl.kbd
ISO8859-1 jp.pc98.kbd jp.pc98.kbd
ISO8859-1 jp.pc98.iso.kbd jp.pc98.iso.kbd
PT154 kk.pt154.kst.kbd kz.kst.kbd
PT154 kk.pt154.io.kbd kz.io.kbd
ISO8859-1 latinamerican.kbd latinamerican.kbd
ISO8859-1 latinamerican.iso.acc.kbd latinamerican.acc.kbd
ISO8859-4 lt.iso4.kbd lt.kbd
ISO8859-1 norwegian.iso.kbd no.kbd
ISO8859-1 norwegian.dvorak.kbd no.dvorak.kbd
ISO8859-2 pl_PL.ISO8859-2.kbd pl.kbd
ISO8859-2 pl_PL.dvorak.kbd pl.dvorak.kbd
ISO8859-15 pt.iso.kbd pt.kbd
ISO8859-15 pt.iso.acc.kbd pt.acc.kbd
CP866 ru.cp866.kbd ru.kbd.from-cp866
ISO8859-5 ru.iso5.kbd ru.kbd.from-iso5
KOI8-R ru.koi8-r.kbd ru.kbd
KOI8-R ru.koi8-r.shift.kbd ru.shift.kbd
KOI8-R ru.koi8-r.win.kbd ru.win.kbd
ISO8859-15 spanish.dvorak.kbd es.dvorak.kbd
ISO8859-1 spanish.iso.kbd es.kbd.from-iso1
ISO8859-1 spanish.iso.acc.kbd es.acc.kbd
ISO8859-15 spanish.iso15.acc.kbd es.kbd
ISO8859-2 si.iso.kbd si.kbd
ISO8859-2 sk.iso2.kbd sk.kbd
ISO8859-1 swedish.iso.kbd se.kbd
CP850 swedish.cp850.kbd se.kbd.from-cp850
ISO8859-1 swissfrench.iso.kbd ch-fr.kbd
ISO8859-1 swissfrench.iso.acc.kbd ch-fr.acc.kbd
CP850 swissfrench.cp850.kbd ch-fr.kbd.from-cp850
ISO8859-1 swissgerman.iso.kbd ch.kbd
ISO8859-1 swissgerman.iso.acc.kbd ch.acc.kbd
CP850 swissgerman.cp850.kbd ch.kbd.from-cp850
ISO8859-1 swissgerman.macbook.acc.kbd ch.macbook.acc.kbd
ISO8859-9 tr.iso9.q.kbd tr.kbd
ISO8859-1 uk.iso.kbd uk.kbd
ISO8859-1 uk.iso-ctrl.kbd uk.capsctrl.kbd
CP850 uk.cp850.kbd uk.kbd.from-cp850
CP850 uk.cp850-ctrl.kbd uk.capsctrl.kbd.from-cp850
ISO8859-1 uk.dvorak.kbd uk.dvorak.kbd
ISO8859-1 us.iso.kbd us.kbd
ISO8859-1 us.iso.acc.kbd us.acc.kbd
ISO8859-1 us.dvorak.kbd us.dvorak.kbd
ISO8859-1 us.dvorakr.kbd us.dvorakr.kbd
ISO8859-1 us.dvorakl.kbd us.dvorakl.kbd
ISO8859-1 us.dvorakp.kbd us.dvorakp.kbd
ISO8859-1 us.dvorakx.kbd us.dvorakx.kbd
ISO8859-1 us.emacs.kbd us.emacs.kbd
ISO8859-1 us.pc-ctrl.kbd us.ctrl.kbd
ISO8859-1 us.unix.kbd us.unix.kbd
ISO8859-5 ua.iso5.kbd ua.kbd.from-iso5
KOI8-U ua.koi8-u.kbd ua.kbd
KOI8-U ua.koi8-u.shift.alt.kbd ua.shift.alt.kbd

View File

@ -0,0 +1,29 @@
# $FreeBSD$
bg bg ISO8859-5
cs cs ISO8859-2
da da ISO8859-15
de de ISO8859-15
el el ISO8859-7
en en ISO8859-1
es es ISO8859-1
fi fi ISO8859-1
fr fr ISO8859-1
hr hr ISO8859-2
hu hu ISO8859-2
hy hy ARMSCII-8
is is ISO8859-1
it it ISO8859-15
iw he ISO8859-8
ja ja ISO8859-1
kk kk PT154
nl nl ISO8859-15
no no ISO8859-1
pl pl ISO8859-2
pt pt ISO8859-15
ro ro ISO8859-1
ru ru KOI8-R
sk sk ISO8859-2
sl sl ISO8859-2
sv sv ISO8859-1
tr tr ISO8859-9
uk uk KOI8-U

View File

@ -0,0 +1,94 @@
#!/usr/local/bin/perl
# $FreeBSD$
use Text::Iconv;
use Encode;
use strict;
use utf8;
# Directories and file names.
# The map files are looked up in the directory this script was started from;
# when $0 carries no directory component the current directory is used.
my ($script_dir) = $0 =~ m:^(.*)/:;
my $dir_convtool        = $script_dir || ".";
my $dir_keymaps_config  = $dir_convtool;
my $dir_keymaps_syscons = "/usr/src/share/syscons/keymaps";
my $dir_keymaps_vt      = "/usr/src/share/vt/keymaps";
my $dir_keymaps_output  = $dir_keymaps_vt . "/OUTPUT";
my $keymap_index        = $dir_keymaps_syscons . "/INDEX.keymaps";
my $language_map        = $dir_keymaps_config . "/LANG.map";
my $keymapfile_map      = $dir_keymaps_config . "/KBDFILES.map";

# Lookup tables, filled from the map files by the main program.
my %LANG_NEW = ();      # key: old language ID        -> new language ID
my %ENCODING = ();      # key: language ID or old file -> source encoding
my %FILE_NEW = ();      # key: old file name          -> new file name
# subroutines
# Convert a string from a legacy character set to UTF-8.
#
# Parameters:
#   $string  - text in the legacy encoding
#   $old_enc - iconv name of that encoding (e.g. "ISO8859-15")
# Returns the value of Text::Iconv's convert(), i.e. the UTF-8 text, or
# undef when the conversion failed. A failure is additionally reported on
# the currently selected output handle.
sub local_to_UCS_string
{
    my ($string, $old_enc) = @_;
    my $converter = Text::Iconv->new($old_enc, "UTF-8");
    my $result = $converter->convert($string);
    # Test for definedness, not truth: "0" and "" are valid results.
    # Use print instead of printf so that a '%' inside $string is not
    # interpreted as a format directive.
    print "!!! conversion failed for '$string' ($old_enc)\n"
        unless defined $result;
    return $result;
}
# Translate a comma-separated list of old language IDs into the
# corresponding comma-separated list of new IDs taken from %LANG_NEW.
# IDs without an entry in %LANG_NEW contribute an empty string.
# Returns undef for an empty list.
sub lang_fixup {
    my ($langlist) = @_;
    my $result;
    foreach my $old_id (split(/,/, $langlist)) {
        if ($result) {
            # something has been collected already: separate with a comma
            $result .= "," . $LANG_NEW{$old_id};
        } else {
            $result .= $LANG_NEW{$old_id};
        }
    }
    return $result;
}
# main program
#
# Reads LANG.map and KBDFILES.map, then copies INDEX.keymaps to STDOUT with
# file names and language IDs replaced by their new values and the menu
# texts converted to UTF-8.

# LANG.map columns: old language ID, new language ID, menu text encoding.
open(my $langmap_fh, "<", $language_map)
    or die "cannot open $language_map: $!\n";
while (my $line = <$langmap_fh>) {
    next if $line =~ m/^#/;         # comment line
    next unless $line =~ m/\S/;     # blank line carries no mapping
    my ($lang_old, $lang_new, $encoding) = split(" ", $line);
    $LANG_NEW{$lang_old} = $lang_new;
    $ENCODING{$lang_old} = $encoding;
    $ENCODING{$lang_new} = $encoding;
}
close($langmap_fh);

$FILE_NEW{"MENU"} = "MENU"; # dummy identity mapping
$FILE_NEW{"FONT"} = "FONT"; # dummy identity mapping

# KBDFILES.map columns: source encoding, old file name, new file name.
open(my $filemap_fh, "<", $keymapfile_map)
    or die "cannot open $keymapfile_map: $!\n";
while (my $line = <$filemap_fh>) {
    next if $line =~ m/^#/;
    my ($encoding, $file_old, $file_new) = split(" ", $line);
    # incomplete lines (including blank ones) are ignored
    if ($encoding and $file_old and $file_new) {
        $ENCODING{$file_old} = $encoding;
        $FILE_NEW{$file_old} = $file_new;
    }
}
close($filemap_fh);

open(my $index_fh, "<", $keymap_index)
    or die "cannot open $keymap_index: $!\n";
while (my $line = <$index_fh>) {
    if ($line =~ m/^$/ or $line =~ m/^#/) {
        # blank lines and comments are copied unchanged
        print $line;
        next;
    }
    # Limit the split to three fields so that a ':' inside the menu text
    # does not truncate the text (and drop its trailing newline).
    my ($file_old, $langlist, $menutext) = split(/:/, $line, 3);
    my ($lang) = split(/,/, $langlist); # first language in list selects encoding
    warn "no new file name known for '$file_old'\n"
        unless defined $FILE_NEW{$file_old};
    if ($file_old ne "FONT") {
        # FONT entries are passed through without conversion
        warn "no encoding known for language '$lang'\n"
            unless defined $ENCODING{$lang};
        $menutext = local_to_UCS_string($menutext, $ENCODING{$lang});
    }
    printf "%s:%s:%s", $FILE_NEW{$file_old}, lang_fixup($langlist), $menutext;
}
close($index_fh);

View File

@ -0,0 +1,106 @@
#!/usr/bin/perl
# $FreeBSD$
use Text::Iconv;
use Encode;
use strict;
use utf8;
# Both command line arguments are required: the keymap file to convert and
# the name of the character set it is encoded in.
unless ($ARGV[1]) {
    die "Usage: $0 filename.kbd CHARSET";
}
# A single converter object serves all conversions of this run.
my $converter = Text::Iconv->new($ARGV[1], "UTF-8");
# Convert a string from the character set named on the command line to
# UTF-8 using the shared $converter; returns whatever convert() returns.
sub local_to_UCS_string
{
    my $string = $_[0];
    return $converter->convert($string);
}
# Format a numeric code for the generated keymap:
#   32..126    -> quoted ASCII character, e.g. 'A'
#   up to 255  -> two-digit hex number, e.g. 0x0a
#   above 255  -> (at least) four-digit hex number, e.g. 0x20ac
sub prettyprint_token
{
    my $code = shift;
    if ($code >= 32 and $code <= 126) {
        return sprintf "'%s'", chr($code);
    }
    # For decimal instead of hex output, return sprintf("%d", $code) here.
    my $format = $code > 255 ? "0x%04x" : "0x%02x";
    return sprintf $format, $code;
}
# Convert a single character from the source character set to its Unicode
# code point and return that code point formatted by prettyprint_token().
#
# When the character cannot be converted the result is the token for code
# point 0, as before; a diagnostic is now written to STDERR so that such
# entries do not go unnoticed in the generated keymap.
sub local_to_UCS_code
{
    my ($char) = @_;
    my $utf8 = local_to_UCS_string($char);
    warn sprintf("cannot convert character 0x%02x to Unicode\n", ord($char))
        unless defined $utf8 and length $utf8;
    return prettyprint_token(ord(Encode::decode("UTF-8", $utf8)));
}
# Convert one token of a keymap line:
#   - key tokens (lower-case identifiers) are returned unchanged
#   - decimal numbers, hex numbers and quoted characters are taken as
#     character codes in the source character set and converted to Unicode
#   - anything else is returned wrapped in <?...?> to make it stand out
sub convert_token
{
    my ($token) = @_;
    if ($token =~ m/^([a-z][a-z0-9]*)$/) {          # key token
        return $1;
    } elsif ($token =~ m/^(\d+)$/) {                # decimal number
        return local_to_UCS_code(chr($1));
    } elsif ($token =~ m/^0x([0-9a-f]+)$/i) {       # hex number
        return local_to_UCS_code(chr(hex($1)));
    } elsif ($token =~ m/^'(.)'$/) {                # character
        return local_to_UCS_code($1);
    }
    return "<?$token?>";                            # uncovered case
}
# Split a keymap line into tokens.
#
# Tokens are separated by white space; parentheses are tokens of their own.
# A quoted single character ('x', including ' ', '(' and ')') is always
# kept together as one token.
#
# The line is scanned left to right, so the closing quote of one token and
# the opening quote of the next (as in "'a' 'b'") are never mistaken for a
# quoted blank.
#
# Returns the list of tokens (empty for a blank line).
sub tokenize {
    my ($line) = @_;
    my @KEYTOKEN = $line =~ m/( '.'         # quoted single character
                              | [()]        # parenthesis
                              | [^\s()]+    # any other run of characters
                              )/gx;
    return @KEYTOKEN;
}
# main program
#
# Reads the keymap file named by the first argument and writes the converted
# keymap to STDOUT: comment lines are converted as text, blank lines are
# kept, and all other lines are tokenized and re-formatted column by column.
open(my $kbd_fh, "<", $ARGV[0])
    or die "cannot open keymap file '$ARGV[0]': $!\n";
while (my $line = <$kbd_fh>) {
    if ($line =~ m/^#/) {
        print local_to_UCS_string($line);
    } elsif ($line =~ m/^\s*$/) {
        print "\n";
    } else {
        my @KEYTOKEN = tokenize($line);
        my $at_bol = 1;     # true while the first token of the line is pending
        foreach my $C (@KEYTOKEN) {
            if ($at_bol) {
                if ($C =~ m/^\s*\d/) { # line begins with key code number
                    printf " %03d ", $C;
                } elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren
                    printf " %-4s ", $C; # accent name starts accent definition
                } elsif ($C eq "(") {
                    printf "%17s", "( "; # paren continues accent definition
                } else {
                    print "UNKNOWN DEFINITION: $line";
                }
                $at_bol = 0;
            } else {
                if ($C =~ m/^([BCNO])$/) {
                    print " $1"; # special case: effect of Caps Lock/Num Lock
                } elsif ($C eq "(") {
                    print " ( ";
                } elsif ($C eq ")") {
                    print " )";
                } else {
                    printf "%-6s ", convert_token($C);
                }
            }
        }
        print "\n";
    }
}
close($kbd_fh);

View File

@ -0,0 +1,99 @@
#!/usr/local/bin/perl
# $FreeBSD$
use Text::Iconv;
use Encode;
use strict;
use utf8;
# Directories and file names.
# The map files and convert-keymap.pl are expected in the directory this
# script was started from; when $0 carries no directory component the
# current directory is used.
my ($script_dir) = $0 =~ m:^(.*)/:;
my $dir_convtool        = $script_dir || ".";
my $dir_keymaps_config  = $dir_convtool;
my $dir_keymaps_syscons = "/usr/src/share/syscons/keymaps";
my $dir_keymaps_vt      = "/usr/src/share/vt/keymaps";
my $dir_keymaps_output  = $dir_keymaps_vt . "/OUTPUT";
my $keymap_index        = $dir_keymaps_syscons . "/INDEX.keymaps";
my $language_map        = $dir_keymaps_config . "/LANG.map";
my $keymapfile_map      = $dir_keymaps_config . "/KBDFILES.map";

# Lookup tables, filled from the map files by the main program.
my %LANG_NEW = ();      # key: old language ID        -> new language ID
my %ENCODING = ();      # key: language ID or old file -> source encoding
my %FILE_NEW = ();      # key: old file name          -> new file name
# subroutines
# Convert a string from a legacy character set to UTF-8.
#
# Parameters:
#   $string  - text in the legacy encoding
#   $old_enc - iconv name of that encoding (e.g. "ISO8859-15")
# Returns the value of Text::Iconv's convert(), i.e. the UTF-8 text, or
# undef when the conversion failed. A failure is additionally reported on
# the currently selected output handle.
sub local_to_UCS_string
{
    my ($string, $old_enc) = @_;
    my $converter = Text::Iconv->new($old_enc, "UTF-8");
    my $result = $converter->convert($string);
    # Test for definedness, not truth: "0" and "" are valid results.
    # Use print instead of printf so that a '%' inside $string is not
    # interpreted as a format directive.
    print "!!! conversion failed for '$string' ($old_enc)\n"
        unless defined $result;
    return $result;
}
# Translate a comma-separated list of old language IDs into the
# corresponding comma-separated list of new IDs taken from %LANG_NEW.
# IDs without an entry in %LANG_NEW contribute an empty string.
# Returns undef for an empty list.
sub lang_fixup {
    my ($langlist) = @_;
    my $result;
    foreach my $old_id (split(/,/, $langlist)) {
        if ($result) {
            # something has been collected already: separate with a comma
            $result .= "," . $LANG_NEW{$old_id};
        } else {
            $result .= $LANG_NEW{$old_id};
        }
    }
    return $result;
}
# main program
#
# Reads LANG.map and KBDFILES.map, then runs convert-keymap.pl on every
# *.kbd file of the syscons keymap directory that is listed in KBDFILES.map.
# The results are written to $dir_keymaps_output; progress and problems are
# reported on STDOUT.

# LANG.map columns: old language ID, new language ID, menu text encoding.
open(my $langmap_fh, "<", $language_map)
    or die "cannot open $language_map: $!\n";
while (my $line = <$langmap_fh>) {
    next if $line =~ m/^#/;         # comment line
    next unless $line =~ m/\S/;     # blank line carries no mapping
    my ($lang_old, $lang_new, $encoding) = split(" ", $line);
    $LANG_NEW{$lang_old} = $lang_new;
    $ENCODING{$lang_old} = $encoding;
    $ENCODING{$lang_new} = $encoding;
}
close($langmap_fh);

$FILE_NEW{"MENU"} = "MENU"; # dummy identity mapping
$FILE_NEW{"FONT"} = "FONT"; # dummy identity mapping

# KBDFILES.map columns: source encoding, old file name, new file name.
open(my $filemap_fh, "<", $keymapfile_map)
    or die "cannot open $keymapfile_map: $!\n";
while (my $line = <$filemap_fh>) {
    next if $line =~ m/^#/;
    my ($encoding, $file_old, $file_new) = split(" ", $line);
    # incomplete lines (including blank ones) are ignored
    if ($encoding and $file_old and $file_new) {
        $ENCODING{$file_old} = $encoding;
        $FILE_NEW{$file_old} = $file_new;
    }
}
close($filemap_fh);

# The shell redirection below cannot create the output directory itself.
unless (-d $dir_keymaps_output) {
    mkdir($dir_keymaps_output)
        or die "cannot create $dir_keymaps_output: $!\n";
}

foreach my $kbdfile (glob("$dir_keymaps_syscons/*.kbd")) {
    (my $basename = $kbdfile) =~ s:.*/::;
    my $encoding = $ENCODING{$basename};
    my $outfile = $FILE_NEW{$basename};
    if ($encoding and $outfile) {
        if (-r $kbdfile) {
            print "converting from '$basename' ($encoding) to '$outfile' (Unicode)\n";
            my $cmdline = "$dir_convtool/convert-keymap.pl $kbdfile $encoding > $dir_keymaps_output/$outfile";
            # report a failed conversion instead of silently leaving a
            # missing or truncated output file behind
            system($cmdline) == 0
                or print "conversion of '$basename' failed (status $?)\n";
        } else {
            print "$kbdfile not found\n";
        }
    } else {
        print "Unknown input file: $basename\n";
    }
}