update wcwidth data from utf8proc
Character width data being out of date is a constant source of weird rendering issues and wasted time trying to diagnose those, e.g. as reported by Jeremy Chadwick: https://gitlab.com/muttmua/mutt/-/issues/67 Sadly, there is no real ("standard") wcwidth data source, so this tries to rectify the problem using the utf8proc one (through its C API) which would hopefully benefit both FreeBSD and utf8proc through bug reports (if any). Reviewed by: bapt Differential Revision: https://reviews.freebsd.org/D27259
This commit is contained in:
parent
f7b0aedd1c
commit
b803311a18
@ -225,7 +225,7 @@ posix/${enc}.cm:
|
||||
.for area in ${BASE_LOCALES_OF_INTEREST}
|
||||
posixsrc: build-tools posix/${area}.UTF-8.src
|
||||
.ORDER: build-tools posix/${area}.UTF-8.src
|
||||
posix/${area}.UTF-8.src:
|
||||
posix/${area}.UTF-8.src:
|
||||
mkdir -p posix && \
|
||||
${JAVA_CLDR} org.unicode.cldr.posix.GeneratePOSIX \
|
||||
-d posix -m ${area} -c UTF-8
|
||||
@ -239,4 +239,20 @@ posix/${area}.${encoding}.src:
|
||||
-d posix -m ${area} -c ${encoding}
|
||||
.endfor
|
||||
|
||||
# generate widths.txt using the data from libutf8proc
GETWIDTHS=${TOOLSDIR}/getwidths
MKWIDTHS=${TOOLSDIR}/mkwidths.pl
WIDTHS= ${ETCDIR}/final-maps/widths.txt

# compiler and linker flags for libutf8proc, as reported by pkgconf
U8CFLAGS!=pkgconf --cflags libutf8proc
U8LIBS!=pkgconf --libs libutf8proc
CFLAGS+=${U8CFLAGS}
LDFLAGS+=${U8LIBS}

CLEANFILES+=${GETWIDTHS}

# getwidths writes its data to stdout; mkwidths.pl reads it on stdin and
# takes the UTF-8 charmap and the output file as arguments
widths: ${WIDTHS}
${WIDTHS}: posixcm ${GETWIDTHS}
	${GETWIDTHS} | ${MKWIDTHS} ${.OBJDIR}/posix/UTF-8.cm ${.TARGET}
|
||||
|
||||
.include <bsd.obj.mk>
|
||||
|
@ -55,4 +55,8 @@ Targets:
|
||||
|
||||
make install
|
||||
Install the build results into $LOCALESRCDIR.
|
||||
|
||||
make widths
|
||||
Generate widths.txt. Requires pkgconf and utf8proc
|
||||
packages to be installed.
|
||||
[EOF]
|
||||
|
File diff suppressed because it is too large
Load Diff
51
tools/tools/locale/tools/getwidths.c
Normal file
51
tools/tools/locale/tools/getwidths.c
Normal file
@ -0,0 +1,51 @@
|
||||
/*-
|
||||
* Copyright 2020 Yuri Pankov <yuripv@FreeBSD.org>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <locale.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <utf8proc.h>
|
||||
|
||||
/*
 * Print the utf8proc version string on the first line, followed by one
 * "<hex code point> <width>" line for every code point in the range
 * 0 .. 0x10FFFF whose utf8proc_charwidth() result is not 1.
 */
int
main(void)
{
	int32_t wc;
	int wcw;

	setlocale(LC_CTYPE, "C.UTF-8");

	printf("%s\n", utf8proc_version());

	for (wc = 0; wc < 0x110000; wc++) {
		wcw = utf8proc_charwidth(wc);
		/* Width 1 entries are not emitted. */
		if (wcw == 1)
			continue;
		/* %X expects an unsigned int; wc is never negative here. */
		printf("%04X %d\n", (unsigned int)wc, wcw);
	}

	return (0);
}
|
125
tools/tools/locale/tools/mkwidths.pl
Executable file
125
tools/tools/locale/tools/mkwidths.pl
Executable file
@ -0,0 +1,125 @@
|
||||
#!/usr/local/bin/perl -w
|
||||
|
||||
# SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
||||
#
|
||||
# Copyright 2009 Edwin Groothuis <edwin@FreeBSD.org>
|
||||
# Copyright 2015 John Marino <draco@marino.st>
|
||||
# Copyright 2020 Yuri Pankov <yuripv@FreeBSD.org>
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
#
|
||||
# $FreeBSD$
|
||||
|
||||
use strict;
|
||||
use Encode qw(encode decode);
|
||||
|
||||
# Usage: getwidths | mkwidths.pl <UTF-8 charmap> <output file>
#
# Both arguments are required; without them the script would only emit
# uninitialized-value warnings and fail later in a less obvious way.
die("usage: $0 utf8-charmap output-file < getwidths-output\n")
    if (@ARGV != 2);

# Hex code point (as printed by getwidths) => symbolic charmap name.
my %utf8map = ();
my $utf8charmap = $ARGV[0];
my $outfilename = $ARGV[1];

get_utf8map($utf8charmap);
generate_header();
make_widths($outfilename);
generate_footer();
|
||||
|
||||
############################
|
||||
|
||||
# Convert a charmap byte sequence written as "\xHH\xHH..." (the UTF-8
# encoding of a single character) into its code point, returned as an
# upper-case hex string with at least four digits (e.g. "0041", "20AC",
# "1F600").
sub utf8to32 {
	my @kl = split /\\x/, $_[0];

	# A leading "\x" produces an empty first field; drop it.
	shift @kl if (@kl and $kl[0] eq '');
	my $k = pack('H2' x scalar @kl, @kl);
	my $ux = encode('UTF-32BE', decode('UTF-8', $k));
	my $u = uc(unpack('H*', $ux));
	# No BOM stripping here: the UTF-32BE encoding never prepends one, so
	# a leading 0000FEFF can only be the character U+FEFF itself and
	# must be kept.
	# Remove leading zeroes, keeping at least four hex digits
	$u =~ s/^0+(?=.{4})//;

	return $u;
}
|
||||
|
||||
# Read the POSIX charmap file $file and record, in the file-level
# %utf8map, the symbolic name of every entry between the "CHARMAP" and
# "END CHARMAP" lines, keyed by the hex code point computed by
# utf8to32().  Dies if the file cannot be opened.
sub get_utf8map {
	my $file = shift;

	open(my $fin, '<', $file)
	    or die("can't read $file: $!\n");

	my $incharmap = 0;
	while (my $l = <$fin>) {
		chomp($l);
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Ignore lines that are not "<SYMBOL> bytes" instead of
		# reusing the captures of an earlier match.
		my ($v, $bytes) = $l =~ /^(<[^\s]+>)\s+(.*)/
		    or next;
		my $k = utf8to32($bytes);	# UTF-8 char code

#		print STDERR "register: $k - $v\n";
		$utf8map{$k} = $v;
	}
	close($fin);
}
|
||||
|
||||
# Read the first line of standard input (the utf8proc version string),
# open the output file on the shared FOUT handle and write the
# file header up to and including the opening "WIDTH" keyword.
# Dies if no version line is available or the output cannot be opened.
sub generate_header {
	my $version = <STDIN>;
	# An empty stdin means the width generator produced nothing; stop
	# before the output file gets truncated.
	defined($version)
	    or die("no utf8proc version line on standard input\n");
	chomp($version);

	open(FOUT, ">", "$outfilename")
	    or die ("can't write to $outfilename: $!\n");
	print FOUT <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# utf8proc $version.
# -----------------------------------------------------------------------------
WIDTH
EOF
}
|
||||
|
||||
# Terminate the WIDTH section and close the shared FOUT handle.
# The close is checked because buffered write errors only show up there.
sub generate_footer {
	print FOUT "END WIDTH\n";
	close (FOUT)
	    or die ("can't close $outfilename: $!\n");
}
|
||||
|
||||
# Consume the remaining "<hex code point> <width>" records from standard
# input and, for every code point present in %utf8map, write
# "<symbolic name>\t<width>" to the FOUT handle.  Code points without a
# charmap entry are dropped.
sub make_widths {
	my @records = <STDIN>;

	for my $record (@records) {
		chomp($record);
		my ($codepoint, $width) = split(/ /, $record, -1);

		my $symbol = $utf8map{$codepoint};
		next unless defined $symbol;

		print FOUT "$symbol\t$width\n";
	}
}
|
Loading…
Reference in New Issue
Block a user