bbf9a45630
original commit log: ===== I had originally suspected the parsing of ctype definition files as being the source of the ctype flag mis-definitions, but it wasn't. In the process, I simplified the cc_list parsing so I'm committing the no-impact improvement separately. It removes some parsing redundancies and won't parse partial range definitions anymore. ==== Submitted by: marino Obtained from: Dragonfly MFC after: 1 month
706 lines
12 KiB
Plaintext
706 lines
12 KiB
Plaintext
%{
/*
 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2015 John Marino <draco@marino.st>
 *
 * This source code is derived from the illumos localedef command, and
 * provided under BSD-style license terms by Nexenta Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * POSIX localedef grammar.
 */

#include <wchar.h>
#include <stdio.h>
#include <limits.h>
#include "localedef.h"

%}

/*
 * Semantic value attached to tokens.  The member used by a typed token is
 * the one named in the <tag> of its %token declaration.
 */
%union {
	int		num;
	wchar_t		wc;
	char		*token;
	collsym_t	*collsym;
	collelem_t	*collelem;
}

/* Keywords accepted by the `setting` rule. */
%token		T_CODE_SET
%token		T_MB_CUR_MAX
%token		T_MB_CUR_MIN
%token		T_COM_CHAR
%token		T_ESC_CHAR

/* Punctuation and structural tokens shared by several sections. */
%token		T_LT
%token		T_GT
%token		T_NL
%token		T_SEMI
%token		T_COMMA
%token		T_ELLIPSIS
%token		T_RPAREN
%token		T_LPAREN
%token		T_QUOTE
%token		T_NULL
%token		T_WS
%token		T_END
%token		T_COPY

/* Section keywords used by the `charmap` rule. */
%token		T_CHARMAP
%token		T_WIDTH

/* Keywords used by the `ctype` and `ctype_kw` rules. */
%token		T_CTYPE
%token		T_ISUPPER
%token		T_ISLOWER
%token		T_ISALPHA
%token		T_ISDIGIT
%token		T_ISPUNCT
%token		T_ISXDIGIT
%token		T_ISSPACE
%token		T_ISPRINT
%token		T_ISGRAPH
%token		T_ISBLANK
%token		T_ISCNTRL
%token		T_ISALNUM
%token		T_ISSPECIAL
%token		T_ISPHONOGRAM
%token		T_ISIDEOGRAM
%token		T_ISENGLISH
%token		T_ISNUMBER
%token		T_TOUPPER
%token		T_TOLOWER

/* Keywords used by the `collate` rule and its sub-rules. */
%token		T_COLLATE
%token		T_COLLATING_SYMBOL
%token		T_COLLATING_ELEMENT
%token		T_ORDER_START
%token		T_ORDER_END
%token		T_FORWARD
%token		T_BACKWARD
%token		T_POSITION
%token		T_FROM
%token		T_UNDEFINED
%token		T_IGNORE

/* Keywords used by the `messages` rule and its sub-rules. */
%token		T_MESSAGES
%token		T_YESSTR
%token		T_NOSTR
%token		T_YESEXPR
%token		T_NOEXPR

/* Keywords used by the `monetary` rule and its sub-rules. */
%token		T_MONETARY
%token		T_INT_CURR_SYMBOL
%token		T_CURRENCY_SYMBOL
%token		T_MON_DECIMAL_POINT
%token		T_MON_THOUSANDS_SEP
%token		T_POSITIVE_SIGN
%token		T_NEGATIVE_SIGN
%token		T_MON_GROUPING
%token		T_INT_FRAC_DIGITS
%token		T_FRAC_DIGITS
%token		T_P_CS_PRECEDES
%token		T_P_SEP_BY_SPACE
%token		T_N_CS_PRECEDES
%token		T_N_SEP_BY_SPACE
%token		T_P_SIGN_POSN
%token		T_N_SIGN_POSN
%token		T_INT_P_CS_PRECEDES
%token		T_INT_N_CS_PRECEDES
%token		T_INT_P_SEP_BY_SPACE
%token		T_INT_N_SEP_BY_SPACE
%token		T_INT_P_SIGN_POSN
%token		T_INT_N_SIGN_POSN

/* Keywords used by the `numeric` rule and its sub-rules. */
%token		T_NUMERIC
%token		T_DECIMAL_POINT
%token		T_THOUSANDS_SEP
%token		T_GROUPING

/* Keywords used by the `time` rule and its sub-rules. */
%token		T_TIME
%token		T_ABDAY
%token		T_DAY
%token		T_ABMON
%token		T_MON
%token		T_ERA
%token		T_ERA_D_FMT
%token		T_ERA_T_FMT
%token		T_ERA_D_T_FMT
%token		T_ALT_DIGITS
%token		T_D_T_FMT
%token		T_D_FMT
%token		T_T_FMT
%token		T_AM_PM
%token		T_T_FMT_AMPM
%token		T_DATE_FMT

/* Tokens that carry a semantic value (the %union member is in brackets). */
%token <wc>		T_CHAR
%token <token>		T_NAME
%token <num>		T_NUMBER
%token <token>		T_SYMBOL
%token <collsym>	T_COLLSYM
%token <collelem>	T_COLLELEM

%%

/* Start symbol: an optional list of settings followed by the categories. */
localedef	: setting_list categories
		| categories
		;

/*
 * A quoted string, possibly empty.  Its characters are accumulated by the
 * charlist actions through add_wcs(); rules that consume a string fetch
 * the accumulated value with get_wcs().
 */
string		: T_QUOTE charlist T_QUOTE
		| T_QUOTE T_QUOTE
		;

/* One or more characters; each is passed to add_wcs() in input order. */
charlist	: charlist T_CHAR
		{
			add_wcs($2);
		}
		| T_CHAR
		{
			add_wcs($1);
		}
		;

/* One or more settings. */
setting_list	: setting_list setting
		| setting
		;

/*
 * A single setting line.  The character and numeric forms store their
 * value directly in com_char, esc_char, mb_cur_max or mb_cur_min.  The
 * code set may be given as a quoted string or as a bare name; either way
 * it is handed to set_wide_encoding().
 */
setting		: T_COM_CHAR T_CHAR T_NL
		{
			com_char = $2;
		}
		| T_ESC_CHAR T_CHAR T_NL
		{
			esc_char = $2;
		}
		| T_MB_CUR_MAX T_NUMBER T_NL
		{
			mb_cur_max = $2;
		}
		| T_MB_CUR_MIN T_NUMBER T_NL
		{
			mb_cur_min = $2;
		}
		| T_CODE_SET string T_NL
		{
			/* Quoted form: convert the collected wide string first. */
			wchar_t *w = get_wcs();
			set_wide_encoding(to_mb_string(w));
			free(w);
		}
		| T_CODE_SET T_NAME T_NL
		{
			set_wide_encoding($2);
		}
		;

/*
 * A copy directive, usable as the whole body of a category.  The source
 * may be a bare name or a quoted string; it is passed to copy_category().
 */
copycat		: T_COPY T_NAME T_NL
		{
			copy_category($2);
		}
		| T_COPY string T_NL
		{
			/* Quoted form: convert the collected wide string first. */
			wchar_t *w = get_wcs();
			copy_category(to_mb_string(w));
			free(w);
		}
		;

/* One or more categories. */
categories	: categories category
		| category
		;

/* Any one of the section kinds this grammar understands. */
category	: charmap
		| messages
		| monetary
		| ctype
		| collate
		| numeric
		| time
		;

/*
 * A charmap section or a width section.  Each is opened by its keyword and
 * closed by T_END followed by the same keyword.
 */
charmap		: T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
		| T_WIDTH T_NL width_list T_END T_WIDTH T_NL
		;

/* One or more charmap entries. */
charmap_list	: charmap_list charmap_entry
		| charmap_entry
		;

/*
 * One charmap line: a single symbol, a symbol range, or an empty line.
 * The non-empty forms do not match a trailing T_NL themselves; their
 * actions call scan_to_eol() instead (presumably to discard whatever
 * follows on the line -- confirm against the scanner).
 */
charmap_entry	: T_SYMBOL T_CHAR
		{
			add_charmap($1, $2);
			scan_to_eol();
		}
		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
		{
			add_charmap_range($1, $3, $4);
			scan_to_eol();
		}
		| T_NL
		;

/* One or more width entries. */
width_list	: width_list width_entry
		| width_entry
		;

/*
 * One line of the width section, or an empty line.  Characters are passed
 * to add_width() / add_width_range() together with the number.  Wherever
 * a T_SYMBOL appears in place of a character it is passed to
 * add_charmap_undefined() instead; in the mixed range forms only the
 * T_CHAR end point gets a width.
 */
width_entry	: T_CHAR T_NUMBER T_NL
		{
			add_width($1, $2);
		}
		| T_SYMBOL T_NUMBER T_NL
		{
			add_charmap_undefined($1);
		}
		| T_CHAR T_ELLIPSIS T_CHAR T_NUMBER T_NL
		{
			add_width_range($1, $3, $4);
		}
		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
		{
			add_charmap_undefined($1);
			add_charmap_undefined($3);
		}
		| T_CHAR T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
		{
			add_width($1, $4);
			add_charmap_undefined($3);
		}
		| T_SYMBOL T_ELLIPSIS T_CHAR T_NUMBER T_NL
		{
			add_width($3, $4);
			add_charmap_undefined($1);
		}
		| T_NL
		;

/*
 * The ctype category.  With an explicit keyword list, dump_ctype() is
 * called once the closing line has been parsed; the copy form has no
 * action of its own here.
 */
ctype		: T_CTYPE T_NL ctype_list T_END T_CTYPE T_NL
		{
			dump_ctype();
		}
		| T_CTYPE T_NL copycat T_END T_CTYPE T_NL
		;

/* One or more ctype keyword lines. */
ctype_list	: ctype_list ctype_kw
		| ctype_kw
		;

/*
 * One ctype line: a class keyword followed by a character list, or a case
 * mapping keyword followed by a list of pairs.  The keyword has no action
 * in this rule; all work happens in the cc_list / conv_list actions.
 * NOTE(review): presumably the scanner records which keyword is current
 * so those actions know what to update -- confirm.
 */
ctype_kw	: T_ISUPPER cc_list T_NL
		| T_ISLOWER cc_list T_NL
		| T_ISALPHA cc_list T_NL
		| T_ISDIGIT cc_list T_NL
		| T_ISPUNCT cc_list T_NL
		| T_ISXDIGIT cc_list T_NL
		| T_ISSPACE cc_list T_NL
		| T_ISPRINT cc_list T_NL
		| T_ISGRAPH cc_list T_NL
		| T_ISBLANK cc_list T_NL
		| T_ISCNTRL cc_list T_NL
		| T_ISALNUM cc_list T_NL
		| T_ISSPECIAL cc_list T_NL
		| T_ISENGLISH cc_list T_NL
		| T_ISNUMBER cc_list T_NL
		| T_ISIDEOGRAM cc_list T_NL
		| T_ISPHONOGRAM cc_list T_NL
		| T_TOUPPER conv_list T_NL
		| T_TOLOWER conv_list T_NL
		;

/*
 * Semicolon-separated members of a character class.  The list must begin
 * with a single cc_char; every later member is either another cc_char or
 * the tail of a range (see cc_range_end), so a range always follows an
 * earlier list member.
 */
cc_list		: cc_list T_SEMI cc_range_end
		| cc_list T_SEMI cc_char
		| cc_char
		;

/*
 * Tail of a range: an ellipsis, a semicolon and the final character.
 * Only the final character is passed to add_ctype_range().
 * NOTE(review): the range start is presumably the character most recently
 * handled by add_ctype() -- confirm in the callee.
 */
cc_range_end	: T_ELLIPSIS T_SEMI T_CHAR
		{
			add_ctype_range($3);
		}
		;

/*
 * A single class member.  A T_CHAR is passed to add_ctype(); a T_SYMBOL is
 * passed to add_charmap_undefined() instead.
 */
cc_char		: T_CHAR
		{
			add_ctype($1);
		}
		| T_SYMBOL
		{
			add_charmap_undefined($1);
		}
		;

/* Semicolon-separated list of case conversion pairs. */
conv_list	: conv_list T_SEMI conv_pair
		| conv_pair
		;

/*
 * One parenthesised, comma-separated pair.  add_caseconv() is called only
 * when both members are T_CHAR.  If either member is a T_SYMBOL, each such
 * symbol is passed to add_charmap_undefined() and no conversion is added.
 */
conv_pair	: T_LPAREN T_CHAR T_COMMA T_CHAR T_RPAREN
		{
			add_caseconv($2, $4);
		}
		| T_LPAREN T_SYMBOL T_COMMA T_CHAR T_RPAREN
		{
			add_charmap_undefined($2);
		}
		| T_LPAREN T_SYMBOL T_COMMA T_SYMBOL T_RPAREN
		{
			add_charmap_undefined($2);
			add_charmap_undefined($4);
		}
		| T_LPAREN T_CHAR T_COMMA T_SYMBOL T_RPAREN
		{
			add_charmap_undefined($4);
		}
		;

/*
 * The collate category: an order block, optionally preceded by collating
 * symbol / element definitions, or a copy directive.  dump_collate() is
 * called for the first two forms once the closing line has been parsed.
 */
collate		: T_COLLATE T_NL coll_order T_END T_COLLATE T_NL
		{
			dump_collate();
		}
		| T_COLLATE T_NL coll_optional coll_order T_END T_COLLATE T_NL
		{
			dump_collate();
		}
		| T_COLLATE T_NL copycat T_END T_COLLATE T_NL
		;

/* One or more collating symbol / element definitions, in any order. */
coll_optional	: coll_optional coll_symbols
		| coll_optional coll_elements
		| coll_symbols
		| coll_elements
		;

/* A collating symbol definition; the symbol goes to define_collsym(). */
coll_symbols	: T_COLLATING_SYMBOL T_SYMBOL T_NL
		{
			define_collsym($2);
		}
		;

/*
 * A collating element definition: the symbol and the wide string collected
 * for the quoted `string` are passed to define_collelem().
 */
coll_elements	: T_COLLATING_ELEMENT T_SYMBOL T_FROM string T_NL
		{
			define_collelem($2, get_wcs());
		}
		;

/*
 * The order block.  The start line may carry order_args; when it does,
 * the order_args actions add the directives and no action is needed here.
 */
coll_order	: T_ORDER_START T_NL order_list T_ORDER_END T_NL
		{
			/*
			 * No order_args were given on the start line:
			 * default to a single forward directive.
			 */
			add_order_bit(T_FORWARD);
			add_order_directive();
		}
		| T_ORDER_START order_args T_NL order_list T_ORDER_END T_NL
		;

/*
 * Semicolon-separated order arguments.  add_order_directive() is called
 * once after each complete order_arg.
 */
order_args	: order_args T_SEMI order_arg
		{
			add_order_directive();
		}
		| order_arg
		{
			add_order_directive();
		}
		;

/* One order argument: comma-separated direction keywords. */
order_arg	: order_arg T_COMMA order_dir
		| order_dir
		;

/* A direction keyword; its token value is passed to add_order_bit(). */
order_dir	: T_FORWARD
		{
			add_order_bit(T_FORWARD);
		}
		| T_BACKWARD
		{
			add_order_bit(T_BACKWARD);
		}
		| T_POSITION
		{
			add_order_bit(T_POSITION);
		}
		;

/* One or more lines inside the order block. */
order_list	: order_list order_item
		| order_item
		;

/*
 * One line of the order block.  A line holding only a collating symbol is
 * finished with end_order_collsym().  Any other line starts with an
 * order_itemkw, may carry weights, and is finished with end_order().
 */
order_item	: T_COLLSYM T_NL
		{
			end_order_collsym($1);
		}
		| order_itemkw T_NL
		{
			end_order();
		}
		| order_itemkw order_weights T_NL
		{
			end_order();
		}
		;

/*
 * The leading item of an order line.  Each kind calls the matching
 * start_order_*() routine before any weights are parsed.
 */
order_itemkw	: T_CHAR
		{
			start_order_char($1);
		}
		| T_ELLIPSIS
		{
			start_order_ellipsis();
		}
		| T_COLLELEM
		{
			start_order_collelem($1);
		}
		| T_UNDEFINED
		{
			start_order_undefined();
		}
		| T_SYMBOL
		{
			start_order_symbol($1);
		}
		;

/*
 * Semicolon-separated weights.  The second alternative accepts a
 * semicolon with no weight after it; it has no action.
 */
order_weights	: order_weights T_SEMI order_weight
		| order_weights T_SEMI
		| order_weight
		;

/*
 * A single weight.  Each kind calls the matching add_order_*() routine.
 * The quoted form is built up by the order_str actions and then completed
 * with add_order_subst().
 */
order_weight	: T_COLLELEM
		{
			add_order_collelem($1);
		}
		| T_COLLSYM
		{
			add_order_collsym($1);
		}
		| T_CHAR
		{
			add_order_char($1);
		}
		| T_ELLIPSIS
		{
			add_order_ellipsis();
		}
		| T_IGNORE
		{
			add_order_ignore();
		}
		| T_SYMBOL
		{
			add_order_symbol($1);
		}
		| T_QUOTE order_str T_QUOTE
		{
			add_order_subst();
		}
		;

/* One or more items inside a quoted weight. */
order_str	: order_str order_stritem
		| order_stritem
		;

/*
 * One item of a quoted weight; each kind calls the matching
 * add_subst_*() routine.
 */
order_stritem	: T_CHAR
		{
			add_subst_char($1);
		}
		| T_COLLSYM
		{
			add_subst_collsym($1);
		}
		| T_COLLELEM
		{
			add_subst_collelem($1);
		}
		| T_SYMBOL
		{
			add_subst_symbol($1);
		}
		;

/*
 * The messages category.  With an explicit item list, dump_messages() is
 * called once the closing line has been parsed; the copy form has no
 * action of its own here.
 */
messages	: T_MESSAGES T_NL messages_list T_END T_MESSAGES T_NL
		{
			dump_messages();
		}
		| T_MESSAGES T_NL copycat T_END T_MESSAGES T_NL
		;

/* One or more messages items. */
messages_list	: messages_list messages_item
		| messages_item
		;

/* Keywords that may start a messages item; none has an action here. */
messages_kw	: T_YESSTR
		| T_NOSTR
		| T_YESEXPR
		| T_NOEXPR
		;

/*
 * A keyword followed by a quoted string; the collected wide string is
 * passed to add_message().
 */
messages_item	: messages_kw string T_NL
		{
			add_message(get_wcs());
		}
		;

/*
 * The monetary category.  With an explicit keyword list, dump_monetary()
 * is called once the closing line has been parsed; the copy form has no
 * action of its own here.
 */
monetary	: T_MONETARY T_NL monetary_list T_END T_MONETARY T_NL
		{
			dump_monetary();
		}
		| T_MONETARY T_NL copycat T_END T_MONETARY T_NL
		;

/* One or more monetary keyword lines. */
monetary_list	: monetary_list monetary_kw
		| monetary_kw
		;

/* Monetary keywords that are followed by a quoted string. */
monetary_strkw	: T_INT_CURR_SYMBOL
		| T_CURRENCY_SYMBOL
		| T_MON_DECIMAL_POINT
		| T_MON_THOUSANDS_SEP
		| T_POSITIVE_SIGN
		| T_NEGATIVE_SIGN
		;

/* Monetary keywords that are followed by a single number. */
monetary_numkw	: T_INT_FRAC_DIGITS
		| T_FRAC_DIGITS
		| T_P_CS_PRECEDES
		| T_P_SEP_BY_SPACE
		| T_N_CS_PRECEDES
		| T_N_SEP_BY_SPACE
		| T_P_SIGN_POSN
		| T_N_SIGN_POSN
		| T_INT_P_CS_PRECEDES
		| T_INT_N_CS_PRECEDES
		| T_INT_P_SEP_BY_SPACE
		| T_INT_N_SEP_BY_SPACE
		| T_INT_P_SIGN_POSN
		| T_INT_N_SIGN_POSN
		;

/*
 * One monetary line: a string keyword with its string, a numeric keyword
 * with its number, or the grouping keyword with a number list.  The
 * grouping form has no action here; mon_group_list does the work.
 */
monetary_kw	: monetary_strkw string T_NL
		{
			add_monetary_str(get_wcs());
		}
		| monetary_numkw T_NUMBER T_NL
		{
			add_monetary_num($2);
		}
		| T_MON_GROUPING mon_group_list T_NL
		;

/*
 * Semicolon-separated numbers for the monetary grouping keyword.  The
 * first number calls reset_monetary_group() before it is added; later
 * numbers are only added.
 */
mon_group_list	: T_NUMBER
		{
			reset_monetary_group();
			add_monetary_group($1);
		}
		| mon_group_list T_SEMI T_NUMBER
		{
			add_monetary_group($3);
		}
		;

/*
 * The numeric category.  With an explicit item list, dump_numeric() is
 * called once the closing line has been parsed; the copy form has no
 * action of its own here.
 */
numeric		: T_NUMERIC T_NL numeric_list T_END T_NUMERIC T_NL
		{
			dump_numeric();
		}
		| T_NUMERIC T_NL copycat T_END T_NUMERIC T_NL
		;

/* One or more numeric items. */
numeric_list	: numeric_list numeric_item
		| numeric_item
		;

/*
 * One numeric line: a string keyword with its string, or the grouping
 * keyword with a number list (handled by the group_list actions).
 */
numeric_item	: numeric_strkw string T_NL
		{
			add_numeric_str(get_wcs());
		}
		| T_GROUPING group_list T_NL
		;

/* Numeric keywords that are followed by a quoted string. */
numeric_strkw	: T_DECIMAL_POINT
		| T_THOUSANDS_SEP
		;

/*
 * Semicolon-separated numbers for the numeric grouping keyword.  The
 * first number calls reset_numeric_group() before it is added; later
 * numbers are only added.
 */
group_list	: T_NUMBER
		{
			reset_numeric_group();
			add_numeric_group($1);
		}
		| group_list T_SEMI T_NUMBER
		{
			add_numeric_group($3);
		}
		;

/*
 * The time category.  With an explicit keyword list, dump_time() is
 * called once the closing line has been parsed; the copy form has no
 * action of its own here.
 *
 * Both forms are closed by T_END T_TIME, matching how every other
 * category is closed by its own opening keyword.  The copy form must not
 * expect T_NUMERIC: that would reject a time section that consists of a
 * copy directive and is closed with the time keyword.
 */
time		: T_TIME T_NL time_kwlist T_END T_TIME T_NL
		{
			dump_time();
		}
		| T_TIME T_NL copycat T_END T_TIME T_NL
		;

/* One or more time keyword lines. */
time_kwlist	: time_kwlist time_kw
		| time_kw
		;

/*
 * One time line.  A string keyword passes its collected string to
 * add_time_str().  A list keyword takes a time_list, after which
 * check_time_list() is called.
 */
time_kw		: time_strkw string T_NL
		{
			add_time_str(get_wcs());
		}
		| time_listkw time_list T_NL
		{
			check_time_list();
		}
		;

/* Time keywords that are followed by a list of quoted strings. */
time_listkw	: T_ABDAY
		| T_DAY
		| T_ABMON
		| T_MON
		| T_ERA
		| T_ALT_DIGITS
		| T_AM_PM
		;

/* Time keywords that are followed by a single quoted string. */
time_strkw	: T_ERA_D_T_FMT
		| T_ERA_T_FMT
		| T_ERA_D_FMT
		| T_D_T_FMT
		| T_D_FMT
		| T_T_FMT
		| T_T_FMT_AMPM
		| T_DATE_FMT
		;

/*
 * Semicolon-separated quoted strings for a list keyword.  The first
 * string calls reset_time_list() before it is added; later strings are
 * only added.
 */
time_list	: time_list T_SEMI string
		{
			add_time_list(get_wcs());
		}
		| string
		{
			reset_time_list();
			add_time_list(get_wcs());
		}
		;