From 38f168e1a3b83f2dcf9ad354f4f4a7429c9edbba Mon Sep 17 00:00:00 2001 From: Yuri Pankov Date: Sat, 13 Jun 2020 14:11:02 +0000 Subject: [PATCH] nvi: fallback to ISO8859-1 as last resort The current logic of falling back to the user's locale encoding doesn't make much sense when that encoding is UTF-8 and we have already failed the looks_utf8() check and skipped the encoding set via "fileencoding" because it was UTF-8 as well; fall back to ISO8859-1 in that case. Reviewed by: Zhihao Yuan Differential Revision: https://reviews.freebsd.org/D24919 --- contrib/nvi/common/exf.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/contrib/nvi/common/exf.c b/contrib/nvi/common/exf.c index 1fcf7f6da498..b03aa98bc72c 100644 --- a/contrib/nvi/common/exf.c +++ b/contrib/nvi/common/exf.c @@ -1237,7 +1237,10 @@ file_encinit(SCR *sp) } /* - * Detect UTF-8 and fallback to the locale/preset encoding. + * 1. Check for valid UTF-8. + * 2. Check if fallback fileencoding is set and is NOT UTF-8. + * 3. Check if user locale's encoding is NOT UTF-8. + * 4. Use ISO8859-1 as last resort. * * XXX * A manually set O_FILEENCODING indicates the "fallback @@ -1246,9 +1249,13 @@ file_encinit(SCR *sp) */ if (looks_utf8(buf, blen) > 1) o_set(sp, O_FILEENCODING, OS_STRDUP, "utf-8", 0); - else if (!O_ISSET(sp, O_FILEENCODING) || - !strcasecmp(O_STR(sp, O_FILEENCODING), "utf-8")) + else if (O_ISSET(sp, O_FILEENCODING) && + strcasecmp(O_STR(sp, O_FILEENCODING), "utf-8") != 0) + /* Use fileencoding as is */ ; + else if (strcasecmp(codeset(), "utf-8") != 0) o_set(sp, O_FILEENCODING, OS_STRDUP, codeset(), 0); + else + o_set(sp, O_FILEENCODING, OS_STRDUP, "iso8859-1", 0); conv_enc(sp, O_FILEENCODING, 0); #endif