Add another matching algorithm to do heuristics for international

language text files.

Should finally close PR # bin/1925: file does not consider cyrillic
text..., though i've never got any response from the originator about
my suggestion.

While i was at it, also move out the `magic' file to /usr/share/misc,
there's nothing that magic with this file to justify its life under
/etc.
This commit is contained in:
Joerg Wunsch 1996-12-11 14:09:12 +00:00
parent 09ab8202c0
commit 97857d5a23
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=20323
4 changed files with 83 additions and 6 deletions

View File

@ -1,6 +1,6 @@
# Makefile for file(1) cmd.
# Copyright (c) Ian F. Darwin 86/09/01 - see LEGAL.NOTICE.
# @(#)$Id: Makefile,v 1.4 1995/07/25 00:36:03 bde Exp $
# @(#)$Id: Makefile,v 1.5 1996/08/17 22:27:08 wosch Exp $
#
# This software is not subject to any license of the American Telephone
# and Telegraph Company or of the Regents of the University of California.
@ -23,7 +23,7 @@
# 4. This notice may not be removed or altered.
#
# Hacked and dismembered for bmake (Geoff Rehmet).
MAGIC= /etc/magic
MAGIC= /usr/share/misc/magic
MAGICOWN= bin
MAGICGRP= bin
MAGICMODE= 444
@ -33,7 +33,7 @@ CFLAGS+= -DMAGIC='"$(MAGIC)"'
PROG= file
SRCS= file.c apprentice.c fsmagic.c softmagic.c ascmagic.c \
compress.c is_tar.c print.c
compress.c is_tar.c print.c international.c
MAN1= file.1
MAN5= magic.5
@ -51,7 +51,7 @@ magic: $(MAGFILES)
cat $(MAGFILES) > $(.TARGET)
# called from /usr/src/etc/Makefile
etc-magic:
beforeinstall:
${INSTALL} -c -o $(MAGICOWN) -g $(MAGICGRP) -m $(MAGICMODE) magic \
$(DESTDIR)$(MAGIC)

View File

@ -26,7 +26,7 @@
*/
#ifndef lint
static char *moduleid =
"@(#)$Id: file.c,v 1.2 1995/05/30 06:30:01 rgrimes Exp $";
"@(#)$Id: file.c,v 1.3 1996/01/23 12:40:11 mpp Exp $";
#endif /* lint */
#include <stdio.h>
@ -343,6 +343,10 @@ int nb, zflag;
if (ascmagic(buf, nb))
return 'a';
/* see if it's international language text */
if (internatmagic(buf, nb))
return 'i';
/* abandon hope, all ye who remain here */
ckfputs("data", stdout);
return '\0';

View File

@ -1,6 +1,6 @@
/*
* file.h - definitions for file(1) program
* @(#)$Id: file.h,v 1.2 1995/05/30 06:30:02 rgrimes Exp $
* @(#)$Id: file.h,v 1.3 1996/01/23 12:40:13 mpp Exp $
*
* Copyright (c) Ian F. Darwin, 1987.
* Written by Ian F. Darwin.
@ -87,6 +87,7 @@ extern void error __P((const char *, ...));
extern void ckfputs __P((const char *, FILE *));
struct stat;
extern int fsmagic __P((const char *, struct stat *));
extern int internatmagic __P((unsigned char *, int));
extern int is_compress __P((const unsigned char *, int *));
extern int is_tar __P((unsigned char *, int));
extern void magwarn __P((const char *, ...));

View File

@ -0,0 +1,72 @@
#include "file.h"
#include <string.h>
#define F 0
#define T 1
/*
 * Lookup table, indexed by byte value, of characters that look
 * "reasonable" in international language texts.  That's almost all
 * characters :), except a few in the control range of ASCII (all the
 * known international character sets share the bottom half with ASCII).
 *
 * Entries are T (acceptable) or F (not acceptable):
 *   - 0x08..0x0d (BS, HT, LF, VT, FF, CR) and 0x1b (ESC) are the only
 *     accepted codes below 0x20;
 *   - 0x7f (DEL) is rejected;
 *   - every other value from 0x20 through 0xff is accepted.
 */
static char maybe_internat[256] = {
F, F, F, F, F, F, F, F, T, T, T, T, T, T, F, F, /* 0x0X: only 0x08..0x0d accepted */
F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X: only 0x1b (ESC) accepted */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X: 0x7f (DEL) rejected */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x8X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x9X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xaX */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xbX */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xcX */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xdX */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xeX */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T /* 0xfX */
};
/* Maximal length of a line we consider "reasonable". */
#define MAXLINELEN 300
/*
 * internatmagic - heuristic check for international language text.
 *
 * buf    - bytes to examine
 * nbytes - byte count handed in by the caller; it is decremented by one
 *          before any scanning, so the final byte is never inspected
 *          (NOTE(review): presumably a terminator appended by the
 *          caller -- confirm against the call site).
 *
 * Returns 0, printing nothing, if any examined byte is marked
 * unacceptable in maybe_internat[], or if any line is longer than
 * MAXLINELEN bytes.  Otherwise writes "International language text"
 * to stdout via ckfputs() and returns 1.
 */
int
internatmagic(buf, nbytes)
unsigned char *buf;
int nbytes;
{
	unsigned char *pos;	/* current scan position */
	unsigned char *eol;	/* newline ending the current line, if any */
	int left;		/* bytes not yet consumed */
	int linelen;		/* length of current line, newline excluded */

	/* Ignore the last byte of the buffer. */
	nbytes--;

	/* Pass 1: reject on the first "unreasonable" character. */
	pos = buf;
	left = nbytes;
	while (left > 0) {
		if (maybe_internat[*pos] == 0)
			return 0;
		pos++;
		left--;
	}

	/* Pass 2: every line must be of "reasonable" length. */
	pos = buf;
	left = nbytes;
	while (left > 0) {
		eol = memchr(pos, '\n', left);
		if (eol == NULL) {
			/*
			 * No further newline.  A short unterminated tail is
			 * acceptable (we merely ran into the end of the
			 * buffer); an overlong one is not.
			 */
			if (left > MAXLINELEN)
				return 0;
			break;
		}
		linelen = eol - pos;
		if (linelen > MAXLINELEN)
			return 0;
		/* Step over this line and its newline. */
		pos = eol + 1;
		left -= linelen + 1;
	}

	ckfputs("International language text", stdout);
	return 1;
}