When f[w]printf() is called on an unbuffered file like stderr, it
sets up a fake buffered FILE and then effectively calls itself
recursively. Unfortunately, gcc doesn't know how to do tail call
elimination in this case, and actually makes things worse by
inlining __sbprintf(). This means that f[w]printf() to stderr was
allocating about 5k of stack on 64-bit platforms, much of which was
never used.

I've reorganized things to eliminate the waste. In addition to saving
some stack space, this improves performance in my tests by anywhere
from 5% to 17% (depending on the test) when -fstack-protector is
enabled. I found no statistically significant performance difference
when stack protection is turned off. (The tests redirected stderr to
/dev/null.)
This commit is contained in:
David Schultz 2009-01-17 18:57:12 +00:00
parent 0f9e2596e2
commit a1805f7bb9
2 changed files with 23 additions and 14 deletions

View File

@ -65,7 +65,8 @@ __FBSDID("$FreeBSD$");
#include "printflocal.h"
static int __sprint(FILE *, struct __suio *);
static int __sbprintf(FILE *, const char *, va_list) __printflike(2, 0);
static int __sbprintf(FILE *, const char *, va_list) __printflike(2, 0)
__noinline;
static char *__wcsconv(wchar_t *, int);
#define CHAR char
@ -102,6 +103,10 @@ __sbprintf(FILE *fp, const char *fmt, va_list ap)
FILE fake;
unsigned char buf[BUFSIZ];
/* XXX This is probably not needed. */
if (prepwrite(fp) != 0)
return (EOF);
/* copy the important variables */
fake._flags = fp->_flags & ~__SNBF;
fake._file = fp->_file;
@ -193,7 +198,12 @@ vfprintf(FILE * __restrict fp, const char * __restrict fmt0, va_list ap)
int ret;
FLOCKFILE(fp);
ret = __vfprintf(fp, fmt0, ap);
/* optimise fprintf(stderr) (and other unbuffered Unix files) */
if ((fp->_flags & (__SNBF|__SWR|__SRW)) == (__SNBF|__SWR) &&
fp->_file >= 0)
ret = __sbprintf(fp, fmt0, ap);
else
ret = __vfprintf(fp, fmt0, ap);
FUNLOCKFILE(fp);
return (ret);
}
@ -367,11 +377,6 @@ __vfprintf(FILE *fp, const char *fmt0, va_list ap)
if (prepwrite(fp) != 0)
return (EOF);
/* optimise fprintf(stderr) (and other unbuffered Unix files) */
if ((fp->_flags & (__SNBF|__SWR|__SRW)) == (__SNBF|__SWR) &&
fp->_file >= 0)
return (__sbprintf(fp, fmt0, ap));
thousands_sep = '\0';
grouping = NULL;
convbuf = NULL;

View File

@ -67,7 +67,7 @@ __FBSDID("$FreeBSD$");
#include "printflocal.h"
static int __sprint(FILE *, struct __suio *);
static int __sbprintf(FILE *, const wchar_t *, va_list);
static int __sbprintf(FILE *, const wchar_t *, va_list) __noinline;
static wint_t __xfputwc(wchar_t, FILE *);
static wchar_t *__mbsconv(char *, int);
@ -114,6 +114,10 @@ __sbprintf(FILE *fp, const wchar_t *fmt, va_list ap)
FILE fake;
unsigned char buf[BUFSIZ];
/* XXX This is probably not needed. */
if (prepwrite(fp) != 0)
return (EOF);
/* copy the important variables */
fake._flags = fp->_flags & ~__SNBF;
fake._file = fp->_file;
@ -250,7 +254,12 @@ vfwprintf(FILE * __restrict fp, const wchar_t * __restrict fmt0, va_list ap)
int ret;
FLOCKFILE(fp);
ret = __vfwprintf(fp, fmt0, ap);
/* optimise fprintf(stderr) (and other unbuffered Unix files) */
if ((fp->_flags & (__SNBF|__SWR|__SRW)) == (__SNBF|__SWR) &&
fp->_file >= 0)
ret = __sbprintf(fp, fmt0, ap);
else
ret = __vfwprintf(fp, fmt0, ap);
FUNLOCKFILE(fp);
return (ret);
}
@ -419,11 +428,6 @@ __vfwprintf(FILE *fp, const wchar_t *fmt0, va_list ap)
if (prepwrite(fp) != 0)
return (EOF);
/* optimise fprintf(stderr) (and other unbuffered Unix files) */
if ((fp->_flags & (__SNBF|__SWR|__SRW)) == (__SNBF|__SWR) &&
fp->_file >= 0)
return (__sbprintf(fp, fmt0, ap));
thousands_sep = '\0';
grouping = NULL;
convbuf = NULL;