Replace hand-crafted naive byte-by-byte zero block detection routine
with a macro based around memcmp(). The latter is expected to be some 8 times faster on modern 64-bit architectures. In practice, the throughput of conv=sparse from /dev/zero to /dev/null went up some 5-fold here, from 1.9GB/sec to 9.7GB/sec, with this change (bs=128k). MFC after: 2 weeks
This commit is contained in:
parent
7d2b0bd7d7
commit
dead7b5e47
10
bin/dd/dd.c
10
bin/dd/dd.c
@ -511,7 +511,7 @@ void
|
||||
dd_out(int force)
|
||||
{
|
||||
u_char *outp;
|
||||
size_t cnt, i, n;
|
||||
size_t cnt, n;
|
||||
ssize_t nw;
|
||||
static int warned;
|
||||
int sparse;
|
||||
@ -544,12 +544,8 @@ dd_out(int force)
|
||||
do {
|
||||
sparse = 0;
|
||||
if (ddflags & C_SPARSE) {
|
||||
sparse = 1; /* Is buffer sparse? */
|
||||
for (i = 0; i < cnt; i++)
|
||||
if (outp[i] != 0) {
|
||||
sparse = 0;
|
||||
break;
|
||||
}
|
||||
/* Is buffer sparse? */
|
||||
sparse = BISZERO(outp, cnt);
|
||||
}
|
||||
if (sparse && !force) {
|
||||
pending += cnt;
|
||||
|
@ -103,3 +103,7 @@ typedef struct {
|
||||
#define C_PROGRESS 0x40000000
|
||||
|
||||
#define C_PARITY (C_PAREVEN | C_PARODD | C_PARNONE | C_PARSET)
|
||||
|
||||
/*
 * BISZERO(p, s): evaluates to non-zero iff s > 0 and the s bytes starting
 * at p are all zero.
 *
 * How it works: the first byte is checked explicitly, then memcmp()
 * compares the buffer against itself shifted by one byte over s - 1 bytes.
 * If byte[0] == 0 and byte[i] == byte[i + 1] for every i, all bytes are 0.
 * The two regions overlap, which is fine because memcmp() only reads.
 *
 * Every use of p is parenthesized so that expression arguments such as
 * `buf + off` are cast as a whole rather than only their first operand.
 *
 * NOTE: p and s are each evaluated more than once; do not pass arguments
 * with side effects.
 */
#define BISZERO(p, s) ((s) > 0 && *((const char *)(p)) == 0 && !memcmp( \
    (const void *)(p), (const void *) \
    ((const char *)(p) + 1), (s) - 1))
|
||||
|
Loading…
Reference in New Issue
Block a user