o Rearrange struct inpcb fields to optimize the TCP output code path

considering cache line hits and misses.  Put the lock and hash list
  glue into the first cache line, put inp_refcount inp_flags inp_socket
  into the second cache line.
o On allocation zero out entire structure except the lock and list entries,
  including inp_route inp_lle inp_gencnt.  When inp_route and inp_lle were
  introduced, they were added below inp_zero_size, resulting on not being
  cleared after free/alloc.  This definitely was a source of bugs with route
  caching.  Could be that r315956 has just fixed one of them.
  The inp_gencnt is reinitialized on every alloc, so it is safe to clear it.

This has been proved to improve TCP performance at Netflix.

Obtained from:		rrs
Differential Revision:	D10686
This commit is contained in:
Gleb Smirnoff 2017-05-24 17:47:16 +00:00
parent dcfffd0470
commit 6a6cefac3d
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=318793
2 changed files with 17 additions and 15 deletions

View File

@ -300,7 +300,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
if (inp == NULL)
return (ENOBUFS);
bzero(inp, inp_zero_size);
bzero(&inp->inp_start_zero, inp_zero_size);
inp->inp_pcbinfo = pcbinfo;
inp->inp_socket = so;
inp->inp_cred = crhold(so->so_cred);

View File

@ -183,26 +183,29 @@ struct icmp6_filter;
struct inpcbpolicy;
struct m_snd_tag;
struct inpcb {
/* Cache line #1 (amd64) */
LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */
LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
/* (p[w]) for list iteration */
/* (p[r]/l) for addition/removal */
struct rwlock inp_lock;
/* Cache line #2 (amd64) */
#define inp_start_zero inp_refcount
#define inp_zero_size (sizeof(struct inpcb) - \
offsetof(struct inpcb, inp_start_zero))
u_int inp_refcount; /* (i) refcount */
int inp_flags; /* (i) generic IP/datagram flags */
int inp_flags2; /* (i) generic IP/datagram flags #2*/
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct socket *inp_socket; /* (i) back pointer to socket */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
struct socket *inp_socket; /* (i) back pointer to socket */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
int inp_flags; /* (i) generic IP/datagram flags */
int inp_flags2; /* (i) generic IP/datagram flags #2*/
u_char inp_vflag; /* (i) IP version flag (v4/v6) */
u_char inp_ip_ttl; /* (i) time to live proto */
u_char inp_ip_p; /* (c) protocol proto */
u_char inp_ip_minttl; /* (i) minimum TTL or drop */
uint32_t inp_flowid; /* (x) flow id / queue id */
u_int inp_refcount; /* (i) refcount */
struct m_snd_tag *inp_snd_tag; /* (i) send tag for outgoing mbufs */
uint32_t inp_flowtype; /* (x) M_HASHTYPE value */
uint32_t inp_rss_listen_bucket; /* (x) overridden RSS listen bucket */
@ -235,17 +238,16 @@ struct inpcb {
};
LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */
struct inpcbport *inp_phd; /* (i/h) head of this list */
#define inp_zero_size offsetof(struct inpcb, inp_gencnt)
inp_gen_t inp_gencnt; /* (c) generation count */
struct llentry *inp_lle; /* cached L2 information */
struct rwlock inp_lock;
rt_gen_t inp_rt_cookie; /* generation for route entry */
union { /* cached L3 information */
struct route inpu_route;
struct route_in6 inpu_route6;
} inp_rtu;
#define inp_route inp_rtu.inpu_route
#define inp_route6 inp_rtu.inpu_route6
struct route inp_route;
struct route_in6 inp_route6;
};
LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
/* (p[w]) for list iteration */
/* (p[r]/l) for addition/removal */
};
#endif /* _KERNEL */