Add support for receiving and setting flowtype, flowid and RSS bucket

information via control messages as part of recvmsg() and sendmsg().

This is primarily used for debugging/verification of the various
processing paths in the IP, PCB and driver layers.

Unfortunately, the current implementation of the control message path
results in a roughly 10% drop in UDP frame throughput when it's used.

Differential Revision:	https://reviews.freebsd.org/D527
Reviewed by:	grehan
This commit is contained in:
adrian 2014-09-09 01:45:39 +00:00
parent 4f769d2ecf
commit e623d51cd5
5 changed files with 105 additions and 0 deletions

View File

@ -492,6 +492,8 @@ __END_DECLS
#define IP_FLOWID 90 /* get flow id for the given socket/inp */
#define IP_FLOWTYPE 91 /* get flow type (M_HASHTYPE) */
#define IP_RSSBUCKETID 92 /* get RSS flowid -> bucket mapping */
#define IP_RECVFLOWID 93 /* bool; receive IP flowid/flowtype w/ datagram */
#define IP_RECVRSSBUCKETID 94 /* bool; receive IP RSS bucket id w/ datagram */
/*
* Defaults and limits for options

View File

@ -549,6 +549,8 @@ short inp_so_options(const struct inpcb *inp);
#define INP_REUSEADDR 0x00000020 /* SO_REUSEADDR option is set */
#define INP_BINDMULTI 0x00000040 /* IP_BINDMULTI option is set */
#define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */
#define INP_RECVFLOWID 0x00000100 /* populate recv datagram with flow info */
#define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */
/*
* Flags passed to in_pcblookup*() functions.

View File

@ -1662,6 +1662,43 @@ makedummy:
if (*mp)
mp = &(*mp)->m_next;
}
if (inp->inp_flags2 & INP_RECVFLOWID) {
uint32_t flowid, flow_type;
flowid = m->m_pkthdr.flowid;
flow_type = M_HASHTYPE_GET(m);
/*
* XXX should handle the failure of one or the
* other - don't populate both?
*/
*mp = sbcreatecontrol((caddr_t) &flowid,
sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
*mp = sbcreatecontrol((caddr_t) &flow_type,
sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
}
#ifdef RSS
if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
uint32_t flowid, flow_type;
uint32_t rss_bucketid;
flowid = m->m_pkthdr.flowid;
flow_type = M_HASHTYPE_GET(m);
if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
*mp = sbcreatecontrol((caddr_t) &rss_bucketid,
sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
}
}
#endif
}
/*

View File

@ -1016,6 +1016,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
case IP_ONESBCAST:
case IP_DONTFRAG:
case IP_RECVTOS:
case IP_RECVFLOWID:
#ifdef RSS
case IP_RECVRSSBUCKETID:
#endif
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
if (error)
@ -1094,6 +1098,9 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
case IP_BINDMULTI:
OPTSET2(INP_BINDMULTI, optval);
break;
case IP_RECVFLOWID:
OPTSET2(INP_RECVFLOWID, optval);
break;
#ifdef RSS
case IP_RSS_LISTEN_BUCKET:
if ((optval >= 0) &&
@ -1104,6 +1111,9 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
error = EINVAL;
}
break;
case IP_RECVRSSBUCKETID:
OPTSET2(INP_RECVRSSBUCKETID, optval);
break;
#endif
}
break;
@ -1219,8 +1229,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
case IP_BINDMULTI:
case IP_FLOWID:
case IP_FLOWTYPE:
case IP_RECVFLOWID:
#ifdef RSS
case IP_RSSBUCKETID:
case IP_RECVRSSBUCKETID:
#endif
switch (sopt->sopt_name) {
@ -1290,6 +1302,9 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
case IP_FLOWTYPE:
optval = inp->inp_flowtype;
break;
case IP_RECVFLOWID:
optval = OPTBIT2(INP_RECVFLOWID);
break;
#ifdef RSS
case IP_RSSBUCKETID:
retval = rss_hash2bucket(inp->inp_flowid,
@ -1300,6 +1315,9 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
else
error = EINVAL;
break;
case IP_RECVRSSBUCKETID:
optval = OPTBIT2(INP_RECVRSSBUCKETID);
break;
#endif
case IP_BINDMULTI:
optval = OPTBIT2(INP_BINDMULTI);

View File

@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_rss.h"
#include <sys/param.h>
#include <sys/domain.h>
@ -1084,6 +1085,9 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
u_char tos;
uint8_t pr;
uint16_t cscov = 0;
uint32_t flowid = 0;
int flowid_type = 0;
int use_flowid = 0;
/*
* udp_output() may need to temporarily bind or connect the current
@ -1147,6 +1151,32 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
tos = *(u_char *)CMSG_DATA(cm);
break;
case IP_FLOWID:
if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
error = EINVAL;
break;
}
flowid = *(uint32_t *) CMSG_DATA(cm);
break;
case IP_FLOWTYPE:
if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
error = EINVAL;
break;
}
flowid_type = *(uint32_t *) CMSG_DATA(cm);
use_flowid = 1;
break;
#ifdef RSS
case IP_RSSBUCKETID:
if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
error = EINVAL;
break;
}
/* This is just a placeholder for now */
break;
#endif /* RSS */
default:
error = ENOPROTOOPT;
break;
@ -1395,6 +1425,22 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
((struct ip *)ui)->ip_tos = tos; /* XXX */
UDPSTAT_INC(udps_opackets);
/*
* Set up flowid / RSS information for outbound socket.
*
* Once the UDP code decides to set a flowid some other way,
* this allows the flowid to be overridden by userland.
*/
if (use_flowid) {
m->m_flags |= M_FLOWID;
m->m_pkthdr.flowid = flowid;
M_HASHTYPE_SET(m, flowid_type);
}
#ifdef RSS
ipflags |= IP_NODEFAULTFLOWID;
#endif /* RSS */
if (unlock_udbinfo == UH_WLOCKED)
INP_HASH_WUNLOCK(pcbinfo);
else if (unlock_udbinfo == UH_RLOCKED)