Now that there is no R/W lock on the PCB list, the pcblist sysctl
handlers can be greatly simplified.  All the previous double
cycling and complex locking was added to avoid these functions
holding global PCB locks for an extended period of time, preventing
addition of new entries.
This commit is contained in:
Gleb Smirnoff 2019-11-07 21:27:32 +00:00
parent d40c0d47cd
commit 032677ceb5
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=354484
6 changed files with 104 additions and 249 deletions

View File

@ -629,71 +629,41 @@ div_ctlinput(int cmd, struct sockaddr *sa, void *vip)
static int
div_pcblist(SYSCTL_HANDLER_ARGS)
{
int error, i, n;
struct inpcb *inp, **inp_list;
inp_gen_t gencnt;
struct xinpgen xig;
struct epoch_tracker et;
struct inpcb *inp;
int error;
if (req->newptr != 0)
return EPERM;
/*
* The process of preparing the TCB list is too time-consuming and
* resource-intensive to repeat twice on every request.
*/
if (req->oldptr == 0) {
int n;
n = V_divcbinfo.ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return 0;
}
if (req->newptr != 0)
return EPERM;
/*
* OK, now we're committed to doing something.
*/
INP_INFO_WLOCK(&V_divcbinfo);
gencnt = V_divcbinfo.ipi_gencnt;
n = V_divcbinfo.ipi_count;
INP_INFO_WUNLOCK(&V_divcbinfo);
error = sysctl_wire_old_buffer(req,
2 * sizeof(xig) + n*sizeof(struct xinpcb));
if (error != 0)
if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
xig.xig_count = n;
xig.xig_gen = gencnt;
xig.xig_count = V_divcbinfo.ipi_count;
xig.xig_gen = V_divcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return error;
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
if (inp_list == NULL)
return ENOMEM;
INP_INFO_RLOCK_ET(&V_divcbinfo, et);
for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n;
inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
if (inp->inp_gencnt <= gencnt &&
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
in_pcbref(inp);
inp_list[i++] = inp;
}
INP_WUNLOCK(inp);
}
INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
n = i;
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
NET_EPOCH_ENTER(et);
for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead);
inp != NULL;
inp = CK_LIST_NEXT(inp, inp_list)) {
INP_RLOCK(inp);
if (inp->inp_gencnt <= gencnt) {
if (inp->inp_gencnt <= xig.xig_gen) {
struct xinpcb xi;
in_pcbtoxinpcb(inp, &xi);
@ -702,17 +672,9 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
} else
INP_RUNLOCK(inp);
}
INP_INFO_WLOCK(&V_divcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_RLOCK(inp);
if (!in_pcbrele_rlocked(inp))
INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_divcbinfo);
NET_EPOCH_EXIT(et);
if (!error) {
struct epoch_tracker et;
/*
* Give the user an updated idea of our state.
* If the generation differs from what we told
@ -720,15 +682,13 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
* while we were processing this request, and it
* might be necessary to retry.
*/
INP_INFO_RLOCK_ET(&V_divcbinfo, et);
xig.xig_gen = V_divcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_divcbinfo.ipi_count;
INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
free(inp_list, M_TEMP);
return error;
return (error);
}
#ifdef SYSCTL_NODE

View File

@ -1067,97 +1067,67 @@ rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
static int
rip_pcblist(SYSCTL_HANDLER_ARGS)
{
int error, i, n;
struct inpcb *inp, **inp_list;
inp_gen_t gencnt;
struct xinpgen xig;
struct epoch_tracker et;
struct inpcb *inp;
int error;
if (req->newptr != 0)
return (EPERM);
/*
* The process of preparing the TCB list is too time-consuming and
* resource-intensive to repeat twice on every request.
*/
if (req->oldptr == 0) {
int n;
n = V_ripcbinfo.ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return (0);
}
if (req->newptr != 0)
return (EPERM);
/*
* OK, now we're committed to doing something.
*/
INP_INFO_WLOCK(&V_ripcbinfo);
gencnt = V_ripcbinfo.ipi_gencnt;
n = V_ripcbinfo.ipi_count;
INP_INFO_WUNLOCK(&V_ripcbinfo);
if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
xig.xig_count = n;
xig.xig_gen = gencnt;
xig.xig_count = V_ripcbinfo.ipi_count;
xig.xig_gen = V_ripcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
if (inp->inp_gencnt <= gencnt &&
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
in_pcbref(inp);
inp_list[i++] = inp;
}
INP_WUNLOCK(inp);
}
INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
n = i;
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
NET_EPOCH_ENTER(et);
for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead);
inp != NULL;
inp = CK_LIST_NEXT(inp, inp_list)) {
INP_RLOCK(inp);
if (inp->inp_gencnt <= gencnt) {
if (inp->inp_gencnt <= xig.xig_gen &&
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
struct xinpcb xi;
in_pcbtoxinpcb(inp, &xi);
INP_RUNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
if (error)
break;
} else
INP_RUNLOCK(inp);
}
INP_INFO_WLOCK(&V_ripcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_RLOCK(inp);
if (!in_pcbrele_rlocked(inp))
INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_ripcbinfo);
NET_EPOCH_EXIT(et);
if (!error) {
struct epoch_tracker et;
/*
* Give the user an updated idea of our state. If the
* generation differs from what we told her before, she knows
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
xig.xig_gen = V_ripcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_ripcbinfo.ipi_count;
INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
free(inp_list, M_TEMP);
return (error);
}

View File

@ -2127,17 +2127,17 @@ tcp_notify(struct inpcb *inp, int error)
static int
tcp_pcblist(SYSCTL_HANDLER_ARGS)
{
int error, i, m, n, pcb_count;
struct inpcb *inp, **inp_list;
inp_gen_t gencnt;
struct xinpgen xig;
struct epoch_tracker et;
struct inpcb *inp;
struct xinpgen xig;
int error;
if (req->newptr != NULL)
return (EPERM);
/*
* The process of preparing the TCB list is too time-consuming and
* resource-intensive to repeat twice on every request.
*/
if (req->oldptr == NULL) {
int n;
n = V_tcbinfo.ipi_count +
counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
n += imax(n / 8, 10);
@ -2145,44 +2145,29 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
return (0);
}
if (req->newptr != NULL)
return (EPERM);
/*
* OK, now we're committed to doing something.
*/
INP_LIST_RLOCK(&V_tcbinfo);
gencnt = V_tcbinfo.ipi_gencnt;
n = V_tcbinfo.ipi_count;
INP_LIST_RUNLOCK(&V_tcbinfo);
m = counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
+ (n + m) * sizeof(struct xtcpcb));
if (error != 0)
if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
xig.xig_count = n + m;
xig.xig_gen = gencnt;
xig.xig_count = V_tcbinfo.ipi_count +
counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
xig.xig_gen = V_tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
error = syncache_pcblist(req, m, &pcb_count);
error = syncache_pcblist(req);
if (error)
return (error);
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
INP_INFO_WLOCK(&V_tcbinfo);
for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
inp != NULL && i < n; inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
if (inp->inp_gencnt <= gencnt) {
NET_EPOCH_ENTER(et);
for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead);
inp != NULL;
inp = CK_LIST_NEXT(inp, inp_list)) {
INP_RLOCK(inp);
if (inp->inp_gencnt <= xig.xig_gen) {
/*
* XXX: This use of cr_cansee(), introduced with
* TCP state changes, is not quite right, but for
@ -2197,36 +2182,18 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
} else
error = cr_canseeinpcb(req->td->td_ucred, inp);
if (error == 0) {
in_pcbref(inp);
inp_list[i++] = inp;
struct xtcpcb xt;
tcp_inptoxtp(inp, &xt);
INP_RUNLOCK(inp);
error = SYSCTL_OUT(req, &xt, sizeof xt);
if (error)
break;
}
}
INP_WUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_tcbinfo);
n = i;
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_RLOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xtcpcb xt;
tcp_inptoxtp(inp, &xt);
INP_RUNLOCK(inp);
error = SYSCTL_OUT(req, &xt, sizeof xt);
} else
INP_RUNLOCK(inp);
}
INP_INFO_RLOCK_ET(&V_tcbinfo, et);
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_RLOCK(inp);
if (!in_pcbrele_rlocked(inp))
INP_RUNLOCK(inp);
}
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
NET_EPOCH_EXIT(et);
if (!error) {
/*
@ -2236,14 +2203,13 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
* while we were processing this request, and it
* might be necessary to retry.
*/
INP_LIST_RLOCK(&V_tcbinfo);
xig.xig_gen = V_tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_tcbinfo.ipi_count + pcb_count;
INP_LIST_RUNLOCK(&V_tcbinfo);
xig.xig_count = V_tcbinfo.ipi_count +
counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
free(inp_list, M_TEMP);
return (error);
}

View File

@ -2452,46 +2452,41 @@ syncache_unpause(void *arg)
* amount of space the caller allocated for this function to use.
*/
int
syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported)
syncache_pcblist(struct sysctl_req *req)
{
struct xtcpcb xt;
struct syncache *sc;
struct syncache_head *sch;
int count, error, i;
int error, i;
for (count = 0, error = 0, i = 0; i < V_tcp_syncache.hashsize; i++) {
bzero(&xt, sizeof(xt));
xt.xt_len = sizeof(xt);
xt.t_state = TCPS_SYN_RECEIVED;
xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
xt.xt_inp.xi_socket.xso_len = sizeof (struct xsocket);
xt.xt_inp.xi_socket.so_type = SOCK_STREAM;
xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING;
for (i = 0; i < V_tcp_syncache.hashsize; i++) {
sch = &V_tcp_syncache.hashbase[i];
SCH_LOCK(sch);
TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
if (count >= max_pcbs) {
SCH_UNLOCK(sch);
goto exit;
}
if (cr_cansee(req->td->td_ucred, sc->sc_cred) != 0)
continue;
bzero(&xt, sizeof(xt));
xt.xt_len = sizeof(xt);
if (sc->sc_inc.inc_flags & INC_ISIPV6)
xt.xt_inp.inp_vflag = INP_IPV6;
else
xt.xt_inp.inp_vflag = INP_IPV4;
bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc,
sizeof (struct in_conninfo));
xt.t_state = TCPS_SYN_RECEIVED;
xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
xt.xt_inp.xi_socket.xso_len = sizeof (struct xsocket);
xt.xt_inp.xi_socket.so_type = SOCK_STREAM;
xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING;
error = SYSCTL_OUT(req, &xt, sizeof xt);
if (error) {
SCH_UNLOCK(sch);
goto exit;
return (0);
}
count++;
}
SCH_UNLOCK(sch);
}
exit:
*pcbs_exported = count;
return error;
return (0);
}

View File

@ -48,7 +48,7 @@ int syncache_add(struct in_conninfo *, struct tcpopt *,
void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *);
void syncache_badack(struct in_conninfo *);
int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported);
int syncache_pcblist(struct sysctl_req *);
struct syncache {
TAILQ_ENTRY(syncache) sc_hash;

View File

@ -851,87 +851,53 @@ udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip)
static int
udp_pcblist(SYSCTL_HANDLER_ARGS)
{
int error, i, n;
struct inpcb *inp, **inp_list;
inp_gen_t gencnt;
struct xinpgen xig;
struct epoch_tracker et;
struct inpcb *inp;
int error;
if (req->newptr != 0)
return (EPERM);
/*
* The process of preparing the PCB list is too time-consuming and
* resource-intensive to repeat twice on every request.
*/
if (req->oldptr == 0) {
int n;
n = V_udbinfo.ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return (0);
}
if (req->newptr != 0)
return (EPERM);
/*
* OK, now we're committed to doing something.
*/
INP_INFO_RLOCK_ET(&V_udbinfo, et);
gencnt = V_udbinfo.ipi_gencnt;
n = V_udbinfo.ipi_count;
INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
+ n * sizeof(struct xinpcb));
if (error != 0)
if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
xig.xig_count = n;
xig.xig_gen = gencnt;
xig.xig_count = V_udbinfo.ipi_count;
xig.xig_gen = V_udbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
if (inp_list == NULL)
return (ENOMEM);
INP_INFO_RLOCK_ET(&V_udbinfo, et);
for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
if (inp->inp_gencnt <= gencnt &&
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
in_pcbref(inp);
inp_list[i++] = inp;
}
INP_WUNLOCK(inp);
}
INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
n = i;
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
NET_EPOCH_ENTER(et);
for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead);
inp != NULL;
inp = CK_LIST_NEXT(inp, inp_list)) {
INP_RLOCK(inp);
if (inp->inp_gencnt <= gencnt) {
if (inp->inp_gencnt <= xig.xig_gen &&
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
struct xinpcb xi;
in_pcbtoxinpcb(inp, &xi);
INP_RUNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
if (error)
break;
} else
INP_RUNLOCK(inp);
}
INP_INFO_WLOCK(&V_udbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_RLOCK(inp);
if (!in_pcbrele_rlocked(inp))
INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_udbinfo);
NET_EPOCH_EXIT(et);
if (!error) {
/*
@ -940,14 +906,12 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
INP_INFO_RLOCK_ET(&V_udbinfo, et);
xig.xig_gen = V_udbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_udbinfo.ipi_count;
INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
free(inp_list, M_TEMP);
return (error);
}