From 032677ceb535e53dcd041eff471a6fee631afe9b Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Thu, 7 Nov 2019 21:27:32 +0000 Subject: [PATCH] Now that there is no R/W lock on PCB list the pcblist sysctls handlers can be greatly simplified. All the previous double cycling and complex locking was added to avoid these functions holding global PCB locks for extended period of time, preventing addition of new entries. --- sys/netinet/ip_divert.c | 76 ++++++++---------------------- sys/netinet/raw_ip.c | 72 +++++++++-------------------- sys/netinet/tcp_subr.c | 94 ++++++++++++-------------------------- sys/netinet/tcp_syncache.c | 33 ++++++------- sys/netinet/tcp_syncache.h | 2 +- sys/netinet/udp_usrreq.c | 76 ++++++++---------------------- 6 files changed, 104 insertions(+), 249 deletions(-) diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index 0f5147a39d3c..90513ae97aaa 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -629,71 +629,41 @@ div_ctlinput(int cmd, struct sockaddr *sa, void *vip) static int div_pcblist(SYSCTL_HANDLER_ARGS) { - int error, i, n; - struct inpcb *inp, **inp_list; - inp_gen_t gencnt; struct xinpgen xig; struct epoch_tracker et; + struct inpcb *inp; + int error; + + if (req->newptr != 0) + return EPERM; - /* - * The process of preparing the TCB list is too time-consuming and - * resource-intensive to repeat twice on every request. - */ if (req->oldptr == 0) { + int n; + n = V_divcbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return 0; } - if (req->newptr != 0) - return EPERM; - - /* - * OK, now we're committed to doing something. - */ - INP_INFO_WLOCK(&V_divcbinfo); - gencnt = V_divcbinfo.ipi_gencnt; - n = V_divcbinfo.ipi_count; - INP_INFO_WUNLOCK(&V_divcbinfo); - - error = sysctl_wire_old_buffer(req, - 2 * sizeof(xig) + n*sizeof(struct xinpcb)); - if (error != 0) + if ((error = sysctl_wire_old_buffer(req, 0)) != 0) return (error); bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; - xig.xig_count = n; - xig.xig_gen = gencnt; + xig.xig_count = V_divcbinfo.ipi_count; + xig.xig_gen = V_divcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return error; - inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); - if (inp_list == NULL) - return ENOMEM; - - INP_INFO_RLOCK_ET(&V_divcbinfo, et); - for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n; - inp = CK_LIST_NEXT(inp, inp_list)) { - INP_WLOCK(inp); - if (inp->inp_gencnt <= gencnt && - cr_canseeinpcb(req->td->td_ucred, inp) == 0) { - in_pcbref(inp); - inp_list[i++] = inp; - } - INP_WUNLOCK(inp); - } - INP_INFO_RUNLOCK_ET(&V_divcbinfo, et); - n = i; - - error = 0; - for (i = 0; i < n; i++) { - inp = inp_list[i]; + NET_EPOCH_ENTER(et); + for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead); + inp != NULL; + inp = CK_LIST_NEXT(inp, inp_list)) { INP_RLOCK(inp); - if (inp->inp_gencnt <= gencnt) { + if (inp->inp_gencnt <= xig.xig_gen) { struct xinpcb xi; in_pcbtoxinpcb(inp, &xi); @@ -702,17 +672,9 @@ div_pcblist(SYSCTL_HANDLER_ARGS) } else INP_RUNLOCK(inp); } - INP_INFO_WLOCK(&V_divcbinfo); - for (i = 0; i < n; i++) { - inp = inp_list[i]; - INP_RLOCK(inp); - if (!in_pcbrele_rlocked(inp)) - INP_RUNLOCK(inp); - } - INP_INFO_WUNLOCK(&V_divcbinfo); + NET_EPOCH_EXIT(et); if (!error) { - struct epoch_tracker et; /* * Give the user an updated idea of our state. * If the generation differs from what we told @@ -720,15 +682,13 @@ div_pcblist(SYSCTL_HANDLER_ARGS) * while we were processing this request, and it * might be necessary to retry. */ - INP_INFO_RLOCK_ET(&V_divcbinfo, et); xig.xig_gen = V_divcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_divcbinfo.ipi_count; - INP_INFO_RUNLOCK_ET(&V_divcbinfo, et); error = SYSCTL_OUT(req, &xig, sizeof xig); } - free(inp_list, M_TEMP); - return error; + + return (error); } #ifdef SYSCTL_NODE diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 1dd7a9de3f56..95f1a7c04b93 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -1067,97 +1067,67 @@ rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, static int rip_pcblist(SYSCTL_HANDLER_ARGS) { - int error, i, n; - struct inpcb *inp, **inp_list; - inp_gen_t gencnt; struct xinpgen xig; struct epoch_tracker et; + struct inpcb *inp; + int error; + + if (req->newptr != 0) + return (EPERM); - /* - * The process of preparing the TCB list is too time-consuming and - * resource-intensive to repeat twice on every request. - */ if (req->oldptr == 0) { + int n; + n = V_ripcbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); } - if (req->newptr != 0) - return (EPERM); - - /* - * OK, now we're committed to doing something. - */ - INP_INFO_WLOCK(&V_ripcbinfo); - gencnt = V_ripcbinfo.ipi_gencnt; - n = V_ripcbinfo.ipi_count; - INP_INFO_WUNLOCK(&V_ripcbinfo); + if ((error = sysctl_wire_old_buffer(req, 0)) != 0) + return (error); bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; - xig.xig_count = n; - xig.xig_gen = gencnt; + xig.xig_count = V_ripcbinfo.ipi_count; + xig.xig_gen = V_ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); - inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); - - INP_INFO_RLOCK_ET(&V_ripcbinfo, et); - for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n; - inp = CK_LIST_NEXT(inp, inp_list)) { - INP_WLOCK(inp); - if (inp->inp_gencnt <= gencnt && - cr_canseeinpcb(req->td->td_ucred, inp) == 0) { - in_pcbref(inp); - inp_list[i++] = inp; - } - INP_WUNLOCK(inp); - } - INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); - n = i; - - error = 0; - for (i = 0; i < n; i++) { - inp = inp_list[i]; + NET_EPOCH_ENTER(et); + for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead); + inp != NULL; + inp = CK_LIST_NEXT(inp, inp_list)) { INP_RLOCK(inp); - if (inp->inp_gencnt <= gencnt) { + if (inp->inp_gencnt <= xig.xig_gen && + cr_canseeinpcb(req->td->td_ucred, inp) == 0) { struct xinpcb xi; in_pcbtoxinpcb(inp, &xi); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); + if (error) + break; } else INP_RUNLOCK(inp); } - INP_INFO_WLOCK(&V_ripcbinfo); - for (i = 0; i < n; i++) { - inp = inp_list[i]; - INP_RLOCK(inp); - if (!in_pcbrele_rlocked(inp)) - INP_RUNLOCK(inp); - } - INP_INFO_WUNLOCK(&V_ripcbinfo); + NET_EPOCH_EXIT(et); if (!error) { - struct epoch_tracker et; /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. */ - INP_INFO_RLOCK_ET(&V_ripcbinfo, et); xig.xig_gen = V_ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_ripcbinfo.ipi_count; - INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); error = SYSCTL_OUT(req, &xig, sizeof xig); } - free(inp_list, M_TEMP); + return (error); } diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 2941a255aeae..70e4243dbaf5 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -2127,17 +2127,17 @@ tcp_notify(struct inpcb *inp, int error) static int tcp_pcblist(SYSCTL_HANDLER_ARGS) { - int error, i, m, n, pcb_count; - struct inpcb *inp, **inp_list; - inp_gen_t gencnt; - struct xinpgen xig; struct epoch_tracker et; + struct inpcb *inp; + struct xinpgen xig; + int error; + + if (req->newptr != NULL) + return (EPERM); - /* - * The process of preparing the TCB list is too time-consuming and - * resource-intensive to repeat twice on every request. - */ if (req->oldptr == NULL) { + int n; + n = V_tcbinfo.ipi_count + counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]); n += imax(n / 8, 10); @@ -2145,44 +2145,29 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) return (0); } - if (req->newptr != NULL) - return (EPERM); - - /* - * OK, now we're committed to doing something. - */ - INP_LIST_RLOCK(&V_tcbinfo); - gencnt = V_tcbinfo.ipi_gencnt; - n = V_tcbinfo.ipi_count; - INP_LIST_RUNLOCK(&V_tcbinfo); - - m = counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]); - - error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) - + (n + m) * sizeof(struct xtcpcb)); - if (error != 0) + if ((error = sysctl_wire_old_buffer(req, 0)) != 0) return (error); bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; - xig.xig_count = n + m; - xig.xig_gen = gencnt; + xig.xig_count = V_tcbinfo.ipi_count + + counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]); + xig.xig_gen = V_tcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); - error = syncache_pcblist(req, m, &pcb_count); + error = syncache_pcblist(req); if (error) return (error); - inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); - - INP_INFO_WLOCK(&V_tcbinfo); - for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; - inp != NULL && i < n; inp = CK_LIST_NEXT(inp, inp_list)) { - INP_WLOCK(inp); - if (inp->inp_gencnt <= gencnt) { + NET_EPOCH_ENTER(et); + for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead); + inp != NULL; + inp = CK_LIST_NEXT(inp, inp_list)) { + INP_RLOCK(inp); + if (inp->inp_gencnt <= xig.xig_gen) { /* * XXX: This use of cr_cansee(), introduced with * TCP state changes, is not quite right, but for @@ -2197,36 +2182,18 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) } else error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) { - in_pcbref(inp); - inp_list[i++] = inp; + struct xtcpcb xt; + + tcp_inptoxtp(inp, &xt); + INP_RUNLOCK(inp); + error = SYSCTL_OUT(req, &xt, sizeof xt); + if (error) + break; } - } - INP_WUNLOCK(inp); - } - INP_INFO_WUNLOCK(&V_tcbinfo); - n = i; - - error = 0; - for (i = 0; i < n; i++) { - inp = inp_list[i]; - INP_RLOCK(inp); - if (inp->inp_gencnt <= gencnt) { - struct xtcpcb xt; - - tcp_inptoxtp(inp, &xt); - INP_RUNLOCK(inp); - error = SYSCTL_OUT(req, &xt, sizeof xt); } else INP_RUNLOCK(inp); } - INP_INFO_RLOCK_ET(&V_tcbinfo, et); - for (i = 0; i < n; i++) { - inp = inp_list[i]; - INP_RLOCK(inp); - if (!in_pcbrele_rlocked(inp)) - INP_RUNLOCK(inp); - } - INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); + NET_EPOCH_EXIT(et); if (!error) { /* @@ -2236,14 +2203,13 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) * while we were processing this request, and it * might be necessary to retry. */ - INP_LIST_RLOCK(&V_tcbinfo); xig.xig_gen = V_tcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; - xig.xig_count = V_tcbinfo.ipi_count + pcb_count; - INP_LIST_RUNLOCK(&V_tcbinfo); + xig.xig_count = V_tcbinfo.ipi_count + + counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]); error = SYSCTL_OUT(req, &xig, sizeof xig); } - free(inp_list, M_TEMP); + return (error); } diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 72546b885acc..234cb509f8b1 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -2452,46 +2452,41 @@ syncache_unpause(void *arg) * amount of space the caller allocated for this function to use. */ int -syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported) +syncache_pcblist(struct sysctl_req *req) { struct xtcpcb xt; struct syncache *sc; struct syncache_head *sch; - int count, error, i; + int error, i; - for (count = 0, error = 0, i = 0; i < V_tcp_syncache.hashsize; i++) { + bzero(&xt, sizeof(xt)); + xt.xt_len = sizeof(xt); + xt.t_state = TCPS_SYN_RECEIVED; + xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP; + xt.xt_inp.xi_socket.xso_len = sizeof (struct xsocket); + xt.xt_inp.xi_socket.so_type = SOCK_STREAM; + xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING; + + for (i = 0; i < V_tcp_syncache.hashsize; i++) { sch = &V_tcp_syncache.hashbase[i]; SCH_LOCK(sch); TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) { - if (count >= max_pcbs) { - SCH_UNLOCK(sch); - goto exit; - } if (cr_cansee(req->td->td_ucred, sc->sc_cred) != 0) continue; - bzero(&xt, sizeof(xt)); - xt.xt_len = sizeof(xt); if (sc->sc_inc.inc_flags & INC_ISIPV6) xt.xt_inp.inp_vflag = INP_IPV6; else xt.xt_inp.inp_vflag = INP_IPV4; bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc, sizeof (struct in_conninfo)); - xt.t_state = TCPS_SYN_RECEIVED; - xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP; - xt.xt_inp.xi_socket.xso_len = sizeof (struct xsocket); - xt.xt_inp.xi_socket.so_type = SOCK_STREAM; - xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING; error = SYSCTL_OUT(req, &xt, sizeof xt); if (error) { SCH_UNLOCK(sch); - goto exit; + return (0); } - count++; } SCH_UNLOCK(sch); } -exit: - *pcbs_exported = count; - return error; + + return (0); } diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h index b7d4ba1fe28c..d9ddde9f8c80 100644 --- a/sys/netinet/tcp_syncache.h +++ b/sys/netinet/tcp_syncache.h @@ -48,7 +48,7 @@ int syncache_add(struct in_conninfo *, struct tcpopt *, void *, void *); void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *); void syncache_badack(struct in_conninfo *); -int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported); +int syncache_pcblist(struct sysctl_req *); struct syncache { TAILQ_ENTRY(syncache) sc_hash; diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 6e1340a0029c..1517eeb7ed43 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -851,87 +851,53 @@ udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip) static int udp_pcblist(SYSCTL_HANDLER_ARGS) { - int error, i, n; - struct inpcb *inp, **inp_list; - inp_gen_t gencnt; struct xinpgen xig; struct epoch_tracker et; + struct inpcb *inp; + int error; + + if (req->newptr != 0) + return (EPERM); - /* - * The process of preparing the PCB list is too time-consuming and - * resource-intensive to repeat twice on every request. - */ if (req->oldptr == 0) { + int n; + n = V_udbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); } - if (req->newptr != 0) - return (EPERM); - - /* - * OK, now we're committed to doing something. - */ - INP_INFO_RLOCK_ET(&V_udbinfo, et); - gencnt = V_udbinfo.ipi_gencnt; - n = V_udbinfo.ipi_count; - INP_INFO_RUNLOCK_ET(&V_udbinfo, et); - - error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) - + n * sizeof(struct xinpcb)); - if (error != 0) + if ((error = sysctl_wire_old_buffer(req, 0)) != 0) return (error); bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; - xig.xig_count = n; - xig.xig_gen = gencnt; + xig.xig_count = V_udbinfo.ipi_count; + xig.xig_gen = V_udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); - inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); - if (inp_list == NULL) - return (ENOMEM); - - INP_INFO_RLOCK_ET(&V_udbinfo, et); - for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n; - inp = CK_LIST_NEXT(inp, inp_list)) { - INP_WLOCK(inp); - if (inp->inp_gencnt <= gencnt && - cr_canseeinpcb(req->td->td_ucred, inp) == 0) { - in_pcbref(inp); - inp_list[i++] = inp; - } - INP_WUNLOCK(inp); - } - INP_INFO_RUNLOCK_ET(&V_udbinfo, et); - n = i; - - error = 0; - for (i = 0; i < n; i++) { - inp = inp_list[i]; + NET_EPOCH_ENTER(et); + for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead); + inp != NULL; + inp = CK_LIST_NEXT(inp, inp_list)) { INP_RLOCK(inp); - if (inp->inp_gencnt <= gencnt) { + if (inp->inp_gencnt <= xig.xig_gen && + cr_canseeinpcb(req->td->td_ucred, inp) == 0) { struct xinpcb xi; in_pcbtoxinpcb(inp, &xi); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); + if (error) + break; } else INP_RUNLOCK(inp); } - INP_INFO_WLOCK(&V_udbinfo); - for (i = 0; i < n; i++) { - inp = inp_list[i]; - INP_RLOCK(inp); - if (!in_pcbrele_rlocked(inp)) - INP_RUNLOCK(inp); - } - INP_INFO_WUNLOCK(&V_udbinfo); + NET_EPOCH_EXIT(et); if (!error) { /* @@ -940,14 +906,12 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) * that something happened while we were processing this * request, and it might be necessary to retry. */ - INP_INFO_RLOCK_ET(&V_udbinfo, et); xig.xig_gen = V_udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_udbinfo.ipi_count; - INP_INFO_RUNLOCK_ET(&V_udbinfo, et); error = SYSCTL_OUT(req, &xig, sizeof xig); } - free(inp_list, M_TEMP); + return (error); }