From 08dde4d421553e179ec0385860b53945bf5bd172 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Wed, 10 Aug 2016 03:10:34 +0000 Subject: [PATCH 01/76] Highball memory requirement (4GB) with common/{raise,safety} Both test suites require more memory than my amd64 VM using GENERIC-NODEBUG can provide and reliably panic it with OOM issues in dtrace(4). Some of the testcases fail, but this at least bypasses the panic behavior on platforms that don't have enough resources MFC after: 2 weeks Discussed with: markj Sponsored by: EMC / Isilon Storage Division --- .../dtrace/tests/common/raise/Makefile | 2 ++ .../dtrace/tests/common/safety/Makefile | 2 ++ .../dtrace/tests/tools/genmakefiles.sh | 19 ++++++++++++++++--- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/cddl/usr.sbin/dtrace/tests/common/raise/Makefile b/cddl/usr.sbin/dtrace/tests/common/raise/Makefile index cde512f2d21f..83af0e40de1f 100644 --- a/cddl/usr.sbin/dtrace/tests/common/raise/Makefile +++ b/cddl/usr.sbin/dtrace/tests/common/raise/Makefile @@ -20,4 +20,6 @@ CFILES= \ tst.raise3.c \ +TEST_METADATA.t_dtrace_contrib+= required_memory="4g" + .include "../../dtrace.test.mk" diff --git a/cddl/usr.sbin/dtrace/tests/common/safety/Makefile b/cddl/usr.sbin/dtrace/tests/common/safety/Makefile index 53260533b641..5056260d0621 100644 --- a/cddl/usr.sbin/dtrace/tests/common/safety/Makefile +++ b/cddl/usr.sbin/dtrace/tests/common/safety/Makefile @@ -53,4 +53,6 @@ CFILES= \ +TEST_METADATA.t_dtrace_contrib+= required_memory="4g" + .include "../../dtrace.test.mk" diff --git a/cddl/usr.sbin/dtrace/tests/tools/genmakefiles.sh b/cddl/usr.sbin/dtrace/tests/tools/genmakefiles.sh index 9953064bcd3e..4b889442f45b 100755 --- a/cddl/usr.sbin/dtrace/tests/tools/genmakefiles.sh +++ b/cddl/usr.sbin/dtrace/tests/tools/genmakefiles.sh @@ -34,15 +34,28 @@ genmakefile() # One-off variable definitions. local special - if [ "$basedir" = proc ]; then + case "$basedir" in + proc) special=" LIBADD.tst.sigwait.exe+= rt " - elif [ "$basedir" = uctf ]; then + ;; + raise) + special=" +TEST_METADATA.t_dtrace_contrib+= required_memory=\"4g\" +" + ;; + safety) + special=" +TEST_METADATA.t_dtrace_contrib+= required_memory=\"4g\" +" + ;; + uctf) special=" WITH_CTF=YES " - fi + ;; + esac local makefile=$(mktemp) cat <<__EOF__ > $makefile From 127d6d1a6456760a3cbbd3877739cc77796e48ea Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Wed, 10 Aug 2016 03:11:07 +0000 Subject: [PATCH 02/76] hyperv/hn: Reorganize send done callback. MFC after: 1 week Sponsored by: Microsoft Differential Revision: https://reviews.freebsd.org/D7450 --- sys/dev/hyperv/netvsc/hv_net_vsc.c | 141 +++++++++--------- sys/dev/hyperv/netvsc/hv_net_vsc.h | 30 +--- sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c | 45 +++--- sys/dev/hyperv/netvsc/hv_rndis_filter.c | 79 +++++----- sys/dev/hyperv/netvsc/hv_rndis_filter.h | 3 +- sys/dev/hyperv/netvsc/if_hnvar.h | 83 +++++++++++ 6 files changed, 221 insertions(+), 160 deletions(-) create mode 100644 sys/dev/hyperv/netvsc/if_hnvar.h diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c index 77fbb50004a0..9aa310ad0635 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.c +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c @@ -68,6 +68,12 @@ static void hv_nv_on_receive_completion(struct vmbus_channel *chan, static void hv_nv_on_receive(netvsc_dev *net_dev, struct hn_rx_ring *rxr, struct vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkt); +static void hn_nvs_sent_none(struct hn_send_ctx *sndc, + struct netvsc_dev_ *net_dev, struct vmbus_channel *chan, + const struct nvsp_msg_ *msg); + +static struct hn_send_ctx hn_send_ctx_none = + HN_SEND_CTX_INITIALIZER(hn_nvs_sent_none, NULL); /* * @@ -141,6 +147,7 @@ hv_nv_get_next_send_section(netvsc_dev *net_dev) static int hv_nv_init_rx_buffer_with_net_vsp(struct hn_softc *sc) { + struct hn_send_ctx sndc; netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret = 0; @@ -189,9 +196,10 @@ hv_nv_init_rx_buffer_with_net_vsp(struct hn_softc *sc) /* Send the gpadl notification request */ + hn_send_ctx_init_simple(&sndc, hn_nvs_sent_wakeup, NULL); ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); + init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)&sndc); if (ret != 0) { goto cleanup; } @@ -240,6 +248,7 @@ hv_nv_init_rx_buffer_with_net_vsp(struct hn_softc *sc) static int hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc) { + struct hn_send_ctx sndc; netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret = 0; @@ -287,9 +296,10 @@ hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc) /* Send the gpadl notification request */ + hn_send_ctx_init_simple(&sndc, hn_nvs_sent_wakeup, NULL); ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, - init_pkt, sizeof(nvsp_msg), (uint64_t)init_pkt); + init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)&sndc); if (ret != 0) { goto cleanup; } @@ -348,8 +358,7 @@ hv_nv_destroy_rx_buffer(netvsc_dev *net_dev) ret = vmbus_chan_send(net_dev->sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, 0, revoke_pkt, sizeof(nvsp_msg), - (uint64_t)(uintptr_t)revoke_pkt); - + (uint64_t)(uintptr_t)&hn_send_ctx_none); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk @@ -414,9 +423,8 @@ hv_nv_destroy_send_buffer(netvsc_dev *net_dev) NETVSC_SEND_BUFFER_ID; ret = vmbus_chan_send(net_dev->sc->hn_prichan, - VMBUS_CHANPKT_TYPE_INBAND, 0, - revoke_pkt, sizeof(nvsp_msg), - (uint64_t)(uintptr_t)revoke_pkt); + VMBUS_CHANPKT_TYPE_INBAND, 0, revoke_pkt, sizeof(nvsp_msg), + (uint64_t)(uintptr_t)&hn_send_ctx_none); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk @@ -466,6 +474,7 @@ static int hv_nv_negotiate_nvsp_protocol(struct hn_softc *sc, netvsc_dev *net_dev, uint32_t nvsp_ver) { + struct hn_send_ctx sndc; nvsp_msg *init_pkt; int ret; @@ -480,9 +489,10 @@ hv_nv_negotiate_nvsp_protocol(struct hn_softc *sc, netvsc_dev *net_dev, init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver; /* Send the init request */ + hn_send_ctx_init_simple(&sndc, hn_nvs_sent_wakeup, NULL); ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); + init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)&sndc); if (ret != 0) return (-1); @@ -524,7 +534,7 @@ hv_nv_send_ndis_config(struct hn_softc *sc, uint32_t mtu) /* Send the configuration packet */ ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, 0, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); + init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)&hn_send_ctx_none); if (ret != 0) return (-EINVAL); @@ -602,7 +612,7 @@ hv_nv_connect_to_vsp(struct hn_softc *sc) /* Send the init request */ ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, 0, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); + init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)&hn_send_ctx_none); if (ret != 0) { goto cleanup; } @@ -731,6 +741,43 @@ hv_nv_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel) return (0); } +void +hn_nvs_sent_wakeup(struct hn_send_ctx *sndc __unused, + struct netvsc_dev_ *net_dev, struct vmbus_channel *chan __unused, + const struct nvsp_msg_ *msg) +{ + /* Copy the response back */ + memcpy(&net_dev->channel_init_packet, msg, sizeof(nvsp_msg)); + sema_post(&net_dev->channel_init_sema); +} + +static void +hn_nvs_sent_none(struct hn_send_ctx *sndc __unused, + struct netvsc_dev_ *net_dev __unused, struct vmbus_channel *chan __unused, + const struct nvsp_msg_ *msg __unused) +{ + /* EMPTY */ +} + +void +hn_chim_free(struct netvsc_dev_ *net_dev, uint32_t chim_idx) +{ + u_long mask; + uint32_t idx; + + idx = chim_idx / BITS_PER_LONG; + KASSERT(idx < net_dev->bitsmap_words, + ("invalid chimney index 0x%x", chim_idx)); + + mask = 1UL << (chim_idx % BITS_PER_LONG); + KASSERT(net_dev->send_section_bitsmap[idx] & mask, + ("index bitmap 0x%lx, chimney index %u, " + "bitmap idx %d, bitmask 0x%lx", + net_dev->send_section_bitsmap[idx], chim_idx, idx, mask)); + + atomic_clear_long(&net_dev->send_section_bitsmap[idx], mask); +} + /* * Net VSC on send completion */ @@ -738,59 +785,15 @@ static void hv_nv_on_send_completion(netvsc_dev *net_dev, struct vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkt) { - const nvsp_msg *nvsp_msg_pkt; - netvsc_packet *net_vsc_pkt; + struct hn_send_ctx *sndc; - nvsp_msg_pkt = VMBUS_CHANPKT_CONST_DATA(pkt); - - if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_type_init_complete - || nvsp_msg_pkt->hdr.msg_type - == nvsp_msg_1_type_send_rx_buf_complete - || nvsp_msg_pkt->hdr.msg_type - == nvsp_msg_1_type_send_send_buf_complete - || nvsp_msg_pkt->hdr.msg_type - == nvsp_msg5_type_subchannel) { - /* Copy the response back */ - memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt, - sizeof(nvsp_msg)); - sema_post(&net_dev->channel_init_sema); - } else if (nvsp_msg_pkt->hdr.msg_type == - nvsp_msg_1_type_send_rndis_pkt_complete) { - /* Get the send context */ - net_vsc_pkt = - (netvsc_packet *)(unsigned long)pkt->cph_xactid; - if (NULL != net_vsc_pkt) { - if (net_vsc_pkt->send_buf_section_idx != - NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { - u_long mask; - int idx; - - idx = net_vsc_pkt->send_buf_section_idx / - BITS_PER_LONG; - KASSERT(idx < net_dev->bitsmap_words, - ("invalid section index %u", - net_vsc_pkt->send_buf_section_idx)); - mask = 1UL << - (net_vsc_pkt->send_buf_section_idx % - BITS_PER_LONG); - - KASSERT(net_dev->send_section_bitsmap[idx] & - mask, - ("index bitmap 0x%lx, section index %u, " - "bitmap idx %d, bitmask 0x%lx", - net_dev->send_section_bitsmap[idx], - net_vsc_pkt->send_buf_section_idx, - idx, mask)); - atomic_clear_long( - &net_dev->send_section_bitsmap[idx], mask); - } - - /* Notify the layer above us */ - net_vsc_pkt->compl.send.on_send_completion(chan, - net_vsc_pkt->compl.send.send_completion_context); - - } - } + sndc = (struct hn_send_ctx *)(uintptr_t)pkt->cph_xactid; + sndc->hn_cb(sndc, net_dev, chan, VMBUS_CHANPKT_CONST_DATA(pkt)); + /* + * NOTE: + * 'sndc' CAN NOT be accessed anymore, since it can be freed by + * its callback. + */ } /* @@ -799,14 +802,14 @@ hv_nv_on_send_completion(netvsc_dev *net_dev, struct vmbus_channel *chan, * Returns 0 on success, non-zero on failure. */ int -hv_nv_on_send(struct vmbus_channel *chan, - netvsc_packet *pkt, struct vmbus_gpa *gpa, int gpa_cnt) +hv_nv_on_send(struct vmbus_channel *chan, bool is_data_pkt, + struct hn_send_ctx *sndc, struct vmbus_gpa *gpa, int gpa_cnt) { nvsp_msg send_msg; int ret; send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt; - if (pkt->is_data_pkt) { + if (is_data_pkt) { /* 0 is RMC_DATA */ send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0; } else { @@ -815,17 +818,17 @@ hv_nv_on_send(struct vmbus_channel *chan, } send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx = - pkt->send_buf_section_idx; + sndc->hn_chim_idx; send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = - pkt->send_buf_section_size; + sndc->hn_chim_sz; if (gpa_cnt) { ret = vmbus_chan_send_sglist(chan, gpa, gpa_cnt, - &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt); + &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)sndc); } else { ret = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, - &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt); + &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)sndc); } return (ret); diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.h b/sys/dev/hyperv/netvsc/hv_net_vsc.h index e1e1b1634d52..81eb0416052c 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.h +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.h @@ -1112,29 +1112,8 @@ typedef void (*pfn_on_send_rx_completion)(struct vmbus_channel *, void *); #endif typedef struct netvsc_packet_ { - uint8_t is_data_pkt; /* One byte */ - uint16_t vlan_tci; - uint32_t status; - - /* Completion */ - union { - struct { - uint64_t rx_completion_tid; - void *rx_completion_context; - /* This is no longer used */ - pfn_on_send_rx_completion on_rx_completion; - } rx; - struct { - uint64_t send_completion_tid; - void *send_completion_context; - /* Still used in netvsc and filter code */ - pfn_on_send_rx_completion on_send_completion; - } send; - } compl; - uint32_t send_buf_section_idx; - uint32_t send_buf_section_size; - - void *rndis_mesg; + uint16_t vlan_tci; + uint32_t status; uint32_t tot_data_buf_len; void *data; } netvsc_packet; @@ -1270,14 +1249,15 @@ typedef struct hn_softc { * Externs */ extern int hv_promisc_mode; +struct hn_send_ctx; void netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status); netvsc_dev *hv_nv_on_device_add(struct hn_softc *sc, void *additional_info, struct hn_rx_ring *rxr); int hv_nv_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel); -int hv_nv_on_send(struct vmbus_channel *chan, netvsc_packet *pkt, - struct vmbus_gpa *gpa, int gpa_cnt); +int hv_nv_on_send(struct vmbus_channel *chan, bool is_data_pkt, + struct hn_send_ctx *sndc, struct vmbus_gpa *gpa, int gpa_cnt); int hv_nv_get_next_send_section(netvsc_dev *net_dev); void hv_nv_subchan_attach(struct vmbus_channel *chan, struct hn_rx_ring *rxr); diff --git a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c index 38390d702c18..b3dc6d100f39 100644 --- a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c +++ b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c @@ -166,7 +166,7 @@ struct hn_txdesc { struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ - netvsc_packet netvsc_pkt; /* XXX to be removed */ + struct hn_send_ctx send_ctx; bus_dmamap_t data_dmap; @@ -781,14 +781,14 @@ hn_txeof(struct hn_tx_ring *txr) } static void -hn_tx_done(struct vmbus_channel *chan, void *xpkt) +hn_tx_done(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, + struct vmbus_channel *chan, const struct nvsp_msg_ *msg __unused) { - netvsc_packet *packet = xpkt; - struct hn_txdesc *txd; + struct hn_txdesc *txd = sndc->hn_cbarg; struct hn_tx_ring *txr; - txd = (struct hn_txdesc *)(uintptr_t) - packet->compl.send.send_completion_tid; + if (sndc->hn_chim_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) + hn_chim_free(net_dev, sndc->hn_chim_idx); txr = txd->txr; KASSERT(txr->hn_chan == chan, @@ -835,16 +835,14 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; - netvsc_packet *packet; rndis_msg *rndis_mesg; rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; struct rndis_hash_value *hash_value; - uint32_t rndis_msg_size; + uint32_t rndis_msg_size, tot_data_buf_len, send_buf_section_idx; + int send_buf_section_size; - packet = &txd->netvsc_pkt; - packet->is_data_pkt = TRUE; - packet->tot_data_buf_len = m_head->m_pkthdr.len; + tot_data_buf_len = m_head->m_pkthdr.len; /* * extension points to the area reserved for the @@ -859,7 +857,7 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset = sizeof(rndis_packet); - rndis_pkt->data_length = packet->tot_data_buf_len; + rndis_pkt->data_length = tot_data_buf_len; rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); @@ -967,15 +965,14 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) } } - rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size; - packet->tot_data_buf_len = rndis_mesg->msg_len; + rndis_mesg->msg_len = tot_data_buf_len + rndis_msg_size; + tot_data_buf_len = rndis_mesg->msg_len; /* * Chimney send, if the packet could fit into one chimney buffer. */ - if (packet->tot_data_buf_len < txr->hn_tx_chimney_size) { + if (tot_data_buf_len < txr->hn_tx_chimney_size) { netvsc_dev *net_dev = txr->hn_sc->net_dev; - uint32_t send_buf_section_idx; txr->hn_tx_chimney_tried++; send_buf_section_idx = @@ -990,9 +987,7 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) dest += rndis_msg_size; m_copydata(m_head, 0, m_head->m_pkthdr.len, dest); - packet->send_buf_section_idx = send_buf_section_idx; - packet->send_buf_section_size = - packet->tot_data_buf_len; + send_buf_section_size = tot_data_buf_len; txr->hn_gpa_cnt = 0; txr->hn_tx_chimney++; goto done; @@ -1039,16 +1034,14 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) gpa->gpa_len = segs[i].ds_len; } - packet->send_buf_section_idx = - NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; - packet->send_buf_section_size = 0; + send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; + send_buf_section_size = 0; done: txd->m = m_head; /* Set the completion routine */ - packet->compl.send.on_send_completion = hn_tx_done; - packet->compl.send.send_completion_context = packet; - packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)txd; + hn_send_ctx_init(&txd->send_ctx, hn_tx_done, txd, + send_buf_section_idx, send_buf_section_size); return 0; } @@ -1068,7 +1061,7 @@ hn_send_pkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) * Make sure that txd is not freed before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); - error = hv_nv_on_send(txr->hn_chan, &txd->netvsc_pkt, + error = hv_nv_on_send(txr->hn_chan, true, &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt); if (!error) { ETHER_BPF_MTAP(ifp, txd->m); diff --git a/sys/dev/hyperv/netvsc/hv_rndis_filter.c b/sys/dev/hyperv/netvsc/hv_rndis_filter.c index 22acfd61031d..e7e74b6e36e3 100644 --- a/sys/dev/hyperv/netvsc/hv_rndis_filter.c +++ b/sys/dev/hyperv/netvsc/hv_rndis_filter.c @@ -85,11 +85,17 @@ static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter); static int hv_rf_init_device(rndis_device *device); static int hv_rf_open_device(rndis_device *device); static int hv_rf_close_device(rndis_device *device); -static void hv_rf_on_send_request_completion(struct vmbus_channel *, void *context); -static void hv_rf_on_send_request_halt_completion(struct vmbus_channel *, void *context); int hv_rf_send_offload_request(struct hn_softc *sc, rndis_offload_params *offloads); + +static void hn_rndis_sent_halt(struct hn_send_ctx *sndc, + struct netvsc_dev_ *net_dev, struct vmbus_channel *chan, + const struct nvsp_msg_ *msg); +static void hn_rndis_sent_cb(struct hn_send_ctx *sndc, + struct netvsc_dev_ *net_dev, struct vmbus_channel *chan, + const struct nvsp_msg_ *msg); + /* * Set the Per-Packet-Info with the specified type */ @@ -238,17 +244,14 @@ static int hv_rf_send_request(rndis_device *device, rndis_request *request, uint32_t message_type) { - netvsc_packet *packet; netvsc_dev *net_dev = device->net_dev; - int send_buf_section_idx; + uint32_t send_buf_section_idx, tot_data_buf_len; struct vmbus_gpa gpa[2]; - int gpa_cnt; + int gpa_cnt, send_buf_section_size; + hn_sent_callback_t cb; /* Set up the packet to send it */ - packet = &request->pkt; - - packet->is_data_pkt = FALSE; - packet->tot_data_buf_len = request->request_msg.msg_len; + tot_data_buf_len = request->request_msg.msg_len; gpa_cnt = 1; gpa[0].gpa_page = hv_get_phys_addr(&request->request_msg) >> PAGE_SHIFT; @@ -265,16 +268,12 @@ hv_rf_send_request(rndis_device *device, rndis_request *request, gpa[1].gpa_len = request->request_msg.msg_len - gpa[0].gpa_len; } - packet->compl.send.send_completion_context = request; /* packet */ - if (message_type != REMOTE_NDIS_HALT_MSG) { - packet->compl.send.on_send_completion = - hv_rf_on_send_request_completion; - } else { - packet->compl.send.on_send_completion = - hv_rf_on_send_request_halt_completion; - } - packet->compl.send.send_completion_tid = (unsigned long)device; - if (packet->tot_data_buf_len < net_dev->send_section_size) { + if (message_type != REMOTE_NDIS_HALT_MSG) + cb = hn_rndis_sent_cb; + else + cb = hn_rndis_sent_halt; + + if (tot_data_buf_len < net_dev->send_section_size) { send_buf_section_idx = hv_nv_get_next_send_section(net_dev); if (send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { @@ -282,19 +281,20 @@ hv_rf_send_request(rndis_device *device, rndis_request *request, send_buf_section_idx * net_dev->send_section_size); memcpy(dest, &request->request_msg, request->request_msg.msg_len); - packet->send_buf_section_idx = send_buf_section_idx; - packet->send_buf_section_size = packet->tot_data_buf_len; + send_buf_section_size = tot_data_buf_len; gpa_cnt = 0; goto sendit; } /* Failed to allocate chimney send buffer; move on */ } - packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; - packet->send_buf_section_size = 0; + send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; + send_buf_section_size = 0; sendit: - return hv_nv_on_send(device->net_dev->sc->hn_prichan, packet, - gpa, gpa_cnt); + hn_send_ctx_init(&request->send_ctx, cb, request, + send_buf_section_idx, send_buf_section_size); + return hv_nv_on_send(device->net_dev->sc->hn_prichan, false, + &request->send_ctx, gpa, gpa_cnt); } /* @@ -1056,6 +1056,7 @@ int hv_rf_on_device_add(struct hn_softc *sc, void *additl_info, int nchan, struct hn_rx_ring *rxr) { + struct hn_send_ctx sndc; int ret; netvsc_dev *net_dev; rndis_device *rndis_dev; @@ -1162,9 +1163,10 @@ hv_rf_on_device_add(struct hn_softc *sc, void *additl_info, init_pkt->msgs.vers_5_msgs.subchannel_request.num_subchannels = net_dev->num_channel - 1; + hn_send_ctx_init_simple(&sndc, hn_nvs_sent_wakeup, NULL); ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); + init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)&sndc); if (ret != 0) { device_printf(dev, "Fail to allocate subchannel\n"); goto out; @@ -1235,23 +1237,22 @@ hv_rf_on_close(struct hn_softc *sc) return (hv_rf_close_device((rndis_device *)net_dev->extension)); } -/* - * RNDIS filter on send request completion callback - */ -static void -hv_rf_on_send_request_completion(struct vmbus_channel *chan __unused, - void *context __unused) +static void +hn_rndis_sent_cb(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, + struct vmbus_channel *chan __unused, const struct nvsp_msg_ *msg __unused) { + if (sndc->hn_chim_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) + hn_chim_free(net_dev, sndc->hn_chim_idx); } -/* - * RNDIS filter on send request (halt only) completion callback - */ -static void -hv_rf_on_send_request_halt_completion(struct vmbus_channel *chan __unused, - void *context) +static void +hn_rndis_sent_halt(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, + struct vmbus_channel *chan __unused, const struct nvsp_msg_ *msg __unused) { - rndis_request *request = context; + rndis_request *request = sndc->hn_cbarg; + + if (sndc->hn_chim_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) + hn_chim_free(net_dev, sndc->hn_chim_idx); /* * Notify hv_rf_halt_device() about halt completion. diff --git a/sys/dev/hyperv/netvsc/hv_rndis_filter.h b/sys/dev/hyperv/netvsc/hv_rndis_filter.h index 2f940db33e89..ebfda20b94db 100644 --- a/sys/dev/hyperv/netvsc/hv_rndis_filter.h +++ b/sys/dev/hyperv/netvsc/hv_rndis_filter.h @@ -33,6 +33,7 @@ #include #include +#include /* * Defines @@ -75,7 +76,7 @@ typedef struct rndis_request_ { uint8_t buf_resp[PAGE_SIZE]; /* Simplify allocation by having a netvsc packet inline */ - netvsc_packet pkt; + struct hn_send_ctx send_ctx; /* * The max request size is sizeof(rndis_msg) + PAGE_SIZE. diff --git a/sys/dev/hyperv/netvsc/if_hnvar.h b/sys/dev/hyperv/netvsc/if_hnvar.h new file mode 100644 index 000000000000..6e5fb5090284 --- /dev/null +++ b/sys/dev/hyperv/netvsc/if_hnvar.h @@ -0,0 +1,83 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IF_HNVAR_H_ +#define _IF_HNVAR_H_ + +#include +#include + +struct netvsc_dev_; +struct nvsp_msg_; + +struct vmbus_channel; +struct hn_send_ctx; + +typedef void (*hn_sent_callback_t) + (struct hn_send_ctx *, struct netvsc_dev_ *, + struct vmbus_channel *, const struct nvsp_msg_ *); + +struct hn_send_ctx { + hn_sent_callback_t hn_cb; + void *hn_cbarg; + uint32_t hn_chim_idx; + int hn_chim_sz; +}; + +#define HN_SEND_CTX_INITIALIZER(cb, cbarg) \ +{ \ + .hn_cb = cb, \ + .hn_cbarg = cbarg, \ + .hn_chim_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX, \ + .hn_chim_sz = 0 \ +} + +static __inline void +hn_send_ctx_init(struct hn_send_ctx *sndc, hn_sent_callback_t cb, + void *cbarg, uint32_t chim_idx, int chim_sz) +{ + sndc->hn_cb = cb; + sndc->hn_cbarg = cbarg; + sndc->hn_chim_idx = chim_idx; + sndc->hn_chim_sz = chim_sz; +} + +static __inline void +hn_send_ctx_init_simple(struct hn_send_ctx *sndc, hn_sent_callback_t cb, + void *cbarg) +{ + hn_send_ctx_init(sndc, cb, cbarg, + NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX, 0); +} + +void hn_nvs_sent_wakeup(struct hn_send_ctx *sndc, + struct netvsc_dev_ *net_dev, struct vmbus_channel *chan, + const struct nvsp_msg_ *msg); +void hn_chim_free(struct netvsc_dev_ *net_dev, uint32_t chim_idx); + +#endif /* !_IF_HNVAR_H_ */ From 4088f71f9b8ace31d6c8d64bd4b2e83bce4ff00f Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Wed, 10 Aug 2016 10:13:34 +0000 Subject: [PATCH 03/76] Implement pmap_align_superpage on arm64 based on the amd64 implementation. This will be needed when superpage support is added. Obtained from: ABT Systems Ltd MFC after: 1 month Sponsored by: The FreeBSD Foundation --- sys/arm64/arm64/pmap.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index a451cb176e3a..aa56e50c4b3b 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -3490,6 +3490,20 @@ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { + vm_offset_t superpage_offset; + + if (size < L2_SIZE) + return; + if (object != NULL && (object->flags & OBJ_COLORED) != 0) + offset += ptoa(object->pg_color); + superpage_offset = offset & L2_OFFSET; + if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE || + (*addr & L2_OFFSET) == superpage_offset) + return; + if ((*addr & L2_OFFSET) < superpage_offset) + *addr = (*addr & ~L2_OFFSET) + superpage_offset; + else + *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset; } /** From 4cf6e978b49a6179fa71fccc7e7b6fce9ab21dd7 Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Wed, 10 Aug 2016 10:36:11 +0000 Subject: [PATCH 04/76] Uncomment the vm.kvm_size and vm.kvm_free sysctls. These work as expected so there is no reason to leave them commented out. Obtained from: ABT Systems Ltd MFC after: 1 month Sponsored by: The FreeBSD Foundation --- sys/arm64/arm64/pmap.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index aa56e50c4b3b..4ed131e54859 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -1574,7 +1574,6 @@ pmap_release(pmap_t pmap) vm_page_free_zero(m); } -#if 0 static int kvm_size(SYSCTL_HANDLER_ARGS) { @@ -1594,7 +1593,6 @@ kvm_free(SYSCTL_HANDLER_ARGS) } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); -#endif /* 0 */ /* * grow the number of kernel page table entries, if needed From 5f8228b2f379773efef8123a36d3de40af95d27a Mon Sep 17 00:00:00 2001 From: Ruslan Bukin Date: Wed, 10 Aug 2016 12:41:36 +0000 Subject: [PATCH 05/76] o Remove operation in machine mode. Machine privilege level was specially designed to use in vendor's firmware or bootloader. We have implemented operation in machine mode in FreeBSD as part of understanding RISC-V ISA, but it is time to remove it. We now use BBL (Berkeley Boot Loader) -- standard RISC-V firmware, which provides operation in machine mode for us. We now use standard SBI calls to machine mode, instead of handmade 'syscalls'. o Remove HTIF bus. HTIF bus is now legacy and no longer exists in RISC-V specification. HTIF code still exists in Spike simulator, but BBL do not provide raw interface to it. Memory disk is only choice for now to have multiuser booted in Spike, until Spike has implemented more devices (e.g. Virtio, etc). Sponsored by: DARPA, AFRL Sponsored by: HEIF5 --- sys/boot/fdt/dts/riscv/qemu.dts | 12 +- sys/boot/fdt/dts/riscv/rocket.dts | 12 +- sys/boot/fdt/dts/riscv/spike.dts | 14 +- sys/conf/files.riscv | 6 +- sys/conf/ldscript.riscv | 2 +- sys/riscv/conf/GENERIC | 2 +- sys/riscv/conf/QEMU | 4 +- sys/riscv/conf/ROCKET | 4 +- sys/riscv/conf/SPIKE | 4 +- sys/riscv/htif/htif.c | 278 ------------- sys/riscv/htif/htif_block.c | 299 -------------- sys/riscv/include/cpufunc.h | 15 - sys/riscv/include/pcpu.h | 3 +- sys/riscv/include/riscvreg.h | 16 - sys/riscv/{htif/htif.h => include/sbi.h} | 76 ++-- sys/riscv/include/vmparam.h | 23 +- sys/riscv/riscv/exception.S | 385 ------------------ sys/riscv/riscv/genassym.c | 1 - sys/riscv/riscv/identcpu.c | 5 +- sys/riscv/riscv/intr_machdep.c | 3 +- sys/riscv/riscv/locore.S | 276 +++++-------- sys/riscv/riscv/machdep.c | 18 +- sys/riscv/riscv/mp_machdep.c | 12 +- sys/riscv/riscv/pmap.c | 39 +- .../htif_console.c => riscv/riscv_console.c} | 176 ++++---- sys/riscv/riscv/sbi.S | 52 +++ sys/riscv/riscv/timer.c | 3 +- sys/riscv/riscv/vm_machdep.c | 7 +- 28 files changed, 362 insertions(+), 1385 deletions(-) delete mode 100644 sys/riscv/htif/htif.c delete mode 100644 sys/riscv/htif/htif_block.c rename sys/riscv/{htif/htif.h => include/sbi.h} (53%) rename sys/riscv/{htif/htif_console.c => riscv/riscv_console.c} (71%) create mode 100644 sys/riscv/riscv/sbi.S diff --git a/sys/boot/fdt/dts/riscv/qemu.dts b/sys/boot/fdt/dts/riscv/qemu.dts index 067e18fcea8c..8da1ae5b663f 100644 --- a/sys/boot/fdt/dts/riscv/qemu.dts +++ b/sys/boot/fdt/dts/riscv/qemu.dts @@ -72,15 +72,11 @@ clock-frequency = < 400000000 >; }; - htif0: htif@0 { - compatible = "riscv,htif"; - interrupts = < 0 >; + console0: console@0 { + compatible = "riscv,console"; + status = "okay"; + interrupts = < 1 >; interrupt-parent = < &pic0 >; - - console0: console@0 { - compatible = "htif,console"; - status = "okay"; - }; }; }; diff --git a/sys/boot/fdt/dts/riscv/rocket.dts b/sys/boot/fdt/dts/riscv/rocket.dts index b8c139fd67f7..8ce718d8295f 100644 --- a/sys/boot/fdt/dts/riscv/rocket.dts +++ b/sys/boot/fdt/dts/riscv/rocket.dts @@ -83,15 +83,11 @@ clock-frequency = < 1000000 >; }; - htif0: htif@0 { - compatible = "riscv,htif"; - interrupts = < 0 >; + console0: console@0 { + compatible = "riscv,console"; + status = "okay"; + interrupts = < 1 >; interrupt-parent = < &pic0 >; - - console0: console@0 { - compatible = "htif,console"; - status = "okay"; - }; }; }; diff --git a/sys/boot/fdt/dts/riscv/spike.dts b/sys/boot/fdt/dts/riscv/spike.dts index 27d68f034c3b..dfe27f349351 100644 --- a/sys/boot/fdt/dts/riscv/spike.dts +++ b/sys/boot/fdt/dts/riscv/spike.dts @@ -65,6 +65,10 @@ }; memory { + /* + * This is not used currently. + * We take information from sbi_query_memory. + */ device_type = "memory"; reg = <0x80000000 0x40000000>; /* 1GB at 0x80000000 */ }; @@ -90,15 +94,11 @@ clock-frequency = < 1000000 >; }; - htif0: htif@0 { - compatible = "riscv,htif"; + console0: console@0 { + compatible = "riscv,console"; + status = "okay"; interrupts = < 1 >; interrupt-parent = < &pic0 >; - - console0: console@0 { - compatible = "htif,console"; - status = "okay"; - }; }; }; diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv index fe30078f183f..e6908bed8ae2 100644 --- a/sys/conf/files.riscv +++ b/sys/conf/files.riscv @@ -19,9 +19,6 @@ libkern/flsl.c standard libkern/flsll.c standard libkern/memmove.c standard libkern/memset.c standard -riscv/htif/htif.c optional htif -riscv/htif/htif_block.c optional htif -riscv/htif/htif_console.c optional htif riscv/riscv/autoconf.c standard riscv/riscv/bcopy.c standard riscv/riscv/bus_machdep.c standard @@ -36,6 +33,7 @@ riscv/riscv/db_interface.c optional ddb riscv/riscv/db_trace.c optional ddb riscv/riscv/dump_machdep.c standard riscv/riscv/elf_machdep.c standard +riscv/riscv/exception.S standard riscv/riscv/intr_machdep.c standard riscv/riscv/in_cksum.c optional inet | inet6 riscv/riscv/identcpu.c standard @@ -47,6 +45,8 @@ riscv/riscv/mem.c standard riscv/riscv/nexus.c standard riscv/riscv/ofw_machdep.c optional fdt riscv/riscv/pmap.c standard +riscv/riscv/riscv_console.c optional rcons +riscv/riscv/sbi.S standard riscv/riscv/stack_machdep.c optional ddb | stack riscv/riscv/support.S standard riscv/riscv/swtch.S standard diff --git a/sys/conf/ldscript.riscv b/sys/conf/ldscript.riscv index 152b97eca30d..5fe32aea1782 100644 --- a/sys/conf/ldscript.riscv +++ b/sys/conf/ldscript.riscv @@ -6,7 +6,7 @@ SEARCH_DIR(/usr/lib); SECTIONS { /* Read-only sections, merged into text segment: */ - . = kernbase + 0x80000000 /* KERNENTRY */; + . = kernbase; .text : AT(ADDR(.text) - kernbase) { *(.text) diff --git a/sys/riscv/conf/GENERIC b/sys/riscv/conf/GENERIC index dd73c3a352ac..6c53da2a051c 100644 --- a/sys/riscv/conf/GENERIC +++ b/sys/riscv/conf/GENERIC @@ -76,7 +76,7 @@ options SMP # Uncomment for memory disk # options MD_ROOT -# options MD_ROOT_SIZE=8192 # 8MB ram disk +# options MD_ROOT_SIZE=32768 # 32MB ram disk # makeoptions MFS_IMAGE=/path/to/img # options ROOTDEVNAME=\"ufs:/dev/md0\" diff --git a/sys/riscv/conf/QEMU b/sys/riscv/conf/QEMU index 26c65430844f..7411aefc6b1d 100644 --- a/sys/riscv/conf/QEMU +++ b/sys/riscv/conf/QEMU @@ -21,8 +21,8 @@ include GENERIC ident QEMU -device htif -options ROOTDEVNAME=\"ufs:/dev/htif_blk0\" +device rcons +options ROOTDEVNAME=\"ufs:/dev/md0\" # RISCVTODO: This needs to be done via loader (when it's available). options FDT_DTB_STATIC diff --git a/sys/riscv/conf/ROCKET b/sys/riscv/conf/ROCKET index 9a9e3efe46e3..1b2b1c08b8dd 100644 --- a/sys/riscv/conf/ROCKET +++ b/sys/riscv/conf/ROCKET @@ -21,8 +21,8 @@ include GENERIC ident ROCKET -device htif -options ROOTDEVNAME=\"ufs:/dev/htif_blk0\" +device rcons +options ROOTDEVNAME=\"ufs:/dev/md0\" # RISCVTODO: This needs to be done via loader (when it's available). options FDT_DTB_STATIC diff --git a/sys/riscv/conf/SPIKE b/sys/riscv/conf/SPIKE index 6c4a7434bb4c..cc36e6f97b00 100644 --- a/sys/riscv/conf/SPIKE +++ b/sys/riscv/conf/SPIKE @@ -21,8 +21,8 @@ include GENERIC ident SPIKE -device htif -options ROOTDEVNAME=\"ufs:/dev/htif_blk0\" +device rcons +options ROOTDEVNAME=\"ufs:/dev/md0\" # RISCVTODO: This needs to be done via loader (when it's available). options FDT_DTB_STATIC diff --git a/sys/riscv/htif/htif.c b/sys/riscv/htif/htif.c deleted file mode 100644 index 9a42db2c427e..000000000000 --- a/sys/riscv/htif/htif.c +++ /dev/null @@ -1,278 +0,0 @@ -/*- - * Copyright (c) 2015-2016 Ruslan Bukin - * All rights reserved. - * - * Portions of this software were developed by SRI International and the - * University of Cambridge Computer Laboratory under DARPA/AFRL contract - * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. - * - * Portions of this software were developed by the University of Cambridge - * Computer Laboratory as part of the CTSRD Project, with support from the - * UK Higher Education Innovation Fund (HEIF). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "htif.h" - -static struct resource_spec htif_spec[] = { - { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE}, - { -1, 0 } -}; - -struct intr_entry { - void (*func) (void *, uint64_t); - void *arg; -}; - -struct intr_entry intrs[HTIF_NDEV]; - -uint64_t -htif_command(uint64_t arg) -{ - - return (machine_command(ECALL_HTIF_CMD, arg)); -} - -int -htif_setup_intr(int id, void *func, void *arg) -{ - - if (id >= HTIF_NDEV) - return (-1); - - intrs[id].func = func; - intrs[id].arg = arg; - - return (0); -} - -static void -htif_handle_entry(struct htif_softc *sc) -{ - uint64_t entry; - uint8_t devcmd; - uint8_t devid; - - entry = machine_command(ECALL_HTIF_GET_ENTRY, 0); - while (entry) { - devid = HTIF_DEV_ID(entry); - devcmd = HTIF_DEV_CMD(entry); - - if (devcmd == HTIF_CMD_IDENTIFY) { - /* Enumeration interrupt */ - if (devid == sc->identify_id) - sc->identify_done = 1; - } else { - /* Device interrupt */ - if (intrs[devid].func != NULL) - intrs[devid].func(intrs[devid].arg, entry); - } - - entry = machine_command(ECALL_HTIF_GET_ENTRY, 0); - } -} - -static int -htif_intr(void *arg) -{ - struct htif_softc *sc; - - sc = arg; - - csr_clear(sip, SIP_SSIP); - - htif_handle_entry(sc); - - return (FILTER_HANDLED); -} - -static int -htif_add_device(struct htif_softc *sc, int i, char *id, char *name) -{ - struct htif_dev_ivars *di; - - di = malloc(sizeof(struct htif_dev_ivars), M_DEVBUF, M_WAITOK | M_ZERO); - di->sc = sc; - di->index = i; - di->id = malloc(HTIF_ID_LEN, M_DEVBUF, M_WAITOK | M_ZERO); - memcpy(di->id, id, HTIF_ID_LEN); - - di->dev = device_add_child(sc->dev, name, -1); - device_set_ivars(di->dev, di); - - return (0); -} - -static int -htif_enumerate(struct htif_softc *sc) -{ - char id[HTIF_ID_LEN] __aligned(HTIF_ALIGN); - uint64_t paddr; - uint64_t data; - uint64_t cmd; - int len; - int i; - - device_printf(sc->dev, "Enumerating devices\n"); - - for (i = 0; i < HTIF_NDEV; i++) { - paddr = pmap_kextract((vm_offset_t)&id); - data = (paddr << IDENTIFY_PADDR_SHIFT); - data |= IDENTIFY_IDENT; - - sc->identify_id = i; - sc->identify_done = 0; - - cmd = i; - cmd <<= HTIF_DEV_ID_SHIFT; - cmd |= (HTIF_CMD_IDENTIFY << HTIF_CMD_SHIFT); - cmd |= data; - - htif_command(cmd); - - len = strnlen(id, sizeof(id)); - if (len <= 0) - break; - - if (bootverbose) - printf(" %d %s\n", i, id); - - if (strncmp(id, "disk", 4) == 0) - htif_add_device(sc, i, id, "htif_blk"); - else if (strncmp(id, "bcd", 3) == 0) - htif_add_device(sc, i, id, "htif_console"); - else if (strncmp(id, "syscall_proxy", 13) == 0) - htif_add_device(sc, i, id, "htif_syscall_proxy"); - } - - return (bus_generic_attach(sc->dev)); -} - -int -htif_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) -{ - struct htif_dev_ivars *ivars; - - ivars = device_get_ivars(child); - - switch (which) { - case HTIF_IVAR_INDEX: - *result = ivars->index; - break; - case HTIF_IVAR_ID: - *result = (uintptr_t)ivars->id; - default: - return (EINVAL); - } - - return (0); -} - -static int -htif_probe(device_t dev) -{ - - if (!ofw_bus_status_okay(dev)) - return (ENXIO); - - if (!ofw_bus_is_compatible(dev, "riscv,htif")) - return (ENXIO); - - device_set_desc(dev, "HTIF bus device"); - return (BUS_PROBE_DEFAULT); -} - -static int -htif_attach(device_t dev) -{ - struct htif_softc *sc; - int error; - - sc = device_get_softc(dev); - sc->dev = dev; - - if (bus_alloc_resources(dev, htif_spec, sc->res)) { - device_printf(dev, "could not allocate resources\n"); - return (ENXIO); - } - - /* Setup IRQs handler */ - error = bus_setup_intr(dev, sc->res[0], INTR_TYPE_CLK, - htif_intr, NULL, sc, &sc->ihl[0]); - if (error) { - device_printf(dev, "Unable to alloc int resource.\n"); - return (ENXIO); - } - - csr_set(sie, SIE_SSIE); - - return (htif_enumerate(sc)); -} - -static device_method_t htif_methods[] = { - DEVMETHOD(device_probe, htif_probe), - DEVMETHOD(device_attach, htif_attach), - - /* Bus interface */ - DEVMETHOD(bus_read_ivar, htif_read_ivar), - - DEVMETHOD_END -}; - -static driver_t htif_driver = { - "htif", - htif_methods, - sizeof(struct htif_softc) -}; - -static devclass_t htif_devclass; - -DRIVER_MODULE(htif, simplebus, htif_driver, - htif_devclass, 0, 0); diff --git a/sys/riscv/htif/htif_block.c b/sys/riscv/htif/htif_block.c deleted file mode 100644 index 93662d825c1e..000000000000 --- a/sys/riscv/htif/htif_block.c +++ /dev/null @@ -1,299 +0,0 @@ -/*- - * Copyright (c) 2015-2016 Ruslan Bukin - * All rights reserved. - * - * Portions of this software were developed by SRI International and the - * University of Cambridge Computer Laboratory under DARPA/AFRL contract - * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. - * - * Portions of this software were developed by the University of Cambridge - * Computer Laboratory as part of the CTSRD Project, with support from the - * UK Higher Education Innovation Fund (HEIF). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include "htif.h" - -#define SECTOR_SIZE_SHIFT (9) -#define SECTOR_SIZE (1 << SECTOR_SIZE_SHIFT) - -#define HTIF_BLK_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) -#define HTIF_BLK_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) -#define HTIF_BLK_LOCK_INIT(_sc) \ - mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \ - "htif_blk", MTX_DEF) -#define HTIF_BLK_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); -#define HTIF_BLK_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); -#define HTIF_BLK_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); - -static void htif_blk_task(void *arg); - -static disk_open_t htif_blk_open; -static disk_close_t htif_blk_close; -static disk_strategy_t htif_blk_strategy; - -struct htif_blk_softc { - device_t dev; - struct disk *disk; - struct mtx htif_io_mtx; - struct mtx sc_mtx; - struct proc *p; - struct bio_queue_head bio_queue; - int running; - int intr_chan; - int cmd_done; - int index; - uint16_t curtag; -}; - -struct htif_blk_request { - uint64_t addr; - uint64_t offset; /* offset in bytes */ - uint64_t size; /* length in bytes */ - uint64_t tag; -}; - -static void -htif_blk_intr(void *arg, uint64_t entry) -{ - struct htif_blk_softc *sc; - uint64_t devcmd; - uint64_t data; - - sc = arg; - - devcmd = HTIF_DEV_CMD(entry); - data = HTIF_DEV_DATA(entry); - - if (sc->curtag == data) { - wmb(); - sc->cmd_done = 1; - wakeup(&sc->intr_chan); - } else { - device_printf(sc->dev, "Unexpected tag %d (should be %d)\n", - data, sc->curtag); - } -} - -static int -htif_blk_probe(device_t dev) -{ - - return (0); -} - -static int -htif_blk_attach(device_t dev) -{ - struct htif_blk_softc *sc; - char prefix[] = " size="; - char *str; - long size; - - sc = device_get_softc(dev); - sc->dev = dev; - - mtx_init(&sc->htif_io_mtx, device_get_nameunit(dev), "htif_blk", MTX_DEF); - HTIF_BLK_LOCK_INIT(sc); - - str = strstr(htif_get_id(dev), prefix); - - size = strtol((str + 6), NULL, 10); - if (size == 0) { - return (ENXIO); - } - - sc->index = htif_get_index(dev); - if (sc->index < 0) - return (EINVAL); - htif_setup_intr(sc->index, htif_blk_intr, sc); - - sc->disk = disk_alloc(); - sc->disk->d_drv1 = sc; - - sc->disk->d_maxsize = 4096; /* Max transfer */ - sc->disk->d_name = "htif_blk"; - sc->disk->d_open = htif_blk_open; - sc->disk->d_close = htif_blk_close; - sc->disk->d_strategy = htif_blk_strategy; - sc->disk->d_unit = 0; - sc->disk->d_sectorsize = SECTOR_SIZE; - sc->disk->d_mediasize = size; - disk_create(sc->disk, DISK_VERSION); - - bioq_init(&sc->bio_queue); - - sc->running = 1; - - kproc_create(&htif_blk_task, sc, &sc->p, 0, 0, "%s: transfer", - device_get_nameunit(dev)); - - return (0); -} - -static int -htif_blk_open(struct disk *dp) -{ - - return (0); -} - -static int -htif_blk_close(struct disk *dp) -{ - - return (0); -} - -static void -htif_blk_task(void *arg) -{ - struct htif_blk_request req __aligned(HTIF_ALIGN); - struct htif_blk_softc *sc; - uint64_t req_paddr; - struct bio *bp; - uint64_t paddr; - uint64_t resp; - uint64_t cmd; - int i; - - sc = (struct htif_blk_softc *)arg; - - while (1) { - HTIF_BLK_LOCK(sc); - do { - bp = bioq_takefirst(&sc->bio_queue); - if (bp == NULL) - msleep(sc, &sc->sc_mtx, PRIBIO, "jobqueue", 0); - } while (bp == NULL); - HTIF_BLK_UNLOCK(sc); - - if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { - HTIF_BLK_LOCK(sc); - - rmb(); - req.offset = (bp->bio_pblkno * sc->disk->d_sectorsize); - req.size = bp->bio_bcount; - paddr = vtophys(bp->bio_data); - KASSERT(paddr != 0, ("paddr is 0")); - req.addr = paddr; - sc->curtag++; - req.tag = sc->curtag; - - cmd = sc->index; - cmd <<= HTIF_DEV_ID_SHIFT; - if (bp->bio_cmd == BIO_READ) - cmd |= (HTIF_CMD_READ << HTIF_CMD_SHIFT); - else - cmd |= (HTIF_CMD_WRITE << HTIF_CMD_SHIFT); - req_paddr = vtophys(&req); - KASSERT(req_paddr != 0, ("req_paddr is 0")); - cmd |= req_paddr; - - sc->cmd_done = 0; - resp = htif_command(cmd); - htif_blk_intr(sc, resp); - - /* Wait for interrupt */ - i = 0; - while (sc->cmd_done == 0) { - msleep(&sc->intr_chan, &sc->sc_mtx, PRIBIO, "intr", hz/2); - - if (i++ > 2) { - /* TODO: try to re-issue operation on timeout ? */ - bp->bio_error = EIO; - bp->bio_flags |= BIO_ERROR; - disk_err(bp, "hard error", -1, 1); - break; - } - } - HTIF_BLK_UNLOCK(sc); - - biodone(bp); - } else { - printf("unknown op %d\n", bp->bio_cmd); - } - } -} - -static void -htif_blk_strategy(struct bio *bp) -{ - struct htif_blk_softc *sc; - - sc = bp->bio_disk->d_drv1; - - HTIF_BLK_LOCK(sc); - if (sc->running > 0) { - bioq_disksort(&sc->bio_queue, bp); - HTIF_BLK_UNLOCK(sc); - wakeup(sc); - } else { - HTIF_BLK_UNLOCK(sc); - biofinish(bp, NULL, ENXIO); - } -} - -static device_method_t htif_blk_methods[] = { - DEVMETHOD(device_probe, htif_blk_probe), - DEVMETHOD(device_attach, htif_blk_attach), -}; - -static driver_t htif_blk_driver = { - "htif_blk", - htif_blk_methods, - sizeof(struct htif_blk_softc) -}; - -static devclass_t htif_blk_devclass; - -DRIVER_MODULE(htif_blk, htif, htif_blk_driver, htif_blk_devclass, 0, 0); diff --git a/sys/riscv/include/cpufunc.h b/sys/riscv/include/cpufunc.h index 2db73ede884f..6cac649b4e95 100644 --- a/sys/riscv/include/cpufunc.h +++ b/sys/riscv/include/cpufunc.h @@ -81,21 +81,6 @@ intr_enable(void) ); } -static __inline register_t -machine_command(uint64_t cmd, uint64_t arg) -{ - uint64_t res; - - __asm __volatile( - "mv t5, %2\n" - "mv t6, %1\n" - "ecall\n" - "mv %0, t6" : "=&r"(res) : "r"(arg), "r"(cmd) - ); - - return (res); -} - #define cpu_nullop() riscv_nullop() #define cpufunc_nullop() riscv_nullop() #define cpu_setttb(a) riscv_setttb(a) diff --git a/sys/riscv/include/pcpu.h b/sys/riscv/include/pcpu.h index d72b11c012e1..ea5336029d68 100644 --- a/sys/riscv/include/pcpu.h +++ b/sys/riscv/include/pcpu.h @@ -46,8 +46,7 @@ #define PCPU_MD_FIELDS \ uint32_t pc_pending_ipis; /* IPIs pending to this CPU */ \ - uint64_t pc_reg; /* CPU MMIO base (PA) */ \ - char __pad[117] + char __pad[125] #ifdef _KERNEL diff --git a/sys/riscv/include/riscvreg.h b/sys/riscv/include/riscvreg.h index 8c6b4dce2ebd..2e3a4a2e9f5d 100644 --- a/sys/riscv/include/riscvreg.h +++ b/sys/riscv/include/riscvreg.h @@ -37,19 +37,6 @@ #ifndef _MACHINE_RISCVREG_H_ #define _MACHINE_RISCVREG_H_ -/* Machine mode requests */ -#define ECALL_MTIMECMP 0x01 -#define ECALL_HTIF_GET_ENTRY 0x02 -#define ECALL_MCPUID_GET 0x03 -#define ECALL_MIMPID_GET 0x04 -#define ECALL_SEND_IPI 0x05 -#define ECALL_CLEAR_IPI 0x06 -#define ECALL_MIE_SET 0x07 -#define ECALL_IO_IRQ_MASK 0x08 -#define ECALL_HTIF_CMD 0x09 -#define ECALL_HTIF_CMD_REQ 0x0a -#define ECALL_HTIF_CMD_RESP 0x0b - #define EXCP_SHIFT 0 #define EXCP_MASK (0xf << EXCP_SHIFT) #define EXCP_MISALIGNED_FETCH 0 @@ -65,9 +52,6 @@ #define EXCP_HYPERVISOR_ECALL 10 #define EXCP_MACHINE_ECALL 11 #define EXCP_INTR (1ul << 63) -#define EXCP_INTR_SOFTWARE 0 -#define EXCP_INTR_TIMER 1 -#define EXCP_INTR_HTIF 2 #define SSTATUS_UIE (1 << 0) #define SSTATUS_SIE (1 << 1) diff --git a/sys/riscv/htif/htif.h b/sys/riscv/include/sbi.h similarity index 53% rename from sys/riscv/htif/htif.h rename to sys/riscv/include/sbi.h index a1183d97c08e..76690ab84b08 100644 --- a/sys/riscv/htif/htif.h +++ b/sys/riscv/include/sbi.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the @@ -34,60 +34,32 @@ * $FreeBSD$ */ -#define HTIF_DEV_ID_SHIFT (56) -#define HTIF_DEV_ID_MASK (0xfful << HTIF_DEV_ID_SHIFT) -#define HTIF_CMD_SHIFT (48) -#define HTIF_CMD_MASK (0xfful << HTIF_CMD_SHIFT) -#define HTIF_DATA_SHIFT (0) -#define HTIF_DATA_MASK (0xffffffff << HTIF_DATA_SHIFT) +#ifndef _MACHINE_SBI_H_ +#define _MACHINE_SBI_H_ -#define HTIF_CMD_READ (0x00ul) -#define HTIF_CMD_WRITE (0x01ul) -#define HTIF_CMD_READ_CONTROL_REG (0x02ul) -#define HTIF_CMD_WRITE_CONTROL_REG (0x03ul) -#define HTIF_CMD_IDENTIFY (0xfful) -#define IDENTIFY_PADDR_SHIFT 8 -#define IDENTIFY_IDENT 0xff +typedef struct { + uint64_t base; + uint64_t size; + uint64_t node_id; +} memory_block_info; -#define HTIF_NDEV (256) -#define HTIF_ID_LEN (64) -#define HTIF_ALIGN (64) +uint64_t sbi_query_memory(uint64_t id, memory_block_info *p); +uint64_t sbi_hart_id(void); +uint64_t sbi_num_harts(void); +uint64_t sbi_timebase(void); +void sbi_set_timer(uint64_t stime_value); +void sbi_send_ipi(uint64_t hart_id); +uint64_t sbi_clear_ipi(void); +void sbi_shutdown(void); -#define HTIF_DEV_CMD(entry) ((entry & HTIF_CMD_MASK) >> HTIF_CMD_SHIFT) -#define HTIF_DEV_ID(entry) ((entry & HTIF_DEV_ID_MASK) >> HTIF_DEV_ID_SHIFT) -#define HTIF_DEV_DATA(entry) ((entry & HTIF_DATA_MASK) >> HTIF_DATA_SHIFT) +void sbi_console_putchar(unsigned char ch); +int sbi_console_getchar(void); -/* bus softc */ -struct htif_softc { - struct resource *res[1]; - void *ihl[1]; - device_t dev; - uint64_t identify_id; - uint64_t identify_done; -}; +void sbi_remote_sfence_vm(uint64_t hart_mask_ptr, uint64_t asid); +void sbi_remote_sfence_vm_range(uint64_t hart_mask_ptr, uint64_t asid, uint64_t start, uint64_t size); +void sbi_remote_fence_i(uint64_t hart_mask_ptr); -/* device private data */ -struct htif_dev_ivars { - char *id; - int index; - device_t dev; - struct htif_softc *sc; -}; +uint64_t sbi_mask_interrupt(uint64_t which); +uint64_t sbi_unmask_interrupt(uint64_t which); -uint64_t htif_command(uint64_t); -int htif_setup_intr(int id, void *func, void *arg); -int htif_read_ivar(device_t dev, device_t child, int which, uintptr_t *result); - -enum htif_device_ivars { - HTIF_IVAR_INDEX, - HTIF_IVAR_ID, -}; - -/* - * Simplified accessors for HTIF devices - */ -#define HTIF_ACCESSOR(var, ivar, type) \ - __BUS_ACCESSOR(htif, var, HTIF, ivar, type) - -HTIF_ACCESSOR(index, INDEX, int); -HTIF_ACCESSOR(id, ID, char *); +#endif /* !_MACHINE_SBI_H_ */ diff --git a/sys/riscv/include/vmparam.h b/sys/riscv/include/vmparam.h index 4e1cdcfd6e52..7ecd884f9da0 100644 --- a/sys/riscv/include/vmparam.h +++ b/sys/riscv/include/vmparam.h @@ -156,26 +156,26 @@ #define VM_MIN_KERNEL_ADDRESS (0xffffffc000000000UL) #define VM_MAX_KERNEL_ADDRESS (0xffffffc800000000UL) -/* Direct Map for 128 GiB of PA: 0x0 - 0x1fffffffff */ +/* 128 GiB maximum for the direct map region */ #define DMAP_MIN_ADDRESS (0xffffffd000000000UL) -#define DMAP_MAX_ADDRESS (0xffffffefffffffffUL) +#define DMAP_MAX_ADDRESS (0xfffffff000000000UL) -#define DMAP_MIN_PHYSADDR (0x0000000000000000UL) -#define DMAP_MAX_PHYSADDR (DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) +#define DMAP_MIN_PHYSADDR (dmap_phys_base) +#define DMAP_MAX_PHYSADDR (dmap_phys_max) /* True if pa is in the dmap range */ #define PHYS_IN_DMAP(pa) ((pa) >= DMAP_MIN_PHYSADDR && \ - (pa) <= DMAP_MAX_PHYSADDR) + (pa) < DMAP_MAX_PHYSADDR) /* True if va is in the dmap range */ #define VIRT_IN_DMAP(va) ((va) >= DMAP_MIN_ADDRESS && \ - (va) <= DMAP_MAX_ADDRESS) + (va) < (dmap_max_addr)) #define PHYS_TO_DMAP(pa) \ ({ \ KASSERT(PHYS_IN_DMAP(pa), \ ("%s: PA out of range, PA: 0x%lx", __func__, \ (vm_paddr_t)(pa))); \ - (pa) | DMAP_MIN_ADDRESS; \ + ((pa) - dmap_phys_base) + DMAP_MIN_ADDRESS; \ }) #define DMAP_TO_PHYS(va) \ @@ -183,7 +183,7 @@ KASSERT(VIRT_IN_DMAP(va), \ ("%s: VA out of range, VA: 0x%lx", __func__, \ (vm_offset_t)(va))); \ - (va) & ~DMAP_MIN_ADDRESS; \ + ((va) - DMAP_MIN_ADDRESS) + dmap_phys_base; \ }) #define VM_MIN_USER_ADDRESS (0x0000000000000000UL) @@ -196,7 +196,7 @@ #define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE) #define USRSTACK SHAREDPAGE -#define KERNENTRY (0x80000000) +#define KERNENTRY (0) /* * How many physical pages per kmem arena virtual page. @@ -233,9 +233,14 @@ * #define UMA_MD_SMALL_ALLOC */ +#ifndef LOCORE +extern vm_paddr_t dmap_phys_base; +extern vm_paddr_t dmap_phys_max; +extern vm_offset_t dmap_max_addr; extern u_int tsb_kernel_ldd_phys; extern vm_offset_t vm_max_kernel_address; extern vm_offset_t init_pt_va; +#endif #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ diff --git a/sys/riscv/riscv/exception.S b/sys/riscv/riscv/exception.S index 5b8ef7a7ae49..d5efd732ca65 100644 --- a/sys/riscv/riscv/exception.S +++ b/sys/riscv/riscv/exception.S @@ -235,388 +235,3 @@ ENTRY(cpu_exception_handler_user) csrrw sp, sscratch, sp sret END(cpu_exception_handler_user) - -/* - * Trap handlers - */ - .text -bad_trap: - j bad_trap - -machine_trap: - /* Save state */ - csrrw sp, mscratch, sp - addi sp, sp, -64 - sd t0, (8 * 0)(sp) - sd t1, (8 * 1)(sp) - sd t2, (8 * 2)(sp) - sd t3, (8 * 3)(sp) - sd t4, (8 * 4)(sp) - sd t5, (8 * 5)(sp) - sd a0, (8 * 7)(sp) - - csrr t3, mstatus /* Required for debug */ - csrr t0, mcause - bltz t0, machine_interrupt - - li t1, EXCP_SUPERVISOR_ECALL - beq t0, t1, supervisor_call -4: - /* NOT REACHED */ - j 4b - -machine_interrupt: - /* Type of interrupt ? */ - csrr t0, mcause - andi t0, t0, EXCP_MASK -#if 0 - /* lowRISC TODO */ - li t1, 4 - beq t1, t0, io_interrupt /* lowRISC only */ -#endif - li t1, 1 - beq t1, t0, supervisor_software_interrupt - li t1, 3 - beq t1, t0, machine_software_interrupt - li t1, 5 - beq t1, t0, supervisor_timer_interrupt - li t1, 7 - beq t1, t0, machine_timer_interrupt - - /* NOT REACHED */ -1: - j 1b - -#if 0 - /* lowRISC TODO */ -io_interrupt: - /* Disable IO interrupts so we can go to supervisor mode */ - csrwi CSR_IO_IRQ, 0 - - /* Handle the trap in supervisor mode */ - j exit_mrts -#endif - -supervisor_software_interrupt: -1: - /* Nothing here as we are using mideleg feature */ - j 1b - -machine_software_interrupt: - /* Clear IPI */ - li t0, 0x40001000 - csrr t2, mhartid - li t3, 0x1000 - mul t2, t2, t3 - add t0, t0, t2 - li t2, 0 - sd t2, 0(t0) - - /* Clear machine software pending bit */ - li t0, MIP_MSIP - csrc mip, t0 - - /* Post supervisor software interrupt */ - li t0, MIP_SSIP - csrs mip, t0 - - j exit - -supervisor_timer_interrupt: -1: - /* Nothing here as we are using mideleg feature */ - j 1b - -machine_timer_interrupt: - /* Disable machine timer interrupts */ - li t0, MIE_MTIE - csrc mie, t0 - - /* Clear machine timer interrupt pending */ - li t0, MIP_MTIP - csrc mip, t0 - - /* Post supervisor timer interrupt */ - li t0, MIP_STIP - csrs mip, t0 - - /* - * Check for HTIF interrupts. - * The only interrupt expected here is key press. - */ - la t0, htif_lock - li t2, 1 - amoswap.d t3, t2, 0(t0) - bnez t3, 5f /* Another operation in progress, give up */ - - /* We have lock */ - la t1, fromhost - ld t5, 0(t1) - beqz t5, 4f - - /* Console GET intr ? */ - mv t1, t5 - li t0, 0x100 - srli t1, t1, 48 - beq t1, t0, 2f -1: - /* There is no interrupts except keypress */ - j 1b - -2: - /* Save entry */ - la t0, htif_ring - li t4, (HTIF_RING_SIZE) - add t0, t0, t4 /* t0 == htif_ring_cursor */ - - ld t1, 0(t0) /* load ptr to cursor */ - sd t5, 0(t1) /* put entry */ - li t4, 1 - sd t4, 8(t1) /* mark used */ - ld t4, 16(t1) /* take next */ - /* Update cursor */ - sd t4, 0(t0) - - /* Post supervisor software interrupt */ - li t0, MIP_SSIP - csrs mip, t0 - -3: - la t1, fromhost - li t5, 0 - sd t5, 0(t1) - -4: - /* Release lock */ - la t0, htif_lock - li t2, 0 - amoswap.d t3, t2, 0(t0) - -5: - j exit - -supervisor_call: - csrr t1, mepc - addi t1, t1, 4 /* Next instruction in t1 */ - li t4, ECALL_HTIF_CMD - beq t5, t4, htif_cmd - li t4, ECALL_HTIF_CMD_REQ - beq t5, t4, htif_cmd_req - li t4, ECALL_HTIF_CMD_RESP - beq t5, t4, htif_cmd_resp - li t4, ECALL_HTIF_GET_ENTRY - beq t5, t4, htif_get_entry - li t4, ECALL_MTIMECMP - beq t5, t4, set_mtimecmp - li t4, ECALL_MCPUID_GET - beq t5, t4, mcpuid_get - li t4, ECALL_MIMPID_GET - beq t5, t4, mimpid_get - li t4, ECALL_SEND_IPI - beq t5, t4, send_ipi - li t4, ECALL_CLEAR_IPI - beq t5, t4, clear_ipi - li t4, ECALL_MIE_SET - beq t5, t4, mie_set -#if 0 - /* lowRISC TODO */ - li t4, ECALL_IO_IRQ_MASK - beq t5, t4, io_irq_mask -#endif - j exit_next_instr - -#if 0 - /* lowRISC TODO */ -io_irq_mask: - csrw CSR_IO_IRQ, t6 - j exit_next_instr -#endif - -mie_set: - csrs mie, t6 - j exit_next_instr - -mcpuid_get: - csrr t6, misa - j exit_next_instr - -mimpid_get: - csrr t6, mimpid - j exit_next_instr - -send_ipi: - /* CPU ipi MMIO register in t6 */ - mv t0, t6 - li t2, 1 - sd t2, 0(t0) - j exit_next_instr - -clear_ipi: - /* Do only clear if there are no new entries in HTIF ring */ - la t0, htif_ring - li t4, (HTIF_RING_SIZE) - add t0, t0, t4 /* t0 == ptr to htif_ring_cursor */ - ld t2, 8(t0) /* load htif_ring_last */ - ld t2, 8(t2) /* load used */ - bnez t2, 1f - - /* Clear supervisor software interrupt pending bit */ - li t0, MIP_SSIP - csrc mip, t0 - -1: - j exit_next_instr - -htif_get_entry: - /* Get a htif_ring for current core */ - la t0, htif_ring - li t4, (HTIF_RING_SIZE + 8) - add t0, t0, t4 /* t0 == htif_ring_last */ - - /* Check for new entries */ - li t6, 0 /* preset return value */ - ld t2, 0(t0) /* load ptr to last */ - ld t4, 8(t2) /* get used */ - beqz t4, 1f /* No new entries. Exit */ - - /* Get one */ - ld t6, 0(t2) /* get entry */ - li t4, 0 - sd t4, 8(t2) /* mark free */ - sd t4, 0(t2) /* free entry, just in case */ - ld t4, 16(t2) /* take next */ - sd t4, 0(t0) /* update ptr to last */ -1: - /* Exit. Result is stored in t6 */ - j exit_next_instr - -htif_cmd_resp: - la t0, htif_lock - li t2, 1 -1: - amoswap.d t3, t2, 0(t0) - bnez t3, 1b - - /* We have lock. Read for data */ - la t4, fromhost - ld t6, 0(t4) - beqz t6, 2f - - /* Clear event */ - li t5, 0 - sd t5, 0(t4) - -2: - /* Release lock */ - la t0, htif_lock - li t2, 0 - amoswap.d t3, t2, 0(t0) - - j exit_next_instr - -htif_cmd_req: - la t0, htif_lock - li t2, 1 -1: - amoswap.d t3, t2, 0(t0) - bnez t3, 1b - - /* We have lock. Store new request */ - la t4, tohost - sd t6, 0(t4) - - /* Release lock */ - la t0, htif_lock - li t2, 0 - amoswap.d t3, t2, 0(t0) - - j exit_next_instr - -htif_cmd: - la t0, htif_lock - li t2, 1 -1: - amoswap.d t3, t2, 0(t0) - bnez t3, 1b - - mv t3, t6 - - /* We have lock. Store new request */ - la t4, tohost - sd t6, 0(t4) -2: - /* Poll for result */ - la t4, fromhost - ld t6, 0(t4) - beqz t6, 2b - - /* Check for unexpected event */ - srli t0, t6, 48 - srli t2, t3, 48 - beq t2, t0, 3f - - /* - * We have something unexpected (e.g. keyboard keypress) - * Save entry. - */ - la t0, htif_ring - li t4, (HTIF_RING_SIZE) - add t0, t0, t4 /* t0 == htif_ring_cursor */ - - ld t2, 0(t0) /* load ptr to cursor */ - sd t6, 0(t2) /* put entry */ - li t4, 1 - sd t4, 8(t2) /* mark used */ - ld t4, 16(t2) /* take next */ - /* Update cursor */ - sd t4, 0(t0) - - /* Post supervisor software interrupt */ - li t0, MIP_SSIP - csrs mip, t0 - - /* Clear and look for response again */ - la t2, fromhost - li t5, 0 - sd t5, 0(t2) - j 2b - -3: - la t2, fromhost - li t5, 0 - sd t5, 0(t2) - - /* Release lock */ - la t0, htif_lock - li t2, 0 - amoswap.d t3, t2, 0(t0) - - j exit_next_instr - -set_mtimecmp: - /* Enable interrupts */ - li t0, (MIE_MTIE | MIE_STIE) - csrs mie, t0 - j exit_next_instr - -/* - * Trap exit functions - */ -exit_next_instr: - /* Next instruction is in t1 */ - csrw mepc, t1 -exit: - /* Restore state */ - ld t0, (8 * 0)(sp) - ld t1, (8 * 1)(sp) - ld t2, (8 * 2)(sp) - ld t3, (8 * 3)(sp) - ld t4, (8 * 4)(sp) - ld t5, (8 * 5)(sp) - ld a0, (8 * 7)(sp) - addi sp, sp, 64 - csrrw sp, mscratch, sp - mret - -exit_mrts: - j exit_mrts diff --git a/sys/riscv/riscv/genassym.c b/sys/riscv/riscv/genassym.c index c6c8b8518fa5..c9e9bc1c991a 100644 --- a/sys/riscv/riscv/genassym.c +++ b/sys/riscv/riscv/genassym.c @@ -57,7 +57,6 @@ __FBSDID("$FreeBSD$"); #include ASSYM(KERNBASE, KERNBASE); -ASSYM(KERNENTRY, KERNENTRY); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); diff --git a/sys/riscv/riscv/identcpu.c b/sys/riscv/riscv/identcpu.c index 8226423b2fc1..94ad889e0ed9 100644 --- a/sys/riscv/riscv/identcpu.c +++ b/sys/riscv/riscv/identcpu.c @@ -101,8 +101,9 @@ identify_cpu(void) cpu_partsp = NULL; - mimpid = machine_command(ECALL_MIMPID_GET, 0); - misa = machine_command(ECALL_MCPUID_GET, 0); + /* TODO: can we get mimpid and misa somewhere ? */ + mimpid = 0; + misa = 0; cpu = PCPU_GET(cpuid); diff --git a/sys/riscv/riscv/intr_machdep.c b/sys/riscv/riscv/intr_machdep.c index 3dd988f712b3..879cea710e7e 100644 --- a/sys/riscv/riscv/intr_machdep.c +++ b/sys/riscv/riscv/intr_machdep.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #ifdef SMP #include @@ -267,7 +268,7 @@ ipi_send(struct pcpu *pc, int ipi) CTR3(KTR_SMP, "%s: cpu=%d, ipi=%x", __func__, pc->pc_cpuid, ipi); atomic_set_32(&pc->pc_pending_ipis, ipi); - machine_command(ECALL_SEND_IPI, pc->pc_reg); + sbi_send_ipi(pc->pc_cpuid); CTR1(KTR_SMP, "%s: sent", __func__); } diff --git a/sys/riscv/riscv/locore.S b/sys/riscv/riscv/locore.S index 1783fb908171..2b36e2e8aeb8 100644 --- a/sys/riscv/riscv/locore.S +++ b/sys/riscv/riscv/locore.S @@ -43,118 +43,47 @@ #include #include -#define HTIF_RING_NENTRIES (512) -#define HTIF_RING_ENTRY_SZ (24) -#define HTIF_RING_SIZE (HTIF_RING_ENTRY_SZ * HTIF_RING_NENTRIES) -#define HW_STACK_SIZE (96) - -/* - * Event queue: - * - * struct htif_ring { - * uint64_t data; - * uint64_t used; - * uint64_t next; - * } htif_ring[HTIF_RING_NENTRIES]; - * uint64_t htif_ring_cursor; - * uint64_t htif_ring_last; - */ - -.macro build_ring - la t0, htif_ring - li t1, 0 - sd t1, 0(t0) /* zero data */ - sd t1, 8(t0) /* zero used */ - mv t2, t0 - mv t3, t0 - li t5, (HTIF_RING_SIZE) - li t6, 0 - add t4, t0, t5 -1: - addi t3, t3, HTIF_RING_ENTRY_SZ /* pointer to next */ - beq t3, t4, 2f /* finish */ - sd t3, 16(t2) /* store pointer */ - addi t2, t2, HTIF_RING_ENTRY_SZ /* next entry */ - addi t6, t6, 1 /* counter */ - j 1b -2: - addi t3, t3, -HTIF_RING_ENTRY_SZ - sd t0, 16(t3) /* last -> first */ - - li t2, (HTIF_RING_SIZE) - add s0, t0, t2 - sd t0, 0(s0) /* cursor */ - sd t0, 8(s0) /* last */ - /* finish building ring */ -.endm - .globl kernbase .set kernbase, KERNBASE /* Trap entries */ .text -mentry: - /* Vectors */ - j _start /* reset */ - j bad_trap /* NMI (non-maskable interrupt) */ - j machine_trap - /* Reset vector */ .text .globl _start _start: - /* Setup machine trap vector */ - la t0, machine_trap - csrw mtvec, t0 - - /* Delegate interrupts to supervisor mode */ - li t0, (MIP_SSIP | MIP_STIP | MIP_SEIP) - csrw mideleg, t0 - - /* Delegate exceptions to supervisor mode */ - li t0, (1 << EXCP_MISALIGNED_FETCH) | \ - (1 << EXCP_FAULT_FETCH) | \ - (1 << EXCP_ILLEGAL_INSTRUCTION) | \ - (1 << EXCP_FAULT_LOAD) | \ - (1 << EXCP_FAULT_STORE) | \ - (1 << EXCP_BREAKPOINT) | \ - (1 << EXCP_USER_ECALL) - csrw medeleg, t0 - + /* Setup supervisor trap vector */ la t0, cpu_exception_handler - li t1, KERNBASE - add t0, t0, t1 csrw stvec, t0 - /* Direct secondary cores to mpentry */ - csrr a0, mhartid - bnez a0, mpentry - - li t1, 0 - la t0, tohost - sd t1, 0(t0) - la t0, fromhost - sd t1, 0(t0) - - /* Build event queue for current core */ - build_ring - - /* Setup machine-mode stack for CPU 0 */ - la t0, hardstack_end - csrw mscratch, t0 - + /* Ensure sscratch is zero */ li t0, 0 csrw sscratch, t0 - li s10, PAGE_SIZE - li s9, (PAGE_SIZE * KSTACK_PAGES) + /* Load physical memory information */ + li a0, 0 + la a1, memory_info + call sbi_query_memory - /* Page tables */ + /* Store base to s6 */ + la s6, memory_info + ld s6, 0(s6) /* s6 = physmem base */ + + /* Direct secondary cores to mpentry */ + call sbi_hart_id + bnez a0, mpentry + + /* + * Page tables + */ /* Create an L1 page for early devmap */ la s1, pagetable_l1 la s2, pagetable_l2_devmap /* Link to next level PN */ + li t0, KERNBASE + sub s2, s2, t0 + add s2, s2, s6 srli s2, s2, PAGE_SHIFT li a5, (VM_MAX_KERNEL_ADDRESS - L2_SIZE) @@ -170,19 +99,74 @@ _start: add t0, s1, a5 sd t6, (t0) - /* Add single Level 1 entry for kernel */ + /* Create an L1 page for SBI */ + la s1, pagetable_l1 + la s2, pagetable_l2_sbi /* Link to next level PN */ + li t0, KERNBASE + sub s2, s2, t0 + add s2, s2, s6 + srli s2, s2, PAGE_SHIFT + li a5, 511 + li t4, PTE_V + slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ + or t6, t4, t5 + + /* Store SBI L1 PTE entry to position */ + li a6, PTE_SIZE + mulw a5, a5, a6 + add t0, s1, a5 + sd t6, (t0) + + /* Create an L2 page for SBI */ + la s1, pagetable_l2_sbi + la s2, pagetable_l3_sbi /* Link to next level PN */ + li t0, KERNBASE + sub s2, s2, t0 + add s2, s2, s6 + srli s2, s2, PAGE_SHIFT + li a5, 511 + li t4, PTE_V + slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ + or t6, t4, t5 + + /* Store SBI L2 PTE entry to position */ + li a6, PTE_SIZE + mulw a5, a5, a6 + add t0, s1, a5 + sd t6, (t0) + + /* Create an L3 page for SBI */ + la s1, pagetable_l3_sbi + li s2, 0x80009000 + srli s2, s2, PAGE_SHIFT + li a5, 511 + li t4, PTE_V | PTE_RX | PTE_W + slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ + or t6, t4, t5 + + /* Store SBI L3 PTE entry to position */ + li a6, PTE_SIZE + mulw a5, a5, a6 + add t0, s1, a5 + sd t6, (t0) + /* END SBI page creation */ + + /* Add L1 entry for kernel */ la s1, pagetable_l1 la s2, pagetable_l2 /* Link to next level PN */ + li t0, KERNBASE + sub s2, s2, t0 + add s2, s2, s6 srli s2, s2, PAGE_SHIFT - li a5, (KERNBASE + KERNENTRY) + li a5, KERNBASE srli a5, a5, L1_SHIFT /* >> L1_SHIFT */ andi a5, a5, 0x1ff /* & 0x1ff */ li t4, PTE_V slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ or t6, t4, t5 - /* Store single level1 PTE entry to position */ + /* Store L1 PTE entry to position */ li a6, PTE_SIZE mulw a5, a5, a6 add t0, s1, a5 @@ -190,8 +174,7 @@ _start: /* Level 2 superpages (512 x 2MiB) */ la s1, pagetable_l2 - li t4, KERNENTRY - srli t4, t4, 21 /* Div by 2 MiB */ + srli t4, s6, 21 /* Div physmem base by 2 MiB */ li t2, 512 /* Build 512 entries */ add t3, t4, t2 li t5, 0 @@ -206,32 +189,13 @@ _start: bltu t4, t3, 2b /* Set page tables base register */ - la s1, pagetable_l1 - srli s1, s1, PAGE_SHIFT - csrw sptbr, s1 + la s2, pagetable_l1 + li t0, KERNBASE + sub s2, s2, t0 + add s2, s2, s6 + srli s2, s2, PAGE_SHIFT + csrw sptbr, s2 - /* Page tables END */ - - /* Enter supervisor mode */ - li s0, ((MSTATUS_VM_SV39 << MSTATUS_VM_SHIFT) | \ - (MSTATUS_PRV_S << MSTATUS_MPP_SHIFT)); - csrw mstatus, s0 - - /* - * Enable machine-mode software interrupts - * so we can deliver IPI to this core. - */ - li t0, MIE_MSIE - csrs mie, t0 - - /* Exit from machine mode */ - la t0, .Lmmu_on - li s11, KERNBASE - add t0, t0, s11 - csrw mepc, t0 - mret - -.Lmmu_on: /* Initialize stack pointer */ la s3, initstack_end mv sp, s3 @@ -247,8 +211,10 @@ _start: /* Fill riscv_bootparams */ addi sp, sp, -16 + la t0, pagetable_l1 sd t0, 0(sp) /* kern_l1pt */ + la t0, initstack_end sd t0, 8(sp) /* kern_stack */ @@ -260,19 +226,6 @@ _start: initstack: .space (PAGE_SIZE * KSTACK_PAGES) initstack_end: -hardstack: - .space (HW_STACK_SIZE * MAXCPU) -hardstack_end: - - .globl htif_ring -htif_ring: - .space (HTIF_RING_SIZE + 16) -htif_lock: - .space (8) -tohost: - .space (8) -fromhost: - .space (8) ENTRY(sigcode) mv a0, sp @@ -305,6 +258,14 @@ pagetable_l2: .space PAGE_SIZE pagetable_l2_devmap: .space PAGE_SIZE +pagetable_l2_sbi: + .space PAGE_SIZE +pagetable_l3_sbi: + .space PAGE_SIZE + + .globl memory_info +memory_info: + .space (24) .globl init_pt_va init_pt_va: @@ -321,7 +282,6 @@ END(mpentry) * mpentry(unsigned long) * * Called by a core when it is being brought online. - * The data in x0 is passed straight to init_secondary. */ ENTRY(mpentry) /* @@ -340,44 +300,12 @@ ENTRY(mpentry) beqz t1, 1b /* Set page tables base register */ - la t0, pagetable_l1 - srli t0, t0, PAGE_SHIFT - csrw sptbr, t0 - - /* Configure mstatus */ - li s0, ((MSTATUS_VM_SV39 << MSTATUS_VM_SHIFT) | \ - (MSTATUS_PRV_S << MSTATUS_MPP_SHIFT)); - csrw mstatus, s0 - - /* Setup stack for machine mode exceptions */ - la t0, hardstack_end - li t1, HW_STACK_SIZE - mulw t1, t1, a0 - sub t0, t0, t1 - csrw mscratch, t0 - - li t0, 0 - csrw sscratch, t0 - - /* - * Enable machine-mode software interrupts - * so we can deliver IPI to this core. - */ - li t0, MIE_MSIE - csrs mie, t0 - - /* - * Exit from machine mode and go to - * the virtual address space. - */ - la t0, mp_virtdone - li s11, KERNBASE - add t0, t0, s11 - csrw mepc, t0 - mret - -mp_virtdone: - /* We are now in virtual address space */ + la s2, pagetable_l1 + li t0, KERNBASE + sub s2, s2, t0 + add s2, s2, s6 + srli s2, s2, PAGE_SHIFT + csrw sptbr, s2 /* Setup stack pointer */ la t0, secondary_stacks @@ -388,5 +316,3 @@ mp_virtdone: call init_secondary END(mpentry) #endif - -#include "exception.S" diff --git a/sys/riscv/riscv/machdep.c b/sys/riscv/riscv/machdep.c index fb969f1e7702..7bc4402597de 100644 --- a/sys/riscv/riscv/machdep.c +++ b/sys/riscv/riscv/machdep.c @@ -80,6 +80,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include @@ -116,6 +117,7 @@ int64_t idcache_line_size; /* The minimum cache line size */ extern int *end; extern int *initstack_end; +extern memory_block_info memory_info; struct pcpu *pcpup; @@ -728,12 +730,9 @@ fake_preload_metadata(struct riscv_bootparams *rvbp __unused) void initriscv(struct riscv_bootparams *rvbp) { - struct mem_region mem_regions[FDT_MEM_REGIONS]; vm_offset_t lastaddr; - int mem_regions_sz; vm_size_t kernlen; caddr_t kmdp; - int i; /* Set the module data location */ lastaddr = fake_preload_metadata(rvbp); @@ -755,12 +754,20 @@ initriscv(struct riscv_bootparams *rvbp) /* Load the physical memory ranges */ physmap_idx = 0; +#if 0 + struct mem_region mem_regions[FDT_MEM_REGIONS]; + int mem_regions_sz; + int i; /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, NULL) != 0) panic("Cannot get physical memory regions"); for (i = 0; i < mem_regions_sz; i++) add_physmap_entry(mem_regions[i].mr_start, mem_regions[i].mr_size, physmap, &physmap_idx); +#endif + + add_physmap_entry(memory_info.base, memory_info.size, + physmap, &physmap_idx); /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; @@ -778,14 +785,15 @@ initriscv(struct riscv_bootparams *rvbp) /* Bootstrap enough of pmap to enter the kernel proper */ kernlen = (lastaddr - KERNBASE); - pmap_bootstrap(rvbp->kern_l1pt, KERNENTRY, kernlen); + pmap_bootstrap(rvbp->kern_l1pt, memory_info.base, kernlen); cninit(); init_proc0(rvbp->kern_stack); /* set page table base register for thread0 */ - thread0.td_pcb->pcb_l1addr = (rvbp->kern_l1pt - KERNBASE); + thread0.td_pcb->pcb_l1addr = \ + (rvbp->kern_l1pt - KERNBASE + memory_info.base); msgbufinit(msgbufp, msgbufsize); mutex_init(); diff --git a/sys/riscv/riscv/mp_machdep.c b/sys/riscv/riscv/mp_machdep.c index 9152e700e36d..33353ac2a3c8 100644 --- a/sys/riscv/riscv/mp_machdep.c +++ b/sys/riscv/riscv/mp_machdep.c @@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #ifdef VFP #include #endif @@ -273,14 +274,7 @@ ipi_handler(void *arg) u_int cpu, ipi; int bit; - /* - * We have shared interrupt line for both IPI and HTIF, - * so we don't really need to clear pending bit here - * as it will be cleared later in htif_intr. - * But lets assume HTIF is optional part, so do clear - * pending bit if there is no new entires in htif_ring. - */ - machine_command(ECALL_CLEAR_IPI, 0); + sbi_clear_ipi(); cpu = PCPU_GET(cpuid); @@ -382,12 +376,10 @@ cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) /* We are already running on cpu 0 */ if (id == 0) { - pcpup->pc_reg = target_cpu; return (1); } pcpu_init(pcpup, id, sizeof(struct pcpu)); - pcpup->pc_reg = target_cpu; dpcpu[id - 1] = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 73bc5c89687d..f09fc857a897 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -220,6 +220,14 @@ vm_offset_t kernel_vm_end = 0; struct msgbuf *msgbufp = NULL; +vm_paddr_t dmap_phys_base; /* The start of the dmap region */ +vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ +vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ + +/* This code assumes all L1 DMAP entries will be used */ +CTASSERT((DMAP_MIN_ADDRESS & ~L1_OFFSET) == DMAP_MIN_ADDRESS); +CTASSERT((DMAP_MAX_ADDRESS & ~L1_OFFSET) == DMAP_MAX_ADDRESS); + static struct rwlock_padalign pvh_global_lock; /* @@ -458,6 +466,10 @@ pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); + /* Check locore has used L2 superpages */ + KASSERT((l2[l2_slot] & PTE_RX) != 0, + ("Invalid bootstrap L2 table")); + /* L2 is superpages */ ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT; ret += (va & L2_OFFSET); @@ -466,7 +478,7 @@ pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) } static void -pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart) +pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa) { vm_offset_t va; vm_paddr_t pa; @@ -475,19 +487,12 @@ pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart) pt_entry_t entry; pn_t pn; - /* - * Initialize DMAP starting from zero physical address. - * TODO: remove this once machine-mode code splitted out. - */ - kernstart = 0; - printf("%s: l1pt 0x%016lx kernstart 0x%016lx\n", __func__, l1pt, kernstart); - - pa = kernstart & ~L1_OFFSET; + pa = dmap_phys_base = min_pa & ~L1_OFFSET; va = DMAP_MIN_ADDRESS; - l1 = (pd_entry_t *)l1pt; + l1 = (pd_entry_t *)kern_l1; l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS); - for (; va < DMAP_MAX_ADDRESS; + for (; va < DMAP_MAX_ADDRESS && pa < max_pa; pa += L1_SIZE, va += L1_SIZE, l1_slot++) { KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); @@ -498,6 +503,10 @@ pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart) pmap_load_store(&l1[l1_slot], entry); } + /* Set the upper limit of the DMAP region */ + dmap_phys_max = pa; + dmap_max_addr = va; + cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); cpu_tlb_flushID(); } @@ -552,7 +561,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) pt_entry_t *l2; vm_offset_t va, freemempos; vm_offset_t dpcpu, msgbufpv; - vm_paddr_t pa, min_pa; + vm_paddr_t pa, min_pa, max_pa; int i; kern_delta = KERNBASE - kernstart; @@ -574,7 +583,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) LIST_INIT(&allpmaps); /* Assume the address we were loaded to is a valid physical address */ - min_pa = KERNBASE - kern_delta; + min_pa = max_pa = KERNBASE - kern_delta; /* * Find the minimum physical address. physmap is sorted, @@ -585,11 +594,13 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) continue; if (physmap[i] <= min_pa) min_pa = physmap[i]; + if (physmap[i + 1] > max_pa) + max_pa = physmap[i + 1]; break; } /* Create a direct map region early so we can use it for pa -> va */ - pmap_bootstrap_dmap(l1pt, min_pa); + pmap_bootstrap_dmap(l1pt, min_pa, max_pa); va = KERNBASE; pa = KERNBASE - kern_delta; diff --git a/sys/riscv/htif/htif_console.c b/sys/riscv/riscv/riscv_console.c similarity index 71% rename from sys/riscv/htif/htif_console.c rename to sys/riscv/riscv/riscv_console.c index 7eca398b799b..d59ed59c18ae 100644 --- a/sys/riscv/htif/htif_console.c +++ b/sys/riscv/riscv/riscv_console.c @@ -47,16 +47,41 @@ __FBSDID("$FreeBSD$"); #include #include #include - -#include -#include - -#include "htif.h" +#include #include - #include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static struct resource_spec rcons_spec[] = { + { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE}, + { -1, 0 } +}; + +/* bus softc */ +struct rcons_softc { + struct resource *res[1]; + void *ihl[1]; + device_t dev; +}; + +/* CN Console interface */ + static tsw_outwakeup_t riscvtty_outwakeup; static struct ttydevsw riscv_ttydevsw = { @@ -86,8 +111,6 @@ CONSOLE_DRIVER(riscv); #define MAX_BURST_LEN 1 #define QUEUE_SIZE 256 -#define CONSOLE_DEFAULT_ID 1ul -#define SPIN_IN_MACHINE_MODE 1 struct queue_entry { uint64_t data; @@ -102,13 +125,8 @@ struct queue_entry *entry_served; static void riscv_putc(int c) { - uint64_t cmd; - cmd = (HTIF_CMD_WRITE << HTIF_CMD_SHIFT); - cmd |= (CONSOLE_DEFAULT_ID << HTIF_DEV_ID_SHIFT); - cmd |= c; - - machine_command(ECALL_HTIF_CMD, cmd); + sbi_console_putchar(c); } #ifdef EARLY_PRINTF @@ -215,39 +233,23 @@ riscv_cnungrab(struct consdev *cp) static int riscv_cngetc(struct consdev *cp) { -#if defined(KDB) - uint64_t devcmd; - uint64_t entry; - uint64_t devid; -#endif - uint64_t cmd; uint8_t data; int ch; - cmd = (HTIF_CMD_READ << HTIF_CMD_SHIFT); - cmd |= (CONSOLE_DEFAULT_ID << HTIF_DEV_ID_SHIFT); - - machine_command(ECALL_HTIF_CMD_REQ, cmd); - #if defined(KDB) + /* + * RISCVTODO: BBL polls for console data on timer interrupt, + * but interrupts are turned off in KDB. + * So we currently do not have console in KDB. + */ if (kdb_active) { + ch = sbi_console_getchar(); + while (ch) { + entry_last->data = ch; + entry_last->used = 1; + entry_last = entry_last->next; - entry = machine_command(ECALL_HTIF_CMD_RESP, 0); - while (entry) { - devid = HTIF_DEV_ID(entry); - devcmd = HTIF_DEV_CMD(entry); - data = HTIF_DEV_DATA(entry); - - if (devid == CONSOLE_DEFAULT_ID && devcmd == 0) { - entry_last->data = data; - entry_last->used = 1; - entry_last = entry_last->next; - } else { - printf("Lost interrupt: devid %d\n", - devid); - } - - entry = machine_command(ECALL_HTIF_CMD_RESP, 0); + ch = sbi_console_getchar(); } } #endif @@ -275,75 +277,83 @@ riscv_cnputc(struct consdev *cp, int c) riscv_putc(c); } -/* - * Bus interface. - */ +/* Bus interface */ -struct htif_console_softc { - device_t dev; - int running; - int intr_chan; - int cmd_done; - int curtag; - int index; -}; - -static void -htif_console_intr(void *arg, uint64_t entry) +static int +rcons_intr(void *arg) { - struct htif_console_softc *sc; - uint8_t devcmd; - uint64_t data; + int c; - sc = arg; - - devcmd = HTIF_DEV_CMD(entry); - data = HTIF_DEV_DATA(entry); - - if (devcmd == 0) { - entry_last->data = data; + c = sbi_console_getchar(); + if (c > 0 && c < 0xff) { + entry_last->data = c; entry_last->used = 1; entry_last = entry_last->next; } + + csr_clear(sip, SIP_SSIP); + + return (FILTER_HANDLED); } static int -htif_console_probe(device_t dev) +rcons_probe(device_t dev) { - return (0); + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_is_compatible(dev, "riscv,console")) + return (ENXIO); + + device_set_desc(dev, "RISC-V console"); + return (BUS_PROBE_DEFAULT); } static int -htif_console_attach(device_t dev) +rcons_attach(device_t dev) { - struct htif_console_softc *sc; + struct rcons_softc *sc; + int error; sc = device_get_softc(dev); sc->dev = dev; - sc->index = htif_get_index(dev); - if (sc->index < 0) - return (EINVAL); + if (bus_alloc_resources(dev, rcons_spec, sc->res)) { + device_printf(dev, "could not allocate resources\n"); + return (ENXIO); + } - htif_setup_intr(sc->index, htif_console_intr, sc); + /* Setup IRQs handler */ + error = bus_setup_intr(dev, sc->res[0], INTR_TYPE_CLK, + rcons_intr, NULL, sc, &sc->ihl[0]); + if (error) { + device_printf(dev, "Unable to alloc int resource.\n"); + return (ENXIO); + } + + csr_set(sie, SIE_SSIE); + + bus_generic_attach(sc->dev); + + sbi_console_getchar(); return (0); } -static device_method_t htif_console_methods[] = { - DEVMETHOD(device_probe, htif_console_probe), - DEVMETHOD(device_attach, htif_console_attach), +static device_method_t rcons_methods[] = { + DEVMETHOD(device_probe, rcons_probe), + DEVMETHOD(device_attach, rcons_attach), + DEVMETHOD_END }; -static driver_t htif_console_driver = { - "htif_console", - htif_console_methods, - sizeof(struct htif_console_softc) +static driver_t rcons_driver = { + "rcons", + rcons_methods, + sizeof(struct rcons_softc) }; -static devclass_t htif_console_devclass; +static devclass_t rcons_devclass; -DRIVER_MODULE(htif_console, htif, htif_console_driver, - htif_console_devclass, 0, 0); +DRIVER_MODULE(rcons, simplebus, rcons_driver, rcons_devclass, 0, 0); diff --git a/sys/riscv/riscv/sbi.S b/sys/riscv/riscv/sbi.S new file mode 100644 index 000000000000..b5b2916e0f4f --- /dev/null +++ b/sys/riscv/riscv/sbi.S @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 2016 Ruslan Bukin + * All rights reserved. + * + * Portions of this software were developed by SRI International and the + * University of Cambridge Computer Laboratory under DARPA/AFRL contract + * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. + * + * Portions of this software were developed by the University of Cambridge + * Computer Laboratory as part of the CTSRD Project, with support from the + * UK Higher Education Innovation Fund (HEIF). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +.globl sbi_hart_id; sbi_hart_id = -2048 +.globl sbi_num_harts; sbi_num_harts = -2032 +.globl sbi_query_memory; sbi_query_memory = -2016 +.globl sbi_console_putchar; sbi_console_putchar = -2000 +.globl sbi_console_getchar; sbi_console_getchar = -1984 +.globl sbi_send_ipi; sbi_send_ipi = -1952 +.globl sbi_clear_ipi; sbi_clear_ipi = -1936 +.globl sbi_timebase; sbi_timebase = -1920 +.globl sbi_shutdown; sbi_shutdown = -1904 +.globl sbi_set_timer; sbi_set_timer = -1888 +.globl sbi_mask_interrupt; sbi_mask_interrupt = -1872 +.globl sbi_unmask_interrupt; sbi_unmask_interrupt = -1856 +.globl sbi_remote_sfence_vm; sbi_remote_sfence_vm = -1840 +.globl sbi_remote_sfence_vm_range; sbi_remote_sfence_vm_range = -1824 +.globl sbi_remote_fence_i; sbi_remote_fence_i = -1808 diff --git a/sys/riscv/riscv/timer.c b/sys/riscv/riscv/timer.c index ae8ec67b6e90..0462e62b1aac 100644 --- a/sys/riscv/riscv/timer.c +++ b/sys/riscv/riscv/timer.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -135,7 +136,7 @@ riscv_tmr_start(struct eventtimer *et, sbintime_t first, sbintime_t period) cpu = PCPU_GET(cpuid); WRITE8(sc, TIMER_MTIMECMP(cpu), counts); csr_set(sie, SIE_STIE); - machine_command(ECALL_MTIMECMP, counts); + sbi_set_timer(counts); return (0); } diff --git a/sys/riscv/riscv/vm_machdep.c b/sys/riscv/riscv/vm_machdep.c index 1f6613077142..d2d4c6e3d3ec 100644 --- a/sys/riscv/riscv/vm_machdep.c +++ b/sys/riscv/riscv/vm_machdep.c @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include /* * Finish a fork operation, with process p2 nearly set up. @@ -106,9 +107,9 @@ void cpu_reset(void) { - printf("cpu_reset"); - while(1) - __asm volatile("wfi" ::: "memory"); + sbi_shutdown(); + + while(1); } void From 2d700cb557aa42f30d2d872e9f89e3b39469f552 Mon Sep 17 00:00:00 2001 From: Ruslan Bukin Date: Wed, 10 Aug 2016 13:32:27 +0000 Subject: [PATCH 06/76] Remove extra -msoft-float flags settings. This helps to build firmware modules. Sponsored by: DARPA, AFRL Sponsored by: HEIF5 --- share/mk/bsd.cpu.mk | 5 ----- sys/modules/dtrace/dtrace/Makefile | 5 ----- 2 files changed, 10 deletions(-) diff --git a/share/mk/bsd.cpu.mk b/share/mk/bsd.cpu.mk index a967f892846b..8e7aef3246de 100644 --- a/share/mk/bsd.cpu.mk +++ b/share/mk/bsd.cpu.mk @@ -327,11 +327,6 @@ CFLAGS += -mfloat-abi=softfp .endif .endif -.if ${MACHINE_CPUARCH} == "riscv" -CFLAGS += -msoft-float -ACFLAGS += -msoft-float -.endif - # NB: COPTFLAGS is handled in /usr/src/sys/conf/kern.pre.mk .if !defined(NO_CPU_CFLAGS) diff --git a/sys/modules/dtrace/dtrace/Makefile b/sys/modules/dtrace/dtrace/Makefile index 3c0812c5ccd7..417266c5641b 100644 --- a/sys/modules/dtrace/dtrace/Makefile +++ b/sys/modules/dtrace/dtrace/Makefile @@ -58,11 +58,6 @@ assym.o: assym.s ${AS} -meabi=5 -o assym.o assym.s .endif -.if ${MACHINE_CPUARCH} == "riscv" -assym.o: assym.s - ${AS} -msoft-float -o assym.o assym.s -.endif - .include CFLAGS+= -include ${SYSDIR}/cddl/compat/opensolaris/sys/debug_compat.h From e42f8233fc446b54af6ee62e2b48a49ac6240829 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 10 Aug 2016 13:44:03 +0000 Subject: [PATCH 07/76] Unconditionally perform checks that FPU region was entered, when #NM exception is caught in kernel mode. There are third-party modules which trigger the issue, and since the problem causes usermode state corruption at least, panic in production kernels as well. Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/amd64/amd64/trap.c | 4 ++-- sys/i386/i386/trap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 1b85b3298daa..04c5dcc0eacd 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -443,8 +443,8 @@ trap(struct trapframe *frame) goto out; case T_DNA: - KASSERT(!PCB_USER_FPU(td->td_pcb), - ("Unregistered use of FPU in kernel")); + if (PCB_USER_FPU(td->td_pcb)) + panic("Unregistered use of FPU in kernel"); fpudna(); goto out; diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 40f72042d27f..c540a49a00e5 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -540,8 +540,8 @@ trap(struct trapframe *frame) case T_DNA: #ifdef DEV_NPX - KASSERT(!PCB_USER_FPU(td->td_pcb), - ("Unregistered use of FPU in kernel")); + if (PCB_USER_FPU(td->td_pcb)) + panic("Unregistered use of FPU in kernel"); if (npxdna()) goto out; #endif From 49c394a9708052882cb296a2d5ed4bf95e662456 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 10 Aug 2016 13:47:12 +0000 Subject: [PATCH 08/76] Re-schedule signals after kthread exits, since apparently there are processes which combine kernel and non-kernel threads, e.g. nfsd. For such processes, termination of a kthread must recheck signal delivery among other threads according to masks. Reported and tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/kern/kern_kthread.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index 72c01f0f0d40..04f3423fb958 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -320,11 +320,13 @@ void kthread_exit(void) { struct proc *p; + struct thread *td; - p = curthread->td_proc; + td = curthread; + p = td->td_proc; /* A module may be waiting for us to exit. */ - wakeup(curthread); + wakeup(td); /* * The last exiting thread in a kernel process must tear down @@ -337,9 +339,10 @@ kthread_exit(void) rw_wunlock(&tidhash_lock); kproc_exit(0); } - LIST_REMOVE(curthread, td_hash); + LIST_REMOVE(td, td_hash); rw_wunlock(&tidhash_lock); - umtx_thread_exit(curthread); + umtx_thread_exit(td); + tdsigcleanup(td); PROC_SLOCK(p); thread_exit(); } From 3cd77f54168a5cb4b6f65fb685360b51c5b9ae40 Mon Sep 17 00:00:00 2001 From: Ruslan Bukin Date: Wed, 10 Aug 2016 13:49:17 +0000 Subject: [PATCH 09/76] Consider CROSS_BINUTILS_PREFIX environment variable so we use correct objdump. Sponsored by: DARPA, AFRL Sponsored by: HEIF5 --- sys/tools/embed_mfs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/tools/embed_mfs.sh b/sys/tools/embed_mfs.sh index 3f20257b7a58..66c50c2da111 100644 --- a/sys/tools/embed_mfs.sh +++ b/sys/tools/embed_mfs.sh @@ -36,7 +36,7 @@ mfs_size=`stat -f '%z' $2 2> /dev/null` # If we can't determine MFS image size - bail. [ -z ${mfs_size} ] && echo "Can't determine MFS image size" && exit 1 -sec_info=`objdump -h $1 2> /dev/null | grep " oldmfs "` +sec_info=`${CROSS_BINUTILS_PREFIX}objdump -h $1 2> /dev/null | grep " oldmfs "` # If we can't find the mfs section within the given kernel - bail. [ -z "${sec_info}" ] && echo "Can't locate mfs section within kernel" && exit 1 From 15ad3e51c530eff9d2356c48d9807ea36124322a Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 10 Aug 2016 13:50:21 +0000 Subject: [PATCH 10/76] Convert another tmpfs assert into runtime check. The offset of the directory file, passed to getdirentries(2) syscall, is user-controllable. The value of the offset must not be asserted, instead the invalid value should be checked and rejected if invalid. Reported and tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/fs/tmpfs/tmpfs_subr.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index 72e879205581..7ce4789c72ab 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -819,10 +819,13 @@ tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie, goto out; } - MPASS((cookie & TMPFS_DIRCOOKIE_MASK) == cookie); - dekey.td_hash = cookie; - /* Recover if direntry for cookie was removed */ - de = RB_NFIND(tmpfs_dir, dirhead, &dekey); + if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) { + de = NULL; + } else { + dekey.td_hash = cookie; + /* Recover if direntry for cookie was removed */ + de = RB_NFIND(tmpfs_dir, dirhead, &dekey); + } dc->tdc_tree = de; dc->tdc_current = de; if (de != NULL && tmpfs_dirent_duphead(de)) { From 3a77833e87a68b01242cfe5f7d0002be04f1a0b0 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 10 Aug 2016 14:41:53 +0000 Subject: [PATCH 11/76] Fix indentation. Reported by: hselasky MFC after: 17 days --- sys/kern/kern_timeout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index e6d6d4491e64..942a78a44e03 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -984,7 +984,7 @@ callout_when(sbintime_t sbt, sbintime_t precision, int flags, if ((flags & C_HARDCLOCK) == 0) to_sbt += tick_sbt; } else - to_sbt = sbinuptime(); + to_sbt = sbinuptime(); if (SBT_MAX - to_sbt < sbt) to_sbt = SBT_MAX; else From 5f521d7ba72145092ea23ff6081d8791ad6c1f9d Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Wed, 10 Aug 2016 15:16:28 +0000 Subject: [PATCH 12/76] Make libcrypt thread-safe. Add crypt_r(3). glibc has a pretty nice function called crypt_r(3), which is nothing more than crypt(3), but thread-safe. It accomplishes this by introducing a 'struct crypt_data' structure that contains a buffer that is large enough to hold the resulting string. Let's go ahead and also add this function. It would be a shame if a useful function like this wouldn't be usable in multithreaded apps. Refactor crypt.c and all of the backends to no longer declare static arrays, but write their output in a provided buffer. There is no need to do any buffer length computation here, as we'll just need to ensure that 'struct crypt_data' is large enough, which it is. _PASSWORD_LEN is defined to 128 bytes, but in this case I'm picking 256, as this is going to be part of the actual ABI. Differential Revision: https://reviews.freebsd.org/D7306 --- include/unistd.h | 7 ++ lib/libcrypt/Makefile | 3 +- lib/libcrypt/crypt-md5.c | 50 ++++++------- lib/libcrypt/crypt-nthash.c | 31 +++----- lib/libcrypt/crypt-sha256.c | 86 +++++---------------- lib/libcrypt/crypt-sha512.c | 108 ++++++++------------------- lib/libcrypt/crypt.3 | 21 +++++- lib/libcrypt/crypt.c | 35 ++++++--- lib/libcrypt/crypt.h | 14 ++-- lib/libcrypt/misc.c | 4 +- secure/lib/libcrypt/crypt-blowfish.c | 42 +++++------ secure/lib/libcrypt/crypt-des.c | 59 ++++++--------- 12 files changed, 189 insertions(+), 271 deletions(-) diff --git a/include/unistd.h b/include/unistd.h index 0d20027f8d56..0fb36e50e050 100644 --- a/include/unistd.h +++ b/include/unistd.h @@ -484,11 +484,18 @@ pid_t vfork(void) __returns_twice; #if __BSD_VISIBLE struct timeval; /* select(2) */ + +struct crypt_data { + int initialized; /* For compatibility with glibc. */ + char __buf[256]; /* Buffer returned by crypt_r(). */ +}; + int acct(const char *); int async_daemon(void); int check_utility_compat(const char *); const char * crypt_get_format(void); +char *crypt_r(const char *, const char *, struct crypt_data *); int crypt_set_format(const char *); int des_cipher(const char *, char *, long, int); int des_setkey(const char *key); diff --git a/lib/libcrypt/Makefile b/lib/libcrypt/Makefile index 3b982a3a62b7..fc966f597d76 100644 --- a/lib/libcrypt/Makefile +++ b/lib/libcrypt/Makefile @@ -17,7 +17,8 @@ SRCS= crypt.c misc.c \ crypt-sha256.c sha256c.c \ crypt-sha512.c sha512c.c MAN= crypt.3 -MLINKS= crypt.3 crypt_get_format.3 crypt.3 crypt_set_format.3 +MLINKS= crypt.3 crypt_get_format.3 crypt.3 crypt_r.3 \ + crypt.3 crypt_set_format.3 CFLAGS+= -I${.CURDIR}/../libmd -I${.CURDIR}/../libutil \ -I${.CURDIR}/../../sys/crypto/sha2 diff --git a/lib/libcrypt/crypt-md5.c b/lib/libcrypt/crypt-md5.c index 33186cd4de95..55986750e616 100644 --- a/lib/libcrypt/crypt-md5.c +++ b/lib/libcrypt/crypt-md5.c @@ -41,31 +41,27 @@ __FBSDID("$FreeBSD$"); * UNIX password */ -char * -crypt_md5(const char *pw, const char *salt) +int +crypt_md5(const char *pw, const char *salt, char *buffer) { MD5_CTX ctx,ctx1; unsigned long l; int sl, pl; u_int i; u_char final[MD5_SIZE]; - static const char *sp, *ep; - static char passwd[120], *p; + const char *ep; static const char *magic = "$1$"; - /* Refine the Salt first */ - sp = salt; - - /* If it starts with the magic string, then skip that */ - if(!strncmp(sp, magic, strlen(magic))) - sp += strlen(magic); + /* If the salt starts with the magic string, skip that. */ + if (!strncmp(salt, magic, strlen(magic))) + salt += strlen(magic); /* It stops at the first '$', max 8 chars */ - for(ep = sp; *ep && *ep != '$' && ep < (sp + 8); ep++) + for (ep = salt; *ep && *ep != '$' && ep < salt + 8; ep++) continue; /* get the length of the true salt */ - sl = ep - sp; + sl = ep - salt; MD5Init(&ctx); @@ -76,12 +72,12 @@ crypt_md5(const char *pw, const char *salt) MD5Update(&ctx, (const u_char *)magic, strlen(magic)); /* Then the raw salt */ - MD5Update(&ctx, (const u_char *)sp, (u_int)sl); + MD5Update(&ctx, (const u_char *)salt, (u_int)sl); /* Then just as many characters of the MD5(pw,salt,pw) */ MD5Init(&ctx1); MD5Update(&ctx1, (const u_char *)pw, strlen(pw)); - MD5Update(&ctx1, (const u_char *)sp, (u_int)sl); + MD5Update(&ctx1, (const u_char *)salt, (u_int)sl); MD5Update(&ctx1, (const u_char *)pw, strlen(pw)); MD5Final(final, &ctx1); for(pl = (int)strlen(pw); pl > 0; pl -= MD5_SIZE) @@ -99,9 +95,9 @@ crypt_md5(const char *pw, const char *salt) MD5Update(&ctx, (const u_char *)pw, 1); /* Now make the output string */ - strcpy(passwd, magic); - strncat(passwd, sp, (u_int)sl); - strcat(passwd, "$"); + buffer = stpcpy(buffer, magic); + buffer = stpncpy(buffer, salt, (u_int)sl); + *buffer++ = '$'; MD5Final(final, &ctx); @@ -118,7 +114,7 @@ crypt_md5(const char *pw, const char *salt) MD5Update(&ctx1, (const u_char *)final, MD5_SIZE); if(i % 3) - MD5Update(&ctx1, (const u_char *)sp, (u_int)sl); + MD5Update(&ctx1, (const u_char *)salt, (u_int)sl); if(i % 7) MD5Update(&ctx1, (const u_char *)pw, strlen(pw)); @@ -130,24 +126,22 @@ crypt_md5(const char *pw, const char *salt) MD5Final(final, &ctx1); } - p = passwd + strlen(passwd); - l = (final[ 0]<<16) | (final[ 6]<<8) | final[12]; - _crypt_to64(p, l, 4); p += 4; + _crypt_to64(buffer, l, 4); buffer += 4; l = (final[ 1]<<16) | (final[ 7]<<8) | final[13]; - _crypt_to64(p, l, 4); p += 4; + _crypt_to64(buffer, l, 4); buffer += 4; l = (final[ 2]<<16) | (final[ 8]<<8) | final[14]; - _crypt_to64(p, l, 4); p += 4; + _crypt_to64(buffer, l, 4); buffer += 4; l = (final[ 3]<<16) | (final[ 9]<<8) | final[15]; - _crypt_to64(p, l, 4); p += 4; + _crypt_to64(buffer, l, 4); buffer += 4; l = (final[ 4]<<16) | (final[10]<<8) | final[ 5]; - _crypt_to64(p, l, 4); p += 4; + _crypt_to64(buffer, l, 4); buffer += 4; l = final[11]; - _crypt_to64(p, l, 2); p += 2; - *p = '\0'; + _crypt_to64(buffer, l, 2); buffer += 2; + *buffer = '\0'; /* Don't leave anything around in vm they could use. */ memset(final, 0, sizeof(final)); - return (passwd); + return (0); } diff --git a/lib/libcrypt/crypt-nthash.c b/lib/libcrypt/crypt-nthash.c index 19b84ceb831d..b637eb6b5fa3 100644 --- a/lib/libcrypt/crypt-nthash.c +++ b/lib/libcrypt/crypt-nthash.c @@ -46,16 +46,14 @@ __FBSDID("$FreeBSD$"); */ /* ARGSUSED */ -char * -crypt_nthash(const char *pw, const char *salt __unused) +int +crypt_nthash(const char *pw, const char *salt __unused, char *buffer) { size_t unipwLen; - int i, j; - static char hexconvtab[] = "0123456789abcdef"; + int i; + static const char hexconvtab[] = "0123456789abcdef"; static const char *magic = "$3$"; - static char passwd[120]; u_int16_t unipw[128]; - char final[MD4_SIZE*2 + 1]; u_char hash[MD4_SIZE]; const char *s; MD4_CTX ctx; @@ -70,19 +68,14 @@ crypt_nthash(const char *pw, const char *salt __unused) MD4Init(&ctx); MD4Update(&ctx, (u_char *)unipw, unipwLen*sizeof(u_int16_t)); MD4Final(hash, &ctx); - - for (i = j = 0; i < MD4_SIZE; i++) { - final[j++] = hexconvtab[hash[i] >> 4]; - final[j++] = hexconvtab[hash[i] & 15]; + + buffer = stpcpy(buffer, magic); + *buffer++ = '$'; + for (i = 0; i < MD4_SIZE; i++) { + *buffer++ = hexconvtab[hash[i] >> 4]; + *buffer++ = hexconvtab[hash[i] & 15]; } - final[j] = '\0'; + *buffer = '\0'; - strcpy(passwd, magic); - strcat(passwd, "$"); - strncat(passwd, final, MD4_SIZE*2); - - /* Don't leave anything around in vm they could use. */ - memset(final, 0, sizeof(final)); - - return (passwd); + return (0); } diff --git a/lib/libcrypt/crypt-sha256.c b/lib/libcrypt/crypt-sha256.c index cab7405bfd60..0cfef105734e 100644 --- a/lib/libcrypt/crypt-sha256.c +++ b/lib/libcrypt/crypt-sha256.c @@ -59,11 +59,10 @@ static const char sha256_rounds_prefix[] = "rounds="; /* Maximum number of rounds. */ #define ROUNDS_MAX 999999999 -static char * -crypt_sha256_r(const char *key, const char *salt, char *buffer, int buflen) +int +crypt_sha256(const char *key, const char *salt, char *buffer) { u_long srounds; - int n; uint8_t alt_result[32], temp_result[32]; SHA256_CTX ctx, alt_ctx; size_t salt_len, key_len, cnt, rounds; @@ -210,42 +209,27 @@ crypt_sha256_r(const char *key, const char *salt, char *buffer, int buflen) /* Now we can construct the result string. It consists of three * parts. */ - cp = stpncpy(buffer, sha256_salt_prefix, MAX(0, buflen)); - buflen -= sizeof(sha256_salt_prefix) - 1; + cp = stpcpy(buffer, sha256_salt_prefix); - if (rounds_custom) { - n = snprintf(cp, MAX(0, buflen), "%s%zu$", - sha256_rounds_prefix, rounds); + if (rounds_custom) + cp += sprintf(cp, "%s%zu$", sha256_rounds_prefix, rounds); - cp += n; - buflen -= n; - } + cp = stpncpy(cp, salt, salt_len); - cp = stpncpy(cp, salt, MIN((size_t)MAX(0, buflen), salt_len)); - buflen -= MIN((size_t)MAX(0, buflen), salt_len); + *cp++ = '$'; - if (buflen > 0) { - *cp++ = '$'; - --buflen; - } - - b64_from_24bit(alt_result[0], alt_result[10], alt_result[20], 4, &buflen, &cp); - b64_from_24bit(alt_result[21], alt_result[1], alt_result[11], 4, &buflen, &cp); - b64_from_24bit(alt_result[12], alt_result[22], alt_result[2], 4, &buflen, &cp); - b64_from_24bit(alt_result[3], alt_result[13], alt_result[23], 4, &buflen, &cp); - b64_from_24bit(alt_result[24], alt_result[4], alt_result[14], 4, &buflen, &cp); - b64_from_24bit(alt_result[15], alt_result[25], alt_result[5], 4, &buflen, &cp); - b64_from_24bit(alt_result[6], alt_result[16], alt_result[26], 4, &buflen, &cp); - b64_from_24bit(alt_result[27], alt_result[7], alt_result[17], 4, &buflen, &cp); - b64_from_24bit(alt_result[18], alt_result[28], alt_result[8], 4, &buflen, &cp); - b64_from_24bit(alt_result[9], alt_result[19], alt_result[29], 4, &buflen, &cp); - b64_from_24bit(0, alt_result[31], alt_result[30], 3, &buflen, &cp); - if (buflen <= 0) { - errno = ERANGE; - buffer = NULL; - } - else - *cp = '\0'; /* Terminate the string. */ + b64_from_24bit(alt_result[0], alt_result[10], alt_result[20], 4, &cp); + b64_from_24bit(alt_result[21], alt_result[1], alt_result[11], 4, &cp); + b64_from_24bit(alt_result[12], alt_result[22], alt_result[2], 4, &cp); + b64_from_24bit(alt_result[3], alt_result[13], alt_result[23], 4, &cp); + b64_from_24bit(alt_result[24], alt_result[4], alt_result[14], 4, &cp); + b64_from_24bit(alt_result[15], alt_result[25], alt_result[5], 4, &cp); + b64_from_24bit(alt_result[6], alt_result[16], alt_result[26], 4, &cp); + b64_from_24bit(alt_result[27], alt_result[7], alt_result[17], 4, &cp); + b64_from_24bit(alt_result[18], alt_result[28], alt_result[8], 4, &cp); + b64_from_24bit(alt_result[9], alt_result[19], alt_result[29], 4, &cp); + b64_from_24bit(0, alt_result[31], alt_result[30], 3, &cp); + *cp = '\0'; /* Terminate the string. */ /* Clear the buffer for the intermediate result so that people * attaching to processes or reading core dumps cannot get any @@ -263,37 +247,7 @@ crypt_sha256_r(const char *key, const char *salt, char *buffer, int buflen) if (copied_salt != NULL) memset(copied_salt, '\0', salt_len); - return buffer; -} - -/* This entry point is equivalent to crypt(3). */ -char * -crypt_sha256(const char *key, const char *salt) -{ - /* We don't want to have an arbitrary limit in the size of the - * password. We can compute an upper bound for the size of the - * result in advance and so we can prepare the buffer we pass to - * `crypt_sha256_r'. */ - static char *buffer; - static int buflen; - int needed; - char *new_buffer; - - needed = (sizeof(sha256_salt_prefix) - 1 - + sizeof(sha256_rounds_prefix) + 9 + 1 - + strlen(salt) + 1 + 43 + 1); - - if (buflen < needed) { - new_buffer = (char *)realloc(buffer, needed); - - if (new_buffer == NULL) - return NULL; - - buffer = new_buffer; - buflen = needed; - } - - return crypt_sha256_r(key, salt, buffer, buflen); + return (0); } #ifdef TEST diff --git a/lib/libcrypt/crypt-sha512.c b/lib/libcrypt/crypt-sha512.c index 8e0054fb5a89..2e7cd4ec580e 100644 --- a/lib/libcrypt/crypt-sha512.c +++ b/lib/libcrypt/crypt-sha512.c @@ -59,11 +59,10 @@ static const char sha512_rounds_prefix[] = "rounds="; /* Maximum number of rounds. */ #define ROUNDS_MAX 999999999 -static char * -crypt_sha512_r(const char *key, const char *salt, char *buffer, int buflen) +int +crypt_sha512(const char *key, const char *salt, char *buffer) { u_long srounds; - int n; uint8_t alt_result[64], temp_result[64]; SHA512_CTX ctx, alt_ctx; size_t salt_len, key_len, cnt, rounds; @@ -210,54 +209,39 @@ crypt_sha512_r(const char *key, const char *salt, char *buffer, int buflen) /* Now we can construct the result string. It consists of three * parts. */ - cp = stpncpy(buffer, sha512_salt_prefix, MAX(0, buflen)); - buflen -= sizeof(sha512_salt_prefix) - 1; + cp = stpcpy(buffer, sha512_salt_prefix); - if (rounds_custom) { - n = snprintf(cp, MAX(0, buflen), "%s%zu$", - sha512_rounds_prefix, rounds); + if (rounds_custom) + cp += sprintf(cp, "%s%zu$", sha512_rounds_prefix, rounds); - cp += n; - buflen -= n; - } + cp = stpncpy(cp, salt, salt_len); - cp = stpncpy(cp, salt, MIN((size_t)MAX(0, buflen), salt_len)); - buflen -= MIN((size_t)MAX(0, buflen), salt_len); + *cp++ = '$'; - if (buflen > 0) { - *cp++ = '$'; - --buflen; - } + b64_from_24bit(alt_result[0], alt_result[21], alt_result[42], 4, &cp); + b64_from_24bit(alt_result[22], alt_result[43], alt_result[1], 4, &cp); + b64_from_24bit(alt_result[44], alt_result[2], alt_result[23], 4, &cp); + b64_from_24bit(alt_result[3], alt_result[24], alt_result[45], 4, &cp); + b64_from_24bit(alt_result[25], alt_result[46], alt_result[4], 4, &cp); + b64_from_24bit(alt_result[47], alt_result[5], alt_result[26], 4, &cp); + b64_from_24bit(alt_result[6], alt_result[27], alt_result[48], 4, &cp); + b64_from_24bit(alt_result[28], alt_result[49], alt_result[7], 4, &cp); + b64_from_24bit(alt_result[50], alt_result[8], alt_result[29], 4, &cp); + b64_from_24bit(alt_result[9], alt_result[30], alt_result[51], 4, &cp); + b64_from_24bit(alt_result[31], alt_result[52], alt_result[10], 4, &cp); + b64_from_24bit(alt_result[53], alt_result[11], alt_result[32], 4, &cp); + b64_from_24bit(alt_result[12], alt_result[33], alt_result[54], 4, &cp); + b64_from_24bit(alt_result[34], alt_result[55], alt_result[13], 4, &cp); + b64_from_24bit(alt_result[56], alt_result[14], alt_result[35], 4, &cp); + b64_from_24bit(alt_result[15], alt_result[36], alt_result[57], 4, &cp); + b64_from_24bit(alt_result[37], alt_result[58], alt_result[16], 4, &cp); + b64_from_24bit(alt_result[59], alt_result[17], alt_result[38], 4, &cp); + b64_from_24bit(alt_result[18], alt_result[39], alt_result[60], 4, &cp); + b64_from_24bit(alt_result[40], alt_result[61], alt_result[19], 4, &cp); + b64_from_24bit(alt_result[62], alt_result[20], alt_result[41], 4, &cp); + b64_from_24bit(0, 0, alt_result[63], 2, &cp); - b64_from_24bit(alt_result[0], alt_result[21], alt_result[42], 4, &buflen, &cp); - b64_from_24bit(alt_result[22], alt_result[43], alt_result[1], 4, &buflen, &cp); - b64_from_24bit(alt_result[44], alt_result[2], alt_result[23], 4, &buflen, &cp); - b64_from_24bit(alt_result[3], alt_result[24], alt_result[45], 4, &buflen, &cp); - b64_from_24bit(alt_result[25], alt_result[46], alt_result[4], 4, &buflen, &cp); - b64_from_24bit(alt_result[47], alt_result[5], alt_result[26], 4, &buflen, &cp); - b64_from_24bit(alt_result[6], alt_result[27], alt_result[48], 4, &buflen, &cp); - b64_from_24bit(alt_result[28], alt_result[49], alt_result[7], 4, &buflen, &cp); - b64_from_24bit(alt_result[50], alt_result[8], alt_result[29], 4, &buflen, &cp); - b64_from_24bit(alt_result[9], alt_result[30], alt_result[51], 4, &buflen, &cp); - b64_from_24bit(alt_result[31], alt_result[52], alt_result[10], 4, &buflen, &cp); - b64_from_24bit(alt_result[53], alt_result[11], alt_result[32], 4, &buflen, &cp); - b64_from_24bit(alt_result[12], alt_result[33], alt_result[54], 4, &buflen, &cp); - b64_from_24bit(alt_result[34], alt_result[55], alt_result[13], 4, &buflen, &cp); - b64_from_24bit(alt_result[56], alt_result[14], alt_result[35], 4, &buflen, &cp); - b64_from_24bit(alt_result[15], alt_result[36], alt_result[57], 4, &buflen, &cp); - b64_from_24bit(alt_result[37], alt_result[58], alt_result[16], 4, &buflen, &cp); - b64_from_24bit(alt_result[59], alt_result[17], alt_result[38], 4, &buflen, &cp); - b64_from_24bit(alt_result[18], alt_result[39], alt_result[60], 4, &buflen, &cp); - b64_from_24bit(alt_result[40], alt_result[61], alt_result[19], 4, &buflen, &cp); - b64_from_24bit(alt_result[62], alt_result[20], alt_result[41], 4, &buflen, &cp); - b64_from_24bit(0, 0, alt_result[63], 2, &buflen, &cp); - - if (buflen <= 0) { - errno = ERANGE; - buffer = NULL; - } - else - *cp = '\0'; /* Terminate the string. */ + *cp = '\0'; /* Terminate the string. */ /* Clear the buffer for the intermediate result so that people * attaching to processes or reading core dumps cannot get any @@ -275,37 +259,7 @@ crypt_sha512_r(const char *key, const char *salt, char *buffer, int buflen) if (copied_salt != NULL) memset(copied_salt, '\0', salt_len); - return buffer; -} - -/* This entry point is equivalent to crypt(3). */ -char * -crypt_sha512(const char *key, const char *salt) -{ - /* We don't want to have an arbitrary limit in the size of the - * password. We can compute an upper bound for the size of the - * result in advance and so we can prepare the buffer we pass to - * `crypt_sha512_r'. */ - static char *buffer; - static int buflen; - int needed; - char *new_buffer; - - needed = (sizeof(sha512_salt_prefix) - 1 - + sizeof(sha512_rounds_prefix) + 9 + 1 - + strlen(salt) + 1 + 86 + 1); - - if (buflen < needed) { - new_buffer = (char *)realloc(buffer, needed); - - if (new_buffer == NULL) - return NULL; - - buffer = new_buffer; - buflen = needed; - } - - return crypt_sha512_r(key, salt, buffer, buflen); + return (0); } #ifdef TEST diff --git a/lib/libcrypt/crypt.3 b/lib/libcrypt/crypt.3 index 828c37f51d63..f16dfd5dd053 100644 --- a/lib/libcrypt/crypt.3 +++ b/lib/libcrypt/crypt.3 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 9, 2014 +.Dd August 10, 2016 .Dt CRYPT 3 .Os .Sh NAME @@ -41,6 +41,8 @@ .In unistd.h .Ft char * .Fn crypt "const char *key" "const char *salt" +.Ft char * +.Fn crypt_r "const char *key" "const char *salt" "struct crypt_data *data" .Ft const char * .Fn crypt_get_format "void" .Ft int @@ -246,10 +248,20 @@ The .Fn crypt_set_format function sets the default encoding format according to the supplied .Fa string . +.Pp +The +.Fn crypt_r +function behaves identically to +.Fn crypt , +except that the resulting string is stored in +.Fa data , +making it thread-safe. .Sh RETURN VALUES The .Fn crypt -function returns a pointer to the encrypted value on success, and NULL on +and +.Fn crypt_r +functions return a pointer to the encrypted value on success, and NULL on failure. Note: this is not a standard behaviour, AT&T .Fn crypt @@ -280,6 +292,11 @@ section of the code (FreeSec 1.0) was developed outside the United States of America as an unencumbered replacement for the U.S.-only .Nx libcrypt encryption library. +.Pp +The +.Fn crypt_r +function was added in +.Fx 12.0 . .Sh AUTHORS .An -nosplit Originally written by diff --git a/lib/libcrypt/crypt.c b/lib/libcrypt/crypt.c index 623809e5afce..54dc33b54977 100644 --- a/lib/libcrypt/crypt.c +++ b/lib/libcrypt/crypt.c @@ -46,9 +46,9 @@ __FBSDID("$FreeBSD$"); * and it needs to be the default for backward compatibility. */ static const struct crypt_format { - const char *const name; - char *(*const func)(const char *, const char *); - const char *const magic; + const char *name; + int (*func)(const char *, const char *, char *); + const char *magic; } crypt_formats[] = { { "md5", crypt_md5, "$1$" }, #ifdef HAS_BLOWFISH @@ -104,20 +104,37 @@ crypt_set_format(const char *format) * otherwise, the currently selected format is used. */ char * -crypt(const char *passwd, const char *salt) +crypt_r(const char *passwd, const char *salt, struct crypt_data *data) { const struct crypt_format *cf; + int (*func)(const char *, const char *, char *); #ifdef HAS_DES int len; #endif for (cf = crypt_formats; cf->name != NULL; ++cf) - if (cf->magic != NULL && strstr(salt, cf->magic) == salt) - return (cf->func(passwd, salt)); + if (cf->magic != NULL && strstr(salt, cf->magic) == salt) { + func = cf->func; + goto match; + } #ifdef HAS_DES len = strlen(salt); - if ((len == 13 || len == 2) && strspn(salt, DES_SALT_ALPHABET) == len) - return (crypt_des(passwd, salt)); + if ((len == 13 || len == 2) && strspn(salt, DES_SALT_ALPHABET) == len) { + func = crypt_des; + goto match; + } #endif - return (crypt_format->func(passwd, salt)); + func = crypt_format->func; +match: + if (func(passwd, salt, data->__buf) != 0) + return (NULL); + return (data->__buf); +} + +char * +crypt(const char *passwd, const char *salt) +{ + static struct crypt_data data; + + return (crypt_r(passwd, salt, &data)); } diff --git a/lib/libcrypt/crypt.h b/lib/libcrypt/crypt.h index b33ad0943e29..81b0e0324efd 100644 --- a/lib/libcrypt/crypt.h +++ b/lib/libcrypt/crypt.h @@ -32,12 +32,12 @@ #define MD4_SIZE 16 #define MD5_SIZE 16 -char *crypt_des(const char *pw, const char *salt); -char *crypt_md5(const char *pw, const char *salt); -char *crypt_nthash(const char *pw, const char *salt); -char *crypt_blowfish(const char *pw, const char *salt); -char *crypt_sha256 (const char *pw, const char *salt); -char *crypt_sha512 (const char *pw, const char *salt); +int crypt_des(const char *pw, const char *salt, char *buf); +int crypt_md5(const char *pw, const char *salt, char *buf); +int crypt_nthash(const char *pw, const char *salt, char *buf); +int crypt_blowfish(const char *pw, const char *salt, char *buf); +int crypt_sha256 (const char *pw, const char *salt, char *buf); +int crypt_sha512 (const char *pw, const char *salt, char *buf); extern void _crypt_to64(char *s, u_long v, int n); -extern void b64_from_24bit(uint8_t B2, uint8_t B1, uint8_t B0, int n, int *buflen, char **cp); +extern void b64_from_24bit(uint8_t B2, uint8_t B1, uint8_t B0, int n, char **cp); diff --git a/lib/libcrypt/misc.c b/lib/libcrypt/misc.c index 0f63ce04214c..2202ffb652cd 100644 --- a/lib/libcrypt/misc.c +++ b/lib/libcrypt/misc.c @@ -47,7 +47,7 @@ _crypt_to64(char *s, u_long v, int n) } void -b64_from_24bit(uint8_t B2, uint8_t B1, uint8_t B0, int n, int *buflen, char **cp) +b64_from_24bit(uint8_t B2, uint8_t B1, uint8_t B0, int n, char **cp) { uint32_t w; int i; @@ -56,8 +56,6 @@ b64_from_24bit(uint8_t B2, uint8_t B1, uint8_t B0, int n, int *buflen, char **cp for (i = 0; i < n; i++) { **cp = itoa64[w&0x3f]; (*cp)++; - if ((*buflen)-- < 0) - break; w >>= 6; } } diff --git a/secure/lib/libcrypt/crypt-blowfish.c b/secure/lib/libcrypt/crypt-blowfish.c index acd9057b6e85..eb2d3456b25c 100644 --- a/secure/lib/libcrypt/crypt-blowfish.c +++ b/secure/lib/libcrypt/crypt-blowfish.c @@ -75,8 +75,6 @@ __FBSDID("$FreeBSD$"); static void encode_base64(u_int8_t *, u_int8_t *, u_int16_t); static void decode_base64(u_int8_t *, u_int16_t, const u_int8_t *); -static char encrypted[_PASSWORD_LEN]; - const static u_int8_t Base64Code[] = "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; @@ -135,8 +133,8 @@ decode_base64(u_int8_t *buffer, u_int16_t len, const u_int8_t *data) /* We handle $Vers$log2(NumRounds)$salt+passwd$ i.e. $2$04$iwouldntknowwhattosayetKdJ6iFtacBqJdKe6aW7ou */ -char * -crypt_blowfish(const char *key, const char *salt) +int +crypt_blowfish(const char *key, const char *salt, char *buffer) { blf_ctx state; u_int32_t rounds, i, k; @@ -157,10 +155,8 @@ crypt_blowfish(const char *key, const char *salt) /* Discard "$" identifier */ salt++; - if (*salt > BCRYPT_VERSION) { - /* How do I handle errors ? Return NULL */ - return NULL; - } + if (*salt > BCRYPT_VERSION) + return (-1); /* Check for minor versions */ if (salt[1] != '$') { @@ -174,7 +170,7 @@ crypt_blowfish(const char *key, const char *salt) salt++; break; default: - return NULL; + return (-1); } } else minr = 0; @@ -184,15 +180,15 @@ crypt_blowfish(const char *key, const char *salt) if (salt[2] != '$') /* Out of sync with passwd entry */ - return NULL; + return (-1); memcpy(arounds, salt, sizeof(arounds)); if (arounds[sizeof(arounds) - 1] != '$') - return NULL; + return (-1); arounds[sizeof(arounds) - 1] = 0; logr = strtonum(arounds, BCRYPT_MINLOGROUNDS, 31, NULL); if (logr == 0) - return NULL; + return (-1); /* Computer power doesn't increase linearly, 2^x should be fine */ rounds = 1U << logr; @@ -201,7 +197,7 @@ crypt_blowfish(const char *key, const char *salt) } if (strlen(salt) * 3 / 4 < BCRYPT_MAXSALT) - return NULL; + return (-1); /* We dont want the base64 salt but the raw data */ decode_base64(csalt, BCRYPT_MAXSALT, (const u_int8_t *) salt); @@ -248,23 +244,23 @@ crypt_blowfish(const char *key, const char *salt) } - i = 0; - encrypted[i++] = '$'; - encrypted[i++] = BCRYPT_VERSION; + *buffer++ = '$'; + *buffer++ = BCRYPT_VERSION; if (minr) - encrypted[i++] = minr; - encrypted[i++] = '$'; + *buffer++ = minr; + *buffer++ = '$'; - snprintf(encrypted + i, 4, "%2.2u$", logr); + snprintf(buffer, 4, "%2.2u$", logr); + buffer += 3; - encode_base64((u_int8_t *) encrypted + i + 3, csalt, BCRYPT_MAXSALT); - encode_base64((u_int8_t *) encrypted + strlen(encrypted), ciphertext, - 4 * BCRYPT_BLOCKS - 1); + encode_base64((u_int8_t *)buffer, csalt, BCRYPT_MAXSALT); + buffer += strlen(buffer); + encode_base64((u_int8_t *)buffer, ciphertext, 4 * BCRYPT_BLOCKS - 1); memset(&state, 0, sizeof(state)); memset(ciphertext, 0, sizeof(ciphertext)); memset(csalt, 0, sizeof(csalt)); memset(cdata, 0, sizeof(cdata)); - return encrypted; + return (0); } static void diff --git a/secure/lib/libcrypt/crypt-des.c b/secure/lib/libcrypt/crypt-des.c index 6bb9bc03c76f..4601e46fe31b 100644 --- a/secure/lib/libcrypt/crypt-des.c +++ b/secure/lib/libcrypt/crypt-des.c @@ -588,13 +588,12 @@ des_cipher(const char *in, char *out, u_long salt, int count) return(retval); } -char * -crypt_des(const char *key, const char *setting) +int +crypt_des(const char *key, const char *setting, char *buffer) { int i; u_int32_t count, salt, l, r0, r1, keybuf[2]; - u_char *p, *q; - static char output[21]; + u_char *q; if (!des_initialised) des_init(); @@ -610,7 +609,7 @@ crypt_des(const char *key, const char *setting) key++; } if (des_setkey((char *)keybuf)) - return(NULL); + return (-1); if (*setting == _PASSWORD_EFMT1) { /* @@ -629,7 +628,7 @@ crypt_des(const char *key, const char *setting) * Encrypt the key with itself. */ if (des_cipher((char *)keybuf, (char *)keybuf, 0L, 1)) - return(NULL); + return (-1); /* * And XOR with the next 8 characters of the key. */ @@ -638,19 +637,9 @@ crypt_des(const char *key, const char *setting) *q++ ^= *key++ << 1; if (des_setkey((char *)keybuf)) - return(NULL); + return (-1); } - strncpy(output, setting, 9); - - /* - * Double check that we weren't given a short setting. - * If we were, the above code will probably have created - * wierd values for count and salt, but we don't really care. - * Just make sure the output string doesn't have an extra - * NUL in it. - */ - output[9] = '\0'; - p = (u_char *)output + strlen(output); + buffer = stpncpy(buffer, setting, 9); } else { /* * "old"-style: @@ -662,43 +651,41 @@ crypt_des(const char *key, const char *setting) salt = (ascii_to_bin(setting[1]) << 6) | ascii_to_bin(setting[0]); - output[0] = setting[0]; + *buffer++ = setting[0]; /* * If the encrypted password that the salt was extracted from * is only 1 character long, the salt will be corrupted. We * need to ensure that the output string doesn't have an extra * NUL in it! */ - output[1] = setting[1] ? setting[1] : output[0]; - - p = (u_char *)output + 2; + *buffer++ = setting[1] ? setting[1] : setting[0]; } setup_salt(salt); /* * Do it. */ if (do_des(0L, 0L, &r0, &r1, (int)count)) - return(NULL); + return (-1); /* * Now encode the result... */ l = (r0 >> 8); - *p++ = ascii64[(l >> 18) & 0x3f]; - *p++ = ascii64[(l >> 12) & 0x3f]; - *p++ = ascii64[(l >> 6) & 0x3f]; - *p++ = ascii64[l & 0x3f]; + *buffer++ = ascii64[(l >> 18) & 0x3f]; + *buffer++ = ascii64[(l >> 12) & 0x3f]; + *buffer++ = ascii64[(l >> 6) & 0x3f]; + *buffer++ = ascii64[l & 0x3f]; l = (r0 << 16) | ((r1 >> 16) & 0xffff); - *p++ = ascii64[(l >> 18) & 0x3f]; - *p++ = ascii64[(l >> 12) & 0x3f]; - *p++ = ascii64[(l >> 6) & 0x3f]; - *p++ = ascii64[l & 0x3f]; + *buffer++ = ascii64[(l >> 18) & 0x3f]; + *buffer++ = ascii64[(l >> 12) & 0x3f]; + *buffer++ = ascii64[(l >> 6) & 0x3f]; + *buffer++ = ascii64[l & 0x3f]; l = r1 << 2; - *p++ = ascii64[(l >> 12) & 0x3f]; - *p++ = ascii64[(l >> 6) & 0x3f]; - *p++ = ascii64[l & 0x3f]; - *p = 0; + *buffer++ = ascii64[(l >> 12) & 0x3f]; + *buffer++ = ascii64[(l >> 6) & 0x3f]; + *buffer++ = ascii64[l & 0x3f]; + *buffer = '\0'; - return(output); + return (0); } From 382172be686cf55d308e38daef0bb219fb9abe10 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 10 Aug 2016 15:24:15 +0000 Subject: [PATCH 13/76] sigio: do a lockless check in funsetownlist There is no need to grab the lock first to see if sigio is used, and it typically is not. --- sys/kern/kern_descrip.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 1005830df416..f3959acfbd5c 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -942,6 +942,8 @@ funsetown(struct sigio **sigiop) { struct sigio *sigio; + if (*sigiop == NULL) + return; SIGIO_LOCK(); sigio = *sigiop; if (sigio == NULL) { From 7c34b35b5725b67833d32336902348a627bebc6b Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 10 Aug 2016 15:25:44 +0000 Subject: [PATCH 14/76] ktrace: do a lockless check on fork to see if tracing is enabled This saves 2 lock acquisitions in the common case. --- sys/kern/kern_ktrace.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index 0e30fc9cd9c8..676cd438420b 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -572,9 +572,14 @@ void ktrprocfork(struct proc *p1, struct proc *p2) { + MPASS(p2->p_tracevp == NULL); + MPASS(p2->p_traceflag == 0); + + if (p1->p_traceflag == 0) + return; + PROC_LOCK(p1); mtx_lock(&ktrace_mtx); - KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode")); if (p1->p_traceflag & KTRFAC_INHERIT) { p2->p_traceflag = p1->p_traceflag; if ((p2->p_tracevp = p1->p_tracevp) != NULL) { From 66bd4c2eeb2e15f721ff01f0e4a658ae3198174e Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Wed, 10 Aug 2016 15:45:25 +0000 Subject: [PATCH 15/76] Make cpu_set_user_tls() work when called on the running thread. On all the other architectures, this function can also be called on the currently running thread. In this case, we shouldn't fix up the address in the PCB, but also patch up the register itself. Otherwise it will not become active and will simply become overwritten by the next switch. Reviewed by: imp MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D7437 --- sys/arm64/arm64/vm_machdep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/arm64/arm64/vm_machdep.c b/sys/arm64/arm64/vm_machdep.c index 1de942487f47..1f8466d29d96 100644 --- a/sys/arm64/arm64/vm_machdep.c +++ b/sys/arm64/arm64/vm_machdep.c @@ -201,6 +201,8 @@ cpu_set_user_tls(struct thread *td, void *tls_base) pcb = td->td_pcb; pcb->pcb_tpidr_el0 = (register_t)tls_base; + if (td == curthread) + WRITE_SPECIALREG(tpidr_el0, tls_base); return (0); } From 411455a8fbcfbb51e60fb5f5d4a02d6e76c0a88c Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Wed, 10 Aug 2016 16:12:31 +0000 Subject: [PATCH 16/76] Replace all remaining calls to vprint(9) with vn_printf(9), and remove the old macro. MFC after: 1 month --- sys/fs/smbfs/smbfs_node.c | 2 +- sys/fs/unionfs/union_vnops.c | 4 ++-- sys/kern/vfs_default.c | 4 ++-- sys/kern/vfs_lookup.c | 4 ++-- sys/kern/vfs_mount.c | 2 +- sys/kern/vfs_subr.c | 6 +++--- sys/sys/vnode.h | 1 - sys/ufs/ffs/ffs_snapshot.c | 2 +- sys/ufs/ffs/ffs_vnops.c | 2 +- sys/ufs/ufs/ufs_lookup.c | 2 +- sys/ufs/ufs/ufs_quota.c | 2 +- sys/vm/vm_object.c | 2 +- 12 files changed, 16 insertions(+), 17 deletions(-) diff --git a/sys/fs/smbfs/smbfs_node.c b/sys/fs/smbfs/smbfs_node.c index 05d19e98ef97..bf4223336f59 100644 --- a/sys/fs/smbfs/smbfs_node.c +++ b/sys/fs/smbfs/smbfs_node.c @@ -132,7 +132,7 @@ smbfs_node_alloc(struct mount *mp, struct vnode *dvp, const char *dirnm, } dnp = dvp ? VTOSMB(dvp) : NULL; if (dnp == NULL && dvp != NULL) { - vprint("smbfs_node_alloc: dead parent vnode", dvp); + vn_printf(dvp, "smbfs_node_alloc: dead parent vnode "); return EINVAL; } error = vfs_hash_get(mp, smbfs_hash(name, nmlen), LK_EXCLUSIVE, td, diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c index 6b60dbd47fb5..16cc438a2205 100644 --- a/sys/fs/unionfs/union_vnops.c +++ b/sys/fs/unionfs/union_vnops.c @@ -1753,9 +1753,9 @@ unionfs_print(struct vop_print_args *ap) */ if (unp->un_uppervp != NULLVP) - vprint("unionfs: upper", unp->un_uppervp); + vn_printf(unp->un_uppervp, "unionfs: upper "); if (unp->un_lowervp != NULLVP) - vprint("unionfs: lower", unp->un_lowervp); + vn_printf(unp->un_lowervp, "unionfs: lower "); return (0); } diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index a7977bf9cf11..8679ad782a74 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -256,7 +256,7 @@ static int vop_nostrategy (struct vop_strategy_args *ap) { printf("No strategy for buffer at %p\n", ap->a_bp); - vprint("vnode", ap->a_vp); + vn_printf(ap->a_vp, "vnode "); ap->a_bp->b_ioflags |= BIO_ERROR; ap->a_bp->b_error = EOPNOTSUPP; bufdone(ap->a_bp); @@ -722,7 +722,7 @@ vop_stdfsync(ap) } BO_UNLOCK(bo); if (error == EAGAIN) - vprint("fsync: giving up on dirty", vp); + vn_printf(vp, "fsync: giving up on dirty "); return (error); } diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index f33dc8bd7f06..07244c99ea29 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -721,7 +721,7 @@ lookup(struct nameidata *ndp) if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags)) cnp->cn_lkflags = LK_EXCLUSIVE; #ifdef NAMEI_DIAGNOSTIC - vprint("lookup in", dp); + vn_printf(dp, "lookup in "); #endif lkflags_save = cnp->cn_lkflags; cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags, @@ -1007,7 +1007,7 @@ relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) * We now have a segment name to search for, and a directory to search. */ #ifdef NAMEI_DIAGNOSTIC - vprint("search in:", dp); + vn_printf(dp, "search in "); #endif if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { KASSERT(*vpp == NULL, ("leaf should be empty")); diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index 247714f07cb4..6e45a2c71b1e 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -510,7 +510,7 @@ vfs_mount_destroy(struct mount *mp) struct vnode *vp; TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) - vprint("", vp); + vn_printf(vp, "dangling vnode "); panic("unmount: dangling vnode"); } KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers")); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 14991f2d8a5a..b369b9760ace 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -2645,7 +2645,7 @@ vputx(struct vnode *vp, int func) error = 0; if (vp->v_usecount != 0) { - vprint("vputx: usecount not zero", vp); + vn_printf(vp, "vputx: usecount not zero for vnode "); panic("vputx: usecount not zero"); } @@ -3036,7 +3036,7 @@ vflush(struct mount *mp, int rootrefs, int flags, struct thread *td) busy++; #ifdef DIAGNOSTIC if (busyprt) - vprint("vflush: busy vnode", vp); + vn_printf(vp, "vflush: busy vnode "); #endif } VOP_UNLOCK(vp, 0); @@ -3409,7 +3409,7 @@ DB_SHOW_COMMAND(lockedvnods, lockedvnodes) TAILQ_FOREACH(mp, &mountlist, mnt_list) { TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && VOP_ISLOCKED(vp)) - vprint("", vp); + vn_printf(vp, "vnode "); } } } diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index eb0672eb3009..8c6bbfd17076 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -655,7 +655,6 @@ int vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize); void vunref(struct vnode *); void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3); -#define vprint(label, vp) vn_printf((vp), "%s\n", (label)) int vrecycle(struct vnode *vp); int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred); diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index 59ae322686d8..5ef439f6a1a8 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -558,7 +558,7 @@ ffs_snapshot(mp, snapfile) } VI_UNLOCK(xvp); if (snapdebug) - vprint("ffs_snapshot: busy vnode", xvp); + vn_printf(xvp, "ffs_snapshot: busy vnode "); if (VOP_GETATTR(xvp, &vat, td->td_ucred) == 0 && vat.va_nlink > 0) { VOP_UNLOCK(xvp, 0); diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 5a70e5c1da15..69182850ffe2 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -342,7 +342,7 @@ ffs_syncvnode(struct vnode *vp, int waitfor, int flags) goto loop; #ifdef INVARIANTS if (!vn_isdisk(vp, NULL)) - vprint("ffs_fsync: dirty", vp); + vn_printf(vp, "ffs_fsync: dirty "); #endif } BO_UNLOCK(bo); diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index 29d96a03ad64..dcff4fbe3d7a 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -1136,7 +1136,7 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename) error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_NORMAL | (DOINGASYNC(dvp) ? 0 : IO_SYNC), cr); if (error != 0) - vprint("ufs_direnter: failed to truncate", dvp); + vn_printf(dvp, "ufs_direnter: failed to truncate "); #ifdef UFS_DIRHASH if (error == 0 && dp->i_dirhash != NULL) ufsdirhash_dirtrunc(dp, dp->i_endoff); diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index 4fbb8a18032f..c236706f3346 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -469,7 +469,7 @@ chkdquot(struct inode *ip) continue; if (ip->i_dquot[i] == NODQUOT) { UFS_UNLOCK(ump); - vprint("chkdquot: missing dquot", ITOV(ip)); + vn_printf(ITOV(ip), "chkdquot: missing dquot "); panic("chkdquot: missing dquot"); } } diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 297b1171423a..17723c7308f0 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -471,7 +471,7 @@ vm_object_vndeallocate(vm_object_t object) KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp")); #ifdef INVARIANTS if (object->ref_count == 0) { - vprint("vm_object_vndeallocate", vp); + vn_printf(vp, "vm_object_vndeallocate "); panic("vm_object_vndeallocate: bad object reference count"); } #endif From be46a7c54dd2a7204106562dfde2574b64bc29a3 Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Wed, 10 Aug 2016 17:19:33 +0000 Subject: [PATCH 17/76] Improve a consistency check to not detect valid cases for unordered user messages using DATA chunks as invalid ones. While there, ensure that error causes are provided when sending ABORT chunks in case of reassembly problems detected. Thanks to Taylor Brandstetter for making me aware of this problem. MFC after: 3 days --- sys/netinet/sctp_indata.c | 45 +++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/sys/netinet/sctp_indata.c b/sys/netinet/sctp_indata.c index e5ff349e7322..40112530f4fc 100644 --- a/sys/netinet/sctp_indata.c +++ b/sys/netinet/sctp_indata.c @@ -1747,21 +1747,27 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, * If its a fragmented message, lets see if we can find the control * on the reassembly queues. */ - if ((chtype == SCTP_IDATA) && ((chunk_flags & SCTP_DATA_FIRST_FRAG) == 0) && (fsn == 0)) { + if ((chtype == SCTP_IDATA) && + ((chunk_flags & SCTP_DATA_FIRST_FRAG) == 0) && + (fsn == 0)) { /* * The first *must* be fsn 0, and other (middle/end) pieces - * can *not* be fsn 0. + * can *not* be fsn 0. XXX: This can happen in case of a + * wrap around. Ignore is for now. */ + snprintf(msg, sizeof(msg), "FSN zero for MID=%8.8x, but flags=%2.2x", + msg_id, chunk_flags); goto err_out; } + control = sctp_find_reasm_entry(strm, msg_id, ordered, old_data); + SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags:0x%x look for control on queues %p\n", + chunk_flags, control); if ((chunk_flags & SCTP_DATA_NOT_FRAG) != SCTP_DATA_NOT_FRAG) { /* See if we can find the re-assembly entity */ - control = sctp_find_reasm_entry(strm, msg_id, ordered, old_data); - SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags:0x%x look for control on queues %p\n", - chunk_flags, control); - if (control) { + if (control != NULL) { /* We found something, does it belong? */ if (ordered && (msg_id != control->sinfo_ssn)) { + snprintf(msg, sizeof(msg), "Reassembly problem (MID=%8.8x)", msg_id); err_out: op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15; @@ -1774,6 +1780,8 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, * We can't have a switched order with an * unordered chunk */ + snprintf(msg, sizeof(msg), "All fragments of a user message must be ordered or unordered (TSN=%8.8x)", + tsn); goto err_out; } if (!ordered && (((control->sinfo_flags >> 8) & SCTP_DATA_UNORDERED) == 0)) { @@ -1781,6 +1789,8 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, * We can't have a switched unordered with a * ordered chunk */ + snprintf(msg, sizeof(msg), "All fragments of a user message must be ordered or unordered (TSN=%8.8x)", + tsn); goto err_out; } } @@ -1790,14 +1800,21 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, * re-assembly going on with the same Stream/Seq (for * ordered) or in the same Stream for unordered. */ - SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags:0x%x look for msg in case we have dup\n", - chunk_flags); - if (sctp_find_reasm_entry(strm, msg_id, ordered, old_data)) { - SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags: 0x%x dup detected on msg_id: %u\n", - chunk_flags, - msg_id); - - goto err_out; + if (control != NULL) { + if (ordered || (old_data == 0)) { + SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags: 0x%x dup detected on msg_id: %u\n", + chunk_flags, msg_id); + snprintf(msg, sizeof(msg), "Duplicate MID=%8.8x detected.", msg_id); + goto err_out; + } else { + if ((tsn == control->fsn_included + 1) && + (control->end_added == 0)) { + snprintf(msg, sizeof(msg), "Illegal message sequence, missing end for MID: %8.8x", control->fsn_included); + goto err_out; + } else { + control = NULL; + } + } } } /* now do the tests */ From e9a44873095d8022fbafb0820a89ae862dd28d75 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Wed, 10 Aug 2016 18:18:51 +0000 Subject: [PATCH 18/76] Trim out excessive / with -v when target directory ends with a trailing '/'. This is a minor nit after r289391 made all installations to a directory always end in a trailing '/'. MFC after: 3 days Sponsored by: EMC / Isilon Storage Division --- usr.bin/xinstall/xinstall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/usr.bin/xinstall/xinstall.c b/usr.bin/xinstall/xinstall.c index f8b4568cd542..8cf7b9f1266f 100644 --- a/usr.bin/xinstall/xinstall.c +++ b/usr.bin/xinstall/xinstall.c @@ -749,8 +749,9 @@ install(const char *from_name, const char *to_name, u_long fset, u_int flags) } /* Build the target path. */ if (flags & DIRECTORY) { - (void)snprintf(pathbuf, sizeof(pathbuf), "%s/%s", + (void)snprintf(pathbuf, sizeof(pathbuf), "%s%s%s", to_name, + to_name[strlen(to_name) - 1] == '/' ? "" : "/", (p = strrchr(from_name, '/')) ? ++p : from_name); to_name = pathbuf; } From b81ddc83c9698ddc41acbdd7bafb151184a20bdc Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Wed, 10 Aug 2016 18:19:02 +0000 Subject: [PATCH 19/76] Fix -S with -b not atomically updating the destination file. With both of these flags, the backup was created via rename(dest, backup) followed by rename(tmp, dest). This left the destination file missing for a moment which contradicts the point of -S. This fixes a race with installworld where PRECIOUSPROG and PRECIOUSLIB files (which use -S for installation) would briefly be missing. In the case of installing rtld with parallel installworld it could render an error due to not having rtld present to run install/cp in another process. Reported by: jhb Reviewed by: jhb MFC after: 1 week Sponsored by: EMC / Isilon Storage Division Differential Revision: https://reviews.freebsd.org/D7451 --- usr.bin/xinstall/xinstall.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/usr.bin/xinstall/xinstall.c b/usr.bin/xinstall/xinstall.c index 8cf7b9f1266f..61977d1d1291 100644 --- a/usr.bin/xinstall/xinstall.c +++ b/usr.bin/xinstall/xinstall.c @@ -892,11 +892,17 @@ install(const char *from_name, const char *to_name, u_long fset, u_int flags) } if (verbose) (void)printf("install: %s -> %s\n", to_name, backup); - if (rename(to_name, backup) < 0) { + if (unlink(backup) < 0 && errno != ENOENT) { serrno = errno; unlink(tempfile); errno = serrno; - err(EX_OSERR, "rename: %s to %s", to_name, + err(EX_OSERR, "unlink: %s", backup); + } + if (link(to_name, backup) < 0) { + serrno = errno; + unlink(tempfile); + errno = serrno; + err(EX_OSERR, "link: %s to %s", to_name, backup); } } From 49cfff9131605baeb0dc8c1bf7624553b58e3474 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Wed, 10 Aug 2016 18:19:05 +0000 Subject: [PATCH 20/76] Support -v for -l. MFC after: 1 week Sponsored by: EMC / Isilon Storage Division --- usr.bin/xinstall/xinstall.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/usr.bin/xinstall/xinstall.c b/usr.bin/xinstall/xinstall.c index 61977d1d1291..0fc394d8d21e 100644 --- a/usr.bin/xinstall/xinstall.c +++ b/usr.bin/xinstall/xinstall.c @@ -533,6 +533,9 @@ do_link(const char *from_name, const char *to_name, if (target_sb->st_flags & NOCHANGEBITS) (void)chflags(to_name, target_sb->st_flags & ~NOCHANGEBITS); + if (verbose) + printf("install: link %s -> %s\n", + from_name, to_name); unlink(to_name); ret = rename(tmpl, to_name); /* @@ -543,8 +546,12 @@ do_link(const char *from_name, const char *to_name, (void)unlink(tmpl); } return (ret); - } else + } else { + if (verbose) + printf("install: link %s -> %s\n", + from_name, to_name); return (link(from_name, to_name)); + } } /* @@ -575,12 +582,18 @@ do_symlink(const char *from_name, const char *to_name, ~NOCHANGEBITS); unlink(to_name); + if (verbose) + printf("install: symlink %s -> %s\n", + from_name, to_name); if (rename(tmpl, to_name) == -1) { /* Remove temporary link before exiting. */ (void)unlink(tmpl); err(EX_OSERR, "%s: rename", to_name); } } else { + if (verbose) + printf("install: symlink %s -> %s\n", + from_name, to_name); if (symlink(from_name, to_name) == -1) err(EX_OSERR, "symlink %s -> %s", from_name, to_name); } From 89ad63ad2d77437b9ee4b21cb80c5f0552388adf Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Wed, 10 Aug 2016 18:19:08 +0000 Subject: [PATCH 21/76] Fix -S with -l not being atomic. It was unlinking the target even though it uses rename(2) which already effectively does that. -S is intended to not unlink(2) the target first. MFC after: 1 week Reviewed by: jhb Sponsored by: EMC / Isilon Storage Division Differential Revision: https://reviews.freebsd.org/D7452 --- usr.bin/xinstall/xinstall.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/usr.bin/xinstall/xinstall.c b/usr.bin/xinstall/xinstall.c index 0fc394d8d21e..dec555848a6c 100644 --- a/usr.bin/xinstall/xinstall.c +++ b/usr.bin/xinstall/xinstall.c @@ -536,7 +536,6 @@ do_link(const char *from_name, const char *to_name, if (verbose) printf("install: link %s -> %s\n", from_name, to_name); - unlink(to_name); ret = rename(tmpl, to_name); /* * If rename has posix semantics, then the temporary @@ -580,8 +579,6 @@ do_symlink(const char *from_name, const char *to_name, if (target_sb->st_flags & NOCHANGEBITS) (void)chflags(to_name, target_sb->st_flags & ~NOCHANGEBITS); - unlink(to_name); - if (verbose) printf("install: symlink %s -> %s\n", from_name, to_name); From 4a4768d948c03380d01f225d24bda4e77ec4f4f6 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Wed, 10 Aug 2016 18:19:11 +0000 Subject: [PATCH 22/76] Fix -b failure not restoring flags on the destination file. MFC after: 1 week Sponsored by: EMC / Isilon Storage Division --- usr.bin/xinstall/xinstall.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/usr.bin/xinstall/xinstall.c b/usr.bin/xinstall/xinstall.c index dec555848a6c..49e986b1b36f 100644 --- a/usr.bin/xinstall/xinstall.c +++ b/usr.bin/xinstall/xinstall.c @@ -904,6 +904,8 @@ install(const char *from_name, const char *to_name, u_long fset, u_int flags) (void)printf("install: %s -> %s\n", to_name, backup); if (unlink(backup) < 0 && errno != ENOENT) { serrno = errno; + if (to_sb.st_flags & NOCHANGEBITS) + (void)chflags(to_name, to_sb.st_flags); unlink(tempfile); errno = serrno; err(EX_OSERR, "unlink: %s", backup); @@ -911,6 +913,8 @@ install(const char *from_name, const char *to_name, u_long fset, u_int flags) if (link(to_name, backup) < 0) { serrno = errno; unlink(tempfile); + if (to_sb.st_flags & NOCHANGEBITS) + (void)chflags(to_name, to_sb.st_flags); errno = serrno; err(EX_OSERR, "link: %s to %s", to_name, backup); @@ -1133,16 +1137,26 @@ create_newfile(const char *path, int target, struct stat *sbp) if (dobackup) { if ((size_t)snprintf(backup, MAXPATHLEN, "%s%s", - path, suffix) != strlen(path) + strlen(suffix)) + path, suffix) != strlen(path) + strlen(suffix)) { + saved_errno = errno; + if (sbp->st_flags & NOCHANGEBITS) + (void)chflags(path, sbp->st_flags); + errno = saved_errno; errx(EX_OSERR, "%s: backup filename too long", path); + } (void)snprintf(backup, MAXPATHLEN, "%s%s", path, suffix); if (verbose) (void)printf("install: %s -> %s\n", path, backup); - if (rename(path, backup) < 0) + if (rename(path, backup) < 0) { + saved_errno = errno; + if (sbp->st_flags & NOCHANGEBITS) + (void)chflags(path, sbp->st_flags); + errno = saved_errno; err(EX_OSERR, "rename: %s to %s", path, backup); + } } else if (unlink(path) < 0) saved_errno = errno; From c8a8570cdb688eac58d13537581b0d40c007f0cd Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Wed, 10 Aug 2016 18:19:14 +0000 Subject: [PATCH 23/76] Squelch a false-positive Clang static analyzer warning. MFC after: 1 week Sponsored by: EMC / Isilon Storage Division --- usr.bin/xinstall/xinstall.c | 1 + 1 file changed, 1 insertion(+) diff --git a/usr.bin/xinstall/xinstall.c b/usr.bin/xinstall/xinstall.c index 49e986b1b36f..44ab194f9fbb 100644 --- a/usr.bin/xinstall/xinstall.c +++ b/usr.bin/xinstall/xinstall.c @@ -149,6 +149,7 @@ main(int argc, char *argv[]) char *p; const char *to_name; + fset = 0; iflags = 0; group = owner = NULL; while ((ch = getopt(argc, argv, "B:bCcD:df:g:h:l:M:m:N:o:pSsT:Uv")) != From c870956d72f2947ba6a68e4aeca4f18dd240c5c9 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Wed, 10 Aug 2016 18:19:17 +0000 Subject: [PATCH 24/76] Support rmdir(2). MFC after: 3 days Sponsored by: EMC / Isilon Storage Division --- usr.bin/truss/syscalls.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c index 3bf7b9d69ede..d8e21ffe9f57 100644 --- a/usr.bin/truss/syscalls.c +++ b/usr.bin/truss/syscalls.c @@ -279,6 +279,8 @@ static struct syscall decoded_syscalls[] = { .args = { { Name, 0 }, { Name, 1 } } }, { .name = "renameat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 } } }, + { .name = "rmdir", .ret_type = 1, .nargs = 2, + .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "rfork", .ret_type = 1, .nargs = 1, .args = { { Rforkflags, 0 } } }, { .name = "select", .ret_type = 1, .nargs = 5, From 14009f5e90d7f8abde1ddfbf3267884e4a9a37ea Mon Sep 17 00:00:00 2001 From: Li-Wen Hsu Date: Wed, 10 Aug 2016 18:22:42 +0000 Subject: [PATCH 25/76] Only remove empty directories before packaging. This preserves files are intentionally empty, most of them are in tests.txz Reviewed by: bdrewery MFC after: 3 days --- Makefile.inc1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.inc1 b/Makefile.inc1 index a27d562df736..7e9a3526b932 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -1013,7 +1013,7 @@ distributeworld installworld stageworld: _installcheck_world .PHONY ${IMAKEENV} rm -rf ${INSTALLTMP} .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} - find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -empty -delete + find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -type d -empty -delete .endfor .if defined(NO_ROOT) .for dist in base ${EXTRA_DISTRIBUTIONS} From bf38f89f99de8c6becfd040e511373a0835766d0 Mon Sep 17 00:00:00 2001 From: "Stephen J. Kiernan" Date: Wed, 10 Aug 2016 18:23:23 +0000 Subject: [PATCH 26/76] Add kernel environment variables under smbios.system for the following SMBIOS Type 1 fields: smbios.system.sku - SKU Number (SMBIOS 2.4 and above) smbios.system.family - Family (SMBIOS 2.4 and above) Add kernel environment variables under smbios.planar for the following SMBIOS Type 2 fields: smbios.planar.tag - Asset Tag smbios.planar.location - Location in Chassis Reviewed by: jhb, grembo Approved by: sjg (mentor) MFC after: 2 weeks Sponsored by: Juniper Networks, Inc. Differential Revision: https://reviews.freebsd.org/D7453 --- sys/boot/i386/libi386/smbios.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sys/boot/i386/libi386/smbios.c b/sys/boot/i386/libi386/smbios.c index 7a7ce4ba4f56..4a8f3bc4ef08 100644 --- a/sys/boot/i386/libi386/smbios.c +++ b/sys/boot/i386/libi386/smbios.c @@ -238,6 +238,10 @@ smbios_parse_table(const caddr_t addr) smbios_setenv("smbios.system.serial", addr, 0x07); smbios_setuuid("smbios.system.uuid", addr + 0x08, smbios.ver); #endif + if (smbios.major >= 2 && smbios.minor >= 4) { + smbios_setenv("smbios.system.sku", addr, 0x19); + smbios_setenv("smbios.system.family", addr, 0x1a); + } break; case 2: /* 3.3.3 Base Board (or Module) Information (Type 2) */ @@ -246,7 +250,9 @@ smbios_parse_table(const caddr_t addr) smbios_setenv("smbios.planar.version", addr, 0x06); #ifdef SMBIOS_SERIAL_NUMBERS smbios_setenv("smbios.planar.serial", addr, 0x07); + smbios_setenv("smbios.planar.tag", addr, 0x08); #endif + smbios_setenv("smbios.planar.location", addr, 0x0a); break; case 3: /* 3.3.4 System Enclosure or Chassis (Type 3) */ From 6178f8e8a6ab27d49d4d0c5cbd05be4ca2d140f7 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Wed, 10 Aug 2016 18:45:26 +0000 Subject: [PATCH 27/76] Use proper argument length for rmdir(2) for r303934. Reported by: kib X-MFC-With: r303934 MFC after: 3 days Sponsored by: EMC / Isilon Storage Division --- usr.bin/truss/syscalls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c index d8e21ffe9f57..7969c592cfcb 100644 --- a/usr.bin/truss/syscalls.c +++ b/usr.bin/truss/syscalls.c @@ -279,8 +279,8 @@ static struct syscall decoded_syscalls[] = { .args = { { Name, 0 }, { Name, 1 } } }, { .name = "renameat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 } } }, - { .name = "rmdir", .ret_type = 1, .nargs = 2, - .args = { { Name, 0 }, { Octal, 1 } } }, + { .name = "rmdir", .ret_type = 1, .nargs = 1, + .args = { { Name, 0 } } }, { .name = "rfork", .ret_type = 1, .nargs = 1, .args = { { Rforkflags, 0 } } }, { .name = "select", .ret_type = 1, .nargs = 5, From 13b4b4df987d8f05c2cd5813cfe05db953c37c9e Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Wed, 10 Aug 2016 21:02:41 +0000 Subject: [PATCH 28/76] Provide the CloudABI vDSO to its executables. CloudABI executables already provide support for passing in vDSOs. This functionality is used by the emulator for OS X to inject system call handlers. On FreeBSD, we could use it to optimize calls to gettimeofday(), etc. Though I don't have any plans to optimize any system calls right now, let's go ahead and already pass in a vDSO. This will allow us to simplify the executables, as the traditional "syscall" shims can be removed entirely. It also means that we gain more flexibility with regards to adding and removing system calls. Reviewed by: kib Differential Revision: https://reviews.freebsd.org/D7438 --- sys/amd64/cloudabi64/cloudabi64_sysvec.c | 1 - sys/arm64/cloudabi64/cloudabi64_sysvec.c | 1 - sys/compat/cloudabi/cloudabi_util.h | 5 ++ sys/compat/cloudabi/cloudabi_vdso.c | 88 +++++++++++++++++++++ sys/compat/cloudabi64/cloudabi64_module.c | 11 +++ sys/compat/cloudabi64/cloudabi64_vdso.lds.s | 51 ++++++++++++ sys/conf/files | 1 + sys/conf/files.amd64 | 12 +++ sys/conf/files.arm64 | 12 +++ sys/modules/cloudabi/Makefile | 2 +- sys/modules/cloudabi64/Makefile | 32 +++++++- 11 files changed, 211 insertions(+), 5 deletions(-) create mode 100644 sys/compat/cloudabi/cloudabi_vdso.c create mode 100644 sys/compat/cloudabi64/cloudabi64_vdso.lds.s diff --git a/sys/amd64/cloudabi64/cloudabi64_sysvec.c b/sys/amd64/cloudabi64/cloudabi64_sysvec.c index 08d85a2d5cb7..cefdef983238 100644 --- a/sys/amd64/cloudabi64/cloudabi64_sysvec.c +++ b/sys/amd64/cloudabi64/cloudabi64_sysvec.c @@ -196,7 +196,6 @@ static struct sysentvec cloudabi64_elf_sysvec = { .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, - .sv_usrstack = USRSTACK, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = cloudabi64_copyout_strings, .sv_setregs = cloudabi64_proc_setregs, diff --git a/sys/arm64/cloudabi64/cloudabi64_sysvec.c b/sys/arm64/cloudabi64/cloudabi64_sysvec.c index cf3e594244bb..ebcb54e1a867 100644 --- a/sys/arm64/cloudabi64/cloudabi64_sysvec.c +++ b/sys/arm64/cloudabi64/cloudabi64_sysvec.c @@ -165,7 +165,6 @@ static struct sysentvec cloudabi64_elf_sysvec = { .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, - .sv_usrstack = USRSTACK, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = cloudabi64_copyout_strings, .sv_setregs = cloudabi64_proc_setregs, diff --git a/sys/compat/cloudabi/cloudabi_util.h b/sys/compat/cloudabi/cloudabi_util.h index c0a02aa88dcb..6eb65aa87b32 100644 --- a/sys/compat/cloudabi/cloudabi_util.h +++ b/sys/compat/cloudabi/cloudabi_util.h @@ -33,6 +33,7 @@ #include struct file; +struct sysentvec; struct thread; struct timespec; @@ -76,4 +77,8 @@ int cloudabi_futex_lock_wrlock(struct thread *, cloudabi_lock_t *, cloudabi_scope_t, cloudabi_clockid_t, cloudabi_timestamp_t, cloudabi_timestamp_t); +/* vDSO setup and teardown. */ +void cloudabi_vdso_init(struct sysentvec *, char *, char *); +void cloudabi_vdso_destroy(struct sysentvec *); + #endif diff --git a/sys/compat/cloudabi/cloudabi_vdso.c b/sys/compat/cloudabi/cloudabi_vdso.c new file mode 100644 index 000000000000..27c3d365af83 --- /dev/null +++ b/sys/compat/cloudabi/cloudabi_vdso.c @@ -0,0 +1,88 @@ +/*- + * Copyright (c) 2016 Nuxi, https://nuxi.nl/ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +void +cloudabi_vdso_init(struct sysentvec *sv, char *begin, char *end) +{ + vm_page_t m; + vm_object_t obj; + vm_offset_t addr; + size_t i, pages, pages_length, vdso_length; + + /* Determine the number of pages needed to store the vDSO. */ + vdso_length = end - begin; + pages = howmany(vdso_length, PAGE_SIZE); + pages_length = pages * PAGE_SIZE; + + /* Allocate a VM object and fill it with the vDSO. */ + obj = vm_pager_allocate(OBJT_PHYS, 0, pages_length, + VM_PROT_DEFAULT, 0, NULL); + addr = kva_alloc(PAGE_SIZE); + for (i = 0; i < pages; ++i) { + VM_OBJECT_WLOCK(obj); + m = vm_page_grab(obj, i, VM_ALLOC_NOBUSY | VM_ALLOC_ZERO); + m->valid = VM_PAGE_BITS_ALL; + VM_OBJECT_WUNLOCK(obj); + + pmap_qenter(addr, &m, 1); + memcpy((void *)addr, begin + i * PAGE_SIZE, + MIN(vdso_length - i * PAGE_SIZE, PAGE_SIZE)); + pmap_qremove(addr, 1); + } + kva_free(addr, PAGE_SIZE); + + /* + * Place the vDSO at the top of the address space. The user + * stack can start right below it. + */ + sv->sv_shared_page_base = sv->sv_maxuser - pages_length; + sv->sv_shared_page_len = pages_length; + sv->sv_shared_page_obj = obj; + sv->sv_usrstack = sv->sv_shared_page_base; +} + +void +cloudabi_vdso_destroy(struct sysentvec *sv) +{ + + vm_object_deallocate(sv->sv_shared_page_obj); +} diff --git a/sys/compat/cloudabi64/cloudabi64_module.c b/sys/compat/cloudabi64/cloudabi64_module.c index 246a887105cb..da1ea1149a4b 100644 --- a/sys/compat/cloudabi64/cloudabi64_module.c +++ b/sys/compat/cloudabi64/cloudabi64_module.c @@ -38,8 +38,13 @@ __FBSDID("$FreeBSD$"); #include +#include + #include +extern char _binary_cloudabi64_vdso_o_start[]; +extern char _binary_cloudabi64_vdso_o_end[]; + register_t * cloudabi64_copyout_strings(struct image_params *imgp) { @@ -107,6 +112,8 @@ cloudabi64_fixup(register_t **stack_base, struct image_params *imgp) PTR(CLOUDABI_AT_PHDR, args->phdr), VAL(CLOUDABI_AT_PHNUM, args->phnum), VAL(CLOUDABI_AT_TID, td->td_tid), + PTR(CLOUDABI_AT_SYSINFO_EHDR, + imgp->proc->p_sysent->sv_shared_page_base), #undef VAL #undef PTR { .a_type = CLOUDABI_AT_NULL }, @@ -127,6 +134,9 @@ cloudabi64_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: + cloudabi_vdso_init(cloudabi64_brand.sysvec, + _binary_cloudabi64_vdso_o_start, + _binary_cloudabi64_vdso_o_end); if (elf64_insert_brand_entry(&cloudabi64_brand) < 0) { printf("Failed to add CloudABI ELF brand handler\n"); return (EINVAL); @@ -139,6 +149,7 @@ cloudabi64_modevent(module_t mod, int type, void *data) printf("Failed to remove CloudABI ELF brand handler\n"); return (EINVAL); } + cloudabi_vdso_destroy(cloudabi64_brand.sysvec); return (0); default: return (EOPNOTSUPP); diff --git a/sys/compat/cloudabi64/cloudabi64_vdso.lds.s b/sys/compat/cloudabi64/cloudabi64_vdso.lds.s new file mode 100644 index 000000000000..29c94d3cdde8 --- /dev/null +++ b/sys/compat/cloudabi64/cloudabi64_vdso.lds.s @@ -0,0 +1,51 @@ +/* + * Linker script for 64-bit vDSO for CloudABI. + * Based on sys/amd64/linux/linux_vdso.lds.s + * + * $FreeBSD$ + */ + +SECTIONS +{ + . = . + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + .data : { + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .altinstructions : { *(.altinstructions) } + .altinstr_replacement : { *(.altinstr_replacement) } + + . = ALIGN(0x100); + .text : { *(.test .text*) } :text =0x90909090 +} + +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} diff --git a/sys/conf/files b/sys/conf/files index 4f5fa71ee9b8..993f0c3892ab 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -284,6 +284,7 @@ compat/cloudabi/cloudabi_proc.c optional compat_cloudabi64 compat/cloudabi/cloudabi_random.c optional compat_cloudabi64 compat/cloudabi/cloudabi_sock.c optional compat_cloudabi64 compat/cloudabi/cloudabi_thread.c optional compat_cloudabi64 +compat/cloudabi/cloudabi_vdso.c optional compat_cloudabi64 compat/cloudabi64/cloudabi64_fd.c optional compat_cloudabi64 compat/cloudabi64/cloudabi64_module.c optional compat_cloudabi64 compat/cloudabi64/cloudabi64_poll.c optional compat_cloudabi64 diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 331b811081f7..df5d1bcfb88e 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -8,6 +8,18 @@ # dependency lines other than the first are silently ignored. # # +cloudabi64_vdso.o optional compat_cloudabi64 \ + dependency "$S/contrib/cloudabi/cloudabi_vdso_x86_64.c" \ + compile-with "${CC} -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi64/cloudabi64_vdso.lds.s -D_KERNEL -I. -I$S -I$S/contrib/cloudabi -O2 -fomit-frame-pointer $S/contrib/cloudabi/cloudabi_vdso_x86_64.c -o ${.TARGET}" \ + no-obj no-implicit-rule \ + clean "cloudabi64_vdso.o" +# +cloudabi64_vdso_blob.o optional compat_cloudabi64 \ + dependency "cloudabi64_vdso.o" \ + compile-with "${OBJCOPY} --input-target binary --output-target elf64-x86-64-freebsd --binary-architecture i386 cloudabi64_vdso.o ${.TARGET}" \ + no-implicit-rule \ + clean "cloudabi64_vdso_blob.o" +# linux32_genassym.o optional compat_linux32 \ dependency "$S/amd64/linux32/linux32_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index b4c82085ace0..31acaf9ce32f 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -1,4 +1,16 @@ # $FreeBSD$ +cloudabi64_vdso.o optional compat_cloudabi64 \ + dependency "$S/contrib/cloudabi/cloudabi_vdso_aarch64.c" \ + compile-with "${CC} -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi64/cloudabi64_vdso.lds.s -D_KERNEL -I. -I$S -I$S/contrib/cloudabi -O2 -fomit-frame-pointer $S/contrib/cloudabi/cloudabi_vdso_aarch64.c -o ${.TARGET}" \ + no-obj no-implicit-rule \ + clean "cloudabi64_vdso.o" +# +cloudabi64_vdso_blob.o optional compat_cloudabi64 \ + dependency "cloudabi64_vdso.o" \ + compile-with "${OBJCOPY} --input-target binary --output-target elf64-littleaarch64 --binary-architecture aarch64 cloudabi64_vdso.o ${.TARGET}" \ + no-implicit-rule \ + clean "cloudabi64_vdso_blob.o" +# arm/arm/generic_timer.c standard arm/arm/gic.c standard arm/arm/gic_fdt.c optional fdt diff --git a/sys/modules/cloudabi/Makefile b/sys/modules/cloudabi/Makefile index bba50362eab1..c608580c7f30 100644 --- a/sys/modules/cloudabi/Makefile +++ b/sys/modules/cloudabi/Makefile @@ -5,6 +5,6 @@ KMOD= cloudabi SRCS= cloudabi_clock.c cloudabi_errno.c cloudabi_fd.c cloudabi_file.c \ cloudabi_futex.c cloudabi_mem.c cloudabi_proc.c cloudabi_random.c \ - cloudabi_sock.c cloudabi_thread.c vnode_if.h + cloudabi_sock.c cloudabi_thread.c cloudabi_vdso.c vnode_if.h .include diff --git a/sys/modules/cloudabi64/Makefile b/sys/modules/cloudabi64/Makefile index 3aee96d97525..b6fe38057ba8 100644 --- a/sys/modules/cloudabi64/Makefile +++ b/sys/modules/cloudabi64/Makefile @@ -1,11 +1,39 @@ # $FreeBSD$ -.PATH: ${.CURDIR}/../../compat/cloudabi64 -.PATH: ${.CURDIR}/../../${MACHINE}/cloudabi64 +SYSDIR?=${.CURDIR}/../.. + +.PATH: ${SYSDIR}/compat/cloudabi64 +.PATH: ${SYSDIR}/${MACHINE}/cloudabi64 KMOD= cloudabi64 SRCS= cloudabi64_fd.c cloudabi64_module.c cloudabi64_poll.c \ cloudabi64_sock.c cloudabi64_syscalls.c cloudabi64_sysent.c \ cloudabi64_sysvec.c cloudabi64_thread.c +OBJS= cloudabi64_vdso_blob.o +CLEANFILES=cloudabi64_vdso.o + +.if ${MACHINE_CPUARCH} == "aarch64" +VDSO_SRCS=${SYSDIR}/contrib/cloudabi/cloudabi_vdso_aarch64.c +OUTPUT_TARGET=elf64-littleaarch64 +BINARY_ARCHITECTURE=aarch64 +.elif ${MACHINE_CPUARCH} == "amd64" +VDSO_SRCS=${SYSDIR}/contrib/cloudabi/cloudabi_vdso_x86_64.c +OUTPUT_TARGET=elf64-x86-64-freebsd +BINARY_ARCHITECTURE=i386 +.endif + +cloudabi64_vdso.o: ${VDSO_SRCS} + ${CC} -shared -nostdinc -nostdlib \ + -Wl,-T${SYSDIR}/compat/cloudabi64/cloudabi64_vdso.lds.s \ + -D_KERNEL -I. -I${SYSDIR} -I${SYSDIR}/contrib/cloudabi \ + -O2 -fomit-frame-pointer \ + ${VDSO_SRCS} -o ${.TARGET} + +cloudabi64_vdso_blob.o: cloudabi64_vdso.o + ${OBJCOPY} --input-target binary \ + --output-target ${OUTPUT_TARGET} \ + --binary-architecture ${BINARY_ARCHITECTURE} \ + cloudabi64_vdso.o ${.TARGET} + .include From cca89ee385459d978856d451dfab52ddc385fae6 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Wed, 10 Aug 2016 21:59:59 +0000 Subject: [PATCH 29/76] Fix sorting in r303934. Reported by: jhb X-MFC-With: r303934 MFC after: 3 days Sponsored by: EMC / Isilon Storage Division --- usr.bin/truss/syscalls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c index 7969c592cfcb..7046b56049c9 100644 --- a/usr.bin/truss/syscalls.c +++ b/usr.bin/truss/syscalls.c @@ -279,10 +279,10 @@ static struct syscall decoded_syscalls[] = { .args = { { Name, 0 }, { Name, 1 } } }, { .name = "renameat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 } } }, - { .name = "rmdir", .ret_type = 1, .nargs = 1, - .args = { { Name, 0 } } }, { .name = "rfork", .ret_type = 1, .nargs = 1, .args = { { Rforkflags, 0 } } }, + { .name = "rmdir", .ret_type = 1, .nargs = 1, + .args = { { Name, 0 } } }, { .name = "select", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Fd_set, 1 }, { Fd_set, 2 }, { Fd_set, 3 }, { Timeval, 4 } } }, From d23b7f655cd636fd20ea16ea523944735b291b0a Mon Sep 17 00:00:00 2001 From: Devin Teske Date: Wed, 10 Aug 2016 23:24:21 +0000 Subject: [PATCH 30/76] Allow enforce_statfs (see jail(8)) to be set per jail Reviewed by: jelischer MFC after: 3 days --- etc/rc.d/jail | 1 + 1 file changed, 1 insertion(+) diff --git a/etc/rc.d/jail b/etc/rc.d/jail index 51ecf77eb9cc..216c80e7fca3 100755 --- a/etc/rc.d/jail +++ b/etc/rc.d/jail @@ -260,6 +260,7 @@ parse_options() extract_var $_jv set_hostname_allow allow.set_hostname YN NO extract_var $_jv sysvipc_allow allow.sysvipc YN NO + extract_var $_jv enforce_statfs enforce_statfs - 2 extract_var $_jv osreldate osreldate extract_var $_jv osrelease osrelease for _p in $_parameters; do From 0ea702cd466d31b9c4392b850b5564a80c21b407 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 11 Aug 2016 03:12:56 +0000 Subject: [PATCH 31/76] cam/da: Add quirk for I-O Data USB Flash Disk PR: 211716 Submitted by: Jun Su Reported by: Jun Su MFC after: 1 week Sponsored by: Microsoft --- sys/cam/scsi/scsi_da.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index b869467d5288..8bf3ed0241a1 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -814,6 +814,14 @@ static struct da_quirk_entry da_quirk_table[] = {T_DIRECT, SIP_MEDIA_REMOVABLE, "JetFlash", "Transcend*", "*"}, /*quirks*/ DA_Q_NO_RC16 }, + { + /* + * I-O Data USB Flash Disk + * PR: usb/211716 + */ + {T_DIRECT, SIP_MEDIA_REMOVABLE, "I-O DATA", "USB Flash Disk*", + "*"}, /*quirks*/ DA_Q_NO_RC16 + }, /* ATA/SATA devices over SAS/USB/... */ { /* Hitachi Advanced Format (4k) drives */ From 99fc691eecd38caeb621a4280f0a18f167d7b644 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 11 Aug 2016 03:20:38 +0000 Subject: [PATCH 32/76] hyperv/vmbus: Add macro to get channel packet data length. MFC after: 1 week Sponsored by: Microsoft Differential Revision: https://reviews.freebsd.org/D7455 --- sys/dev/hyperv/include/vmbus.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sys/dev/hyperv/include/vmbus.h b/sys/dev/hyperv/include/vmbus.h index 8fde5885fdff..0a16e6e8957d 100644 --- a/sys/dev/hyperv/include/vmbus.h +++ b/sys/dev/hyperv/include/vmbus.h @@ -89,6 +89,11 @@ struct vmbus_chanpkt_hdr { (const void *)((const uint8_t *)(pkt) + \ VMBUS_CHANPKT_GETLEN((pkt)->cph_hlen)) +/* Include padding */ +#define VMBUS_CHANPKT_DATALEN(pkt) \ + (VMBUS_CHANPKT_GETLEN((pkt)->cph_tlen) -\ + VMBUS_CHANPKT_GETLEN((pkt)->cph_hlen)) + struct vmbus_rxbuf_desc { uint32_t rb_len; uint32_t rb_ofs; From c357102c5b8f95736c596ebaae3d39a821b26595 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 11 Aug 2016 05:18:09 +0000 Subject: [PATCH 33/76] Remove files unused after pulling system call names from libsysdecode. --- usr.bin/kdump/linux32_syscalls.conf | 11 ----------- usr.bin/kdump/linux_syscalls.conf | 11 ----------- usr.bin/truss/makesyscallsconf.sh | 21 --------------------- 3 files changed, 43 deletions(-) delete mode 100644 usr.bin/kdump/linux32_syscalls.conf delete mode 100644 usr.bin/kdump/linux_syscalls.conf delete mode 100755 usr.bin/truss/makesyscallsconf.sh diff --git a/usr.bin/kdump/linux32_syscalls.conf b/usr.bin/kdump/linux32_syscalls.conf deleted file mode 100644 index 66a67fd8e2f0..000000000000 --- a/usr.bin/kdump/linux32_syscalls.conf +++ /dev/null @@ -1,11 +0,0 @@ -# $FreeBSD$ -sysnames="linux32_syscalls.c" -sysproto="/dev/null" -sysproto_h=_LINUX32_SYSPROTO_H_ -syshdr="/dev/null" -syssw="/dev/null" -sysmk="/dev/null" -syscallprefix="LINUX32_SYS_" -switchname="/dev/null" -namesname="linux32_syscallnames" -systrace="/dev/null" diff --git a/usr.bin/kdump/linux_syscalls.conf b/usr.bin/kdump/linux_syscalls.conf deleted file mode 100644 index 82adb560c131..000000000000 --- a/usr.bin/kdump/linux_syscalls.conf +++ /dev/null @@ -1,11 +0,0 @@ -# $FreeBSD$ -sysnames="linux_syscalls.c" -sysproto="/dev/null" -sysproto_h=_LINUX_SYSPROTO_H_ -syshdr="/dev/null" -syssw="/dev/null" -sysmk="/dev/null" -syscallprefix="LINUX_SYS_" -switchname="/dev/null" -namesname="linux_syscallnames" -systrace="/dev/null" diff --git a/usr.bin/truss/makesyscallsconf.sh b/usr.bin/truss/makesyscallsconf.sh deleted file mode 100755 index 81eb78ea9f09..000000000000 --- a/usr.bin/truss/makesyscallsconf.sh +++ /dev/null @@ -1,21 +0,0 @@ -#! /bin/sh -# $FreeBSD$ - -ABI="$1" -CONF="$2" - -header="${ABI}_syscalls.h" - -cat > "${CONF}" << EOF -sysnames="${header}.tmp" -sysproto="/dev/null" -sysproto_h="/dev/null" -syshdr="/dev/null" -sysmk="/dev/null" -syssw="/dev/null" -syshide="/dev/null" -syscallprefix="SYS_" -switchname="sysent" -namesname="syscallnames" -systrace="/dev/null" -EOF From 6212aa15fc7b7de8e2b5e57c8593f2d592ebd192 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 11 Aug 2016 05:49:49 +0000 Subject: [PATCH 34/76] hyperv/vmbus: Add APIs for various types of transactions. Reviewed by: Jun Su MFC after: 1 week Sponsored by: Microsoft Differential Revision: https://reviews.freebsd.org/D7456 --- sys/conf/files.amd64 | 1 + sys/conf/files.i386 | 1 + sys/dev/hyperv/include/vmbus_xact.h | 54 ++++++ sys/dev/hyperv/vmbus/vmbus_xact.c | 278 ++++++++++++++++++++++++++++ sys/modules/hyperv/vmbus/Makefile | 3 +- 5 files changed, 336 insertions(+), 1 deletion(-) create mode 100644 sys/dev/hyperv/include/vmbus_xact.h create mode 100644 sys/dev/hyperv/vmbus/vmbus_xact.c diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index df5d1bcfb88e..b9c6c67f9665 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -294,6 +294,7 @@ dev/hyperv/vmbus/vmbus_br.c optional hyperv dev/hyperv/vmbus/vmbus_chan.c optional hyperv dev/hyperv/vmbus/vmbus_et.c optional hyperv dev/hyperv/vmbus/vmbus_if.m optional hyperv +dev/hyperv/vmbus/vmbus_xact.c optional hyperv dev/hyperv/vmbus/amd64/hyperv_machdep.c optional hyperv dev/hyperv/vmbus/amd64/vmbus_vector.S optional hyperv dev/nfe/if_nfe.c optional nfe pci diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 6ac3783a0fc1..516bf77f7446 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -253,6 +253,7 @@ dev/hyperv/vmbus/vmbus_br.c optional hyperv dev/hyperv/vmbus/vmbus_chan.c optional hyperv dev/hyperv/vmbus/vmbus_et.c optional hyperv dev/hyperv/vmbus/vmbus_if.m optional hyperv +dev/hyperv/vmbus/vmbus_xact.c optional hyperv dev/hyperv/vmbus/i386/hyperv_machdep.c optional hyperv dev/hyperv/vmbus/i386/vmbus_vector.S optional hyperv dev/ichwd/ichwd.c optional ichwd diff --git a/sys/dev/hyperv/include/vmbus_xact.h b/sys/dev/hyperv/include/vmbus_xact.h new file mode 100644 index 000000000000..e214d6880fc4 --- /dev/null +++ b/sys/dev/hyperv/include/vmbus_xact.h @@ -0,0 +1,54 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VMBUS_XACT_H_ +#define _VMBUS_XACT_H_ + +#include +#include + +struct vmbus_xact; +struct vmbus_xact_ctx; + +struct vmbus_xact_ctx *vmbus_xact_ctx_create(bus_dma_tag_t dtag, + size_t req_size, size_t resp_size); +void vmbus_xact_ctx_destroy(struct vmbus_xact_ctx *ctx); +struct vmbus_xact *vmbus_xact_get(struct vmbus_xact_ctx *ctx, + size_t req_len); +void vmbus_xact_put(struct vmbus_xact *xact); + +void *vmbus_xact_req_data(const struct vmbus_xact *xact); +bus_addr_t vmbus_xact_req_paddr(const struct vmbus_xact *xact); +void vmbus_xact_activate(struct vmbus_xact *xact); +void vmbus_xact_deactivate(struct vmbus_xact *xact); +const void *vmbus_xact_wait(struct vmbus_xact *xact, + size_t *resp_len); +void vmbus_xact_wakeup(struct vmbus_xact *xact, + const void *data, size_t dlen); + +#endif /* !_VMBUS_XACT_H_ */ diff --git a/sys/dev/hyperv/vmbus/vmbus_xact.c b/sys/dev/hyperv/vmbus/vmbus_xact.c new file mode 100644 index 000000000000..3bc9008ae05e --- /dev/null +++ b/sys/dev/hyperv/vmbus/vmbus_xact.c @@ -0,0 +1,278 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include + +struct vmbus_xact { + struct vmbus_xact_ctx *x_ctx; + + void *x_req; + struct hyperv_dma x_req_dma; + + const void *x_resp; + size_t x_resp_len; + void *x_resp0; +}; + +struct vmbus_xact_ctx { + uint32_t xc_flags; + size_t xc_req_size; + size_t xc_resp_size; + + struct vmbus_xact *xc_free; + struct mtx xc_free_lock; + + struct vmbus_xact *xc_active; + struct mtx xc_active_lock; +}; + +#define VMBUS_XACT_CTXF_DESTROY 0x0001 + +static struct vmbus_xact *vmbus_xact_alloc(struct vmbus_xact_ctx *, + bus_dma_tag_t); +static void vmbus_xact_free(struct vmbus_xact *); +static struct vmbus_xact *vmbus_xact_get1(struct vmbus_xact_ctx *, + uint32_t); + +static struct vmbus_xact * +vmbus_xact_alloc(struct vmbus_xact_ctx *ctx, bus_dma_tag_t parent_dtag) +{ + struct vmbus_xact *xact; + + xact = malloc(sizeof(*xact), M_DEVBUF, M_WAITOK | M_ZERO); + xact->x_ctx = ctx; + + /* XXX assume that page aligned is enough */ + xact->x_req = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, + ctx->xc_req_size, &xact->x_req_dma, BUS_DMA_WAITOK); + if (xact->x_req == NULL) { + free(xact, M_DEVBUF); + return (NULL); + } + xact->x_resp0 = malloc(ctx->xc_resp_size, M_DEVBUF, M_WAITOK); + + return (xact); +} + +static void +vmbus_xact_free(struct vmbus_xact *xact) +{ + + hyperv_dmamem_free(&xact->x_req_dma, xact->x_req); + free(xact->x_resp0, M_DEVBUF); + free(xact, M_DEVBUF); +} + +static struct vmbus_xact * +vmbus_xact_get1(struct vmbus_xact_ctx *ctx, uint32_t dtor_flag) +{ + struct vmbus_xact *xact; + + mtx_lock(&ctx->xc_free_lock); + + while ((ctx->xc_flags & dtor_flag) == 0 && ctx->xc_free == NULL) + mtx_sleep(&ctx->xc_free, &ctx->xc_free_lock, 0, "gxact", 0); + if (ctx->xc_flags & dtor_flag) { + /* Being destroyed */ + xact = NULL; + } else { + xact = ctx->xc_free; + KASSERT(xact != NULL, ("no free xact")); + KASSERT(xact->x_resp == NULL, ("xact has pending response")); + ctx->xc_free = NULL; + } + + mtx_unlock(&ctx->xc_free_lock); + + return (xact); +} + +struct vmbus_xact_ctx * +vmbus_xact_ctx_create(bus_dma_tag_t dtag, size_t req_size, size_t resp_size) +{ + struct vmbus_xact_ctx *ctx; + + ctx = malloc(sizeof(*ctx), M_DEVBUF, M_WAITOK | M_ZERO); + ctx->xc_req_size = req_size; + ctx->xc_resp_size = resp_size; + + ctx->xc_free = vmbus_xact_alloc(ctx, dtag); + if (ctx->xc_free == NULL) { + free(ctx, M_DEVBUF); + return (NULL); + } + + mtx_init(&ctx->xc_free_lock, "vmbus xact free", NULL, MTX_DEF); + mtx_init(&ctx->xc_active_lock, "vmbus xact active", NULL, MTX_DEF); + + return (ctx); +} + +void +vmbus_xact_ctx_destroy(struct vmbus_xact_ctx *ctx) +{ + struct vmbus_xact *xact; + + mtx_lock(&ctx->xc_free_lock); + ctx->xc_flags |= VMBUS_XACT_CTXF_DESTROY; + mtx_unlock(&ctx->xc_free_lock); + wakeup(&ctx->xc_free); + + xact = vmbus_xact_get1(ctx, 0); + if (xact == NULL) + panic("can't get xact"); + + vmbus_xact_free(xact); + mtx_destroy(&ctx->xc_free_lock); + mtx_destroy(&ctx->xc_active_lock); + free(ctx, M_DEVBUF); +} + +struct vmbus_xact * +vmbus_xact_get(struct vmbus_xact_ctx *ctx, size_t req_len) +{ + struct vmbus_xact *xact; + + if (req_len > ctx->xc_req_size) + panic("invalid request size %zu", req_len); + + xact = vmbus_xact_get1(ctx, VMBUS_XACT_CTXF_DESTROY); + if (xact == NULL) + return (NULL); + + memset(xact->x_req, 0, req_len); + return (xact); +} + +void +vmbus_xact_put(struct vmbus_xact *xact) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + + KASSERT(ctx->xc_active == NULL, ("pending active xact")); + xact->x_resp = NULL; + + mtx_lock(&ctx->xc_free_lock); + KASSERT(ctx->xc_free == NULL, ("has free xact")); + ctx->xc_free = xact; + mtx_unlock(&ctx->xc_free_lock); + wakeup(&ctx->xc_free); +} + +void * +vmbus_xact_req_data(const struct vmbus_xact *xact) +{ + + return (xact->x_req); +} + +bus_addr_t +vmbus_xact_req_paddr(const struct vmbus_xact *xact) +{ + + return (xact->x_req_dma.hv_paddr); +} + +void +vmbus_xact_activate(struct vmbus_xact *xact) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + + KASSERT(xact->x_resp == NULL, ("xact has pending response")); + + mtx_lock(&ctx->xc_active_lock); + KASSERT(ctx->xc_active == NULL, ("pending active xact")); + ctx->xc_active = xact; + mtx_unlock(&ctx->xc_active_lock); +} + +void +vmbus_xact_deactivate(struct vmbus_xact *xact) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + + mtx_lock(&ctx->xc_active_lock); + KASSERT(ctx->xc_active == xact, ("xact mismatch")); + ctx->xc_active = NULL; + mtx_unlock(&ctx->xc_active_lock); +} + +const void * +vmbus_xact_wait(struct vmbus_xact *xact, size_t *resp_len) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + const void *resp; + + mtx_lock(&ctx->xc_active_lock); + + KASSERT(ctx->xc_active == xact, ("xact mismatch")); + while (xact->x_resp == NULL) { + mtx_sleep(&ctx->xc_active, &ctx->xc_active_lock, 0, + "wxact", 0); + } + ctx->xc_active = NULL; + + resp = xact->x_resp; + *resp_len = xact->x_resp_len; + + mtx_unlock(&ctx->xc_active_lock); + + return (resp); +} + +void +vmbus_xact_wakeup(struct vmbus_xact *xact, const void *data, size_t dlen) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + size_t cplen = dlen; + + if (cplen > ctx->xc_resp_size) { + printf("vmbus: xact response truncated %zu -> %zu\n", + cplen, ctx->xc_resp_size); + cplen = ctx->xc_resp_size; + } + + mtx_lock(&ctx->xc_active_lock); + + KASSERT(ctx->xc_active == xact, ("xact mismatch")); + memcpy(xact->x_resp0, data, cplen); + xact->x_resp_len = cplen; + xact->x_resp = xact->x_resp0; + + mtx_unlock(&ctx->xc_active_lock); + wakeup(&ctx->xc_active); +} diff --git a/sys/modules/hyperv/vmbus/Makefile b/sys/modules/hyperv/vmbus/Makefile index fe1e7fc9ed48..14eacf5a085e 100644 --- a/sys/modules/hyperv/vmbus/Makefile +++ b/sys/modules/hyperv/vmbus/Makefile @@ -10,7 +10,8 @@ SRCS= hyperv.c \ vmbus.c \ vmbus_br.c \ vmbus_chan.c \ - vmbus_et.c + vmbus_et.c \ + vmbus_xact.c SRCS+= acpi_if.h bus_if.h device_if.h opt_acpi.h vmbus_if.h # XXX: for assym.s From 584e7c57f1212045d7a9dd71cb731dcc242d1546 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 11 Aug 2016 06:14:54 +0000 Subject: [PATCH 35/76] hyperv/hn: Switch to vmbus xact APIs for NVS initialization Reviewed by: Jun Su MFC after: 1 week Sponsored by: Microsoft Differential Revision: https://reviews.freebsd.org/D7457 --- sys/dev/hyperv/netvsc/hv_net_vsc.c | 104 ++++++++++++------ sys/dev/hyperv/netvsc/hv_net_vsc.h | 1 + sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c | 13 ++- sys/dev/hyperv/netvsc/hv_rndis_filter.c | 10 +- sys/dev/hyperv/netvsc/if_hnreg.h | 62 +++++++++++ sys/dev/hyperv/netvsc/if_hnvar.h | 4 +- 6 files changed, 152 insertions(+), 42 deletions(-) create mode 100644 sys/dev/hyperv/netvsc/if_hnreg.h diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c index 9aa310ad0635..4ae95acb8548 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.c +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c @@ -45,9 +45,11 @@ #include #include -#include "hv_net_vsc.h" -#include "hv_rndis.h" -#include "hv_rndis_filter.h" +#include +#include +#include +#include +#include MALLOC_DEFINE(M_NETVSC, "netvsc", "Hyper-V netvsc driver"); @@ -70,7 +72,10 @@ static void hv_nv_on_receive(netvsc_dev *net_dev, const struct vmbus_chanpkt_hdr *pkt); static void hn_nvs_sent_none(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, struct vmbus_channel *chan, - const struct nvsp_msg_ *msg); + const struct nvsp_msg_ *msg, int); +static void hn_nvs_sent_xact(struct hn_send_ctx *sndc, + struct netvsc_dev_ *net_dev, struct vmbus_channel *chan, + const struct nvsp_msg_ *msg, int dlen); static struct hn_send_ctx hn_send_ctx_none = HN_SEND_CTX_INITIALIZER(hn_nvs_sent_none, NULL); @@ -462,45 +467,64 @@ hv_nv_destroy_send_buffer(netvsc_dev *net_dev) return (ret); } - -/* - * Attempt to negotiate the caller-specified NVSP version - * - * For NVSP v2, Server 2008 R2 does not set - * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers - * to the negotiated version, so we cannot rely on that. - */ static int hv_nv_negotiate_nvsp_protocol(struct hn_softc *sc, netvsc_dev *net_dev, - uint32_t nvsp_ver) + uint32_t nvs_ver) { struct hn_send_ctx sndc; - nvsp_msg *init_pkt; - int ret; + struct vmbus_xact *xact; + struct hn_nvs_init *init; + const struct hn_nvs_init_resp *resp; + size_t resp_len; + uint32_t status; + int error; - init_pkt = &net_dev->channel_init_packet; - memset(init_pkt, 0, sizeof(nvsp_msg)); - init_pkt->hdr.msg_type = nvsp_msg_type_init; + xact = vmbus_xact_get(sc->hn_xact, sizeof(*init)); + if (xact == NULL) { + if_printf(sc->hn_ifp, "no xact for nvs init\n"); + return (ENXIO); + } - /* - * Specify parameter as the only acceptable protocol version - */ - init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver; - init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver; + init = vmbus_xact_req_data(xact); + init->nvs_type = HN_NVS_TYPE_INIT; + init->nvs_ver_min = nvs_ver; + init->nvs_ver_max = nvs_ver; - /* Send the init request */ - hn_send_ctx_init_simple(&sndc, hn_nvs_sent_wakeup, NULL); - ret = vmbus_chan_send(sc->hn_prichan, + vmbus_xact_activate(xact); + hn_send_ctx_init_simple(&sndc, hn_nvs_sent_xact, xact); + + error = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)&sndc); - if (ret != 0) - return (-1); + init, sizeof(*init), (uint64_t)(uintptr_t)&sndc); + if (error) { + if_printf(sc->hn_ifp, "send nvs init failed: %d\n", error); + vmbus_xact_deactivate(xact); + vmbus_xact_put(xact); + return (error); + } - sema_wait(&net_dev->channel_init_sema); - - if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success) + resp = vmbus_xact_wait(xact, &resp_len); + if (resp_len < sizeof(*resp)) { + if_printf(sc->hn_ifp, "invalid init resp length %zu\n", + resp_len); + vmbus_xact_put(xact); return (EINVAL); + } + if (resp->nvs_type != HN_NVS_TYPE_INIT_RESP) { + if_printf(sc->hn_ifp, "not init resp, type %u\n", + resp->nvs_type); + vmbus_xact_put(xact); + return (EINVAL); + } + status = resp->nvs_status; + vmbus_xact_put(xact); + + if (status != HN_NVS_STATUS_OK) { + if_printf(sc->hn_ifp, "nvs init failed for ver 0x%x\n", + nvs_ver); + return (EINVAL); + } return (0); } @@ -744,17 +768,26 @@ hv_nv_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel) void hn_nvs_sent_wakeup(struct hn_send_ctx *sndc __unused, struct netvsc_dev_ *net_dev, struct vmbus_channel *chan __unused, - const struct nvsp_msg_ *msg) + const struct nvsp_msg_ *msg, int dlen __unused) { /* Copy the response back */ memcpy(&net_dev->channel_init_packet, msg, sizeof(nvsp_msg)); sema_post(&net_dev->channel_init_sema); } +static void +hn_nvs_sent_xact(struct hn_send_ctx *sndc, + struct netvsc_dev_ *net_dev __unused, struct vmbus_channel *chan __unused, + const struct nvsp_msg_ *msg, int dlen) +{ + + vmbus_xact_wakeup(sndc->hn_cbarg, msg, dlen); +} + static void hn_nvs_sent_none(struct hn_send_ctx *sndc __unused, struct netvsc_dev_ *net_dev __unused, struct vmbus_channel *chan __unused, - const struct nvsp_msg_ *msg __unused) + const struct nvsp_msg_ *msg __unused, int dlen __unused) { /* EMPTY */ } @@ -788,7 +821,8 @@ hv_nv_on_send_completion(netvsc_dev *net_dev, struct vmbus_channel *chan, struct hn_send_ctx *sndc; sndc = (struct hn_send_ctx *)(uintptr_t)pkt->cph_xactid; - sndc->hn_cb(sndc, net_dev, chan, VMBUS_CHANPKT_CONST_DATA(pkt)); + sndc->hn_cb(sndc, net_dev, chan, VMBUS_CHANPKT_CONST_DATA(pkt), + VMBUS_CHANPKT_DATALEN(pkt)); /* * NOTE: * 'sndc' CAN NOT be accessed anymore, since it can be freed by diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.h b/sys/dev/hyperv/netvsc/hv_net_vsc.h index 81eb0416052c..69f0a61f6e07 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.h +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.h @@ -1243,6 +1243,7 @@ typedef struct hn_softc { struct taskqueue *hn_tx_taskq; struct sysctl_oid *hn_tx_sysctl_tree; struct sysctl_oid *hn_rx_sysctl_tree; + struct vmbus_xact_ctx *hn_xact; } hn_softc_t; /* diff --git a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c index b3dc6d100f39..a6055290ab03 100644 --- a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c +++ b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c @@ -115,6 +115,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include "hv_net_vsc.h" #include "hv_rndis.h" @@ -124,6 +125,9 @@ __FBSDID("$FreeBSD$"); /* Short for Hyper-V network interface */ #define NETVSC_DEVNAME "hn" +#define HN_XACT_REQ_SIZE (2 * PAGE_SIZE) +#define HN_XACT_RESP_SIZE (2 * PAGE_SIZE) + /* * It looks like offset 0 of buf is reserved to hold the softc pointer. * The sc pointer evidently not needed, and is not presently populated. @@ -542,6 +546,11 @@ netvsc_attach(device_t dev) IFCAP_LRO; ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO; + sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), + HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE); + if (sc->hn_xact == NULL) + goto failed; + error = hv_rf_on_device_add(sc, &device_info, ring_cnt, &sc->hn_rx_ring[0]); if (error) @@ -643,6 +652,7 @@ netvsc_detach(device_t dev) if (sc->hn_tx_taskq != hn_tx_taskq) taskqueue_free(sc->hn_tx_taskq); + vmbus_xact_ctx_destroy(sc->hn_xact); return (0); } @@ -782,7 +792,8 @@ hn_txeof(struct hn_tx_ring *txr) static void hn_tx_done(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, - struct vmbus_channel *chan, const struct nvsp_msg_ *msg __unused) + struct vmbus_channel *chan, const struct nvsp_msg_ *msg __unused, + int dlen __unused) { struct hn_txdesc *txd = sndc->hn_cbarg; struct hn_tx_ring *txr; diff --git a/sys/dev/hyperv/netvsc/hv_rndis_filter.c b/sys/dev/hyperv/netvsc/hv_rndis_filter.c index e7e74b6e36e3..7daf7b282f5f 100644 --- a/sys/dev/hyperv/netvsc/hv_rndis_filter.c +++ b/sys/dev/hyperv/netvsc/hv_rndis_filter.c @@ -91,10 +91,10 @@ hv_rf_send_offload_request(struct hn_softc *sc, static void hn_rndis_sent_halt(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, struct vmbus_channel *chan, - const struct nvsp_msg_ *msg); + const struct nvsp_msg_ *msg, int dlen); static void hn_rndis_sent_cb(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, struct vmbus_channel *chan, - const struct nvsp_msg_ *msg); + const struct nvsp_msg_ *msg, int dlen); /* * Set the Per-Packet-Info with the specified type @@ -1239,7 +1239,8 @@ hv_rf_on_close(struct hn_softc *sc) static void hn_rndis_sent_cb(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, - struct vmbus_channel *chan __unused, const struct nvsp_msg_ *msg __unused) + struct vmbus_channel *chan __unused, const struct nvsp_msg_ *msg __unused, + int dlen __unused) { if (sndc->hn_chim_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) hn_chim_free(net_dev, sndc->hn_chim_idx); @@ -1247,7 +1248,8 @@ hn_rndis_sent_cb(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, static void hn_rndis_sent_halt(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, - struct vmbus_channel *chan __unused, const struct nvsp_msg_ *msg __unused) + struct vmbus_channel *chan __unused, const struct nvsp_msg_ *msg __unused, + int dlen __unused) { rndis_request *request = sndc->hn_cbarg; diff --git a/sys/dev/hyperv/netvsc/if_hnreg.h b/sys/dev/hyperv/netvsc/if_hnreg.h new file mode 100644 index 000000000000..6f6e4c4117cc --- /dev/null +++ b/sys/dev/hyperv/netvsc/if_hnreg.h @@ -0,0 +1,62 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IF_HNREG_H_ +#define _IF_HNREG_H_ + +#include +#include + +#define HN_NVS_STATUS_OK 1 + +#define HN_NVS_TYPE_INIT 1 +#define HN_NVS_TYPE_INIT_RESP 2 + +/* + * Any size less than this one will _not_ work, e.g. hn_nvs_init + * only has 12B valid data, however, if only 12B data were sent, + * Hypervisor would never reply. + */ +#define HN_NVS_REQSIZE_MIN 32 + +struct hn_nvs_init { + uint32_t nvs_type; /* HN_NVS_TYPE_INIT */ + uint32_t nvs_ver_min; + uint32_t nvs_ver_max; + uint8_t nvs_rsvd[20]; +} __packed; +CTASSERT(sizeof(struct hn_nvs_init) >= HN_NVS_REQSIZE_MIN); + +struct hn_nvs_init_resp { + uint32_t nvs_type; /* HN_NVS_TYPE_INIT_RESP */ + uint32_t nvs_ver; /* deprecated */ + uint32_t nvs_rsvd; + uint32_t nvs_status; /* HN_NVS_STATUS_ */ +} __packed; + +#endif /* !_IF_HNREG_H_ */ diff --git a/sys/dev/hyperv/netvsc/if_hnvar.h b/sys/dev/hyperv/netvsc/if_hnvar.h index 6e5fb5090284..9d2c1267ff73 100644 --- a/sys/dev/hyperv/netvsc/if_hnvar.h +++ b/sys/dev/hyperv/netvsc/if_hnvar.h @@ -40,7 +40,7 @@ struct hn_send_ctx; typedef void (*hn_sent_callback_t) (struct hn_send_ctx *, struct netvsc_dev_ *, - struct vmbus_channel *, const struct nvsp_msg_ *); + struct vmbus_channel *, const struct nvsp_msg_ *, int); struct hn_send_ctx { hn_sent_callback_t hn_cb; @@ -77,7 +77,7 @@ hn_send_ctx_init_simple(struct hn_send_ctx *sndc, hn_sent_callback_t cb, void hn_nvs_sent_wakeup(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, struct vmbus_channel *chan, - const struct nvsp_msg_ *msg); + const struct nvsp_msg_ *msg, int dlen); void hn_chim_free(struct netvsc_dev_ *net_dev, uint32_t chim_idx); #endif /* !_IF_HNVAR_H_ */ From 3dba61dd3cfd4b35309643e84c178f2df1cce282 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 11 Aug 2016 06:24:17 +0000 Subject: [PATCH 36/76] hyperv/vmbus: Use xact APIs to implement post message Hypercall APIs Avoid code duplication. MFC after: 1 week Sponsored by: Microsoft Differential Revision: https://reviews.freebsd.org/D7458 --- sys/dev/hyperv/include/vmbus_xact.h | 7 +- sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c | 2 +- sys/dev/hyperv/vmbus/vmbus.c | 233 ++++-------------- sys/dev/hyperv/vmbus/vmbus_var.h | 2 +- sys/dev/hyperv/vmbus/vmbus_xact.c | 45 +++- 5 files changed, 91 insertions(+), 198 deletions(-) diff --git a/sys/dev/hyperv/include/vmbus_xact.h b/sys/dev/hyperv/include/vmbus_xact.h index e214d6880fc4..c2919aa8c1f2 100644 --- a/sys/dev/hyperv/include/vmbus_xact.h +++ b/sys/dev/hyperv/include/vmbus_xact.h @@ -36,7 +36,8 @@ struct vmbus_xact; struct vmbus_xact_ctx; struct vmbus_xact_ctx *vmbus_xact_ctx_create(bus_dma_tag_t dtag, - size_t req_size, size_t resp_size); + size_t req_size, size_t resp_size, + size_t priv_size); void vmbus_xact_ctx_destroy(struct vmbus_xact_ctx *ctx); struct vmbus_xact *vmbus_xact_get(struct vmbus_xact_ctx *ctx, size_t req_len); @@ -44,11 +45,15 @@ void vmbus_xact_put(struct vmbus_xact *xact); void *vmbus_xact_req_data(const struct vmbus_xact *xact); bus_addr_t vmbus_xact_req_paddr(const struct vmbus_xact *xact); +void *vmbus_xact_priv(const struct vmbus_xact *xact, + size_t priv_len); void vmbus_xact_activate(struct vmbus_xact *xact); void vmbus_xact_deactivate(struct vmbus_xact *xact); const void *vmbus_xact_wait(struct vmbus_xact *xact, size_t *resp_len); void vmbus_xact_wakeup(struct vmbus_xact *xact, const void *data, size_t dlen); +void vmbus_xact_ctx_wakeup(struct vmbus_xact_ctx *ctx, + const void *data, size_t dlen); #endif /* !_VMBUS_XACT_H_ */ diff --git a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c index a6055290ab03..4653e776da92 100644 --- a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c +++ b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c @@ -547,7 +547,7 @@ netvsc_attach(device_t dev) ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO; sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), - HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE); + HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); if (sc->hn_xact == NULL) goto failed; diff --git a/sys/dev/hyperv/vmbus/vmbus.c b/sys/dev/hyperv/vmbus/vmbus.c index 05c9c5b2d90c..6753c9e9b524 100644 --- a/sys/dev/hyperv/vmbus/vmbus.c +++ b/sys/dev/hyperv/vmbus/vmbus.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -62,25 +63,10 @@ __FBSDID("$FreeBSD$"); #define VMBUS_GPADL_START 0xe1e10 struct vmbus_msghc { - struct hypercall_postmsg_in *mh_inprm; + struct vmbus_xact *mh_xact; struct hypercall_postmsg_in mh_inprm_save; - struct hyperv_dma mh_inprm_dma; - - struct vmbus_message *mh_resp; - struct vmbus_message mh_resp0; }; -struct vmbus_msghc_ctx { - struct vmbus_msghc *mhc_free; - struct mtx mhc_free_lock; - uint32_t mhc_flags; - - struct vmbus_msghc *mhc_active; - struct mtx mhc_active_lock; -}; - -#define VMBUS_MSGHC_CTXF_DESTROY 0x0001 - static int vmbus_probe(device_t); static int vmbus_attach(device_t); static int vmbus_detach(device_t); @@ -116,15 +102,6 @@ static int vmbus_doattach(struct vmbus_softc *); static void vmbus_event_proc_dummy(struct vmbus_softc *, int); -static struct vmbus_msghc_ctx *vmbus_msghc_ctx_create(bus_dma_tag_t); -static void vmbus_msghc_ctx_destroy( - struct vmbus_msghc_ctx *); -static void vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *); -static struct vmbus_msghc *vmbus_msghc_alloc(bus_dma_tag_t); -static void vmbus_msghc_free(struct vmbus_msghc *); -static struct vmbus_msghc *vmbus_msghc_get1(struct vmbus_msghc_ctx *, - uint32_t); - static struct vmbus_softc *vmbus_sc; extern inthand_t IDTVEC(vmbus_isr); @@ -182,85 +159,6 @@ vmbus_get_softc(void) return vmbus_sc; } -static struct vmbus_msghc * -vmbus_msghc_alloc(bus_dma_tag_t parent_dtag) -{ - struct vmbus_msghc *mh; - - mh = malloc(sizeof(*mh), M_DEVBUF, M_WAITOK | M_ZERO); - - mh->mh_inprm = hyperv_dmamem_alloc(parent_dtag, - HYPERCALL_PARAM_ALIGN, 0, HYPERCALL_POSTMSGIN_SIZE, - &mh->mh_inprm_dma, BUS_DMA_WAITOK); - if (mh->mh_inprm == NULL) { - free(mh, M_DEVBUF); - return NULL; - } - return mh; -} - -static void -vmbus_msghc_free(struct vmbus_msghc *mh) -{ - hyperv_dmamem_free(&mh->mh_inprm_dma, mh->mh_inprm); - free(mh, M_DEVBUF); -} - -static void -vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *mhc) -{ - KASSERT(mhc->mhc_active == NULL, ("still have active msg hypercall")); - KASSERT(mhc->mhc_free == NULL, ("still have hypercall msg")); - - mtx_destroy(&mhc->mhc_free_lock); - mtx_destroy(&mhc->mhc_active_lock); - free(mhc, M_DEVBUF); -} - -static struct vmbus_msghc_ctx * -vmbus_msghc_ctx_create(bus_dma_tag_t parent_dtag) -{ - struct vmbus_msghc_ctx *mhc; - - mhc = malloc(sizeof(*mhc), M_DEVBUF, M_WAITOK | M_ZERO); - mtx_init(&mhc->mhc_free_lock, "vmbus msghc free", NULL, MTX_DEF); - mtx_init(&mhc->mhc_active_lock, "vmbus msghc act", NULL, MTX_DEF); - - mhc->mhc_free = vmbus_msghc_alloc(parent_dtag); - if (mhc->mhc_free == NULL) { - vmbus_msghc_ctx_free(mhc); - return NULL; - } - return mhc; -} - -static struct vmbus_msghc * -vmbus_msghc_get1(struct vmbus_msghc_ctx *mhc, uint32_t dtor_flag) -{ - struct vmbus_msghc *mh; - - mtx_lock(&mhc->mhc_free_lock); - - while ((mhc->mhc_flags & dtor_flag) == 0 && mhc->mhc_free == NULL) { - mtx_sleep(&mhc->mhc_free, &mhc->mhc_free_lock, 0, - "gmsghc", 0); - } - if (mhc->mhc_flags & dtor_flag) { - /* Being destroyed */ - mh = NULL; - } else { - mh = mhc->mhc_free; - KASSERT(mh != NULL, ("no free hypercall msg")); - KASSERT(mh->mh_resp == NULL, - ("hypercall msg has pending response")); - mhc->mhc_free = NULL; - } - - mtx_unlock(&mhc->mhc_free_lock); - - return mh; -} - void vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize) { @@ -269,7 +167,7 @@ vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize) if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) panic("invalid data size %zu", dsize); - inprm = mh->mh_inprm; + inprm = vmbus_xact_req_data(mh->mh_xact); memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE); inprm->hc_connid = VMBUS_CONNID_MESSAGE; inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL; @@ -280,63 +178,50 @@ struct vmbus_msghc * vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize) { struct vmbus_msghc *mh; + struct vmbus_xact *xact; if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) panic("invalid data size %zu", dsize); - mh = vmbus_msghc_get1(sc->vmbus_msg_hc, VMBUS_MSGHC_CTXF_DESTROY); - if (mh == NULL) - return NULL; + xact = vmbus_xact_get(sc->vmbus_xc, + dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0])); + if (xact == NULL) + return (NULL); + + mh = vmbus_xact_priv(xact, sizeof(*mh)); + mh->mh_xact = xact; vmbus_msghc_reset(mh, dsize); - return mh; + return (mh); } void -vmbus_msghc_put(struct vmbus_softc *sc, struct vmbus_msghc *mh) +vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) { - struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; - KASSERT(mhc->mhc_active == NULL, ("msg hypercall is active")); - mh->mh_resp = NULL; - - mtx_lock(&mhc->mhc_free_lock); - KASSERT(mhc->mhc_free == NULL, ("has free hypercall msg")); - mhc->mhc_free = mh; - mtx_unlock(&mhc->mhc_free_lock); - wakeup(&mhc->mhc_free); + vmbus_xact_put(mh->mh_xact); } void * vmbus_msghc_dataptr(struct vmbus_msghc *mh) { - return mh->mh_inprm->hc_data; -} + struct hypercall_postmsg_in *inprm; -static void -vmbus_msghc_ctx_destroy(struct vmbus_msghc_ctx *mhc) -{ - struct vmbus_msghc *mh; - - mtx_lock(&mhc->mhc_free_lock); - mhc->mhc_flags |= VMBUS_MSGHC_CTXF_DESTROY; - mtx_unlock(&mhc->mhc_free_lock); - wakeup(&mhc->mhc_free); - - mh = vmbus_msghc_get1(mhc, 0); - if (mh == NULL) - panic("can't get msghc"); - - vmbus_msghc_free(mh); - vmbus_msghc_ctx_free(mhc); + inprm = vmbus_xact_req_data(mh->mh_xact); + return (inprm->hc_data); } int vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) { sbintime_t time = SBT_1MS; + struct hypercall_postmsg_in *inprm; + bus_addr_t inprm_paddr; int i; + inprm = vmbus_xact_req_data(mh->mh_xact); + inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact); + /* * Save the input parameter so that we could restore the input * parameter if the Hypercall failed. @@ -345,7 +230,7 @@ vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) * Is this really necessary?! i.e. Will the Hypercall ever * overwrite the input parameter? */ - memcpy(&mh->mh_inprm_save, mh->mh_inprm, HYPERCALL_POSTMSGIN_SIZE); + memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE); /* * In order to cope with transient failures, e.g. insufficient @@ -357,7 +242,7 @@ vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) for (i = 0; i < HC_RETRY_MAX; ++i) { uint64_t status; - status = hypercall_post_message(mh->mh_inprm_dma.hv_paddr); + status = hypercall_post_message(inprm_paddr); if (status == HYPERCALL_STATUS_SUCCESS) return 0; @@ -366,8 +251,7 @@ vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) time *= 2; /* Restore input parameter and try again */ - memcpy(mh->mh_inprm, &mh->mh_inprm_save, - HYPERCALL_POSTMSGIN_SIZE); + memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE); } #undef HC_RETRY_MAX @@ -376,62 +260,30 @@ vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) } int -vmbus_msghc_exec(struct vmbus_softc *sc, struct vmbus_msghc *mh) +vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) { - struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; int error; - KASSERT(mh->mh_resp == NULL, ("hypercall msg has pending response")); - - mtx_lock(&mhc->mhc_active_lock); - KASSERT(mhc->mhc_active == NULL, ("pending active msg hypercall")); - mhc->mhc_active = mh; - mtx_unlock(&mhc->mhc_active_lock); - + vmbus_xact_activate(mh->mh_xact); error = vmbus_msghc_exec_noresult(mh); - if (error) { - mtx_lock(&mhc->mhc_active_lock); - KASSERT(mhc->mhc_active == mh, ("msghc mismatch")); - mhc->mhc_active = NULL; - mtx_unlock(&mhc->mhc_active_lock); - } + if (error) + vmbus_xact_deactivate(mh->mh_xact); return error; } const struct vmbus_message * -vmbus_msghc_wait_result(struct vmbus_softc *sc, struct vmbus_msghc *mh) +vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) { - struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; + size_t resp_len; - mtx_lock(&mhc->mhc_active_lock); - - KASSERT(mhc->mhc_active == mh, ("msghc mismatch")); - while (mh->mh_resp == NULL) { - mtx_sleep(&mhc->mhc_active, &mhc->mhc_active_lock, 0, - "wmsghc", 0); - } - mhc->mhc_active = NULL; - - mtx_unlock(&mhc->mhc_active_lock); - - return mh->mh_resp; + return (vmbus_xact_wait(mh->mh_xact, &resp_len)); } void vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg) { - struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; - struct vmbus_msghc *mh; - mtx_lock(&mhc->mhc_active_lock); - - mh = mhc->mhc_active; - KASSERT(mh != NULL, ("no pending msg hypercall")); - memcpy(&mh->mh_resp0, msg, sizeof(mh->mh_resp0)); - mh->mh_resp = &mh->mh_resp0; - - mtx_unlock(&mhc->mhc_active_lock); - wakeup(&mhc->mhc_active); + vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg)); } uint32_t @@ -1187,9 +1039,10 @@ vmbus_doattach(struct vmbus_softc *sc) /* * Create context for "post message" Hypercalls */ - sc->vmbus_msg_hc = vmbus_msghc_ctx_create( - bus_get_dma_tag(sc->vmbus_dev)); - if (sc->vmbus_msg_hc == NULL) { + sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev), + HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE, + sizeof(struct vmbus_msghc)); + if (sc->vmbus_xc == NULL) { ret = ENXIO; goto cleanup; } @@ -1244,9 +1097,9 @@ vmbus_doattach(struct vmbus_softc *sc) cleanup: vmbus_intr_teardown(sc); vmbus_dma_free(sc); - if (sc->vmbus_msg_hc != NULL) { - vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc); - sc->vmbus_msg_hc = NULL; + if (sc->vmbus_xc != NULL) { + vmbus_xact_ctx_destroy(sc->vmbus_xc); + sc->vmbus_xc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); mtx_destroy(&sc->vmbus_scan_lock); @@ -1305,9 +1158,9 @@ vmbus_detach(device_t dev) vmbus_intr_teardown(sc); vmbus_dma_free(sc); - if (sc->vmbus_msg_hc != NULL) { - vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc); - sc->vmbus_msg_hc = NULL; + if (sc->vmbus_xc != NULL) { + vmbus_xact_ctx_destroy(sc->vmbus_xc); + sc->vmbus_xc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); diff --git a/sys/dev/hyperv/vmbus/vmbus_var.h b/sys/dev/hyperv/vmbus/vmbus_var.h index c278c1512607..47d9004e5719 100644 --- a/sys/dev/hyperv/vmbus/vmbus_var.h +++ b/sys/dev/hyperv/vmbus/vmbus_var.h @@ -86,7 +86,7 @@ struct vmbus_softc { u_long *vmbus_rx_evtflags; /* compat evtflgs from host */ struct vmbus_channel **vmbus_chmap; - struct vmbus_msghc_ctx *vmbus_msg_hc; + struct vmbus_xact_ctx *vmbus_xc; struct vmbus_pcpu_data vmbus_pcpu[MAXCPU]; /* diff --git a/sys/dev/hyperv/vmbus/vmbus_xact.c b/sys/dev/hyperv/vmbus/vmbus_xact.c index 3bc9008ae05e..642c165bc293 100644 --- a/sys/dev/hyperv/vmbus/vmbus_xact.c +++ b/sys/dev/hyperv/vmbus/vmbus_xact.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); struct vmbus_xact { struct vmbus_xact_ctx *x_ctx; + void *x_priv; void *x_req; struct hyperv_dma x_req_dma; @@ -52,6 +53,7 @@ struct vmbus_xact_ctx { uint32_t xc_flags; size_t xc_req_size; size_t xc_resp_size; + size_t xc_priv_size; struct vmbus_xact *xc_free; struct mtx xc_free_lock; @@ -83,6 +85,8 @@ vmbus_xact_alloc(struct vmbus_xact_ctx *ctx, bus_dma_tag_t parent_dtag) free(xact, M_DEVBUF); return (NULL); } + if (ctx->xc_priv_size != 0) + xact->x_priv = malloc(ctx->xc_priv_size, M_DEVBUF, M_WAITOK); xact->x_resp0 = malloc(ctx->xc_resp_size, M_DEVBUF, M_WAITOK); return (xact); @@ -94,6 +98,8 @@ vmbus_xact_free(struct vmbus_xact *xact) hyperv_dmamem_free(&xact->x_req_dma, xact->x_req); free(xact->x_resp0, M_DEVBUF); + if (xact->x_priv != NULL) + free(xact->x_priv, M_DEVBUF); free(xact, M_DEVBUF); } @@ -122,13 +128,15 @@ vmbus_xact_get1(struct vmbus_xact_ctx *ctx, uint32_t dtor_flag) } struct vmbus_xact_ctx * -vmbus_xact_ctx_create(bus_dma_tag_t dtag, size_t req_size, size_t resp_size) +vmbus_xact_ctx_create(bus_dma_tag_t dtag, size_t req_size, size_t resp_size, + size_t priv_size) { struct vmbus_xact_ctx *ctx; ctx = malloc(sizeof(*ctx), M_DEVBUF, M_WAITOK | M_ZERO); ctx->xc_req_size = req_size; ctx->xc_resp_size = resp_size; + ctx->xc_priv_size = priv_size; ctx->xc_free = vmbus_xact_alloc(ctx, dtag); if (ctx->xc_free == NULL) { @@ -207,6 +215,15 @@ vmbus_xact_req_paddr(const struct vmbus_xact *xact) return (xact->x_req_dma.hv_paddr); } +void * +vmbus_xact_priv(const struct vmbus_xact *xact, size_t priv_len) +{ + + if (priv_len > xact->x_ctx->xc_priv_size) + panic("invalid priv size %zu", priv_len); + return (xact->x_priv); +} + void vmbus_xact_activate(struct vmbus_xact *xact) { @@ -254,25 +271,43 @@ vmbus_xact_wait(struct vmbus_xact *xact, size_t *resp_len) return (resp); } -void -vmbus_xact_wakeup(struct vmbus_xact *xact, const void *data, size_t dlen) +static void +vmbus_xact_save_resp(struct vmbus_xact *xact, const void *data, size_t dlen) { struct vmbus_xact_ctx *ctx = xact->x_ctx; size_t cplen = dlen; + mtx_assert(&ctx->xc_active_lock, MA_OWNED); + if (cplen > ctx->xc_resp_size) { printf("vmbus: xact response truncated %zu -> %zu\n", cplen, ctx->xc_resp_size); cplen = ctx->xc_resp_size; } - mtx_lock(&ctx->xc_active_lock); - KASSERT(ctx->xc_active == xact, ("xact mismatch")); memcpy(xact->x_resp0, data, cplen); xact->x_resp_len = cplen; xact->x_resp = xact->x_resp0; +} +void +vmbus_xact_wakeup(struct vmbus_xact *xact, const void *data, size_t dlen) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + + mtx_lock(&ctx->xc_active_lock); + vmbus_xact_save_resp(xact, data, dlen); + mtx_unlock(&ctx->xc_active_lock); + wakeup(&ctx->xc_active); +} + +void +vmbus_xact_ctx_wakeup(struct vmbus_xact_ctx *ctx, const void *data, size_t dlen) +{ + mtx_lock(&ctx->xc_active_lock); + KASSERT(ctx->xc_active != NULL, ("no pending xact")); + vmbus_xact_save_resp(ctx->xc_active, data, dlen); mtx_unlock(&ctx->xc_active_lock); wakeup(&ctx->xc_active); } From d293d0c934649ad9f1ab82ba3ffc64c47af53a67 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Thu, 11 Aug 2016 07:11:15 +0000 Subject: [PATCH 37/76] Remove unused textvp_fullpath() macro. MFC after: 1 month --- sys/sys/vnode.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 8c6bbfd17076..02707117d21b 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -618,8 +618,6 @@ u_quad_t init_va_filerev(void); int speedup_syncer(void); int vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen); -#define textvp_fullpath(p, rb, rfb) \ - vn_fullpath(FIRST_THREAD_IN_PROC(p), (p)->p_textvp, rb, rfb) int vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf); int vn_fullpath_global(struct thread *td, struct vnode *vn, From 500481733550caebb7ed1c0c7fb2a411d9eb3229 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Thu, 11 Aug 2016 07:58:23 +0000 Subject: [PATCH 38/76] Remove b_pin_count from struct buf. It was added in r153192 for XFS and doesn't appear to have been used for anything else. XFS was disconnected in r241607 and removed entirely in r247631. Reported by: mlaier Reviewed by: imp, kib Differential Revision: https://reviews.freebsd.org/D7468 --- sys/kern/vfs_bio.c | 57 +----------------------------------------- sys/kern/vfs_cluster.c | 14 ----------- sys/sys/buf.h | 1 - 3 files changed, 1 insertion(+), 71 deletions(-) diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 2378ad653f3f..cb9ee9a786b4 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1479,7 +1479,6 @@ buf_alloc(void) bp->b_npages = 0; bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_bufobj = NULL; - bp->b_pin_count = 0; bp->b_data = bp->b_kvabase = unmapped_buf; bp->b_fsprivate1 = NULL; bp->b_fsprivate2 = NULL; @@ -1908,9 +1907,6 @@ bufwrite(struct buf *bp) BUF_ASSERT_HELD(bp); - if (bp->b_pin_count > 0) - bunpin_wait(bp); - KASSERT(!(bp->b_vflags & BV_BKGRDINPROG), ("FFS background buffer should not get here %p", bp)); @@ -3123,10 +3119,7 @@ flushbufqueues(struct vnode *lvp, int target, int flushdeps) mtx_unlock(&bqlocks[queue]); if (error != 0) continue; - if (bp->b_pin_count > 0) { - BUF_UNLOCK(bp); - continue; - } + /* * BKGRDINPROG can only be set with the buf and bufobj * locks both held. We tolerate a race to clear it here. @@ -3546,19 +3539,6 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo, if ((bp->b_flags & B_VMIO) == 0 || (size > bp->b_kvasize)) { if (bp->b_flags & B_DELWRI) { - /* - * If buffer is pinned and caller does - * not want sleep waiting for it to be - * unpinned, bail out - * */ - if (bp->b_pin_count > 0) { - if (flags & GB_LOCK_NOWAIT) { - bqrelse(bp); - return (NULL); - } else { - bunpin_wait(bp); - } - } bp->b_flags |= B_NOCACHE; bwrite(bp); } else { @@ -4632,41 +4612,6 @@ bufobj_wwait(struct bufobj *bo, int slpflag, int timeo) return (error); } -void -bpin(struct buf *bp) -{ - struct mtx *mtxp; - - mtxp = mtx_pool_find(mtxpool_sleep, bp); - mtx_lock(mtxp); - bp->b_pin_count++; - mtx_unlock(mtxp); -} - -void -bunpin(struct buf *bp) -{ - struct mtx *mtxp; - - mtxp = mtx_pool_find(mtxpool_sleep, bp); - mtx_lock(mtxp); - if (--bp->b_pin_count == 0) - wakeup(bp); - mtx_unlock(mtxp); -} - -void -bunpin_wait(struct buf *bp) -{ - struct mtx *mtxp; - - mtxp = mtx_pool_find(mtxpool_sleep, bp); - mtx_lock(mtxp); - while (bp->b_pin_count > 0) - msleep(bp, mtxp, PRIBIO, "bwunpin", 0); - mtx_unlock(mtxp); -} - /* * Set bio_data or bio_ma for struct bio from the struct buf. */ diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index f6be7a7e53dc..4606517dc7c7 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -836,12 +836,6 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len, --len; continue; } - if (tbp->b_pin_count > 0) { - BUF_UNLOCK(tbp); - ++start_lbn; - --len; - continue; - } bremfree(tbp); tbp->b_flags &= ~B_DONE; @@ -953,14 +947,6 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len, break; } - /* - * Do not pull in pinned buffers. - */ - if (tbp->b_pin_count > 0) { - BUF_UNLOCK(tbp); - break; - } - /* * Ok, it's passed all the tests, * so remove it from the free list diff --git a/sys/sys/buf.h b/sys/sys/buf.h index d287fe068da4..c485e384ed64 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -139,7 +139,6 @@ struct buf { void *b_fsprivate1; void *b_fsprivate2; void *b_fsprivate3; - int b_pin_count; }; #define b_object b_bufobj->bo_object From d6eb9b024915640956c60f183f7d7b215e54f238 Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Thu, 11 Aug 2016 10:10:10 +0000 Subject: [PATCH 39/76] Restore "nat global" support. Now zero value of arg1 used to specify "tablearg", use the old "tablearg" value for "nat global". Introduce new macro IP_FW_NAT44_GLOBAL to replace hardcoded magic number to specify "nat global". Also replace 65535 magic number with corresponding macro. Fix typo in comments. PR: 211256 Tested by: Victor Chernov MFC after: 3 days --- sbin/ipfw/ipfw2.c | 4 ++-- sys/netinet/ip_fw.h | 1 + sys/netpfil/ipfw/ip_fw2.c | 2 +- sys/netpfil/ipfw/ip_fw_sockopt.c | 24 +++++++++++++++--------- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index a02ce39cf3fd..cedefda9468e 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -1583,7 +1583,7 @@ show_static_rule(struct cmdline_opts *co, struct format_opts *fo, break; case O_NAT: - if (cmd->arg1 != 0) + if (cmd->arg1 != IP_FW_NAT44_GLOBAL) bprint_uint_arg(bp, "nat ", cmd->arg1); else bprintf(bp, "nat global"); @@ -3776,7 +3776,7 @@ compile_rule(char *av[], uint32_t *rbuf, int *rbufsize, struct tidx *tstate) action->len = F_INSN_SIZE(ipfw_insn_nat); CHECK_ACTLEN; if (*av != NULL && _substrcmp(*av, "global") == 0) { - action->arg1 = 0; + action->arg1 = IP_FW_NAT44_GLOBAL; av++; break; } else diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index 6b07d1b3f177..0deac8a44fb5 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -60,6 +60,7 @@ #define IPFW_ARG_MAX 65534 #define IP_FW_TABLEARG 65535 /* Compat value for old clients */ #define IP_FW_TARG 0 /* Current tablearg value */ +#define IP_FW_NAT44_GLOBAL 65535 /* arg1 value for "nat global" */ /* * Number of entries in the call stack of the call/return commands. diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index 6b4a34b20b46..d336479e98dc 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -2508,7 +2508,7 @@ do { \ set_match(args, f_pos, chain); /* Check if this is 'global' nat rule */ - if (cmd->arg1 == 0) { + if (cmd->arg1 == IP_FW_NAT44_GLOBAL) { retval = ipfw_nat_ptr(args, NULL, m); break; } diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c index a62b19e4d3da..8031e311e89d 100644 --- a/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -530,9 +530,11 @@ import_rule0(struct rule_check_info *ci) /* * Alter opcodes: - * 1) convert tablearg value from 65335 to 0 - * 2) Add high bit to O_SETFIB/O_SETDSCP values (to make room for targ). + * 1) convert tablearg value from 65535 to 0 + * 2) Add high bit to O_SETFIB/O_SETDSCP values (to make room + * for targ). * 3) convert table number in iface opcodes to u16 + * 4) convert old `nat global` into new 65535 */ l = krule->cmd_len; cmd = krule->cmd; @@ -554,19 +556,21 @@ import_rule0(struct rule_check_info *ci) case O_NETGRAPH: case O_NGTEE: case O_NAT: - if (cmd->arg1 == 65535) + if (cmd->arg1 == IP_FW_TABLEARG) cmd->arg1 = IP_FW_TARG; + else if (cmd->arg1 == 0) + cmd->arg1 = IP_FW_NAT44_GLOBAL; break; case O_SETFIB: case O_SETDSCP: - if (cmd->arg1 == 65535) + if (cmd->arg1 == IP_FW_TABLEARG) cmd->arg1 = IP_FW_TARG; else cmd->arg1 |= 0x8000; break; case O_LIMIT: lcmd = (ipfw_insn_limit *)cmd; - if (lcmd->conn_limit == 65535) + if (lcmd->conn_limit == IP_FW_TABLEARG) lcmd->conn_limit = IP_FW_TARG; break; /* Interface tables */ @@ -612,7 +616,7 @@ export_rule0(struct ip_fw *krule, struct ip_fw_rule0 *urule, int len) /* * Alter opcodes: - * 1) convert tablearg value from 0 to 65335 + * 1) convert tablearg value from 0 to 65535 * 2) Remove highest bit from O_SETFIB/O_SETDSCP values. * 3) convert table number in iface opcodes to int */ @@ -637,19 +641,21 @@ export_rule0(struct ip_fw *krule, struct ip_fw_rule0 *urule, int len) case O_NGTEE: case O_NAT: if (cmd->arg1 == IP_FW_TARG) - cmd->arg1 = 65535; + cmd->arg1 = IP_FW_TABLEARG; + else if (cmd->arg1 == IP_FW_NAT44_GLOBAL) + cmd->arg1 = 0; break; case O_SETFIB: case O_SETDSCP: if (cmd->arg1 == IP_FW_TARG) - cmd->arg1 = 65535; + cmd->arg1 = IP_FW_TABLEARG; else cmd->arg1 &= ~0x8000; break; case O_LIMIT: lcmd = (ipfw_insn_limit *)cmd; if (lcmd->conn_limit == IP_FW_TARG) - lcmd->conn_limit = 65535; + lcmd->conn_limit = IP_FW_TABLEARG; break; /* Interface tables */ case O_XMIT: From b44d5b4a497402ce1ea34fe89565887d09d996c1 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 11 Aug 2016 12:37:11 +0000 Subject: [PATCH 40/76] The pmap_delayed_invl_wait() function blocks on turnstile, it does not spin, in the committed version. Remove stray '*' in the text. Sponsored by: The FreeBSD Foundation. MFC after: 3 days --- sys/amd64/amd64/pmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 52adb660c3d2..f87d3b5c599b 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -561,9 +561,9 @@ pmap_delayed_invl_wait(vm_page_t m) * block to complete before proceeding. * * The function works by setting the DI generation number for m's PV - * list to at least * the number for the current thread. This forces - * a caller to pmap_delayed_invl_wait() to spin until current thread - * calls pmap_delayed_invl_finished(). + * list to at least the DI generation number of the current thread. + * This forces a caller of pmap_delayed_invl_wait() to block until + * current thread calls pmap_delayed_invl_finished(). */ static void pmap_delayed_invl_page(vm_page_t m) From 01a62066c3e51e495439f6518c6e5a8185d85fac Mon Sep 17 00:00:00 2001 From: Ruslan Bukin Date: Thu, 11 Aug 2016 13:42:31 +0000 Subject: [PATCH 41/76] Revert r303911 "Remove extra -msoft-float flags settings." This was not properly tested. --- share/mk/bsd.cpu.mk | 5 +++++ sys/modules/dtrace/dtrace/Makefile | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/share/mk/bsd.cpu.mk b/share/mk/bsd.cpu.mk index 8e7aef3246de..a967f892846b 100644 --- a/share/mk/bsd.cpu.mk +++ b/share/mk/bsd.cpu.mk @@ -327,6 +327,11 @@ CFLAGS += -mfloat-abi=softfp .endif .endif +.if ${MACHINE_CPUARCH} == "riscv" +CFLAGS += -msoft-float +ACFLAGS += -msoft-float +.endif + # NB: COPTFLAGS is handled in /usr/src/sys/conf/kern.pre.mk .if !defined(NO_CPU_CFLAGS) diff --git a/sys/modules/dtrace/dtrace/Makefile b/sys/modules/dtrace/dtrace/Makefile index 417266c5641b..3c0812c5ccd7 100644 --- a/sys/modules/dtrace/dtrace/Makefile +++ b/sys/modules/dtrace/dtrace/Makefile @@ -58,6 +58,11 @@ assym.o: assym.s ${AS} -meabi=5 -o assym.o assym.s .endif +.if ${MACHINE_CPUARCH} == "riscv" +assym.o: assym.s + ${AS} -msoft-float -o assym.o assym.s +.endif + .include CFLAGS+= -include ${SYSDIR}/cddl/compat/opensolaris/sys/debug_compat.h From c12582546e6775ee47be87f3397a6957c08dcc42 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Thu, 11 Aug 2016 14:27:23 +0000 Subject: [PATCH 42/76] Implement autofs_print(), for improved debugging experience. MFC after: 1 month --- sys/fs/autofs/autofs_vnops.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sys/fs/autofs/autofs_vnops.c b/sys/fs/autofs/autofs_vnops.c index e53e38e55323..3dfdb559e9b3 100644 --- a/sys/fs/autofs/autofs_vnops.c +++ b/sys/fs/autofs/autofs_vnops.c @@ -329,6 +329,21 @@ autofs_mkdir(struct vop_mkdir_args *ap) return (error); } +static int +autofs_print(struct vop_print_args *ap) +{ + struct vnode *vp; + struct autofs_node *anp; + + vp = ap->a_vp; + anp = vp->v_data; + + printf(" name \"%s\", fileno %d, cached %d, wildcards %d\n", + anp->an_name, anp->an_fileno, anp->an_cached, anp->an_wildcards); + + return (0); +} + /* * Write out a single 'struct dirent', based on 'name' and 'fileno' arguments. */ @@ -529,6 +544,7 @@ struct vop_vector autofs_vnodeops = { .vop_link = VOP_EOPNOTSUPP, .vop_mkdir = autofs_mkdir, .vop_mknod = VOP_EOPNOTSUPP, + .vop_print = autofs_print, .vop_read = VOP_EOPNOTSUPP, .vop_readdir = autofs_readdir, .vop_remove = VOP_EOPNOTSUPP, From 57b82d475ead8f039125867050638b85c4153273 Mon Sep 17 00:00:00 2001 From: "Stephen J. Kiernan" Date: Thu, 11 Aug 2016 15:00:55 +0000 Subject: [PATCH 43/76] Add the missing space between .asciz directive and opening quote for some lines within #ifdef BTXLDR_VERBOSE/#endif Reported by: Kevin Zheng Reviewed by: jhb Approved by: sjg (mentor) Obtained from: Juniper Networks, Inc. MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D7464 --- sys/boot/i386/btx/btxldr/btxldr.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sys/boot/i386/btx/btxldr/btxldr.S b/sys/boot/i386/btx/btxldr/btxldr.S index 848b930b7907..c3e544e3a6fb 100644 --- a/sys/boot/i386/btx/btxldr/btxldr.S +++ b/sys/boot/i386/btx/btxldr/btxldr.S @@ -382,12 +382,12 @@ e_fmt: .asciz "Error: Client format not supported\n" #ifdef BTXLDR_VERBOSE m_mem: .asciz "Starting in protected mode (base mem=\0)\n" m_esp: .asciz "Arguments passed (esp=\0):\n" -m_args: .asciz"\n" +m_args: .asciz "\n" m_rel_bi: .asciz "Relocated bootinfo (size=48) to \0\n" m_rel_args: .asciz "Relocated arguments (size=18) to \0\n" m_rel_btx: .asciz "Relocated kernel (size=\0) to \0\n" From 9172e7feebc76e1c3492e55dfdb28d8f0b31425e Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Thu, 11 Aug 2016 15:06:12 +0000 Subject: [PATCH 44/76] Revert r303890 for now here as camdd fails to build on powerpc* due to device_t only being available under _KERNEL. Reported by: bde (_KERNEL in general), kib (build failure) MFC after: 1 day X-MFC with: r303890 --- sys/powerpc/include/bus_dma.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sys/powerpc/include/bus_dma.h b/sys/powerpc/include/bus_dma.h index d89985420f06..e070a9423815 100644 --- a/sys/powerpc/include/bus_dma.h +++ b/sys/powerpc/include/bus_dma.h @@ -30,6 +30,8 @@ #include -int bus_dma_tag_set_iommu(bus_dma_tag_t, device_t iommu, void *cookie); +struct device; + +int bus_dma_tag_set_iommu(bus_dma_tag_t, struct device *iommu, void *cookie); #endif /* _POWERPC_BUS_DMA_H_ */ From 017ec3303619da9c9afd3611ef9ce0bd14139d8c Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Thu, 11 Aug 2016 17:06:48 +0000 Subject: [PATCH 45/76] PROGS: Support INTERNALPROG.prog=yes to not install it. MFC after: 3 days Sponsored by: EMC / Isilon Storage Division --- share/mk/bsd.README | 1 + share/mk/bsd.progs.mk | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/share/mk/bsd.README b/share/mk/bsd.README index fb5948e708b4..210e6008246c 100644 --- a/share/mk/bsd.README +++ b/share/mk/bsd.README @@ -331,6 +331,7 @@ PROGS_CXX PROG and PROGS_CXX in one Makefile. To define - DEBUG_FLAGS - DPADD - DPSRCS + - INTERNALPROG (no installation) - LDADD - LDFLAGS - LIBADD diff --git a/share/mk/bsd.progs.mk b/share/mk/bsd.progs.mk index d26ba76f85e3..3254bdadc061 100644 --- a/share/mk/bsd.progs.mk +++ b/share/mk/bsd.progs.mk @@ -24,8 +24,8 @@ PROGS += ${PROGS_CXX} # just one of many PROG_OVERRIDE_VARS += BINDIR BINGRP BINOWN BINMODE DPSRCS MAN NO_WERROR \ PROGNAME SRCS STRIP WARNS -PROG_VARS += CFLAGS CXXFLAGS DEBUG_FLAGS DPADD LDADD LIBADD LINKS \ - LDFLAGS MLINKS ${PROG_OVERRIDE_VARS} +PROG_VARS += CFLAGS CXXFLAGS DEBUG_FLAGS DPADD INTERNALPROG LDADD LIBADD \ + LINKS LDFLAGS MLINKS ${PROG_OVERRIDE_VARS} .for v in ${PROG_VARS:O:u} .if empty(${PROG_OVERRIDE_VARS:M$v}) .if defined(${v}.${PROG}) From 4be723f63e3f1655eec67399df9a9c60221ea783 Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Thu, 11 Aug 2016 21:13:58 +0000 Subject: [PATCH 46/76] Fix vtnet hang with max_virtqueue_pairs > VTNET_MAX_QUEUE_PAIRS Correctly limit npairs passed to vtnet_ctrl_mq_cmd. This ensures that VQ_ALLOC_INFO_INIT is called with the correct value, preventing the system from hanging when max_virtqueue_pairs > VTNET_MAX_QUEUE_PAIRS. Add new sysctl requested_vq_pairs which allow the user to configure the requested number of virtqueue pairs. The actual value will still take into account the system limits. Also missing sysctls for the current tunables so their values can be seen. PR: 207446 Reported by: Andy Carrel MFC after: 3 days Relnotes: Yes Sponsored by: Multiplay --- sys/dev/virtio/network/if_vtnet.c | 63 +++++++++++++++++----------- sys/dev/virtio/network/if_vtnetvar.h | 1 + 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c index 6b89b869242e..37189753012e 100644 --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -230,18 +230,32 @@ static void vtnet_disable_interrupts(struct vtnet_softc *); static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); /* Tunables. */ +static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VNET driver parameters"); static int vtnet_csum_disable = 0; TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable); +SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN, + &vtnet_csum_disable, 0, "Disables receive and send checksum offload"); static int vtnet_tso_disable = 0; TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable); +SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable, + 0, "Disables TCP Segmentation Offload"); static int vtnet_lro_disable = 0; TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable); +SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable, + 0, "Disables TCP Large Receive Offload"); static int vtnet_mq_disable = 0; TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable); -static int vtnet_mq_max_pairs = 0; +SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable, + 0, "Disables Multi Queue support"); +static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS; TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs); +SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN, + &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs"); static int vtnet_rx_process_limit = 512; TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit); +SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, + &vtnet_rx_process_limit, 0, + "Limits the number RX segments processed in a single pass"); static uma_zone_t vtnet_tx_header_zone; @@ -597,7 +611,6 @@ static void vtnet_setup_features(struct vtnet_softc *sc) { device_t dev; - int max_pairs, max; dev = sc->vtnet_dev; @@ -646,32 +659,31 @@ vtnet_setup_features(struct vtnet_softc *sc) if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) && sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { - max_pairs = virtio_read_dev_config_2(dev, + sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); - if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || - max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) - max_pairs = 1; } else - max_pairs = 1; + sc->vtnet_max_vq_pairs = 1; - if (max_pairs > 1) { + if (sc->vtnet_max_vq_pairs > 1) { /* - * Limit the maximum number of queue pairs to the number of - * CPUs or the configured maximum. The actual number of - * queues that get used may be less. + * Limit the maximum number of queue pairs to the lower of + * the number of CPUs and the configured maximum. + * The actual number of queues that get used may be less. */ - max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); - if (max > 0 && max_pairs > max) - max_pairs = max; - if (max_pairs > mp_ncpus) - max_pairs = mp_ncpus; - if (max_pairs > VTNET_MAX_QUEUE_PAIRS) - max_pairs = VTNET_MAX_QUEUE_PAIRS; - if (max_pairs > 1) - sc->vtnet_flags |= VTNET_FLAG_MULTIQ; - } + int max; - sc->vtnet_max_vq_pairs = max_pairs; + max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); + if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) { + if (max > mp_ncpus) + max = mp_ncpus; + if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) + max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX; + if (max > 1) { + sc->vtnet_requested_vq_pairs = max; + sc->vtnet_flags |= VTNET_FLAG_MULTIQ; + } + } + } } static int @@ -2982,13 +2994,11 @@ vtnet_set_active_vq_pairs(struct vtnet_softc *sc) dev = sc->vtnet_dev; if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { - MPASS(sc->vtnet_max_vq_pairs == 1); sc->vtnet_act_vq_pairs = 1; return; } - /* BMV: Just use the maximum configured for now. */ - npairs = sc->vtnet_max_vq_pairs; + npairs = sc->vtnet_requested_vq_pairs; if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { device_printf(dev, @@ -3852,6 +3862,9 @@ vtnet_setup_sysctl(struct vtnet_softc *sc) SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, "Maximum number of supported virtqueue pairs"); + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs", + CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0, + "Requested number of virtqueue pairs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, "Number of active virtqueue pairs"); diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h index f89f6b11fb21..15436d983ca9 100644 --- a/sys/dev/virtio/network/if_vtnetvar.h +++ b/sys/dev/virtio/network/if_vtnetvar.h @@ -155,6 +155,7 @@ struct vtnet_softc { int vtnet_if_flags; int vtnet_act_vq_pairs; int vtnet_max_vq_pairs; + int vtnet_requested_vq_pairs; struct virtqueue *vtnet_ctrl_vq; struct vtnet_mac_filter *vtnet_mac_filter; From 564fff60a0d992d2e5f9996a2f93fd26a0d67069 Mon Sep 17 00:00:00 2001 From: Emmanuel Vadot Date: Thu, 11 Aug 2016 23:04:26 +0000 Subject: [PATCH 47/76] Rename pcduino3b.dts to pcduino3.dts The only difference between 3 and 3B is the size of the RJ45 port. And now we have a uboot port that expect pcduino3.dts to be present. Reported by: imp --- sys/boot/fdt/dts/arm/{pcduino3b.dts => pcduino3.dts} | 0 sys/modules/dtb/allwinner/Makefile | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename sys/boot/fdt/dts/arm/{pcduino3b.dts => pcduino3.dts} (100%) diff --git a/sys/boot/fdt/dts/arm/pcduino3b.dts b/sys/boot/fdt/dts/arm/pcduino3.dts similarity index 100% rename from sys/boot/fdt/dts/arm/pcduino3b.dts rename to sys/boot/fdt/dts/arm/pcduino3.dts diff --git a/sys/modules/dtb/allwinner/Makefile b/sys/modules/dtb/allwinner/Makefile index 9dd0799724ae..541e8c044cb7 100644 --- a/sys/modules/dtb/allwinner/Makefile +++ b/sys/modules/dtb/allwinner/Makefile @@ -7,7 +7,7 @@ DTS= \ cubieboard2.dts \ olimex-a20-som-evb.dts \ olinuxino-lime.dts \ - pcduino3b.dts \ + pcduino3.dts \ sinovoip-bpi-m3.dts .include From bca0155f6439342483e827b5a86c1445443b2ae0 Mon Sep 17 00:00:00 2001 From: Mike Karels Date: Thu, 11 Aug 2016 23:52:24 +0000 Subject: [PATCH 48/76] Fix kernel build with TCP_RFC7413 option The current in_pcb.h includes route.h, which includes sockaddr structures. Including should require ; add it in the appropriate place. PR: 211385 Submitted by: Sergey Kandaurov and iron at mail.ua Reviewed by: gnn Approved by: gnn (mentor) MFC after: 1 day --- sys/netinet/tcp_fastopen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/netinet/tcp_fastopen.c b/sys/netinet/tcp_fastopen.c index ec02d18fada1..e2b3b8637f60 100644 --- a/sys/netinet/tcp_fastopen.c +++ b/sys/netinet/tcp_fastopen.c @@ -108,6 +108,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include From 162fe2aa40b2f617148ab81c2a9053d98dc57d58 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 12 Aug 2016 01:13:34 +0000 Subject: [PATCH 49/76] Print out some more fields. Tested: * AR9331 SoC (Carambola 2) - specifically looking for the tuning caps fields. --- tools/tools/ath/ath_ee_9300_print/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/tools/ath/ath_ee_9300_print/main.c b/tools/tools/ath/ath_ee_9300_print/main.c index acf646e38644..df278e18f888 100644 --- a/tools/tools/ath/ath_ee_9300_print/main.c +++ b/tools/tools/ath/ath_ee_9300_print/main.c @@ -72,8 +72,10 @@ eeprom_9300_base_print(const uint16_t *buf) ee_base->device_cap, ee_base->device_type); - printf("| pwrTableOffset: %d dB, feature_enable: 0x%02x MiscConfig: 0x%02x |\n", + printf("| pwrTableOffset: %d dB, TuningCaps=0x%02x 0x%02x feature_enable: 0x%02x MiscConfig: 0x%02x |\n", ee_base->pwrTableOffset, + ee_base->params_for_tuning_caps[0], + ee_base->params_for_tuning_caps[1], ee_base->feature_enable, ee_base->misc_configuration); From 791444089f5dd584327e2e79b58e016796f02ad5 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 12 Aug 2016 03:22:58 +0000 Subject: [PATCH 50/76] Correct errors and clean up the comments on the active queue scan. Eliminate some unnecessary blank lines. Reviewed by: kib, markj MFC after: 1 week --- sys/vm/vm_pageout.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 2bdecccaf8ec..02ded8b37c28 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -1188,15 +1188,13 @@ vm_pageout_scan(struct vm_domain *vmd, int pass) /* * Scan the active queue for pages that can be deactivated. Update * the per-page activity counter and use it to identify deactivation - * candidates. + * candidates. Held pages may be deactivated. */ for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned < min_scan || (page_shortage > 0 && scanned < maxscan)); m = next, scanned++) { - KASSERT(m->queue == PQ_ACTIVE, ("vm_pageout_scan: page %p isn't active", m)); - next = TAILQ_NEXT(m, plinks.q); if ((m->flags & PG_MARKER) != 0) continue; @@ -1210,8 +1208,8 @@ vm_pageout_scan(struct vm_domain *vmd, int pass) } /* - * The count for pagedaemon pages is done after checking the - * page for eligibility... + * The count for page daemon pages is updated after checking + * the page for eligibility. */ PCPU_INC(cnt.v_pdpages); @@ -1225,12 +1223,17 @@ vm_pageout_scan(struct vm_domain *vmd, int pass) act_delta = 0; /* - * Unlocked object ref count check. Two races are possible. - * 1) The ref was transitioning to zero and we saw non-zero, - * the pmap bits will be checked unnecessarily. - * 2) The ref was transitioning to one and we saw zero. - * The page lock prevents a new reference to this page so - * we need not check the reference bits. + * Perform an unsynchronized object ref count check. While + * the page lock ensures that the page is not reallocated to + * another object, in particular, one with unmanaged mappings + * that cannot support pmap_ts_referenced(), two races are, + * nonetheless, possible: + * 1) The count was transitioning to zero, but we saw a non- + * zero value. pmap_ts_referenced() will return zero + * because the page is not mapped. + * 2) The count was transitioning to one, but we saw zero. + * This race delays the detection of a new reference. At + * worst, we will deactivate and reactivate the page. */ if (m->object->ref_count != 0) act_delta += pmap_ts_referenced(m); From e2f68161004f312bf10e4b683ac866b45f6e8489 Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Fri, 12 Aug 2016 07:03:58 +0000 Subject: [PATCH 51/76] Reimplement dirname(3) to be thread-safe. Now that we've updated the prototypes of the basename(3) and dirname(3) functions to conform to POSIX, let's go ahead and reimplement dirname(3) in such a way that it's thread-safe, but also guaranteed to succeed. C libraries like glibc, musl and the one that's part of Solaris already follow such an approach. Move the existing implementation to another source file, freebsd11_dirname.c to keep existing users of the API that pass in a constant string happy, using symbol versioning. Put a new version of the function in dirname.c, obtained from CloudABI's C library. This version scans through the pathname string from left to right, normalizing it, while discarding the last pathname component. Reviewed by: emaste, jilles Differential Revision: https://reviews.freebsd.org/D7355 --- lib/libc/gen/Makefile.inc | 1 + lib/libc/gen/Symbol.map | 5 +- lib/libc/gen/dirname.3 | 58 ++++++--------- lib/libc/gen/dirname.c | 133 +++++++++++++++++++--------------- lib/libc/gen/dirname_compat.c | 79 ++++++++++++++++++++ 5 files changed, 181 insertions(+), 95 deletions(-) create mode 100644 lib/libc/gen/dirname_compat.c diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc index 696d907ac609..58f582fb4e73 100644 --- a/lib/libc/gen/Makefile.inc +++ b/lib/libc/gen/Makefile.inc @@ -29,6 +29,7 @@ SRCS+= __getosreldate.c \ devname.c \ dirfd.c \ dirname.c \ + dirname_compat.c \ disklabel.c \ dlfcn.c \ drand48.c \ diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map index 0d5dfec80cce..6e5db3d8abb6 100644 --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -82,7 +82,6 @@ FBSD_1.0 { daemon; devname; devname_r; - dirname; getdiskbyname; dladdr; dlclose; @@ -418,6 +417,10 @@ FBSD_1.4 { stravis; }; +FBSD_1.5 { + dirname; +}; + FBSDprivate_1.0 { /* needed by thread libraries */ __thr_jtable; diff --git a/lib/libc/gen/dirname.3 b/lib/libc/gen/dirname.3 index 6685618a9869..18405f9af6ef 100644 --- a/lib/libc/gen/dirname.3 +++ b/lib/libc/gen/dirname.3 @@ -16,7 +16,7 @@ .\" .\" $FreeBSD$ .\" -.Dd July 29, 2016 +.Dd August 12, 2016 .Dt DIRNAME 3 .Os .Sh NAME @@ -37,6 +37,7 @@ Any trailing .Sq \&/ characters are not counted as part of the directory name. +.Sh RETURN VALUES If .Fa path is a null pointer, the empty string, or contains no @@ -46,40 +47,24 @@ characters, returns a pointer to the string .Qq \&. , signifying the current directory. -.Sh IMPLEMENTATION NOTES -The -.Fn dirname -function -returns a pointer to internal storage space allocated on the first call -that will be overwritten -by subsequent calls. -.Pp -Other vendor implementations of -.Fn dirname -may store their result in the input buffer, -making it safe to use in multithreaded applications. -Future versions of -.Fx -will follow this approach as well. -.Sh RETURN VALUES -On successful completion, -.Fn dirname -returns a pointer to the parent directory of +Otherwise, +it returns a pointer to the parent directory of .Fa path . -.Pp -If +.Sh IMPLEMENTATION NOTES +This implementation of .Fn dirname -fails, a null pointer is returned and the global variable -.Va errno -is set to indicate the error. -.Sh ERRORS -The following error codes may be set in -.Va errno : -.Bl -tag -width Er -.It Bq Er ENAMETOOLONG -The path component to be returned was larger than -.Dv MAXPATHLEN . -.El +uses the buffer provided by the caller to store the resulting parent +directory. +Other vendor implementations may return a pointer to internal storage +space instead. +The advantage of the former approach is that it ensures thread-safety, +while also placing no upper limit on the supported length of the +pathname. +.Pp +The algorithm used by this implementation also discards redundant +slashes and +.Qq \&. +pathname components from the pathname string. .Sh SEE ALSO .Xr basename 1 , .Xr dirname 1 , @@ -96,5 +81,10 @@ function first appeared in .Ox 2.2 and .Fx 4.2 . +.Pp +In +.Fx 12.0 , +this function was reimplemented to store its result in the provided +input buffer. .Sh AUTHORS -.An "Todd C. Miller" +.An Nuxi, the Netherlands diff --git a/lib/libc/gen/dirname.c b/lib/libc/gen/dirname.c index 5e0042fd2d10..3113631ac943 100644 --- a/lib/libc/gen/dirname.c +++ b/lib/libc/gen/dirname.c @@ -1,77 +1,90 @@ -/* $OpenBSD: dirname.c,v 1.13 2005/08/08 08:05:33 espie Exp $ */ - -/* - * Copyright (c) 1997, 2004 Todd C. Miller +/*- + * Copyright (c) 2015-2016 Nuxi, https://nuxi.nl/ * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); -#include #include -#include +#include #include -#include char * dirname(char *path) { - static char *dname = NULL; - size_t len; - const char *endp; + const char *in, *prev, *begin, *end; + char *out; + size_t prevlen; + bool skipslash; - if (dname == NULL) { - dname = (char *)malloc(MAXPATHLEN); - if (dname == NULL) - return(NULL); + /* + * If path is a null pointer or points to an empty string, + * dirname() shall return a pointer to the string ".". + */ + if (path == NULL || *path == '\0') + return ((char *)"."); + + /* Retain at least one leading slash character. */ + in = out = *path == '/' ? path + 1 : path; + + skipslash = true; + prev = "."; + prevlen = 1; + for (;;) { + /* Extract the next pathname component. */ + while (*in == '/') + ++in; + begin = in; + while (*in != '/' && *in != '\0') + ++in; + end = in; + if (begin == end) + break; + + /* + * Copy over the previous pathname component, except if + * it's dot. There is no point in retaining those. + */ + if (prevlen != 1 || *prev != '.') { + if (!skipslash) + *out++ = '/'; + skipslash = false; + memmove(out, prev, prevlen); + out += prevlen; + } + + /* Preserve the pathname component for the next iteration. */ + prev = begin; + prevlen = end - begin; } - /* Empty or NULL string gets treated as "." */ - if (path == NULL || *path == '\0') { - dname[0] = '.'; - dname[1] = '\0'; - return (dname); - } - - /* Strip any trailing slashes */ - endp = path + strlen(path) - 1; - while (endp > path && *endp == '/') - endp--; - - /* Find the start of the dir */ - while (endp > path && *endp != '/') - endp--; - - /* Either the dir is "/" or there are no slashes */ - if (endp == path) { - dname[0] = *endp == '/' ? '/' : '.'; - dname[1] = '\0'; - return (dname); - } else { - /* Move forward past the separating slashes */ - do { - endp--; - } while (endp > path && *endp == '/'); - } - - len = endp - path + 1; - if (len >= MAXPATHLEN) { - errno = ENAMETOOLONG; - return (NULL); - } - memcpy(dname, path, len); - dname[len] = '\0'; - return (dname); + /* + * If path does not contain a '/', then dirname() shall return a + * pointer to the string ".". + */ + if (out == path) + *out++ = '.'; + *out = '\0'; + return (path); } diff --git a/lib/libc/gen/dirname_compat.c b/lib/libc/gen/dirname_compat.c new file mode 100644 index 000000000000..48ad32c8d70a --- /dev/null +++ b/lib/libc/gen/dirname_compat.c @@ -0,0 +1,79 @@ +/* $OpenBSD: dirname.c,v 1.13 2005/08/08 08:05:33 espie Exp $ */ + +/* + * Copyright (c) 1997, 2004 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +char * +__freebsd11_dirname(char *path) +{ + static char *dname = NULL; + size_t len; + const char *endp; + + if (dname == NULL) { + dname = (char *)malloc(MAXPATHLEN); + if (dname == NULL) + return(NULL); + } + + /* Empty or NULL string gets treated as "." */ + if (path == NULL || *path == '\0') { + dname[0] = '.'; + dname[1] = '\0'; + return (dname); + } + + /* Strip any trailing slashes */ + endp = path + strlen(path) - 1; + while (endp > path && *endp == '/') + endp--; + + /* Find the start of the dir */ + while (endp > path && *endp != '/') + endp--; + + /* Either the dir is "/" or there are no slashes */ + if (endp == path) { + dname[0] = *endp == '/' ? '/' : '.'; + dname[1] = '\0'; + return (dname); + } else { + /* Move forward past the separating slashes */ + do { + endp--; + } while (endp > path && *endp == '/'); + } + + len = endp - path + 1; + if (len >= MAXPATHLEN) { + errno = ENAMETOOLONG; + return (NULL); + } + memcpy(dname, path, len); + dname[len] = '\0'; + return (dname); +} + +__sym_compat(dirname, __freebsd11_dirname, FBSD_1.0); From 9b3922212a9c2ee3c3b300ed220ec15b431b64b4 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Fri, 12 Aug 2016 07:14:40 +0000 Subject: [PATCH 52/76] hyperv/hn: Simplify NDIS configuration. MFC after: 1 week Sponsored by: Microsoft Differential Revision: https://reviews.freebsd.org/D7466 --- sys/dev/hyperv/netvsc/hv_net_vsc.c | 36 +++++++++--------------------- sys/dev/hyperv/netvsc/if_hnreg.h | 14 ++++++++++++ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c index 4ae95acb8548..2216aa70394b 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.c +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c @@ -536,33 +536,19 @@ hv_nv_negotiate_nvsp_protocol(struct hn_softc *sc, netvsc_dev *net_dev, static int hv_nv_send_ndis_config(struct hn_softc *sc, uint32_t mtu) { - netvsc_dev *net_dev; - nvsp_msg *init_pkt; - int ret; + struct hn_nvs_ndis_conf conf; + int error; - net_dev = hv_nv_get_outbound_net_device(sc); - if (!net_dev) - return (-ENODEV); + memset(&conf, 0, sizeof(conf)); + conf.nvs_type = HN_NVS_TYPE_NDIS_CONF; + conf.nvs_mtu = mtu; + conf.nvs_caps = HN_NVS_NDIS_CONF_VLAN; - /* - * Set up configuration packet, write MTU - * Indicate we are capable of handling VLAN tags - */ - init_pkt = &net_dev->channel_init_packet; - memset(init_pkt, 0, sizeof(nvsp_msg)); - init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config; - init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu; - init_pkt-> - msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q - = 1; - - /* Send the configuration packet */ - ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, 0, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)&hn_send_ctx_none); - if (ret != 0) - return (-EINVAL); - - return (0); + error = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, 0, + &conf, sizeof(conf), (uint64_t)(uintptr_t)&hn_send_ctx_none); + if (error) + if_printf(sc->hn_ifp, "send nvs ndis conf failed: %d\n", error); + return (error); } /* diff --git a/sys/dev/hyperv/netvsc/if_hnreg.h b/sys/dev/hyperv/netvsc/if_hnreg.h index 6f6e4c4117cc..94c33ef6d7d5 100644 --- a/sys/dev/hyperv/netvsc/if_hnreg.h +++ b/sys/dev/hyperv/netvsc/if_hnreg.h @@ -36,6 +36,7 @@ #define HN_NVS_TYPE_INIT 1 #define HN_NVS_TYPE_INIT_RESP 2 +#define HN_NVS_TYPE_NDIS_CONF 125 /* * Any size less than this one will _not_ work, e.g. hn_nvs_init @@ -59,4 +60,17 @@ struct hn_nvs_init_resp { uint32_t nvs_status; /* HN_NVS_STATUS_ */ } __packed; +/* No reponse */ +struct hn_nvs_ndis_conf { + uint32_t nvs_type; /* HN_NVS_TYPE_NDIS_CONF */ + uint32_t nvs_mtu; + uint32_t nvs_rsvd; + uint64_t nvs_caps; /* HN_NVS_NDIS_CONF_ */ + uint8_t nvs_rsvd1[12]; +} __packed; +CTASSERT(sizeof(struct hn_nvs_ndis_conf) >= HN_NVS_REQSIZE_MIN); + +#define HN_NVS_NDIS_CONF_SRIOV 0x0004 +#define HN_NVS_NDIS_CONF_VLAN 0x0008 + #endif /* !_IF_HNREG_H_ */ From 6bdee22607adea4071c4d152c433c541d551acbd Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Fri, 12 Aug 2016 07:52:13 +0000 Subject: [PATCH 53/76] Remove unused prototypes. Sponsored by: The FreeBSD Foundation MFC after: 1 week --- usr.bin/kdump/kdump.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c index 8cfe5403388c..776909a13a37 100644 --- a/usr.bin/kdump/kdump.c +++ b/usr.bin/kdump/kdump.c @@ -105,8 +105,6 @@ void ktrgenio(struct ktr_genio *, int); void ktrpsig(struct ktr_psig *); void ktrcsw(struct ktr_csw *); void ktrcsw_old(struct ktr_csw_old *); -void ktruser_malloc(void *); -void ktruser_rtld(int, void *); void ktruser(int, void *); void ktrcaprights(cap_rights_t *); void ktritimerval(struct itimerval *it); From 68e9b1e8e1dc4b508a2c7f7f87a9b21723172aeb Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Fri, 12 Aug 2016 07:54:59 +0000 Subject: [PATCH 54/76] Decode 32bit utrace records on the 64bit host. Suppose that ktrace is performed on 32bit binary running on 64bit host. In this case, the kernel records are 64bit, while utrace records from rtld and malloc are 32bit. Make kdump useful to see decoded utrace data in that case. Sponsored by: The FreeBSD Foundation MFC after: 1 week --- lib/libsysdecode/utrace.c | 56 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/lib/libsysdecode/utrace.c b/lib/libsysdecode/utrace.c index 6a251390b2bc..28e52c3fe4db 100644 --- a/lib/libsysdecode/utrace.c +++ b/lib/libsysdecode/utrace.c @@ -33,7 +33,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include +#include #include #define UTRACE_DLOPEN_START 1 @@ -59,6 +59,18 @@ struct utrace_rtld { char name[MAXPATHLEN]; }; +#ifdef __LP64__ +struct utrace_rtld32 { + char sig[4]; /* 'RTLD' */ + int event; + uint32_t handle; + uint32_t mapbase; + uint32_t mapsize; + int refcnt; + char name[MAXPATHLEN]; +}; +#endif + static int print_utrace_rtld(FILE *fp, void *p) { @@ -145,6 +157,14 @@ struct utrace_malloc { void *r; }; +#ifdef __LP64__ +struct utrace_malloc32 { + uint32_t p; + uint32_t s; + uint32_t r; +}; +#endif + static void print_utrace_malloc(FILE *fp, void *p) { @@ -163,6 +183,12 @@ print_utrace_malloc(FILE *fp, void *p) int sysdecode_utrace(FILE *fp, void *p, size_t len) { +#ifdef __LP64__ + struct utrace_rtld ur; + struct utrace_rtld32 *pr; + struct utrace_malloc um; + struct utrace_malloc32 *pm; +#endif if (len == sizeof(struct utrace_rtld) && bcmp(p, "RTLD", 4) == 0) { return (print_utrace_rtld(fp, p)); @@ -172,6 +198,32 @@ sysdecode_utrace(FILE *fp, void *p, size_t len) print_utrace_malloc(fp, p); return (1); } - + +#ifdef __LP64__ + if (len == sizeof(struct utrace_rtld32) && bcmp(p, "RTLD", 4) == 0) { + pr = p; + memset(&ur, 0, sizeof(ur)); + memcpy(ur.sig, pr->sig, sizeof(ur.sig)); + ur.event = pr->event; + ur.handle = (void *)(uintptr_t)pr->handle; + ur.mapbase = (void *)(uintptr_t)pr->mapbase; + ur.mapsize = pr->mapsize; + ur.refcnt = pr->refcnt; + memcpy(ur.name, pr->name, sizeof(ur.name)); + return (print_utrace_rtld(fp, &ur)); + } + + if (len == sizeof(struct utrace_malloc32)) { + pm = p; + memset(&um, 0, sizeof(um)); + um.p = pm->p == (uint32_t)-1 ? (void *)(intptr_t)-1 : + (void *)(uintptr_t)pm->p; + um.s = pm->s; + um.r = (void *)(uintptr_t)pm->r; + print_utrace_malloc(fp, &um); + return (1); + } +#endif + return (0); } From 386c97e61604077f42771be74bd20a87a02e8dd8 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Fri, 12 Aug 2016 07:57:03 +0000 Subject: [PATCH 55/76] hyperv/hn: Simplify NDIS initialization. MFC after: 1 week Sponsored by: Microsoft Differential Revision: https://reviews.freebsd.org/D7467 --- sys/dev/hyperv/netvsc/hv_net_vsc.c | 37 +++++++++--------------------- sys/dev/hyperv/netvsc/hv_rndis.h | 4 ++++ sys/dev/hyperv/netvsc/if_hnreg.h | 10 ++++++++ 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c index 2216aa70394b..5ca49a0ad50c 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.c +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c @@ -558,8 +558,6 @@ static int hv_nv_connect_to_vsp(struct hn_softc *sc) { netvsc_dev *net_dev; - nvsp_msg *init_pkt; - uint32_t ndis_version; uint32_t protocol_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, NVSP_PROTOCOL_VERSION_4, @@ -569,6 +567,7 @@ hv_nv_connect_to_vsp(struct hn_softc *sc) int ret = 0; device_t dev = sc->hn_dev; struct ifnet *ifp = sc->hn_ifp; + struct hn_nvs_ndis_init ndis; net_dev = hv_nv_get_outbound_net_device(sc); @@ -601,37 +600,23 @@ hv_nv_connect_to_vsp(struct hn_softc *sc) ret = hv_nv_send_ndis_config(sc, ifp->if_mtu); /* - * Send the NDIS version + * Initialize NDIS. */ - init_pkt = &net_dev->channel_init_packet; - memset(init_pkt, 0, sizeof(nvsp_msg)); - - if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_4) { - ndis_version = NDIS_VERSION_6_1; - } else { - ndis_version = NDIS_VERSION_6_30; - } - - init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers; - init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers = - (ndis_version & 0xFFFF0000) >> 16; - init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers = - ndis_version & 0xFFFF; - - /* Send the init request */ + memset(&ndis, 0, sizeof(ndis)); + ndis.nvs_type = HN_NVS_TYPE_NDIS_INIT; + ndis.nvs_ndis_major = NDIS_VERSION_MAJOR_6; + if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_4) + ndis.nvs_ndis_minor = NDIS_VERSION_MINOR_1; + else + ndis.nvs_ndis_minor = NDIS_VERSION_MINOR_30; ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, 0, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)&hn_send_ctx_none); + &ndis, sizeof(ndis), (uint64_t)(uintptr_t)&hn_send_ctx_none); if (ret != 0) { + if_printf(sc->hn_ifp, "send nvs ndis init failed: %d\n", ret); goto cleanup; } - /* - * TODO: BUGBUG - We have to wait for the above msg since the netvsp - * uses KMCL which acknowledges packet (completion packet) - * since our Vmbus always set the VMBUS_CHANPKT_FLAG_RC flag - */ - /* sema_wait(&NetVscChannel->channel_init_sema); */ /* Post the big receive buffer to NetVSP */ if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_2) diff --git a/sys/dev/hyperv/netvsc/hv_rndis.h b/sys/dev/hyperv/netvsc/hv_rndis.h index da2b408c6494..6527668de9fe 100644 --- a/sys/dev/hyperv/netvsc/hv_rndis.h +++ b/sys/dev/hyperv/netvsc/hv_rndis.h @@ -41,6 +41,10 @@ #define NDIS_VERSION_6_1 0x00060001 #define NDIS_VERSION_6_30 0x0006001e +#define NDIS_VERSION_MAJOR_6 6 +#define NDIS_VERSION_MINOR_1 1 +#define NDIS_VERSION_MINOR_30 30 + #define NDIS_VERSION (NDIS_VERSION_5_1) /* diff --git a/sys/dev/hyperv/netvsc/if_hnreg.h b/sys/dev/hyperv/netvsc/if_hnreg.h index 94c33ef6d7d5..4866b81977b1 100644 --- a/sys/dev/hyperv/netvsc/if_hnreg.h +++ b/sys/dev/hyperv/netvsc/if_hnreg.h @@ -36,6 +36,7 @@ #define HN_NVS_TYPE_INIT 1 #define HN_NVS_TYPE_INIT_RESP 2 +#define HN_NVS_TYPE_NDIS_INIT 100 #define HN_NVS_TYPE_NDIS_CONF 125 /* @@ -73,4 +74,13 @@ CTASSERT(sizeof(struct hn_nvs_ndis_conf) >= HN_NVS_REQSIZE_MIN); #define HN_NVS_NDIS_CONF_SRIOV 0x0004 #define HN_NVS_NDIS_CONF_VLAN 0x0008 +/* No response */ +struct hn_nvs_ndis_init { + uint32_t nvs_type; /* HN_NVS_TYPE_NDIS_INIT */ + uint32_t nvs_ndis_major; /* NDIS_VERSION_MAJOR_ */ + uint32_t nvs_ndis_minor; /* NDIS_VERSION_MINOR_ */ + uint8_t nvs_rsvd[20]; +} __packed; +CTASSERT(sizeof(struct hn_nvs_ndis_init) >= HN_NVS_REQSIZE_MIN); + #endif /* !_IF_HNREG_H_ */ From d293af4049cd6ee7bf85d0dbbe7f0910041ec2b7 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Fri, 12 Aug 2016 08:07:56 +0000 Subject: [PATCH 56/76] hyperv/hn: Switch to vmbus xact APIs for NVS RXBUF connection. MFC after: 1 week Sponsored by: Microsoft Differential Revision: https://reviews.freebsd.org/D7469 --- sys/dev/hyperv/netvsc/hv_net_vsc.c | 129 +++++++++++++++-------------- sys/dev/hyperv/netvsc/hv_net_vsc.h | 1 - sys/dev/hyperv/netvsc/if_hnreg.h | 26 ++++++ 3 files changed, 91 insertions(+), 65 deletions(-) diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c index 5ca49a0ad50c..617409fac0ac 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.c +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c @@ -152,10 +152,14 @@ hv_nv_get_next_send_section(netvsc_dev *net_dev) static int hv_nv_init_rx_buffer_with_net_vsp(struct hn_softc *sc) { + struct vmbus_xact *xact; + struct hn_nvs_rxbuf_conn *conn; + const struct hn_nvs_rxbuf_connresp *resp; + size_t resp_len; struct hn_send_ctx sndc; netvsc_dev *net_dev; - nvsp_msg *init_pkt; - int ret = 0; + uint32_t status; + int error; net_dev = hv_nv_get_outbound_net_device(sc); if (!net_dev) { @@ -167,7 +171,7 @@ hv_nv_init_rx_buffer_with_net_vsp(struct hn_softc *sc) BUS_DMA_WAITOK | BUS_DMA_ZERO); if (net_dev->rx_buf == NULL) { device_printf(sc->hn_dev, "allocate rxbuf failed\n"); - return ENOMEM; + return (ENOMEM); } /* @@ -177,74 +181,76 @@ hv_nv_init_rx_buffer_with_net_vsp(struct hn_softc *sc) * Only primary channel has RXBUF connected to it. Sub-channels * just share this RXBUF. */ - ret = vmbus_chan_gpadl_connect(sc->hn_prichan, + error = vmbus_chan_gpadl_connect(sc->hn_prichan, net_dev->rxbuf_dma.hv_paddr, net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle); - if (ret != 0) { - device_printf(sc->hn_dev, "rxbuf gpadl connect failed: %d\n", - ret); + if (error) { + if_printf(sc->hn_ifp, "rxbuf gpadl connect failed: %d\n", + error); goto cleanup; } - - /* sema_wait(&ext->channel_init_sema); KYS CHECK */ - - /* Notify the NetVsp of the gpadl handle */ - init_pkt = &net_dev->channel_init_packet; - - memset(init_pkt, 0, sizeof(nvsp_msg)); - - init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf; - init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = - net_dev->rx_buf_gpadl_handle; - init_pkt->msgs.vers_1_msgs.send_rx_buf.id = - NETVSC_RECEIVE_BUFFER_ID; - - /* Send the gpadl notification request */ - - hn_send_ctx_init_simple(&sndc, hn_nvs_sent_wakeup, NULL); - ret = vmbus_chan_send(sc->hn_prichan, - VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)&sndc); - if (ret != 0) { - goto cleanup; - } - - sema_wait(&net_dev->channel_init_sema); - - /* Check the response */ - if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status - != nvsp_status_success) { - ret = EINVAL; - goto cleanup; - } - - net_dev->rx_section_count = - init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections; - - net_dev->rx_sections = malloc(net_dev->rx_section_count * - sizeof(nvsp_1_rx_buf_section), M_NETVSC, M_WAITOK); - memcpy(net_dev->rx_sections, - init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections, - net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section)); - /* - * For first release, there should only be 1 section that represents - * the entire receive buffer + * Connect RXBUF to NVS. */ - if (net_dev->rx_section_count != 1 - || net_dev->rx_sections->offset != 0) { - ret = EINVAL; + + xact = vmbus_xact_get(sc->hn_xact, sizeof(*conn)); + if (xact == NULL) { + if_printf(sc->hn_ifp, "no xact for nvs rxbuf conn\n"); + error = ENXIO; goto cleanup; } - goto exit; + conn = vmbus_xact_req_data(xact); + conn->nvs_type = HN_NVS_TYPE_RXBUF_CONN; + conn->nvs_gpadl = net_dev->rx_buf_gpadl_handle; + conn->nvs_sig = HN_NVS_RXBUF_SIG; + + hn_send_ctx_init_simple(&sndc, hn_nvs_sent_xact, xact); + vmbus_xact_activate(xact); + + error = vmbus_chan_send(sc->hn_prichan, + VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, + conn, sizeof(*conn), (uint64_t)(uintptr_t)&sndc); + if (error != 0) { + if_printf(sc->hn_ifp, "send nvs rxbuf conn failed: %d\n", + error); + vmbus_xact_deactivate(xact); + vmbus_xact_put(xact); + goto cleanup; + } + + resp = vmbus_xact_wait(xact, &resp_len); + if (resp_len < sizeof(*resp)) { + if_printf(sc->hn_ifp, "invalid rxbuf conn resp length %zu\n", + resp_len); + vmbus_xact_put(xact); + error = EINVAL; + goto cleanup; + } + if (resp->nvs_type != HN_NVS_TYPE_RXBUF_CONNRESP) { + if_printf(sc->hn_ifp, "not rxbuf conn resp, type %u\n", + resp->nvs_type); + vmbus_xact_put(xact); + error = EINVAL; + goto cleanup; + } + + status = resp->nvs_status; + vmbus_xact_put(xact); + + if (status != HN_NVS_STATUS_OK) { + if_printf(sc->hn_ifp, "rxbuf conn failed: %x\n", status); + error = EINVAL; + goto cleanup; + } + net_dev->rx_section_count = 1; + + return (0); cleanup: hv_nv_destroy_rx_buffer(net_dev); - -exit: - return (ret); + return (error); } /* @@ -371,6 +377,7 @@ hv_nv_destroy_rx_buffer(netvsc_dev *net_dev) if (ret != 0) { return (ret); } + net_dev->rx_section_count = 0; } /* Tear down the gpadl on the vsp end */ @@ -393,12 +400,6 @@ hv_nv_destroy_rx_buffer(netvsc_dev *net_dev) net_dev->rx_buf = NULL; } - if (net_dev->rx_sections) { - free(net_dev->rx_sections, M_NETVSC); - net_dev->rx_sections = NULL; - net_dev->rx_section_count = 0; - } - return (ret); } diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.h b/sys/dev/hyperv/netvsc/hv_net_vsc.h index 69f0a61f6e07..a861c3e18d53 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.h +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.h @@ -1060,7 +1060,6 @@ typedef struct netvsc_dev_ { uint32_t rx_buf_size; uint32_t rx_buf_gpadl_handle; uint32_t rx_section_count; - nvsp_1_rx_buf_section *rx_sections; /* Used for NetVSP initialization protocol */ struct sema channel_init_sema; diff --git a/sys/dev/hyperv/netvsc/if_hnreg.h b/sys/dev/hyperv/netvsc/if_hnreg.h index 4866b81977b1..cdbd5b7107db 100644 --- a/sys/dev/hyperv/netvsc/if_hnreg.h +++ b/sys/dev/hyperv/netvsc/if_hnreg.h @@ -32,11 +32,15 @@ #include #include +#define HN_NVS_RXBUF_SIG 0xcafe + #define HN_NVS_STATUS_OK 1 #define HN_NVS_TYPE_INIT 1 #define HN_NVS_TYPE_INIT_RESP 2 #define HN_NVS_TYPE_NDIS_INIT 100 +#define HN_NVS_TYPE_RXBUF_CONN 101 +#define HN_NVS_TYPE_RXBUF_CONNRESP 102 #define HN_NVS_TYPE_NDIS_CONF 125 /* @@ -83,4 +87,26 @@ struct hn_nvs_ndis_init { } __packed; CTASSERT(sizeof(struct hn_nvs_ndis_init) >= HN_NVS_REQSIZE_MIN); +struct hn_nvs_rxbuf_conn { + uint32_t nvs_type; /* HN_NVS_TYPE_RXBUF_CONN */ + uint32_t nvs_gpadl; /* RXBUF vmbus GPADL */ + uint16_t nvs_sig; /* HN_NVS_RXBUF_SIG */ + uint8_t nvs_rsvd[22]; +} __packed; +CTASSERT(sizeof(struct hn_nvs_rxbuf_conn) >= HN_NVS_REQSIZE_MIN); + +struct hn_nvs_rxbuf_sect { + uint32_t nvs_start; + uint32_t nvs_slotsz; + uint32_t nvs_slotcnt; + uint32_t nvs_end; +} __packed; + +struct hn_nvs_rxbuf_connresp { + uint32_t nvs_type; /* HN_NVS_TYPE_RXBUF_CONNRESP */ + uint32_t nvs_status; /* HN_NVS_STATUS_ */ + uint32_t nvs_nsect; /* # of elem in nvs_sect */ + struct hn_nvs_rxbuf_sect nvs_sect[]; +} __packed; + #endif /* !_IF_HNREG_H_ */ From 30bebccae08f225f57bd04d165426f788c52f4f5 Mon Sep 17 00:00:00 2001 From: Maxim Konovalov Date: Fri, 12 Aug 2016 08:16:35 +0000 Subject: [PATCH 57/76] o Move tmpstr varibale initialization out of assert(3) call. This fixes acpidump(8) compiled with "WITHOUT_ASSERT_DEBUG=yes" that removes assert(3)'s from the code. Submitted by: Alexander Nedotsukov --- usr.sbin/acpi/acpidump/acpi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/usr.sbin/acpi/acpidump/acpi.c b/usr.sbin/acpi/acpidump/acpi.c index e790ef118b74..6d3f31a1938c 100644 --- a/usr.sbin/acpi/acpidump/acpi.c +++ b/usr.sbin/acpi/acpidump/acpi.c @@ -1494,8 +1494,8 @@ aml_disassemble(ACPI_TABLE_HEADER *rsdt, ACPI_TABLE_HEADER *dsdp) perror("mkdtemp tmp working dir"); return; } - assert((size_t)snprintf(tmpstr, sizeof(tmpstr), "%s%s", wrkdir, iname) - <= sizeof(tmpstr) - 1); + len = (size_t)snprintf(tmpstr, sizeof(tmpstr), "%s%s", wrkdir, iname); + assert(len <= sizeof(tmpstr) - 1); fd = open(tmpstr, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR); if (fd < 0) { perror("iasl tmp file"); @@ -1527,8 +1527,8 @@ aml_disassemble(ACPI_TABLE_HEADER *rsdt, ACPI_TABLE_HEADER *dsdp) } /* Dump iasl's output to stdout */ - assert((size_t)snprintf(tmpstr, sizeof(tmpstr), "%s%s", wrkdir, oname) - <= sizeof(tmpstr) -1); + len = (size_t)snprintf(tmpstr, sizeof(tmpstr), "%s%s", wrkdir, oname); + assert(len <= sizeof(tmpstr) - 1); fp = fopen(tmpstr, "r"); if (unlink(tmpstr) < 0) { perror("unlink"); From 55c4b97bfa19583d1ac599aaeea4ccf7c20b96cc Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Fri, 12 Aug 2016 08:21:02 +0000 Subject: [PATCH 58/76] hyperv/hn: Switch to vmbus xact APIs for NVS chimney buffer connection. MFC after: 1 week Sponsored by: Microsoft Differential Revision: https://reviews.freebsd.org/D7470 --- sys/dev/hyperv/netvsc/hv_net_vsc.c | 103 +++++++++++++++++++---------- sys/dev/hyperv/netvsc/if_hnreg.h | 17 +++++ 2 files changed, 86 insertions(+), 34 deletions(-) diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c index 617409fac0ac..a1fcb3fb0c87 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.c +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c @@ -241,7 +241,7 @@ hv_nv_init_rx_buffer_with_net_vsp(struct hn_softc *sc) if (status != HN_NVS_STATUS_OK) { if_printf(sc->hn_ifp, "rxbuf conn failed: %x\n", status); - error = EINVAL; + error = EIO; goto cleanup; } net_dev->rx_section_count = 1; @@ -260,9 +260,13 @@ static int hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc) { struct hn_send_ctx sndc; + struct vmbus_xact *xact; + struct hn_nvs_chim_conn *chim; + const struct hn_nvs_chim_connresp *resp; + size_t resp_len; + uint32_t status, sectsz; netvsc_dev *net_dev; - nvsp_msg *init_pkt; - int ret = 0; + int error; net_dev = hv_nv_get_outbound_net_device(sc); if (!net_dev) { @@ -274,7 +278,7 @@ hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc) BUS_DMA_WAITOK | BUS_DMA_ZERO); if (net_dev->send_buf == NULL) { device_printf(sc->hn_dev, "allocate chimney txbuf failed\n"); - return ENOMEM; + return (ENOMEM); } /* @@ -284,48 +288,77 @@ hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc) * Only primary channel has chimney sending buffer connected to it. * Sub-channels just share this chimney sending buffer. */ - ret = vmbus_chan_gpadl_connect(sc->hn_prichan, + error = vmbus_chan_gpadl_connect(sc->hn_prichan, net_dev->txbuf_dma.hv_paddr, net_dev->send_buf_size, &net_dev->send_buf_gpadl_handle); - if (ret != 0) { - device_printf(sc->hn_dev, "chimney sending buffer gpadl " - "connect failed: %d\n", ret); + if (error) { + if_printf(sc->hn_ifp, "chimney sending buffer gpadl " + "connect failed: %d\n", error); goto cleanup; } - /* Notify the NetVsp of the gpadl handle */ + /* + * Connect chimney sending buffer to NVS + */ - init_pkt = &net_dev->channel_init_packet; + xact = vmbus_xact_get(sc->hn_xact, sizeof(*chim)); + if (xact == NULL) { + if_printf(sc->hn_ifp, "no xact for nvs chim conn\n"); + error = ENXIO; + goto cleanup; + } - memset(init_pkt, 0, sizeof(nvsp_msg)); + chim = vmbus_xact_req_data(xact); + chim->nvs_type = HN_NVS_TYPE_CHIM_CONN; + chim->nvs_gpadl = net_dev->send_buf_gpadl_handle; + chim->nvs_sig = HN_NVS_CHIM_SIG; - init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf; - init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = - net_dev->send_buf_gpadl_handle; - init_pkt->msgs.vers_1_msgs.send_rx_buf.id = - NETVSC_SEND_BUFFER_ID; + hn_send_ctx_init_simple(&sndc, hn_nvs_sent_xact, xact); + vmbus_xact_activate(xact); - /* Send the gpadl notification request */ - - hn_send_ctx_init_simple(&sndc, hn_nvs_sent_wakeup, NULL); - ret = vmbus_chan_send(sc->hn_prichan, + error = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)&sndc); - if (ret != 0) { + chim, sizeof(*chim), (uint64_t)(uintptr_t)&sndc); + if (error) { + if_printf(sc->hn_ifp, "send nvs chim conn failed: %d\n", + error); + vmbus_xact_deactivate(xact); + vmbus_xact_put(xact); goto cleanup; } - sema_wait(&net_dev->channel_init_sema); - - /* Check the response */ - if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status - != nvsp_status_success) { - ret = EINVAL; + resp = vmbus_xact_wait(xact, &resp_len); + if (resp_len < sizeof(*resp)) { + if_printf(sc->hn_ifp, "invalid chim conn resp length %zu\n", + resp_len); + vmbus_xact_put(xact); + error = EINVAL; + goto cleanup; + } + if (resp->nvs_type != HN_NVS_TYPE_CHIM_CONNRESP) { + if_printf(sc->hn_ifp, "not chim conn resp, type %u\n", + resp->nvs_type); + vmbus_xact_put(xact); + error = EINVAL; goto cleanup; } - net_dev->send_section_size = - init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size; + status = resp->nvs_status; + sectsz = resp->nvs_sectsz; + vmbus_xact_put(xact); + + if (status != HN_NVS_STATUS_OK) { + if_printf(sc->hn_ifp, "chim conn failed: %x\n", status); + error = EIO; + goto cleanup; + } + if (sectsz == 0) { + if_printf(sc->hn_ifp, "zero chimney sending buffer " + "section size\n"); + return 0; + } + + net_dev->send_section_size = sectsz; net_dev->send_section_count = net_dev->send_buf_size / net_dev->send_section_size; net_dev->bitsmap_words = howmany(net_dev->send_section_count, @@ -334,13 +367,15 @@ hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc) malloc(net_dev->bitsmap_words * sizeof(long), M_NETVSC, M_WAITOK | M_ZERO); - goto exit; + if (bootverbose) { + if_printf(sc->hn_ifp, "chimney sending buffer %u/%u\n", + net_dev->send_section_size, net_dev->send_section_count); + } + return 0; cleanup: hv_nv_destroy_send_buffer(net_dev); - -exit: - return (ret); + return (error); } /* diff --git a/sys/dev/hyperv/netvsc/if_hnreg.h b/sys/dev/hyperv/netvsc/if_hnreg.h index cdbd5b7107db..1e6239583f74 100644 --- a/sys/dev/hyperv/netvsc/if_hnreg.h +++ b/sys/dev/hyperv/netvsc/if_hnreg.h @@ -33,6 +33,7 @@ #include #define HN_NVS_RXBUF_SIG 0xcafe +#define HN_NVS_CHIM_SIG 0xface #define HN_NVS_STATUS_OK 1 @@ -41,6 +42,8 @@ #define HN_NVS_TYPE_NDIS_INIT 100 #define HN_NVS_TYPE_RXBUF_CONN 101 #define HN_NVS_TYPE_RXBUF_CONNRESP 102 +#define HN_NVS_TYPE_CHIM_CONN 104 +#define HN_NVS_TYPE_CHIM_CONNRESP 105 #define HN_NVS_TYPE_NDIS_CONF 125 /* @@ -109,4 +112,18 @@ struct hn_nvs_rxbuf_connresp { struct hn_nvs_rxbuf_sect nvs_sect[]; } __packed; +struct hn_nvs_chim_conn { + uint32_t nvs_type; /* HN_NVS_TYPE_CHIM_CONN */ + uint32_t nvs_gpadl; /* chimney buf vmbus GPADL */ + uint16_t nvs_sig; /* NDIS_NVS_CHIM_SIG */ + uint8_t nvs_rsvd[22]; +} __packed; +CTASSERT(sizeof(struct hn_nvs_chim_conn) >= HN_NVS_REQSIZE_MIN); + +struct hn_nvs_chim_connresp { + uint32_t nvs_type; /* HN_NVS_TYPE_CHIM_CONNRESP */ + uint32_t nvs_status; /* HN_NVS_STATUS_ */ + uint32_t nvs_sectsz; /* section size */ +} __packed; + #endif /* !_IF_HNREG_H_ */ From 8b204d6643ce45aaab0fab16fc3e0a8fa179e202 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Fri, 12 Aug 2016 08:29:26 +0000 Subject: [PATCH 59/76] hyperv/hn: Simplify RXBUF disconnection. MFC after: 1 week Sponsored by: Microsoft Differential Revision: https://reviews.freebsd.org/D7472 --- sys/dev/hyperv/netvsc/hv_net_vsc.c | 28 ++++++++++------------------ sys/dev/hyperv/netvsc/if_hnreg.h | 9 +++++++++ 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c index a1fcb3fb0c87..c1d1c487fa96 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.c +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c @@ -384,32 +384,24 @@ hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc) static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev) { - nvsp_msg *revoke_pkt; int ret = 0; - /* - * If we got a section count, it means we received a - * send_rx_buf_complete msg - * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore, - * we need to send a revoke msg here - */ if (net_dev->rx_section_count) { - /* Send the revoke receive buffer */ - revoke_pkt = &net_dev->revoke_packet; - memset(revoke_pkt, 0, sizeof(nvsp_msg)); + struct hn_nvs_rxbuf_disconn disconn; - revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf; - revoke_pkt->msgs.vers_1_msgs.revoke_rx_buf.id = - NETVSC_RECEIVE_BUFFER_ID; + /* + * Disconnect RXBUF from NVS. + */ + memset(&disconn, 0, sizeof(disconn)); + disconn.nvs_type = HN_NVS_TYPE_RXBUF_DISCONN; + disconn.nvs_sig = HN_NVS_RXBUF_SIG; ret = vmbus_chan_send(net_dev->sc->hn_prichan, - VMBUS_CHANPKT_TYPE_INBAND, 0, revoke_pkt, sizeof(nvsp_msg), + VMBUS_CHANPKT_TYPE_INBAND, 0, &disconn, sizeof(disconn), (uint64_t)(uintptr_t)&hn_send_ctx_none); - /* - * If we failed here, we might as well return and have a leak - * rather than continue and a bugchk - */ if (ret != 0) { + if_printf(net_dev->sc->hn_ifp, + "send rxbuf disconn failed: %d\n", ret); return (ret); } net_dev->rx_section_count = 0; diff --git a/sys/dev/hyperv/netvsc/if_hnreg.h b/sys/dev/hyperv/netvsc/if_hnreg.h index 1e6239583f74..abe371a36915 100644 --- a/sys/dev/hyperv/netvsc/if_hnreg.h +++ b/sys/dev/hyperv/netvsc/if_hnreg.h @@ -42,6 +42,7 @@ #define HN_NVS_TYPE_NDIS_INIT 100 #define HN_NVS_TYPE_RXBUF_CONN 101 #define HN_NVS_TYPE_RXBUF_CONNRESP 102 +#define HN_NVS_TYPE_RXBUF_DISCONN 103 #define HN_NVS_TYPE_CHIM_CONN 104 #define HN_NVS_TYPE_CHIM_CONNRESP 105 #define HN_NVS_TYPE_NDIS_CONF 125 @@ -112,6 +113,14 @@ struct hn_nvs_rxbuf_connresp { struct hn_nvs_rxbuf_sect nvs_sect[]; } __packed; +/* No response */ +struct hn_nvs_rxbuf_disconn { + uint32_t nvs_type; /* HN_NVS_TYPE_RXBUF_DISCONN */ + uint16_t nvs_sig; /* HN_NVS_RXBUF_SIG */ + uint8_t nvs_rsvd[26]; +} __packed; +CTASSERT(sizeof(struct hn_nvs_rxbuf_disconn) >= HN_NVS_REQSIZE_MIN); + struct hn_nvs_chim_conn { uint32_t nvs_type; /* HN_NVS_TYPE_CHIM_CONN */ uint32_t nvs_gpadl; /* chimney buf vmbus GPADL */ From 510a3f1b7911655bc3012850d8575d97bd1cfa5e Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Fri, 12 Aug 2016 10:29:34 +0000 Subject: [PATCH 60/76] Implement promotions and demotions in the arm64 pmap code. For now we don't promote memory as I am not sure all the demotion cases are handled, however it is useful to implement pmap_page_set_memattr. This is used, for example, when mapping uncached memory for bus_dma(9). pmap_page_set_memattr needs to demote the DMAP region as on ARM we need to ensure all mappings to the same physical address have the same attributes. Reviewed by: kib Obtained from: ABT Systems Ltd MFC after: 1 month Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D6987 --- sys/arm64/arm64/pmap.c | 470 ++++++++++++++++++++++++++++++++++++++- sys/arm64/arm64/trap.c | 23 +- sys/arm64/include/pmap.h | 2 + 3 files changed, 479 insertions(+), 16 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 4ed131e54859..d9ea4e329766 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -229,6 +229,13 @@ CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS); #define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) extern pt_entry_t pagetable_dmap[]; +static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); + +static int superpages_enabled = 1; +SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, + CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0, + "Are large page mappings enabled?"); + /* * Data for the pv entry allocation mechanism */ @@ -243,6 +250,13 @@ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); + +static int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode); +static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode); +static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va); +static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, + vm_offset_t va, struct rwlock **lockp); +static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, @@ -422,6 +436,13 @@ pmap_pte(pmap_t pmap, vm_offset_t va, int *level) return (l3); } +static inline bool +pmap_superpages_enabled(void) +{ + + return (superpages_enabled != 0); +} + bool pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1, pd_entry_t **l2, pt_entry_t **l3) @@ -836,6 +857,11 @@ pmap_init(void) { int i; + /* + * Are large page mappings enabled? + */ + TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled); + /* * Initialize the pv chunk list mutex. */ @@ -2000,6 +2026,15 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) l3_paddr = pmap_load(l2); + if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) { + KASSERT((l3_paddr & ATTR_SW_MANAGED) == 0, + ("%s: TODO: Demote managed pages", __func__)); + if (pmap_demote_l2_locked(pmap, l2, sva & ~L2_OFFSET, + &lock) == NULL) + continue; + l3_paddr = pmap_load(l2); + } + /* * Weed out invalid mappings. */ @@ -2193,6 +2228,99 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) pmap_invalidate_all(pmap); } +/* + * Performs a break-before-make update of a pmap entry. This is needed when + * either promoting or demoting pages to ensure the TLB doesn't get into an + * inconsistent state. + */ +static void +pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte, + vm_offset_t va) +{ + register_t intr; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + /* + * Ensure we don't get switched out with the page table in an + * inconsistent state. We also need to ensure no interrupts fire + * as they may make use of an address we are about to invalidate. + */ + intr = intr_disable(); + critical_enter(); + + /* Clear the old mapping */ + pmap_load_clear(pte); + PTE_SYNC(pte); + pmap_invalidate_page(pmap, va); + + /* Create the new mapping */ + pmap_load_store(pte, newpte); + PTE_SYNC(pte); + + critical_exit(); + intr_restore(intr); +} + +/* + * Tries to promote the 512, contiguous 4KB page mappings that are within a + * single level 2 table entry to a single 2MB page mapping. For promotion + * to occur, two conditions must be met: (1) the 4KB page mappings must map + * aligned, contiguous physical memory and (2) the 4KB page mappings must have + * identical characteristics. + */ +static void +pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, + struct rwlock **lockp) +{ + pt_entry_t *firstl3, *l3, newl2, oldl3, pa; + register_t intr; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + firstl3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK); + newl2 = pmap_load(firstl3); + /* Ignore managed pages for now */ + if ((newl2 & ATTR_SW_MANAGED) != 0) + return; + + /* Check the alingment is valid */ + if (((newl2 & ~ATTR_MASK) & L2_OFFSET) != 0) + return; + + pa = newl2 + L2_SIZE - PAGE_SIZE; + for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) { + oldl3 = pmap_load(l3); + if (oldl3 != pa) + return; + pa -= PAGE_SIZE; + } + + newl2 &= ~ATTR_DESCR_MASK; + newl2 |= L2_BLOCK; + + /* + * Ensure we don't get switched out with the page table in an + * inconsistent state. We also need to ensure no interrupts fire + * as they may make use of an address we are about to invalidate. + */ + intr = intr_disable(); + critical_enter(); + + /* Clear the old mapping */ + pmap_load_clear(l2); + PTE_SYNC(l2); + pmap_invalidate_range(pmap, rounddown2(va, L2_SIZE), + roundup2(va, L2_SIZE)); + + /* Create the new mapping */ + pmap_load_store(l2, newl2); + PTE_SYNC(l2); + + critical_exit(); + intr_restore(intr); +} + /* * Insert the given physical page (p) at * the specified virtual address (v) in the @@ -2212,7 +2340,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock *lock; pd_entry_t *pde; pt_entry_t new_l3, orig_l3; - pt_entry_t *l3; + pt_entry_t *l2, *l3; pv_entry_t pv; vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; vm_page_t mpte, om, l1_m, l2_m, l3_m; @@ -2239,6 +2367,20 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, lock = NULL; PMAP_LOCK(pmap); + pde = pmap_pde(pmap, va, &lvl); + if (pde != NULL && lvl == 1) { + l2 = pmap_l1_to_l2(pde, va); + if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK && + (l3 = pmap_demote_l2_locked(pmap, l2, va, &lock)) != NULL) { + if (va < VM_MAXUSER_ADDRESS) { + mpte = PHYS_TO_VM_PAGE( + pmap_load(l2) & ~ATTR_MASK); + mpte->wire_count++; + } + goto havel3; + } + } + if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock); @@ -2320,6 +2462,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, l3 = pmap_l2_to_l3(pde, va); pmap_invalidate_page(pmap, va); } +havel3: om = NULL; orig_l3 = pmap_load(l3); @@ -2400,7 +2543,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, if (orig_l3 != 0) { validate: orig_l3 = pmap_load_store(l3, new_l3); - PTE_SYNC(l3); opa = orig_l3 & ~ATTR_MASK; if (opa != pa) { @@ -2419,12 +2561,24 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, } } else { pmap_load_store(l3, new_l3); - PTE_SYNC(l3); } + + PTE_SYNC(l3); pmap_invalidate_page(pmap, va); + if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) cpu_icache_sync_range(va, PAGE_SIZE); + /* XXX: Not yet, not all demotions are handled */ +#if 0 + if ((mpte == NULL || mpte->wire_count == NL3PG) && + pmap_superpages_enabled() && (m->flags & PG_FICTITIOUS) == 0 && + vm_reserv_level_iffullpop(m) == 0) { + KASSERT(lvl == 2, ("Invalid pde level %d", lvl)); + pmap_promote_l2(pmap, pde, va, &lock); + } +#endif + if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); @@ -3340,14 +3494,271 @@ pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) m->md.pv_memattr = ma; /* - * ARM64TODO: Implement the below (from the amd64 pmap) * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. */ if ((m->flags & PG_FICTITIOUS) == 0 && - PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) - panic("ARM64TODO: pmap_page_set_memattr"); + pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE, + m->md.pv_memattr) != 0) + panic("memory attribute change on the direct map failed"); +} + +/* + * Changes the specified virtual address range's memory type to that given by + * the parameter "mode". The specified virtual address range must be + * completely contained within either the direct map or the kernel map. If + * the virtual address range is contained within the kernel map, then the + * memory type for each of the corresponding ranges of the direct map is also + * changed. (The corresponding ranges of the direct map are those ranges that + * map the same physical pages as the specified virtual address range.) These + * changes to the direct map are necessary because Intel describes the + * behavior of their processors as "undefined" if two or more mappings to the + * same physical page have different memory types. + * + * Returns zero if the change completed successfully, and either EINVAL or + * ENOMEM if the change failed. Specifically, EINVAL is returned if some part + * of the virtual address range was not mapped, and ENOMEM is returned if + * there was insufficient memory available to complete the change. In the + * latter case, the memory type may have been changed on some part of the + * virtual address range or the direct map. + */ +static int +pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) +{ + int error; + + PMAP_LOCK(kernel_pmap); + error = pmap_change_attr_locked(va, size, mode); + PMAP_UNLOCK(kernel_pmap); + return (error); +} + +static int +pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode) +{ + vm_offset_t base, offset, tmpva; + pt_entry_t l3, *pte, *newpte; + int lvl; + + PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED); + base = trunc_page(va); + offset = va & PAGE_MASK; + size = round_page(offset + size); + + if (!VIRT_IN_DMAP(base)) + return (EINVAL); + + for (tmpva = base; tmpva < base + size; ) { + pte = pmap_pte(kernel_pmap, va, &lvl); + if (pte == NULL) + return (EINVAL); + + if ((pmap_load(pte) & ATTR_IDX_MASK) == ATTR_IDX(mode)) { + /* + * We already have the correct attribute, + * ignore this entry. + */ + switch (lvl) { + default: + panic("Invalid DMAP table level: %d\n", lvl); + case 1: + tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE; + break; + case 2: + tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE; + break; + case 3: + tmpva += PAGE_SIZE; + break; + } + } else { + /* + * Split the entry to an level 3 table, then + * set the new attribute. + */ + switch (lvl) { + default: + panic("Invalid DMAP table level: %d\n", lvl); + case 1: + newpte = pmap_demote_l1(kernel_pmap, pte, + tmpva & ~L1_OFFSET); + if (newpte == NULL) + return (EINVAL); + pte = pmap_l1_to_l2(pte, tmpva); + case 2: + newpte = pmap_demote_l2(kernel_pmap, pte, + tmpva & ~L2_OFFSET); + if (newpte == NULL) + return (EINVAL); + pte = pmap_l2_to_l3(pte, tmpva); + case 3: + /* Update the entry */ + l3 = pmap_load(pte); + l3 &= ~ATTR_IDX_MASK; + l3 |= ATTR_IDX(mode); + + pmap_update_entry(kernel_pmap, pte, l3, tmpva); + + /* + * If moving to a non-cacheable entry flush + * the cache. + */ + if (mode == VM_MEMATTR_UNCACHEABLE) + cpu_dcache_wbinv_range(tmpva, L3_SIZE); + + break; + } + tmpva += PAGE_SIZE; + } + } + + return (0); +} + +/* + * Create an L2 table to map all addresses within an L1 mapping. + */ +static pt_entry_t * +pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va) +{ + pt_entry_t *l2, newl2, oldl1; + vm_offset_t tmpl1; + vm_paddr_t l2phys, phys; + vm_page_t ml2; + int i; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + oldl1 = pmap_load(l1); + KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK, + ("pmap_demote_l1: Demoting a non-block entry")); + KASSERT((va & L1_OFFSET) == 0, + ("pmap_demote_l1: Invalid virtual address %#lx", va)); + + tmpl1 = 0; + if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) { + tmpl1 = kva_alloc(PAGE_SIZE); + if (tmpl1 == 0) + return (NULL); + } + + if ((ml2 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { + CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx" + " in pmap %p", va, pmap); + return (NULL); + } + + l2phys = VM_PAGE_TO_PHYS(ml2); + l2 = (pt_entry_t *)PHYS_TO_DMAP(l2phys); + + /* Address the range points at */ + phys = oldl1 & ~ATTR_MASK; + /* The attributed from the old l1 table to be copied */ + newl2 = oldl1 & ATTR_MASK; + + /* Create the new entries */ + for (i = 0; i < Ln_ENTRIES; i++) { + l2[i] = newl2 | phys; + phys += L2_SIZE; + } + cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); + + if (tmpl1 != 0) { + pmap_kenter(tmpl1, PAGE_SIZE, + DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET, CACHED_MEMORY); + l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK)); + } + + pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va); + + if (tmpl1 != 0) { + pmap_kremove(tmpl1); + kva_free(tmpl1, PAGE_SIZE); + } + + return (l2); +} + +/* + * Create an L3 table to map all addresses within an L2 mapping. + */ +static pt_entry_t * +pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, + struct rwlock **lockp) +{ + pt_entry_t *l3, newl3, oldl2; + vm_offset_t tmpl2; + vm_paddr_t l3phys, phys; + vm_page_t ml3; + int i; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + oldl2 = pmap_load(l2); + KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK, + ("pmap_demote_l2: Demoting a non-block entry")); + KASSERT((va & L2_OFFSET) == 0, + ("pmap_demote_l2: Invalid virtual address %#lx", va)); + KASSERT((oldl2 & ATTR_SW_MANAGED) == 0, + ("pmap_demote_l2: TODO: Demote managed pages")); + + tmpl2 = 0; + if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) { + tmpl2 = kva_alloc(PAGE_SIZE); + if (tmpl2 == 0) + return (NULL); + } + + if ((ml3 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { + CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx" + " in pmap %p", va, pmap); + return (NULL); + } + + l3phys = VM_PAGE_TO_PHYS(ml3); + l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys); + + /* Address the range points at */ + phys = oldl2 & ~ATTR_MASK; + /* The attributed from the old l2 table to be copied */ + newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE; + + /* Create the new entries */ + for (i = 0; i < Ln_ENTRIES; i++) { + l3[i] = newl3 | phys; + phys += L3_SIZE; + } + cpu_dcache_wb_range((vm_offset_t)l3, PAGE_SIZE); + + if (tmpl2 != 0) { + pmap_kenter(tmpl2, PAGE_SIZE, + DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET, CACHED_MEMORY); + l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK)); + } + + pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va); + + if (tmpl2 != 0) { + pmap_kremove(tmpl2); + kva_free(tmpl2, PAGE_SIZE); + } + + return (l3); + +} + +static pt_entry_t * +pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) +{ + struct rwlock *lock; + pt_entry_t *l3; + + lock = NULL; + l3 = pmap_demote_l2_locked(pmap, l2, va, &lock); + if (lock != NULL) + rw_wunlock(lock); + return (l3); } /* @@ -3480,6 +3891,53 @@ pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) } } +int +pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far) +{ +#ifdef SMP + uint64_t par; +#endif + + switch (ESR_ELx_EXCEPTION(esr)) { + case EXCP_DATA_ABORT_L: + case EXCP_DATA_ABORT: + break; + default: + return (KERN_FAILURE); + } + +#ifdef SMP + PMAP_LOCK(pmap); + switch (esr & ISS_DATA_DFSC_MASK) { + case ISS_DATA_DFSC_TF_L0: + case ISS_DATA_DFSC_TF_L1: + case ISS_DATA_DFSC_TF_L2: + case ISS_DATA_DFSC_TF_L3: + /* Ask the MMU to check the address */ + if (pmap == kernel_pmap) + par = arm64_address_translate_s1e1r(far); + else + par = arm64_address_translate_s1e0r(far); + + /* + * If the translation was successful the address was invalid + * due to a break-before-make sequence. We can unlock and + * return success to the trap handler. + */ + if (PAR_SUCCESS(par)) { + PMAP_UNLOCK(pmap); + return (KERN_SUCCESS); + } + break; + default: + break; + } + PMAP_UNLOCK(pmap); +#endif + + return (KERN_FAILURE); +} + /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. diff --git a/sys/arm64/arm64/trap.c b/sys/arm64/arm64/trap.c index ea932edca8f6..d9367dfcf742 100644 --- a/sys/arm64/arm64/trap.c +++ b/sys/arm64/arm64/trap.c @@ -179,16 +179,6 @@ data_abort(struct trapframe *frame, uint64_t esr, uint64_t far, int lower) return; } - KASSERT(td->td_md.md_spinlock_count == 0, - ("data abort with spinlock held")); - if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | - WARN_GIANTOK, NULL, "Kernel page fault") != 0) { - print_registers(frame); - printf(" far: %16lx\n", far); - printf(" esr: %.8lx\n", esr); - panic("data abort in critical section or under mutex"); - } - p = td->td_proc; if (lower) map = &p->p_vmspace->vm_map; @@ -200,6 +190,19 @@ data_abort(struct trapframe *frame, uint64_t esr, uint64_t far, int lower) map = &p->p_vmspace->vm_map; } + if (pmap_fault(map->pmap, esr, far) == KERN_SUCCESS) + return; + + KASSERT(td->td_md.md_spinlock_count == 0, + ("data abort with spinlock held")); + if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | + WARN_GIANTOK, NULL, "Kernel page fault") != 0) { + print_registers(frame); + printf(" far: %16lx\n", far); + printf(" esr: %.8lx\n", esr); + panic("data abort in critical section or under mutex"); + } + va = trunc_page(far); ftype = ((esr >> 6) & 1) ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ; diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h index 578eb46988f1..1ea61f6fe1cb 100644 --- a/sys/arm64/include/pmap.h +++ b/sys/arm64/include/pmap.h @@ -151,6 +151,8 @@ void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); bool pmap_get_tables(pmap_t, vm_offset_t, pd_entry_t **, pd_entry_t **, pd_entry_t **, pt_entry_t **); +int pmap_fault(pmap_t, uint64_t, uint64_t); + #define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) #endif /* _KERNEL */ From 786c805027b1e5d026b11b7475ac826038b442f7 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Fri, 12 Aug 2016 11:06:54 +0000 Subject: [PATCH 61/76] PORTS_MODULES: Don't leak in CC/CXX/CPP. These may have ccache in them or -target/--sysroot from external compiler or SYSTEM_COMPILER support. Many ports do not support a CC with spaces in it, such as emulators/virtualbox-ose-kmod. Passing --sysroot to ports makes no sense as ports doesn't support --sysroot currently. If these variables need to be overridden for ports then they can be set in make.conf or passed as make arguments. MFC after: 1 week Sponsored by: EMC / Isilon Storage Division --- sys/conf/kern.post.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/conf/kern.post.mk b/sys/conf/kern.post.mk index 4cb60c002b62..32ec5b1b445c 100644 --- a/sys/conf/kern.post.mk +++ b/sys/conf/kern.post.mk @@ -65,6 +65,10 @@ OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ # Keep the related ports builds in the obj directory so that they are only rebuilt once per kernel build WRKDIRPREFIX?= ${MAKEOBJDIRPREFIX}${SRC_BASE}/sys/${KERNCONF} PORTSMODULESENV=\ + env \ + -u CC \ + -u CXX \ + -u CPP \ PATH=${PATH}:${LOCALBASE}/bin:${LOCALBASE}/sbin \ SRC_BASE=${SRC_BASE} \ OSVERSION=${OSRELDATE} \ From c8de0c57d7968173287624ea40da736725752d99 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Fri, 12 Aug 2016 13:52:51 +0000 Subject: [PATCH 62/76] Avoid showing the bootstrap make command for check-old, etc. Reported by: koobs MFC after: 1 week Sponsored by: EMC / Isilon Storage Division --- Makefile | 3 ++- Makefile.inc1 | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index d3dc7b1f109e..cf973dcad417 100644 --- a/Makefile +++ b/Makefile @@ -209,7 +209,8 @@ SUB_MAKE= `test -x ${MYMAKE} && echo ${MYMAKE} || echo ${MAKE}` \ SUB_MAKE= ${MAKE} -m ${.CURDIR}/share/mk .endif -_MAKE= PATH=${PATH} ${SUB_MAKE} -f Makefile.inc1 TARGET=${_TARGET} TARGET_ARCH=${_TARGET_ARCH} +_MAKE= PATH=${PATH} MAKE_CMD=${MAKE} ${SUB_MAKE} -f Makefile.inc1 \ + TARGET=${_TARGET} TARGET_ARCH=${_TARGET_ARCH} # Only allow meta mode for the whitelisted targets. See META_TGT_WHITELIST # above. diff --git a/Makefile.inc1 b/Makefile.inc1 index 7e9a3526b932..14b553a9058b 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -2378,11 +2378,11 @@ check-old-dirs: .PHONY done delete-old: delete-old-files delete-old-dirs .PHONY - @echo "To remove old libraries run '${MAKE} delete-old-libs'." + @echo "To remove old libraries run '${MAKE_CMD} delete-old-libs'." check-old: check-old-files check-old-libs check-old-dirs .PHONY - @echo "To remove old files and directories run '${MAKE} delete-old'." - @echo "To remove old libraries run '${MAKE} delete-old-libs'." + @echo "To remove old files and directories run '${MAKE_CMD} delete-old'." + @echo "To remove old libraries run '${MAKE_CMD} delete-old-libs'." .endif From 15919690b502a1b60b44d40ed9d8347f5aaaba80 Mon Sep 17 00:00:00 2001 From: Warren Block Date: Fri, 12 Aug 2016 14:10:11 +0000 Subject: [PATCH 63/76] Correct the history of where ps first appeared. PR: 211741 Submitted by: Sevan Janiyan MFC after: 1 week --- bin/ps/ps.1 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/ps/ps.1 b/bin/ps/ps.1 index ea094bccbcb1..c19f255ae233 100644 --- a/bin/ps/ps.1 +++ b/bin/ps/ps.1 @@ -768,7 +768,8 @@ operating systems. The .Nm command appeared in -.At v4 . +.At v3 +in section 8 of the manual. .Sh BUGS Since .Nm From f566b25e0da88e6954bba6456460b503f5d1155a Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Fri, 12 Aug 2016 16:05:53 +0000 Subject: [PATCH 64/76] Avoid taking PROC_LOCK in syscalls if not being traced. MFC after: 1 week Sponsored by: EMC / Isilon Storage Division --- sys/dev/filemon/filemon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/dev/filemon/filemon.c b/sys/dev/filemon/filemon.c index 919af9d6dbe6..26e1bc38d071 100644 --- a/sys/dev/filemon/filemon.c +++ b/sys/dev/filemon/filemon.c @@ -137,6 +137,8 @@ filemon_proc_get(struct proc *p) { struct filemon *filemon; + if (p->p_filemon == NULL) + return (NULL); PROC_LOCK(p); filemon = filemon_acquire(p->p_filemon); PROC_UNLOCK(p); From 563e7037148a62f54ad13d41b8bce937c57dd506 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Fri, 12 Aug 2016 16:13:50 +0000 Subject: [PATCH 65/76] Remove description of P_FOLLOWFORK as this flag was removed. --- bin/ps/ps.1 | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bin/ps/ps.1 b/bin/ps/ps.1 index c19f255ae233..8ec78775b8f4 100644 --- a/bin/ps/ps.1 +++ b/bin/ps/ps.1 @@ -29,7 +29,7 @@ .\" @(#)ps.1 8.3 (Berkeley) 4/18/94 .\" $FreeBSD$ .\" -.Dd July 28, 2016 +.Dd August 12, 2016 .Dt PS 1 .Os .Sh NAME @@ -319,7 +319,6 @@ the include file .It Dv "P_ADVLOCK" Ta No "0x00001" Ta "Process may hold a POSIX advisory lock" .It Dv "P_CONTROLT" Ta No "0x00002" Ta "Has a controlling terminal" .It Dv "P_KPROC" Ta No "0x00004" Ta "Kernel process" -.It Dv "P_FOLLOWFORK" Ta No "0x00008" Ta "Attach debugger to new children" .It Dv "P_PPWAIT" Ta No "0x00010" Ta "Parent is waiting for child to exec/exit" .It Dv "P_PROFIL" Ta No "0x00020" Ta "Has started profiling" .It Dv "P_STOPPROF" Ta No "0x00040" Ta "Has thread in requesting to stop prof" From 20ee0f7112c7b3658e5292276fb500bf274ac9ca Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Fri, 12 Aug 2016 18:29:11 +0000 Subject: [PATCH 66/76] Remove all remaining uses of TAILQ_FOREACH_FROM() from rtld-elf. Sponsored by: The FreeBSD Foundation MFC after: 1 week --- libexec/rtld-elf/rtld.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index ca722cfa175b..01beeb874e0c 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -2164,8 +2164,7 @@ load_needed_objects(Obj_Entry *first, int flags) { Obj_Entry *obj; - obj = first; - TAILQ_FOREACH_FROM(obj, &obj_list, next) { + for (obj = first; obj != NULL; obj = TAILQ_NEXT(obj, next)) { if (obj->marker) continue; if (process_needed(obj, obj->needed, flags) == -1) @@ -2769,9 +2768,8 @@ relocate_objects(Obj_Entry *first, bool bind_now, Obj_Entry *rtldobj, Obj_Entry *obj; int error; - error = 0; - obj = first; - TAILQ_FOREACH_FROM(obj, &obj_list, next) { + for (error = 0, obj = first; obj != NULL; + obj = TAILQ_NEXT(obj, next)) { if (obj->marker) continue; error = relocate_object(obj, bind_now, rtldobj, flags, @@ -2811,8 +2809,7 @@ resolve_objects_ifunc(Obj_Entry *first, bool bind_now, int flags, { Obj_Entry *obj; - obj = first; - TAILQ_FOREACH_FROM(obj, &obj_list, next) { + for (obj = first; obj != NULL; obj = TAILQ_NEXT(obj, next)) { if (obj->marker) continue; if (resolve_object_ifunc(obj, bind_now, flags, lockstate) == -1) @@ -4316,7 +4313,7 @@ trace_loaded_objects(Obj_Entry *obj) list_containers = getenv(_LD("TRACE_LOADED_OBJECTS_ALL")); - TAILQ_FOREACH_FROM(obj, &obj_list, next) { + for (; obj != NULL; obj = TAILQ_NEXT(obj, next)) { Needed_Entry *needed; char *name, *path; bool is_lib; @@ -4661,8 +4658,7 @@ allocate_tls(Obj_Entry *objs, void *oldtls, size_t tcbsize, size_t tcbalign) */ free_tls(oldtls, 2*sizeof(Elf_Addr), sizeof(Elf_Addr)); } else { - obj = objs; - TAILQ_FOREACH_FROM(obj, &obj_list, next) { + for (obj = objs; obj != NULL; obj = TAILQ_NEXT(obj, next)) { if (obj->marker || obj->tlsoffset == 0) continue; addr = segbase - obj->tlsoffset; From 09944776b5aeef402b30fbabe3663aaed3cadfa1 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Fri, 12 Aug 2016 18:31:44 +0000 Subject: [PATCH 67/76] Fill phdr and phsize for rtld object. It is needed for dl_iterate_phdr() reporting the correct values. PR: 211367 Sponsored by: The FreeBSD Foundation MFC after: 1 week --- libexec/rtld-elf/rtld.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index 01beeb874e0c..d891d5247e0c 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -1916,6 +1916,7 @@ static void init_rtld(caddr_t mapbase, Elf_Auxinfo **aux_info) { Obj_Entry objtmp; /* Temporary rtld object */ + const Elf_Ehdr *ehdr; const Elf_Dyn *dyn_rpath; const Elf_Dyn *dyn_soname; const Elf_Dyn *dyn_runpath; @@ -1954,6 +1955,9 @@ init_rtld(caddr_t mapbase, Elf_Auxinfo **aux_info) relocate_objects(&objtmp, true, &objtmp, 0, NULL); } + ehdr = (Elf_Ehdr *)mapbase; + objtmp.phdr = (Elf_Phdr *)((char *)mapbase + ehdr->e_phoff); + objtmp.phsize = ehdr->e_phnum * sizeof(objtmp.phdr[0]); /* Initialize the object list. */ TAILQ_INIT(&obj_list); From f214036e9937683966e172112e2ca70a6ed5b1d1 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Fri, 12 Aug 2016 19:31:41 +0000 Subject: [PATCH 68/76] Move defines common between rtld and libsysdecode into the header, instead of copying inline into sources. Sponsored by: The FreeBSD Foundation MFC after: 1 week --- lib/libsysdecode/Makefile | 1 + lib/libsysdecode/utrace.c | 34 ++++--------------- libexec/rtld-elf/rtld.c | 30 ++-------------- libexec/rtld-elf/rtld_utrace.h | 62 ++++++++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 54 deletions(-) create mode 100644 libexec/rtld-elf/rtld_utrace.h diff --git a/lib/libsysdecode/Makefile b/lib/libsysdecode/Makefile index 742c513469fe..123ea49ca572 100644 --- a/lib/libsysdecode/Makefile +++ b/lib/libsysdecode/Makefile @@ -9,6 +9,7 @@ SRCS= errno.c ioctl.c syscallnames.c utrace.c INCS= sysdecode.h CFLAGS+= -I${.CURDIR}/../../sys +CFLAGS+= -I${.CURDIR}/../../libexec/rtld-elf MAN+= sysdecode.3 \ sysdecode_abi_to_freebsd_errno.3 \ diff --git a/lib/libsysdecode/utrace.c b/lib/libsysdecode/utrace.c index 28e52c3fe4db..dfd0e70380b0 100644 --- a/lib/libsysdecode/utrace.c +++ b/lib/libsysdecode/utrace.c @@ -35,33 +35,11 @@ __FBSDID("$FreeBSD$"); #include #include #include - -#define UTRACE_DLOPEN_START 1 -#define UTRACE_DLOPEN_STOP 2 -#define UTRACE_DLCLOSE_START 3 -#define UTRACE_DLCLOSE_STOP 4 -#define UTRACE_LOAD_OBJECT 5 -#define UTRACE_UNLOAD_OBJECT 6 -#define UTRACE_ADD_RUNDEP 7 -#define UTRACE_PRELOAD_FINISHED 8 -#define UTRACE_INIT_CALL 9 -#define UTRACE_FINI_CALL 10 -#define UTRACE_DLSYM_START 11 -#define UTRACE_DLSYM_STOP 12 - -struct utrace_rtld { - char sig[4]; /* 'RTLD' */ - int event; - void *handle; - void *mapbase; - size_t mapsize; - int refcnt; - char name[MAXPATHLEN]; -}; +#include "rtld_utrace.h" #ifdef __LP64__ struct utrace_rtld32 { - char sig[4]; /* 'RTLD' */ + char sig[4]; int event; uint32_t handle; uint32_t mapbase; @@ -189,10 +167,11 @@ sysdecode_utrace(FILE *fp, void *p, size_t len) struct utrace_malloc um; struct utrace_malloc32 *pm; #endif + static const char rtld_utrace_sig[RTLD_UTRACE_SIG_SZ] = RTLD_UTRACE_SIG; - if (len == sizeof(struct utrace_rtld) && bcmp(p, "RTLD", 4) == 0) { + if (len == sizeof(struct utrace_rtld) && bcmp(p, rtld_utrace_sig, + sizeof(rtld_utrace_sig)) == 0) return (print_utrace_rtld(fp, p)); - } if (len == sizeof(struct utrace_malloc)) { print_utrace_malloc(fp, p); @@ -200,7 +179,8 @@ sysdecode_utrace(FILE *fp, void *p, size_t len) } #ifdef __LP64__ - if (len == sizeof(struct utrace_rtld32) && bcmp(p, "RTLD", 4) == 0) { + if (len == sizeof(struct utrace_rtld32) && bcmp(p, rtld_utrace_sig, + sizeof(rtld_utrace_sig)) == 0) { pr = p; memset(&ur, 0, sizeof(ur)); memcpy(ur.sig, pr->sig, sizeof(ur.sig)); diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index d891d5247e0c..fe25e59ec65a 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -59,6 +59,7 @@ #include "paths.h" #include "rtld_tls.h" #include "rtld_printf.h" +#include "rtld_utrace.h" #include "notes.h" /* Types. */ @@ -273,29 +274,6 @@ char *ld_env_prefix = LD_; (dlp)->num_alloc = obj_count, \ (dlp)->num_used = 0) -#define UTRACE_DLOPEN_START 1 -#define UTRACE_DLOPEN_STOP 2 -#define UTRACE_DLCLOSE_START 3 -#define UTRACE_DLCLOSE_STOP 4 -#define UTRACE_LOAD_OBJECT 5 -#define UTRACE_UNLOAD_OBJECT 6 -#define UTRACE_ADD_RUNDEP 7 -#define UTRACE_PRELOAD_FINISHED 8 -#define UTRACE_INIT_CALL 9 -#define UTRACE_FINI_CALL 10 -#define UTRACE_DLSYM_START 11 -#define UTRACE_DLSYM_STOP 12 - -struct utrace_rtld { - char sig[4]; /* 'RTLD' */ - int event; - void *handle; - void *mapbase; /* Used for 'parent' and 'init/fini' */ - size_t mapsize; - int refcnt; /* Used for 'mode' */ - char name[MAXPATHLEN]; -}; - #define LD_UTRACE(e, h, mb, ms, r, n) do { \ if (ld_utrace != NULL) \ ld_utrace_log(e, h, mb, ms, r, n); \ @@ -306,11 +284,9 @@ ld_utrace_log(int event, void *handle, void *mapbase, size_t mapsize, int refcnt, const char *name) { struct utrace_rtld ut; + static const char rtld_utrace_sig[RTLD_UTRACE_SIG_SZ] = RTLD_UTRACE_SIG; - ut.sig[0] = 'R'; - ut.sig[1] = 'T'; - ut.sig[2] = 'L'; - ut.sig[3] = 'D'; + memcpy(ut.sig, rtld_utrace_sig, sizeof(ut.sig)); ut.event = event; ut.handle = handle; ut.mapbase = mapbase; diff --git a/libexec/rtld-elf/rtld_utrace.h b/libexec/rtld-elf/rtld_utrace.h new file mode 100644 index 000000000000..8ac8a02775fd --- /dev/null +++ b/libexec/rtld-elf/rtld_utrace.h @@ -0,0 +1,62 @@ +/*- + * Copyright (c) 2007 John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef RTLD_UTRACE_H +#define RTLD_UTRACE_H + +#include + +#define UTRACE_DLOPEN_START 1 +#define UTRACE_DLOPEN_STOP 2 +#define UTRACE_DLCLOSE_START 3 +#define UTRACE_DLCLOSE_STOP 4 +#define UTRACE_LOAD_OBJECT 5 +#define UTRACE_UNLOAD_OBJECT 6 +#define UTRACE_ADD_RUNDEP 7 +#define UTRACE_PRELOAD_FINISHED 8 +#define UTRACE_INIT_CALL 9 +#define UTRACE_FINI_CALL 10 +#define UTRACE_DLSYM_START 11 +#define UTRACE_DLSYM_STOP 12 + +#define RTLD_UTRACE_SIG_SZ 4 +#define RTLD_UTRACE_SIG "RTLD" + +struct utrace_rtld { + char sig[RTLD_UTRACE_SIG_SZ]; + int event; + void *handle; + void *mapbase; /* Used for 'parent' and 'init/fini' */ + size_t mapsize; + int refcnt; /* Used for 'mode' */ + char name[MAXPATHLEN]; +}; + +#endif From 0e4c843ab4b8a3df31f7400c504e1b52fa585c9a Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Fri, 12 Aug 2016 19:47:20 +0000 Subject: [PATCH 69/76] Add defines needed to export SMBIOS serial numbers Some defines needed for exporting serial numbers from the SMBIOS were missed during integration of SMBIOS support in the EFI boot loader (r281138). This is needed for getting the hostid set from the system hardware UUID. PR: 206031 Submitted by: Thomas Eberhardt MFC after: 1 week --- sys/boot/efi/loader/Makefile | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sys/boot/efi/loader/Makefile b/sys/boot/efi/loader/Makefile index 6b1b4acb342b..2f54032cd059 100644 --- a/sys/boot/efi/loader/Makefile +++ b/sys/boot/efi/loader/Makefile @@ -63,6 +63,18 @@ CFLAGS+= -DNO_PCI -DEFI LIBSTAND= ${.OBJDIR}/../../../../lib/libstand/libstand.a .endif +.if !defined(BOOT_HIDE_SERIAL_NUMBERS) +# Export serial numbers, UUID, and asset tag from loader. +CFLAGS+= -DSMBIOS_SERIAL_NUMBERS +.if defined(BOOT_LITTLE_ENDIAN_UUID) +# Use little-endian UUID format as defined in SMBIOS 2.6. +CFLAGS+= -DSMBIOS_LITTLE_ENDIAN_UUID +.elif defined(BOOT_NETWORK_ENDIAN_UUID) +# Use network-endian UUID format for backward compatibility. +CFLAGS+= -DSMBIOS_NETWORK_ENDIAN_UUID +.endif +.endif + .if ${MK_FORTH} != "no" BOOT_FORTH= yes CFLAGS+= -DBOOT_FORTH From 23ac9029f96b2b6a5b7ede0b00e8e6dbfe35c721 Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Fri, 12 Aug 2016 21:29:44 +0000 Subject: [PATCH 70/76] Update iflib to support more NIC designs - Move group task queue into kern/subr_gtaskqueue.c - Change intr_enable to return an int so it can be detected if it's not implemented - Allow different TX/RX queues per set to be different sizes - Don't split up TX mbufs before transmit - Allow a completion queue for TX as well as RX - Pass the RX budget to isc_rxd_available() to allow an earlier return and avoid multiple calls Submitted by: shurd Reviewed by: gallatin Approved by: scottl Differential Revision: https://reviews.freebsd.org/D7393 --- sys/conf/files | 1 + sys/kern/subr_gtaskqueue.c | 864 +++++++++++++++++++++++++++++++++++++ sys/kern/subr_taskqueue.c | 360 ---------------- sys/net/ifdi_if.m | 5 +- sys/net/iflib.c | 539 ++++++++++++++++------- sys/net/iflib.h | 45 +- sys/sys/_task.h | 11 +- sys/sys/gtaskqueue.h | 125 ++++++ sys/sys/taskqueue.h | 74 ---- 9 files changed, 1409 insertions(+), 615 deletions(-) create mode 100644 sys/kern/subr_gtaskqueue.c create mode 100644 sys/sys/gtaskqueue.h diff --git a/sys/conf/files b/sys/conf/files index 993f0c3892ab..b75d3994a80d 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3349,6 +3349,7 @@ kern/subr_disk.c standard kern/subr_eventhandler.c standard kern/subr_fattime.c standard kern/subr_firmware.c optional firmware +kern/subr_gtaskqueue.c standard kern/subr_hash.c standard kern/subr_hints.c standard kern/subr_kdb.c standard diff --git a/sys/kern/subr_gtaskqueue.c b/sys/kern/subr_gtaskqueue.c new file mode 100644 index 000000000000..2d655bde0b25 --- /dev/null +++ b/sys/kern/subr_gtaskqueue.c @@ -0,0 +1,864 @@ +/*- + * Copyright (c) 2000 Doug Rabson + * Copyright (c) 2014 Jeff Roberson + * Copyright (c) 2016 Matthew Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues"); +static void gtaskqueue_thread_enqueue(void *); +static void gtaskqueue_thread_loop(void *arg); + + +struct gtaskqueue_busy { + struct gtask *tb_running; + TAILQ_ENTRY(gtaskqueue_busy) tb_link; +}; + +static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1; + +struct gtaskqueue { + STAILQ_HEAD(, gtask) tq_queue; + gtaskqueue_enqueue_fn tq_enqueue; + void *tq_context; + char *tq_name; + TAILQ_HEAD(, gtaskqueue_busy) tq_active; + struct mtx tq_mutex; + struct thread **tq_threads; + int tq_tcount; + int tq_spin; + int tq_flags; + int tq_callouts; + taskqueue_callback_fn tq_callbacks[TASKQUEUE_NUM_CALLBACKS]; + void *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS]; +}; + +#define TQ_FLAGS_ACTIVE (1 << 0) +#define TQ_FLAGS_BLOCKED (1 << 1) +#define TQ_FLAGS_UNLOCKED_ENQUEUE (1 << 2) + +#define DT_CALLOUT_ARMED (1 << 0) + +#define TQ_LOCK(tq) \ + do { \ + if ((tq)->tq_spin) \ + mtx_lock_spin(&(tq)->tq_mutex); \ + else \ + mtx_lock(&(tq)->tq_mutex); \ + } while (0) +#define TQ_ASSERT_LOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_OWNED) + +#define TQ_UNLOCK(tq) \ + do { \ + if ((tq)->tq_spin) \ + mtx_unlock_spin(&(tq)->tq_mutex); \ + else \ + mtx_unlock(&(tq)->tq_mutex); \ + } while (0) +#define TQ_ASSERT_UNLOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED) + +static __inline int +TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm, + int t) +{ + if (tq->tq_spin) + return (msleep_spin(p, m, wm, t)); + return (msleep(p, m, pri, wm, t)); +} + +static struct gtaskqueue * +_gtaskqueue_create(const char *name, int mflags, + taskqueue_enqueue_fn enqueue, void *context, + int mtxflags, const char *mtxname __unused) +{ + struct gtaskqueue *queue; + char *tq_name; + + tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO); + if (!tq_name) + return (NULL); + + snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue"); + + queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO); + if (!queue) + return (NULL); + + STAILQ_INIT(&queue->tq_queue); + TAILQ_INIT(&queue->tq_active); + queue->tq_enqueue = enqueue; + queue->tq_context = context; + queue->tq_name = tq_name; + queue->tq_spin = (mtxflags & MTX_SPIN) != 0; + queue->tq_flags |= TQ_FLAGS_ACTIVE; + if (enqueue == gtaskqueue_thread_enqueue) + queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE; + mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags); + + return (queue); +} + + +/* + * Signal a taskqueue thread to terminate. + */ +static void +gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq) +{ + + while (tq->tq_tcount > 0 || tq->tq_callouts > 0) { + wakeup(tq); + TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0); + } +} + +static void +gtaskqueue_free(struct gtaskqueue *queue) +{ + + TQ_LOCK(queue); + queue->tq_flags &= ~TQ_FLAGS_ACTIVE; + gtaskqueue_terminate(queue->tq_threads, queue); + KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?")); + KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks")); + mtx_destroy(&queue->tq_mutex); + free(queue->tq_threads, M_GTASKQUEUE); + free(queue->tq_name, M_GTASKQUEUE); + free(queue, M_GTASKQUEUE); +} + +int +grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask) +{ + TQ_LOCK(queue); + if (gtask->ta_flags & TASK_ENQUEUED) { + TQ_UNLOCK(queue); + return (0); + } + STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link); + gtask->ta_flags |= TASK_ENQUEUED; + TQ_UNLOCK(queue); + if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) + queue->tq_enqueue(queue->tq_context); + return (0); +} + +static void +gtaskqueue_task_nop_fn(void *context) +{ +} + +/* + * Block until all currently queued tasks in this taskqueue + * have begun execution. Tasks queued during execution of + * this function are ignored. + */ +static void +gtaskqueue_drain_tq_queue(struct gtaskqueue *queue) +{ + struct gtask t_barrier; + + if (STAILQ_EMPTY(&queue->tq_queue)) + return; + + /* + * Enqueue our barrier after all current tasks, but with + * the highest priority so that newly queued tasks cannot + * pass it. Because of the high priority, we can not use + * taskqueue_enqueue_locked directly (which drops the lock + * anyway) so just insert it at tail while we have the + * queue lock. + */ + GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier); + STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link); + t_barrier.ta_flags |= TASK_ENQUEUED; + + /* + * Once the barrier has executed, all previously queued tasks + * have completed or are currently executing. + */ + while (t_barrier.ta_flags & TASK_ENQUEUED) + TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0); +} + +/* + * Block until all currently executing tasks for this taskqueue + * complete. Tasks that begin execution during the execution + * of this function are ignored. + */ +static void +gtaskqueue_drain_tq_active(struct gtaskqueue *queue) +{ + struct gtaskqueue_busy tb_marker, *tb_first; + + if (TAILQ_EMPTY(&queue->tq_active)) + return; + + /* Block taskq_terminate().*/ + queue->tq_callouts++; + + /* + * Wait for all currently executing taskqueue threads + * to go idle. + */ + tb_marker.tb_running = TB_DRAIN_WAITER; + TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link); + while (TAILQ_FIRST(&queue->tq_active) != &tb_marker) + TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0); + TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link); + + /* + * Wakeup any other drain waiter that happened to queue up + * without any intervening active thread. + */ + tb_first = TAILQ_FIRST(&queue->tq_active); + if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER) + wakeup(tb_first); + + /* Release taskqueue_terminate(). */ + queue->tq_callouts--; + if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0) + wakeup_one(queue->tq_threads); +} + +void +gtaskqueue_block(struct gtaskqueue *queue) +{ + + TQ_LOCK(queue); + queue->tq_flags |= TQ_FLAGS_BLOCKED; + TQ_UNLOCK(queue); +} + +void +gtaskqueue_unblock(struct gtaskqueue *queue) +{ + + TQ_LOCK(queue); + queue->tq_flags &= ~TQ_FLAGS_BLOCKED; + if (!STAILQ_EMPTY(&queue->tq_queue)) + queue->tq_enqueue(queue->tq_context); + TQ_UNLOCK(queue); +} + +static void +gtaskqueue_run_locked(struct gtaskqueue *queue) +{ + struct gtaskqueue_busy tb; + struct gtaskqueue_busy *tb_first; + struct gtask *gtask; + + KASSERT(queue != NULL, ("tq is NULL")); + TQ_ASSERT_LOCKED(queue); + tb.tb_running = NULL; + + while (STAILQ_FIRST(&queue->tq_queue)) { + TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link); + + /* + * Carefully remove the first task from the queue and + * clear its TASK_ENQUEUED flag + */ + gtask = STAILQ_FIRST(&queue->tq_queue); + KASSERT(gtask != NULL, ("task is NULL")); + STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link); + gtask->ta_flags &= ~TASK_ENQUEUED; + tb.tb_running = gtask; + TQ_UNLOCK(queue); + + KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL")); + gtask->ta_func(gtask->ta_context); + + TQ_LOCK(queue); + tb.tb_running = NULL; + wakeup(gtask); + + TAILQ_REMOVE(&queue->tq_active, &tb, tb_link); + tb_first = TAILQ_FIRST(&queue->tq_active); + if (tb_first != NULL && + tb_first->tb_running == TB_DRAIN_WAITER) + wakeup(tb_first); + } +} + +static int +task_is_running(struct gtaskqueue *queue, struct gtask *gtask) +{ + struct gtaskqueue_busy *tb; + + TQ_ASSERT_LOCKED(queue); + TAILQ_FOREACH(tb, &queue->tq_active, tb_link) { + if (tb->tb_running == gtask) + return (1); + } + return (0); +} + +static int +gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask) +{ + + if (gtask->ta_flags & TASK_ENQUEUED) + STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link); + gtask->ta_flags &= ~TASK_ENQUEUED; + return (task_is_running(queue, gtask) ? EBUSY : 0); +} + +int +gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask) +{ + int error; + + TQ_LOCK(queue); + error = gtaskqueue_cancel_locked(queue, gtask); + TQ_UNLOCK(queue); + + return (error); +} + +void +gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask) +{ + + if (!queue->tq_spin) + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); + + TQ_LOCK(queue); + while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask)) + TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0); + TQ_UNLOCK(queue); +} + +void +gtaskqueue_drain_all(struct gtaskqueue *queue) +{ + + if (!queue->tq_spin) + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); + + TQ_LOCK(queue); + gtaskqueue_drain_tq_queue(queue); + gtaskqueue_drain_tq_active(queue); + TQ_UNLOCK(queue); +} + +static int +_gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, + cpuset_t *mask, const char *name, va_list ap) +{ + char ktname[MAXCOMLEN + 1]; + struct thread *td; + struct gtaskqueue *tq; + int i, error; + + if (count <= 0) + return (EINVAL); + + vsnprintf(ktname, sizeof(ktname), name, ap); + tq = *tqp; + + tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE, + M_NOWAIT | M_ZERO); + if (tq->tq_threads == NULL) { + printf("%s: no memory for %s threads\n", __func__, ktname); + return (ENOMEM); + } + + for (i = 0; i < count; i++) { + if (count == 1) + error = kthread_add(gtaskqueue_thread_loop, tqp, NULL, + &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname); + else + error = kthread_add(gtaskqueue_thread_loop, tqp, NULL, + &tq->tq_threads[i], RFSTOPPED, 0, + "%s_%d", ktname, i); + if (error) { + /* should be ok to continue, taskqueue_free will dtrt */ + printf("%s: kthread_add(%s): error %d", __func__, + ktname, error); + tq->tq_threads[i] = NULL; /* paranoid */ + } else + tq->tq_tcount++; + } + for (i = 0; i < count; i++) { + if (tq->tq_threads[i] == NULL) + continue; + td = tq->tq_threads[i]; + if (mask) { + error = cpuset_setthread(td->td_tid, mask); + /* + * Failing to pin is rarely an actual fatal error; + * it'll just affect performance. + */ + if (error) + printf("%s: curthread=%llu: can't pin; " + "error=%d\n", + __func__, + (unsigned long long) td->td_tid, + error); + } + thread_lock(td); + sched_prio(td, pri); + sched_add(td, SRQ_BORING); + thread_unlock(td); + } + + return (0); +} + +static int +gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, + const char *name, ...) +{ + va_list ap; + int error; + + va_start(ap, name); + error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap); + va_end(ap); + return (error); +} + +static inline void +gtaskqueue_run_callback(struct gtaskqueue *tq, + enum taskqueue_callback_type cb_type) +{ + taskqueue_callback_fn tq_callback; + + TQ_ASSERT_UNLOCKED(tq); + tq_callback = tq->tq_callbacks[cb_type]; + if (tq_callback != NULL) + tq_callback(tq->tq_cb_contexts[cb_type]); +} + +static void +gtaskqueue_thread_loop(void *arg) +{ + struct gtaskqueue **tqp, *tq; + + tqp = arg; + tq = *tqp; + gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT); + TQ_LOCK(tq); + while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { + /* XXX ? */ + gtaskqueue_run_locked(tq); + /* + * Because taskqueue_run() can drop tq_mutex, we need to + * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the + * meantime, which means we missed a wakeup. + */ + if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0) + break; + TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0); + } + gtaskqueue_run_locked(tq); + /* + * This thread is on its way out, so just drop the lock temporarily + * in order to call the shutdown callback. This allows the callback + * to look at the taskqueue, even just before it dies. + */ + TQ_UNLOCK(tq); + gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN); + TQ_LOCK(tq); + + /* rendezvous with thread that asked us to terminate */ + tq->tq_tcount--; + wakeup_one(tq->tq_threads); + TQ_UNLOCK(tq); + kthread_exit(); +} + +static void +gtaskqueue_thread_enqueue(void *context) +{ + struct gtaskqueue **tqp, *tq; + + tqp = context; + tq = *tqp; + wakeup_one(tq); +} + + +static struct gtaskqueue * +gtaskqueue_create_fast(const char *name, int mflags, + taskqueue_enqueue_fn enqueue, void *context) +{ + return _gtaskqueue_create(name, mflags, enqueue, context, + MTX_SPIN, "fast_taskqueue"); +} + + +struct taskqgroup_cpu { + LIST_HEAD(, grouptask) tgc_tasks; + struct gtaskqueue *tgc_taskq; + int tgc_cnt; + int tgc_cpu; +}; + +struct taskqgroup { + struct taskqgroup_cpu tqg_queue[MAXCPU]; + struct mtx tqg_lock; + char * tqg_name; + int tqg_adjusting; + int tqg_stride; + int tqg_cnt; +}; + +struct taskq_bind_task { + struct gtask bt_task; + int bt_cpuid; +}; + +static void +taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx) +{ + struct taskqgroup_cpu *qcpu; + + qcpu = &qgroup->tqg_queue[idx]; + LIST_INIT(&qcpu->tgc_tasks); + qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK, + taskqueue_thread_enqueue, &qcpu->tgc_taskq); + gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT, + "%s_%d", qgroup->tqg_name, idx); + qcpu->tgc_cpu = idx * qgroup->tqg_stride; +} + +static void +taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx) +{ + + gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq); +} + +/* + * Find the taskq with least # of tasks that doesn't currently have any + * other queues from the uniq identifier. + */ +static int +taskqgroup_find(struct taskqgroup *qgroup, void *uniq) +{ + struct grouptask *n; + int i, idx, mincnt; + int strict; + + mtx_assert(&qgroup->tqg_lock, MA_OWNED); + if (qgroup->tqg_cnt == 0) + return (0); + idx = -1; + mincnt = INT_MAX; + /* + * Two passes; First scan for a queue with the least tasks that + * does not already service this uniq id. If that fails simply find + * the queue with the least total tasks; + */ + for (strict = 1; mincnt == INT_MAX; strict = 0) { + for (i = 0; i < qgroup->tqg_cnt; i++) { + if (qgroup->tqg_queue[i].tgc_cnt > mincnt) + continue; + if (strict) { + LIST_FOREACH(n, + &qgroup->tqg_queue[i].tgc_tasks, gt_list) + if (n->gt_uniq == uniq) + break; + if (n != NULL) + continue; + } + mincnt = qgroup->tqg_queue[i].tgc_cnt; + idx = i; + } + } + if (idx == -1) + panic("taskqgroup_find: Failed to pick a qid."); + + return (idx); +} + +void +taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, + void *uniq, int irq, char *name) +{ + cpuset_t mask; + int qid; + + gtask->gt_uniq = uniq; + gtask->gt_name = name; + gtask->gt_irq = irq; + gtask->gt_cpu = -1; + mtx_lock(&qgroup->tqg_lock); + qid = taskqgroup_find(qgroup, uniq); + qgroup->tqg_queue[qid].tgc_cnt++; + LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); + gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; + if (irq != -1 && smp_started) { + CPU_ZERO(&mask); + CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); + mtx_unlock(&qgroup->tqg_lock); + intr_setaffinity(irq, &mask); + } else + mtx_unlock(&qgroup->tqg_lock); +} + +int +taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, + void *uniq, int cpu, int irq, char *name) +{ + cpuset_t mask; + int i, qid; + + qid = -1; + gtask->gt_uniq = uniq; + gtask->gt_name = name; + gtask->gt_irq = irq; + gtask->gt_cpu = cpu; + mtx_lock(&qgroup->tqg_lock); + if (smp_started) { + for (i = 0; i < qgroup->tqg_cnt; i++) + if (qgroup->tqg_queue[i].tgc_cpu == cpu) { + qid = i; + break; + } + if (qid == -1) { + mtx_unlock(&qgroup->tqg_lock); + return (EINVAL); + } + } else + qid = 0; + qgroup->tqg_queue[qid].tgc_cnt++; + LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); + gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; + if (irq != -1 && smp_started) { + CPU_ZERO(&mask); + CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); + mtx_unlock(&qgroup->tqg_lock); + intr_setaffinity(irq, &mask); + } else + mtx_unlock(&qgroup->tqg_lock); + return (0); +} + +void +taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask) +{ + int i; + + mtx_lock(&qgroup->tqg_lock); + for (i = 0; i < qgroup->tqg_cnt; i++) + if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue) + break; + if (i == qgroup->tqg_cnt) + panic("taskqgroup_detach: task not in group\n"); + qgroup->tqg_queue[i].tgc_cnt--; + LIST_REMOVE(gtask, gt_list); + mtx_unlock(&qgroup->tqg_lock); + gtask->gt_taskqueue = NULL; +} + +static void +taskqgroup_binder(void *ctx) +{ + struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx; + cpuset_t mask; + int error; + + CPU_ZERO(&mask); + CPU_SET(gtask->bt_cpuid, &mask); + error = cpuset_setthread(curthread->td_tid, &mask); + thread_lock(curthread); + sched_bind(curthread, gtask->bt_cpuid); + thread_unlock(curthread); + + if (error) + printf("taskqgroup_binder: setaffinity failed: %d\n", + error); + free(gtask, M_DEVBUF); +} + +static void +taskqgroup_bind(struct taskqgroup *qgroup) +{ + struct taskq_bind_task *gtask; + int i; + + /* + * Bind taskqueue threads to specific CPUs, if they have been assigned + * one. + */ + for (i = 0; i < qgroup->tqg_cnt; i++) { + gtask = malloc(sizeof (*gtask), M_DEVBUF, M_NOWAIT); + GTASK_INIT(>ask->bt_task, 0, 0, taskqgroup_binder, gtask); + gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu; + grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq, + >ask->bt_task); + } +} + +static int +_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) +{ + LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL); + cpuset_t mask; + struct grouptask *gtask; + int i, old_cnt, qid; + + mtx_assert(&qgroup->tqg_lock, MA_OWNED); + + if (cnt < 1 || cnt * stride > mp_ncpus || !smp_started) { + printf("taskqgroup_adjust failed cnt: %d stride: %d mp_ncpus: %d smp_started: %d\n", + cnt, stride, mp_ncpus, smp_started); + return (EINVAL); + } + if (qgroup->tqg_adjusting) { + printf("taskqgroup_adjust failed: adjusting\n"); + return (EBUSY); + } + qgroup->tqg_adjusting = 1; + old_cnt = qgroup->tqg_cnt; + mtx_unlock(&qgroup->tqg_lock); + /* + * Set up queue for tasks added before boot. + */ + if (old_cnt == 0) { + LIST_SWAP(>ask_head, &qgroup->tqg_queue[0].tgc_tasks, + grouptask, gt_list); + qgroup->tqg_queue[0].tgc_cnt = 0; + } + + /* + * If new taskq threads have been added. + */ + for (i = old_cnt; i < cnt; i++) + taskqgroup_cpu_create(qgroup, i); + mtx_lock(&qgroup->tqg_lock); + qgroup->tqg_cnt = cnt; + qgroup->tqg_stride = stride; + + /* + * Adjust drivers to use new taskqs. + */ + for (i = 0; i < old_cnt; i++) { + while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) { + LIST_REMOVE(gtask, gt_list); + qgroup->tqg_queue[i].tgc_cnt--; + LIST_INSERT_HEAD(>ask_head, gtask, gt_list); + } + } + + while ((gtask = LIST_FIRST(>ask_head))) { + LIST_REMOVE(gtask, gt_list); + if (gtask->gt_cpu == -1) + qid = taskqgroup_find(qgroup, gtask->gt_uniq); + else { + for (i = 0; i < qgroup->tqg_cnt; i++) + if (qgroup->tqg_queue[i].tgc_cpu == gtask->gt_cpu) { + qid = i; + break; + } + } + qgroup->tqg_queue[qid].tgc_cnt++; + LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, + gt_list); + gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; + } + /* + * Set new CPU and IRQ affinity + */ + for (i = 0; i < cnt; i++) { + qgroup->tqg_queue[i].tgc_cpu = i * qgroup->tqg_stride; + CPU_ZERO(&mask); + CPU_SET(qgroup->tqg_queue[i].tgc_cpu, &mask); + LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) { + if (gtask->gt_irq == -1) + continue; + intr_setaffinity(gtask->gt_irq, &mask); + } + } + mtx_unlock(&qgroup->tqg_lock); + + /* + * If taskq thread count has been reduced. + */ + for (i = cnt; i < old_cnt; i++) + taskqgroup_cpu_remove(qgroup, i); + + mtx_lock(&qgroup->tqg_lock); + qgroup->tqg_adjusting = 0; + + taskqgroup_bind(qgroup); + + return (0); +} + +int +taskqgroup_adjust(struct taskqgroup *qgroup, int cpu, int stride) +{ + int error; + + mtx_lock(&qgroup->tqg_lock); + error = _taskqgroup_adjust(qgroup, cpu, stride); + mtx_unlock(&qgroup->tqg_lock); + + return (error); +} + +struct taskqgroup * +taskqgroup_create(char *name) +{ + struct taskqgroup *qgroup; + + qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO); + mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF); + qgroup->tqg_name = name; + LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks); + + return (qgroup); +} + +void +taskqgroup_destroy(struct taskqgroup *qgroup) +{ + +} diff --git a/sys/kern/subr_taskqueue.c b/sys/kern/subr_taskqueue.c index 12124b8aa005..5a20148f573c 100644 --- a/sys/kern/subr_taskqueue.c +++ b/sys/kern/subr_taskqueue.c @@ -260,22 +260,6 @@ taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task) return (0); } -int -grouptaskqueue_enqueue(struct taskqueue *queue, struct task *task) -{ - TQ_LOCK(queue); - if (task->ta_pending) { - TQ_UNLOCK(queue); - return (0); - } - STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link); - task->ta_pending = 1; - TQ_UNLOCK(queue); - if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) - queue->tq_enqueue(queue->tq_context); - return (0); -} - int taskqueue_enqueue(struct taskqueue *queue, struct task *task) { @@ -806,347 +790,3 @@ taskqueue_member(struct taskqueue *queue, struct thread *td) } return (ret); } - -struct taskqgroup_cpu { - LIST_HEAD(, grouptask) tgc_tasks; - struct taskqueue *tgc_taskq; - int tgc_cnt; - int tgc_cpu; -}; - -struct taskqgroup { - struct taskqgroup_cpu tqg_queue[MAXCPU]; - struct mtx tqg_lock; - char * tqg_name; - int tqg_adjusting; - int tqg_stride; - int tqg_cnt; -}; - -struct taskq_bind_task { - struct task bt_task; - int bt_cpuid; -}; - -static void -taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx) -{ - struct taskqgroup_cpu *qcpu; - int i, j; - - qcpu = &qgroup->tqg_queue[idx]; - LIST_INIT(&qcpu->tgc_tasks); - qcpu->tgc_taskq = taskqueue_create_fast(NULL, M_WAITOK, - taskqueue_thread_enqueue, &qcpu->tgc_taskq); - taskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT, - "%s_%d", qgroup->tqg_name, idx); - - for (i = CPU_FIRST(), j = 0; j < idx * qgroup->tqg_stride; - j++, i = CPU_NEXT(i)) { - /* - * Wait: evaluate the idx * qgroup->tqg_stride'th CPU, - * potentially wrapping the actual count - */ - } - qcpu->tgc_cpu = i; -} - -static void -taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx) -{ - - taskqueue_free(qgroup->tqg_queue[idx].tgc_taskq); -} - -/* - * Find the taskq with least # of tasks that doesn't currently have any - * other queues from the uniq identifier. - */ -static int -taskqgroup_find(struct taskqgroup *qgroup, void *uniq) -{ - struct grouptask *n; - int i, idx, mincnt; - int strict; - - mtx_assert(&qgroup->tqg_lock, MA_OWNED); - if (qgroup->tqg_cnt == 0) - return (0); - idx = -1; - mincnt = INT_MAX; - /* - * Two passes; First scan for a queue with the least tasks that - * does not already service this uniq id. If that fails simply find - * the queue with the least total tasks; - */ - for (strict = 1; mincnt == INT_MAX; strict = 0) { - for (i = 0; i < qgroup->tqg_cnt; i++) { - if (qgroup->tqg_queue[i].tgc_cnt > mincnt) - continue; - if (strict) { - LIST_FOREACH(n, - &qgroup->tqg_queue[i].tgc_tasks, gt_list) - if (n->gt_uniq == uniq) - break; - if (n != NULL) - continue; - } - mincnt = qgroup->tqg_queue[i].tgc_cnt; - idx = i; - } - } - if (idx == -1) - panic("taskqgroup_find: Failed to pick a qid."); - - return (idx); -} - -void -taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, - void *uniq, int irq, char *name) -{ - cpuset_t mask; - int qid; - - gtask->gt_uniq = uniq; - gtask->gt_name = name; - gtask->gt_irq = irq; - gtask->gt_cpu = -1; - mtx_lock(&qgroup->tqg_lock); - qid = taskqgroup_find(qgroup, uniq); - qgroup->tqg_queue[qid].tgc_cnt++; - LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); - gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; - if (irq != -1 && smp_started) { - CPU_ZERO(&mask); - CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); - mtx_unlock(&qgroup->tqg_lock); - intr_setaffinity(irq, &mask); - } else - mtx_unlock(&qgroup->tqg_lock); -} - -int -taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, - void *uniq, int cpu, int irq, char *name) -{ - cpuset_t mask; - int i, qid; - - qid = -1; - gtask->gt_uniq = uniq; - gtask->gt_name = name; - gtask->gt_irq = irq; - gtask->gt_cpu = cpu; - mtx_lock(&qgroup->tqg_lock); - if (smp_started) { - for (i = 0; i < qgroup->tqg_cnt; i++) - if (qgroup->tqg_queue[i].tgc_cpu == cpu) { - qid = i; - break; - } - if (qid == -1) { - mtx_unlock(&qgroup->tqg_lock); - return (EINVAL); - } - } else - qid = 0; - qgroup->tqg_queue[qid].tgc_cnt++; - LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); - gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; - if (irq != -1 && smp_started) { - CPU_ZERO(&mask); - CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); - mtx_unlock(&qgroup->tqg_lock); - intr_setaffinity(irq, &mask); - } else - mtx_unlock(&qgroup->tqg_lock); - return (0); -} - -void -taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask) -{ - int i; - - mtx_lock(&qgroup->tqg_lock); - for (i = 0; i < qgroup->tqg_cnt; i++) - if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue) - break; - if (i == qgroup->tqg_cnt) - panic("taskqgroup_detach: task not in group\n"); - qgroup->tqg_queue[i].tgc_cnt--; - LIST_REMOVE(gtask, gt_list); - mtx_unlock(&qgroup->tqg_lock); - gtask->gt_taskqueue = NULL; -} - -static void -taskqgroup_binder(void *ctx, int pending) -{ - struct taskq_bind_task *task = (struct taskq_bind_task *)ctx; - cpuset_t mask; - int error; - - CPU_ZERO(&mask); - CPU_SET(task->bt_cpuid, &mask); - error = cpuset_setthread(curthread->td_tid, &mask); - thread_lock(curthread); - sched_bind(curthread, task->bt_cpuid); - thread_unlock(curthread); - - if (error) - printf("taskqgroup_binder: setaffinity failed: %d\n", - error); - free(task, M_DEVBUF); -} - -static void -taskqgroup_bind(struct taskqgroup *qgroup) -{ - struct taskq_bind_task *task; - int i; - - /* - * Bind taskqueue threads to specific CPUs, if they have been assigned - * one. - */ - for (i = 0; i < qgroup->tqg_cnt; i++) { - task = malloc(sizeof (*task), M_DEVBUF, M_NOWAIT); - TASK_INIT(&task->bt_task, 0, taskqgroup_binder, task); - task->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu; - taskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq, - &task->bt_task); - } -} - -static int -_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) -{ - LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL); - cpuset_t mask; - struct grouptask *gtask; - int i, k, old_cnt, qid, cpu; - - mtx_assert(&qgroup->tqg_lock, MA_OWNED); - - if (cnt < 1 || cnt * stride > mp_ncpus || !smp_started) { - printf("taskqgroup_adjust failed cnt: %d stride: %d " - "mp_ncpus: %d smp_started: %d\n", cnt, stride, mp_ncpus, - smp_started); - return (EINVAL); - } - if (qgroup->tqg_adjusting) { - printf("taskqgroup_adjust failed: adjusting\n"); - return (EBUSY); - } - qgroup->tqg_adjusting = 1; - old_cnt = qgroup->tqg_cnt; - mtx_unlock(&qgroup->tqg_lock); - /* - * Set up queue for tasks added before boot. - */ - if (old_cnt == 0) { - LIST_SWAP(>ask_head, &qgroup->tqg_queue[0].tgc_tasks, - grouptask, gt_list); - qgroup->tqg_queue[0].tgc_cnt = 0; - } - - /* - * If new taskq threads have been added. - */ - for (i = old_cnt; i < cnt; i++) - taskqgroup_cpu_create(qgroup, i); - mtx_lock(&qgroup->tqg_lock); - qgroup->tqg_cnt = cnt; - qgroup->tqg_stride = stride; - - /* - * Adjust drivers to use new taskqs. - */ - for (i = 0; i < old_cnt; i++) { - while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) { - LIST_REMOVE(gtask, gt_list); - qgroup->tqg_queue[i].tgc_cnt--; - LIST_INSERT_HEAD(>ask_head, gtask, gt_list); - } - } - - while ((gtask = LIST_FIRST(>ask_head))) { - LIST_REMOVE(gtask, gt_list); - if (gtask->gt_cpu == -1) - qid = taskqgroup_find(qgroup, gtask->gt_uniq); - else { - for (i = 0; i < qgroup->tqg_cnt; i++) - if (qgroup->tqg_queue[i].tgc_cpu == gtask->gt_cpu) { - qid = i; - break; - } - } - qgroup->tqg_queue[qid].tgc_cnt++; - LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, - gt_list); - gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; - } - /* - * Set new CPU and IRQ affinity - */ - cpu = CPU_FIRST(); - for (i = 0; i < cnt; i++) { - qgroup->tqg_queue[i].tgc_cpu = cpu; - for (k = 0; k < qgroup->tqg_stride; k++) - cpu = CPU_NEXT(cpu); - CPU_ZERO(&mask); - CPU_SET(qgroup->tqg_queue[i].tgc_cpu, &mask); - LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) { - if (gtask->gt_irq == -1) - continue; - intr_setaffinity(gtask->gt_irq, &mask); - } - } - mtx_unlock(&qgroup->tqg_lock); - - /* - * If taskq thread count has been reduced. - */ - for (i = cnt; i < old_cnt; i++) - taskqgroup_cpu_remove(qgroup, i); - - mtx_lock(&qgroup->tqg_lock); - qgroup->tqg_adjusting = 0; - - taskqgroup_bind(qgroup); - - return (0); -} - -int -taskqgroup_adjust(struct taskqgroup *qgroup, int cpu, int stride) -{ - int error; - - mtx_lock(&qgroup->tqg_lock); - error = _taskqgroup_adjust(qgroup, cpu, stride); - mtx_unlock(&qgroup->tqg_lock); - - return (error); -} - -struct taskqgroup * -taskqgroup_create(char *name) -{ - struct taskqgroup *qgroup; - - qgroup = malloc(sizeof(*qgroup), M_TASKQUEUE, M_WAITOK | M_ZERO); - mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF); - qgroup->tqg_name = name; - LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks); - - return (qgroup); -} - -void -taskqgroup_destroy(struct taskqgroup *qgroup) -{ - -} diff --git a/sys/net/ifdi_if.m b/sys/net/ifdi_if.m index 60629e46b6cd..301a6ee88f8e 100644 --- a/sys/net/ifdi_if.m +++ b/sys/net/ifdi_if.m @@ -60,9 +60,10 @@ CODE { return (0); } - static void + static int null_queue_intr_enable(if_ctx_t _ctx __unused, uint16_t _qid __unused) { + return (ENOTSUP); } static void @@ -194,7 +195,7 @@ METHOD void intr_disable { if_ctx_t _ctx; }; -METHOD void queue_intr_enable { +METHOD int queue_intr_enable { if_ctx_t _ctx; uint16_t _qid; } DEFAULT null_queue_intr_enable; diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 89409beb9fa4..5938aca90877 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include @@ -101,7 +102,6 @@ __FBSDID("$FreeBSD$"); * Enable mbuf vectors for compressing long mbuf chains */ - /* * NB: * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead @@ -181,8 +181,10 @@ struct iflib_ctx { struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; - uint16_t ifc_sysctl_ntxds; - uint16_t ifc_sysctl_nrxds; + uint16_t ifc_sysctl_qs_eq_override; + + uint16_t ifc_sysctl_ntxds[8]; + uint16_t ifc_sysctl_nrxds[8]; struct if_txrx ifc_txrx; #define isc_txd_encap ifc_txrx.ift_txd_encap #define isc_txd_flush ifc_txrx.ift_txd_flush @@ -294,10 +296,11 @@ typedef struct iflib_sw_tx_desc_array { #define IFLIB_RESTART_BUDGET 8 -#define IFC_LEGACY 0x1 -#define IFC_QFLUSH 0x2 -#define IFC_MULTISEG 0x4 -#define IFC_DMAR 0x8 +#define IFC_LEGACY 0x01 +#define IFC_QFLUSH 0x02 +#define IFC_MULTISEG 0x04 +#define IFC_DMAR 0x08 +#define IFC_SC_ALLOCATED 0x10 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ @@ -311,6 +314,7 @@ struct iflib_txq { uint8_t ift_db_pending; uint8_t ift_db_pending_queued; uint8_t ift_npending; + uint8_t ift_br_offset; /* implicit pad */ uint64_t ift_processed; uint64_t ift_cleaned; @@ -414,6 +418,7 @@ struct iflib_rxq { uint16_t ifr_cq_cidx; uint16_t ifr_cq_pidx; uint8_t ifr_cq_gen; + uint8_t ifr_fl_offset; if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; @@ -604,7 +609,7 @@ static void iflib_tx_structures_free(if_ctx_t ctx); static void iflib_rx_structures_free(if_ctx_t ctx); static int iflib_queues_alloc(if_ctx_t ctx); static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); -static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx); +static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget); static int iflib_qset_structures_setup(if_ctx_t ctx); static int iflib_msix_init(if_ctx_t ctx); static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str); @@ -875,7 +880,7 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) for (fl = rxq->ifr_fl, i = 0; i < rxq->ifr_nfl; i++, fl++) { nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); - avail = ctx->isc_rxd_available(ctx->ifc_softc, kring->ring_id, nic_i); + avail = ctx->isc_rxd_available(ctx->ifc_softc, kring->ring_id, nic_i, INT_MAX); for (n = 0; avail > 0; n++, avail--) { error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (error) @@ -930,7 +935,7 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) /* * XXX we should be batching this operation - TODO */ - ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i, &paddr, &vaddr, 1); + ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i, &paddr, &vaddr, 1, fl->ifl_buf_size); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds[nic_i].ifsd_map, BUS_DMASYNC_PREREAD); nm_i = nm_next(nm_i, lim); @@ -958,6 +963,7 @@ static int iflib_netmap_attach(if_ctx_t ctx) { struct netmap_adapter na; + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; bzero(&na, sizeof(na)); @@ -966,8 +972,8 @@ iflib_netmap_attach(if_ctx_t ctx) MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); - na.num_tx_desc = ctx->ifc_sctx->isc_ntxd; - na.num_rx_desc = ctx->ifc_sctx->isc_ntxd; + na.num_tx_desc = scctx->isc_ntxd[0]; + na.num_rx_desc = scctx->isc_nrxd[0]; na.nm_txsync = iflib_netmap_txsync; na.nm_rxsync = iflib_netmap_rxsync; na.nm_register = iflib_netmap_register; @@ -986,7 +992,7 @@ iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) if (slot == 0) return; - for (int i = 0; i < ctx->ifc_sctx->isc_ntxd; i++) { + for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { /* * In netmap mode, set the map for the packet buffer. @@ -1011,7 +1017,7 @@ iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) if (slot == 0) return; sd = rxq->ifr_fl[0].ifl_sds; - nrxd = ctx->ifc_sctx->isc_nrxd; + nrxd = ctx->ifc_softc_ctx.isc_nrxd[0]; for (int i = 0; i < nrxd; i++, sd++) { int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i); uint64_t paddr; @@ -1021,7 +1027,7 @@ iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) vaddr = addr = PNMB(na, slot + sj, &paddr); netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, sd->ifsd_map, addr); /* Update descriptor and the cached value */ - ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, i, &paddr, &vaddr, 1); + ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, i, &paddr, &vaddr, 1, rxq->ifr_fl[0].ifl_buf_size); } /* preserve queue */ if (ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) { @@ -1236,7 +1242,8 @@ iflib_txsd_alloc(iflib_txq_t txq) nsegments = scctx->isc_tx_nsegments; ntsosegments = scctx->isc_tx_tso_segments_max; - MPASS(sctx->isc_ntxd > 0); + MPASS(scctx->isc_ntxd[0] > 0); + MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); MPASS(nsegments > 0); MPASS(ntsosegments > 0); /* @@ -1259,13 +1266,11 @@ iflib_txsd_alloc(iflib_txq_t txq) sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize); goto fail; } -#ifdef INVARIANTS +#ifdef IFLIB_DIAGNOSTICS device_printf(dev,"maxsize: %zd nsegments: %d maxsegsize: %zd\n", sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize); + #endif - device_printf(dev,"TSO maxsize: %d ntsosegments: %d maxsegsize: %d\n", - scctx->isc_tx_tso_size_max, ntsosegments, - scctx->isc_tx_tso_segsize_max); if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ @@ -1282,21 +1287,21 @@ iflib_txsd_alloc(iflib_txq_t txq) goto fail; } -#ifdef INVARIANTS +#ifdef IFLIB_DIAGNOSTICS device_printf(dev,"TSO maxsize: %d ntsosegments: %d maxsegsize: %d\n", scctx->isc_tx_tso_size_max, ntsosegments, scctx->isc_tx_tso_segsize_max); #endif if (!(txq->ift_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * - sctx->isc_ntxd, M_IFLIB, M_NOWAIT | M_ZERO))) { + scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(txq->ift_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * - sctx->isc_ntxd, M_IFLIB, M_NOWAIT | M_ZERO))) { + scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; @@ -1308,13 +1313,13 @@ iflib_txsd_alloc(iflib_txq_t txq) return (0); if (!(txq->ift_sds.ifsd_map = - (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * sctx->isc_ntxd, M_IFLIB, M_NOWAIT | M_ZERO))) { + (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer map memory\n"); err = ENOMEM; goto fail; } - for (int i = 0; i < sctx->isc_ntxd; i++) { + for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create TX DMA map\n"); @@ -1348,9 +1353,8 @@ static void iflib_txq_destroy(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; - if_shared_ctx_t sctx = ctx->ifc_sctx; - for (int i = 0; i < sctx->isc_ntxd; i++) + for (int i = 0; i < txq->ift_size; i++) iflib_txsd_destroy(ctx, txq, i); if (txq->ift_sds.ifsd_map != NULL) { free(txq->ift_sds.ifsd_map, M_IFLIB); @@ -1390,7 +1394,7 @@ iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) bus_dmamap_unload(txq->ift_desc_tag, txq->ift_sds.ifsd_map[i]); } - m_freem(*mp); + m_free(*mp); DBG_COUNTER_INC(tx_frees); *mp = NULL; } @@ -1399,7 +1403,7 @@ static int iflib_txq_setup(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; - if_shared_ctx_t sctx = ctx->ifc_sctx; + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; int i; @@ -1408,7 +1412,7 @@ iflib_txq_setup(iflib_txq_t txq) /* Reset indices */ txq->ift_cidx_processed = txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; - txq->ift_size = sctx->isc_ntxd; + txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bzero((void *)di->idi_vaddr, di->idi_size); @@ -1433,22 +1437,25 @@ iflib_rxsd_alloc(iflib_rxq_t rxq) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; iflib_fl_t fl; iflib_rxsd_t rxsd; int err; - MPASS(sctx->isc_nrxd > 0); + MPASS(scctx->isc_nrxd[0] > 0); + MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); fl = rxq->ifr_fl; for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { fl->ifl_sds = malloc(sizeof(struct iflib_sw_rx_desc) * - sctx->isc_nrxd, M_IFLIB, M_WAITOK | M_ZERO); + scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, + M_WAITOK | M_ZERO); if (fl->ifl_sds == NULL) { device_printf(dev, "Unable to allocate rx sw desc memory\n"); return (ENOMEM); } - fl->ifl_size = sctx->isc_nrxd; /* this isn't necessarily the same */ + fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ @@ -1468,7 +1475,7 @@ iflib_rxsd_alloc(iflib_rxq_t rxq) } rxsd = fl->ifl_sds; - for (int i = 0; i < sctx->isc_nrxd; i++, rxsd++) { + for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++, rxsd++) { err = bus_dmamap_create(fl->ifl_desc_tag, 0, &rxsd->ifsd_map); if (err) { device_printf(dev, "%s: bus_dmamap_create failed: %d\n", @@ -1626,7 +1633,7 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) } if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { ctx->isc_rxd_refill(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx, - fl->ifl_bus_addrs, fl->ifl_vm_addrs, i); + fl->ifl_bus_addrs, fl->ifl_vm_addrs, i, fl->ifl_buf_size); i = 0; pidx = fl->ifl_pidx; } @@ -1854,7 +1861,11 @@ iflib_init_locked(if_ctx_t ctx) for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { iflib_netmap_rxq_init(ctx, rxq); } +#ifdef INVARIANTS + i = if_getdrvflags(ifp); +#endif IFDI_INIT(ctx); + MPASS(if_getdrvflags(ifp) == i); for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { @@ -1902,7 +1913,6 @@ iflib_stop(if_ctx_t ctx) iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; - if_shared_ctx_t sctx = ctx->ifc_sctx; iflib_dma_info_t di; iflib_fl_t fl; int i, j; @@ -1920,7 +1930,7 @@ iflib_stop(if_ctx_t ctx) /* clean any enqueued buffers */ iflib_txq_check_drain(txq, 0); /* Free any existing tx buffers. */ - for (j = 0; j < sctx->isc_ntxd; j++) { + for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; @@ -1990,13 +2000,24 @@ assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri) caddr_t cl; i = 0; + mh = NULL; do { sd = rxd_frag_to_sd(rxq, &ri->iri_frags[i], &cltype, TRUE); MPASS(sd->ifsd_cl != NULL); MPASS(sd->ifsd_m != NULL); + + /* Don't include zero-length frags */ + if (ri->iri_frags[i].irf_len == 0) { + /* XXX we can save the cluster here, but not the mbuf */ + m_init(sd->ifsd_m, M_NOWAIT, MT_DATA, 0); + m_free(sd->ifsd_m); + sd->ifsd_m = NULL; + continue; + } + m = sd->ifsd_m; - if (i == 0) { + if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; padlen = ri->iri_pad; @@ -2019,14 +2040,12 @@ assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri) */ m->m_data += padlen; ri->iri_len -= padlen; - m->m_len = ri->iri_len; + m->m_len = ri->iri_frags[i].irf_len; } while (++i < ri->iri_nfrags); return (mh); } - - /* * Process one software descriptor */ @@ -2037,13 +2056,14 @@ iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) iflib_rxsd_t sd; /* should I merge this back in now that the two paths are basically duplicated? */ - if (ri->iri_len <= IFLIB_RX_COPY_THRESH) { + if (ri->iri_nfrags == 1 && + ri->iri_frags[0].irf_len <= IFLIB_RX_COPY_THRESH) { sd = rxd_frag_to_sd(rxq, &ri->iri_frags[0], NULL, FALSE); m = sd->ifsd_m; sd->ifsd_m = NULL; m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); memcpy(m->m_data, sd->ifsd_cl, ri->iri_len); - m->m_len = ri->iri_len; + m->m_len = ri->iri_frags[0].irf_len; } else { m = assemble_segments(rxq, ri); } @@ -2063,13 +2083,13 @@ iflib_rxeof(iflib_rxq_t rxq, int budget) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int avail, i; uint16_t *cidxp; struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; struct ifnet *ifp; - struct lro_entry *queued; int lro_enabled; /* * XXX early demux data packets so that if_input processing only handles @@ -2084,11 +2104,11 @@ iflib_rxeof(iflib_rxq_t rxq, int budget) mh = mt = NULL; MPASS(budget > 0); rx_pkts = rx_bytes = 0; - if (sctx->isc_flags & IFLIB_HAS_CQ) + if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else cidxp = &rxq->ifr_fl[0].ifl_cidx; - if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp)) == 0) { + if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); DBG_COUNTER_INC(rx_unavail); @@ -2112,10 +2132,11 @@ iflib_rxeof(iflib_rxq_t rxq, int budget) /* in lieu of handling correctly - make sure it isn't being unhandled */ MPASS(err == 0); - if (sctx->isc_flags & IFLIB_HAS_CQ) { - /* we know we consumed _one_ CQ entry */ - if (++rxq->ifr_cq_cidx == sctx->isc_nrxd) { - rxq->ifr_cq_cidx = 0; + if (sctx->isc_flags & IFLIB_HAS_RXCQ) { + *cidxp = ri.iri_cidx; + /* Update our consumer index */ + while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) { + rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; rxq->ifr_cq_gen = 0; } /* was this only a completion queue message? */ @@ -2128,7 +2149,7 @@ iflib_rxeof(iflib_rxq_t rxq, int budget) /* will advance the cidx on the corresponding free lists */ m = iflib_rxd_pkt_get(rxq, &ri); if (avail == 0 && budget_left) - avail = iflib_rxd_avail(ctx, rxq, *cidxp); + avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) { DBG_COUNTER_INC(rx_mbuf_null); @@ -2148,7 +2169,6 @@ iflib_rxeof(iflib_rxq_t rxq, int budget) ifp = ctx->ifc_ifp; lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); - while (mh != NULL) { m = mh; mh = mh->m_nextpkt; @@ -2162,32 +2182,33 @@ iflib_rxeof(iflib_rxq_t rxq, int budget) DBG_COUNTER_INC(rx_if_input); ifp->if_input(ifp, m); } + if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work */ - while ((queued = LIST_FIRST(&rxq->ifr_lc.lro_active)) != NULL) { - LIST_REMOVE(queued, next); #if defined(INET6) || defined(INET) - tcp_lro_flush(&rxq->ifr_lc, queued); + tcp_lro_flush_all(&rxq->ifr_lc); #endif - } - return (iflib_rxd_avail(ctx, rxq, *cidxp)); + if (avail) + return true; + return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); } #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) -#define TXQ_MAX_DB_DEFERRED(ctx) (ctx->ifc_sctx->isc_ntxd >> 5) -#define TXQ_MAX_DB_CONSUMED(ctx) (ctx->ifc_sctx->isc_ntxd >> 4) +#define TXQ_MAX_DB_DEFERRED(size) (size >> 5) +#define TXQ_MAX_DB_CONSUMED(size) (size >> 4) static __inline void iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring) { uint32_t dbval; - if (ring || txq->ift_db_pending >= TXQ_MAX_DB_DEFERRED(ctx)) { + if (ring || txq->ift_db_pending >= + TXQ_MAX_DB_DEFERRED(txq->ift_size)) { /* the lock will only ever be contended in the !min_latency case */ if (!TXDB_TRYLOCK(txq)) @@ -2233,9 +2254,9 @@ static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { struct ether_vlan_header *eh; - struct mbuf *m; + struct mbuf *m, *n; - m = *mp; + n = m = *mp; /* * Determine where frame payload starts. * Jump over vlan headers if already present, @@ -2261,7 +2282,6 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { struct ip *ip = NULL; struct tcphdr *th = NULL; - struct mbuf *n; int minthlen; minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); @@ -2403,37 +2423,31 @@ collapse_pkthdr(struct mbuf *m0) /* * If dodgy hardware rejects the scatter gather chain we've handed it - * we'll need to rebuild the mbuf chain before we can call m_defrag + * we'll need to remove the mbuf chain from ifsg_m[] before we can add the + * m_defrag'd mbufs */ static __noinline struct mbuf * -iflib_rebuild_mbuf(iflib_txq_t txq) +iflib_remove_mbuf(iflib_txq_t txq) { - - int ntxd, mhlen, len, i, pidx; + int ntxd, i, pidx; struct mbuf *m, *mh, **ifsd_m; - if_shared_ctx_t sctx; pidx = txq->ift_pidx; ifsd_m = txq->ift_sds.ifsd_m; - sctx = txq->ift_ctx->ifc_sctx; - ntxd = sctx->isc_ntxd; + ntxd = txq->ift_size; mh = m = ifsd_m[pidx]; ifsd_m[pidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif - len = m->m_len; - mhlen = m->m_pkthdr.len; i = 1; - while (len < mhlen && (m->m_next == NULL)) { - m->m_next = ifsd_m[(pidx + i) & (ntxd-1)]; + while (m) { ifsd_m[(pidx + i) & (ntxd -1)] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif m = m->m_next; - len += m->m_len; i++; } return (mh); @@ -2446,6 +2460,7 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, { if_ctx_t ctx; if_shared_ctx_t sctx; + if_softc_ctx_t scctx; int i, next, pidx, mask, err, maxsegsz, ntxd, count; struct mbuf *m, *tmp, **ifsd_m, **mp; @@ -2459,8 +2474,9 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; + scctx = &ctx->ifc_softc_ctx; ifsd_m = txq->ift_sds.ifsd_m; - ntxd = sctx->isc_ntxd; + ntxd = txq->ift_size; pidx = txq->ift_pidx; if (map != NULL) { uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; @@ -2472,13 +2488,12 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, ifsd_flags[pidx] |= TX_SW_DESC_MAPPED; i = 0; next = pidx; - mask = (sctx->isc_ntxd-1); + mask = (txq->ift_size-1); m = *m0; do { mp = &ifsd_m[next]; *mp = m; m = m->m_next; - (*mp)->m_next = NULL; if (__predict_false((*mp)->m_len == 0)) { m_free(*mp); *mp = NULL; @@ -2529,13 +2544,12 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, count++; tmp = m; m = m->m_next; - tmp->m_next = NULL; } while (m != NULL); *nsegs = i; } return (0); err: - *m0 = iflib_rebuild_mbuf(txq); + *m0 = iflib_remove_mbuf(txq); return (EFBIG); } @@ -2558,7 +2572,7 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; segs = txq->ift_segs; - ntxd = sctx->isc_ntxd; + ntxd = txq->ift_size; m_head = *m_headp; map = NULL; @@ -2645,14 +2659,14 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) if (map != NULL) bus_dmamap_unload(desc_tag, map); DBG_COUNTER_INC(encap_txq_avail_fail); - if (txq->ift_task.gt_task.ta_pending == 0) + if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) GROUPTASK_ENQUEUE(&txq->ift_task); return (ENOBUFS); } pi.ipi_segs = segs; pi.ipi_nsegs = nsegs; - MPASS(pidx >= 0 && pidx < sctx->isc_ntxd); + MPASS(pidx >= 0 && pidx < txq->ift_size); #ifdef PKT_DEBUG print_pkt(&pi); #endif @@ -2661,11 +2675,12 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); DBG_COUNTER_INC(tx_encap); - MPASS(pi.ipi_new_pidx >= 0 && pi.ipi_new_pidx < sctx->isc_ntxd); + MPASS(pi.ipi_new_pidx >= 0 && + pi.ipi_new_pidx < txq->ift_size); ndesc = pi.ipi_new_pidx - pi.ipi_pidx; if (pi.ipi_new_pidx < pi.ipi_pidx) { - ndesc += sctx->isc_ntxd; + ndesc += txq->ift_size; txq->ift_gen = 1; } MPASS(pi.ipi_new_pidx != pidx); @@ -2678,7 +2693,7 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) txq->ift_pidx = pi.ipi_new_pidx; txq->ift_npending += pi.ipi_ndescs; } else if (__predict_false(err == EFBIG && remap < 2)) { - *m_headp = m_head = iflib_rebuild_mbuf(txq); + *m_headp = m_head = iflib_remove_mbuf(txq); remap = 1; txq->ift_txd_encap_efbig++; goto defrag; @@ -2700,7 +2715,7 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) -#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NRXQSETS(ctx)) + FIRST_QSET(ctx)) +#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) #define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max) @@ -2712,7 +2727,7 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) * * ORing with 2 assures that min occupancy is never less than 2 without any conditional logic */ -#define TXQ_MIN_OCCUPANCY(ctx) ((ctx->ifc_sctx->isc_ntxd >> 6)| 0x2) +#define TXQ_MIN_OCCUPANCY(size) ((size >> 6)| 0x2) static inline int iflib_txq_min_occupancy(iflib_txq_t txq) @@ -2720,7 +2735,9 @@ iflib_txq_min_occupancy(iflib_txq_t txq) if_ctx_t ctx; ctx = txq->ift_ctx; - return (get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen) < TXQ_MIN_OCCUPANCY(ctx) + MAX_TX_DESC(ctx)); + return (get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, + txq->ift_gen) < TXQ_MIN_OCCUPANCY(txq->ift_size) + + MAX_TX_DESC(ctx)); } static void @@ -2734,7 +2751,7 @@ iflib_tx_desc_free(iflib_txq_t txq, int n) cidx = txq->ift_cidx; gen = txq->ift_gen; - qsize = txq->ift_ctx->ifc_sctx->isc_ntxd; + qsize = txq->ift_size; mask = qsize-1; hasmap = txq->ift_sds.ifsd_map != NULL; ifsd_flags = txq->ift_sds.ifsd_flags; @@ -2760,7 +2777,7 @@ iflib_tx_desc_free(iflib_txq_t txq, int n) /* XXX we don't support any drivers that batch packets yet */ MPASS(m->m_nextpkt == NULL); - m_freem(m); + m_free(m); ifsd_m[cidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; @@ -2856,7 +2873,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { - m_freem(r->items[(cidx + i) & (r->size-1)]); + m_free(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); @@ -2903,7 +2920,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) break; - if (desc_used > TXQ_MAX_DB_CONSUMED(ctx)) + if (desc_used > TXQ_MAX_DB_CONSUMED(txq->ift_size)) break; } @@ -2924,7 +2941,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) } static void -_task_fn_tx(void *context, int pending) +_task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; @@ -2935,11 +2952,12 @@ _task_fn_tx(void *context, int pending) } static void -_task_fn_rx(void *context, int pending) +_task_fn_rx(void *context) { iflib_rxq_t rxq = context; if_ctx_t ctx = rxq->ifr_ctx; bool more; + int rc; DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) @@ -2950,7 +2968,8 @@ _task_fn_rx(void *context, int pending) IFDI_INTR_ENABLE(ctx); else { DBG_COUNTER_INC(rx_intr_enables); - IFDI_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); + rc = IFDI_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); + KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) @@ -2960,7 +2979,7 @@ _task_fn_rx(void *context, int pending) } static void -_task_fn_admin(void *context, int pending) +_task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; @@ -2990,7 +3009,7 @@ _task_fn_admin(void *context, int pending) static void -_task_fn_iov(void *context, int pending) +_task_fn_iov(void *context) { if_ctx_t ctx = context; @@ -3049,8 +3068,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq; - struct mbuf *marr[8], **mp, *next; - int err, i, count, qidx; + int err, qidx; if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(tx_frees); @@ -3058,6 +3076,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) return (0); } + MPASS(m->m_nextpkt == NULL); qidx = 0; if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) qidx = QIDX(ctx, m); @@ -3077,6 +3096,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) return (ENOBUFS); } #endif +#ifdef notyet qidx = count = 0; mp = marr; next = m; @@ -3098,22 +3118,21 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) next = next->m_nextpkt; mp[i]->m_nextpkt = NULL; } +#endif DBG_COUNTER_INC(tx_seen); - err = ifmp_ring_enqueue(txq->ift_br[0], (void **)mp, count, TX_BATCH_SIZE); + err = ifmp_ring_enqueue(txq->ift_br[0], (void **)&m, 1, TX_BATCH_SIZE); - if (iflib_txq_can_drain(txq->ift_br[0])) - GROUPTASK_ENQUEUE(&txq->ift_task); if (err) { + GROUPTASK_ENQUEUE(&txq->ift_task); /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif - for (i = 0; i < count; i++) - m_freem(mp[i]); ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE); + m_freem(m); + } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) { + GROUPTASK_ENQUEUE(&txq->ift_task); } - if (count > nitems(marr)) - free(mp, M_IFLIB); return (err); } @@ -3138,13 +3157,13 @@ iflib_if_qflush(if_t ifp) if_qflush(ifp); } -#define IFCAP_REINIT (IFCAP_HWCSUM|IFCAP_TSO4|IFCAP_TSO6|IFCAP_VLAN_HWTAGGING|IFCAP_VLAN_MTU | \ - IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO) -#define IFCAP_FLAGS (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ +#define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO) +#define IFCAP_REINIT IFCAP_FLAGS + static int iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) { @@ -3428,6 +3447,9 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; + int i; + uint16_t main_txq; + uint16_t main_rxq; ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); @@ -3435,6 +3457,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct if (sc == NULL) { sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); device_set_softc(dev, ctx); + ctx->ifc_flags |= IFC_SC_ALLOCATED; } ctx->ifc_sctx = sctx; @@ -3447,28 +3470,112 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct return (err); } iflib_add_device_sysctl_pre(ctx); + + scctx = &ctx->ifc_softc_ctx; + /* + * XXX sanity check that ntxd & nrxd are a power of 2 + */ + if (ctx->ifc_sysctl_ntxqs != 0) + scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; + if (ctx->ifc_sysctl_nrxqs != 0) + scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; + + for (i = 0; i < sctx->isc_ntxqs; i++) { + if (ctx->ifc_sysctl_ntxds[i] != 0) + scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; + else + scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; + } + + for (i = 0; i < sctx->isc_nrxqs; i++) { + if (ctx->ifc_sysctl_nrxds[i] != 0) + scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; + else + scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; + } + + for (i = 0; i < sctx->isc_nrxqs; i++) { + if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { + device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", + i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); + scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; + } + if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { + device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", + i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); + scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; + } + } + + for (i = 0; i < sctx->isc_ntxqs; i++) { + if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { + device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", + i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); + scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; + } + if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { + device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", + i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); + scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; + } + } + if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); return (err); } + if (scctx->isc_ntxqsets_max) + scctx->isc_ntxqsets = min(scctx->isc_ntxqsets, scctx->isc_ntxqsets_max); + if (scctx->isc_nrxqsets_max) + scctx->isc_nrxqsets = min(scctx->isc_nrxqsets, scctx->isc_nrxqsets_max); + #ifdef ACPI_DMAR if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL) ctx->ifc_flags |= IFC_DMAR; #endif - scctx = &ctx->ifc_softc_ctx; msix_bar = scctx->isc_msix_bar; - if (scctx->isc_tx_nsegments > sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION) - scctx->isc_tx_nsegments = max(1, sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION); - if (scctx->isc_tx_tso_segments_max > sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION) - scctx->isc_tx_tso_segments_max = max(1, sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION); - ifp = ctx->ifc_ifp; - /* - * XXX sanity check that ntxd & nrxd are a power of 2 - */ + if(sctx->isc_flags & IFLIB_HAS_TXCQ) + main_txq = 1; + else + main_txq = 0; + + if(sctx->isc_flags & IFLIB_HAS_RXCQ) + main_rxq = 1; + else + main_rxq = 0; + + /* XXX change for per-queue sizes */ + device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", + scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); + for (i = 0; i < sctx->isc_nrxqs; i++) { + if (!powerof2(scctx->isc_nrxd[i])) { + /* round down instead? */ + device_printf(dev, "# rx descriptors must be a power of 2\n"); + err = EINVAL; + goto fail; + } + } + for (i = 0; i < sctx->isc_ntxqs; i++) { + if (!powerof2(scctx->isc_ntxd[i])) { + device_printf(dev, + "# tx descriptors must be a power of 2"); + err = EINVAL; + goto fail; + } + } + + if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / + MAX_SINGLE_PACKET_FRACTION) + scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / + MAX_SINGLE_PACKET_FRACTION); + if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / + MAX_SINGLE_PACKET_FRACTION) + scctx->isc_tx_tso_segments_max = max(1, + scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); /* * Protect the stack against modern hardware @@ -3482,7 +3589,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max; if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; - scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;; + scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; /* ** Now setup MSI or MSI/X, should ** return us the number of supported @@ -3520,7 +3627,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct MPASS(msix == 1); rid = 1; } - if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx, &rid, "irq0")) != 0) { + if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_intr_free; } @@ -3536,6 +3643,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct } *ctxp = ctx; + if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); return (0); fail_detach: @@ -3599,7 +3707,7 @@ iflib_device_deregister(if_ctx_t ctx) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ tqg = qgroup_if_io_tqg; - for (txq = ctx->ifc_txqs, i = 0, rxq = ctx->ifc_rxqs; i < NTXQSETS(ctx); i++, txq++) { + for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); callout_drain(&txq->ift_db_check); if (txq->ift_task.gt_uniq != NULL) @@ -3616,6 +3724,7 @@ iflib_device_deregister(if_ctx_t ctx) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); IFDI_DETACH(ctx); + device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(dev); } @@ -3633,6 +3742,9 @@ iflib_device_deregister(if_ctx_t ctx) iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); + if (ctx->ifc_flags & IFC_SC_ALLOCATED) + free(ctx->ifc_softc, M_IFLIB); + free(ctx, M_IFLIB); return (0); } @@ -3782,7 +3894,13 @@ _iflib_assert(if_shared_ctx_t sctx) MPASS(sctx->isc_txrx->ift_rxd_pkt_get); MPASS(sctx->isc_txrx->ift_rxd_refill); MPASS(sctx->isc_txrx->ift_rxd_flush); - MPASS(sctx->isc_nrxd); + + MPASS(sctx->isc_nrxd_min[0]); + MPASS(sctx->isc_nrxd_max[0]); + MPASS(sctx->isc_nrxd_default[0]); + MPASS(sctx->isc_ntxd_min[0]); + MPASS(sctx->isc_ntxd_max[0]); + MPASS(sctx->isc_ntxd_default[0]); } static int @@ -3796,7 +3914,6 @@ iflib_register(if_ctx_t ctx) _iflib_assert(sctx); CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); - MPASS(ctx->ifc_flags == 0); ifp = ctx->ifc_ifp = if_gethandle(IFT_ETHER); if (ifp == NULL) { @@ -3818,7 +3935,6 @@ iflib_register(if_ctx_t ctx) if_setioctlfn(ifp, iflib_if_ioctl); if_settransmitfn(ifp, iflib_if_transmit); if_setqflushfn(ifp, iflib_if_qflush); - if_setgetcounterfn(ifp, iflib_if_get_counter); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setcapabilities(ifp, 0); @@ -3842,16 +3958,17 @@ static int iflib_queues_alloc(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; - int nrxqsets = ctx->ifc_softc_ctx.isc_nrxqsets; - int ntxqsets = ctx->ifc_softc_ctx.isc_ntxqsets; + int nrxqsets = scctx->isc_nrxqsets; + int ntxqsets = scctx->isc_ntxqsets; iflib_txq_t txq; iflib_rxq_t rxq; iflib_fl_t fl = NULL; - int i, j, cpu, err, txconf, rxconf, fl_ifdi_offset; + int i, j, cpu, err, txconf, rxconf; iflib_dma_info_t ifdip; - uint32_t *rxqsizes = sctx->isc_rxqsizes; - uint32_t *txqsizes = sctx->isc_txqsizes; + uint32_t *rxqsizes = scctx->isc_rxqsizes; + uint32_t *txqsizes = scctx->isc_txqsizes; uint8_t nrxqs = sctx->isc_nrxqs; uint8_t ntxqs = sctx->isc_ntxqs; int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1; @@ -3860,10 +3977,11 @@ iflib_queues_alloc(if_ctx_t ctx) struct ifmp_ring **brscp; int nbuf_rings = 1; /* XXX determine dynamically */ - KASSERT(ntxqs > 0, ("number of queues must be at least 1")); - KASSERT(nrxqs > 0, ("number of queues must be at least 1")); + KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); + KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); brscp = NULL; + txq = NULL; rxq = NULL; /* Allocate the TX ring struct memory */ @@ -3891,8 +4009,6 @@ iflib_queues_alloc(if_ctx_t ctx) ctx->ifc_txqs = txq; ctx->ifc_rxqs = rxq; - txq = NULL; - rxq = NULL; /* * XXX handle allocation failure @@ -3916,6 +4032,11 @@ iflib_queues_alloc(if_ctx_t ctx) } txq->ift_ctx = ctx; txq->ift_id = i; + if (sctx->isc_flags & IFLIB_HAS_TXCQ) { + txq->ift_br_offset = 1; + } else { + txq->ift_br_offset = 0; + } /* XXX fix this */ txq->ift_timer.c_cpu = cpu; txq->ift_db_check.c_cpu = cpu; @@ -3970,10 +4091,10 @@ iflib_queues_alloc(if_ctx_t ctx) } rxq->ifr_ctx = ctx; rxq->ifr_id = i; - if (sctx->isc_flags & IFLIB_HAS_CQ) { - fl_ifdi_offset = 1; + if (sctx->isc_flags & IFLIB_HAS_RXCQ) { + rxq->ifr_fl_offset = 1; } else { - fl_ifdi_offset = 0; + rxq->ifr_fl_offset = 0; } rxq->ifr_nfl = nfree_lists; if (!(fl = @@ -3986,7 +4107,8 @@ iflib_queues_alloc(if_ctx_t ctx) for (j = 0; j < nfree_lists; j++) { rxq->ifr_fl[j].ifl_rxq = rxq; rxq->ifr_fl[j].ifl_id = j; - rxq->ifr_fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + fl_ifdi_offset]; + rxq->ifr_fl[j].ifl_ifdi = + &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; } /* Allocate receive buffers for the ring*/ if (iflib_rxsd_alloc(rxq)) { @@ -4106,12 +4228,13 @@ iflib_rx_structures_setup(if_ctx_t ctx) for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { #if defined(INET6) || defined(INET) tcp_lro_free(&rxq->ifr_lc); - if ((err = tcp_lro_init(&rxq->ifr_lc)) != 0) { + if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, + TCP_LRO_ENTRIES, min(1024, + ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) { device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); goto fail; } rxq->ifr_lro_enabled = TRUE; - rxq->ifr_lc.ifp = ctx->ifc_ifp; #endif IFDI_RXQ_SETUP(ctx, rxq->ifr_id); } @@ -4142,7 +4265,7 @@ iflib_rx_structures_free(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; - for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, rxq++) { + for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { iflib_rx_sds_free(rxq); } } @@ -4193,7 +4316,7 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, struct taskqgroup *tqg; iflib_filter_info_t info; cpuset_t cpus; - task_fn_t *fn; + gtask_fn_t *fn; int tqrid, err; void *q; @@ -4254,7 +4377,7 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void { struct grouptask *gtask; struct taskqgroup *tqg; - task_fn_t *fn; + gtask_fn_t *fn; void *q; switch (type) { @@ -4310,7 +4433,7 @@ iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int * iflib_filter_info_t info; struct grouptask *gtask; struct taskqgroup *tqg; - task_fn_t *fn; + gtask_fn_t *fn; int tqrid; void *q; int err; @@ -4385,7 +4508,7 @@ iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) } void -iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, task_fn_t *fn, +iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, char *name) { @@ -4394,14 +4517,21 @@ iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, task_fn_t *fn, } void -iflib_link_state_change(if_ctx_t ctx, int link_state) +iflib_config_gtask_deinit(struct grouptask *gtask) +{ + + taskqgroup_detach(qgroup_if_config_tqg, gtask); +} + +void +iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq = ctx->ifc_txqs; -#if 0 + if_setbaudrate(ifp, baudrate); -#endif + /* If link down, disable watchdog */ if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) @@ -4431,10 +4561,11 @@ iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) } static int -iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx) +iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget) { - return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx)); + return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, + budget)); } void @@ -4468,8 +4599,9 @@ iflib_msix_init(if_ctx_t ctx) int iflib_num_tx_queues, iflib_num_rx_queues; int err, admincnt, bar; - iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; - iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; + iflib_num_tx_queues = scctx->isc_ntxqsets; + iflib_num_rx_queues = scctx->isc_nrxqsets; + bar = ctx->ifc_softc_ctx.isc_msix_bar; admincnt = sctx->isc_admin_intrcnt; /* Override by tuneable */ @@ -4549,18 +4681,31 @@ iflib_msix_init(if_ctx_t ctx) if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif - if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queues) - queues = rx_queues = iflib_num_rx_queues; + if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) + rx_queues = iflib_num_rx_queues; else rx_queues = queues; + /* + * We want this to be all logical CPUs by default + */ if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) tx_queues = iflib_num_tx_queues; else - tx_queues = queues; + tx_queues = mp_ncpus; + + if (ctx->ifc_sysctl_qs_eq_override == 0) { +#ifdef INVARIANTS + if (tx_queues != rx_queues) + device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", + min(rx_queues, tx_queues), min(rx_queues, tx_queues)); +#endif + tx_queues = min(rx_queues, tx_queues); + rx_queues = min(rx_queues, tx_queues); + } device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); - vectors = queues + admincnt; + vectors = rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { device_printf(dev, "Using MSIX interrupts with %d vectors\n", vectors); @@ -4568,6 +4713,7 @@ iflib_msix_init(if_ctx_t ctx) scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; + return (vectors); } else { device_printf(dev, "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err); @@ -4617,7 +4763,58 @@ mp_ring_state_handler(SYSCTL_HANDLER_ARGS) return(rc); } +enum iflib_ndesc_handler { + IFLIB_NTXD_HANDLER, + IFLIB_NRXD_HANDLER, +}; +static int +mp_ndesc_handler(SYSCTL_HANDLER_ARGS) +{ + if_ctx_t ctx = (void *)arg1; + enum iflib_ndesc_handler type = arg2; + char buf[256] = {0}; + uint16_t *ndesc; + char *p, *next; + int nqs, rc, i; + + MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER); + + nqs = 8; + switch(type) { + case IFLIB_NTXD_HANDLER: + ndesc = ctx->ifc_sysctl_ntxds; + if (ctx->ifc_sctx) + nqs = ctx->ifc_sctx->isc_ntxqs; + break; + case IFLIB_NRXD_HANDLER: + ndesc = ctx->ifc_sysctl_nrxds; + if (ctx->ifc_sctx) + nqs = ctx->ifc_sctx->isc_nrxqs; + break; + } + if (nqs == 0) + nqs = 8; + + for (i=0; i<8; i++) { + if (i >= nqs) + break; + if (i) + strcat(buf, ","); + sprintf(strchr(buf, 0), "%d", ndesc[i]); + } + + rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (rc || req->newptr == NULL) + return rc; + + for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; + i++, p = strsep(&next, " ,")) { + ndesc[i] = strtoul(p, NULL, 10); + } + + return(rc); +} #define NAME_BUFLEN 32 static void @@ -4634,19 +4831,29 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx) CTLFLAG_RD, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); + SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", + CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0, + "driver version"); + SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", - CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, - "# of txqs to use, 0 => use default #"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxds", - CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxds, 0, - "# of tx descriptors to use, 0 => use default #"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxds", - CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxds, 0, - "# of rx descriptors to use, 0 => use default #"); + CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, + "# of rxqs to use, 0 => use default #"); + SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", + CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, + "permit #txq != #rxq"); + /* XXX change for per-queue sizes */ + SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", + CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER, + mp_ndesc_handler, "A", + "list of # of tx descriptors to use, 0 = use default #"); + SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", + CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, + mp_ndesc_handler, "A", + "list of # of rx descriptors to use, 0 = use default #"); } static void @@ -4700,7 +4907,7 @@ iflib_add_device_sysctl_post(if_ctx_t ctx) &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, - &txq->ift_mbuf_defrag_failed, "# of times no descriptors were available"); + &txq->ift_no_desc_avail, "# of times no descriptors were available"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", CTLFLAG_RD, &txq->ift_map_failed, "# of times dma map failed"); @@ -4763,7 +4970,7 @@ iflib_add_device_sysctl_post(if_ctx_t ctx) queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); - if (sctx->isc_flags & IFLIB_HAS_CQ) { + if (sctx->isc_flags & IFLIB_HAS_RXCQ) { SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx", CTLFLAG_RD, &rxq->ifr_cq_pidx, 1, "Producer Index"); diff --git a/sys/net/iflib.h b/sys/net/iflib.h index c301b917897c..cf4a786baa1b 100644 --- a/sys/net/iflib.h +++ b/sys/net/iflib.h @@ -35,6 +35,7 @@ #include #include #include +#include /* @@ -63,12 +64,14 @@ typedef struct if_int_delay_info *if_int_delay_info_t; typedef struct if_rxd_frag { uint8_t irf_flid; uint16_t irf_idx; + uint16_t irf_len; } *if_rxd_frag_t; typedef struct if_rxd_info { /* set by iflib */ uint16_t iri_qsidx; /* qset index */ uint16_t iri_vtag; /* vlan tag - if flag set */ + /* XXX redundant with the new irf_len field */ uint16_t iri_len; /* packet length */ uint16_t iri_cidx; /* consumer index of cq */ struct ifnet *iri_ifp; /* some drivers >1 interface per softc */ @@ -156,10 +159,11 @@ typedef struct if_txrx { void (*ift_txd_flush) (void *, uint16_t, uint32_t); int (*ift_txd_credits_update) (void *, uint16_t, uint32_t, bool); - int (*ift_rxd_available) (void *, uint16_t qsidx, uint32_t pidx); + int (*ift_rxd_available) (void *, uint16_t qsidx, uint32_t pidx, + int budget); int (*ift_rxd_pkt_get) (void *, if_rxd_info_t ri); void (*ift_rxd_refill) (void * , uint16_t qsidx, uint8_t flidx, uint32_t pidx, - uint64_t *paddrs, caddr_t *vaddrs, uint16_t count); + uint64_t *paddrs, caddr_t *vaddrs, uint16_t count, uint16_t buf_size); void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, uint32_t pidx); int (*ift_legacy_intr) (void *); } *if_txrx_t; @@ -170,11 +174,20 @@ typedef struct if_softc_ctx { int isc_ntxqsets; int isc_msix_bar; /* can be model specific - initialize in attach_pre */ int isc_tx_nsegments; /* can be model specific - initialize in attach_pre */ + int isc_ntxd[8]; + int isc_nrxd[8]; + + uint32_t isc_txqsizes[8]; + uint32_t isc_rxqsizes[8]; + int isc_max_txqsets; + int isc_max_rxqsets; int isc_tx_tso_segments_max; int isc_tx_tso_size_max; int isc_tx_tso_segsize_max; int isc_rss_table_size; int isc_rss_table_mask; + int isc_nrxqsets_max; + int isc_ntxqsets_max; iflib_intr_mode_t isc_intr; uint16_t isc_max_frame_size; /* set at init time by driver */ @@ -188,8 +201,6 @@ struct if_shared_ctx { int isc_magic; if_txrx_t isc_txrx; driver_t *isc_driver; - int isc_ntxd; - int isc_nrxd; int isc_nfl; int isc_flags; bus_size_t isc_q_align; @@ -199,14 +210,11 @@ struct if_shared_ctx { bus_size_t isc_rx_maxsegsize; int isc_rx_nsegments; int isc_rx_process_limit; - - - uint32_t isc_txqsizes[8]; int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */ - uint32_t isc_rxqsizes[8]; int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */ int isc_admin_intrcnt; /* # of admin/link interrupts */ + int isc_tx_reclaim_thresh; /* fields necessary for probe */ @@ -215,6 +223,12 @@ struct if_shared_ctx { /* optional function to transform the read values to match the table*/ void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id, uint16_t *subdevice_id, uint16_t *rev_id); + int isc_nrxd_min[8]; + int isc_nrxd_default[8]; + int isc_nrxd_max[8]; + int isc_ntxd_min[8]; + int isc_ntxd_default[8]; + int isc_ntxd_max[8]; }; typedef struct iflib_dma_info { @@ -240,9 +254,9 @@ typedef enum { /* - * Interface has a separate command queue + * Interface has a separate command queue for RX */ -#define IFLIB_HAS_CQ 0x1 +#define IFLIB_HAS_RXCQ 0x1 /* * Driver has already allocated vectors */ @@ -252,6 +266,10 @@ typedef enum { * Interface is a virtual function */ #define IFLIB_IS_VF 0x4 +/* + * Interface has a separate command queue for TX + */ +#define IFLIB_HAS_TXCQ 0x8 /* @@ -308,7 +326,10 @@ void iflib_irq_free(if_ctx_t ctx, if_irq_t irq); void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name); void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, - task_fn_t *fn, char *name); + gtask_fn_t *fn, char *name); + +void iflib_config_gtask_deinit(struct grouptask *gtask); + void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid); @@ -317,7 +338,7 @@ void iflib_admin_intr_deferred(if_ctx_t ctx); void iflib_iov_intr_deferred(if_ctx_t ctx); -void iflib_link_state_change(if_ctx_t ctx, int linkstate); +void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate); int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags); void iflib_dma_free(iflib_dma_info_t dma); diff --git a/sys/sys/_task.h b/sys/sys/_task.h index ce8978199230..d3be7198dc2f 100644 --- a/sys/sys/_task.h +++ b/sys/sys/_task.h @@ -42,6 +42,7 @@ * (q) taskqueue lock */ typedef void task_fn_t(void *context, int pending); +typedef void gtask_fn_t(void *context); struct task { STAILQ_ENTRY(task) ta_link; /* (q) link for queue */ @@ -51,8 +52,16 @@ struct task { void *ta_context; /* (c) argument for handler */ }; +struct gtask { + STAILQ_ENTRY(gtask) ta_link; /* (q) link for queue */ + uint16_t ta_flags; /* (q) state flags */ + u_short ta_priority; /* (c) Priority */ + gtask_fn_t *ta_func; /* (c) task handler */ + void *ta_context; /* (c) argument for handler */ +}; + struct grouptask { - struct task gt_task; + struct gtask gt_task; void *gt_taskqueue; LIST_ENTRY(grouptask) gt_list; void *gt_uniq; diff --git a/sys/sys/gtaskqueue.h b/sys/sys/gtaskqueue.h new file mode 100644 index 000000000000..88d4b54cdbbf --- /dev/null +++ b/sys/sys/gtaskqueue.h @@ -0,0 +1,125 @@ +/*- + * Copyright (c) 2014 Jeffrey Roberson + * Copyright (c) 2016 Matthew Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS_GTASKQUEUE_H_ +#define _SYS_GTASKQUEUE_H_ +#include + +#ifndef _KERNEL +#error "no user-servicable parts inside" +#endif + +struct gtaskqueue; +typedef void (*gtaskqueue_enqueue_fn)(void *context); + +/* + * Taskqueue groups. Manages dynamic thread groups and irq binding for + * device and other tasks. + */ + +void gtaskqueue_block(struct gtaskqueue *queue); +void gtaskqueue_unblock(struct gtaskqueue *queue); + +int gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask); +void gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *task); +void gtaskqueue_drain_all(struct gtaskqueue *queue); + +int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *task); +void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *grptask, + void *uniq, int irq, char *name); +int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *grptask, + void *uniq, int cpu, int irq, char *name); +void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask); +struct taskqgroup *taskqgroup_create(char *name); +void taskqgroup_destroy(struct taskqgroup *qgroup); +int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride); + +#define TASK_ENQUEUED 0x1 +#define TASK_SKIP_WAKEUP 0x2 + + +#define GTASK_INIT(task, flags, priority, func, context) do { \ + (task)->ta_flags = flags; \ + (task)->ta_priority = (priority); \ + (task)->ta_func = (func); \ + (task)->ta_context = (context); \ +} while (0) + +#define GROUPTASK_INIT(gtask, priority, func, context) \ + GTASK_INIT(&(gtask)->gt_task, TASK_SKIP_WAKEUP, priority, func, context) + +#define GROUPTASK_ENQUEUE(gtask) \ + grouptaskqueue_enqueue((gtask)->gt_taskqueue, &(gtask)->gt_task) + +#define TASKQGROUP_DECLARE(name) \ +extern struct taskqgroup *qgroup_##name + + +#ifdef EARLY_AP_STARTUP +#define TASKQGROUP_DEFINE(name, cnt, stride) \ + \ +struct taskqgroup *qgroup_##name; \ + \ +static void \ +taskqgroup_define_##name(void *arg) \ +{ \ + qgroup_##name = taskqgroup_create(#name); \ + taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ +} \ + \ +SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \ + taskqgroup_define_##name, NULL) +#else +#define TASKQGROUP_DEFINE(name, cnt, stride) \ + \ +struct taskqgroup *qgroup_##name; \ + \ +static void \ +taskqgroup_define_##name(void *arg) \ +{ \ + qgroup_##name = taskqgroup_create(#name); \ +} \ + \ +SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \ + taskqgroup_define_##name, NULL); \ + \ +static void \ +taskqgroup_adjust_##name(void *arg) \ +{ \ + taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ +} \ + \ +SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \ + taskqgroup_adjust_##name, NULL); \ + \ +struct __hack +#endif +TASKQGROUP_DECLARE(net); + +#endif /* !_SYS_GTASKQUEUE_H_ */ diff --git a/sys/sys/taskqueue.h b/sys/sys/taskqueue.h index c986ffb4171f..a6c6655832ec 100644 --- a/sys/sys/taskqueue.h +++ b/sys/sys/taskqueue.h @@ -204,78 +204,4 @@ struct taskqueue *taskqueue_create_fast(const char *name, int mflags, taskqueue_enqueue_fn enqueue, void *context); -/* - * Taskqueue groups. Manages dynamic thread groups and irq binding for - * device and other tasks. - */ -int grouptaskqueue_enqueue(struct taskqueue *queue, struct task *task); -void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, - void *uniq, int irq, char *name); -int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, - void *uniq, int cpu, int irq, char *name); -void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask); -struct taskqgroup *taskqgroup_create(char *name); -void taskqgroup_destroy(struct taskqgroup *qgroup); -int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride); - -#define TASK_SKIP_WAKEUP 0x1 - -#define GTASK_INIT(task, priority, func, context) do { \ - (task)->ta_pending = 0; \ - (task)->ta_priority = (priority); \ - (task)->ta_func = (func); \ - (task)->ta_context = (context); \ -} while (0) - -#define GROUPTASK_INIT(gtask, priority, func, context) \ - GTASK_INIT(&(gtask)->gt_task, priority, func, context) - -#define GROUPTASK_ENQUEUE(gtask) \ - grouptaskqueue_enqueue((gtask)->gt_taskqueue, &(gtask)->gt_task) - -#define TASKQGROUP_DECLARE(name) \ -extern struct taskqgroup *qgroup_##name - -#ifdef EARLY_AP_STARTUP -#define TASKQGROUP_DEFINE(name, cnt, stride) \ - \ -struct taskqgroup *qgroup_##name; \ - \ -static void \ -taskqgroup_define_##name(void *arg) \ -{ \ - qgroup_##name = taskqgroup_create(#name); \ - taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ -} \ - \ -SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \ - taskqgroup_define_##name, NULL) -#else -#define TASKQGROUP_DEFINE(name, cnt, stride) \ - \ -struct taskqgroup *qgroup_##name; \ - \ -static void \ -taskqgroup_define_##name(void *arg) \ -{ \ - qgroup_##name = taskqgroup_create(#name); \ -} \ - \ -SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \ - taskqgroup_define_##name, NULL); \ - \ -static void \ -taskqgroup_adjust_##name(void *arg) \ -{ \ - taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ -} \ - \ -SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \ - taskqgroup_adjust_##name, NULL); \ - \ -struct __hack -#endif - -TASKQGROUP_DECLARE(net); - #endif /* !_SYS_TASKQUEUE_H_ */ From f83cc0aae4f22207056bf41aaad34799f240081a Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Fri, 12 Aug 2016 22:20:52 +0000 Subject: [PATCH 71/76] Print vnode details when vnode locking assertion gets triggered. MFC after: 1 month --- sys/kern/vfs_subr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index b369b9760ace..be6163c55a4b 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -4402,6 +4402,10 @@ int vfs_badlock_print = 1; /* Print lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_print, CTLFLAG_RW, &vfs_badlock_print, 0, "Print lock violations"); +int vfs_badlock_vnode = 1; /* Print vnode details on lock violations. */ +SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_vnode, CTLFLAG_RW, &vfs_badlock_vnode, + 0, "Print vnode details on lock violations"); + #ifdef KDB int vfs_badlock_backtrace = 1; /* Print backtrace at lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_backtrace, CTLFLAG_RW, @@ -4416,6 +4420,8 @@ vfs_badlock(const char *msg, const char *str, struct vnode *vp) if (vfs_badlock_backtrace) kdb_backtrace(); #endif + if (vfs_badlock_vnode) + vn_printf(vp, "vnode "); if (vfs_badlock_print) printf("%s: %p %s\n", str, (void *)vp, msg); if (vfs_badlock_ddb) From f8acef5a3e15f9d72e824a687a687ea67918f327 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Fri, 12 Aug 2016 22:22:11 +0000 Subject: [PATCH 72/76] Remove unused "X" vnode lock assertion, somehow missed in r303743. MFC after: 1 month --- sys/kern/vnode_if.src | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index b55336255e90..9f3aa140d7ed 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -44,7 +44,6 @@ # U: unlocked. # -: not applicable. vnode does not yet (or no longer) exists. # =: the same on input and output, may be either L or U. -# X: locked if not nil. # # The paramater named "vpp" is assumed to be always used with double # indirection (**vpp) and that name is hard-coded in vnode_if.awk ! From 1c214db6ca0eaf877f10738decdf019eecc826f3 Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Fri, 12 Aug 2016 22:25:01 +0000 Subject: [PATCH 73/76] Reviewed by: allanjude Approved by: allanjude (mentor) Differential Revision: https://reviews.freebsd.org/D7491 --- share/misc/committers-src.dot | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/share/misc/committers-src.dot b/share/misc/committers-src.dot index ef0e0c955512..bab00b0a4852 100644 --- a/share/misc/committers-src.dot +++ b/share/misc/committers-src.dot @@ -312,6 +312,7 @@ theraven [label="David Chisnall\ntheraven@FreeBSD.org\n2011/11/11"] thompsa [label="Andrew Thompson\nthompsa@FreeBSD.org\n2005/05/25"] ticso [label="Bernd Walter\nticso@FreeBSD.org\n2002/01/31"] tijl [label="Tijl Coosemans\ntijl@FreeBSD.org\n2010/07/16"] +tsoome [label="Toomas Soome\ntsoome@FreeBSD.org\n2016/08/10"] trasz [label="Edward Tomasz Napierala\ntrasz@FreeBSD.org\n2008/08/22"] trhodes [label="Tom Rhodes\ntrhodes@FreeBSD.org\n2002/05/28"] trociny [label="Mikolaj Golub\ntrociny@FreeBSD.org\n2011/03/10"] @@ -363,6 +364,8 @@ adrian -> sgalabov ae -> melifaro +allanjude -> tsoome + alc -> davide andre -> qingli @@ -520,6 +523,7 @@ imp -> sanpei imp -> shiba imp -> takawata imp -> toshi +imp -> tsoome imp -> uch jake -> bms From 1b819cf2651636a85d478c5df6be98113f83bc33 Mon Sep 17 00:00:00 2001 From: Rick Macklem Date: Fri, 12 Aug 2016 22:44:59 +0000 Subject: [PATCH 74/76] Update the nfsstats structure to include the changes needed by the patch in D1626 plus changes so that it includes counts for NFSv4.1 (and the draft of NFSv4.2). Also, make all the counts uint64_t and add a vers field at the beginning, so that future revisions can easily be implemented. There is code in place to handle the old vesion of the nfsstats structure for backwards binary compatibility. Subsequent commits will update nfsstat(8) to use the new fields. Submitted by: will (earlier version) Reviewed by: ken MFC after: 1 month Relnotes: yes Differential Revision: https://reviews.freebsd.org/D1626 --- sys/fs/nfs/nfs_commonkrpc.c | 12 +- sys/fs/nfs/nfs_commonport.c | 208 ++++++++++++++++++++++-------- sys/fs/nfs/nfsport.h | 102 ++++++++++++--- sys/fs/nfs/nfsproto.h | 4 +- sys/fs/nfsclient/nfs_clbio.c | 18 +-- sys/fs/nfsclient/nfs_clcomsubs.c | 6 +- sys/fs/nfsclient/nfs_clstate.c | 58 ++++----- sys/fs/nfsclient/nfs_clsubs.c | 6 +- sys/fs/nfsclient/nfs_clvfsops.c | 1 - sys/fs/nfsclient/nfs_clvnops.c | 26 ++-- sys/fs/nfsserver/nfs_nfsdcache.c | 36 +++--- sys/fs/nfsserver/nfs_nfsdport.c | 5 + sys/fs/nfsserver/nfs_nfsdsocket.c | 103 +++++++++++++-- sys/fs/nfsserver/nfs_nfsdstate.c | 50 +++---- 14 files changed, 439 insertions(+), 196 deletions(-) diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c index 1a50ec8685fa..ad77f5edbf82 100644 --- a/sys/fs/nfs/nfs_commonkrpc.c +++ b/sys/fs/nfs/nfs_commonkrpc.c @@ -89,7 +89,7 @@ uint32_t nfscl_nfs4_done_probes[NFSV41_NPROCS + 1]; NFSSTATESPINLOCK; NFSREQSPINLOCK; NFSDLOCKMUTEX; -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern struct nfsreqhead nfsd_reqq; extern int nfscl_ticks; extern void (*ncl_call_invalcaches)(struct vnode *); @@ -642,7 +642,7 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, procnum = NFSV4PROC_COMPOUND; if (nmp != NULL) { - NFSINCRGLOBAL(newnfsstats.rpcrequests); + NFSINCRGLOBAL(nfsstatsv1.rpcrequests); /* Map the procnum to the old NFSv2 one, as required. */ if ((nd->nd_flag & ND_NFSV2) != 0) { @@ -762,18 +762,18 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, if (stat == RPC_SUCCESS) { error = 0; } else if (stat == RPC_TIMEDOUT) { - NFSINCRGLOBAL(newnfsstats.rpctimeouts); + NFSINCRGLOBAL(nfsstatsv1.rpctimeouts); error = ETIMEDOUT; } else if (stat == RPC_VERSMISMATCH) { - NFSINCRGLOBAL(newnfsstats.rpcinvalid); + NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EOPNOTSUPP; } else if (stat == RPC_PROGVERSMISMATCH) { - NFSINCRGLOBAL(newnfsstats.rpcinvalid); + NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EPROTONOSUPPORT; } else if (stat == RPC_INTR) { error = EINTR; } else { - NFSINCRGLOBAL(newnfsstats.rpcinvalid); + NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EACCES; } if (error) { diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c index a22073f87c20..23a861d4a9b1 100644 --- a/sys/fs/nfs/nfs_commonport.c +++ b/sys/fs/nfs/nfs_commonport.c @@ -58,7 +58,7 @@ extern void (*nfsd_call_recall)(struct vnode *, int, struct ucred *, extern int nfsrv_useacl; struct mount nfsv4root_mnt; int newnfs_numnfsd = 0; -struct nfsstats newnfsstats; +struct nfsstatsv1 nfsstatsv1; int nfs_numnfscbd = 0; int nfscl_debuglevel = 0; char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; @@ -69,6 +69,7 @@ void (*ncl_call_invalcaches)(struct vnode *) = NULL; static int nfs_realign_test; static int nfs_realign_count; +static struct ext_nfsstats oldnfsstats; SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, @@ -446,9 +447,12 @@ nfssvc_nfscommon(struct thread *td, struct nfssvc_args *uap) static int nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) { - int error = EINVAL; + int error = EINVAL, i, j; struct nfsd_idargs nid; struct nfsd_oidargs onid; + struct { + int vers; /* Just the first field of nfsstats. */ + } nfsstatver; if (uap->flag & NFSSVC_IDNAME) { if ((uap->flag & NFSSVC_NEWSTRUCT) != 0) @@ -472,63 +476,157 @@ nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) error = nfssvc_idname(&nid); goto out; } else if (uap->flag & NFSSVC_GETSTATS) { - error = copyout(&newnfsstats, - CAST_USER_ADDR_T(uap->argp), sizeof (newnfsstats)); + if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { + /* Copy fields to the old ext_nfsstat structure. */ + oldnfsstats.attrcache_hits = + nfsstatsv1.attrcache_hits; + oldnfsstats.attrcache_misses = + nfsstatsv1.attrcache_misses; + oldnfsstats.lookupcache_hits = + nfsstatsv1.lookupcache_hits; + oldnfsstats.lookupcache_misses = + nfsstatsv1.lookupcache_misses; + oldnfsstats.direofcache_hits = + nfsstatsv1.direofcache_hits; + oldnfsstats.direofcache_misses = + nfsstatsv1.direofcache_misses; + oldnfsstats.accesscache_hits = + nfsstatsv1.accesscache_hits; + oldnfsstats.accesscache_misses = + nfsstatsv1.accesscache_misses; + oldnfsstats.biocache_reads = + nfsstatsv1.biocache_reads; + oldnfsstats.read_bios = + nfsstatsv1.read_bios; + oldnfsstats.read_physios = + nfsstatsv1.read_physios; + oldnfsstats.biocache_writes = + nfsstatsv1.biocache_writes; + oldnfsstats.write_bios = + nfsstatsv1.write_bios; + oldnfsstats.write_physios = + nfsstatsv1.write_physios; + oldnfsstats.biocache_readlinks = + nfsstatsv1.biocache_readlinks; + oldnfsstats.readlink_bios = + nfsstatsv1.readlink_bios; + oldnfsstats.biocache_readdirs = + nfsstatsv1.biocache_readdirs; + oldnfsstats.readdir_bios = + nfsstatsv1.readdir_bios; + for (i = 0; i < NFSV4_NPROCS; i++) + oldnfsstats.rpccnt[i] = nfsstatsv1.rpccnt[i]; + oldnfsstats.rpcretries = nfsstatsv1.rpcretries; + for (i = 0; i < NFSV4OP_NOPS; i++) + oldnfsstats.srvrpccnt[i] = + nfsstatsv1.srvrpccnt[i]; + for (i = NFSV42_NOPS, j = NFSV4OP_NOPS; + i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) + oldnfsstats.srvrpccnt[j] = + nfsstatsv1.srvrpccnt[i]; + oldnfsstats.srvrpc_errs = nfsstatsv1.srvrpc_errs; + oldnfsstats.srv_errs = nfsstatsv1.srv_errs; + oldnfsstats.rpcrequests = nfsstatsv1.rpcrequests; + oldnfsstats.rpctimeouts = nfsstatsv1.rpctimeouts; + oldnfsstats.rpcunexpected = nfsstatsv1.rpcunexpected; + oldnfsstats.rpcinvalid = nfsstatsv1.rpcinvalid; + oldnfsstats.srvcache_inproghits = + nfsstatsv1.srvcache_inproghits; + oldnfsstats.srvcache_idemdonehits = + nfsstatsv1.srvcache_idemdonehits; + oldnfsstats.srvcache_nonidemdonehits = + nfsstatsv1.srvcache_nonidemdonehits; + oldnfsstats.srvcache_misses = + nfsstatsv1.srvcache_misses; + oldnfsstats.srvcache_tcppeak = + nfsstatsv1.srvcache_tcppeak; + oldnfsstats.srvcache_size = nfsstatsv1.srvcache_size; + oldnfsstats.srvclients = nfsstatsv1.srvclients; + oldnfsstats.srvopenowners = nfsstatsv1.srvopenowners; + oldnfsstats.srvopens = nfsstatsv1.srvopens; + oldnfsstats.srvlockowners = nfsstatsv1.srvlockowners; + oldnfsstats.srvlocks = nfsstatsv1.srvlocks; + oldnfsstats.srvdelegates = nfsstatsv1.srvdelegates; + for (i = 0; i < NFSV4OP_CBNOPS; i++) + oldnfsstats.cbrpccnt[i] = + nfsstatsv1.cbrpccnt[i]; + oldnfsstats.clopenowners = nfsstatsv1.clopenowners; + oldnfsstats.clopens = nfsstatsv1.clopens; + oldnfsstats.cllockowners = nfsstatsv1.cllockowners; + oldnfsstats.cllocks = nfsstatsv1.cllocks; + oldnfsstats.cldelegates = nfsstatsv1.cldelegates; + oldnfsstats.cllocalopenowners = + nfsstatsv1.cllocalopenowners; + oldnfsstats.cllocalopens = nfsstatsv1.cllocalopens; + oldnfsstats.cllocallockowners = + nfsstatsv1.cllocallockowners; + oldnfsstats.cllocallocks = nfsstatsv1.cllocallocks; + error = copyout(&oldnfsstats, uap->argp, + sizeof (oldnfsstats)); + } else { + error = copyin(uap->argp, &nfsstatver, + sizeof(nfsstatver)); + if (error == 0 && nfsstatver.vers != NFSSTATS_V1) + error = EPERM; + if (error == 0) + error = copyout(&nfsstatsv1, uap->argp, + sizeof (nfsstatsv1)); + } if (error == 0) { if ((uap->flag & NFSSVC_ZEROCLTSTATS) != 0) { - newnfsstats.attrcache_hits = 0; - newnfsstats.attrcache_misses = 0; - newnfsstats.lookupcache_hits = 0; - newnfsstats.lookupcache_misses = 0; - newnfsstats.direofcache_hits = 0; - newnfsstats.direofcache_misses = 0; - newnfsstats.accesscache_hits = 0; - newnfsstats.accesscache_misses = 0; - newnfsstats.biocache_reads = 0; - newnfsstats.read_bios = 0; - newnfsstats.read_physios = 0; - newnfsstats.biocache_writes = 0; - newnfsstats.write_bios = 0; - newnfsstats.write_physios = 0; - newnfsstats.biocache_readlinks = 0; - newnfsstats.readlink_bios = 0; - newnfsstats.biocache_readdirs = 0; - newnfsstats.readdir_bios = 0; - newnfsstats.rpcretries = 0; - newnfsstats.rpcrequests = 0; - newnfsstats.rpctimeouts = 0; - newnfsstats.rpcunexpected = 0; - newnfsstats.rpcinvalid = 0; - bzero(newnfsstats.rpccnt, - sizeof(newnfsstats.rpccnt)); + nfsstatsv1.attrcache_hits = 0; + nfsstatsv1.attrcache_misses = 0; + nfsstatsv1.lookupcache_hits = 0; + nfsstatsv1.lookupcache_misses = 0; + nfsstatsv1.direofcache_hits = 0; + nfsstatsv1.direofcache_misses = 0; + nfsstatsv1.accesscache_hits = 0; + nfsstatsv1.accesscache_misses = 0; + nfsstatsv1.biocache_reads = 0; + nfsstatsv1.read_bios = 0; + nfsstatsv1.read_physios = 0; + nfsstatsv1.biocache_writes = 0; + nfsstatsv1.write_bios = 0; + nfsstatsv1.write_physios = 0; + nfsstatsv1.biocache_readlinks = 0; + nfsstatsv1.readlink_bios = 0; + nfsstatsv1.biocache_readdirs = 0; + nfsstatsv1.readdir_bios = 0; + nfsstatsv1.rpcretries = 0; + nfsstatsv1.rpcrequests = 0; + nfsstatsv1.rpctimeouts = 0; + nfsstatsv1.rpcunexpected = 0; + nfsstatsv1.rpcinvalid = 0; + bzero(nfsstatsv1.rpccnt, + sizeof(nfsstatsv1.rpccnt)); } if ((uap->flag & NFSSVC_ZEROSRVSTATS) != 0) { - newnfsstats.srvrpc_errs = 0; - newnfsstats.srv_errs = 0; - newnfsstats.srvcache_inproghits = 0; - newnfsstats.srvcache_idemdonehits = 0; - newnfsstats.srvcache_nonidemdonehits = 0; - newnfsstats.srvcache_misses = 0; - newnfsstats.srvcache_tcppeak = 0; - newnfsstats.srvclients = 0; - newnfsstats.srvopenowners = 0; - newnfsstats.srvopens = 0; - newnfsstats.srvlockowners = 0; - newnfsstats.srvlocks = 0; - newnfsstats.srvdelegates = 0; - newnfsstats.clopenowners = 0; - newnfsstats.clopens = 0; - newnfsstats.cllockowners = 0; - newnfsstats.cllocks = 0; - newnfsstats.cldelegates = 0; - newnfsstats.cllocalopenowners = 0; - newnfsstats.cllocalopens = 0; - newnfsstats.cllocallockowners = 0; - newnfsstats.cllocallocks = 0; - bzero(newnfsstats.srvrpccnt, - sizeof(newnfsstats.srvrpccnt)); - bzero(newnfsstats.cbrpccnt, - sizeof(newnfsstats.cbrpccnt)); + nfsstatsv1.srvrpc_errs = 0; + nfsstatsv1.srv_errs = 0; + nfsstatsv1.srvcache_inproghits = 0; + nfsstatsv1.srvcache_idemdonehits = 0; + nfsstatsv1.srvcache_nonidemdonehits = 0; + nfsstatsv1.srvcache_misses = 0; + nfsstatsv1.srvcache_tcppeak = 0; + nfsstatsv1.srvclients = 0; + nfsstatsv1.srvopenowners = 0; + nfsstatsv1.srvopens = 0; + nfsstatsv1.srvlockowners = 0; + nfsstatsv1.srvlocks = 0; + nfsstatsv1.srvdelegates = 0; + nfsstatsv1.clopenowners = 0; + nfsstatsv1.clopens = 0; + nfsstatsv1.cllockowners = 0; + nfsstatsv1.cllocks = 0; + nfsstatsv1.cldelegates = 0; + nfsstatsv1.cllocalopenowners = 0; + nfsstatsv1.cllocalopens = 0; + nfsstatsv1.cllocallockowners = 0; + nfsstatsv1.cllocallocks = 0; + bzero(nfsstatsv1.srvrpccnt, + sizeof(nfsstatsv1.srvrpccnt)); + bzero(nfsstatsv1.cbrpccnt, + sizeof(nfsstatsv1.cbrpccnt)); } } goto out; diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h index 6b41e2fbd915..3bfbb1c01f68 100644 --- a/sys/fs/nfs/nfsport.h +++ b/sys/fs/nfs/nfsport.h @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -254,24 +255,26 @@ /* * Must be one more than last op#. + * NFSv4.2 isn't implemented yet, but define the op# limit for it. */ #define NFSV41_NOPS 59 +#define NFSV42_NOPS 72 /* Quirky case if the illegal op code */ #define NFSV4OP_OPILLEGAL 10044 /* - * Fake NFSV4OP_xxx used for nfsstat. Start at NFSV4OP_NOPS. + * Fake NFSV4OP_xxx used for nfsstat. Start at NFSV42_NOPS. */ -#define NFSV4OP_SYMLINK (NFSV4OP_NOPS) -#define NFSV4OP_MKDIR (NFSV4OP_NOPS + 1) -#define NFSV4OP_RMDIR (NFSV4OP_NOPS + 2) -#define NFSV4OP_READDIRPLUS (NFSV4OP_NOPS + 3) -#define NFSV4OP_MKNOD (NFSV4OP_NOPS + 4) -#define NFSV4OP_FSSTAT (NFSV4OP_NOPS + 5) -#define NFSV4OP_FSINFO (NFSV4OP_NOPS + 6) -#define NFSV4OP_PATHCONF (NFSV4OP_NOPS + 7) -#define NFSV4OP_V3CREATE (NFSV4OP_NOPS + 8) +#define NFSV4OP_SYMLINK (NFSV42_NOPS) +#define NFSV4OP_MKDIR (NFSV42_NOPS + 1) +#define NFSV4OP_RMDIR (NFSV42_NOPS + 2) +#define NFSV4OP_READDIRPLUS (NFSV42_NOPS + 3) +#define NFSV4OP_MKNOD (NFSV42_NOPS + 4) +#define NFSV4OP_FSSTAT (NFSV42_NOPS + 5) +#define NFSV4OP_FSINFO (NFSV42_NOPS + 6) +#define NFSV4OP_PATHCONF (NFSV42_NOPS + 7) +#define NFSV4OP_V3CREATE (NFSV42_NOPS + 8) /* * This is the count of the fake operations listed above. @@ -285,12 +288,12 @@ #define NFSV4OP_CBRECALL 4 /* - * Must be one greater than the last Callback Operation#. + * Must be one greater than the last Callback Operation# for NFSv4.0. */ #define NFSV4OP_CBNOPS 5 /* - * Additional Callback Ops for NFSv4.1 only. Not yet in nfsstats. + * Additional Callback Ops for NFSv4.1 only. */ #define NFSV4OP_CBLAYOUTRECALL 5 #define NFSV4OP_CBNOTIFY 6 @@ -303,6 +306,9 @@ #define NFSV4OP_CBNOTIFYLOCK 13 #define NFSV4OP_CBNOTIFYDEVID 14 +#define NFSV41_CBNOPS 15 +#define NFSV42_CBNOPS 16 + /* * The lower numbers -> 21 are used by NFSv2 and v3. These define higher * numbers used by NFSv4. @@ -360,7 +366,72 @@ #endif /* NFS_V3NPROCS */ /* - * Stats structure + * New stats structure. + * The vers field will be set to NFSSTATS_V1 by the caller. + */ +#define NFSSTATS_V1 1 +struct nfsstatsv1 { + int vers; /* Set to version requested by caller. */ + uint64_t attrcache_hits; + uint64_t attrcache_misses; + uint64_t lookupcache_hits; + uint64_t lookupcache_misses; + uint64_t direofcache_hits; + uint64_t direofcache_misses; + uint64_t accesscache_hits; + uint64_t accesscache_misses; + uint64_t biocache_reads; + uint64_t read_bios; + uint64_t read_physios; + uint64_t biocache_writes; + uint64_t write_bios; + uint64_t write_physios; + uint64_t biocache_readlinks; + uint64_t readlink_bios; + uint64_t biocache_readdirs; + uint64_t readdir_bios; + uint64_t rpccnt[NFSV41_NPROCS + 15]; + uint64_t rpcretries; + uint64_t srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS]; + uint64_t srvrpc_errs; + uint64_t srv_errs; + uint64_t rpcrequests; + uint64_t rpctimeouts; + uint64_t rpcunexpected; + uint64_t rpcinvalid; + uint64_t srvcache_inproghits; + uint64_t srvcache_idemdonehits; + uint64_t srvcache_nonidemdonehits; + uint64_t srvcache_misses; + uint64_t srvcache_tcppeak; + int srvcache_size; /* Updated by atomic_xx_int(). */ + uint64_t srvclients; + uint64_t srvopenowners; + uint64_t srvopens; + uint64_t srvlockowners; + uint64_t srvlocks; + uint64_t srvdelegates; + uint64_t cbrpccnt[NFSV42_CBNOPS]; + uint64_t clopenowners; + uint64_t clopens; + uint64_t cllockowners; + uint64_t cllocks; + uint64_t cldelegates; + uint64_t cllocalopenowners; + uint64_t cllocalopens; + uint64_t cllocallockowners; + uint64_t cllocallocks; + uint64_t srvstartcnt; + uint64_t srvdonecnt; + uint64_t srvbytes[NFSV42_NOPS + NFSV4OP_FAKENOPS]; + uint64_t srvops[NFSV42_NOPS + NFSV4OP_FAKENOPS]; + struct bintime srvduration[NFSV42_NOPS + NFSV4OP_FAKENOPS]; + struct bintime busyfrom; + struct bintime busytime; +}; + +/* + * Old stats structure. */ struct ext_nfsstats { int attrcache_hits; @@ -415,11 +486,6 @@ struct ext_nfsstats { }; #ifdef _KERNEL -/* - * Define the ext_nfsstats as nfsstats for the kernel code. - */ -#define nfsstats ext_nfsstats - /* * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code. */ diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h index 08e7b24b5b1d..daa49a09eec1 100644 --- a/sys/fs/nfs/nfsproto.h +++ b/sys/fs/nfs/nfsproto.h @@ -345,10 +345,10 @@ /* * NFSPROC_NOOP is a fake op# that can't be the same as any V2/3/4 Procedure - * or Operation#. Since the NFS V4 Op #s go higher, use NFSV41_NOPS, which + * or Operation#. Since the NFS V4 Op #s go higher, use NFSV42_NOPS, which * is one greater than the highest Op#. */ -#define NFSPROC_NOOP NFSV41_NOPS +#define NFSPROC_NOOP NFSV42_NOPS /* Actual Version 2 procedure numbers */ #define NFSV2PROC_NULL 0 diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c index c804393ed8ed..492a072f4bdc 100644 --- a/sys/fs/nfsclient/nfs_clbio.c +++ b/sys/fs/nfsclient/nfs_clbio.c @@ -60,7 +60,7 @@ __FBSDID("$FreeBSD$"); #include extern int newnfs_directio_allow_mmap; -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern struct mtx ncl_iod_mutex; extern int ncl_numasync; extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; @@ -466,7 +466,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) switch (vp->v_type) { case VREG: - NFSINCRGLOBAL(newnfsstats.biocache_reads); + NFSINCRGLOBAL(nfsstatsv1.biocache_reads); lbn = uio->uio_offset / biosize; on = uio->uio_offset - (lbn * biosize); @@ -543,7 +543,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) n = MIN((unsigned)(bcount - on), uio->uio_resid); break; case VLNK: - NFSINCRGLOBAL(newnfsstats.biocache_readlinks); + NFSINCRGLOBAL(nfsstatsv1.biocache_readlinks); bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, td); if (!bp) { error = newnfs_sigintr(nmp, td); @@ -563,7 +563,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) on = 0; break; case VDIR: - NFSINCRGLOBAL(newnfsstats.biocache_readdirs); + NFSINCRGLOBAL(nfsstatsv1.biocache_readdirs); if (np->n_direofoffset && uio->uio_offset >= np->n_direofoffset) { return (0); @@ -992,7 +992,7 @@ ncl_write(struct vop_write_args *ap) } } - NFSINCRGLOBAL(newnfsstats.biocache_writes); + NFSINCRGLOBAL(nfsstatsv1.biocache_writes); lbn = uio->uio_offset / biosize; on = uio->uio_offset - (lbn * biosize); n = MIN((unsigned)(biosize - on), uio->uio_resid); @@ -1606,7 +1606,7 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td, switch (vp->v_type) { case VREG: uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; - NFSINCRGLOBAL(newnfsstats.read_bios); + NFSINCRGLOBAL(nfsstatsv1.read_bios); error = ncl_readrpc(vp, uiop, cr); if (!error) { @@ -1641,11 +1641,11 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td, break; case VLNK: uiop->uio_offset = (off_t)0; - NFSINCRGLOBAL(newnfsstats.readlink_bios); + NFSINCRGLOBAL(nfsstatsv1.readlink_bios); error = ncl_readlinkrpc(vp, uiop, cr); break; case VDIR: - NFSINCRGLOBAL(newnfsstats.readdir_bios); + NFSINCRGLOBAL(nfsstatsv1.readdir_bios); uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ; if ((nmp->nm_flag & NFSMNT_RDIRPLUS) != 0) { error = ncl_readdirplusrpc(vp, uiop, cr, td); @@ -1707,7 +1707,7 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td, + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; - NFSINCRGLOBAL(newnfsstats.write_bios); + NFSINCRGLOBAL(nfsstatsv1.write_bios); if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC) iomode = NFSWRITE_UNSTABLE; diff --git a/sys/fs/nfsclient/nfs_clcomsubs.c b/sys/fs/nfsclient/nfs_clcomsubs.c index 1fc7d1b7ce41..e108b4bf9553 100644 --- a/sys/fs/nfsclient/nfs_clcomsubs.c +++ b/sys/fs/nfsclient/nfs_clcomsubs.c @@ -42,7 +42,7 @@ __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT #include -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS]; extern int ncl_mbuf_mlen; extern enum vtype newnv2tov_type[8]; @@ -241,8 +241,8 @@ nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, } else { (void) nfsm_fhtom(nd, nfhp, fhlen, 0); } - if (procnum < NFSV4_NPROCS) - NFSINCRGLOBAL(newnfsstats.rpccnt[procnum]); + if (procnum < NFSV41_NPROCS) + NFSINCRGLOBAL(nfsstatsv1.rpccnt[procnum]); } #ifndef APPLE diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c index 20adaf1cddd7..e5b871f99579 100644 --- a/sys/fs/nfsclient/nfs_clstate.c +++ b/sys/fs/nfsclient/nfs_clstate.c @@ -84,7 +84,7 @@ __FBSDID("$FreeBSD$"); /* * Global variables */ -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern struct nfsreqhead nfsd_reqq; extern u_int32_t newnfs_false, newnfs_true; extern int nfscl_debuglevel; @@ -343,10 +343,10 @@ nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp, nowp->nfsow_defunct = 0; nfscl_lockinit(&nowp->nfsow_rwlock); if (dp != NULL) { - newnfsstats.cllocalopenowners++; + nfsstatsv1.cllocalopenowners++; LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list); } else { - newnfsstats.clopenowners++; + nfsstatsv1.clopenowners++; LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list); } owp = *owpp = nowp; @@ -380,9 +380,9 @@ nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp, TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; - newnfsstats.cllocalopens++; + nfsstatsv1.cllocalopens++; } else { - newnfsstats.clopens++; + nfsstatsv1.clopens++; } LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list); *opp = nop; @@ -430,7 +430,7 @@ nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp, LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp, nfsdl_hash); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; - newnfsstats.cldelegates++; + nfsstatsv1.cldelegates++; nfscl_delegcnt++; } else { /* @@ -1071,10 +1071,10 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len, LIST_INIT(&nlp->nfsl_lock); if (donelocally) { nlp->nfsl_open = NULL; - newnfsstats.cllocallockowners++; + nfsstatsv1.cllocallockowners++; } else { nlp->nfsl_open = op; - newnfsstats.cllockowners++; + nfsstatsv1.cllockowners++; } LIST_INSERT_HEAD(lhp, nlp, nfsl_list); lp = nlp; @@ -1402,9 +1402,9 @@ nfscl_freeopen(struct nfsclopen *op, int local) nfscl_freealllocks(&op->nfso_lock, local); FREE((caddr_t)op, M_NFSCLOPEN); if (local) - newnfsstats.cllocalopens--; + nfsstatsv1.cllocalopens--; else - newnfsstats.clopens--; + nfsstatsv1.clopens--; } /* @@ -1483,9 +1483,9 @@ nfscl_freeopenowner(struct nfsclowner *owp, int local) LIST_REMOVE(owp, nfsow_list); FREE((caddr_t)owp, M_NFSCLOWNER); if (local) - newnfsstats.cllocalopenowners--; + nfsstatsv1.cllocalopenowners--; else - newnfsstats.clopenowners--; + nfsstatsv1.clopenowners--; } /* @@ -1502,9 +1502,9 @@ nfscl_freelockowner(struct nfscllockowner *lp, int local) } FREE((caddr_t)lp, M_NFSCLLOCKOWNER); if (local) - newnfsstats.cllocallockowners--; + nfsstatsv1.cllocallockowners--; else - newnfsstats.cllockowners--; + nfsstatsv1.cllockowners--; } /* @@ -1517,9 +1517,9 @@ nfscl_freelock(struct nfscllock *lop, int local) LIST_REMOVE(lop, nfslo_list); FREE((caddr_t)lop, M_NFSCLLOCK); if (local) - newnfsstats.cllocallocks--; + nfsstatsv1.cllocallocks--; else - newnfsstats.cllocks--; + nfsstatsv1.cllocks--; } /* @@ -1553,7 +1553,7 @@ nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp) TAILQ_REMOVE(hdp, dp, nfsdl_list); LIST_REMOVE(dp, nfsdl_hash); FREE((caddr_t)dp, M_NFSCLDELEG); - newnfsstats.cldelegates--; + nfsstatsv1.cldelegates--; nfscl_delegcnt--; } @@ -1621,18 +1621,18 @@ nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp, LIST_REMOVE(op, nfso_list); op->nfso_own = towp; LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list); - newnfsstats.cllocalopens--; - newnfsstats.clopens++; + nfsstatsv1.cllocalopens--; + nfsstatsv1.clopens++; } } else { /* Just add the openowner to the client list */ LIST_REMOVE(owp, nfsow_list); owp->nfsow_clp = clp; LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list); - newnfsstats.cllocalopenowners--; - newnfsstats.clopenowners++; - newnfsstats.cllocalopens--; - newnfsstats.clopens++; + nfsstatsv1.cllocalopenowners--; + nfsstatsv1.clopenowners++; + nfsstatsv1.cllocalopens--; + nfsstatsv1.clopens++; } } owp = nowp; @@ -2282,9 +2282,9 @@ nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop, else LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list); if (local) - newnfsstats.cllocallocks++; + nfsstatsv1.cllocallocks++; else - newnfsstats.cllocks++; + nfsstatsv1.cllocks++; } /* @@ -2571,7 +2571,7 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) LIST_REMOVE(dp, nfsdl_hash); TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list); nfscl_delegcnt--; - newnfsstats.cldelegates--; + nfsstatsv1.cldelegates--; } NFSLOCKCLSTATE(); } @@ -2612,7 +2612,7 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) LIST_REMOVE(dp, nfsdl_hash); TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list); nfscl_delegcnt--; - newnfsstats.cldelegates--; + nfsstatsv1.cldelegates--; } } dp = ndp; @@ -3215,8 +3215,8 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) break; } nd->nd_procnum = op; - if (op < NFSV4OP_CBNOPS) - newnfsstats.cbrpccnt[nd->nd_procnum]++; + if (op < NFSV41_CBNOPS) + nfsstatsv1.cbrpccnt[nd->nd_procnum]++; switch (op) { case NFSV4OP_CBGETATTR: NFSCL_DEBUG(4, "cbgetattr\n"); diff --git a/sys/fs/nfsclient/nfs_clsubs.c b/sys/fs/nfsclient/nfs_clsubs.c index 836a183f29e2..a8e94fc2872e 100644 --- a/sys/fs/nfsclient/nfs_clsubs.c +++ b/sys/fs/nfsclient/nfs_clsubs.c @@ -83,7 +83,7 @@ extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON]; extern int ncl_numasync; extern unsigned int ncl_iodmax; -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; struct task ncl_nfsiodnew_task; @@ -219,12 +219,12 @@ ncl_getattrcache(struct vnode *vp, struct vattr *vaper) if ((time_second - np->n_attrstamp) >= timeo && (mustflush != 0 || np->n_attrstamp == 0)) { - newnfsstats.attrcache_misses++; + nfsstatsv1.attrcache_misses++; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ATTRCACHE_GET_MISS(vp); return( ENOENT); } - newnfsstats.attrcache_hits++; + nfsstatsv1.attrcache_hits++; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index a3e2c7096d10..524a372d897a 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -78,7 +78,6 @@ FEATURE(nfscl, "NFSv4 client"); extern int nfscl_ticks; extern struct timeval nfsboottime; -extern struct nfsstats newnfsstats; extern int nfsrv_useacl; extern int nfscl_debuglevel; extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 55e514befaa4..69c1fe7848c5 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -100,7 +100,7 @@ uint32_t nfscl_accesscache_load_done_id; #define TRUE 1 #define FALSE 0 -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern int nfsrv_useacl; extern int nfscl_debuglevel; MALLOC_DECLARE(M_NEWNFSREQ); @@ -258,14 +258,6 @@ int newnfs_directio_allow_mmap = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); -#if 0 -SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, - &newnfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count"); - -SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, - &newnfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count"); -#endif - #define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY \ | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \ | NFSACCESS_DELETE | NFSACCESS_LOOKUP) @@ -418,7 +410,7 @@ nfs_access(struct vop_access_args *ap) if (time_second < (np->n_accesscache[i].stamp + nfsaccess_cache_timeout) && (np->n_accesscache[i].mode & mode) == mode) { - NFSINCRGLOBAL(newnfsstats.accesscache_hits); + NFSINCRGLOBAL(nfsstatsv1.accesscache_hits); gotahit = 1; } break; @@ -437,7 +429,7 @@ nfs_access(struct vop_access_args *ap) /* * Either a no, or a don't know. Go to the wire. */ - NFSINCRGLOBAL(newnfsstats.accesscache_misses); + NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); error = nfs34_access_otw(vp, wmode, ap->a_td, ap->a_cred, &rmode); if (!error && @@ -857,7 +849,7 @@ nfs_getattr(struct vop_getattr_args *ap) if (NFS_ISV34(vp) && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) { - NFSINCRGLOBAL(newnfsstats.accesscache_misses); + NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL); if (ncl_getattrcache(vp, ap->a_vap) == 0) { nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime); @@ -1114,7 +1106,7 @@ nfs_lookup(struct vop_lookup_args *ap) ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) && VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && timespeccmp(&vattr.va_ctime, &nctime, ==))) { - NFSINCRGLOBAL(newnfsstats.lookupcache_hits); + NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; @@ -1141,7 +1133,7 @@ nfs_lookup(struct vop_lookup_args *ap) if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) && VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && timespeccmp(&vattr.va_mtime, &nctime, ==)) { - NFSINCRGLOBAL(newnfsstats.lookupcache_hits); + NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); return (ENOENT); } cache_purge_negative(dvp); @@ -1149,7 +1141,7 @@ nfs_lookup(struct vop_lookup_args *ap) error = 0; newvp = NULLVP; - NFSINCRGLOBAL(newnfsstats.lookupcache_misses); + NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses); error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); @@ -2227,7 +2219,7 @@ nfs_readdir(struct vop_readdir_args *ap) if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) || !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { mtx_unlock(&np->n_mtx); - NFSINCRGLOBAL(newnfsstats.direofcache_hits); + NFSINCRGLOBAL(nfsstatsv1.direofcache_hits); if (ap->a_eofflag != NULL) *ap->a_eofflag = 1; return (0); @@ -2254,7 +2246,7 @@ nfs_readdir(struct vop_readdir_args *ap) error = ncl_bioread(vp, uio, 0, ap->a_cred); if (!error && uio->uio_resid == tresid) { - NFSINCRGLOBAL(newnfsstats.direofcache_misses); + NFSINCRGLOBAL(nfsstatsv1.direofcache_misses); if (ap->a_eofflag != NULL) *ap->a_eofflag = 1; } diff --git a/sys/fs/nfsserver/nfs_nfsdcache.c b/sys/fs/nfsserver/nfs_nfsdcache.c index 0f78b3f06ec1..0b7bf8f3e1fb 100644 --- a/sys/fs/nfsserver/nfs_nfsdcache.c +++ b/sys/fs/nfsserver/nfs_nfsdcache.c @@ -159,7 +159,7 @@ __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT #include -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern struct mtx nfsrc_udpmtx; extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; extern struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; @@ -318,8 +318,8 @@ nfsrvd_initcache(void) TAILQ_INIT(&nfsrvudplru); nfsrc_tcpsavedreplies = 0; nfsrc_udpcachesize = 0; - newnfsstats.srvcache_tcppeak = 0; - newnfsstats.srvcache_size = 0; + nfsstatsv1.srvcache_tcppeak = 0; + nfsstatsv1.srvcache_size = 0; } /* @@ -395,14 +395,14 @@ nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); if (rp->rc_flag & RC_INPROG) { - newnfsstats.srvcache_inproghits++; + nfsstatsv1.srvcache_inproghits++; mtx_unlock(mutex); ret = RC_DROPIT; } else if (rp->rc_flag & RC_REPSTATUS) { /* * V2 only. */ - newnfsstats.srvcache_nonidemdonehits++; + nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); nfsrvd_rephead(nd); *(nd->nd_errp) = rp->rc_status; @@ -410,7 +410,7 @@ nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_UDPTIMEOUT; } else if (rp->rc_flag & RC_REPMBUF) { - newnfsstats.srvcache_nonidemdonehits++; + nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); nd->nd_mreq = m_copym(rp->rc_reply, 0, M_COPYALL, M_WAITOK); @@ -425,8 +425,8 @@ nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) goto out; } } - newnfsstats.srvcache_misses++; - atomic_add_int(&newnfsstats.srvcache_size, 1); + nfsstatsv1.srvcache_misses++; + atomic_add_int(&nfsstatsv1.srvcache_size, 1); nfsrc_udpcachesize++; newrp->rc_flag |= RC_INPROG; @@ -480,7 +480,7 @@ nfsrvd_updatecache(struct nfsrv_descript *nd) * Reply from cache is a special case returned by nfsrv_checkseqid(). */ if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) { - newnfsstats.srvcache_nonidemdonehits++; + nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); nd->nd_repstat = 0; if (nd->nd_mreq) @@ -519,8 +519,8 @@ nfsrvd_updatecache(struct nfsrv_descript *nd) if (!(rp->rc_flag & RC_UDP)) { atomic_add_int(&nfsrc_tcpsavedreplies, 1); if (nfsrc_tcpsavedreplies > - newnfsstats.srvcache_tcppeak) - newnfsstats.srvcache_tcppeak = + nfsstatsv1.srvcache_tcppeak) + nfsstatsv1.srvcache_tcppeak = nfsrc_tcpsavedreplies; } mtx_unlock(mutex); @@ -678,7 +678,7 @@ nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) panic("nfs tcp cache0"); rp->rc_flag |= RC_LOCKED; if (rp->rc_flag & RC_INPROG) { - newnfsstats.srvcache_inproghits++; + nfsstatsv1.srvcache_inproghits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); @@ -687,7 +687,7 @@ nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) /* * V2 only. */ - newnfsstats.srvcache_nonidemdonehits++; + nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); @@ -696,7 +696,7 @@ nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) *(nd->nd_errp) = rp->rc_status; rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; } else if (rp->rc_flag & RC_REPMBUF) { - newnfsstats.srvcache_nonidemdonehits++; + nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); @@ -711,8 +711,8 @@ nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) free((caddr_t)newrp, M_NFSRVCACHE); goto out; } - newnfsstats.srvcache_misses++; - atomic_add_int(&newnfsstats.srvcache_size, 1); + nfsstatsv1.srvcache_misses++; + atomic_add_int(&nfsstatsv1.srvcache_size, 1); /* * For TCP, multiple entries for a key are allowed, so don't @@ -801,7 +801,7 @@ nfsrc_freecache(struct nfsrvcache *rp) atomic_add_int(&nfsrc_tcpsavedreplies, -1); } FREE((caddr_t)rp, M_NFSRVCACHE); - atomic_add_int(&newnfsstats.srvcache_size, -1); + atomic_add_int(&nfsstatsv1.srvcache_size, -1); } /* @@ -825,7 +825,7 @@ nfsrvd_cleancache(void) nfsrc_freecache(rp); } } - newnfsstats.srvcache_size = 0; + nfsstatsv1.srvcache_size = 0; mtx_unlock(&nfsrc_udpmtx); nfsrc_tcpsavedreplies = 0; } diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index 2f6782994c33..52896a844708 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -62,6 +62,7 @@ extern struct nfsclienthashhead *nfsclienthash; extern struct nfslockhashhead *nfslockhash; extern struct nfssessionhash *nfssessionhash; extern int nfsrv_sessionhashsize; +extern struct nfsstatsv1 nfsstatsv1; struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; NFSDLOCKMUTEX; struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; @@ -686,6 +687,8 @@ nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, uiop->uio_td = NULL; nh = nfsrv_sequential_heuristic(uiop, vp); ioflag |= nh->nh_seqcount << IO_SEQSHIFT; + /* XXX KDM make this more systematic? */ + nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); FREE((caddr_t)iv2, M_TEMP); if (error) { @@ -758,6 +761,8 @@ nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable, uiop->uio_offset = off; nh = nfsrv_sequential_heuristic(uiop, vp); ioflags |= nh->nh_seqcount << IO_SEQSHIFT; + /* XXX KDM make this more systematic? */ + nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; error = VOP_WRITE(vp, uiop, ioflags, cred); if (error == 0) nh->nh_nextoff = uiop->uio_offset; diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c index e09116cf2caa..f45bba4986d3 100644 --- a/sys/fs/nfsserver/nfs_nfsdsocket.c +++ b/sys/fs/nfsserver/nfs_nfsdsocket.c @@ -41,7 +41,7 @@ __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT #include -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern struct nfsrvfh nfs_pubfh, nfs_rootfh; extern int nfs_pubfhset, nfs_rootfhset; extern struct nfsv4lock nfsv4rootfs_lock; @@ -400,6 +400,68 @@ static int nfsv3to4op[NFS_V3NPROCS] = { NFSV4OP_COMMIT, }; +static struct mtx nfsrvd_statmtx; +MTX_SYSINIT(nfsst, &nfsrvd_statmtx, "NFSstat", MTX_DEF); + +static void +nfsrvd_statstart(int op, struct bintime *now) +{ + if (op > (NFSV42_NOPS + NFSV4OP_FAKENOPS)) { + printf("%s: op %d invalid\n", __func__, op); + return; + } + + mtx_lock(&nfsrvd_statmtx); + if (nfsstatsv1.srvstartcnt == nfsstatsv1.srvdonecnt) { + if (now != NULL) + nfsstatsv1.busyfrom = *now; + else + binuptime(&nfsstatsv1.busyfrom); + + } + nfsstatsv1.srvrpccnt[op]++; + nfsstatsv1.srvstartcnt++; + mtx_unlock(&nfsrvd_statmtx); + +} + +static void +nfsrvd_statend(int op, uint64_t bytes, struct bintime *now, + struct bintime *then) +{ + struct bintime dt, lnow; + + if (op > (NFSV42_NOPS + NFSV4OP_FAKENOPS)) { + printf("%s: op %d invalid\n", __func__, op); + return; + } + + if (now == NULL) { + now = &lnow; + binuptime(now); + } + + mtx_lock(&nfsrvd_statmtx); + + nfsstatsv1.srvbytes[op] += bytes; + nfsstatsv1.srvops[op]++; + + if (then != NULL) { + dt = *now; + bintime_sub(&dt, then); + bintime_add(&nfsstatsv1.srvduration[op], &dt); + } + + dt = *now; + bintime_sub(&dt, &nfsstatsv1.busyfrom); + bintime_add(&nfsstatsv1.busytime, &dt); + nfsstatsv1.busyfrom = *now; + + nfsstatsv1.srvdonecnt++; + + mtx_unlock(&nfsrvd_statmtx); +} + /* * Do an RPC. Basically, get the file handles translated to vnode pointers * and then call the appropriate server routine. The server routines are @@ -476,7 +538,9 @@ nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen, */ if (nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) { *nd->nd_errp = nfsd_errmap(nd); - NFSINCRGLOBAL(newnfsstats.srvrpccnt[nfsv3to4op[nd->nd_procnum]]); + nfsrvd_statstart(nfsv3to4op[nd->nd_procnum], /*now*/ NULL); + nfsrvd_statend(nfsv3to4op[nd->nd_procnum], /*bytes*/ 0, + /*now*/ NULL, /*then*/ NULL); if (mp != NULL && nfs_writerpc[nd->nd_procnum] != 0) vn_finished_write(mp); goto out; @@ -491,6 +555,11 @@ nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen, if (nd->nd_flag & ND_NFSV4) { nfsrvd_compound(nd, isdgram, tag, taglen, minorvers, p); } else { + struct bintime start_time; + + binuptime(&start_time); + nfsrvd_statstart(nfsv3to4op[nd->nd_procnum], &start_time); + if (nfs_retfh[nd->nd_procnum] == 1) { if (vp) NFSVOPUNLOCK(vp, 0); @@ -505,7 +574,9 @@ nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen, } if (mp != NULL && nfs_writerpc[nd->nd_procnum] != 0) vn_finished_write(mp); - NFSINCRGLOBAL(newnfsstats.srvrpccnt[nfsv3to4op[nd->nd_procnum]]); + + nfsrvd_statend(nfsv3to4op[nd->nd_procnum], /*bytes*/ 0, + /*now*/ NULL, /*then*/ &start_time); } if (error) { if (error != EBADRPC) @@ -547,7 +618,7 @@ static void nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen, u_int32_t minorvers, NFSPROC_T *p) { - int i, lktype, op, op0 = 0; + int i, lktype, op, op0 = 0, statsinprog = 0; u_int32_t *tl; struct nfsclient *clp, *nclp; int numops, error = 0, igotlock; @@ -559,6 +630,7 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, struct nfsexstuff nes, vpnes, savevpnes; fsid_t cur_fsid, save_fsid; static u_int64_t compref = 0; + struct bintime start_time; NFSVNO_EXINIT(&vpnes); NFSVNO_EXINIT(&savevpnes); @@ -686,6 +758,11 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, *repp = *tl; op = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "op=%d\n", op); + + binuptime(&start_time); + nfsrvd_statstart(op, &start_time); + statsinprog = 1; + if (op < NFSV4OP_ACCESS || (op >= NFSV4OP_NOPS && (nd->nd_flag & ND_NFSV41) == 0) || (op >= NFSV41_NOPS && (nd->nd_flag & ND_NFSV41) != 0)) { @@ -771,12 +848,6 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, } if (nfsv4_opflag[op].savereply) nd->nd_flag |= ND_SAVEREPLY; - /* - * For now, newnfsstats.srvrpccnt[] doesn't have entries - * for the NFSv4.1 operations. - */ - if (nd->nd_procnum < NFSV4OP_NOPS) - NFSINCRGLOBAL(newnfsstats.srvrpccnt[nd->nd_procnum]); switch (op) { case NFSV4OP_PUTFH: error = nfsrv_mtofh(nd, &fh); @@ -1007,6 +1078,13 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, } error = 0; } + + if (statsinprog != 0) { + nfsrvd_statend(op, /*bytes*/ 0, /*now*/ NULL, + /*then*/ &start_time); + statsinprog = 0; + } + retops++; if (nd->nd_repstat) { *repp = nfsd_errmap(nd); @@ -1016,6 +1094,11 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, } } nfsmout: + if (statsinprog != 0) { + nfsrvd_statend(op, /*bytes*/ 0, /*now*/ NULL, + /*then*/ &start_time); + statsinprog = 0; + } if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) nd->nd_repstat = NFSERR_BADXDR; diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c index b0965017847d..1b90d021d6ca 100644 --- a/sys/fs/nfsserver/nfs_nfsdstate.c +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -37,7 +37,7 @@ int nfsrv_dolocallocks = 0; struct nfsv4lock nfsv4rootfs_lock; extern int newnfs_numnfsd; -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern int nfsrv_lease; extern struct timeval nfsboottime; extern u_int32_t newnfs_true, newnfs_false; @@ -273,7 +273,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, LIST_INIT(&new_clp->lc_stateid[i]); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); - newnfsstats.srvclients++; + nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); @@ -377,7 +377,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, } LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); - newnfsstats.srvclients++; + nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); @@ -441,7 +441,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, } LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); - newnfsstats.srvclients++; + nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; } @@ -815,7 +815,7 @@ nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p) /* * Dump out stats for all clients. Called from nfssvc(2), that is used - * newnfsstats. + * nfsstatsv1. */ APPLESTATIC void nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt) @@ -1219,7 +1219,7 @@ nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p) free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); NFSLOCKSTATE(); - newnfsstats.srvclients--; + nfsstatsv1.srvclients--; nfsrv_openpluslock--; nfsrv_clients--; NFSUNLOCKSTATE(); @@ -1260,7 +1260,7 @@ nfsrv_freedeleg(struct nfsstate *stp) nfsv4_testlock(&lfp->lf_locallock_lck) == 0) nfsrv_freenfslockfile(lfp); FREE((caddr_t)stp, M_NFSDSTATE); - newnfsstats.srvdelegates--; + nfsstatsv1.srvdelegates--; nfsrv_openpluslock--; nfsrv_delegatecnt--; } @@ -1286,7 +1286,7 @@ nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p) if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); FREE((caddr_t)stp, M_NFSDSTATE); - newnfsstats.srvopenowners--; + nfsstatsv1.srvopenowners--; nfsrv_openpluslock--; } @@ -1336,7 +1336,7 @@ nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) if (cansleep != 0) NFSUNLOCKSTATE(); FREE((caddr_t)stp, M_NFSDSTATE); - newnfsstats.srvopens--; + nfsstatsv1.srvopens--; nfsrv_openpluslock--; return (ret); } @@ -1355,7 +1355,7 @@ nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); FREE((caddr_t)stp, M_NFSDSTATE); - newnfsstats.srvlockowners--; + nfsstatsv1.srvlockowners--; nfsrv_openpluslock--; } @@ -1430,7 +1430,7 @@ nfsrv_freenfslock(struct nfslock *lop) if (lop->lo_lckfile.le_prev != NULL) { LIST_REMOVE(lop, lo_lckfile); - newnfsstats.srvlocks--; + nfsstatsv1.srvlocks--; nfsrv_openpluslock--; } LIST_REMOVE(lop, lo_lckowner); @@ -2200,7 +2200,7 @@ nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp, LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list); *new_lopp = NULL; *new_stpp = NULL; - newnfsstats.srvlockowners++; + nfsstatsv1.srvlockowners++; nfsrv_openpluslock++; } if (filestruct_locked != 0) { @@ -2849,12 +2849,12 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; - newnfsstats.srvopenowners++; + nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; - newnfsstats.srvopens++; + nfsstatsv1.srvopens++; nfsrv_openpluslock++; break; } @@ -2913,7 +2913,7 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) || !NFSVNO_DELEGOK(vp)) *rflagsp |= NFSV4OPEN_RECALL; - newnfsstats.srvdelegates++; + nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; @@ -2953,12 +2953,12 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; - newnfsstats.srvopenowners++; + nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; - newnfsstats.srvopens++; + nfsstatsv1.srvopens++; nfsrv_openpluslock++; } else { error = NFSERR_RECLAIMCONFLICT; @@ -3027,7 +3027,7 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; - newnfsstats.srvdelegates++; + nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } @@ -3049,7 +3049,7 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, new_open, ls_hash); openstp = new_open; new_open = NULL; - newnfsstats.srvopens++; + nfsstatsv1.srvopens++; nfsrv_openpluslock++; /* @@ -3094,7 +3094,7 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; - newnfsstats.srvdelegates++; + nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } @@ -3173,7 +3173,7 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; - newnfsstats.srvdelegates++; + nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } @@ -3191,9 +3191,9 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, openstp = new_open; new_open = NULL; *new_stpp = NULL; - newnfsstats.srvopens++; + nfsstatsv1.srvopens++; nfsrv_openpluslock++; - newnfsstats.srvopenowners++; + nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } if (!error) { @@ -3645,7 +3645,7 @@ nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, else LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner); if (stp != NULL) { - newnfsstats.srvlocks++; + nfsstatsv1.srvlocks++; nfsrv_openpluslock++; } } @@ -3843,7 +3843,7 @@ nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, * just set lc_program to 0 to indicate no callbacks are possible. * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set * the address to the client's transport address. This won't be used - * for callbacks, but can be printed out by newnfsstats for info.) + * for callbacks, but can be printed out by nfsstats for info.) * Return error if the xdr can't be parsed, 0 otherwise. */ APPLESTATIC int From 04ee829f135555d7d462694546ca67d30532dc38 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Sat, 13 Aug 2016 01:49:11 +0000 Subject: [PATCH 75/76] Increase timeout from 10 minutes to 20 minutes for all tests On particular slow networks, it can (on average) take longer to resolve hosts to IP* addresses. 20 minutes seemed reasonable for my work network This will be solved in a more meaningful way (if possible) using concurrency in the near future MFC after: 2 weeks Sponsored by: EMC / Isilon Storage Division --- lib/libc/tests/resolv/resolv_test.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/libc/tests/resolv/resolv_test.c b/lib/libc/tests/resolv/resolv_test.c index 74e89b13b80f..1da42e3bcd18 100644 --- a/lib/libc/tests/resolv/resolv_test.c +++ b/lib/libc/tests/resolv/resolv_test.c @@ -291,7 +291,7 @@ do { \ ATF_TC(getaddrinfo_test); ATF_TC_HEAD(getaddrinfo_test, tc) { - atf_tc_set_md_var(tc, "timeout", "450"); + atf_tc_set_md_var(tc, "timeout", "1200"); } ATF_TC_BODY(getaddrinfo_test, tc) { @@ -301,7 +301,7 @@ ATF_TC_BODY(getaddrinfo_test, tc) ATF_TC(gethostby_test); ATF_TC_HEAD(gethostby_test, tc) { - atf_tc_set_md_var(tc, "timeout", "450"); + atf_tc_set_md_var(tc, "timeout", "1200"); } ATF_TC_BODY(gethostby_test, tc) { @@ -312,7 +312,7 @@ ATF_TC_BODY(gethostby_test, tc) ATF_TC(getipnodeby_test); ATF_TC_HEAD(getipnodeby_test, tc) { - atf_tc_set_md_var(tc, "timeout", "450"); + atf_tc_set_md_var(tc, "timeout", "1200"); } ATF_TC_BODY(getipnodeby_test, tc) { From 8b8a0b888382882c48792d43bef07ca36a5d07cc Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Sat, 13 Aug 2016 02:05:06 +0000 Subject: [PATCH 76/76] Initialize `ai` to NULL and test for `ai` with type-appropriate values Depending on the address family and ai_flags containing AI_V4MAPPED, it might not do a proper DNS lookup on the provided DNS address Convert some `ai` boolean true/false checks to NULL/non-NULL while here. MFC after: 1 week PR: 211790 Reported by: Herbie.Robinson@stratus.com Sponsored by: EMC / Isilon Storage Division --- lib/libc/net/getaddrinfo.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/libc/net/getaddrinfo.c b/lib/libc/net/getaddrinfo.c index 0f82fd3cd212..56500c69d3f5 100644 --- a/lib/libc/net/getaddrinfo.c +++ b/lib/libc/net/getaddrinfo.c @@ -2249,6 +2249,8 @@ _dns_getaddrinfo(void *rv, void *cb_data, va_list ap) struct res_target q, q2; res_state res; + ai = NULL; + hostname = va_arg(ap, char *); pai = va_arg(ap, const struct addrinfo *); @@ -2327,16 +2329,16 @@ _dns_getaddrinfo(void *rv, void *cb_data, va_list ap) /* prefer IPv6 */ if (q.next) { ai = getanswer(buf2, q2.n, q2.name, q2.qtype, pai, res); - if (ai) { + if (ai != NULL) { cur->ai_next = ai; while (cur && cur->ai_next) cur = cur->ai_next; } } - if (!ai || pai->ai_family != AF_UNSPEC || + if (ai == NULL || pai->ai_family != AF_UNSPEC || (pai->ai_flags & (AI_ALL | AI_V4MAPPED)) != AI_V4MAPPED) { ai = getanswer(buf, q.n, q.name, q.qtype, pai, res); - if (ai) + if (ai != NULL) cur->ai_next = ai; } free(buf);