From 21be80ae8096c91d543b78883f262ef3d7b8b12c Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Wed, 30 Jan 2019 09:44:54 +0000 Subject: [PATCH 01/90] Add support for Audio Sink and Audio Source profiles to sdpd(8). This allows user-space programs like virtual_oss(8) to act as a Bluetooth speaker device. MFC after: 1 week Sponsored by: Mellanox Technologies --- lib/libsdp/sdp.h | 18 +++ usr.sbin/bluetooth/sdpd/Makefile | 3 +- usr.sbin/bluetooth/sdpd/audio_sink.c | 188 +++++++++++++++++++++++++ usr.sbin/bluetooth/sdpd/audio_source.c | 188 +++++++++++++++++++++++++ usr.sbin/bluetooth/sdpd/profile.c | 4 + 5 files changed, 400 insertions(+), 1 deletion(-) create mode 100644 usr.sbin/bluetooth/sdpd/audio_sink.c create mode 100644 usr.sbin/bluetooth/sdpd/audio_source.c diff --git a/lib/libsdp/sdp.h b/lib/libsdp/sdp.h index b6d533898582..f120fc50f774 100644 --- a/lib/libsdp/sdp.h +++ b/lib/libsdp/sdp.h @@ -586,6 +586,24 @@ void sdp_print (uint32_t level, uint8_t const *start, #define SDP_PDU_SERVICE_UNREGISTER_REQUEST 0x82 #define SDP_PDU_SERVICE_CHANGE_REQUEST 0x83 +struct sdp_audio_sink_profile +{ + uint16_t psm; + uint16_t protover; + uint16_t features; +}; +typedef struct sdp_audio_sink_profile sdp_audio_sink_profile_t; +typedef struct sdp_audio_sink_profile *sdp_audio_sink_profile_p; + +struct sdp_audio_source_profile +{ + uint16_t psm; + uint16_t protover; + uint16_t features; +}; +typedef struct sdp_audio_source_profile sdp_audio_source_profile_t; +typedef struct sdp_audio_source_profile *sdp_audio_source_profile_p; + struct sdp_dun_profile { uint8_t server_channel; diff --git a/usr.sbin/bluetooth/sdpd/Makefile b/usr.sbin/bluetooth/sdpd/Makefile index c47441f02cd0..df443fca4f4e 100644 --- a/usr.sbin/bluetooth/sdpd/Makefile +++ b/usr.sbin/bluetooth/sdpd/Makefile @@ -3,7 +3,8 @@ PROG= sdpd MAN= sdpd.8 -SRCS= bgd.c dun.c ftrn.c gn.c irmc.c irmc_command.c lan.c log.c \ +SRCS= audio_sink.c audio_source.c \ + bgd.c dun.c ftrn.c gn.c irmc.c irmc_command.c lan.c log.c \ main.c nap.c opush.c panu.c profile.c provider.c sar.c scr.c \ sd.c server.c sp.c srr.c ssar.c ssr.c sur.c uuid.c diff --git a/usr.sbin/bluetooth/sdpd/audio_sink.c b/usr.sbin/bluetooth/sdpd/audio_sink.c new file mode 100644 index 000000000000..0ffdfa88146b --- /dev/null +++ b/usr.sbin/bluetooth/sdpd/audio_sink.c @@ -0,0 +1,188 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Hans Petter Selasky + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#define L2CAP_SOCKET_CHECKED +#include +#include +#include +#include "profile.h" +#include "provider.h" + +static int32_t +audio_sink_profile_create_service_class_id_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + static const uint16_t service_classes[] = { + SDP_SERVICE_CLASS_AUDIO_SINK, + }; + + return (common_profile_create_service_class_id_list( + buf, eob, + (uint8_t const *)service_classes, + sizeof(service_classes))); +} + +static int32_t +audio_sink_profile_create_protocol_descriptor_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + provider_p provider = (provider_p) data; + sdp_audio_sink_profile_p audio_sink = (sdp_audio_sink_profile_p) provider->data; + + if (buf + 18 > eob) + return (-1); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(16, buf); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(6, buf); + + SDP_PUT8(SDP_DATA_UUID16, buf); + SDP_PUT16(SDP_UUID_PROTOCOL_L2CAP, buf); + + SDP_PUT8(SDP_DATA_UINT16, buf); + SDP_PUT16(audio_sink->psm, buf); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(6, buf); + + SDP_PUT8(SDP_DATA_UUID16, buf); + SDP_PUT16(SDP_UUID_PROTOCOL_AVDTP, buf); + + SDP_PUT8(SDP_DATA_UINT16, buf); + SDP_PUT16(audio_sink->protover, buf); + + return (18); +} + +static int32_t +audio_sink_profile_create_browse_group_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + + if (buf + 5 > eob) + return (-1); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(3, buf); + + SDP_PUT8(SDP_DATA_UUID16, buf); + SDP_PUT16(SDP_SERVICE_CLASS_PUBLIC_BROWSE_GROUP, buf); + + return (5); +} + +static int32_t +audio_sink_profile_create_bluetooth_profile_descriptor_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + static const uint16_t profile_descriptor_list[] = { + SDP_SERVICE_CLASS_ADVANCED_AUDIO_DISTRIBUTION, + 0x0100 + }; + + return (common_profile_create_bluetooth_profile_descriptor_list( + buf, eob, + (uint8_t const *)profile_descriptor_list, + sizeof(profile_descriptor_list))); +} + +static int32_t +audio_sink_profile_create_service_name( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + static const char service_name[] = "Audio SNK"; + + return (common_profile_create_string8( + buf, eob, + (uint8_t const *)service_name, strlen(service_name))); +} + +static int32_t +audio_sink_create_supported_features( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + provider_p provider = (provider_p) data; + sdp_audio_sink_profile_p audio_sink = (sdp_audio_sink_profile_p) provider->data; + + if (buf + 3 > eob) + return (-1); + + SDP_PUT8(SDP_DATA_UINT16, buf); + SDP_PUT16(audio_sink->features, buf); + + return (3); +} + +static int32_t +audio_sink_profile_valid(uint8_t const *data, uint32_t datalen) +{ + + if (datalen < sizeof(struct sdp_audio_sink_profile)) + return (0); + return (1); +} + +static 
const attr_t audio_sink_profile_attrs[] = { + {SDP_ATTR_SERVICE_RECORD_HANDLE, + common_profile_create_service_record_handle}, + {SDP_ATTR_SERVICE_CLASS_ID_LIST, + audio_sink_profile_create_service_class_id_list}, + {SDP_ATTR_PROTOCOL_DESCRIPTOR_LIST, + audio_sink_profile_create_protocol_descriptor_list}, + {SDP_ATTR_BROWSE_GROUP_LIST, + audio_sink_profile_create_browse_group_list}, + {SDP_ATTR_LANGUAGE_BASE_ATTRIBUTE_ID_LIST, + common_profile_create_language_base_attribute_id_list}, + {SDP_ATTR_BLUETOOTH_PROFILE_DESCRIPTOR_LIST, + audio_sink_profile_create_bluetooth_profile_descriptor_list}, + {SDP_ATTR_PRIMARY_LANGUAGE_BASE_ID + SDP_ATTR_SERVICE_NAME_OFFSET, + audio_sink_profile_create_service_name}, + {SDP_ATTR_PRIMARY_LANGUAGE_BASE_ID + SDP_ATTR_PROVIDER_NAME_OFFSET, + common_profile_create_service_provider_name}, + {SDP_ATTR_SUPPORTED_FEATURES, + audio_sink_create_supported_features}, + {} /* end entry */ +}; + +profile_t audio_sink_profile_descriptor = { + SDP_SERVICE_CLASS_AUDIO_SINK, + sizeof(sdp_audio_sink_profile_t), + audio_sink_profile_valid, + (attr_t const *const)&audio_sink_profile_attrs +}; diff --git a/usr.sbin/bluetooth/sdpd/audio_source.c b/usr.sbin/bluetooth/sdpd/audio_source.c new file mode 100644 index 000000000000..1d58a323f8cb --- /dev/null +++ b/usr.sbin/bluetooth/sdpd/audio_source.c @@ -0,0 +1,188 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Hans Petter Selasky + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +#define L2CAP_SOCKET_CHECKED +#include +#include +#include +#include "profile.h" +#include "provider.h" + +static int32_t +audio_source_profile_create_service_class_id_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + static const uint16_t service_classes[] = { + SDP_SERVICE_CLASS_AUDIO_SOURCE, + }; + + return (common_profile_create_service_class_id_list( + buf, eob, + (uint8_t const *)service_classes, + sizeof(service_classes))); +} + +static int32_t +audio_source_profile_create_protocol_descriptor_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + provider_p provider = (provider_p) data; + sdp_audio_source_profile_p audio_source = (sdp_audio_source_profile_p) provider->data; + + if (buf + 18 > eob) + return (-1); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(16, buf); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(6, buf); + + SDP_PUT8(SDP_DATA_UUID16, buf); + SDP_PUT16(SDP_UUID_PROTOCOL_L2CAP, buf); + + SDP_PUT8(SDP_DATA_UINT16, buf); + SDP_PUT16(audio_source->psm, buf); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(6, buf); + + SDP_PUT8(SDP_DATA_UUID16, buf); + SDP_PUT16(SDP_UUID_PROTOCOL_AVDTP, buf); + + SDP_PUT8(SDP_DATA_UINT16, buf); + SDP_PUT16(audio_source->protover, buf); + + return (18); +} + +static int32_t +audio_source_profile_create_browse_group_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + + if (buf + 5 > eob) + return (-1); + + SDP_PUT8(SDP_DATA_SEQ8, buf); + SDP_PUT8(3, buf); + + SDP_PUT8(SDP_DATA_UUID16, buf); + SDP_PUT16(SDP_SERVICE_CLASS_PUBLIC_BROWSE_GROUP, buf); + + return (5); +} + +static int32_t +audio_source_profile_create_bluetooth_profile_descriptor_list( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + static const uint16_t profile_descriptor_list[] = { + SDP_SERVICE_CLASS_ADVANCED_AUDIO_DISTRIBUTION, + 0x0100 + }; + + return (common_profile_create_bluetooth_profile_descriptor_list( + buf, eob, + (uint8_t const *)profile_descriptor_list, + sizeof(profile_descriptor_list))); +} + +static int32_t +audio_source_profile_create_service_name( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + static const char service_name[] = "Audio SRC"; + + return (common_profile_create_string8( + buf, eob, + (uint8_t const *)service_name, strlen(service_name))); +} + +static int32_t +audio_source_create_supported_features( + uint8_t *buf, uint8_t const *const eob, + uint8_t const *data, uint32_t datalen) +{ + provider_p provider = (provider_p) data; + sdp_audio_source_profile_p audio_source = (sdp_audio_source_profile_p) provider->data; + + if (buf + 3 > eob) + return (-1); + + SDP_PUT8(SDP_DATA_UINT16, buf); + SDP_PUT16(audio_source->features, buf); + + return (3); +} + +static int32_t +audio_source_profile_valid(uint8_t const *data, uint32_t datalen) +{ + + if (datalen < sizeof(struct sdp_audio_source_profile)) + return (0); + return (1); +} + +static const attr_t audio_source_profile_attrs[] = { + {SDP_ATTR_SERVICE_RECORD_HANDLE, + common_profile_create_service_record_handle}, + {SDP_ATTR_SERVICE_CLASS_ID_LIST, + audio_source_profile_create_service_class_id_list}, + {SDP_ATTR_PROTOCOL_DESCRIPTOR_LIST, + audio_source_profile_create_protocol_descriptor_list}, + {SDP_ATTR_BROWSE_GROUP_LIST, + audio_source_profile_create_browse_group_list}, + {SDP_ATTR_LANGUAGE_BASE_ATTRIBUTE_ID_LIST, + common_profile_create_language_base_attribute_id_list}, 
+ {SDP_ATTR_BLUETOOTH_PROFILE_DESCRIPTOR_LIST, + audio_source_profile_create_bluetooth_profile_descriptor_list}, + {SDP_ATTR_PRIMARY_LANGUAGE_BASE_ID + SDP_ATTR_SERVICE_NAME_OFFSET, + audio_source_profile_create_service_name}, + {SDP_ATTR_PRIMARY_LANGUAGE_BASE_ID + SDP_ATTR_PROVIDER_NAME_OFFSET, + common_profile_create_service_provider_name}, + {SDP_ATTR_SUPPORTED_FEATURES, + audio_source_create_supported_features}, + {} /* end entry */ +}; + +profile_t audio_source_profile_descriptor = { + SDP_SERVICE_CLASS_AUDIO_SOURCE, + sizeof(sdp_audio_source_profile_t), + audio_source_profile_valid, + (attr_t const *const)&audio_source_profile_attrs +}; diff --git a/usr.sbin/bluetooth/sdpd/profile.c b/usr.sbin/bluetooth/sdpd/profile.c index d2be21e1e34c..cef9a03ceea6 100644 --- a/usr.sbin/bluetooth/sdpd/profile.c +++ b/usr.sbin/bluetooth/sdpd/profile.c @@ -48,6 +48,8 @@ profile_p profile_get_descriptor(uint16_t uuid) { + extern profile_t audio_sink_profile_descriptor; + extern profile_t audio_source_profile_descriptor; extern profile_t dun_profile_descriptor; extern profile_t ftrn_profile_descriptor; extern profile_t irmc_profile_descriptor; @@ -60,6 +62,8 @@ profile_get_descriptor(uint16_t uuid) extern profile_t panu_profile_descriptor; static const profile_p profiles[] = { + &audio_sink_profile_descriptor, + &audio_source_profile_descriptor, &dun_profile_descriptor, &ftrn_profile_descriptor, &irmc_profile_descriptor, From 27c36a12f1584b53d2454dac238eeed3dedc82ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= Date: Wed, 30 Jan 2019 11:34:52 +0000 Subject: [PATCH 02/90] xen: introduce a new way to setup event channel upcall The main differences with the currently implemented method are: - Requires a local APIC EOI, since it doesn't bypass the local APIC as the previous method used to do. - Can be set to use different IDT vectors on each vCPU. Note that FreeBSD doesn't make use of this feature since the event channel IDT vector is reserved system wide. Note that the old method of setting the event channel upcall is not removed, and will be used as a fallback if this newly introduced method is not available. MFC after: 1 month Sponsored by: Citrix Systems R&D --- sys/x86/xen/hvm.c | 47 ++++++++++++++++++++++++++++++++++++++++-- sys/x86/xen/xen_intr.c | 5 +++++ sys/xen/hvm.h | 1 + 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/sys/x86/xen/hvm.c b/sys/x86/xen/hvm.c index 6983a20ecf82..a0a0e4b75153 100644 --- a/sys/x86/xen/hvm.c +++ b/sys/x86/xen/hvm.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -88,6 +89,12 @@ int xen_vector_callback_enabled; */ uint32_t hvm_start_flags; +/** + * Signal whether the vector injected for the event channel upcall requires to + * be EOI'ed on the local APIC. + */ +bool xen_evtchn_needs_ack; + /*------------------------------- Per-CPU Data -------------------------------*/ DPCPU_DEFINE(struct vcpu_info, vcpu_local_info); DPCPU_DEFINE(struct vcpu_info *, vcpu_info); @@ -223,6 +230,19 @@ xen_hvm_init_shared_info_page(void) panic("HYPERVISOR_memory_op failed"); } +static int +set_percpu_callback(unsigned int vcpu) +{ + struct xen_hvm_evtchn_upcall_vector vec; + int error; + + vec.vcpu = vcpu; + vec.vector = IDT_EVTCHN; + error = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &vec); + + return (error != 0 ? xen_translate_error(error) : 0); +} + /* * Tell the hypervisor how to contact us for event channel callbacks. 
*/ @@ -240,12 +260,20 @@ xen_hvm_set_callback(device_t dev) if (xen_feature(XENFEAT_hvm_callback_vector) != 0) { int error; - xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); + error = set_percpu_callback(0); + if (error == 0) { + xen_evtchn_needs_ack = true; + /* Trick toolstack to think we are enlightened */ + xhp.value = 1; + } else + xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp); if (error == 0) { xen_vector_callback_enabled = 1; return; - } + } else if (xen_evtchn_needs_ack) + panic("Unable to setup fake HVM param: %d", error); + printf("Xen HVM callback vector registration failed (%d). " "Falling back to emulated device interrupt\n", error); } @@ -360,6 +388,7 @@ xen_hvm_init(enum xen_hvm_init_type init_type) } xen_vector_callback_enabled = 0; + xen_evtchn_needs_ack = false; xen_hvm_set_callback(NULL); /* @@ -427,6 +456,20 @@ xen_hvm_cpu_init(void) PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ? regs[1] : PCPU_GET(acpi_id)); + if (xen_evtchn_needs_ack && !IS_BSP()) { + /* + * Setup the per-vpcu event channel upcall vector. This is only + * required when using the new HVMOP_set_evtchn_upcall_vector + * hypercall, which allows using a different vector for each + * vCPU. Note that FreeBSD uses the same vector for all vCPUs + * because it's not dynamically allocated. + */ + rc = set_percpu_callback(PCPU_GET(vcpu_id)); + if (rc != 0) + panic("Event channel upcall vector setup failed: %d", + rc); + } + /* * Set the vCPU info. * diff --git a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c index d366a61dc9ec..f230794d3a50 100644 --- a/sys/x86/xen/xen_intr.c +++ b/sys/x86/xen/xen_intr.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -620,6 +621,10 @@ xen_intr_handle_upcall(struct trapframe *trap_frame) l1 &= ~(1UL << l1i); } } + + if (xen_evtchn_needs_ack) + lapic_eoi(); + critical_exit(); } diff --git a/sys/xen/hvm.h b/sys/xen/hvm.h index bc7518d26575..e34a552dc714 100644 --- a/sys/xen/hvm.h +++ b/sys/xen/hvm.h @@ -104,5 +104,6 @@ void xen_hvm_suspend(void); void xen_hvm_resume(bool suspend_cancelled); extern uint32_t hvm_start_flags; +extern bool xen_evtchn_needs_ack; #endif /* __XEN_HVM_H__ */ From a99bdc110b8067513b386dad00e4b89c71625355 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Wed, 30 Jan 2019 11:40:12 +0000 Subject: [PATCH 03/90] Fix compilation with 'option NDISAPI + device ndis' and without 'device pccard' in the kernel config file. 
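
The breakage: if_ndis.c is compiled whenever 'device ndis' is
configured, but it called ndis_alloc_amem()/ndis_free_amem(), which
live in if_ndis_pccard.c and are therefore only built with
'device pccard'. Move the attribute-memory setup and teardown into
the pccard front end itself (ndis_attach_pccard() and a new
ndis_detach_pccard()), make the helpers static, and drop their
declarations from ndis_var.h.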
PR: 171532 Reported by: Robert Bonomi MFC after: 1 week --- sys/compat/ndis/ndis_var.h | 2 - sys/dev/if_ndis/if_ndis.c | 12 --- sys/dev/if_ndis/if_ndis_pccard.c | 137 +++++++++++++++---------------- 3 files changed, 67 insertions(+), 84 deletions(-) diff --git a/sys/compat/ndis/ndis_var.h b/sys/compat/ndis/ndis_var.h index 0782853cf661..3ca050c0ada4 100644 --- a/sys/compat/ndis/ndis_var.h +++ b/sys/compat/ndis/ndis_var.h @@ -1736,8 +1736,6 @@ extern int ndis_get_supported_oids(void *, ndis_oid **, int *); extern int ndis_send_packets(void *, ndis_packet **, int); extern int ndis_send_packet(void *, ndis_packet *); extern int ndis_convert_res(void *); -extern int ndis_alloc_amem(void *); -extern void ndis_free_amem(void *); extern void ndis_free_packet(ndis_packet *); extern void ndis_free_bufs(ndis_buffer *); extern int ndis_reset_nic(void *); diff --git a/sys/dev/if_ndis/if_ndis.c b/sys/dev/if_ndis/if_ndis.c index 7a545dbc8f8f..404ae4d92d53 100644 --- a/sys/dev/if_ndis/if_ndis.c +++ b/sys/dev/if_ndis/if_ndis.c @@ -568,15 +568,6 @@ ndis_attach(device_t dev) callout_init(&sc->ndis_stat_callout, 1); mbufq_init(&sc->ndis_rxqueue, INT_MAX); /* XXXGL: sane maximum */ - if (sc->ndis_iftype == PCMCIABus) { - error = ndis_alloc_amem(sc); - if (error) { - device_printf(dev, "failed to allocate " - "attribute memory\n"); - goto fail; - } - } - /* Create sysctl registry nodes */ ndis_create_sysctls(sc); @@ -1098,9 +1089,6 @@ ndis_detach(device_t dev) if (ifp != NULL) if_free(ifp); - if (sc->ndis_iftype == PCMCIABus) - ndis_free_amem(sc); - if (sc->ndis_sc) ndis_destroy_dma(sc); diff --git a/sys/dev/if_ndis/if_ndis_pccard.c b/sys/dev/if_ndis/if_ndis_pccard.c index 36d52986691d..ee4e174ae3ef 100644 --- a/sys/dev/if_ndis/if_ndis_pccard.c +++ b/sys/dev/if_ndis/if_ndis_pccard.c @@ -74,6 +74,7 @@ MODULE_DEPEND(ndis, pccard, 1, 1, 1); static int ndis_probe_pccard (device_t); static int ndis_attach_pccard (device_t); +static int ndis_detach_pccard (device_t); static struct resource_list *ndis_get_resource_list (device_t, device_t); static int ndis_devcompare (interface_type, @@ -91,7 +92,7 @@ static device_method_t ndis_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ndis_probe_pccard), DEVMETHOD(device_attach, ndis_attach_pccard), - DEVMETHOD(device_detach, ndis_detach), + DEVMETHOD(device_detach, ndis_detach_pccard), DEVMETHOD(device_shutdown, ndis_shutdown), DEVMETHOD(device_suspend, ndis_suspend), DEVMETHOD(device_resume, ndis_resume), @@ -175,6 +176,50 @@ ndis_probe_pccard(dev) return(ENXIO); } +#define NDIS_AM_RID 3 + +static int +ndis_alloc_amem(struct ndis_softc *sc) +{ + int error, rid; + + rid = NDIS_AM_RID; + sc->ndis_res_am = bus_alloc_resource_anywhere(sc->ndis_dev, + SYS_RES_MEMORY, &rid, 0x1000, RF_ACTIVE); + + if (sc->ndis_res_am == NULL) { + device_printf(sc->ndis_dev, + "failed to allocate attribute memory\n"); + return(ENXIO); + } + sc->ndis_rescnt++; + resource_list_add(&sc->ndis_rl, SYS_RES_MEMORY, rid, + rman_get_start(sc->ndis_res_am), rman_get_end(sc->ndis_res_am), + rman_get_size(sc->ndis_res_am)); + + error = CARD_SET_MEMORY_OFFSET(device_get_parent(sc->ndis_dev), + sc->ndis_dev, rid, 0, NULL); + + if (error) { + device_printf(sc->ndis_dev, + "CARD_SET_MEMORY_OFFSET() returned 0x%x\n", error); + return(error); + } + + error = CARD_SET_RES_FLAGS(device_get_parent(sc->ndis_dev), + sc->ndis_dev, SYS_RES_MEMORY, rid, PCCARD_A_MEM_ATTR); + + if (error) { + device_printf(sc->ndis_dev, + "CARD_SET_RES_FLAGS() returned 0x%x\n", error); + return(error); + } + + sc->ndis_am_rid = 
rid; + + return(0); +} + /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. @@ -251,12 +296,33 @@ ndis_attach_pccard(dev) sc->ndis_devidx = devidx; + error = ndis_alloc_amem(sc); + if (error) { + device_printf(dev, "failed to allocate attribute memory\n"); + goto fail; + } + error = ndis_attach(dev); fail: return(error); } +static int +ndis_detach_pccard(device_t dev) +{ + struct ndis_softc *sc = device_get_softc(dev); + + (void) ndis_detach(dev); + + if (sc->ndis_res_am != NULL) + bus_release_resource(sc->ndis_dev, SYS_RES_MEMORY, + sc->ndis_am_rid, sc->ndis_res_am); + resource_list_free(&sc->ndis_rl); + + return (0); +} + static struct resource_list * ndis_get_resource_list(dev, child) device_t dev; @@ -267,72 +333,3 @@ ndis_get_resource_list(dev, child) sc = device_get_softc(dev); return (&sc->ndis_rl); } - -#define NDIS_AM_RID 3 - -int -ndis_alloc_amem(arg) - void *arg; -{ - struct ndis_softc *sc; - int error, rid; - - if (arg == NULL) - return(EINVAL); - - sc = arg; - rid = NDIS_AM_RID; - sc->ndis_res_am = bus_alloc_resource_anywhere(sc->ndis_dev, - SYS_RES_MEMORY, &rid, 0x1000, RF_ACTIVE); - - if (sc->ndis_res_am == NULL) { - device_printf(sc->ndis_dev, - "failed to allocate attribute memory\n"); - return(ENXIO); - } - sc->ndis_rescnt++; - resource_list_add(&sc->ndis_rl, SYS_RES_MEMORY, rid, - rman_get_start(sc->ndis_res_am), rman_get_end(sc->ndis_res_am), - rman_get_size(sc->ndis_res_am)); - - error = CARD_SET_MEMORY_OFFSET(device_get_parent(sc->ndis_dev), - sc->ndis_dev, rid, 0, NULL); - - if (error) { - device_printf(sc->ndis_dev, - "CARD_SET_MEMORY_OFFSET() returned 0x%x\n", error); - return(error); - } - - error = CARD_SET_RES_FLAGS(device_get_parent(sc->ndis_dev), - sc->ndis_dev, SYS_RES_MEMORY, rid, PCCARD_A_MEM_ATTR); - - if (error) { - device_printf(sc->ndis_dev, - "CARD_SET_RES_FLAGS() returned 0x%x\n", error); - return(error); - } - - sc->ndis_am_rid = rid; - - return(0); -} - -void -ndis_free_amem(arg) - void *arg; -{ - struct ndis_softc *sc; - - if (arg == NULL) - return; - - sc = arg; - - if (sc->ndis_res_am != NULL) - bus_release_resource(sc->ndis_dev, SYS_RES_MEMORY, - sc->ndis_am_rid, sc->ndis_res_am); - resource_list_free(&sc->ndis_rl); - - return; -} From 602fa8323919fbe89ce9a311b0d1b3a6c8d0cfad Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Wed, 30 Jan 2019 12:32:47 +0000 Subject: [PATCH 04/90] ndiscvt(8): abort if no IDs were found during conversion. Checked with Broadcom driver mentioned in PR 179285. 
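
With this change, inf_parse() reports whether any of the PCI, PCMCIA
or USB device-ID sections produced entries; if all three come up
empty, ndiscvt(8) now exits with an error instead of generating a
conversion that matches no devices.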
PR: 69268 Submitted by: MFC after: 5 days --- usr.sbin/ndiscvt/inf.c | 26 ++++++++++++++------------ usr.sbin/ndiscvt/ndiscvt.c | 3 ++- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/usr.sbin/ndiscvt/inf.c b/usr.sbin/ndiscvt/inf.c index eb1015083dbb..442e7f61b1fd 100644 --- a/usr.sbin/ndiscvt/inf.c +++ b/usr.sbin/ndiscvt/inf.c @@ -62,9 +62,9 @@ static struct assign (struct assign *); static struct section *find_section (const char *); -static void dump_deviceids_pci (void); -static void dump_deviceids_pcmcia (void); -static void dump_deviceids_usb (void); +static int dump_deviceids_pci (void); +static int dump_deviceids_pcmcia (void); +static int dump_deviceids_usb (void); static void dump_pci_id (const char *); static void dump_pcmcia_id (const char *); static void dump_usb_id (const char *); @@ -85,9 +85,11 @@ inf_parse (FILE *fp, FILE *outfp) yyin = fp; yyparse(); - dump_deviceids_pci(); - dump_deviceids_pcmcia(); - dump_deviceids_usb(); + if (dump_deviceids_pci() == 0 && + dump_deviceids_pcmcia() == 0 && + dump_deviceids_usb() == 0) + return (-1); + fprintf(outfp, "#ifdef NDIS_REGVALS\n"); dump_regvals(); fprintf(outfp, "#endif /* NDIS_REGVALS */\n"); @@ -280,7 +282,7 @@ dump_usb_id(const char *s) fprintf(ofp, "\t\\\n\t{ %s, %s, ", vidstr, pidstr); } -static void +static int dump_deviceids_pci() { struct assign *manf, *dev; @@ -370,10 +372,10 @@ dump_deviceids_pci() fprintf(ofp, "\n\n"); - return; + return (found); } -static void +static int dump_deviceids_pcmcia() { struct assign *manf, *dev; @@ -463,10 +465,10 @@ dump_deviceids_pcmcia() fprintf(ofp, "\n\n"); - return; + return (found); } -static void +static int dump_deviceids_usb() { struct assign *manf, *dev; @@ -556,7 +558,7 @@ dump_deviceids_usb() fprintf(ofp, "\n\n"); - return; + return (found); } static void diff --git a/usr.sbin/ndiscvt/ndiscvt.c b/usr.sbin/ndiscvt/ndiscvt.c index 276a6ef7bfdd..1b7660700786 100644 --- a/usr.sbin/ndiscvt/ndiscvt.c +++ b/usr.sbin/ndiscvt/ndiscvt.c @@ -372,7 +372,8 @@ main(int argc, char *argv[]) err(1, "opening .INF file '%s' failed", inffile); - inf_parse(fp, outfp); + if (inf_parse(fp, outfp) != 0) + errx(1, "creating .INF file - no entries created, are you using the correct files?"); fclose(fp); } From f20e36c166ca9710236ed693dd5974fb59771102 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Wed, 30 Jan 2019 13:19:05 +0000 Subject: [PATCH 05/90] rsu(4): add support for ifconfig(8) 'maxretry' option. Tested with Asus USB-N10, STA mode; maxretry 0 / 1 / 6 / 12 Checked with RTL8188EE, MONITOR mode. MFC after: 1 week --- sys/dev/usb/wlan/if_rsu.c | 10 +++++++++- sys/dev/usb/wlan/if_rsureg.h | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/sys/dev/usb/wlan/if_rsu.c b/sys/dev/usb/wlan/if_rsu.c index 6a294a0c7111..0d2e531657cc 100644 --- a/sys/dev/usb/wlan/if_rsu.c +++ b/sys/dev/usb/wlan/if_rsu.c @@ -2755,7 +2755,7 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni, struct ieee80211_frame *wh; struct ieee80211_key *k = NULL; struct r92s_tx_desc *txd; - uint8_t rate, ridx, type, cipher; + uint8_t rate, ridx, type, cipher, qos; int prio = 0; uint8_t which; int hasqos; @@ -2804,12 +2804,14 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni, prio = M_WME_GETAC(m0); which = rsu_wme_ac_xfer_map[prio]; hasqos = 1; + qos = ((const struct ieee80211_qosframe *)wh)->i_qos[0]; } else { /* Non-QoS TID */ /* XXX TODO: tid=0 for non-qos TID? 
*/ which = rsu_wme_ac_xfer_map[WME_AC_BE]; hasqos = 0; prio = 0; + qos = 0; } qid = rsu_ac2qid[prio]; @@ -2868,6 +2870,12 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni, if (ismcast) txd->txdw2 |= htole32(R92S_TXDW2_BMCAST); + if (!ismcast && (!qos || (qos & IEEE80211_QOS_ACKPOLICY) != + IEEE80211_QOS_ACKPOLICY_NOACK)) { + txd->txdw2 |= htole32(R92S_TXDW2_RTY_LMT_ENA); + txd->txdw2 |= htole32(SM(R92S_TXDW2_RTY_LMT, tp->maxretry)); + } + /* Force mgmt / mcast / ucast rate if needed. */ if (rate != 0) { /* Data rate fallback limit (max). */ diff --git a/sys/dev/usb/wlan/if_rsureg.h b/sys/dev/usb/wlan/if_rsureg.h index b1cd36ea3f0f..a6ab170be7ce 100644 --- a/sys/dev/usb/wlan/if_rsureg.h +++ b/sys/dev/usb/wlan/if_rsureg.h @@ -688,6 +688,9 @@ struct r92s_tx_desc { #define R92S_TXDW1_HWPC 0x80000000 uint32_t txdw2; +#define R92S_TXDW2_RTY_LMT_M 0x0000003f +#define R92S_TXDW2_RTY_LMT_S 0 +#define R92S_TXDW2_RTY_LMT_ENA 0x00000040 #define R92S_TXDW2_BMCAST 0x00000080 #define R92S_TXDW2_AGGEN 0x20000000 #define R92S_TXDW2_BK 0x40000000 From b97de13ae097269d8eec91ffc7145ddd3ce03660 Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Wed, 30 Jan 2019 13:21:26 +0000 Subject: [PATCH 06/90] - Stop iflib(4) from leaking MSI messages on detachment by calling bus_teardown_intr(9) before pci_release_msi(9). - Ensure that iflib(4) and associated drivers pass correct RIDs to bus_release_resource(9) by obtaining the RIDs via rman_get_rid(9) on the corresponding resources instead of using the RIDs initially passed to bus_alloc_resource_any(9) as the latter function may change those RIDs. Solely em(4) for the ioport resource (but not others) and bnxt(4) were using the correct RIDs by caching the ones returned by bus_alloc_resource_any(9). - Change the logic of iflib_msix_init() around to only map the MSI-X BAR if MSI-X is actually supported, i. e. pci_msix_count(9) returns > 0. Otherwise the "Unable to map MSIX table " message triggers for devices that simply don't support MSI-X and the user may think that something is wrong while in fact everything works as expected. - Put some (mostly redundant) debug messages emitted by iflib(4) and em(4) during attachment under bootverbose. The non-verbose output of em(4) seen during attachment now is close to the one prior to the conversion to iflib(4). - Replace various variants of spelling "MSI-X" (several in messages) with "MSI-X" as used in the PCI specifications. - Remove some trailing whitespace from messages emitted by iflib(4) and change them to consistently start with uppercase. - Remove some obsolete comments about releasing interrupts from drivers and correct a few others. 
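
To illustrate the RID pitfall (a generic sketch, not taken verbatim
from any of the drivers touched here):

	int rid = PCIR_BAR(0);
	struct resource *res;

	/* bus_alloc_resource_any(9) may substitute a different RID. */
	res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);

	/* So release by the RID the resource actually carries ... */
	bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(res), res);
	/* ... not by the constant used at allocation time. */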
Reviewed by: erj, Jacob Keller, shurd Differential Revision: https://reviews.freebsd.org/D18980 --- sys/dev/e1000/if_em.c | 59 +++++++++++++++++------------- sys/dev/e1000/if_em.h | 5 +-- sys/dev/ixgbe/if_ix.c | 10 ++--- sys/dev/ixgbe/if_ixv.c | 5 +-- sys/dev/ixl/if_iavf.c | 9 +++-- sys/dev/ixl/if_ixl.c | 10 ++--- sys/dev/ixl/ixl_iw.c | 2 +- sys/dev/ixl/ixl_pf_main.c | 15 ++++---- sys/dev/vmware/vmxnet3/if_vmx.c | 13 +++---- sys/net/iflib.c | 65 ++++++++++++++++++--------------- 10 files changed, 100 insertions(+), 93 deletions(-) diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 388a0bac53b5..12c431c7ec14 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -293,7 +293,7 @@ static void em_disable_aspm(struct adapter *); int em_intr(void *arg); static void em_disable_promisc(if_ctx_t ctx); -/* MSIX handlers */ +/* MSI-X handlers */ static int em_if_msix_intr_assign(if_ctx_t, int); static int em_msix_link(void *); static void em_handle_link(void *context); @@ -780,7 +780,9 @@ em_if_attach_pre(if_ctx_t ctx) scctx->isc_msix_bar = PCIR_BAR(EM_MSIX_BAR); scctx->isc_tx_nsegments = EM_MAX_SCATTER; scctx->isc_nrxqsets_max = scctx->isc_ntxqsets_max = em_set_num_queues(ctx); - device_printf(dev, "attach_pre capping queues at %d\n", scctx->isc_ntxqsets_max); + if (bootverbose) + device_printf(dev, "attach_pre capping queues at %d\n", + scctx->isc_ntxqsets_max); if (adapter->hw.mac.type >= igb_mac_min) { int try_second_bar; @@ -1301,7 +1303,7 @@ em_if_init(if_ctx_t ctx) em_if_set_promisc(ctx, IFF_PROMISC); e1000_clear_hw_cntrs_base_generic(&adapter->hw); - /* MSI/X configuration for 82574 */ + /* MSI-X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { int tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); @@ -1427,7 +1429,7 @@ em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) /********************************************************************* * - * MSIX RX Interrupt Service routine + * MSI-X RX Interrupt Service routine * **********************************************************************/ static int @@ -1442,7 +1444,7 @@ em_msix_que(void *arg) /********************************************************************* * - * MSIX Link Fast Interrupt Service routine + * MSI-X Link Fast Interrupt Service routine * **********************************************************************/ static int @@ -1912,7 +1914,6 @@ em_allocate_pci_resources(if_ctx_t ctx) for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { val = pci_read_config(dev, rid, 4); if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { - adapter->io_rid = rid; break; } rid += 4; @@ -1924,8 +1925,8 @@ em_allocate_pci_resources(if_ctx_t ctx) device_printf(dev, "Unable to locate IO BAR\n"); return (ENXIO); } - adapter->ioport = bus_alloc_resource_any(dev, - SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE); + adapter->ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT, + &rid, RF_ACTIVE); if (adapter->ioport == NULL) { device_printf(dev, "Unable to allocate bus resource: " "ioport\n"); @@ -1945,7 +1946,7 @@ em_allocate_pci_resources(if_ctx_t ctx) /********************************************************************* * - * Setup the MSIX Interrupt handlers + * Set up the MSI-X Interrupt handlers * **********************************************************************/ static int @@ -1974,7 +1975,7 @@ em_if_msix_intr_assign(if_ctx_t ctx, int msix) * Set the bit to enable interrupt * in E1000_IMS -- bits 20 and 21 * are for RX0 and RX1, note this has - * NOTHING to do with the MSIX vector + * NOTHING to do with the 
MSI-X vector */ if (adapter->hw.mac.type == e1000_82574) { rx_que->eims = 1 << (20 + i); @@ -2001,7 +2002,7 @@ em_if_msix_intr_assign(if_ctx_t ctx, int msix) * Set the bit to enable interrupt * in E1000_IMS -- bits 22 and 23 * are for TX0 and TX1, note this has - * NOTHING to do with the MSIX vector + * NOTHING to do with the MSI-X vector */ if (adapter->hw.mac.type == e1000_82574) { tx_que->eims = 1 << (22 + i); @@ -2050,7 +2051,7 @@ igb_configure_queues(struct adapter *adapter) E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | E1000_GPIE_PBA | E1000_GPIE_NSICR); - /* Turn on MSIX */ + /* Turn on MSI-X */ switch (adapter->hw.mac.type) { case e1000_82580: case e1000_i350: @@ -2184,7 +2185,7 @@ em_free_pci_resources(if_ctx_t ctx) struct em_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); - /* Release all msix queue resources */ + /* Release all MSI-X queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); @@ -2192,24 +2193,26 @@ em_free_pci_resources(if_ctx_t ctx) iflib_irq_free(ctx, &que->que_irq); } - /* First release all the interrupt resources */ if (adapter->memory != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), adapter->memory); + rman_get_rid(adapter->memory), adapter->memory); adapter->memory = NULL; } if (adapter->flash != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, - EM_FLASH, adapter->flash); + rman_get_rid(adapter->flash), adapter->flash); adapter->flash = NULL; } - if (adapter->ioport != NULL) + + if (adapter->ioport != NULL) { bus_release_resource(dev, SYS_RES_IOPORT, - adapter->io_rid, adapter->ioport); + rman_get_rid(adapter->ioport), adapter->ioport); + adapter->ioport = NULL; + } } -/* Setup MSI or MSI/X */ +/* Set up MSI or MSI-X */ static int em_setup_msix(if_ctx_t ctx) { @@ -2851,7 +2854,9 @@ em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs txr->tx_paddr = paddrs[i*ntxqs]; } - device_printf(iflib_get_dev(ctx), "allocated for %d tx_queues\n", adapter->tx_num_queues); + if (bootverbose) + device_printf(iflib_get_dev(ctx), + "allocated for %d tx_queues\n", adapter->tx_num_queues); return (0); fail: em_if_queues_free(ctx); @@ -2889,8 +2894,10 @@ em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs rxr->rx_base = (union e1000_rx_desc_extended *)vaddrs[i*nrxqs]; rxr->rx_paddr = paddrs[i*nrxqs]; } - - device_printf(iflib_get_dev(ctx), "allocated for %d rx_queues\n", adapter->rx_num_queues); + + if (bootverbose) + device_printf(iflib_get_dev(ctx), + "allocated for %d rx_queues\n", adapter->rx_num_queues); return (0); fail: @@ -3133,7 +3140,7 @@ em_initialize_receive_unit(if_ctx_t ctx) rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_EXTEN; /* - * When using MSIX interrupts we need to throttle + * When using MSI-X interrupts we need to throttle * using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { @@ -4007,7 +4014,7 @@ em_add_hw_stats(struct adapter *adapter) "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, - "Link MSIX IRQ Handled"); + "Link MSI-X IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "Defragmenting mbuf chain failed"); @@ -4524,7 +4531,7 @@ em_print_debug_info(struct adapter *adapter) /* * 82574 only: - * Write a new value to the EEPROM increasing the number of MSIX + * Write a new value to the EEPROM increasing the number of MSI-X * vectors from 3 to 5, for proper 
multiqueue support. */ static void @@ -4539,7 +4546,7 @@ em_enable_vectors_82574(if_ctx_t ctx) printf("Current cap: %#06x\n", edata); if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { device_printf(dev, "Writing to eeprom: increasing " - "reported MSIX vectors from 3 to 5...\n"); + "reported MSI-X vectors from 3 to 5...\n"); edata &= ~(EM_NVM_MSIX_N_MASK); edata |= 4 << EM_NVM_MSIX_N_SHIFT; e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h index be172a78cbbe..f12fda8db759 100644 --- a/sys/dev/e1000/if_em.h +++ b/sys/dev/e1000/if_em.h @@ -352,8 +352,8 @@ /* * 82574 has a nonstandard address for EIAC - * and since its only used in MSIX, and in - * the em driver only 82574 uses MSIX we can + * and since its only used in MSI-X, and in + * the em driver only 82574 uses MSI-X we can * solve it just using this define. */ #define EM_EIAC 0x000DC @@ -468,7 +468,6 @@ struct adapter { struct resource *memory; struct resource *flash; struct resource *ioport; - int io_rid; struct resource *res; void *tag; diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index b73e0449673a..ed93bf33661a 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -2021,7 +2021,7 @@ ixgbe_if_msix_intr_assign(if_ctx_t ctx, int msix) cpu_id = rss_getcpu(i % rss_getnumbuckets()); } else { /* - * Bind the msix vector, and thus the + * Bind the MSI-X vector, and thus the * rings to the corresponding cpu. * * This just happens to match the default RSS @@ -3814,7 +3814,7 @@ ixgbe_free_pci_resources(if_ctx_t ctx) struct ix_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); - /* Release all msix queue resources */ + /* Release all MSI-X queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); @@ -3824,13 +3824,9 @@ ixgbe_free_pci_resources(if_ctx_t ctx) } } - /* - * Free link/admin interrupt - */ if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), adapter->pci_mem); - + rman_get_rid(adapter->pci_mem), adapter->pci_mem); } /* ixgbe_free_pci_resources */ /************************************************************************ diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c index cd0fb5939e45..2409a4e33992 100644 --- a/sys/dev/ixgbe/if_ixv.c +++ b/sys/dev/ixgbe/if_ixv.c @@ -1132,7 +1132,7 @@ ixv_free_pci_resources(if_ctx_t ctx) struct ix_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); - /* Release all msix queue resources */ + /* Release all MSI-X queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); @@ -1142,10 +1142,9 @@ ixv_free_pci_resources(if_ctx_t ctx) } } - /* Clean the Legacy or Link interrupt last */ if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), adapter->pci_mem); + rman_get_rid(adapter->pci_mem), adapter->pci_mem); } /* ixv_free_pci_resources */ /************************************************************************ diff --git a/sys/dev/ixl/if_iavf.c b/sys/dev/ixl/if_iavf.c index 7272ab1ef31d..aa84e5776813 100644 --- a/sys/dev/ixl/if_iavf.c +++ b/sys/dev/ixl/if_iavf.c @@ -358,7 +358,7 @@ iavf_if_attach_pre(if_ctx_t ctx) goto err_early; } - iavf_dbg_init(sc, "Allocated PCI resources and MSIX vectors\n"); + iavf_dbg_init(sc, "Allocated PCI resources and MSI-X vectors\n"); /* * XXX: This is called by init_shared_code in the PF driver, @@ -407,7 +407,8 @@ iavf_if_attach_pre(if_ctx_t ctx) goto err_aq; } - 
device_printf(dev, "VSIs %d, QPs %d, MSIX %d, RSS sizes: key %d lut %d\n", + device_printf(dev, + "VSIs %d, QPs %d, MSI-X %d, RSS sizes: key %d lut %d\n", sc->vf_res->num_vsis, sc->vf_res->num_queue_pairs, sc->vf_res->max_vectors, @@ -1485,7 +1486,7 @@ iavf_free_pci_resources(struct iavf_sc *sc) struct ixl_rx_queue *rx_que = vsi->rx_queues; device_t dev = sc->dev; - /* We may get here before stations are setup */ + /* We may get here before stations are set up */ if (rx_que == NULL) goto early; @@ -1498,7 +1499,7 @@ iavf_free_pci_resources(struct iavf_sc *sc) early: if (sc->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), sc->pci_mem); + rman_get_rid(sc->pci_mem), sc->pci_mem); } diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index a03a14435482..fc9ad8e11d7c 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -299,7 +299,7 @@ int ixl_limit_iwarp_msix = IXL_IW_MAX_MSIX; #endif TUNABLE_INT("hw.ixl.limit_iwarp_msix", &ixl_limit_iwarp_msix); SYSCTL_INT(_hw_ixl, OID_AUTO, limit_iwarp_msix, CTLFLAG_RDTUN, - &ixl_limit_iwarp_msix, 0, "Limit MSIX vectors assigned to iWARP"); + &ixl_limit_iwarp_msix, 0, "Limit MSI-X vectors assigned to iWARP"); #endif extern struct if_txrx ixl_txrx_hwb; @@ -684,14 +684,14 @@ ixl_if_attach_post(if_ctx_t ctx) error = ixl_iw_pf_attach(pf); if (error) { device_printf(dev, - "interfacing to iwarp driver failed: %d\n", + "interfacing to iWARP driver failed: %d\n", error); goto err; } else device_printf(dev, "iWARP ready\n"); } else - device_printf(dev, - "iwarp disabled on this device (no msix vectors)\n"); + device_printf(dev, "iWARP disabled on this device " + "(no MSI-X vectors)\n"); } else { pf->iw_enabled = false; device_printf(dev, "The device is not iWARP enabled\n"); @@ -857,7 +857,7 @@ ixl_if_init(if_ctx_t ctx) /* Set up RSS */ ixl_config_rss(pf); - /* Set up MSI/X routing and the ITR settings */ + /* Set up MSI-X routing and the ITR settings */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_queue_intr_msix(pf); ixl_configure_itr(pf); diff --git a/sys/dev/ixl/ixl_iw.c b/sys/dev/ixl/ixl_iw.c index f597252e6def..6557e9dca4b3 100644 --- a/sys/dev/ixl/ixl_iw.c +++ b/sys/dev/ixl/ixl_iw.c @@ -333,7 +333,7 @@ ixl_iw_pf_msix_init(void *pf_handle, if ((msix_info->aeq_vector < IXL_IW_VEC_BASE(pf)) || (msix_info->aeq_vector >= IXL_IW_VEC_LIMIT(pf))) { - printf("%s: invalid MSIX vector (%i) for AEQ\n", + printf("%s: invalid MSI-X vector (%i) for AEQ\n", __func__, msix_info->aeq_vector); return (EINVAL); } diff --git a/sys/dev/ixl/ixl_pf_main.c b/sys/dev/ixl/ixl_pf_main.c index 6d393813e831..3a4a0dc31aff 100644 --- a/sys/dev/ixl/ixl_pf_main.c +++ b/sys/dev/ixl/ixl_pf_main.c @@ -278,7 +278,8 @@ ixl_get_hw_capabilities(struct ixl_pf *pf) } /* Print a subset of the capability information. */ - device_printf(dev, "PF-ID[%d]: VFs %d, MSIX %d, VF MSIX %d, QPs %d, %s\n", + device_printf(dev, + "PF-ID[%d]: VFs %d, MSI-X %d, VF MSI-X %d, QPs %d, %s\n", hw->pf_id, hw->func_caps.num_vfs, hw->func_caps.num_msix_vectors, hw->func_caps.num_msix_vectors_vf, hw->func_caps.num_tx_qp, (hw->func_caps.mdio_port_mode == 2) ? 
"I2C" : @@ -505,7 +506,7 @@ ixl_intr(void *arg) /********************************************************************* * - * MSIX VSI Interrupt Service routine + * MSI-X VSI Interrupt Service routine * **********************************************************************/ int @@ -524,7 +525,7 @@ ixl_msix_que(void *arg) /********************************************************************* * - * MSIX Admin Queue Interrupt Service routine + * MSI-X Admin Queue Interrupt Service routine * **********************************************************************/ int @@ -791,7 +792,7 @@ ixl_configure_intr0_msix(struct ixl_pf *pf) /* * 0x7FF is the end of the queue list. * This means we won't use MSI-X vector 0 for a queue interrupt - * in MSIX mode. + * in MSI-X mode. */ wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); /* Value is in 2 usec units, so 0x3E is 62*2 = 124 usecs. */ @@ -909,12 +910,12 @@ ixl_free_pci_resources(struct ixl_pf *pf) device_t dev = iflib_get_dev(vsi->ctx); struct ixl_rx_queue *rx_que = vsi->rx_queues; - /* We may get here before stations are setup */ + /* We may get here before stations are set up */ if (rx_que == NULL) goto early; /* - ** Release all msix VSI resources: + ** Release all MSI-X VSI resources: */ iflib_irq_free(vsi->ctx, &vsi->irq); @@ -923,7 +924,7 @@ ixl_free_pci_resources(struct ixl_pf *pf) early: if (pf->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), pf->pci_mem); + rman_get_rid(pf->pci_mem), pf->pci_mem); } void diff --git a/sys/dev/vmware/vmxnet3/if_vmx.c b/sys/dev/vmware/vmxnet3/if_vmx.c index 38b61d3a7e8a..0ab8b040bc7d 100644 --- a/sys/dev/vmware/vmxnet3/if_vmx.c +++ b/sys/dev/vmware/vmxnet3/if_vmx.c @@ -398,7 +398,7 @@ vmxnet3_attach_pre(if_ctx_t ctx) /* * Configure the softc context to attempt to configure the interrupt * mode now indicated by intr_config. iflib will follow the usual - * fallback path MSIX -> MSI -> LEGACY, starting at the configured + * fallback path MSI-X -> MSI -> LEGACY, starting at the configured * starting mode. */ switch (intr_config & 0x03) { @@ -620,19 +620,18 @@ static void vmxnet3_free_resources(struct vmxnet3_softc *sc) { device_t dev; - int rid; dev = sc->vmx_dev; if (sc->vmx_res0 != NULL) { - rid = PCIR_BAR(0); - bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0); + bus_release_resource(dev, SYS_RES_MEMORY, + rman_get_rid(sc->vmx_res0), sc->vmx_res0); sc->vmx_res0 = NULL; } if (sc->vmx_res1 != NULL) { - rid = PCIR_BAR(1); - bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1); + bus_release_resource(dev, SYS_RES_MEMORY, + rman_get_rid(sc->vmx_res1), sc->vmx_res1); sc->vmx_res1 = NULL; } } @@ -1074,7 +1073,7 @@ vmxnet3_init_shared_data(struct vmxnet3_softc *sc) ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO; /* * Total number of interrupt indexes we are using in the shared - * config data, even though we don't actually allocate MSIX + * config data, even though we don't actually allocate MSI-X * resources for the tx queues. Some versions of the device will * fail to initialize successfully if interrupt indexes are used in * the shared config that exceed the number of interrupts configured diff --git a/sys/net/iflib.c b/sys/net/iflib.c index f02b94de0ece..0a566753e9d6 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -4409,8 +4409,8 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 
1 : 0; /* XXX change for per-queue sizes */ - device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", - scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); + device_printf(dev, "Using %d tx descriptors and %d rx descriptors\n", + scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); for (i = 0; i < sctx->isc_nrxqs; i++) { if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? */ @@ -4472,9 +4472,8 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0); /* - ** Now setup MSI or MSI/X, should - ** return us the number of supported - ** vectors. (Will be 1 for MSI) + ** Now set up MSI or MSI-X, should return us the number of supported + ** vectors (will be 1 for a legacy interrupt and MSI). */ if (sctx->isc_flags & IFLIB_SKIP_MSIX) { msix = scctx->isc_vectors; @@ -4671,8 +4670,8 @@ iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ - device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", - scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); + device_printf(dev, "Using %d tx descriptors and %d rx descriptors\n", + scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); for (i = 0; i < sctx->isc_nrxqs; i++) { if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? */ @@ -4928,15 +4927,15 @@ static void iflib_free_intr_mem(if_ctx_t ctx) { - if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { - pci_release_msi(ctx->ifc_dev); - } if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { iflib_irq_free(ctx, &ctx->ifc_legacy_irq); } + if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { + pci_release_msi(ctx->ifc_dev); + } if (ctx->ifc_msix_mem != NULL) { bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, - ctx->ifc_softc_ctx.isc_msix_bar, ctx->ifc_msix_mem); + rman_get_rid(ctx->ifc_msix_mem), ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } } @@ -5763,11 +5762,13 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void iflib_irq_free(if_ctx_t ctx, if_irq_t irq) { + if (irq->ii_tag) bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); if (irq->ii_res) - bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, irq->ii_rid, irq->ii_res); + bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, + rman_get_rid(irq->ii_res), irq->ii_res); } static int @@ -5961,7 +5962,9 @@ iflib_msix_init(if_ctx_t ctx) iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; - device_printf(dev, "msix_init qsets capped at %d\n", imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); + if (bootverbose) + device_printf(dev, "msix_init qsets capped at %d\n", + imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); bar = ctx->ifc_softc_ctx.isc_msix_bar; admincnt = sctx->isc_admin_intrcnt; @@ -5969,30 +5972,27 @@ iflib_msix_init(if_ctx_t ctx) if (scctx->isc_disable_msix) goto msi; + /* First try MSI-X */ + if ((msgs = pci_msix_count(dev)) == 0) { + if (bootverbose) + device_printf(dev, "MSI-X not supported or disabled\n"); + goto msi; + } /* * bar == -1 => "trust me I know what I'm doing" * Some drivers are for hardware that is so shoddily * documented that no one knows which bars are which * so the developer has to map all bars. This hack - * allows shoddy garbage to use msix in this framework. + * allows shoddy garbage to use MSI-X in this framework. 
*/ if (bar != -1) { ctx->ifc_msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (ctx->ifc_msix_mem == NULL) { - /* May not be enabled */ - device_printf(dev, "Unable to map MSIX table \n"); + device_printf(dev, "Unable to map MSI-X table\n"); goto msi; } } - /* First try MSI/X */ - if ((msgs = pci_msix_count(dev)) == 0) { /* system has msix disabled */ - device_printf(dev, "System has MSIX disabled \n"); - bus_release_resource(dev, SYS_RES_MEMORY, - bar, ctx->ifc_msix_mem); - ctx->ifc_msix_mem = NULL; - goto msi; - } #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); @@ -6005,8 +6005,10 @@ iflib_msix_init(if_ctx_t ctx) queues = queuemsgs; #endif queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); - device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n", - CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); + if (bootverbose) + device_printf(dev, + "intr CPUs: %d queue msgs: %d admincnt: %d\n", + CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) @@ -6042,11 +6044,13 @@ iflib_msix_init(if_ctx_t ctx) rx_queues = min(rx_queues, tx_queues); } - device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); + device_printf(dev, "Using %d rx queues %d tx queues\n", + rx_queues, tx_queues); vectors = rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { - device_printf(dev, "Using MSIX interrupts with %d vectors\n", vectors); + device_printf(dev, "Using MSI-X interrupts with %d vectors\n", + vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; @@ -6055,7 +6059,8 @@ iflib_msix_init(if_ctx_t ctx) return (vectors); } else { device_printf(dev, - "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err); + "failed to allocate %d MSI-X vectors, err: %d - using MSI\n", + vectors, err); bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; @@ -6191,7 +6196,7 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx) "permit #txq != #rxq"); SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, - "disable MSIX (default 0)"); + "disable MSI-X (default 0)"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, "set the rx budget"); From 19c4ec08ad6646bc0fa1d662807056b9e55ef6f4 Mon Sep 17 00:00:00 2001 From: Vincenzo Maffione Date: Wed, 30 Jan 2019 15:51:55 +0000 Subject: [PATCH 07/90] netmap: fix lock order reversal related to kqueue usage When using poll(), select() or kevent() on netmap file descriptors, netmap executes the equivalent of NIOCTXSYNC and NIOCRXSYNC commands, before collecting the events that are ready. In other words, the poll/kevent callback has side effects. This is done to avoid the overhead of two system call per iteration (e.g., poll() + ioctl(NIOC*XSYNC)). When the kqueue subsystem invokes the kqueue(9) f_event callback (netmap_knrw), it holds the lock of the struct knlist object associated to the netmap port (the lock is provided at initialization, by calling knlist_init_mtx). However, netmap_knrw() may need to wake up another netmap port (or even the same one), which means that it may need to call knote(). Since knote() needs the lock of the struct knlist object associated to the to-be-wake-up netmap port, it is possible to have a lock order reversal problem (AB/BA deadlock). 
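
Schematically, with A and B being the private knlist mutexes of two
netmap ports:

	thread 1: kevent(2) on port 1: kqueue holds A, the f_event
	          side effects wake up port 2 -> knote() wants B
	thread 2: kevent(2) on port 2: kqueue holds B, the f_event
	          side effects wake up port 1 -> knote() wants A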
This change prevents the deadlock by executing the knote() call in a per-selinfo taskqueue, where it is possible to hold a mutex. Reviewed by: aleksandr.fedorov_itglobal.com MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D18956 --- sys/dev/netmap/netmap.c | 19 ++++++--- sys/dev/netmap/netmap_freebsd.c | 68 ++++++++++++++++++++++++--------- sys/dev/netmap/netmap_kern.h | 5 ++- 3 files changed, 69 insertions(+), 23 deletions(-) diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index 8b508737e328..3f1f130b25fa 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -830,6 +830,7 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) struct netmap_kring *kring; u_int n[NR_TXRX]; enum txrx t; + int err = 0; if (na->tx_rings != NULL) { if (netmap_debug & NM_DEBUG_ON) @@ -869,7 +870,6 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) for (i = 0; i < n[t]; i++) { kring = NMR(na, t)[i]; bzero(kring, sizeof(*kring)); - kring->na = na; kring->notify_na = na; kring->ring_id = i; kring->tx = t; @@ -895,13 +895,21 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) nm_txrx2str(t), i); ND("ktx %s h %d c %d t %d", kring->name, kring->rhead, kring->rcur, kring->rtail); + err = nm_os_selinfo_init(&kring->si, kring->name); + if (err) { + netmap_krings_delete(na); + return err; + } mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF); - nm_os_selinfo_init(&kring->si); + kring->na = na; /* setting this field marks the mutex as initialized */ + } + err = nm_os_selinfo_init(&na->si[t], na->name); + if (err) { + netmap_krings_delete(na); + return err; } - nm_os_selinfo_init(&na->si[t]); } - return 0; } @@ -925,7 +933,8 @@ netmap_krings_delete(struct netmap_adapter *na) /* we rely on the krings layout described above */ for ( ; kring != na->tailroom; kring++) { - mtx_destroy(&(*kring)->q_lock); + if ((*kring)->na != NULL) + mtx_destroy(&(*kring)->q_lock); nm_os_selinfo_uninit(&(*kring)->si); } nm_os_free(na->tx_rings); diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index 94bde267a279..f94083f7d044 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -58,6 +58,7 @@ #include /* RFNOWAIT */ #include /* sched_bind() */ #include /* mp_maxid */ +#include /* taskqueue_enqueue(), taskqueue_create(), ... */ #include #include #include /* IFT_ETHER */ @@ -75,16 +76,48 @@ /* ======================== FREEBSD-SPECIFIC ROUTINES ================== */ -void nm_os_selinfo_init(NM_SELINFO_T *si) { - struct mtx *m = &si->m; - mtx_init(m, "nm_kn_lock", NULL, MTX_DEF); - knlist_init_mtx(&si->si.si_note, m); +static void +nm_kqueue_notify(void *opaque, int pending) +{ + struct nm_selinfo *si = opaque; + + /* We use a non-zero hint to distinguish this notification call + * from the call done in kqueue_scan(), which uses hint=0. 
+ */ + KNOTE_UNLOCKED(&si->si.si_note, /*hint=*/0x100); +} + +int nm_os_selinfo_init(NM_SELINFO_T *si, const char *name) { + int err; + + TASK_INIT(&si->ntfytask, 0, nm_kqueue_notify, si); + si->ntfytq = taskqueue_create(name, M_NOWAIT, + taskqueue_thread_enqueue, &si->ntfytq); + if (si->ntfytq == NULL) + return -ENOMEM; + err = taskqueue_start_threads(&si->ntfytq, 1, PI_NET, "tq %s", name); + if (err) { + taskqueue_free(si->ntfytq); + si->ntfytq = NULL; + return err; + } + + snprintf(si->mtxname, sizeof(si->mtxname), "nmkl%s", name); + mtx_init(&si->m, si->mtxname, NULL, MTX_DEF); + knlist_init_mtx(&si->si.si_note, &si->m); + + return (0); } void nm_os_selinfo_uninit(NM_SELINFO_T *si) { - /* XXX kqueue(9) needed; these will mirror knlist_init. */ + if (si->ntfytq == NULL) { + return; /* si was not initialized */ + } + taskqueue_drain(si->ntfytq, &si->ntfytask); + taskqueue_free(si->ntfytq); + si->ntfytq = NULL; knlist_delete(&si->si.si_note, curthread, /*islocked=*/0); knlist_destroy(&si->si.si_note); /* now we don't need the mutex anymore */ @@ -1292,13 +1325,18 @@ nm_os_kctx_destroy(struct nm_kctx *nmk) /* * In addition to calling selwakeuppri(), nm_os_selwakeup() also - * needs to call KNOTE to wake up kqueue listeners. - * We use a non-zero 'hint' argument to inform the netmap_knrw() - * function that it is being called from 'nm_os_selwakeup'; this - * is necessary because when netmap_knrw() is called by the kevent - * subsystem (i.e. kevent_scan()) we also need to call netmap_poll(). - * The knote uses a private mutex associated to the 'si' (see struct - * selinfo, struct nm_selinfo, and nm_os_selinfo_init). + * needs to call knote() to wake up kqueue listeners. + * This operation is deferred to a taskqueue in order to avoid possible + * lock order reversals; these may happen because knote() grabs a + * private lock associated to the 'si' (see struct selinfo, + * struct nm_selinfo, and nm_os_selinfo_init), and nm_os_selwakeup() + * can be called while holding the lock associated to a different + * 'si'. + * When calling knote() we use a non-zero 'hint' argument to inform + * the netmap_knrw() function that it is being called from + * 'nm_os_selwakeup'; this is necessary because when netmap_knrw() is + * called by the kevent subsystem (i.e. kevent_scan()) we also need to + * call netmap_poll(). * * The netmap_kqfilter() function registers one or another f_event * depending on read or write mode. A pointer to the struct @@ -1315,11 +1353,7 @@ nm_os_selwakeup(struct nm_selinfo *si) if (netmap_verbose) nm_prinf("on knote %p", &si->si.si_note); selwakeuppri(&si->si, PI_NET); - /* We use a non-zero hint to distinguish this notification call - * from the call done in kqueue_scan(), which uses hint=0. - */ - KNOTE(&si->si.si_note, /*hint=*/0x100, - mtx_owned(&si->m) ? 
KNF_LISTLOCKED : 0); + taskqueue_enqueue(si->ntfytq, &si->ntfytask); } void diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index e9b83a23532b..4578269e43a2 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -133,7 +133,10 @@ struct netmap_adapter *netmap_getna(if_t ifp); struct nm_selinfo { struct selinfo si; + struct taskqueue *ntfytq; + struct task ntfytask; struct mtx m; + char mtxname[32]; }; @@ -295,7 +298,7 @@ struct netmap_priv_d; struct nm_bdg_args; /* os-specific NM_SELINFO_T initialzation/destruction functions */ -void nm_os_selinfo_init(NM_SELINFO_T *); +int nm_os_selinfo_init(NM_SELINFO_T *, const char *name); void nm_os_selinfo_uninit(NM_SELINFO_T *); const char *nm_dump_buf(char *p, int len, int lim, char *dst); From d49ca25de64aa563562cc97b750452a8f031c726 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 30 Jan 2019 16:28:27 +0000 Subject: [PATCH 08/90] Rename rtld-elf/malloc.c to rtld-elf/rtld_malloc.c. The malloc.c file name is too generic to use for libthr.a. Sponsored by: The FreeBSD Foundation MFC after: 13 days --- lib/libthr/Makefile | 4 ++-- libexec/rtld-elf/Makefile | 14 +++++++++++--- libexec/rtld-elf/{malloc.c => rtld_malloc.c} | 0 3 files changed, 13 insertions(+), 5 deletions(-) rename libexec/rtld-elf/{malloc.c => rtld_malloc.c} (100%) diff --git a/lib/libthr/Makefile b/lib/libthr/Makefile index 56905d36e8c5..4a11cdb6c969 100644 --- a/lib/libthr/Makefile +++ b/lib/libthr/Makefile @@ -27,7 +27,7 @@ CFLAGS+=-I${SRCTOP}/lib/libthread_db CFLAGS+=-Winline CFLAGS.thr_stack.c+= -Wno-cast-align -CFLAGS.malloc.c+= -Wno-cast-align +CFLAGS.rtld_malloc.c+= -Wno-cast-align .include .if !(${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} < 40300) CFLAGS.thr_symbols.c+= -Wno-missing-variable-declarations @@ -58,7 +58,7 @@ PRECIOUSLIB= .endif .include "${.CURDIR}/sys/Makefile.inc" .include "${.CURDIR}/thread/Makefile.inc" -SRCS+= malloc.c +SRCS+= rtld_malloc.c .if ${MK_INSTALLLIB} != "no" SYMLINKS+=lib${LIB}.a ${LIBDIR}/libpthread.a diff --git a/libexec/rtld-elf/Makefile b/libexec/rtld-elf/Makefile index b0f02a7205e8..71b75b9273b3 100644 --- a/libexec/rtld-elf/Makefile +++ b/libexec/rtld-elf/Makefile @@ -14,9 +14,17 @@ PROG?= ld-elf.so.1 .if (${PROG:M*ld-elf32*} != "") TAGS+= lib32 .endif -SRCS= rtld_start.S \ - reloc.c rtld.c rtld_lock.c rtld_printf.c map_object.c \ - malloc.c xmalloc.c debug.c libmap.c +SRCS= \ + rtld_start.S \ + reloc.c \ + rtld.c \ + rtld_lock.c \ + rtld_malloc.c \ + rtld_printf.c \ + map_object.c \ + xmalloc.c \ + debug.c \ + libmap.c MAN= rtld.1 CSTD?= gnu99 CFLAGS+= -Wall -DFREEBSD_ELF -DIN_RTLD -ffreestanding diff --git a/libexec/rtld-elf/malloc.c b/libexec/rtld-elf/rtld_malloc.c similarity index 100% rename from libexec/rtld-elf/malloc.c rename to libexec/rtld-elf/rtld_malloc.c From e259e5f4c022b117474b4d29203f66cf08aa8a1b Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 30 Jan 2019 16:29:15 +0000 Subject: [PATCH 09/90] Remove duplicate declarations.
Submitted by: bde MFC after: 2 months --- sys/i386/i386/minidump_machdep_base.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sys/i386/i386/minidump_machdep_base.c b/sys/i386/i386/minidump_machdep_base.c index c2a3852edc1e..17b197f26d2b 100644 --- a/sys/i386/i386/minidump_machdep_base.c +++ b/sys/i386/i386/minidump_machdep_base.c @@ -52,9 +52,6 @@ CTASSERT(sizeof(struct kerneldumpheader) == 512); #define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) #define DEV_ALIGN(x) roundup2((off_t)(x), DEV_BSIZE) -extern uint32_t *vm_page_dump; -extern int vm_page_dump_size; - static struct kerneldumpheader kdh; /* Handle chunked writes. */ @@ -64,7 +61,6 @@ static uint64_t counter, progress; CTASSERT(sizeof(*vm_page_dump) == 4); - static int is_dumpable(vm_paddr_t pa) { From a5fde7ef528a1762ecab65d69947c795315498f6 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 30 Jan 2019 16:50:53 +0000 Subject: [PATCH 10/90] Relax BIO_FLUSH ordering in da(4), respecting BIO_ORDERED. r212160 tightened this from always using MSG_SIMPLE_Q_TAG to always MSG_ORDERED_Q_TAG. Since it also marked all BIO_FLUSH requests with BIO_ORDERED, this commit changes nothing immediately, but it returns to BIO_FLUSH callers the ability to specify the ordering they really need, as with other request types. MFC after: 2 weeks Sponsored by: iXsystems, Inc. --- sys/cam/scsi/scsi_da.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index a3160d613979..92e4ed2560f8 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -3321,7 +3321,7 @@ dastart(struct cam_periph *periph, union ccb *start_ccb) scsi_synchronize_cache(&start_ccb->csio, /*retries*/1, /*cbfcnp*/dadone, - MSG_ORDERED_Q_TAG, + /*tag_action*/tag_code, /*begin_lba*/0, /*lb_count*/0, SSD_FULL_SIZE, From 475a76e3ce9aea7fe1838b7b3ce151e3b8ac2559 Mon Sep 17 00:00:00 2001 From: Ravi Pokala Date: Wed, 30 Jan 2019 16:55:00 +0000 Subject: [PATCH 11/90] Remove unnecessary "All rights reserved" from files under my or Panasas's copyright. When all member nations of the Buenos Aires Convention adopted the Berne Convention, the phrase "All rights reserved" became unnecessary to assert copyright. Remove it from files under my or Panasas's copyright. The files related to jedec_dimm(4) also bear avg@'s copyright; he has approved this change. Approved by: avg Sponsored by: Panasas --- share/man/man4/imcsmb.4 | 1 - share/man/man4/jedec_dimm.4 | 1 - sys/dev/imcsmb/imcsmb.c | 1 - sys/dev/imcsmb/imcsmb_pci.c | 1 - sys/dev/imcsmb/imcsmb_reg.h | 1 - sys/dev/imcsmb/imcsmb_var.h | 1 - sys/dev/jedec_dimm/jedec_dimm.c | 1 - sys/dev/jedec_dimm/jedec_dimm.h | 1 - 8 files changed, 8 deletions(-) diff --git a/share/man/man4/imcsmb.4 b/share/man/man4/imcsmb.4 index 221f9362ed52..b1acb8bca00d 100644 --- a/share/man/man4/imcsmb.4 +++ b/share/man/man4/imcsmb.4 @@ -2,7 +2,6 @@ .\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD .\" .\" Copyright (c) 2018 Panasas -.\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions diff --git a/share/man/man4/jedec_dimm.4 b/share/man/man4/jedec_dimm.4 index c7c57b64c028..ea4183fafc1a 100644 --- a/share/man/man4/jedec_dimm.4 +++ b/share/man/man4/jedec_dimm.4 @@ -3,7 +3,6 @@ .\" .\" Copyright (c) 2016 Andriy Gapon .\" Copyright (c) 2018 Ravi Pokala -.\" All rights reserved.
.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions diff --git a/sys/dev/imcsmb/imcsmb.c b/sys/dev/imcsmb/imcsmb.c index 9cf1a51821bb..0c18fb9f68cb 100644 --- a/sys/dev/imcsmb/imcsmb.c +++ b/sys/dev/imcsmb/imcsmb.c @@ -4,7 +4,6 @@ * Authors: Joe Kloss; Ravi Pokala (rpokala@freebsd.org) * * Copyright (c) 2017-2018 Panasas - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/imcsmb/imcsmb_pci.c b/sys/dev/imcsmb/imcsmb_pci.c index 87d1489b4f3e..0843e61f88d1 100644 --- a/sys/dev/imcsmb/imcsmb_pci.c +++ b/sys/dev/imcsmb/imcsmb_pci.c @@ -4,7 +4,6 @@ * Authors: Joe Kloss; Ravi Pokala (rpokala@freebsd.org) * * Copyright (c) 2017-2018 Panasas - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/imcsmb/imcsmb_reg.h b/sys/dev/imcsmb/imcsmb_reg.h index 3c256ddfead6..4d4f56986664 100644 --- a/sys/dev/imcsmb/imcsmb_reg.h +++ b/sys/dev/imcsmb/imcsmb_reg.h @@ -4,7 +4,6 @@ * Authors: Joe Kloss; Ravi Pokala (rpokala@freebsd.org) * * Copyright (c) 2017-2018 Panasas - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/imcsmb/imcsmb_var.h b/sys/dev/imcsmb/imcsmb_var.h index ea3753bb85cc..0bf81c417042 100644 --- a/sys/dev/imcsmb/imcsmb_var.h +++ b/sys/dev/imcsmb/imcsmb_var.h @@ -4,7 +4,6 @@ * Authors: Joe Kloss; Ravi Pokala (rpokala@freebsd.org) * * Copyright (c) 2017-2018 Panasas - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/jedec_dimm/jedec_dimm.c b/sys/dev/jedec_dimm/jedec_dimm.c index 66144cfa6678..10235dc39193 100644 --- a/sys/dev/jedec_dimm/jedec_dimm.c +++ b/sys/dev/jedec_dimm/jedec_dimm.c @@ -5,7 +5,6 @@ * * Copyright (c) 2016 Andriy Gapon * Copyright (c) 2018 Panasas - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/jedec_dimm/jedec_dimm.h b/sys/dev/jedec_dimm/jedec_dimm.h index f6c5485b6e9b..3b330251efc5 100644 --- a/sys/dev/jedec_dimm/jedec_dimm.h +++ b/sys/dev/jedec_dimm/jedec_dimm.h @@ -4,7 +4,6 @@ * Authors: Ravi Pokala (rpokala@freebsd.org) * * Copyright (c) 2018 Panasas - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions From 9de921ee5993c75a9fe3ec8ff307d0b80633cd5e Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Wed, 30 Jan 2019 17:11:08 +0000 Subject: [PATCH 12/90] Export vendor specific USB MIDI device list to PnP info. 
MFC after: 1 week Sponsored by: Mellanox Technologies --- sys/dev/sound/usb/uaudio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/dev/sound/usb/uaudio.c b/sys/dev/sound/usb/uaudio.c index df1d131f874e..ba2fff153a91 100644 --- a/sys/dev/sound/usb/uaudio.c +++ b/sys/dev/sound/usb/uaudio.c @@ -6285,3 +6285,4 @@ MODULE_DEPEND(uaudio, usb, 1, 1, 1); MODULE_DEPEND(uaudio, sound, SOUND_MINVER, SOUND_PREFVER, SOUND_MAXVER); MODULE_VERSION(uaudio, 1); USB_PNP_HOST_INFO(uaudio_devs); +USB_PNP_HOST_INFO(uaudio_vendor_midi); From 6afd9210909a1a7f6ed5e5a15fc9d1cf815a555e Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 30 Jan 2019 17:24:50 +0000 Subject: [PATCH 13/90] Only sort requests of types that have concept of offset. Other types, such as BIO_FLUSH or BIO_ZONE, or especially new/unknown ones, may imply some degree of ordering even if strict ordering is not requested explicitly. MFC after: 2 weeks Sponsored by: iXsystems, Inc. --- sys/kern/subr_disk.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sys/kern/subr_disk.c b/sys/kern/subr_disk.c index 328111337c93..ea364a2a1dfa 100644 --- a/sys/kern/subr_disk.c +++ b/sys/kern/subr_disk.c @@ -259,6 +259,17 @@ bioq_disksort(struct bio_queue_head *head, struct bio *bp) return; } + /* + * We should only sort requests of types that have concept of offset. + * Other types, such as BIO_FLUSH or BIO_ZONE, may imply some degree + * of ordering even if strict ordering is not requested explicitly. + */ + if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE && + bp->bio_cmd != BIO_DELETE) { + bioq_insert_tail(head, bp); + return; + } + if (bioq_batchsize > 0 && head->batched > bioq_batchsize) { bioq_insert_tail(head, bp); return; From 54cde30f925c37fad249c48d79ded7f58095a159 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 30 Jan 2019 17:39:44 +0000 Subject: [PATCH 14/90] Remove BIO_ORDERED flag from BIO_FLUSH sent by ZFS. In all cases where ZFS sends BIO_FLUSH, it first waits for all related writes to complete, so its BIO_FLUSH does not care about strict ordering. Removal of one makes life much easier at least for NVMe driver, which hardware has no concept of request ordering, relying completely on software. MFC after: 2 weeks Sponsored by: iXsystems, Inc. --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c index 7794bd505525..639f48906aca 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c @@ -1097,7 +1097,6 @@ vdev_geom_io_start(zio_t *zio) break; case ZIO_TYPE_IOCTL: bp->bio_cmd = BIO_FLUSH; - bp->bio_flags |= BIO_ORDERED; bp->bio_data = NULL; bp->bio_offset = cp->provider->mediasize; bp->bio_length = 0; From 435a8c1560801c16124e6dbb92fc19c472fd4153 Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Wed, 30 Jan 2019 17:44:30 +0000 Subject: [PATCH 15/90] Add a simple port filter to SIFTR. SIFTR does not allow any kind of filtering, but captures every packet processed by the TCP stack. Often, only a specific session or service is of interest, and doing the filtering in post-processing of the log adds to the overhead of SIFTR. This adds a new sysctl net.inet.siftr.port_filter. When set to zero, all packets get captured as previously. If set to any other value, only packets where either the source or the destination ports match, are captured in the log file. 
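For example, setting net.inet.siftr.port_filter to 80 confines the log to connections with a local or foreign TCP port of 80, and setting it back to 0 restores the capture-everything behavior.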
Submitted by: Richard Scheffenegger Reviewed by: Cheng Cui Differential Revision: https://reviews.freebsd.org/D18897 --- sys/netinet/siftr.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c index 217d41c44723..4d063c360386 100644 --- a/sys/netinet/siftr.c +++ b/sys/netinet/siftr.c @@ -272,6 +272,7 @@ static volatile unsigned int siftr_exit_pkt_manager_thread = 0; static unsigned int siftr_enabled = 0; static unsigned int siftr_pkts_per_log = 1; static unsigned int siftr_generate_hashes = 0; +static uint16_t siftr_port_filter = 0; /* static unsigned int siftr_binary_log = 0; */ static char siftr_logfile[PATH_MAX] = "/var/log/siftr.log"; static char siftr_logfile_shadow[PATH_MAX] = "/var/log/siftr.log"; @@ -317,6 +318,10 @@ SYSCTL_UINT(_net_inet_siftr, OID_AUTO, genhashes, CTLFLAG_RW, &siftr_generate_hashes, 0, "enable packet hash generation"); +SYSCTL_U16(_net_inet_siftr, OID_AUTO, port_filter, CTLFLAG_RW, + &siftr_port_filter, 0, + "enable packet filter on a TCP port"); + /* XXX: TODO SYSCTL_UINT(_net_inet_siftr, OID_AUTO, binary, CTLFLAG_RW, &siftr_binary_log, 0, @@ -907,6 +912,16 @@ siftr_chkpkt(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, goto inp_unlock; } + /* + * Only pkts selected by the tcp port filter + * can be inserted into the pkt_queue + */ + if ((siftr_port_filter != 0) && + (siftr_port_filter != ntohs(inp->inp_lport)) && + (siftr_port_filter != ntohs(inp->inp_fport))) { + goto inp_unlock; + } + pn = malloc(sizeof(struct pkt_node), M_SIFTR_PKTNODE, M_NOWAIT|M_ZERO); if (pn == NULL) { @@ -1083,6 +1098,16 @@ siftr_chkpkt6(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, goto inp_unlock6; } + /* + * Only pkts selected by the tcp port filter + * can be inserted into the pkt_queue + */ + if ((siftr_port_filter != 0) && + (siftr_port_filter != ntohs(inp->inp_lport)) && + (siftr_port_filter != ntohs(inp->inp_fport))) { + goto inp_unlock6; + } + pn = malloc(sizeof(struct pkt_node), M_SIFTR_PKTNODE, M_NOWAIT|M_ZERO); if (pn == NULL) { From 441a6b699f953ab4a5889b728c9ecb9cd76aa34d Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 30 Jan 2019 18:56:45 +0000 Subject: [PATCH 16/90] Remove stale now comment, forgotten in r343582. MFC after: 2 weeks --- sys/cam/scsi/scsi_da.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index 92e4ed2560f8..c8be3a01f851 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -3314,9 +3314,7 @@ dastart(struct cam_periph *periph, union ccb *start_ccb) /* * BIO_FLUSH doesn't currently communicate * range data, so we synchronize the cache - * over the whole disk. We also force - * ordered tag semantics the flush applies - * to all previously queued I/O. + * over the whole disk. */ scsi_synchronize_cache(&start_ccb->csio, /*retries*/1, From 9c812c8d4e60e030786f87fb88dfe9bb6a470118 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Wed, 30 Jan 2019 19:19:14 +0000 Subject: [PATCH 17/90] freebsd-update: regenerate man page database after update These are currently not reproducible because they're built by the makewhatis on the freebsd-update build host, not the one in the tree. Regenerate after update, and later we can avoid including it in freebsd-update data. 
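(The check added below uses find -newer to look for files under each man directory that are more recent than its mandoc.db, and reruns makewhatis only when such files exist, so untouched trees are skipped.)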
PR: 214545, 217389 Reviewed by: delphij MFC after: 1 month Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D10482 --- usr.sbin/freebsd-update/freebsd-update.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/usr.sbin/freebsd-update/freebsd-update.sh b/usr.sbin/freebsd-update/freebsd-update.sh index 2ed1f43fa0b1..8349fccb4301 100644 --- a/usr.sbin/freebsd-update/freebsd-update.sh +++ b/usr.sbin/freebsd-update/freebsd-update.sh @@ -2943,6 +2943,17 @@ Kernel updates have been installed. Please reboot and run cap_mkdb ${BASEDIR}/etc/login.conf fi + # Rebuild man page databases, if necessary. + for D in /usr/share/man /usr/share/openssl/man; do + if [ ! -d ${BASEDIR}/$D ]; then + continue + fi + if [ -z "$(find ${BASEDIR}/$D -type f -newer ${BASEDIR}/$D/mandoc.db)" ]; then + continue; + fi + makewhatis ${BASEDIR}/$D + done + # We've finished installing the world and deleting old files # which are not shared libraries. touch $1/worlddone From b63abbf63a301011f557d71835ed149f84dd0dcb Mon Sep 17 00:00:00 2001 From: Cy Schubert Date: Wed, 30 Jan 2019 20:22:33 +0000 Subject: [PATCH 18/90] When copying a NAT rule struct to userland for saving by ipfs, use the length of the struct in memmove() rather than an uninitialized variable. This fixes the first of two kernel page faults when ipfs is invoked. PR: 235110 Reported by: David.Boyd49@twc.com MFC after: 2 weeks --- sys/contrib/ipfilter/netinet/ip_nat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/contrib/ipfilter/netinet/ip_nat.c b/sys/contrib/ipfilter/netinet/ip_nat.c index 7c3e0c9fcee4..dc441cbd0293 100644 --- a/sys/contrib/ipfilter/netinet/ip_nat.c +++ b/sys/contrib/ipfilter/netinet/ip_nat.c @@ -1866,7 +1866,7 @@ ipf_nat_getent(softc, data, getlock) */ if (nat->nat_ptr != NULL) bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, - ipn->ipn_ipnat.in_size); + sizeof(nat->nat_ptr)); /* * If we also know the NAT entry has an associated filter rule, From b403765e8c91e29b2bbc549ac627824e8ec6200f Mon Sep 17 00:00:00 2001 From: Cy Schubert Date: Wed, 30 Jan 2019 20:23:16 +0000 Subject: [PATCH 19/90] Do not obtain an already held read lock. This causes a witness panic when ipfs is invoked. This is the second of two panics resolving PR 235110.
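A minimal sketch of the locking discipline the fix below restores (the function body and the copy_entry_out() helper are placeholders, not the actual ipf_nat_getent() code; READ_ENTER/RWLOCK_EXIT are the ipfilter lock macros seen in the diff):

    static int
    get_entry(ipf_main_softc_t *softc, void *data, int getlock)
    {
            int error;

            if (getlock)
                    READ_ENTER(&softc->ipf_nat);    /* taken at most once */
            error = copy_entry_out(softc, data);    /* must not re-enter the lock */
            if (getlock)
                    RWLOCK_EXIT(&softc->ipf_nat);   /* released exactly once, on exit */
            return error;
    }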
PR: 235110 Reported by: David.Boyd49@twc.com MFC after: 2 weeks --- sys/contrib/ipfilter/netinet/ip_nat.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/sys/contrib/ipfilter/netinet/ip_nat.c b/sys/contrib/ipfilter/netinet/ip_nat.c index dc441cbd0293..eb41753b027c 100644 --- a/sys/contrib/ipfilter/netinet/ip_nat.c +++ b/sys/contrib/ipfilter/netinet/ip_nat.c @@ -1904,21 +1904,17 @@ ipf_nat_getent(softc, data, getlock) } } if (error == 0) { - if (getlock) { - READ_ENTER(&softc->ipf_nat); - getlock = 0; - } error = ipf_outobjsz(softc, data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); } finished: - if (getlock) { - READ_ENTER(&softc->ipf_nat); - } if (ipn != NULL) { KFREES(ipn, ipns.ipn_dsize); } + if (getlock) { + RWLOCK_EXIT(&softc->ipf_nat); + } return error; } From 87a8583b24ce39f3b1a8cc23e5c15da08be3ee96 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Wed, 30 Jan 2019 20:44:51 +0000 Subject: [PATCH 20/90] readelf: decode flag bits in DT_FLAGS/DT_FLAGS_1 Decode d_val when the tag is DT_FLAGS or DT_FLAGS_1 based on the information at: https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-42444.html PR: 232983 Submitted by: Bora Ozarslan borako.ozarslan@gmail.com Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D18784 --- contrib/elftoolchain/readelf/readelf.c | 140 +++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/contrib/elftoolchain/readelf/readelf.c b/contrib/elftoolchain/readelf/readelf.c index 5c202ca87b3f..80be58f92c1f 100644 --- a/contrib/elftoolchain/readelf/readelf.c +++ b/contrib/elftoolchain/readelf/readelf.c @@ -293,6 +293,8 @@ static void dump_dwarf_ranges_foreach(struct readelf *re, Dwarf_Die die, static void dump_dwarf_str(struct readelf *re); static void dump_eflags(struct readelf *re, uint64_t e_flags); static void dump_elf(struct readelf *re); +static void dump_dt_flags_val(uint64_t d_val); +static void dump_dt_flags_1_val(uint64_t d_val); static void dump_dyn_val(struct readelf *re, GElf_Dyn *dyn, uint32_t stab); static void dump_dynamic(struct readelf *re); static void dump_liblist(struct readelf *re); @@ -2804,11 +2806,149 @@ dump_dyn_val(struct readelf *re, GElf_Dyn *dyn, uint32_t stab) case DT_GNU_PRELINKED: printf(" %s\n", timestamp(dyn->d_un.d_val)); break; + case DT_FLAGS: + dump_dt_flags_val(dyn->d_un.d_val); + break; + case DT_FLAGS_1: + dump_dt_flags_1_val(dyn->d_un.d_val); + break; default: printf("\n"); } } +static void +dump_dt_flags_val(uint64_t d_val) +{ + if (d_val & 0x1) { + d_val ^= 0x1; + printf(" ORIGIN"); + } + if (d_val & 0x2) { + d_val ^= 0x2; + printf(" SYMBOLIC"); + } + if (d_val & 0x4) { + d_val ^= 0x4; + printf(" TEXTREL"); + } + if (d_val & 0x8) { + d_val ^= 0x8; + printf(" BIND_NOW"); + } + if (d_val & 0x10) { + d_val ^= 0x10; + printf(" STATIC_TLS"); + } + if (d_val) + printf(" %lx", d_val); + printf("\n"); +} + +static void +dump_dt_flags_1_val(uint64_t d_val) +{ + if (d_val & 0x1) { + d_val ^= 0x1; + printf(" NOW"); + } + if (d_val & 0x2) { + d_val ^= 0x2; + printf(" GLOBAL"); + } + if (d_val & 0x4) { + d_val ^= 0x4; + printf(" GROUP"); + } + if (d_val & 0x8) { + d_val ^= 0x8; + printf(" NODELETE"); + } + if (d_val & 0x10) { + d_val ^= 0x10; + printf(" LOADFLTR"); + } + if (d_val & 0x20) { + d_val ^= 0x20; + printf(" INITFIRST"); + } + if (d_val & 0x40) { + d_val ^= 0x40; + printf(" NOOPEN"); + } + if (d_val & 0x80) { + d_val ^= 0x80; + printf(" ORIGIN"); + } + if (d_val & 0x100) { + d_val ^= 0x100; + printf(" DIRECT"); + } + if (d_val & 0x400) { + 
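+		/* 0x400 is DF_1_INTERPOSE; note that bit 0x200 (DF_1_TRANS)
+		 * is skipped and, if set, lands in the trailing hex value. */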
d_val ^= 0x400; + printf(" INTERPOSE"); + } + if (d_val & 0x800) { + d_val ^= 0x800; + printf(" NODEFLIB"); + } + if (d_val & 0x1000) { + d_val ^= 0x1000; + printf(" NODUMP"); + } + if (d_val & 0x2000) { + d_val ^= 0x2000; + printf(" CONFALT"); + } + if (d_val & 0x4000) { + d_val ^= 0x4000; + printf(" ENDFILTEE"); + } + if (d_val & 0x8000) { + d_val ^= 0x8000; + printf(" DISPRELDNE"); + } + if (d_val & 0x10000) { + d_val ^= 0x10000; + printf(" DISPRELPND"); + } + if (d_val & 0x20000) { + d_val ^= 0x20000; + printf(" NODIRECT"); + } + if (d_val & 0x40000) { + d_val ^= 0x40000; + printf(" IGNMULDEF"); + } + if (d_val & 0x80000) { + d_val ^= 0x80000; + printf(" NOKSYMS"); + } + if (d_val & 0x100000) { + d_val ^= 0x100000; + printf(" NOHDR"); + } + if (d_val & 0x200000) { + d_val ^= 0x200000; + printf(" EDITED"); + } + if (d_val & 0x400000) { + d_val ^= 0x400000; + printf(" NORELOC"); + } + if (d_val & 0x800000) { + d_val ^= 0x800000; + printf(" SYMINTPOSE"); + } + if (d_val & 0x1000000) { + d_val ^= 0x1000000; + printf(" GLOBAUDIT"); + } + if (d_val) + printf(" %lx", d_val); + printf("\n"); +} + static void dump_rel(struct readelf *re, struct section *s, Elf_Data *d) { From fb4e718261df905015011af819f8d49db784bb99 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Wed, 30 Jan 2019 21:46:12 +0000 Subject: [PATCH 21/90] readelf: fix i386 build Use %jx and (uintmax_t) cast. PR: 232983 MFC with: r343592 Sponsored by: The FreeBSD Foundation --- contrib/elftoolchain/readelf/readelf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/elftoolchain/readelf/readelf.c b/contrib/elftoolchain/readelf/readelf.c index 80be58f92c1f..c186de9b93e2 100644 --- a/contrib/elftoolchain/readelf/readelf.c +++ b/contrib/elftoolchain/readelf/readelf.c @@ -2841,7 +2841,7 @@ dump_dt_flags_val(uint64_t d_val) printf(" STATIC_TLS"); } if (d_val) - printf(" %lx", d_val); + printf(" %jx", (uintmax_t)d_val); printf("\n"); } @@ -2945,7 +2945,7 @@ dump_dt_flags_1_val(uint64_t d_val) printf(" GLOBAUDIT"); } if (d_val) - printf(" %lx", d_val); + printf(" %jx", (uintmax_t)d_val); printf("\n"); } From fa790ea99f905379ae87b34f519acc5d5ed4ff97 Mon Sep 17 00:00:00 2001 From: David C Somayajulu Date: Thu, 31 Jan 2019 00:09:38 +0000 Subject: [PATCH 22/90] Add RDMA (iWARP and RoCEv1) support David Somayajulu (davidcs): Overall RDMA Driver infrastructure and iWARP Anand Khoje (akhoje@marvell.com): RoCEv1 verbs implementation MFC after:5 days --- sys/dev/qlnx/qlnxe/ecore_iwarp.c | 3970 ++++++++++++++++ sys/dev/qlnx/qlnxe/ecore_ll2.c | 2211 +++++++++ sys/dev/qlnx/qlnxe/ecore_ooo.c | 603 +++ sys/dev/qlnx/qlnxe/ecore_rdma.c | 2697 +++++++++++ sys/dev/qlnx/qlnxe/ecore_roce.c | 1579 +++++++ sys/dev/qlnx/qlnxe/qlnx_rdma.c | 347 ++ sys/dev/qlnx/qlnxe/qlnx_rdma.h | 69 + sys/dev/qlnx/qlnxr/qlnxr_cm.c | 887 ++++ sys/dev/qlnx/qlnxr/qlnxr_cm.h | 112 + sys/dev/qlnx/qlnxr/qlnxr_def.h | 924 ++++ sys/dev/qlnx/qlnxr/qlnxr_os.c | 1366 ++++++ sys/dev/qlnx/qlnxr/qlnxr_roce.h | 675 +++ sys/dev/qlnx/qlnxr/qlnxr_user.h | 112 + sys/dev/qlnx/qlnxr/qlnxr_verbs.c | 7306 ++++++++++++++++++++++++++++++ sys/dev/qlnx/qlnxr/qlnxr_verbs.h | 267 ++ sys/modules/qlnx/Makefile | 1 + sys/modules/qlnx/qlnxe/Makefile | 20 + sys/modules/qlnx/qlnxr/Makefile | 85 + 18 files changed, 23231 insertions(+) create mode 100644 sys/dev/qlnx/qlnxe/ecore_iwarp.c create mode 100644 sys/dev/qlnx/qlnxe/ecore_ll2.c create mode 100644 sys/dev/qlnx/qlnxe/ecore_ooo.c create mode 100644 sys/dev/qlnx/qlnxe/ecore_rdma.c create mode 100644 sys/dev/qlnx/qlnxe/ecore_roce.c create 
mode 100644 sys/dev/qlnx/qlnxe/qlnx_rdma.c create mode 100644 sys/dev/qlnx/qlnxe/qlnx_rdma.h create mode 100644 sys/dev/qlnx/qlnxr/qlnxr_cm.c create mode 100644 sys/dev/qlnx/qlnxr/qlnxr_cm.h create mode 100644 sys/dev/qlnx/qlnxr/qlnxr_def.h create mode 100644 sys/dev/qlnx/qlnxr/qlnxr_os.c create mode 100644 sys/dev/qlnx/qlnxr/qlnxr_roce.h create mode 100644 sys/dev/qlnx/qlnxr/qlnxr_user.h create mode 100644 sys/dev/qlnx/qlnxr/qlnxr_verbs.c create mode 100644 sys/dev/qlnx/qlnxr/qlnxr_verbs.h create mode 100644 sys/modules/qlnx/qlnxr/Makefile diff --git a/sys/dev/qlnx/qlnxe/ecore_iwarp.c b/sys/dev/qlnx/qlnxe/ecore_iwarp.c new file mode 100644 index 000000000000..eec3613499a9 --- /dev/null +++ b/sys/dev/qlnx/qlnxe/ecore_iwarp.c @@ -0,0 +1,3970 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * File : ecore_iwarp.c + */ +#include +__FBSDID("$FreeBSD$"); + +#include "bcm_osal.h" +#include "ecore.h" +#include "ecore_status.h" +#include "ecore_sp_commands.h" +#include "ecore_cxt.h" +#include "ecore_rdma.h" +#include "reg_addr.h" +#include "ecore_hw.h" +#include "ecore_hsi_iwarp.h" +#include "ecore_ll2.h" +#include "ecore_ooo.h" +#ifndef LINUX_REMOVE +#include "ecore_tcp_ip.h" +#endif + +#ifdef _NTDDK_ +#pragma warning(push) +#pragma warning(disable : 28123) +#pragma warning(disable : 28167) +#endif + +/* Default values used for MPA Rev 1 */ +#define ECORE_IWARP_ORD_DEFAULT 32 +#define ECORE_IWARP_IRD_DEFAULT 32 + +#define ECORE_IWARP_MAX_FW_MSS 4120 + +struct mpa_v2_hdr { + __be16 ird; + __be16 ord; +}; + +#define MPA_V2_PEER2PEER_MODEL 0x8000 +#define MPA_V2_SEND_RTR 0x4000 /* on ird */ +#define MPA_V2_READ_RTR 0x4000 /* on ord */ +#define MPA_V2_WRITE_RTR 0x8000 +#define MPA_V2_IRD_ORD_MASK 0x3FFF + +#define MPA_REV2(_mpa_rev) (_mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) + +#define ECORE_IWARP_INVALID_TCP_CID 0xffffffff +/* How many times fin will be sent before FW aborts and send RST */ +#define ECORE_IWARP_MAX_FIN_RT_DEFAULT 2 +#define ECORE_IWARP_RCV_WND_SIZE_MIN (0xffff) +/* INTERNAL: These numbers are derived from BRB buffer sizes to obtain optimal performance */ +#define ECORE_IWARP_RCV_WND_SIZE_BB_DEF_2_PORTS (200*1024) +#define ECORE_IWARP_RCV_WND_SIZE_BB_DEF_4_PORTS (100*1024) +#define ECORE_IWARP_RCV_WND_SIZE_AH_DEF_2_PORTS (150*1024) +#define ECORE_IWARP_RCV_WND_SIZE_AH_DEF_4_PORTS (90*1024) +#define ECORE_IWARP_MAX_WND_SCALE (14) +/* Timestamp header is the length of the timestamp option (10): + * kind:8 bit, length:8 bit, timestamp:32 bit, ack: 32bit + * rounded up to a multiple of 4 + */ +#define TIMESTAMP_HEADER_SIZE (12) + +static enum _ecore_status_t +ecore_iwarp_async_event(struct ecore_hwfn *p_hwfn, + u8 fw_event_code, + u16 OSAL_UNUSED echo, + union event_ring_data *data, + u8 fw_return_code); + +static enum _ecore_status_t +ecore_iwarp_empty_ramrod(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_listener *listener); + +static OSAL_INLINE struct ecore_iwarp_fpdu * +ecore_iwarp_get_curr_fpdu(struct ecore_hwfn *p_hwfn, u16 cid); + +/* Override devinfo with iWARP specific values */ +void +ecore_iwarp_init_devinfo(struct ecore_hwfn *p_hwfn) +{ + struct ecore_rdma_device *dev = p_hwfn->p_rdma_info->dev; + + dev->max_inline = IWARP_REQ_MAX_INLINE_DATA_SIZE; + dev->max_qp = OSAL_MIN_T(u64, + IWARP_MAX_QPS, + p_hwfn->p_rdma_info->num_qps) - + ECORE_IWARP_PREALLOC_CNT; + + dev->max_cq = dev->max_qp; + + dev->max_qp_resp_rd_atomic_resc = ECORE_IWARP_IRD_DEFAULT; + dev->max_qp_req_rd_atomic_resc = ECORE_IWARP_ORD_DEFAULT; +} + +enum _ecore_status_t +ecore_iwarp_init_hw(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt) +{ + p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_TCP; + ecore_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 1); + p_hwfn->b_rdma_enabled_in_prs = true; + + return 0; +} + +void +ecore_iwarp_init_fw_ramrod(struct ecore_hwfn *p_hwfn, + struct iwarp_init_func_ramrod_data *p_ramrod) +{ + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "ooo handle = %d\n", + p_hwfn->p_rdma_info->iwarp.ll2_ooo_handle); + + p_ramrod->iwarp.ll2_ooo_q_index = + p_hwfn->hw_info.resc_start[ECORE_LL2_QUEUE] + + p_hwfn->p_rdma_info->iwarp.ll2_ooo_handle; + + p_ramrod->tcp.max_fin_rt = ECORE_IWARP_MAX_FIN_RT_DEFAULT; + return; +} + +static enum _ecore_status_t +ecore_iwarp_alloc_cid(struct ecore_hwfn *p_hwfn, u32 *cid) +{ + enum _ecore_status_t rc; + + 
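+	/* Take an id from the generic cid bitmap under the rdma lock, rebase
+	 * it past the protocol's cid range start, then back it with a
+	 * dynamically allocated ILT line (released again on failure).
+	 */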
OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + + rc = ecore_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->cid_map, + cid); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + *cid += ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "Failed in allocating iwarp cid\n"); + return rc; + } + + rc = ecore_cxt_dynamic_ilt_alloc(p_hwfn, ECORE_ELEM_CXT, *cid); + + if (rc != ECORE_SUCCESS) { + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + *cid -= ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->cid_map, + *cid); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + } + + return rc; +} + +static void +ecore_iwarp_set_tcp_cid(struct ecore_hwfn *p_hwfn, u32 cid) +{ + cid -= ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_set_id(p_hwfn, + &p_hwfn->p_rdma_info->tcp_cid_map, + cid); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +/* This function allocates a cid for passive tcp ( called from syn receive) + * the reason it's separate from the regular cid allocation is because it + * is assured that these cids already have ilt alloacted. They are preallocated + * to ensure that we won't need to allocate memory during syn processing + */ +static enum _ecore_status_t +ecore_iwarp_alloc_tcp_cid(struct ecore_hwfn *p_hwfn, u32 *cid) +{ + enum _ecore_status_t rc; + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + + rc = ecore_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->tcp_cid_map, + cid); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + *cid += ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + if (rc != ECORE_SUCCESS) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "can't allocate iwarp tcp cid max-count=%d\n", + p_hwfn->p_rdma_info->tcp_cid_map.max_count); + + *cid = ECORE_IWARP_INVALID_TCP_CID; + } + + return rc; +} + +/* We have two cid maps, one for tcp which should be used only from passive + * syn processing and replacing a pre-allocated ep in the list. the second + * for active tcp and for QPs. 
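+ * Ids below ECORE_IWARP_PREALLOC_CNT belong to the tcp map; that is how
+ * ecore_iwarp_cid_cleaned() below decides which bitmap to release into.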
+ */ +static void ecore_iwarp_cid_cleaned(struct ecore_hwfn *p_hwfn, u32 cid) +{ + cid -= ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + + if (cid < ECORE_IWARP_PREALLOC_CNT) { + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->tcp_cid_map, + cid); + } else { + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->cid_map, + cid); + } + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +enum _ecore_status_t +ecore_iwarp_create_qp(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + struct ecore_rdma_create_qp_out_params *out_params) +{ + struct iwarp_create_qp_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + u16 physical_queue; + u32 cid; + + qp->shared_queue = + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, + &qp->shared_queue_phys_addr, + IWARP_SHARED_QUEUE_PAGE_SIZE); + if (!qp->shared_queue) { + DP_NOTICE(p_hwfn, false, + "ecore iwarp create qp failed: cannot allocate memory (shared queue).\n"); + return ECORE_NOMEM; + } else { + out_params->sq_pbl_virt = (u8 *)qp->shared_queue + + IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET; + out_params->sq_pbl_phys = qp->shared_queue_phys_addr + + IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET; + out_params->rq_pbl_virt = (u8 *)qp->shared_queue + + IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET; + out_params->rq_pbl_phys = qp->shared_queue_phys_addr + + IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET; + } + + rc = ecore_iwarp_alloc_cid(p_hwfn, &cid); + if (rc != ECORE_SUCCESS) + goto err1; + + qp->icid = (u16)cid; + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.cid = qp->icid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_CREATE_QP, + PROTOCOLID_IWARP, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.iwarp_create_qp; + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_FMR_AND_RESERVED_EN, + qp->fmr_and_reserved_lkey); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_SIGNALED_COMP, + qp->signal_all); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_RDMA_RD_EN, + qp->incoming_rdma_read_en); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_RDMA_WR_EN, + qp->incoming_rdma_write_en); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_ATOMIC_EN, + qp->incoming_atomic_en); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_SRQ_FLG, + qp->use_srq); + + p_ramrod->pd = qp->pd; + p_ramrod->sq_num_pages = qp->sq_num_pages; + p_ramrod->rq_num_pages = qp->rq_num_pages; + + p_ramrod->qp_handle_for_cqe.hi = OSAL_CPU_TO_LE32(qp->qp_handle.hi); + p_ramrod->qp_handle_for_cqe.lo = OSAL_CPU_TO_LE32(qp->qp_handle.lo); + + p_ramrod->cq_cid_for_sq = + OSAL_CPU_TO_LE32((p_hwfn->hw_info.opaque_fid << 16) | + qp->sq_cq_id); + p_ramrod->cq_cid_for_rq = + OSAL_CPU_TO_LE32((p_hwfn->hw_info.opaque_fid << 16) | + qp->rq_cq_id); + + p_ramrod->dpi = OSAL_CPU_TO_LE16(qp->dpi); + + physical_queue = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); + p_ramrod->physical_q0 = OSAL_CPU_TO_LE16(physical_queue); + physical_queue = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK); + p_ramrod->physical_q1 = OSAL_CPU_TO_LE16(physical_queue); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + if (rc != ECORE_SUCCESS) + goto err1; + + return rc; + +err1: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + qp->shared_queue, + qp->shared_queue_phys_addr, + 
IWARP_SHARED_QUEUE_PAGE_SIZE); + + return rc; +} + +static enum _ecore_status_t +ecore_iwarp_modify_fw(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp) +{ + struct iwarp_modify_qp_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_MODIFY_QP, + p_hwfn->p_rdma_info->proto, + &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.iwarp_modify_qp; + SET_FIELD(p_ramrod->flags, IWARP_MODIFY_QP_RAMROD_DATA_STATE_TRANS_EN, + 0x1); + if (qp->iwarp_state == ECORE_IWARP_QP_STATE_CLOSING) + p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_CLOSING; + else + p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_ERROR; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x)rc=%d\n", + qp->icid, rc); + + return rc; +} + +enum ecore_iwarp_qp_state +ecore_roce2iwarp_state(enum ecore_roce_qp_state state) +{ + switch (state) { + case ECORE_ROCE_QP_STATE_RESET: + case ECORE_ROCE_QP_STATE_INIT: + case ECORE_ROCE_QP_STATE_RTR: + return ECORE_IWARP_QP_STATE_IDLE; + case ECORE_ROCE_QP_STATE_RTS: + return ECORE_IWARP_QP_STATE_RTS; + case ECORE_ROCE_QP_STATE_SQD: + return ECORE_IWARP_QP_STATE_CLOSING; + case ECORE_ROCE_QP_STATE_ERR: + return ECORE_IWARP_QP_STATE_ERROR; + case ECORE_ROCE_QP_STATE_SQE: + return ECORE_IWARP_QP_STATE_TERMINATE; + } + return ECORE_IWARP_QP_STATE_ERROR; +} + +static enum ecore_roce_qp_state +ecore_iwarp2roce_state(enum ecore_iwarp_qp_state state) +{ + switch (state) { + case ECORE_IWARP_QP_STATE_IDLE: + return ECORE_ROCE_QP_STATE_INIT; + case ECORE_IWARP_QP_STATE_RTS: + return ECORE_ROCE_QP_STATE_RTS; + case ECORE_IWARP_QP_STATE_TERMINATE: + return ECORE_ROCE_QP_STATE_SQE; + case ECORE_IWARP_QP_STATE_CLOSING: + return ECORE_ROCE_QP_STATE_SQD; + case ECORE_IWARP_QP_STATE_ERROR: + return ECORE_ROCE_QP_STATE_ERR; + } + return ECORE_ROCE_QP_STATE_ERR; +} + +const char *iwarp_state_names[] = { + "IDLE", + "RTS", + "TERMINATE", + "CLOSING", + "ERROR", +}; + +enum _ecore_status_t +ecore_iwarp_modify_qp(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + enum ecore_iwarp_qp_state new_state, + bool internal) +{ + enum ecore_iwarp_qp_state prev_iw_state; + enum _ecore_status_t rc = 0; + bool modify_fw = false; + + /* modify QP can be called from upper-layer or as a result of async + * RST/FIN... 
therefore need to protect + */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.qp_lock); + prev_iw_state = qp->iwarp_state; + + if (prev_iw_state == new_state) { + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.qp_lock); + return ECORE_SUCCESS; + } + + switch (prev_iw_state) { + case ECORE_IWARP_QP_STATE_IDLE: + switch (new_state) { + case ECORE_IWARP_QP_STATE_RTS: + qp->iwarp_state = ECORE_IWARP_QP_STATE_RTS; + break; + case ECORE_IWARP_QP_STATE_ERROR: + qp->iwarp_state = ECORE_IWARP_QP_STATE_ERROR; + if (!internal) + modify_fw = true; + break; + default: + break; + } + break; + case ECORE_IWARP_QP_STATE_RTS: + switch (new_state) { + case ECORE_IWARP_QP_STATE_CLOSING: + if (!internal) + modify_fw = true; + + qp->iwarp_state = ECORE_IWARP_QP_STATE_CLOSING; + break; + case ECORE_IWARP_QP_STATE_ERROR: + if (!internal) + modify_fw = true; + qp->iwarp_state = ECORE_IWARP_QP_STATE_ERROR; + break; + default: + break; + } + break; + case ECORE_IWARP_QP_STATE_ERROR: + switch (new_state) { + case ECORE_IWARP_QP_STATE_IDLE: + /* TODO: destroy flow -> need to destroy EP&QP */ + qp->iwarp_state = new_state; + break; + case ECORE_IWARP_QP_STATE_CLOSING: + /* could happen due to race... do nothing.... */ + break; + default: + rc = ECORE_INVAL; + } + break; + case ECORE_IWARP_QP_STATE_TERMINATE: + case ECORE_IWARP_QP_STATE_CLOSING: + qp->iwarp_state = new_state; + break; + default: + break; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x) %s --> %s %s\n", + qp->icid, + iwarp_state_names[prev_iw_state], + iwarp_state_names[qp->iwarp_state], + internal ? "internal" : " "); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.qp_lock); + + if (modify_fw) + ecore_iwarp_modify_fw(p_hwfn, qp); + + return rc; +} + +enum _ecore_status_t +ecore_iwarp_fw_destroy(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp) +{ + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_DESTROY_QP, + p_hwfn->p_rdma_info->proto, + &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x) rc = %d\n", qp->icid, rc); + + return rc; +} + +static void ecore_iwarp_destroy_ep(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + bool remove_from_active_list) +{ + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + ep->ep_buffer_virt, + ep->ep_buffer_phys, + sizeof(*ep->ep_buffer_virt)); + + if (remove_from_active_list) { + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + OSAL_LIST_REMOVE_ENTRY(&ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_list); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + } + + if (ep->qp) + ep->qp->ep = OSAL_NULL; + + OSAL_FREE(p_hwfn->p_dev, ep); +} + +enum _ecore_status_t +ecore_iwarp_destroy_qp(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp) +{ + enum _ecore_status_t rc = ECORE_SUCCESS; + struct ecore_iwarp_ep *ep = qp->ep; + struct ecore_iwarp_fpdu *fpdu; + int wait_count = 0; + + fpdu = ecore_iwarp_get_curr_fpdu(p_hwfn, qp->icid); + if (fpdu && fpdu->incomplete_bytes) + DP_NOTICE(p_hwfn, false, + "Pending Partial fpdu with incomplete bytes=%d\n", + fpdu->incomplete_bytes); + + if (qp->iwarp_state != ECORE_IWARP_QP_STATE_ERROR) { + + rc = ecore_iwarp_modify_qp(p_hwfn, qp, + 
ECORE_IWARP_QP_STATE_ERROR, + false); + + if (rc != ECORE_SUCCESS) + return rc; + } + + /* Make sure ep is closed before returning and freeing memory. */ + if (ep) { + while (ep->state != ECORE_IWARP_EP_CLOSED) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Waiting for ep->state to be closed...state=%x\n", + ep->state); + + OSAL_MSLEEP(100); + if (wait_count++ > 200) { + DP_NOTICE(p_hwfn, false, "ep state close timeout state=%x\n", + ep->state); + break; + } + } + + ecore_iwarp_destroy_ep(p_hwfn, ep, false); + } + + rc = ecore_iwarp_fw_destroy(p_hwfn, qp); + + if (qp->shared_queue) + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + qp->shared_queue, + qp->shared_queue_phys_addr, + IWARP_SHARED_QUEUE_PAGE_SIZE); + + return rc; +} + +static enum _ecore_status_t +ecore_iwarp_create_ep(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep **ep_out) +{ + struct ecore_iwarp_ep *ep; + enum _ecore_status_t rc; + + ep = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*ep)); + if (!ep) { + DP_NOTICE(p_hwfn, false, + "ecore create ep failed: cannot allocate memory (ep). rc = %d\n", + ECORE_NOMEM); + return ECORE_NOMEM; + } + + ep->state = ECORE_IWARP_EP_INIT; + + /* ep_buffer is allocated once and is structured as follows: + * [MAX_PRIV_DATA_LEN][MAX_PRIV_DATA_LEN][union async_output] + * We could have allocated this in three calls but since all together + * it is less than a page, we do one allocation and initialize pointers + * accordingly + */ + ep->ep_buffer_virt = OSAL_DMA_ALLOC_COHERENT( + p_hwfn->p_dev, + &ep->ep_buffer_phys, + sizeof(*ep->ep_buffer_virt)); + + if (!ep->ep_buffer_virt) { + DP_NOTICE(p_hwfn, false, + "ecore create ep failed: cannot allocate memory (ulp buffer). rc = %d\n", + ECORE_NOMEM); + rc = ECORE_NOMEM; + goto err; + } + + ep->sig = 0xdeadbeef; + + *ep_out = ep; + + return ECORE_SUCCESS; + +err: + OSAL_FREE(p_hwfn->p_dev, ep); + return rc; +} + +static void +ecore_iwarp_print_tcp_ramrod(struct ecore_hwfn *p_hwfn, + struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod) +{ + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, ">>> PRINT TCP RAMROD\n"); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "local_mac=%x %x %x\n", + p_tcp_ramrod->tcp.local_mac_addr_lo, + p_tcp_ramrod->tcp.local_mac_addr_mid, + p_tcp_ramrod->tcp.local_mac_addr_hi); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "remote_mac=%x %x %x\n", + p_tcp_ramrod->tcp.remote_mac_addr_lo, + p_tcp_ramrod->tcp.remote_mac_addr_mid, + p_tcp_ramrod->tcp.remote_mac_addr_hi); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "vlan_id=%x\n", + p_tcp_ramrod->tcp.vlan_id); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "flags=%x\n", + p_tcp_ramrod->tcp.flags); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ip_version=%x\n", + p_tcp_ramrod->tcp.ip_version); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "local_ip=%x.%x.%x.%x\n", + p_tcp_ramrod->tcp.local_ip[0], + p_tcp_ramrod->tcp.local_ip[1], + p_tcp_ramrod->tcp.local_ip[2], + p_tcp_ramrod->tcp.local_ip[3]); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "remote_ip=%x.%x.%x.%x\n", + p_tcp_ramrod->tcp.remote_ip[0], + p_tcp_ramrod->tcp.remote_ip[1], + p_tcp_ramrod->tcp.remote_ip[2], + p_tcp_ramrod->tcp.remote_ip[3]); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "flow_label=%x\n", + p_tcp_ramrod->tcp.flow_label); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ttl=%x\n", + p_tcp_ramrod->tcp.ttl); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "tos_or_tc=%x\n", + p_tcp_ramrod->tcp.tos_or_tc); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "local_port=%x\n", + p_tcp_ramrod->tcp.local_port); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "remote_port=%x\n", + p_tcp_ramrod->tcp.remote_port); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, 
"mss=%x\n", + p_tcp_ramrod->tcp.mss); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rcv_wnd_scale=%x\n", + p_tcp_ramrod->tcp.rcv_wnd_scale); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "connect_mode=%x\n", + p_tcp_ramrod->tcp.connect_mode); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "syn_ip_payload_length=%x\n", + p_tcp_ramrod->tcp.syn_ip_payload_length); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "syn_phy_addr_lo=%x\n", + p_tcp_ramrod->tcp.syn_phy_addr_lo); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "syn_phy_addr_hi=%x\n", + p_tcp_ramrod->tcp.syn_phy_addr_hi); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "<<p_rdma_info->iwarp; + struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + dma_addr_t async_output_phys; + dma_addr_t in_pdata_phys; + enum _ecore_status_t rc; + u16 physical_q; + u8 tcp_flags; + int i; + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = ep->tcp_cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + init_data.comp_mode = ECORE_SPQ_MODE_CB; + } else { + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + } + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_TCP_OFFLOAD, + PROTOCOLID_IWARP, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_tcp_ramrod = &p_ent->ramrod.iwarp_tcp_offload; + + /* Point to the "second half" of the ulp buffer */ + in_pdata_phys = ep->ep_buffer_phys + + OFFSETOF(struct ecore_iwarp_ep_memory, in_pdata); + p_tcp_ramrod->iwarp.incoming_ulp_buffer.addr.hi = + DMA_HI_LE(in_pdata_phys); + p_tcp_ramrod->iwarp.incoming_ulp_buffer.addr.lo = + DMA_LO_LE(in_pdata_phys); + p_tcp_ramrod->iwarp.incoming_ulp_buffer.len = + OSAL_CPU_TO_LE16(sizeof(ep->ep_buffer_virt->in_pdata)); + + async_output_phys = ep->ep_buffer_phys + + OFFSETOF(struct ecore_iwarp_ep_memory, async_output); + + p_tcp_ramrod->iwarp.async_eqe_output_buf.hi = + DMA_HI_LE(async_output_phys); + p_tcp_ramrod->iwarp.async_eqe_output_buf.lo = + DMA_LO_LE(async_output_phys); + p_tcp_ramrod->iwarp.handle_for_async.hi = OSAL_CPU_TO_LE32(PTR_HI(ep)); + p_tcp_ramrod->iwarp.handle_for_async.lo = OSAL_CPU_TO_LE32(PTR_LO(ep)); + + physical_q = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); + p_tcp_ramrod->iwarp.physical_q0 = OSAL_CPU_TO_LE16(physical_q); + physical_q = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK); + p_tcp_ramrod->iwarp.physical_q1 = OSAL_CPU_TO_LE16(physical_q); + p_tcp_ramrod->iwarp.mpa_mode = iwarp_info->mpa_rev; + + ecore_set_fw_mac_addr(&p_tcp_ramrod->tcp.remote_mac_addr_hi, + &p_tcp_ramrod->tcp.remote_mac_addr_mid, + &p_tcp_ramrod->tcp.remote_mac_addr_lo, + ep->remote_mac_addr); + ecore_set_fw_mac_addr(&p_tcp_ramrod->tcp.local_mac_addr_hi, + &p_tcp_ramrod->tcp.local_mac_addr_mid, + &p_tcp_ramrod->tcp.local_mac_addr_lo, + ep->local_mac_addr); + + p_tcp_ramrod->tcp.vlan_id = OSAL_CPU_TO_LE16(ep->cm_info.vlan); + + tcp_flags = p_hwfn->p_rdma_info->iwarp.tcp_flags; + p_tcp_ramrod->tcp.flags = 0; + SET_FIELD(p_tcp_ramrod->tcp.flags, + TCP_OFFLOAD_PARAMS_OPT2_TS_EN, + !!(tcp_flags & ECORE_IWARP_TS_EN)); + + SET_FIELD(p_tcp_ramrod->tcp.flags, + TCP_OFFLOAD_PARAMS_OPT2_DA_EN, + !!(tcp_flags & ECORE_IWARP_DA_EN)); + + p_tcp_ramrod->tcp.ip_version = ep->cm_info.ip_version; + + for (i = 0; i < 4; i++) { + p_tcp_ramrod->tcp.remote_ip[i] = + OSAL_CPU_TO_LE32(ep->cm_info.remote_ip[i]); + p_tcp_ramrod->tcp.local_ip[i] = + OSAL_CPU_TO_LE32(ep->cm_info.local_ip[i]); + } + + p_tcp_ramrod->tcp.remote_port = + OSAL_CPU_TO_LE16(ep->cm_info.remote_port); + p_tcp_ramrod->tcp.local_port 
= OSAL_CPU_TO_LE16(ep->cm_info.local_port); + p_tcp_ramrod->tcp.mss = OSAL_CPU_TO_LE16(ep->mss); + p_tcp_ramrod->tcp.flow_label = 0; + p_tcp_ramrod->tcp.ttl = 0x40; + p_tcp_ramrod->tcp.tos_or_tc = 0; + + p_tcp_ramrod->tcp.max_rt_time = ECORE_IWARP_DEF_MAX_RT_TIME; + p_tcp_ramrod->tcp.cwnd = ECORE_IWARP_DEF_CWND_FACTOR * p_tcp_ramrod->tcp.mss; + p_tcp_ramrod->tcp.ka_max_probe_cnt = ECORE_IWARP_DEF_KA_MAX_PROBE_CNT; + p_tcp_ramrod->tcp.ka_timeout = ECORE_IWARP_DEF_KA_TIMEOUT; + p_tcp_ramrod->tcp.ka_interval = ECORE_IWARP_DEF_KA_INTERVAL; + + p_tcp_ramrod->tcp.rcv_wnd_scale = + (u8)p_hwfn->p_rdma_info->iwarp.rcv_wnd_scale; + p_tcp_ramrod->tcp.connect_mode = ep->connect_mode; + + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + p_tcp_ramrod->tcp.syn_ip_payload_length = + OSAL_CPU_TO_LE16(ep->syn_ip_payload_length); + p_tcp_ramrod->tcp.syn_phy_addr_hi = + DMA_HI_LE(ep->syn_phy_addr); + p_tcp_ramrod->tcp.syn_phy_addr_lo = + DMA_LO_LE(ep->syn_phy_addr); + } + + ecore_iwarp_print_tcp_ramrod(p_hwfn, p_tcp_ramrod); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "EP(0x%x) Offload completed rc=%d\n" , ep->tcp_cid, rc); + + return rc; +} + +/* This function should be called after IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE + * is received. it will be called from the dpc context. + */ +static enum _ecore_status_t +ecore_iwarp_mpa_offload(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep) +{ + struct iwarp_mpa_offload_ramrod_data *p_mpa_ramrod; + struct ecore_iwarp_info *iwarp_info; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + dma_addr_t async_output_phys; + dma_addr_t out_pdata_phys; + dma_addr_t in_pdata_phys; + struct ecore_rdma_qp *qp; + bool reject; + enum _ecore_status_t rc; + + if (!ep) + return ECORE_INVAL; + + qp = ep->qp; + reject = (qp == OSAL_NULL); + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = reject ? 
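+	/* No QP exists on the reject path, so aim the ramrod at the
+	 * still-offloaded TCP connection's cid rather than a QP icid. */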
ep->tcp_cid : qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + + if (ep->connect_mode == TCP_CONNECT_ACTIVE || !ep->event_cb) + init_data.comp_mode = ECORE_SPQ_MODE_CB; + else + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_MPA_OFFLOAD, + PROTOCOLID_IWARP, &init_data); + + if (rc != ECORE_SUCCESS) + return rc; + + p_mpa_ramrod = &p_ent->ramrod.iwarp_mpa_offload; + out_pdata_phys = ep->ep_buffer_phys + + OFFSETOF(struct ecore_iwarp_ep_memory, out_pdata); + p_mpa_ramrod->common.outgoing_ulp_buffer.addr.hi = + DMA_HI_LE(out_pdata_phys); + p_mpa_ramrod->common.outgoing_ulp_buffer.addr.lo = + DMA_LO_LE(out_pdata_phys); + p_mpa_ramrod->common.outgoing_ulp_buffer.len = + ep->cm_info.private_data_len; + p_mpa_ramrod->common.crc_needed = p_hwfn->p_rdma_info->iwarp.crc_needed; + + p_mpa_ramrod->common.out_rq.ord = ep->cm_info.ord; + p_mpa_ramrod->common.out_rq.ird = ep->cm_info.ird; + + p_mpa_ramrod->tcp_cid = p_hwfn->hw_info.opaque_fid << 16 | ep->tcp_cid; + + in_pdata_phys = ep->ep_buffer_phys + + OFFSETOF(struct ecore_iwarp_ep_memory, in_pdata); + p_mpa_ramrod->tcp_connect_side = ep->connect_mode; + p_mpa_ramrod->incoming_ulp_buffer.addr.hi = + DMA_HI_LE(in_pdata_phys); + p_mpa_ramrod->incoming_ulp_buffer.addr.lo = + DMA_LO_LE(in_pdata_phys); + p_mpa_ramrod->incoming_ulp_buffer.len = + OSAL_CPU_TO_LE16(sizeof(ep->ep_buffer_virt->in_pdata)); + async_output_phys = ep->ep_buffer_phys + + OFFSETOF(struct ecore_iwarp_ep_memory, async_output); + p_mpa_ramrod->async_eqe_output_buf.hi = + DMA_HI_LE(async_output_phys); + p_mpa_ramrod->async_eqe_output_buf.lo = + DMA_LO_LE(async_output_phys); + p_mpa_ramrod->handle_for_async.hi = OSAL_CPU_TO_LE32(PTR_HI(ep)); + p_mpa_ramrod->handle_for_async.lo = OSAL_CPU_TO_LE32(PTR_LO(ep)); + + if (!reject) { + p_mpa_ramrod->shared_queue_addr.hi = + DMA_HI_LE(qp->shared_queue_phys_addr); + p_mpa_ramrod->shared_queue_addr.lo = + DMA_LO_LE(qp->shared_queue_phys_addr); + + p_mpa_ramrod->stats_counter_id = + RESC_START(p_hwfn, ECORE_RDMA_STATS_QUEUE) + + qp->stats_queue; + } else { + p_mpa_ramrod->common.reject = 1; + } + + iwarp_info = &p_hwfn->p_rdma_info->iwarp; + p_mpa_ramrod->rcv_wnd = iwarp_info->rcv_wnd_size; + p_mpa_ramrod->mode = ep->mpa_rev; + SET_FIELD(p_mpa_ramrod->rtr_pref, + IWARP_MPA_OFFLOAD_RAMROD_DATA_RTR_SUPPORTED, + ep->rtr_type); + + ep->state = ECORE_IWARP_EP_MPA_OFFLOADED; + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (!reject) + ep->cid = qp->icid; /* Now they're migrated. */ + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "QP(0x%x) EP(0x%x) MPA Offload rc = %d IRD=0x%x ORD=0x%x rtr_type=%d mpa_rev=%d reject=%d\n", + reject ? 
0xffff : qp->icid, ep->tcp_cid, rc, ep->cm_info.ird, + ep->cm_info.ord, ep->rtr_type, ep->mpa_rev, reject); + return rc; +} + +static void +ecore_iwarp_mpa_received(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep) +{ + struct ecore_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct ecore_iwarp_cm_event_params params; + struct mpa_v2_hdr *mpa_v2_params; + union async_output *async_data; + u16 mpa_ord, mpa_ird; + u8 mpa_hdr_size = 0; + u8 mpa_rev; + + async_data = &ep->ep_buffer_virt->async_output; + + mpa_rev = async_data->mpa_request.mpa_handshake_mode; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "private_data_len=%x handshake_mode=%x private_data=(%x)\n", + async_data->mpa_request.ulp_data_len, + mpa_rev, + *((u32 *)((u8 *)ep->ep_buffer_virt->in_pdata))); + + if (ep->listener->state > ECORE_IWARP_LISTENER_STATE_UNPAUSE) { + /* MPA reject initiated by ecore */ + OSAL_MEMSET(&ep->cm_info, 0, sizeof(ep->cm_info)); + ep->event_cb = OSAL_NULL; + ecore_iwarp_mpa_offload(p_hwfn, ep); + return; + } + + if (mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) { + if (iwarp_info->mpa_rev == MPA_NEGOTIATION_TYPE_BASIC) { + DP_ERR(p_hwfn, "MPA_NEGOTIATE Received MPA rev 2 on driver supporting only MPA rev 1\n"); + /* MPA_REV2 ToDo: close the tcp connection. */ + return; + } + + /* Read ord/ird values from private data buffer */ + mpa_v2_params = + (struct mpa_v2_hdr *)(ep->ep_buffer_virt->in_pdata); + mpa_hdr_size = sizeof(*mpa_v2_params); + + mpa_ord = ntohs(mpa_v2_params->ord); + mpa_ird = ntohs(mpa_v2_params->ird); + + /* Temprary store in cm_info incoming ord/ird requested, later + * replace with negotiated value during accept + */ + ep->cm_info.ord = (u8)OSAL_MIN_T(u16, + (mpa_ord & MPA_V2_IRD_ORD_MASK), + ECORE_IWARP_ORD_DEFAULT); + + ep->cm_info.ird = (u8)OSAL_MIN_T(u16, + (mpa_ird & MPA_V2_IRD_ORD_MASK), + ECORE_IWARP_IRD_DEFAULT); + + /* Peer2Peer negotiation */ + ep->rtr_type = MPA_RTR_TYPE_NONE; + if (mpa_ird & MPA_V2_PEER2PEER_MODEL) { + if (mpa_ord & MPA_V2_WRITE_RTR) + ep->rtr_type |= MPA_RTR_TYPE_ZERO_WRITE; + + if (mpa_ord & MPA_V2_READ_RTR) + ep->rtr_type |= MPA_RTR_TYPE_ZERO_READ; + + if (mpa_ird & MPA_V2_SEND_RTR) + ep->rtr_type |= MPA_RTR_TYPE_ZERO_SEND; + + ep->rtr_type &= iwarp_info->rtr_type; + /* if we're left with no match send our capabilities */ + if (ep->rtr_type == MPA_RTR_TYPE_NONE) + ep->rtr_type = iwarp_info->rtr_type; + + /* prioritize write over send and read */ + if (ep->rtr_type & MPA_RTR_TYPE_ZERO_WRITE) + ep->rtr_type = MPA_RTR_TYPE_ZERO_WRITE; + } + + ep->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED; + } else { + ep->cm_info.ord = ECORE_IWARP_ORD_DEFAULT; + ep->cm_info.ird = ECORE_IWARP_IRD_DEFAULT; + ep->mpa_rev = MPA_NEGOTIATION_TYPE_BASIC; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x rtr:0x%x ulp_data_len = %x mpa_hdr_size = %x\n", + mpa_rev, ep->cm_info.ord, ep->cm_info.ird, ep->rtr_type, + async_data->mpa_request.ulp_data_len, + mpa_hdr_size); + + /* Strip mpa v2 hdr from private data before sending to upper layer */ + ep->cm_info.private_data = + ep->ep_buffer_virt->in_pdata + mpa_hdr_size; + + ep->cm_info.private_data_len = + async_data->mpa_request.ulp_data_len - mpa_hdr_size; + + params.event = ECORE_IWARP_EVENT_MPA_REQUEST; + params.cm_info = &ep->cm_info; + params.ep_context = ep; + params.status = ECORE_SUCCESS; + + ep->state = ECORE_IWARP_EP_MPA_REQ_RCVD; + ep->event_cb(ep->cb_context, ¶ms); +} + +static void +ecore_iwarp_move_to_ep_list(struct ecore_hwfn *p_hwfn, + osal_list_t *list, struct ecore_iwarp_ep 
*ep) +{ + OSAL_SPIN_LOCK(&ep->listener->lock); + OSAL_LIST_REMOVE_ENTRY(&ep->list_entry, &ep->listener->ep_list); + OSAL_SPIN_UNLOCK(&ep->listener->lock); + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_LIST_PUSH_TAIL(&ep->list_entry, list); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); +} + +static void +ecore_iwarp_return_ep(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep) +{ + ep->state = ECORE_IWARP_EP_INIT; + if (ep->qp) + ep->qp->ep = OSAL_NULL; + ep->qp = OSAL_NULL; + OSAL_MEMSET(&ep->cm_info, 0, sizeof(ep->cm_info)); + + if (ep->tcp_cid == ECORE_IWARP_INVALID_TCP_CID) { + /* We don't care about the return code, it's ok if tcp_cid + * remains invalid...in this case we'll defer allocation + */ + ecore_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid); + } + + ecore_iwarp_move_to_ep_list(p_hwfn, + &p_hwfn->p_rdma_info->iwarp.ep_free_list, + ep); +} + +static void +ecore_iwarp_parse_private_data(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep) +{ + struct mpa_v2_hdr *mpa_v2_params; + union async_output *async_data; + u16 mpa_ird, mpa_ord; + u8 mpa_data_size = 0; + + if (MPA_REV2(p_hwfn->p_rdma_info->iwarp.mpa_rev)) { + mpa_v2_params = (struct mpa_v2_hdr *) + ((u8 *)ep->ep_buffer_virt->in_pdata); + mpa_data_size = sizeof(*mpa_v2_params); + mpa_ird = ntohs(mpa_v2_params->ird); + mpa_ord = ntohs(mpa_v2_params->ord); + + ep->cm_info.ird = (u8)(mpa_ord & MPA_V2_IRD_ORD_MASK); + ep->cm_info.ord = (u8)(mpa_ird & MPA_V2_IRD_ORD_MASK); + } /* else: Ord / Ird already configured */ + + async_data = &ep->ep_buffer_virt->async_output; + + ep->cm_info.private_data = ep->ep_buffer_virt->in_pdata + mpa_data_size; + ep->cm_info.private_data_len = + async_data->mpa_response.ulp_data_len - mpa_data_size; +} + +static void +ecore_iwarp_mpa_reply_arrived(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep) +{ + struct ecore_iwarp_cm_event_params params; + + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + DP_NOTICE(p_hwfn, true, "MPA reply event not expected on passive side!\n"); + return; + } + + params.event = ECORE_IWARP_EVENT_ACTIVE_MPA_REPLY; + + ecore_iwarp_parse_private_data(p_hwfn, ep); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n", + ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird); + + params.cm_info = &ep->cm_info; + params.ep_context = ep; + params.status = ECORE_SUCCESS; + + ep->mpa_reply_processed = true; + + ep->event_cb(ep->cb_context, ¶ms); +} + +#define ECORE_IWARP_CONNECT_MODE_STRING(ep) \ + (ep->connect_mode == TCP_CONNECT_PASSIVE) ? "Passive" : "Active" + +/* Called as a result of the event: + * IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE + */ +static void +ecore_iwarp_mpa_complete(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + u8 fw_return_code) +{ + struct ecore_iwarp_cm_event_params params; + + if (ep->connect_mode == TCP_CONNECT_ACTIVE) + params.event = ECORE_IWARP_EVENT_ACTIVE_COMPLETE; + else + params.event = ECORE_IWARP_EVENT_PASSIVE_COMPLETE; + + if (ep->connect_mode == TCP_CONNECT_ACTIVE && + !ep->mpa_reply_processed) { + ecore_iwarp_parse_private_data(p_hwfn, ep); + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n", + ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird); + + params.cm_info = &ep->cm_info; + + params.ep_context = ep; + + if ((ep->connect_mode == TCP_CONNECT_PASSIVE) && + (ep->state != ECORE_IWARP_EP_MPA_OFFLOADED)) { + /* This is a FW bug. 
Shouldn't get complete without offload */ + DP_NOTICE(p_hwfn, false, "%s(0x%x) ERROR: Got MPA complete without MPA offload fw_return_code=%d ep->state=%d\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid, + fw_return_code, ep->state); + ep->state = ECORE_IWARP_EP_CLOSED; + return; + } + + if ((ep->connect_mode == TCP_CONNECT_PASSIVE) && + (ep->state == ECORE_IWARP_EP_ABORTING)) + return; + + ep->state = ECORE_IWARP_EP_CLOSED; + + switch (fw_return_code) { + case RDMA_RETURN_OK: + ep->qp->max_rd_atomic_req = ep->cm_info.ord; + ep->qp->max_rd_atomic_resp = ep->cm_info.ird; + ecore_iwarp_modify_qp(p_hwfn, ep->qp, + ECORE_IWARP_QP_STATE_RTS, + 1); + ep->state = ECORE_IWARP_EP_ESTABLISHED; + params.status = ECORE_SUCCESS; + break; + case IWARP_CONN_ERROR_MPA_TIMEOUT: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA timeout\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_TIMEOUT; + break; + case IWARP_CONN_ERROR_MPA_ERROR_REJECT: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA Reject\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_RST: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA reset(tcp cid: 0x%x)\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid, + ep->tcp_cid); + params.status = ECORE_CONN_RESET; + break; + case IWARP_CONN_ERROR_MPA_FIN: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA received FIN\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_INSUF_IRD: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA insufficient ird\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_RTR_MISMATCH: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA RTR MISMATCH\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_INVALID_PACKET: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA Invalid Packet\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_LOCAL_ERROR: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA Local Error\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_TERMINATE: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA TERMINATE\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = ECORE_CONN_REFUSED; + break; + default: + params.status = ECORE_CONN_RESET; + break; + } + + if (ep->event_cb) + ep->event_cb(ep->cb_context, ¶ms); + + /* on passive side, if there is no associated QP (REJECT) we need to + * return the ep to the pool, otherwise we wait for QP to release it. + * Since we add an element in accept instead of this one. in anycase + * we need to remove it from the ep_list (active connections)... 
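+ * A replacement ep was already pushed to the free list by accept, so a
+ * rejected ep can be returned to the pool directly, while a non-rejected
+ * ep is only unlinked here and returned once its QP releases it.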
+ */ + if (fw_return_code != RDMA_RETURN_OK) { + ep->tcp_cid = ECORE_IWARP_INVALID_TCP_CID; + if ((ep->connect_mode == TCP_CONNECT_PASSIVE) && + (ep->qp == OSAL_NULL)) { /* Rejected */ + ecore_iwarp_return_ep(p_hwfn, ep); + } else { + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_LIST_REMOVE_ENTRY( + &ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_list); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + } + } +} + +static void +ecore_iwarp_mpa_v2_set_private(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + u8 *mpa_data_size) +{ + struct mpa_v2_hdr *mpa_v2_params; + u16 mpa_ird, mpa_ord; + + *mpa_data_size = 0; + if (MPA_REV2(ep->mpa_rev)) { + mpa_v2_params = + (struct mpa_v2_hdr *)ep->ep_buffer_virt->out_pdata; + *mpa_data_size = sizeof(*mpa_v2_params); + + mpa_ird = (u16)ep->cm_info.ird; + mpa_ord = (u16)ep->cm_info.ord; + + if (ep->rtr_type != MPA_RTR_TYPE_NONE) { + mpa_ird |= MPA_V2_PEER2PEER_MODEL; + + if (ep->rtr_type & MPA_RTR_TYPE_ZERO_SEND) + mpa_ird |= MPA_V2_SEND_RTR; + + if (ep->rtr_type & MPA_RTR_TYPE_ZERO_WRITE) + mpa_ord |= MPA_V2_WRITE_RTR; + + if (ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) + mpa_ord |= MPA_V2_READ_RTR; + } + + mpa_v2_params->ird = htons(mpa_ird); + mpa_v2_params->ord = htons(mpa_ord); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_NEGOTIATE Header: [%x ord:%x ird] %x ord:%x ird:%x peer2peer:%x rtr_send:%x rtr_write:%x rtr_read:%x\n", + mpa_v2_params->ird, + mpa_v2_params->ord, + *((u32 *)mpa_v2_params), + mpa_ord & MPA_V2_IRD_ORD_MASK, + mpa_ird & MPA_V2_IRD_ORD_MASK, + !!(mpa_ird & MPA_V2_PEER2PEER_MODEL), + !!(mpa_ird & MPA_V2_SEND_RTR), + !!(mpa_ord & MPA_V2_WRITE_RTR), + !!(mpa_ord & MPA_V2_READ_RTR)); + } +} + +enum _ecore_status_t +ecore_iwarp_connect(void *rdma_cxt, + struct ecore_iwarp_connect_in *iparams, + struct ecore_iwarp_connect_out *oparams) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_iwarp_info *iwarp_info; + struct ecore_iwarp_ep *ep; + enum _ecore_status_t rc; + u8 mpa_data_size = 0; + u8 ts_hdr_size = 0; + u32 cid; + + if ((iparams->cm_info.ord > ECORE_IWARP_ORD_DEFAULT) || + (iparams->cm_info.ird > ECORE_IWARP_IRD_DEFAULT)) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "QP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n", + iparams->qp->icid, iparams->cm_info.ord, + iparams->cm_info.ird); + + return ECORE_INVAL; + } + + iwarp_info = &p_hwfn->p_rdma_info->iwarp; + + /* Allocate ep object */ + rc = ecore_iwarp_alloc_cid(p_hwfn, &cid); + if (rc != ECORE_SUCCESS) + return rc; + + if (iparams->qp->ep == OSAL_NULL) { + rc = ecore_iwarp_create_ep(p_hwfn, &ep); + if (rc != ECORE_SUCCESS) + return rc; + } else { + ep = iparams->qp->ep; + DP_ERR(p_hwfn, "Note re-use of QP for different connect\n"); + ep->state = ECORE_IWARP_EP_INIT; + } + + ep->tcp_cid = cid; + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_LIST_PUSH_TAIL(&ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_list); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + ep->qp = iparams->qp; + ep->qp->ep = ep; + OSAL_MEMCPY(ep->remote_mac_addr, + iparams->remote_mac_addr, + ETH_ALEN); + OSAL_MEMCPY(ep->local_mac_addr, + iparams->local_mac_addr, + ETH_ALEN); + OSAL_MEMCPY(&ep->cm_info, &iparams->cm_info, sizeof(ep->cm_info)); + + ep->cm_info.ord = iparams->cm_info.ord; + ep->cm_info.ird = iparams->cm_info.ird; + + ep->rtr_type = iwarp_info->rtr_type; + if (iwarp_info->peer2peer == 0) + ep->rtr_type = MPA_RTR_TYPE_NONE; + + if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) && + (ep->cm_info.ord == 0)) + ep->cm_info.ord 
= 1; + + ep->mpa_rev = iwarp_info->mpa_rev; + + ecore_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size); + + ep->cm_info.private_data = (u8 *)ep->ep_buffer_virt->out_pdata; + ep->cm_info.private_data_len = + iparams->cm_info.private_data_len + mpa_data_size; + + OSAL_MEMCPY((u8 *)(u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size, + iparams->cm_info.private_data, + iparams->cm_info.private_data_len); + + if (p_hwfn->p_rdma_info->iwarp.tcp_flags & ECORE_IWARP_TS_EN) + ts_hdr_size = TIMESTAMP_HEADER_SIZE; + + ep->mss = iparams->mss - ts_hdr_size; + ep->mss = OSAL_MIN_T(u16, ECORE_IWARP_MAX_FW_MSS, ep->mss); + + ep->event_cb = iparams->event_cb; + ep->cb_context = iparams->cb_context; + ep->connect_mode = TCP_CONNECT_ACTIVE; + + oparams->ep_context = ep; + + rc = ecore_iwarp_tcp_offload(p_hwfn, ep); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x) EP(0x%x) rc = %d\n", + iparams->qp->icid, ep->tcp_cid, rc); + + if (rc != ECORE_SUCCESS) + ecore_iwarp_destroy_ep(p_hwfn, ep, true); + + return rc; +} + +static struct ecore_iwarp_ep * +ecore_iwarp_get_free_ep(struct ecore_hwfn *p_hwfn) +{ + struct ecore_iwarp_ep *ep = OSAL_NULL; + enum _ecore_status_t rc; + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + if (OSAL_LIST_IS_EMPTY(&p_hwfn->p_rdma_info->iwarp.ep_free_list)) { + DP_ERR(p_hwfn, "Ep list is empty\n"); + goto out; + } + + ep = OSAL_LIST_FIRST_ENTRY(&p_hwfn->p_rdma_info->iwarp.ep_free_list, + struct ecore_iwarp_ep, + list_entry); + + /* in some cases we could have failed allocating a tcp cid when added + * from accept / failure... retry now..this is not the common case. + */ + if (ep->tcp_cid == ECORE_IWARP_INVALID_TCP_CID) { + rc = ecore_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid); + /* if we fail we could look for another entry with a valid + * tcp_cid, but since we don't expect to reach this anyway + * it's not worth the handling + */ + if (rc) { + ep->tcp_cid = ECORE_IWARP_INVALID_TCP_CID; + ep = OSAL_NULL; + goto out; + } + } + + OSAL_LIST_REMOVE_ENTRY(&ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_free_list); + +out: + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + return ep; +} + +/* takes into account timer scan ~20 ms and interrupt/dpc overhead */ +#define ECORE_IWARP_MAX_CID_CLEAN_TIME 100 +/* Technically we shouldn't reach this count with 100 ms iteration sleep */ +#define ECORE_IWARP_MAX_NO_PROGRESS_CNT 5 + +/* This function waits for all the bits of a bmap to be cleared, as long as + * there is progress ( i.e. the number of bits left to be cleared decreases ) + * the function continues. 
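+ * With ECORE_IWARP_MAX_CID_CLEAN_TIME = 100 and
+ * ECORE_IWARP_MAX_NO_PROGRESS_CNT = 5 this means the wait only gives up
+ * after six consecutive 100 ms sleeps (~600 ms) with no bit cleared.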
+ */ +static enum _ecore_status_t +ecore_iwarp_wait_cid_map_cleared(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap) +{ + int prev_weight = 0; + int wait_count = 0; + int weight = 0; + + weight = OSAL_BITMAP_WEIGHT(bmap->bitmap, bmap->max_count); + prev_weight = weight; + + while (weight) { + OSAL_MSLEEP(ECORE_IWARP_MAX_CID_CLEAN_TIME); + + weight = OSAL_BITMAP_WEIGHT(bmap->bitmap, bmap->max_count); + + if (prev_weight == weight) { + wait_count++; + } else { + prev_weight = weight; + wait_count = 0; + } + + if (wait_count > ECORE_IWARP_MAX_NO_PROGRESS_CNT) { + DP_NOTICE(p_hwfn, false, + "%s bitmap wait timed out (%d cids pending)\n", + bmap->name, weight); + return ECORE_TIMEOUT; + } + } + return ECORE_SUCCESS; +} + +static enum _ecore_status_t +ecore_iwarp_wait_for_all_cids(struct ecore_hwfn *p_hwfn) +{ + enum _ecore_status_t rc; + int i; + + rc = ecore_iwarp_wait_cid_map_cleared( + p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map); + if (rc) + return rc; + + /* Now free the tcp cids from the main cid map */ + for (i = 0; i < ECORE_IWARP_PREALLOC_CNT; i++) { + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->cid_map, + i); + } + + /* Now wait for all cids to be completed */ + rc = ecore_iwarp_wait_cid_map_cleared( + p_hwfn, &p_hwfn->p_rdma_info->cid_map); + + return rc; +} + +static void +ecore_iwarp_free_prealloc_ep(struct ecore_hwfn *p_hwfn) +{ + struct ecore_iwarp_ep *ep; + u32 cid; + + while (!OSAL_LIST_IS_EMPTY(&p_hwfn->p_rdma_info->iwarp.ep_free_list)) { + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + ep = OSAL_LIST_FIRST_ENTRY( + &p_hwfn->p_rdma_info->iwarp.ep_free_list, + struct ecore_iwarp_ep, list_entry); + + if (ep == OSAL_NULL) { + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + break; + } + +#ifdef _NTDDK_ +#pragma warning(suppress : 6011) +#endif + OSAL_LIST_REMOVE_ENTRY( + &ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_free_list); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + if (ep->tcp_cid != ECORE_IWARP_INVALID_TCP_CID) { + cid = ep->tcp_cid - ecore_cxt_get_proto_cid_start( + p_hwfn, p_hwfn->p_rdma_info->proto); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->tcp_cid_map, + cid); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + } + + ecore_iwarp_destroy_ep(p_hwfn, ep, false); + } +} + +static enum _ecore_status_t +ecore_iwarp_prealloc_ep(struct ecore_hwfn *p_hwfn, bool init) +{ + struct ecore_iwarp_ep *ep; + int rc = ECORE_SUCCESS; + u32 cid; + int count; + int i; + + if (init) + count = ECORE_IWARP_PREALLOC_CNT; + else + count = 1; + + for (i = 0; i < count; i++) { + rc = ecore_iwarp_create_ep(p_hwfn, &ep); + if (rc != ECORE_SUCCESS) + return rc; + + /* During initialization we allocate from the main pool, + * afterwards we allocate only from the tcp_cid. 
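+ * The tcp_cid bitmap entries were pre-acquired during init, so the
+ * passive side can allocate one from dpc context without any dynamic
+ * ilt allocation.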
+ */
+ if (init) {
+ rc = ecore_iwarp_alloc_cid(p_hwfn, &cid);
+ if (rc != ECORE_SUCCESS)
+ goto err;
+ ecore_iwarp_set_tcp_cid(p_hwfn, cid);
+ } else {
+ /* We don't care about the return code, it's ok if
+ * tcp_cid remains invalid... in this case we'll
+ * defer allocation
+ */
+ ecore_iwarp_alloc_tcp_cid(p_hwfn, &cid);
+ }
+
+ ep->tcp_cid = cid;
+
+ OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+ OSAL_LIST_PUSH_TAIL(&ep->list_entry,
+ &p_hwfn->p_rdma_info->iwarp.ep_free_list);
+ OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+ }
+
+ return rc;
+
+err:
+ ecore_iwarp_destroy_ep(p_hwfn, ep, false);
+
+ return rc;
+}
+
+enum _ecore_status_t
+ecore_iwarp_alloc(struct ecore_hwfn *p_hwfn)
+{
+ enum _ecore_status_t rc;
+
+#ifdef CONFIG_ECORE_LOCK_ALLOC
+ OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_hwfn->p_rdma_info->iwarp.iw_lock);
+ OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_hwfn->p_rdma_info->iwarp.qp_lock);
+#endif
+ OSAL_SPIN_LOCK_INIT(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+ OSAL_SPIN_LOCK_INIT(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+
+ /* Allocate bitmap for tcp cid. These are used by passive side
+ * to ensure it can allocate a tcp cid during dpc that was
+ * pre-acquired and doesn't require dynamic allocation of ilt
+ */
+ rc = ecore_rdma_bmap_alloc(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map,
+ ECORE_IWARP_PREALLOC_CNT,
+ "TCP_CID");
+ if (rc != ECORE_SUCCESS) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA,
+ "Failed to allocate tcp cid, rc = %d\n",
+ rc);
+ return rc;
+ }
+
+ OSAL_LIST_INIT(&p_hwfn->p_rdma_info->iwarp.ep_free_list);
+ rc = ecore_iwarp_prealloc_ep(p_hwfn, true);
+ if (rc != ECORE_SUCCESS) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA,
+ "ecore_iwarp_prealloc_ep failed, rc = %d\n",
+ rc);
+ return rc;
+ }
+ DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA,
+ "ecore_iwarp_prealloc_ep success, rc = %d\n",
+ rc);
+
+ return ecore_ooo_alloc(p_hwfn);
+}
+
+void
+ecore_iwarp_resc_free(struct ecore_hwfn *p_hwfn)
+{
+ struct ecore_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+
+#ifdef CONFIG_ECORE_LOCK_ALLOC
+ OSAL_SPIN_LOCK_DEALLOC(iwarp_info->iw_lock);
+ OSAL_SPIN_LOCK_DEALLOC(iwarp_info->qp_lock);
+#endif
+ ecore_ooo_free(p_hwfn);
+ if (iwarp_info->partial_fpdus)
+ OSAL_FREE(p_hwfn->p_dev, iwarp_info->partial_fpdus);
+ if (iwarp_info->mpa_bufs)
+ OSAL_FREE(p_hwfn->p_dev, iwarp_info->mpa_bufs);
+ if (iwarp_info->mpa_intermediate_buf)
+ OSAL_FREE(p_hwfn->p_dev, iwarp_info->mpa_intermediate_buf);
+
+ ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, 1);
+}
+
+
+enum _ecore_status_t
+ecore_iwarp_accept(void *rdma_cxt,
+ struct ecore_iwarp_accept_in *iparams)
+{
+ struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt;
+ struct ecore_iwarp_ep *ep;
+ u8 mpa_data_size = 0;
+ enum _ecore_status_t rc;
+
+ ep = (struct ecore_iwarp_ep *)iparams->ep_context;
+ if (!ep) {
+ DP_ERR(p_hwfn, "Ep Context received in accept is NULL\n");
+ return ECORE_INVAL;
+ }
+
+ DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x) EP(0x%x)\n",
+ iparams->qp->icid, ep->tcp_cid);
+
+ if ((iparams->ord > ECORE_IWARP_ORD_DEFAULT) ||
+ (iparams->ird > ECORE_IWARP_IRD_DEFAULT)) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA,
+ "QP(0x%x) EP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n",
+ iparams->qp->icid, ep->tcp_cid,
+ iparams->ord, iparams->ird);
+ return ECORE_INVAL;
+ }
+
+ /* We could reach qp->ep != OSAL_NULL if we do accept on the same qp */
+ if (iparams->qp->ep == OSAL_NULL) {
+ /* We need to add a replacement for the ep to the free list */
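+ /* (each incoming SYN takes an ep off the free list, so a single
+ * fresh ep is preallocated here to keep the pool populated)
+ */
+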
ecore_iwarp_prealloc_ep(p_hwfn, false); + } else { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Note re-use of QP for different connect\n"); + /* Return the old ep to the free_pool */ + ecore_iwarp_return_ep(p_hwfn, iparams->qp->ep); + } + + ecore_iwarp_move_to_ep_list(p_hwfn, + &p_hwfn->p_rdma_info->iwarp.ep_list, + ep); + ep->listener = OSAL_NULL; + ep->cb_context = iparams->cb_context; + ep->qp = iparams->qp; + ep->qp->ep = ep; + + if (ep->mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) { + /* Negotiate ord/ird: if upperlayer requested ord larger than + * ird advertised by remote, we need to decrease our ord + * to match remote ord + */ + if (iparams->ord > ep->cm_info.ird) { + iparams->ord = ep->cm_info.ird; + } + + /* For chelsio compatability, if rtr_zero read is requested + * we can't set ird to zero + */ + if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) && + (iparams->ird == 0)) + iparams->ird = 1; + } + + /* Update cm_info ord/ird to be negotiated values */ + ep->cm_info.ord = iparams->ord; + ep->cm_info.ird = iparams->ird; + + ecore_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size); + + ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata; + ep->cm_info.private_data_len = + iparams->private_data_len + mpa_data_size; + + OSAL_MEMCPY((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size, + iparams->private_data, + iparams->private_data_len); + + if (ep->state == ECORE_IWARP_EP_CLOSED) { + DP_NOTICE(p_hwfn, false, + "(0x%x) Accept called on EP in CLOSED state\n", + ep->tcp_cid); + ep->tcp_cid = ECORE_IWARP_INVALID_TCP_CID; + ecore_iwarp_return_ep(p_hwfn, ep); + return ECORE_CONN_RESET; + } + + rc = ecore_iwarp_mpa_offload(p_hwfn, ep); + if (rc) { + ecore_iwarp_modify_qp(p_hwfn, + iparams->qp, + ECORE_IWARP_QP_STATE_ERROR, + 1); + } + + return rc; +} + +enum _ecore_status_t +ecore_iwarp_reject(void *rdma_cxt, + struct ecore_iwarp_reject_in *iparams) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_iwarp_ep *ep; + u8 mpa_data_size = 0; + enum _ecore_status_t rc; + + ep = (struct ecore_iwarp_ep *)iparams->ep_context; + if (!ep) { + DP_ERR(p_hwfn, "Ep Context receive in reject is NULL\n"); + return ECORE_INVAL; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "EP(0x%x)\n", ep->tcp_cid); + + ep->cb_context = iparams->cb_context; + ep->qp = OSAL_NULL; + + ecore_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size); + + ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata; + ep->cm_info.private_data_len = + iparams->private_data_len + mpa_data_size; + + OSAL_MEMCPY((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size, + iparams->private_data, + iparams->private_data_len); + + if (ep->state == ECORE_IWARP_EP_CLOSED) { + DP_NOTICE(p_hwfn, false, + "(0x%x) Reject called on EP in CLOSED state\n", + ep->tcp_cid); + ep->tcp_cid = ECORE_IWARP_INVALID_TCP_CID; + ecore_iwarp_return_ep(p_hwfn, ep); + return ECORE_CONN_RESET; + } + + rc = ecore_iwarp_mpa_offload(p_hwfn, ep); + return rc; +} + +static void +ecore_iwarp_print_cm_info(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_cm_info *cm_info) +{ + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ip_version = %d\n", + cm_info->ip_version); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "remote_ip %x.%x.%x.%x\n", + cm_info->remote_ip[0], + cm_info->remote_ip[1], + cm_info->remote_ip[2], + cm_info->remote_ip[3]); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "local_ip %x.%x.%x.%x\n", + cm_info->local_ip[0], + cm_info->local_ip[1], + cm_info->local_ip[2], + cm_info->local_ip[3]); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "remote_port = %x\n", + cm_info->remote_port); + 
DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "local_port = %x\n", + cm_info->local_port); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "vlan = %x\n", + cm_info->vlan); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "private_data_len = %x\n", + cm_info->private_data_len); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ord = %d\n", + cm_info->ord); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ird = %d\n", + cm_info->ird); +} + +static int +ecore_iwarp_ll2_post_rx(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ll2_buff *buf, + u8 handle) +{ + enum _ecore_status_t rc; + + rc = ecore_ll2_post_rx_buffer( + p_hwfn, + handle, + buf->data_phys_addr, + (u16)buf->buff_size, + buf, 1); + + if (rc) { + DP_NOTICE(p_hwfn, false, + "Failed to repost rx buffer to ll2 rc = %d, handle=%d\n", + rc, handle); + OSAL_DMA_FREE_COHERENT( + p_hwfn->p_dev, + buf->data, + buf->data_phys_addr, + buf->buff_size); + OSAL_FREE(p_hwfn->p_dev, buf); + } + + return rc; +} + +static bool +ecore_iwarp_ep_exists(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_listener *listener, + struct ecore_iwarp_cm_info *cm_info) +{ + struct ecore_iwarp_ep *ep = OSAL_NULL; + bool found = false; + + OSAL_SPIN_LOCK(&listener->lock); + OSAL_LIST_FOR_EACH_ENTRY(ep, &listener->ep_list, + list_entry, struct ecore_iwarp_ep) { + if ((ep->cm_info.local_port == cm_info->local_port) && + (ep->cm_info.remote_port == cm_info->remote_port) && + (ep->cm_info.vlan == cm_info->vlan) && + !OSAL_MEMCMP(&(ep->cm_info.local_ip), cm_info->local_ip, + sizeof(cm_info->local_ip)) && + !OSAL_MEMCMP(&(ep->cm_info.remote_ip), cm_info->remote_ip, + sizeof(cm_info->remote_ip))) { + found = true; + break; + } + } + + OSAL_SPIN_UNLOCK(&listener->lock); + + if (found) { + DP_NOTICE(p_hwfn, false, "SYN received on active connection - dropping\n"); + ecore_iwarp_print_cm_info(p_hwfn, cm_info); + + return true; + } + + return false; +} + +static struct ecore_iwarp_listener * +ecore_iwarp_get_listener(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_cm_info *cm_info) +{ + struct ecore_iwarp_listener *listener = OSAL_NULL; + static const u32 ip_zero[4] = {0, 0, 0, 0}; + bool found = false; + + ecore_iwarp_print_cm_info(p_hwfn, cm_info); + + OSAL_LIST_FOR_EACH_ENTRY(listener, + &p_hwfn->p_rdma_info->iwarp.listen_list, + list_entry, struct ecore_iwarp_listener) { + + if (listener->port == cm_info->local_port) { + /* Any IP (i.e. 
0.0.0.0 ) will be treated as any vlan */ + if (!OSAL_MEMCMP(listener->ip_addr, + ip_zero, + sizeof(ip_zero))) { + found = true; + break; + } + + /* If not any IP -> check vlan as well */ + if (!OSAL_MEMCMP(listener->ip_addr, + cm_info->local_ip, + sizeof(cm_info->local_ip)) && + + (listener->vlan == cm_info->vlan)) { + found = true; + break; + } + } + } + + if (found && listener->state == ECORE_IWARP_LISTENER_STATE_ACTIVE) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "listener found = %p\n", + listener); + return listener; + } + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "listener not found\n"); + return OSAL_NULL; +} + +static enum _ecore_status_t +ecore_iwarp_parse_rx_pkt(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_cm_info *cm_info, + void *buf, + u8 *remote_mac_addr, + u8 *local_mac_addr, + int *payload_len, + int *tcp_start_offset) +{ + struct ecore_vlan_ethhdr *vethh; + struct ecore_ethhdr *ethh; + struct ecore_iphdr *iph; + struct ecore_ipv6hdr *ip6h; + struct ecore_tcphdr *tcph; + bool vlan_valid = false; + int eth_hlen, ip_hlen; + u16 eth_type; + int i; + + ethh = (struct ecore_ethhdr *)buf; + eth_type = ntohs(ethh->h_proto); + if (eth_type == ETH_P_8021Q) { + vlan_valid = true; + vethh = (struct ecore_vlan_ethhdr *)ethh; + cm_info->vlan = ntohs(vethh->h_vlan_TCI) & VLAN_VID_MASK; + eth_type = ntohs(vethh->h_vlan_encapsulated_proto); + } + + eth_hlen = ETH_HLEN + (vlan_valid ? sizeof(u32) : 0); + + OSAL_MEMCPY(remote_mac_addr, + ethh->h_source, + ETH_ALEN); + + OSAL_MEMCPY(local_mac_addr, + ethh->h_dest, + ETH_ALEN); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "eth_type =%d Source mac: [0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]\n", + eth_type, ethh->h_source[0], ethh->h_source[1], + ethh->h_source[2], ethh->h_source[3], + ethh->h_source[4], ethh->h_source[5]); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "eth_hlen=%d destination mac: [0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]\n", + eth_hlen, ethh->h_dest[0], ethh->h_dest[1], + ethh->h_dest[2], ethh->h_dest[3], + ethh->h_dest[4], ethh->h_dest[5]); + + iph = (struct ecore_iphdr *)((u8 *)(ethh) + eth_hlen); + + if (eth_type == ETH_P_IP) { + if (iph->protocol != IPPROTO_TCP) { + DP_NOTICE(p_hwfn, false, + "Unexpected ip protocol on ll2 %x\n", + iph->protocol); + return ECORE_INVAL; + } + + cm_info->local_ip[0] = ntohl(iph->daddr); + cm_info->remote_ip[0] = ntohl(iph->saddr); + cm_info->ip_version = (enum ecore_tcp_ip_version)TCP_IPV4; + + ip_hlen = (iph->ihl)*sizeof(u32); + *payload_len = ntohs(iph->tot_len) - ip_hlen; + + } else if (eth_type == ETH_P_IPV6) { + ip6h = (struct ecore_ipv6hdr *)iph; + + if (ip6h->nexthdr != IPPROTO_TCP) { + DP_NOTICE(p_hwfn, false, + "Unexpected ip protocol on ll2 %x\n", + iph->protocol); + return ECORE_INVAL; + } + + for (i = 0; i < 4; i++) { + cm_info->local_ip[i] = + ntohl(ip6h->daddr.in6_u.u6_addr32[i]); + cm_info->remote_ip[i] = + ntohl(ip6h->saddr.in6_u.u6_addr32[i]); + } + cm_info->ip_version = (enum ecore_tcp_ip_version)TCP_IPV6; + + ip_hlen = sizeof(*ip6h); + *payload_len = ntohs(ip6h->payload_len); + } else { + DP_NOTICE(p_hwfn, false, + "Unexpected ethertype on ll2 %x\n", eth_type); + return ECORE_INVAL; + } + + tcph = (struct ecore_tcphdr *)((u8 *)iph + ip_hlen); + + if (!tcph->syn) { + DP_NOTICE(p_hwfn, false, + "Only SYN type packet expected on this ll2 conn, iph->ihl=%d source=%d dest=%d\n", + iph->ihl, tcph->source, tcph->dest); + return ECORE_INVAL; + } + + cm_info->local_port = ntohs(tcph->dest); + cm_info->remote_port = ntohs(tcph->source); + + ecore_iwarp_print_cm_info(p_hwfn, cm_info); + + *tcp_start_offset 
= eth_hlen + ip_hlen; + + return ECORE_SUCCESS; +} + +static struct ecore_iwarp_fpdu * +ecore_iwarp_get_curr_fpdu(struct ecore_hwfn *p_hwfn, u16 cid) +{ + struct ecore_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct ecore_iwarp_fpdu *partial_fpdu; + u32 idx = cid - ecore_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_IWARP); + + if (idx >= iwarp_info->max_num_partial_fpdus) { + DP_ERR(p_hwfn, "Invalid cid %x max_num_partial_fpdus=%x\n", cid, + iwarp_info->max_num_partial_fpdus); + return OSAL_NULL; + } + + partial_fpdu = &iwarp_info->partial_fpdus[idx]; + + return partial_fpdu; +} + +enum ecore_iwarp_mpa_pkt_type { + ECORE_IWARP_MPA_PKT_PACKED, + ECORE_IWARP_MPA_PKT_PARTIAL, + ECORE_IWARP_MPA_PKT_UNALIGNED +}; + +#define ECORE_IWARP_INVALID_FPDU_LENGTH 0xffff +#define ECORE_IWARP_MPA_FPDU_LENGTH_SIZE (2) +#define ECORE_IWARP_MPA_CRC32_DIGEST_SIZE (4) + +/* Pad to multiple of 4 */ +#define ECORE_IWARP_PDU_DATA_LEN_WITH_PAD(data_len) (((data_len) + 3) & ~3) + +#define ECORE_IWARP_FPDU_LEN_WITH_PAD(_mpa_len) \ + (ECORE_IWARP_PDU_DATA_LEN_WITH_PAD(_mpa_len + \ + ECORE_IWARP_MPA_FPDU_LENGTH_SIZE) + \ + ECORE_IWARP_MPA_CRC32_DIGEST_SIZE) + +/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */ +#define ECORE_IWARP_MAX_BDS_PER_FPDU 3 + +char *pkt_type_str[] = { + "ECORE_IWARP_MPA_PKT_PACKED", + "ECORE_IWARP_MPA_PKT_PARTIAL", + "ECORE_IWARP_MPA_PKT_UNALIGNED" +}; + +static enum _ecore_status_t +ecore_iwarp_recycle_pkt(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_fpdu *fpdu, + struct ecore_iwarp_ll2_buff *buf); + +static enum ecore_iwarp_mpa_pkt_type +ecore_iwarp_mpa_classify(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_fpdu *fpdu, + u16 tcp_payload_len, + u8 *mpa_data) + +{ + enum ecore_iwarp_mpa_pkt_type pkt_type; + u16 mpa_len; + + if (fpdu->incomplete_bytes) { + pkt_type = ECORE_IWARP_MPA_PKT_UNALIGNED; + goto out; + } + + /* special case of one byte remaining... 
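+ * a single byte cannot hold the 2-byte mpa length field, so only the
+ * upper length byte is latched here and the lower byte is or'ed in by
+ * ecore_iwarp_update_fpdu_length() when the next segment arrives.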
*/ + if (tcp_payload_len == 1) { + /* lower byte will be read next packet */ + fpdu->fpdu_length = *mpa_data << 8; + pkt_type = ECORE_IWARP_MPA_PKT_PARTIAL; + goto out; + } + + mpa_len = ntohs(*((u16 *)(mpa_data))); + fpdu->fpdu_length = ECORE_IWARP_FPDU_LEN_WITH_PAD(mpa_len); + + if (fpdu->fpdu_length <= tcp_payload_len) + pkt_type = ECORE_IWARP_MPA_PKT_PACKED; + else + pkt_type = ECORE_IWARP_MPA_PKT_PARTIAL; + +out: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_ALIGN: %s: fpdu_length=0x%x tcp_payload_len:0x%x\n", + pkt_type_str[pkt_type], fpdu->fpdu_length, tcp_payload_len); + + return pkt_type; +} + +static void +ecore_iwarp_init_fpdu(struct ecore_iwarp_ll2_buff *buf, + struct ecore_iwarp_fpdu *fpdu, + struct unaligned_opaque_data *pkt_data, + u16 tcp_payload_size, u8 placement_offset) +{ + fpdu->mpa_buf = buf; + fpdu->pkt_hdr = buf->data_phys_addr + placement_offset; + fpdu->pkt_hdr_size = pkt_data->tcp_payload_offset; + + fpdu->mpa_frag = buf->data_phys_addr + pkt_data->first_mpa_offset; + fpdu->mpa_frag_virt = (u8 *)(buf->data) + pkt_data->first_mpa_offset; + + if (tcp_payload_size == 1) + fpdu->incomplete_bytes = ECORE_IWARP_INVALID_FPDU_LENGTH; + else if (tcp_payload_size < fpdu->fpdu_length) + fpdu->incomplete_bytes = fpdu->fpdu_length - tcp_payload_size; + else + fpdu->incomplete_bytes = 0; /* complete fpdu */ + + fpdu->mpa_frag_len = fpdu->fpdu_length - fpdu->incomplete_bytes; +} + +static enum _ecore_status_t +ecore_iwarp_copy_fpdu(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_fpdu *fpdu, + struct unaligned_opaque_data *pkt_data, + struct ecore_iwarp_ll2_buff *buf, + u16 tcp_payload_size) + +{ + u8 *tmp_buf = p_hwfn->p_rdma_info->iwarp.mpa_intermediate_buf; + enum _ecore_status_t rc; + + /* need to copy the data from the partial packet stored in fpdu + * to the new buf, for this we also need to move the data currently + * placed on the buf. The assumption is that the buffer is big enough + * since fpdu_length <= mss, we use an intermediate buffer since + * we may need to copy the new data to an overlapping location + */ + if ((fpdu->mpa_frag_len + tcp_payload_size) > (u16)buf->buff_size) { + DP_ERR(p_hwfn, + "MPA ALIGN: Unexpected: buffer is not large enough for split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n", + buf->buff_size, fpdu->mpa_frag_len, tcp_payload_size, + fpdu->incomplete_bytes); + return ECORE_INVAL; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA ALIGN Copying fpdu: [%p, %d] [%p, %d]\n", + fpdu->mpa_frag_virt, fpdu->mpa_frag_len, + (u8 *)(buf->data) + pkt_data->first_mpa_offset, + tcp_payload_size); + + OSAL_MEMCPY(tmp_buf, fpdu->mpa_frag_virt, fpdu->mpa_frag_len); + OSAL_MEMCPY(tmp_buf + fpdu->mpa_frag_len, + (u8 *)(buf->data) + pkt_data->first_mpa_offset, + tcp_payload_size); + + rc = ecore_iwarp_recycle_pkt(p_hwfn, fpdu, fpdu->mpa_buf); + if (rc) + return rc; + + /* If we managed to post the buffer copy the data to the new buffer + * o/w this will occur in the next round... 
+ */
+ OSAL_MEMCPY((u8 *)(buf->data), tmp_buf,
+ fpdu->mpa_frag_len + tcp_payload_size);
+
+ fpdu->mpa_buf = buf;
+ /* fpdu->pkt_hdr remains as is */
+ /* fpdu->mpa_frag is overridden with new buf */
+ fpdu->mpa_frag = buf->data_phys_addr;
+ fpdu->mpa_frag_virt = buf->data;
+ fpdu->mpa_frag_len += tcp_payload_size;
+
+ fpdu->incomplete_bytes -= tcp_payload_size;
+
+ DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA,
+ "MPA ALIGN: split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n",
+ buf->buff_size, fpdu->mpa_frag_len, tcp_payload_size,
+ fpdu->incomplete_bytes);
+
+ return 0;
+}
+
+static void
+ecore_iwarp_update_fpdu_length(struct ecore_hwfn *p_hwfn,
+ struct ecore_iwarp_fpdu *fpdu,
+ u8 *mpa_data)
+{
+ u16 mpa_len;
+
+ /* Update incomplete packets if needed */
+ if (fpdu->incomplete_bytes == ECORE_IWARP_INVALID_FPDU_LENGTH) {
+ mpa_len = fpdu->fpdu_length | *mpa_data;
+ fpdu->fpdu_length = ECORE_IWARP_FPDU_LEN_WITH_PAD(mpa_len);
+ fpdu->mpa_frag_len = fpdu->fpdu_length;
+ /* one byte of hdr */
+ fpdu->incomplete_bytes = fpdu->fpdu_length - 1;
+ DP_VERBOSE(p_hwfn,
+ ECORE_MSG_RDMA,
+ "MPA_ALIGN: Partial header mpa_len=%x fpdu_length=%x incomplete_bytes=%x\n",
+ mpa_len, fpdu->fpdu_length, fpdu->incomplete_bytes);
+ }
+}
+
+#define ECORE_IWARP_IS_RIGHT_EDGE(_curr_pkt) \
+ (GET_FIELD(_curr_pkt->flags, \
+ UNALIGNED_OPAQUE_DATA_PKT_REACHED_WIN_RIGHT_EDGE))
+
+/* This function is used to recycle a buffer using the ll2 drop option. It
+ * uses the mechanism to ensure that all buffers posted to tx before this one
+ * were completed. The buffer sent here will be sent as a cookie in the tx
+ * completion function and can then be reposted to rx chain when done. The flow
+ * that requires this is the flow where an FPDU splits over more than 3 tcp
+ * segments. In this case the driver needs to re-post a rx buffer instead of
+ * the one received, but driver can't simply repost a buffer it copied from
+ * as there is a case where the buffer was originally a packed FPDU, and is
+ * partially posted to FW. Driver needs to ensure FW is done with it.
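+ * The drop packet below therefore carries only the packet header as its
+ * single BD and the buffer as its cookie; ecore_iwarp_ll2_comp_tx_pkt()
+ * reposts the cookie to the rx chain once everything queued before it
+ * has completed.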
+ */ +static enum _ecore_status_t +ecore_iwarp_recycle_pkt(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_fpdu *fpdu, + struct ecore_iwarp_ll2_buff *buf) +{ + struct ecore_ll2_tx_pkt_info tx_pkt; + enum _ecore_status_t rc; + u8 ll2_handle; + + OSAL_MEM_ZERO(&tx_pkt, sizeof(tx_pkt)); + tx_pkt.num_of_bds = 1; + tx_pkt.tx_dest = ECORE_LL2_TX_DEST_DROP; + tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; + tx_pkt.first_frag = fpdu->pkt_hdr; + tx_pkt.first_frag_len = fpdu->pkt_hdr_size; + buf->piggy_buf = OSAL_NULL; + tx_pkt.cookie = buf; + + ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle; + + rc = ecore_ll2_prepare_tx_packet(p_hwfn, + ll2_handle, + &tx_pkt, true); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_ALIGN: send drop tx packet [%lx, 0x%x], buf=%p, rc=%d\n", + (long unsigned int)tx_pkt.first_frag, + tx_pkt.first_frag_len, buf, rc); + + if (rc) + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Can't drop packet rc=%d\n", rc); + + return rc; +} + +static enum _ecore_status_t +ecore_iwarp_win_right_edge(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_fpdu *fpdu) +{ + struct ecore_ll2_tx_pkt_info tx_pkt; + enum _ecore_status_t rc; + u8 ll2_handle; + + OSAL_MEM_ZERO(&tx_pkt, sizeof(tx_pkt)); + tx_pkt.num_of_bds = 1; + tx_pkt.tx_dest = ECORE_LL2_TX_DEST_LB; + tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; + + tx_pkt.first_frag = fpdu->pkt_hdr; + tx_pkt.first_frag_len = fpdu->pkt_hdr_size; + tx_pkt.enable_ip_cksum = true; + tx_pkt.enable_l4_cksum = true; + tx_pkt.calc_ip_len = true; + /* vlan overload with enum iwarp_ll2_tx_queues */ + tx_pkt.vlan = IWARP_LL2_ALIGNED_RIGHT_TRIMMED_TX_QUEUE; + + ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle; + + rc = ecore_ll2_prepare_tx_packet(p_hwfn, + ll2_handle, + &tx_pkt, true); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_ALIGN: Sent right edge FPDU num_bds=%d [%lx, 0x%x], rc=%d\n", + tx_pkt.num_of_bds, (long unsigned int)tx_pkt.first_frag, + tx_pkt.first_frag_len, rc); + + if (rc) + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Can't send right edge rc=%d\n", rc); + + return rc; +} + +static enum _ecore_status_t +ecore_iwarp_send_fpdu(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_fpdu *fpdu, + struct unaligned_opaque_data *curr_pkt, + struct ecore_iwarp_ll2_buff *buf, + u16 tcp_payload_size, + enum ecore_iwarp_mpa_pkt_type pkt_type) +{ + struct ecore_ll2_tx_pkt_info tx_pkt; + enum _ecore_status_t rc; + u8 ll2_handle; + + OSAL_MEM_ZERO(&tx_pkt, sizeof(tx_pkt)); + + tx_pkt.num_of_bds = (pkt_type == ECORE_IWARP_MPA_PKT_UNALIGNED) ? 3 : 2; + tx_pkt.tx_dest = ECORE_LL2_TX_DEST_LB; + tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; + + /* Send the mpa_buf only with the last fpdu (in case of packed) */ + if ((pkt_type == ECORE_IWARP_MPA_PKT_UNALIGNED) || + (tcp_payload_size <= fpdu->fpdu_length)) + tx_pkt.cookie = fpdu->mpa_buf; + + tx_pkt.first_frag = fpdu->pkt_hdr; + tx_pkt.first_frag_len = fpdu->pkt_hdr_size; + tx_pkt.enable_ip_cksum = true; + tx_pkt.enable_l4_cksum = true; + tx_pkt.calc_ip_len = true; + /* vlan overload with enum iwarp_ll2_tx_queues */ + tx_pkt.vlan = IWARP_LL2_ALIGNED_TX_QUEUE; + + /* special case of unaligned packet and not packed, need to send + * both buffers as cookie to release. 
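+ * (the second buffer rides along as piggy_buf and is freed or reposted
+ * together with the mpa_buf from the tx completion handler)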
+ */ + if (tcp_payload_size == fpdu->incomplete_bytes) { + fpdu->mpa_buf->piggy_buf = buf; + } + + ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle; + + rc = ecore_ll2_prepare_tx_packet(p_hwfn, + ll2_handle, + &tx_pkt, true); + if (rc) + goto err; + + rc = ecore_ll2_set_fragment_of_tx_packet(p_hwfn, ll2_handle, + fpdu->mpa_frag, + fpdu->mpa_frag_len); + if (rc) + goto err; + + if (fpdu->incomplete_bytes) { + rc = ecore_ll2_set_fragment_of_tx_packet( + p_hwfn, ll2_handle, + buf->data_phys_addr + curr_pkt->first_mpa_offset, + fpdu->incomplete_bytes); + + if (rc) + goto err; + } + +err: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_ALIGN: Sent FPDU num_bds=%d [%lx, 0x%x], [0x%lx, 0x%x], [0x%lx, 0x%x] (cookie %p) rc=%d\n", + tx_pkt.num_of_bds, (long unsigned int)tx_pkt.first_frag, + tx_pkt.first_frag_len, (long unsigned int)fpdu->mpa_frag, + fpdu->mpa_frag_len, (long unsigned int)buf->data_phys_addr + + curr_pkt->first_mpa_offset, fpdu->incomplete_bytes, + tx_pkt.cookie, rc); + + return rc; +} + +static void +ecore_iwarp_mpa_get_data(struct ecore_hwfn *p_hwfn, + struct unaligned_opaque_data *curr_pkt, + u32 opaque_data0, u32 opaque_data1) +{ + u64 opaque_data; + + opaque_data = HILO_64(opaque_data1, opaque_data0); + *curr_pkt = *((struct unaligned_opaque_data *)&opaque_data); + + /* fix endianity */ + curr_pkt->first_mpa_offset = curr_pkt->tcp_payload_offset + + OSAL_LE16_TO_CPU(curr_pkt->first_mpa_offset); + curr_pkt->cid = OSAL_LE32_TO_CPU(curr_pkt->cid); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "OPAQUE0=0x%x OPAQUE1=0x%x first_mpa_offset:0x%x\ttcp_payload_offset:0x%x\tflags:0x%x\tcid:0x%x\n", + opaque_data0, opaque_data1, curr_pkt->first_mpa_offset, + curr_pkt->tcp_payload_offset, curr_pkt->flags, + curr_pkt->cid); +} + +static void +ecore_iwarp_mpa_print_tcp_seq(struct ecore_hwfn *p_hwfn, + void *buf) +{ + struct ecore_vlan_ethhdr *vethh; + struct ecore_ethhdr *ethh; + struct ecore_iphdr *iph; + struct ecore_ipv6hdr *ip6h; + struct ecore_tcphdr *tcph; + bool vlan_valid = false; + int eth_hlen, ip_hlen; + u16 eth_type; + + if ((p_hwfn->dp_level > ECORE_LEVEL_VERBOSE) || + !(p_hwfn->dp_module & ECORE_MSG_RDMA)) + return; + + ethh = (struct ecore_ethhdr *)buf; + eth_type = ntohs(ethh->h_proto); + if (eth_type == ETH_P_8021Q) { + vlan_valid = true; + vethh = (struct ecore_vlan_ethhdr *)ethh; + eth_type = ntohs(vethh->h_vlan_encapsulated_proto); + } + + eth_hlen = ETH_HLEN + (vlan_valid ? 
sizeof(u32) : 0); + + iph = (struct ecore_iphdr *)((u8 *)(ethh) + eth_hlen); + + if (eth_type == ETH_P_IP) { + ip_hlen = (iph->ihl)*sizeof(u32); + } else if (eth_type == ETH_P_IPV6) { + ip6h = (struct ecore_ipv6hdr *)iph; + ip_hlen = sizeof(*ip6h); + } else { + DP_ERR(p_hwfn, "Unexpected ethertype on ll2 %x\n", eth_type); + return; + } + + tcph = (struct ecore_tcphdr *)((u8 *)iph + ip_hlen); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Processing MPA PKT: tcp_seq=0x%x tcp_ack_seq=0x%x\n", + ntohl(tcph->seq), ntohl(tcph->ack_seq)); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "eth_type =%d Source mac: [0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]\n", + eth_type, ethh->h_source[0], ethh->h_source[1], + ethh->h_source[2], ethh->h_source[3], + ethh->h_source[4], ethh->h_source[5]); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "eth_hlen=%d destination mac: [0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]:[0x%x]\n", + eth_hlen, ethh->h_dest[0], ethh->h_dest[1], + ethh->h_dest[2], ethh->h_dest[3], + ethh->h_dest[4], ethh->h_dest[5]); + + return; +} + +/* This function is called when an unaligned or incomplete MPA packet arrives + * driver needs to align the packet, perhaps using previous data and send + * it down to FW once it is aligned. + */ +static enum _ecore_status_t +ecore_iwarp_process_mpa_pkt(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ll2_mpa_buf *mpa_buf) +{ + struct ecore_iwarp_ll2_buff *buf = mpa_buf->ll2_buf; + enum ecore_iwarp_mpa_pkt_type pkt_type; + struct unaligned_opaque_data *curr_pkt = &mpa_buf->data; + struct ecore_iwarp_fpdu *fpdu; + u8 *mpa_data; + enum _ecore_status_t rc = ECORE_SUCCESS; + + ecore_iwarp_mpa_print_tcp_seq( + p_hwfn, (u8 *)(buf->data) + mpa_buf->placement_offset); + + fpdu = ecore_iwarp_get_curr_fpdu(p_hwfn, curr_pkt->cid & 0xffff); + if (!fpdu) {/* something corrupt with cid, post rx back */ + DP_ERR(p_hwfn, "Invalid cid, drop and post back to rx cid=%x\n", + curr_pkt->cid); + rc = ecore_iwarp_ll2_post_rx( + p_hwfn, buf, p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle); + + if (rc) { /* not much we can do here except log and free */ + DP_ERR(p_hwfn, "Post rx buffer failed\n"); + + /* we don't expect any failures from rx, not even + * busy since we allocate #bufs=#descs + */ + rc = ECORE_UNKNOWN_ERROR; + } + return rc; + } + + do { + mpa_data = ((u8 *)(buf->data) + curr_pkt->first_mpa_offset); + + pkt_type = ecore_iwarp_mpa_classify(p_hwfn, fpdu, + mpa_buf->tcp_payload_len, + mpa_data); + + switch (pkt_type) { + case ECORE_IWARP_MPA_PKT_PARTIAL: + ecore_iwarp_init_fpdu(buf, fpdu, + curr_pkt, + mpa_buf->tcp_payload_len, + mpa_buf->placement_offset); + + if (!ECORE_IWARP_IS_RIGHT_EDGE(curr_pkt)) { + mpa_buf->tcp_payload_len = 0; + break; + } + + rc = ecore_iwarp_win_right_edge(p_hwfn, fpdu); + + if (rc) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Can't send FPDU:reset rc=%d\n", rc); + OSAL_MEM_ZERO(fpdu, sizeof(*fpdu)); + break; + } + + mpa_buf->tcp_payload_len = 0; + break; + case ECORE_IWARP_MPA_PKT_PACKED: + if (fpdu->fpdu_length == 8) { + DP_ERR(p_hwfn, "SUSPICIOUS fpdu_length = 0x%x: assuming bug...aborting this packet...\n", + fpdu->fpdu_length); + mpa_buf->tcp_payload_len = 0; + break; + } + + ecore_iwarp_init_fpdu(buf, fpdu, + curr_pkt, + mpa_buf->tcp_payload_len, + mpa_buf->placement_offset); + + rc = ecore_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf, + mpa_buf->tcp_payload_len, + pkt_type); + if (rc) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Can't send FPDU:reset rc=%d\n", rc); + OSAL_MEM_ZERO(fpdu, sizeof(*fpdu)); + break; + } + mpa_buf->tcp_payload_len -= fpdu->fpdu_length; + 
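/* consume the fpdu just sent and let the do-while loop classify
+ * whatever payload remains in this buffer
+ */
+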
curr_pkt->first_mpa_offset += fpdu->fpdu_length; + break; + case ECORE_IWARP_MPA_PKT_UNALIGNED: + ecore_iwarp_update_fpdu_length(p_hwfn, fpdu, mpa_data); + if (mpa_buf->tcp_payload_len < fpdu->incomplete_bytes) { + /* special handling of fpdu split over more + * than 2 segments + */ + if (ECORE_IWARP_IS_RIGHT_EDGE(curr_pkt)) { + rc = ecore_iwarp_win_right_edge(p_hwfn, + fpdu); + /* packet will be re-processed later */ + if (rc) + return rc; + } + + rc = ecore_iwarp_copy_fpdu( + p_hwfn, fpdu, curr_pkt, + buf, mpa_buf->tcp_payload_len); + + /* packet will be re-processed later */ + if (rc) + return rc; + + mpa_buf->tcp_payload_len = 0; + + break; + } + + rc = ecore_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf, + mpa_buf->tcp_payload_len, + pkt_type); + if (rc) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Can't send FPDU:delay rc=%d\n", rc); + /* don't reset fpdu -> we need it for next + * classify + */ + break; + } + mpa_buf->tcp_payload_len -= fpdu->incomplete_bytes; + curr_pkt->first_mpa_offset += fpdu->incomplete_bytes; + /* The framed PDU was sent - no more incomplete bytes */ + fpdu->incomplete_bytes = 0; + break; + } + + } while (mpa_buf->tcp_payload_len && !rc); + + return rc; +} + +static void +ecore_iwarp_process_pending_pkts(struct ecore_hwfn *p_hwfn) +{ + struct ecore_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct ecore_iwarp_ll2_mpa_buf *mpa_buf = OSAL_NULL; + enum _ecore_status_t rc; + + while (!OSAL_LIST_IS_EMPTY(&iwarp_info->mpa_buf_pending_list)) { + mpa_buf = OSAL_LIST_FIRST_ENTRY( + &iwarp_info->mpa_buf_pending_list, + struct ecore_iwarp_ll2_mpa_buf, + list_entry); + + rc = ecore_iwarp_process_mpa_pkt(p_hwfn, mpa_buf); + + /* busy means break and continue processing later, don't + * remove the buf from the pending list. + */ + if (rc == ECORE_BUSY) + break; + +#ifdef _NTDDK_ +#pragma warning(suppress : 6011) +#pragma warning(suppress : 28182) +#endif + OSAL_LIST_REMOVE_ENTRY( + &mpa_buf->list_entry, + &iwarp_info->mpa_buf_pending_list); + + OSAL_LIST_PUSH_TAIL(&mpa_buf->list_entry, + &iwarp_info->mpa_buf_list); + + if (rc) { /* different error, don't continue */ + DP_NOTICE(p_hwfn, false, "process pkts failed rc=%d\n", + rc); + break; + } + } +} + +static void +ecore_iwarp_ll2_comp_mpa_pkt(void *cxt, + struct ecore_ll2_comp_rx_data *data) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct ecore_iwarp_ll2_mpa_buf *mpa_buf; + + iwarp_info->unalign_rx_comp++; + + mpa_buf = OSAL_LIST_FIRST_ENTRY(&iwarp_info->mpa_buf_list, + struct ecore_iwarp_ll2_mpa_buf, + list_entry); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "LL2 MPA CompRx buf=%p placement_offset=%d, payload_len=0x%x mpa_buf=%p\n", + data->cookie, data->u.placement_offset, + data->length.packet_length, mpa_buf); + + if (!mpa_buf) { + DP_ERR(p_hwfn, "no free mpa buf. 
this is a driver bug.\n"); + return; + } + OSAL_LIST_REMOVE_ENTRY(&mpa_buf->list_entry, &iwarp_info->mpa_buf_list); + + ecore_iwarp_mpa_get_data(p_hwfn, &mpa_buf->data, + data->opaque_data_0, data->opaque_data_1); + + mpa_buf->tcp_payload_len = data->length.packet_length - + mpa_buf->data.first_mpa_offset; + mpa_buf->ll2_buf = (struct ecore_iwarp_ll2_buff *)data->cookie; + mpa_buf->data.first_mpa_offset += data->u.placement_offset; + mpa_buf->placement_offset = data->u.placement_offset; + + OSAL_LIST_PUSH_TAIL(&mpa_buf->list_entry, + &iwarp_info->mpa_buf_pending_list); + + ecore_iwarp_process_pending_pkts(p_hwfn); +} + +static void +ecore_iwarp_ll2_comp_syn_pkt(void *cxt, struct ecore_ll2_comp_rx_data *data) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_iwarp_ll2_buff *buf = + (struct ecore_iwarp_ll2_buff *)data->cookie; + struct ecore_iwarp_listener *listener; + struct ecore_iwarp_cm_info cm_info; + struct ecore_ll2_tx_pkt_info tx_pkt; + u8 remote_mac_addr[ETH_ALEN]; + u8 local_mac_addr[ETH_ALEN]; + struct ecore_iwarp_ep *ep; + enum _ecore_status_t rc; + int tcp_start_offset; + u8 ts_hdr_size = 0; + int payload_len; + u32 hdr_size; + + OSAL_MEM_ZERO(&cm_info, sizeof(cm_info)); + + /* Check if packet was received with errors... */ + if (data->err_flags != 0) { + DP_NOTICE(p_hwfn, false, "Error received on SYN packet: 0x%x\n", + data->err_flags); + goto err; + } + + if (GET_FIELD(data->parse_flags, + PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED) && + GET_FIELD(data->parse_flags, + PARSING_AND_ERR_FLAGS_L4CHKSMERROR)) { + DP_NOTICE(p_hwfn, false, "Syn packet received with checksum error\n"); + goto err; + } + + rc = ecore_iwarp_parse_rx_pkt( + p_hwfn, &cm_info, (u8 *)(buf->data) + data->u.placement_offset, + remote_mac_addr, local_mac_addr, &payload_len, + &tcp_start_offset); + if (rc) + goto err; + + /* Check if there is a listener for this 4-tuple */ + listener = ecore_iwarp_get_listener(p_hwfn, &cm_info); + if (!listener) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "SYN received on tuple not listened on parse_flags=%d packet len=%d\n", + data->parse_flags, data->length.packet_length); + + OSAL_MEMSET(&tx_pkt, 0, sizeof(tx_pkt)); + tx_pkt.num_of_bds = 1; + tx_pkt.bd_flags = 0; + tx_pkt.l4_hdr_offset_w = (data->length.packet_length) >> 2; + tx_pkt.tx_dest = ECORE_LL2_TX_DEST_LB; + tx_pkt.first_frag = buf->data_phys_addr + + data->u.placement_offset; + tx_pkt.first_frag_len = data->length.packet_length; + tx_pkt.cookie = buf; + + rc = ecore_ll2_prepare_tx_packet( + p_hwfn, + p_hwfn->p_rdma_info->iwarp.ll2_syn_handle, + &tx_pkt, true); + + if (rc) { + DP_NOTICE(p_hwfn, false, + "Can't post SYN back to chip rc=%d\n", rc); + goto err; + } + return; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Received syn on listening port\n"); + + /* For debugging purpose... */ + if (listener->drop) + goto err; + + /* There may be an open ep on this connection if this is a syn + * retrasnmit... need to make sure there isn't... 
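+ * ecore_iwarp_ep_exists() matches the full 4-tuple and vlan, so a
+ * retransmitted SYN for a connection already being offloaded is simply
+ * dropped and its buffer reposted.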
+ */ + if (ecore_iwarp_ep_exists(p_hwfn, listener, &cm_info)) + goto err; + + ep = ecore_iwarp_get_free_ep(p_hwfn); + if (ep == OSAL_NULL) + goto err; + + OSAL_SPIN_LOCK(&listener->lock); + OSAL_LIST_PUSH_TAIL(&ep->list_entry, &listener->ep_list); + OSAL_SPIN_UNLOCK(&listener->lock); + + OSAL_MEMCPY(ep->remote_mac_addr, + remote_mac_addr, + ETH_ALEN); + OSAL_MEMCPY(ep->local_mac_addr, + local_mac_addr, + ETH_ALEN); + + OSAL_MEMCPY(&ep->cm_info, &cm_info, sizeof(ep->cm_info)); + + if (p_hwfn->p_rdma_info->iwarp.tcp_flags & ECORE_IWARP_TS_EN) + ts_hdr_size = TIMESTAMP_HEADER_SIZE; + + hdr_size = ((cm_info.ip_version == ECORE_TCP_IPV4) ? 40 : 60) + + ts_hdr_size; + ep->mss = p_hwfn->p_rdma_info->iwarp.max_mtu - hdr_size; + ep->mss = OSAL_MIN_T(u16, ECORE_IWARP_MAX_FW_MSS, ep->mss); + + ep->listener = listener; + ep->event_cb = listener->event_cb; + ep->cb_context = listener->cb_context; + ep->connect_mode = TCP_CONNECT_PASSIVE; + + ep->syn = buf; + ep->syn_ip_payload_length = (u16)payload_len; + ep->syn_phy_addr = buf->data_phys_addr + data->u.placement_offset + + tcp_start_offset; + + rc = ecore_iwarp_tcp_offload(p_hwfn, ep); + if (rc != ECORE_SUCCESS) { + ecore_iwarp_return_ep(p_hwfn, ep); + goto err; + } + return; + +err: + ecore_iwarp_ll2_post_rx( + p_hwfn, buf, p_hwfn->p_rdma_info->iwarp.ll2_syn_handle); +} + +static void +ecore_iwarp_ll2_rel_rx_pkt(void *cxt, + u8 OSAL_UNUSED connection_handle, + void *cookie, + dma_addr_t OSAL_UNUSED rx_buf_addr, + bool OSAL_UNUSED b_last_packet) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_iwarp_ll2_buff *buffer = + (struct ecore_iwarp_ll2_buff *)cookie; + + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + buffer->data, + buffer->data_phys_addr, + buffer->buff_size); + + OSAL_FREE(p_hwfn->p_dev, buffer); +} + +static void +ecore_iwarp_ll2_comp_tx_pkt(void *cxt, + u8 connection_handle, + void *cookie, + dma_addr_t OSAL_UNUSED first_frag_addr, + bool OSAL_UNUSED b_last_fragment, + bool OSAL_UNUSED b_last_packet) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_iwarp_ll2_buff *buffer = + (struct ecore_iwarp_ll2_buff *)cookie; + struct ecore_iwarp_ll2_buff *piggy; + + if (!buffer) /* can happen in packed mpa unaligned... */ + return; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "LL2 CompTX buf=%p piggy_buf=%p handle=%d\n", + buffer, buffer->piggy_buf, connection_handle); + + /* we got a tx packet -> this was originally a rx packet... now we + * can post it back... 
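+ * to the rx chain of the same ll2 connection; for the mpa handle this
+ * also kicks processing of any pending unaligned packets.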
+ */ + piggy = buffer->piggy_buf; + if (piggy) { + buffer->piggy_buf = OSAL_NULL; + ecore_iwarp_ll2_post_rx(p_hwfn, piggy, + connection_handle); + } + + ecore_iwarp_ll2_post_rx(p_hwfn, buffer, + connection_handle); + + if (connection_handle == p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle) + ecore_iwarp_process_pending_pkts(p_hwfn); + + return; +} + +static void +ecore_iwarp_ll2_rel_tx_pkt(void *cxt, + u8 OSAL_UNUSED connection_handle, + void *cookie, + dma_addr_t OSAL_UNUSED first_frag_addr, + bool OSAL_UNUSED b_last_fragment, + bool OSAL_UNUSED b_last_packet) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_iwarp_ll2_buff *buffer = + (struct ecore_iwarp_ll2_buff *)cookie; + + if (!buffer) + return; + + if (buffer->piggy_buf) { + OSAL_DMA_FREE_COHERENT( + p_hwfn->p_dev, + buffer->piggy_buf->data, + buffer->piggy_buf->data_phys_addr, + buffer->piggy_buf->buff_size); + + OSAL_FREE(p_hwfn->p_dev, buffer->piggy_buf); + } + + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + buffer->data, + buffer->data_phys_addr, + buffer->buff_size); + + OSAL_FREE(p_hwfn->p_dev, buffer); + return; +} + +/* Current known slowpath for iwarp ll2 is unalign flush. When this completion + * is received, need to reset the FPDU. + */ +static void +ecore_iwarp_ll2_slowpath(void *cxt, + u8 OSAL_UNUSED connection_handle, + u32 opaque_data_0, + u32 opaque_data_1) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct unaligned_opaque_data unalign_data; + struct ecore_iwarp_fpdu *fpdu; + + ecore_iwarp_mpa_get_data(p_hwfn, &unalign_data, + opaque_data_0, opaque_data_1); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "(0x%x) Flush fpdu\n", + unalign_data.cid); + + fpdu = ecore_iwarp_get_curr_fpdu(p_hwfn, (u16)unalign_data.cid); + if (fpdu) + OSAL_MEM_ZERO(fpdu, sizeof(*fpdu)); +} + +static int +ecore_iwarp_ll2_stop(struct ecore_hwfn *p_hwfn) +{ + struct ecore_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + int rc = 0; + + if (iwarp_info->ll2_syn_handle != ECORE_IWARP_HANDLE_INVAL) { + + rc = ecore_ll2_terminate_connection(p_hwfn, + iwarp_info->ll2_syn_handle); + if (rc) + DP_INFO(p_hwfn, "Failed to terminate syn connection\n"); + + ecore_ll2_release_connection(p_hwfn, + iwarp_info->ll2_syn_handle); + iwarp_info->ll2_syn_handle = ECORE_IWARP_HANDLE_INVAL; + } + + if (iwarp_info->ll2_ooo_handle != ECORE_IWARP_HANDLE_INVAL) { + rc = ecore_ll2_terminate_connection(p_hwfn, + iwarp_info->ll2_ooo_handle); + if (rc) + DP_INFO(p_hwfn, "Failed to terminate ooo connection\n"); + + ecore_ll2_release_connection(p_hwfn, + iwarp_info->ll2_ooo_handle); + iwarp_info->ll2_ooo_handle = ECORE_IWARP_HANDLE_INVAL; + } + + if (iwarp_info->ll2_mpa_handle != ECORE_IWARP_HANDLE_INVAL) { + rc = ecore_ll2_terminate_connection(p_hwfn, + iwarp_info->ll2_mpa_handle); + if (rc) + DP_INFO(p_hwfn, "Failed to terminate mpa connection\n"); + + ecore_ll2_release_connection(p_hwfn, + iwarp_info->ll2_mpa_handle); + iwarp_info->ll2_mpa_handle = ECORE_IWARP_HANDLE_INVAL; + } + + ecore_llh_remove_mac_filter(p_hwfn->p_dev, 0, + p_hwfn->p_rdma_info->iwarp.mac_addr); + + return rc; +} + +static int +ecore_iwarp_ll2_alloc_buffers(struct ecore_hwfn *p_hwfn, + int num_rx_bufs, + int buff_size, + u8 ll2_handle) +{ + struct ecore_iwarp_ll2_buff *buffer; + int rc = 0; + int i; + + for (i = 0; i < num_rx_bufs; i++) { + buffer = OSAL_ZALLOC(p_hwfn->p_dev, + GFP_KERNEL, sizeof(*buffer)); + if (!buffer) { + DP_INFO(p_hwfn, "Failed to allocate LL2 buffer desc\n"); + break; + } + + buffer->data = + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, + 
&buffer->data_phys_addr, + buff_size); + + if (!buffer->data) { + DP_INFO(p_hwfn, "Failed to allocate LL2 buffers\n"); + OSAL_FREE(p_hwfn->p_dev, buffer); + rc = ECORE_NOMEM; + break; + } + + buffer->buff_size = buff_size; + rc = ecore_iwarp_ll2_post_rx(p_hwfn, buffer, ll2_handle); + + if (rc) + break; /* buffers will be deallocated by ecore_ll2 */ + } + return rc; +} + +#define ECORE_IWARP_CACHE_PADDING(size) \ + (((size) + ETH_CACHE_LINE_SIZE - 1) & ~(ETH_CACHE_LINE_SIZE - 1)) + +#define ECORE_IWARP_MAX_BUF_SIZE(mtu) \ + ECORE_IWARP_CACHE_PADDING(mtu + ETH_HLEN + 2*VLAN_HLEN + 2 +\ + ETH_CACHE_LINE_SIZE) + +static int +ecore_iwarp_ll2_start(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_start_in_params *params) +{ + struct ecore_iwarp_info *iwarp_info; + struct ecore_ll2_acquire_data data; + struct ecore_ll2_cbs cbs; + u32 mpa_buff_size; + int rc = ECORE_SUCCESS; + u16 n_ooo_bufs; + int i; + + iwarp_info = &p_hwfn->p_rdma_info->iwarp; + iwarp_info->ll2_syn_handle = ECORE_IWARP_HANDLE_INVAL; + iwarp_info->ll2_ooo_handle = ECORE_IWARP_HANDLE_INVAL; + iwarp_info->ll2_mpa_handle = ECORE_IWARP_HANDLE_INVAL; + + iwarp_info->max_mtu = params->max_mtu; + + OSAL_MEMCPY(p_hwfn->p_rdma_info->iwarp.mac_addr, params->mac_addr, + ETH_ALEN); + + rc = ecore_llh_add_mac_filter(p_hwfn->p_dev, 0, params->mac_addr); + if (rc != ECORE_SUCCESS) + return rc; + + /* Start SYN connection */ + cbs.rx_comp_cb = ecore_iwarp_ll2_comp_syn_pkt; + cbs.rx_release_cb = ecore_iwarp_ll2_rel_rx_pkt; + cbs.tx_comp_cb = ecore_iwarp_ll2_comp_tx_pkt; + cbs.tx_release_cb = ecore_iwarp_ll2_rel_tx_pkt; + cbs.cookie = p_hwfn; + + OSAL_MEMSET(&data, 0, sizeof(data)); + data.input.conn_type = ECORE_LL2_TYPE_IWARP; + data.input.mtu = ECORE_IWARP_MAX_SYN_PKT_SIZE; + data.input.rx_num_desc = ECORE_IWARP_LL2_SYN_RX_SIZE; + data.input.tx_num_desc = ECORE_IWARP_LL2_SYN_TX_SIZE; + data.input.tx_max_bds_per_packet = 1; /* will never be fragmented */ + data.input.tx_tc = PKT_LB_TC; + data.input.tx_dest = ECORE_LL2_TX_DEST_LB; + data.p_connection_handle = &iwarp_info->ll2_syn_handle; + data.cbs = &cbs; + + rc = ecore_ll2_acquire_connection(p_hwfn, &data); + if (rc) { + DP_NOTICE(p_hwfn, false, "Failed to acquire LL2 connection\n"); + ecore_llh_remove_mac_filter(p_hwfn->p_dev, 0, params->mac_addr); + return rc; + } + + rc = ecore_ll2_establish_connection(p_hwfn, iwarp_info->ll2_syn_handle); + if (rc) { + DP_NOTICE(p_hwfn, false, + "Failed to establish LL2 connection\n"); + goto err; + } + + rc = ecore_iwarp_ll2_alloc_buffers(p_hwfn, + ECORE_IWARP_LL2_SYN_RX_SIZE, + ECORE_IWARP_MAX_SYN_PKT_SIZE, + iwarp_info->ll2_syn_handle); + if (rc) + goto err; + + /* Start OOO connection */ + data.input.conn_type = ECORE_LL2_TYPE_OOO; + data.input.mtu = params->max_mtu; + + n_ooo_bufs = params->iwarp.ooo_num_rx_bufs; + + if (n_ooo_bufs > ECORE_IWARP_LL2_OOO_MAX_RX_SIZE) + n_ooo_bufs = ECORE_IWARP_LL2_OOO_MAX_RX_SIZE; + + data.input.rx_num_desc = n_ooo_bufs; + data.input.rx_num_ooo_buffers = n_ooo_bufs; + + p_hwfn->p_rdma_info->iwarp.num_ooo_rx_bufs = data.input.rx_num_desc; + data.input.tx_max_bds_per_packet = 1; /* will never be fragmented */ + data.input.tx_num_desc = ECORE_IWARP_LL2_OOO_DEF_TX_SIZE; + data.p_connection_handle = &iwarp_info->ll2_ooo_handle; + data.input.secondary_queue = true; + + rc = ecore_ll2_acquire_connection(p_hwfn, &data); + if (rc) + goto err; + + rc = ecore_ll2_establish_connection(p_hwfn, iwarp_info->ll2_ooo_handle); + if (rc) + goto err; + + /* Start MPA connection */ + cbs.rx_comp_cb = ecore_iwarp_ll2_comp_mpa_pkt; + 
cbs.slowpath_cb = ecore_iwarp_ll2_slowpath; + + OSAL_MEMSET(&data, 0, sizeof(data)); + data.input.conn_type = ECORE_LL2_TYPE_IWARP; + data.input.mtu = params->max_mtu; + data.input.rx_num_desc = n_ooo_bufs * 2; + /* we allocate the same amount for TX to reduce the chance we + * run out of tx descriptors + */ + data.input.tx_num_desc = data.input.rx_num_desc; + data.input.tx_max_bds_per_packet = ECORE_IWARP_MAX_BDS_PER_FPDU; + data.p_connection_handle = &iwarp_info->ll2_mpa_handle; + data.input.secondary_queue = true; + data.cbs = &cbs; + + rc = ecore_ll2_acquire_connection(p_hwfn, &data); + if (rc) + goto err; + + rc = ecore_ll2_establish_connection(p_hwfn, iwarp_info->ll2_mpa_handle); + if (rc) + goto err; + + mpa_buff_size = ECORE_IWARP_MAX_BUF_SIZE(params->max_mtu); + rc = ecore_iwarp_ll2_alloc_buffers(p_hwfn, + data.input.rx_num_desc, + mpa_buff_size, + iwarp_info->ll2_mpa_handle); + if (rc) + goto err; + + iwarp_info->partial_fpdus = + OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(*iwarp_info->partial_fpdus) * + (u16)p_hwfn->p_rdma_info->num_qps); + + if (!iwarp_info->partial_fpdus) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate ecore_iwarp_info(partial_fpdus)\n"); + goto err; + } + + iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps; + + /* The mpa_bufs array serves for pending RX packets received on the + * mpa ll2 that don't have place on the tx ring and require later + * processing. We can't fail on allocation of such a struct therefore + * we allocate enough to take care of all rx packets + */ + iwarp_info->mpa_bufs = + OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(*iwarp_info->mpa_bufs) * + data.input.rx_num_desc); + + if (!iwarp_info->mpa_bufs) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate mpa_bufs array mem_size=%d\n", + (u32)(sizeof(*iwarp_info->mpa_bufs) * + data.input.rx_num_desc)); + goto err; + } + + iwarp_info->mpa_intermediate_buf = + OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, mpa_buff_size); + if (!iwarp_info->mpa_intermediate_buf) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate mpa_intermediate_buf mem_size=%d\n", + mpa_buff_size); + goto err; + } + + OSAL_LIST_INIT(&iwarp_info->mpa_buf_pending_list); + OSAL_LIST_INIT(&iwarp_info->mpa_buf_list); + for (i = 0; i < data.input.rx_num_desc; i++) { + OSAL_LIST_PUSH_TAIL(&iwarp_info->mpa_bufs[i].list_entry, + &iwarp_info->mpa_buf_list); + } + + return rc; + +err: + ecore_iwarp_ll2_stop(p_hwfn); + + return rc; +} + +static void +ecore_iwarp_set_defaults(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_start_in_params *params) +{ + u32 rcv_wnd_size; + u32 n_ooo_bufs; + + /* rcv_wnd_size = 0: use defaults */ + rcv_wnd_size = params->iwarp.rcv_wnd_size; + if (!rcv_wnd_size) { + if (ecore_device_num_ports(p_hwfn->p_dev) == 4) { + rcv_wnd_size = ECORE_IS_AH(p_hwfn->p_dev) ? + ECORE_IWARP_RCV_WND_SIZE_AH_DEF_4_PORTS : + ECORE_IWARP_RCV_WND_SIZE_BB_DEF_4_PORTS; + } else { + rcv_wnd_size = ECORE_IS_AH(p_hwfn->p_dev) ? 
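+			/* same AH vs. BB chip choice as the 4-port case above */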
+ ECORE_IWARP_RCV_WND_SIZE_AH_DEF_2_PORTS : + ECORE_IWARP_RCV_WND_SIZE_BB_DEF_2_PORTS; + } + params->iwarp.rcv_wnd_size = rcv_wnd_size; + } + + n_ooo_bufs = params->iwarp.ooo_num_rx_bufs; + if (!n_ooo_bufs) { + n_ooo_bufs = (u32)(((u64)ECORE_MAX_OOO * + params->iwarp.rcv_wnd_size) / + params->max_mtu); + n_ooo_bufs = OSAL_MIN_T(u32, n_ooo_bufs, USHRT_MAX); + params->iwarp.ooo_num_rx_bufs = (u16)n_ooo_bufs; + } +} + +enum _ecore_status_t +ecore_iwarp_setup(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_start_in_params *params) +{ + enum _ecore_status_t rc = ECORE_SUCCESS; + struct ecore_iwarp_info *iwarp_info; + u32 rcv_wnd_size; + + iwarp_info = &(p_hwfn->p_rdma_info->iwarp); + + if (!params->iwarp.rcv_wnd_size || !params->iwarp.ooo_num_rx_bufs) + ecore_iwarp_set_defaults(p_hwfn, params); + + /* Scale 0 will set window of 0xFFFC (64K -4). + * Scale x will set window of 0xFFFC << (x) + * Therefore we subtract log2(64K) so that result is 0 + */ + rcv_wnd_size = params->iwarp.rcv_wnd_size; + if (rcv_wnd_size < ECORE_IWARP_RCV_WND_SIZE_MIN) + rcv_wnd_size = ECORE_IWARP_RCV_WND_SIZE_MIN; + + iwarp_info->rcv_wnd_scale = OSAL_MIN_T(u32, OSAL_LOG2(rcv_wnd_size) - + OSAL_LOG2(ECORE_IWARP_RCV_WND_SIZE_MIN), ECORE_IWARP_MAX_WND_SCALE); + iwarp_info->rcv_wnd_size = rcv_wnd_size >> iwarp_info->rcv_wnd_scale; + + iwarp_info->tcp_flags = params->iwarp.flags; + iwarp_info->crc_needed = params->iwarp.crc_needed; + switch (params->iwarp.mpa_rev) { + case ECORE_MPA_REV1: + iwarp_info->mpa_rev = MPA_NEGOTIATION_TYPE_BASIC; + break; + case ECORE_MPA_REV2: + iwarp_info->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED; + break; + } + + iwarp_info->peer2peer = params->iwarp.mpa_peer2peer; + iwarp_info->rtr_type = MPA_RTR_TYPE_NONE; + + if (params->iwarp.mpa_rtr & ECORE_MPA_RTR_TYPE_ZERO_SEND) + iwarp_info->rtr_type |= MPA_RTR_TYPE_ZERO_SEND; + + if (params->iwarp.mpa_rtr & ECORE_MPA_RTR_TYPE_ZERO_WRITE) + iwarp_info->rtr_type |= MPA_RTR_TYPE_ZERO_WRITE; + + if (params->iwarp.mpa_rtr & ECORE_MPA_RTR_TYPE_ZERO_READ) + iwarp_info->rtr_type |= MPA_RTR_TYPE_ZERO_READ; + + //DAVIDS OSAL_SPIN_LOCK_INIT(&p_hwfn->p_rdma_info->iwarp.qp_lock); + OSAL_LIST_INIT(&p_hwfn->p_rdma_info->iwarp.ep_list); + OSAL_LIST_INIT(&p_hwfn->p_rdma_info->iwarp.listen_list); + + ecore_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP, + ecore_iwarp_async_event); + ecore_ooo_setup(p_hwfn); + + rc = ecore_iwarp_ll2_start(p_hwfn, params); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "MPA_REV = %d. peer2peer=%d rtr=%x\n", + iwarp_info->mpa_rev, + iwarp_info->peer2peer, + iwarp_info->rtr_type); + + return rc; +} + +enum _ecore_status_t +ecore_iwarp_stop(struct ecore_hwfn *p_hwfn) +{ + enum _ecore_status_t rc; + + ecore_iwarp_free_prealloc_ep(p_hwfn); + rc = ecore_iwarp_wait_for_all_cids(p_hwfn); + if (rc != ECORE_SUCCESS) + return rc; + + ecore_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP); + + return ecore_iwarp_ll2_stop(p_hwfn); +} + +static void +ecore_iwarp_qp_in_error(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + u8 fw_return_code) +{ + struct ecore_iwarp_cm_event_params params; + + ecore_iwarp_modify_qp(p_hwfn, ep->qp, ECORE_IWARP_QP_STATE_ERROR, true); + + params.event = ECORE_IWARP_EVENT_CLOSE; + params.ep_context = ep; + params.cm_info = &ep->cm_info; + params.status = (fw_return_code == IWARP_QP_IN_ERROR_GOOD_CLOSE) ? 
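+		/* a graceful LLP close completes with success; anything
+		 * else is reported to the upper layer as a reset
+		 */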
+ ECORE_SUCCESS : ECORE_CONN_RESET; + + ep->state = ECORE_IWARP_EP_CLOSED; + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_LIST_REMOVE_ENTRY(&ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_list); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + ep->event_cb(ep->cb_context, ¶ms); +} + +static void +ecore_iwarp_exception_received(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + int fw_ret_code) +{ + struct ecore_iwarp_cm_event_params params; + bool event_cb = false; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "EP(0x%x) fw_ret_code=%d\n", + ep->cid, fw_ret_code); + + switch (fw_ret_code) { + case IWARP_EXCEPTION_DETECTED_LLP_CLOSED: + params.status = ECORE_SUCCESS; + params.event = ECORE_IWARP_EVENT_DISCONNECT; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_LLP_RESET: + params.status = ECORE_CONN_RESET; + params.event = ECORE_IWARP_EVENT_DISCONNECT; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_RQ_EMPTY: + params.event = ECORE_IWARP_EVENT_RQ_EMPTY; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_IRQ_FULL: + params.event = ECORE_IWARP_EVENT_IRQ_FULL; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_LLP_TIMEOUT: + params.event = ECORE_IWARP_EVENT_LLP_TIMEOUT; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_REMOTE_PROTECTION_ERROR: + params.event = ECORE_IWARP_EVENT_REMOTE_PROTECTION_ERROR; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_CQ_OVERFLOW: + params.event = ECORE_IWARP_EVENT_CQ_OVERFLOW; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_LOCAL_CATASTROPHIC: + params.event = ECORE_IWARP_EVENT_QP_CATASTROPHIC; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_LOCAL_ACCESS_ERROR: + params.event = ECORE_IWARP_EVENT_LOCAL_ACCESS_ERROR; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_REMOTE_OPERATION_ERROR: + params.event = ECORE_IWARP_EVENT_REMOTE_OPERATION_ERROR; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_TERMINATE_RECEIVED: + params.event = ECORE_IWARP_EVENT_TERMINATE_RECEIVED; + event_cb = true; + break; + default: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Unhandled exception received...\n"); + break; + } + + if (event_cb) { + params.ep_context = ep; + params.cm_info = &ep->cm_info; + ep->event_cb(ep->cb_context, ¶ms); + } +} + +static void +ecore_iwarp_tcp_connect_unsuccessful(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + u8 fw_return_code) +{ + struct ecore_iwarp_cm_event_params params; + + OSAL_MEM_ZERO(¶ms, sizeof(params)); + params.event = ECORE_IWARP_EVENT_ACTIVE_COMPLETE; + params.ep_context = ep; + params.cm_info = &ep->cm_info; + ep->state = ECORE_IWARP_EP_CLOSED; + + switch (fw_return_code) { + case IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "%s(0x%x) TCP connect got invalid packet\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), + ep->tcp_cid); + params.status = ECORE_CONN_RESET; + break; + case IWARP_CONN_ERROR_TCP_CONNECTION_RST: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "%s(0x%x) TCP Connection Reset\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), + ep->tcp_cid); + params.status = ECORE_CONN_RESET; + break; + case IWARP_CONN_ERROR_TCP_CONNECT_TIMEOUT: + DP_NOTICE(p_hwfn, false, "%s(0x%x) TCP timeout\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), + ep->tcp_cid); + params.status = ECORE_TIMEOUT; + break; + case IWARP_CONN_ERROR_MPA_NOT_SUPPORTED_VER: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA not supported VER\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), + ep->tcp_cid); + params.status = 
ECORE_CONN_REFUSED; + break; + case IWARP_CONN_ERROR_MPA_INVALID_PACKET: + DP_NOTICE(p_hwfn, false, "%s(0x%x) MPA Invalid Packet\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid); + params.status = ECORE_CONN_RESET; + break; + default: + DP_ERR(p_hwfn, "%s(0x%x) Unexpected return code tcp connect: %d\n", + ECORE_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid, + fw_return_code); + params.status = ECORE_CONN_RESET; + break; + } + + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + ep->tcp_cid = ECORE_IWARP_INVALID_TCP_CID; + ecore_iwarp_return_ep(p_hwfn, ep); + } else { + ep->event_cb(ep->cb_context, ¶ms); + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_LIST_REMOVE_ENTRY(&ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_list); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + } +} + +static void +ecore_iwarp_connect_complete(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep, + u8 fw_return_code) +{ + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + /* Done with the SYN packet, post back to ll2 rx */ + ecore_iwarp_ll2_post_rx( + p_hwfn, ep->syn, + p_hwfn->p_rdma_info->iwarp.ll2_syn_handle); + + ep->syn = OSAL_NULL; + + if (ep->state == ECORE_IWARP_EP_ABORTING) + return; + + /* If connect failed - upper layer doesn't know about it */ + if (fw_return_code == RDMA_RETURN_OK) + ecore_iwarp_mpa_received(p_hwfn, ep); + else + ecore_iwarp_tcp_connect_unsuccessful(p_hwfn, ep, + fw_return_code); + + } else { + if (fw_return_code == RDMA_RETURN_OK) + ecore_iwarp_mpa_offload(p_hwfn, ep); + else + ecore_iwarp_tcp_connect_unsuccessful(p_hwfn, ep, + fw_return_code); + } +} + +static OSAL_INLINE bool +ecore_iwarp_check_ep_ok(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_ep *ep) +{ + if (ep == OSAL_NULL) { + DP_ERR(p_hwfn, "ERROR ON ASYNC ep=%p\n", ep); + return false; + } + + if (ep->sig != 0xdeadbeef) { + DP_ERR(p_hwfn, "ERROR ON ASYNC ep=%p\n", ep); + return false; + } + + return true; +} + +static enum _ecore_status_t +ecore_iwarp_async_event(struct ecore_hwfn *p_hwfn, + u8 fw_event_code, + u16 OSAL_UNUSED echo, + union event_ring_data *data, + u8 fw_return_code) +{ + struct regpair *fw_handle = &data->rdma_data.async_handle; + struct ecore_iwarp_ep *ep = OSAL_NULL; + u16 cid; + + ep = (struct ecore_iwarp_ep *)(osal_uintptr_t)HILO_64(fw_handle->hi, + fw_handle->lo); + + switch (fw_event_code) { + /* Async completion after TCP 3-way handshake */ + case IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE: + if (!ecore_iwarp_check_ep_ok(p_hwfn, ep)) + return ECORE_INVAL; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "EP(0x%x) IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE fw_ret_code=%d\n", + ep->tcp_cid, fw_return_code); + ecore_iwarp_connect_complete(p_hwfn, ep, fw_return_code); + break; + case IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED: + if (!ecore_iwarp_check_ep_ok(p_hwfn, ep)) + return ECORE_INVAL; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED fw_ret_code=%d\n", + ep->cid, fw_return_code); + ecore_iwarp_exception_received(p_hwfn, ep, fw_return_code); + break; + /* Async completion for Close Connection ramrod */ + case IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE: + if (!ecore_iwarp_check_ep_ok(p_hwfn, ep)) + return ECORE_INVAL; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE fw_ret_code=%d\n", + ep->cid, fw_return_code); + ecore_iwarp_qp_in_error(p_hwfn, ep, fw_return_code); + break; + /* Async event for active side only */ + case IWARP_EVENT_TYPE_ASYNC_ENHANCED_MPA_REPLY_ARRIVED: + if 
(!ecore_iwarp_check_ep_ok(p_hwfn, ep)) + return ECORE_INVAL; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_MPA_REPLY_ARRIVED fw_ret_code=%d\n", + ep->cid, fw_return_code); + ecore_iwarp_mpa_reply_arrived(p_hwfn, ep); + break; + /* MPA Negotiations completed */ + case IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE: + if (!ecore_iwarp_check_ep_ok(p_hwfn, ep)) + return ECORE_INVAL; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE fw_ret_code=%d\n", + ep->cid, fw_return_code); + ecore_iwarp_mpa_complete(p_hwfn, ep, fw_return_code); + break; + case IWARP_EVENT_TYPE_ASYNC_CID_CLEANED: + cid = (u16)OSAL_LE32_TO_CPU(fw_handle->lo); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "(0x%x)IWARP_EVENT_TYPE_ASYNC_CID_CLEANED\n", + cid); + ecore_iwarp_cid_cleaned(p_hwfn, cid); + + break; + case IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW: + DP_NOTICE(p_hwfn, false, + "IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW\n"); + + p_hwfn->p_rdma_info->events.affiliated_event( + p_hwfn->p_rdma_info->events.context, + ECORE_IWARP_EVENT_CQ_OVERFLOW, + (void *)fw_handle); + break; + default: + DP_ERR(p_hwfn, "Received unexpected async iwarp event %d\n", + fw_event_code); + return ECORE_INVAL; + } + return ECORE_SUCCESS; +} + +enum _ecore_status_t +ecore_iwarp_create_listen(void *rdma_cxt, + struct ecore_iwarp_listen_in *iparams, + struct ecore_iwarp_listen_out *oparams) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_iwarp_listener *listener; + + listener = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*listener)); + + if (!listener) { + DP_NOTICE(p_hwfn, + false, + "ecore iwarp create listener failed: cannot allocate memory (listener). rc = %d\n", + ECORE_NOMEM); + return ECORE_NOMEM; + } + listener->ip_version = iparams->ip_version; + OSAL_MEMCPY(listener->ip_addr, + iparams->ip_addr, + sizeof(listener->ip_addr)); + listener->port = iparams->port; + listener->vlan = iparams->vlan; + + listener->event_cb = iparams->event_cb; + listener->cb_context = iparams->cb_context; + listener->max_backlog = iparams->max_backlog; + listener->state = ECORE_IWARP_LISTENER_STATE_ACTIVE; + oparams->handle = listener; + + OSAL_SPIN_LOCK_INIT(&listener->lock); + OSAL_LIST_INIT(&listener->ep_list); + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + OSAL_LIST_PUSH_TAIL(&listener->list_entry, + &p_hwfn->p_rdma_info->iwarp.listen_list); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "callback=%p handle=%p ip=%x:%x:%x:%x port=0x%x vlan=0x%x\n", + listener->event_cb, + listener, + listener->ip_addr[0], + listener->ip_addr[1], + listener->ip_addr[2], + listener->ip_addr[3], + listener->port, + listener->vlan); + + return ECORE_SUCCESS; +} + +static void +ecore_iwarp_pause_complete(struct ecore_iwarp_listener *listener) +{ + struct ecore_iwarp_cm_event_params params; + + if (listener->state == ECORE_IWARP_LISTENER_STATE_UNPAUSE) + listener->state = ECORE_IWARP_LISTENER_STATE_ACTIVE; + + params.event = ECORE_IWARP_EVENT_LISTEN_PAUSE_COMP; + listener->event_cb(listener->cb_context, ¶ms); +} + +static void +ecore_iwarp_tcp_abort_comp(struct ecore_hwfn *p_hwfn, void *cookie, + union event_ring_data OSAL_UNUSED *data, + u8 OSAL_UNUSED fw_return_code) +{ + struct ecore_iwarp_ep *ep = (struct ecore_iwarp_ep *)cookie; + struct ecore_iwarp_listener *listener = ep->listener; + + ecore_iwarp_return_ep(p_hwfn, ep); + + if (OSAL_LIST_IS_EMPTY(&listener->ep_list)) + listener->done = true; +} + +static void 
+ecore_iwarp_abort_inflight_connections(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_listener *listener) +{ + struct ecore_spq_entry *p_ent = OSAL_NULL; + struct ecore_iwarp_ep *ep = OSAL_NULL; + struct ecore_sp_init_data init_data; + struct ecore_spq_comp_cb comp_data; + enum _ecore_status_t rc; + + /* remove listener from list before destroying listener */ + OSAL_LIST_REMOVE_ENTRY(&listener->list_entry, + &p_hwfn->p_rdma_info->iwarp.listen_list); + if (OSAL_LIST_IS_EMPTY(&listener->ep_list)) { + listener->done = true; + return; + } + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.p_comp_data = &comp_data; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_CB; + init_data.p_comp_data->function = ecore_iwarp_tcp_abort_comp; + + OSAL_LIST_FOR_EACH_ENTRY(ep, &listener->ep_list, + list_entry, struct ecore_iwarp_ep) { + ep->state = ECORE_IWARP_EP_ABORTING; + init_data.p_comp_data->cookie = ep; + init_data.cid = ep->tcp_cid; + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_ABORT_TCP_OFFLOAD, + PROTOCOLID_IWARP, + &init_data); + if (rc == ECORE_SUCCESS) + ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + } +} + +static void +ecore_iwarp_listener_state_transition(struct ecore_hwfn *p_hwfn, void *cookie, + union event_ring_data OSAL_UNUSED *data, + u8 OSAL_UNUSED fw_return_code) +{ + struct ecore_iwarp_listener *listener = (struct ecore_iwarp_listener *)cookie; + + switch (listener->state) { + case ECORE_IWARP_LISTENER_STATE_PAUSE: + case ECORE_IWARP_LISTENER_STATE_UNPAUSE: + ecore_iwarp_pause_complete(listener); + break; + case ECORE_IWARP_LISTENER_STATE_DESTROYING: + ecore_iwarp_abort_inflight_connections(p_hwfn, listener); + break; + default: + break; + } +} + +static enum _ecore_status_t +ecore_iwarp_empty_ramrod(struct ecore_hwfn *p_hwfn, + struct ecore_iwarp_listener *listener) +{ + struct ecore_spq_entry *p_ent = OSAL_NULL; + struct ecore_spq_comp_cb comp_data; + struct ecore_sp_init_data init_data; + enum _ecore_status_t rc; + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.p_comp_data = &comp_data; + init_data.cid = ecore_spq_get_cid(p_hwfn); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_CB; + init_data.p_comp_data->function = ecore_iwarp_listener_state_transition; + init_data.p_comp_data->cookie = listener; + rc = ecore_sp_init_request(p_hwfn, &p_ent, + COMMON_RAMROD_EMPTY, + PROTOCOLID_COMMON, + &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + return rc; + + return rc; +} + +enum _ecore_status_t +ecore_iwarp_pause_listen(void *rdma_cxt, void *handle, + bool pause, bool comp) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_iwarp_listener *listener = + (struct ecore_iwarp_listener *)handle; + enum _ecore_status_t rc; + + listener->state = pause ? 
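+		/* this state steers what the empty-ramrod completion
+		 * callback does once the slowpath queue drains
+		 */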
+ ECORE_IWARP_LISTENER_STATE_PAUSE : + ECORE_IWARP_LISTENER_STATE_UNPAUSE; + if (!comp) + return ECORE_SUCCESS; + + rc = ecore_iwarp_empty_ramrod(p_hwfn, listener); + if (rc != ECORE_SUCCESS) + return rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "listener=%p, state=%d\n", + listener, listener->state); + + return ECORE_PENDING; +} + +enum _ecore_status_t +ecore_iwarp_destroy_listen(void *rdma_cxt, void *handle) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_iwarp_listener *listener = + (struct ecore_iwarp_listener *)handle; + enum _ecore_status_t rc; + int wait_count = 0; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "handle=%p\n", handle); + + listener->state = ECORE_IWARP_LISTENER_STATE_DESTROYING; + rc = ecore_iwarp_empty_ramrod(p_hwfn, listener); + if (rc != ECORE_SUCCESS) + return rc; + + while (!listener->done) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Waiting for ep list to be empty...\n"); + OSAL_MSLEEP(100); + if (wait_count++ > 200) { + DP_NOTICE(p_hwfn, false, "ep list close timeout\n"); + break; + } + } + + OSAL_FREE(p_hwfn->p_dev, listener); + + return ECORE_SUCCESS; +} + +enum _ecore_status_t +ecore_iwarp_send_rtr(void *rdma_cxt, struct ecore_iwarp_send_rtr_in *iparams) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + struct ecore_rdma_qp *qp; + struct ecore_iwarp_ep *ep; + enum _ecore_status_t rc; + + ep = (struct ecore_iwarp_ep *)iparams->ep_context; + if (!ep) { + DP_ERR(p_hwfn, "Ep Context receive in send_rtr is NULL\n"); + return ECORE_INVAL; + } + + qp = ep->qp; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x) EP(0x%x)\n", + qp->icid, ep->tcp_cid); + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_CB; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_MPA_OFFLOAD_SEND_RTR, + PROTOCOLID_IWARP, &init_data); + + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ecore_iwarp_send_rtr, rc = 0x%x\n", + rc); + + return rc; +} + +enum _ecore_status_t +ecore_iwarp_query_qp(struct ecore_rdma_qp *qp, + struct ecore_rdma_query_qp_out_params *out_params) +{ + out_params->state = ecore_iwarp2roce_state(qp->iwarp_state); + return ECORE_SUCCESS; +} + +#ifdef _NTDDK_ +#pragma warning(pop) +#endif diff --git a/sys/dev/qlnx/qlnxe/ecore_ll2.c b/sys/dev/qlnx/qlnxe/ecore_ll2.c new file mode 100644 index 000000000000..95b31d3bebc9 --- /dev/null +++ b/sys/dev/qlnx/qlnxe/ecore_ll2.c @@ -0,0 +1,2211 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * File : ecore_ll2.c
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "bcm_osal.h"
+
+#include "ecore.h"
+#include "ecore_status.h"
+#include "ecore_ll2.h"
+#include "reg_addr.h"
+#include "ecore_int.h"
+#include "ecore_cxt.h"
+#include "ecore_sp_commands.h"
+#include "ecore_hw.h"
+#include "reg_addr.h"
+#include "ecore_dev_api.h"
+#include "ecore_iro.h"
+#include "ecore_gtt_reg_addr.h"
+#include "ecore_ooo.h"
+#include "ecore_hw.h"
+#include "ecore_mcp.h"
+
+#define ECORE_LL2_RX_REGISTERED(ll2)	((ll2)->rx_queue.b_cb_registred)
+#define ECORE_LL2_TX_REGISTERED(ll2)	((ll2)->tx_queue.b_cb_registred)
+
+#ifdef _NTDDK_
+#pragma warning(push)
+#pragma warning(disable : 28167)
+#pragma warning(disable : 28123)
+#pragma warning(disable : 28121)
+#endif
+
+static struct ecore_ll2_info *
+__ecore_ll2_handle_sanity(struct ecore_hwfn *p_hwfn,
+			  u8 connection_handle,
+			  bool b_lock, bool b_only_active)
+{
+	struct ecore_ll2_info *p_ll2_conn, *p_ret = OSAL_NULL;
+
+	if (connection_handle >= ECORE_MAX_NUM_OF_LL2_CONNECTIONS)
+		return OSAL_NULL;
+
+	if (!p_hwfn->p_ll2_info)
+		return OSAL_NULL;
+
+	/* TODO - is there really a need for the locked vs. unlocked
+	 * variant? I simply used what was already there.
+	 */
+	p_ll2_conn = &p_hwfn->p_ll2_info[connection_handle];
+
+	if (b_only_active) {
+		if (b_lock)
+			OSAL_MUTEX_ACQUIRE(&p_ll2_conn->mutex);
+		if (p_ll2_conn->b_active)
+			p_ret = p_ll2_conn;
+		if (b_lock)
+			OSAL_MUTEX_RELEASE(&p_ll2_conn->mutex);
+	} else {
+		p_ret = p_ll2_conn;
+	}
+
+	return p_ret;
+}
+
+static struct ecore_ll2_info *
+ecore_ll2_handle_sanity(struct ecore_hwfn *p_hwfn,
+			u8 connection_handle)
+{
+	return __ecore_ll2_handle_sanity(p_hwfn, connection_handle,
+					 false, true);
+}
+
+static struct ecore_ll2_info *
+ecore_ll2_handle_sanity_lock(struct ecore_hwfn *p_hwfn,
+			     u8 connection_handle)
+{
+	return __ecore_ll2_handle_sanity(p_hwfn, connection_handle,
+					 true, true);
+}
+
+static struct ecore_ll2_info *
+ecore_ll2_handle_sanity_inactive(struct ecore_hwfn *p_hwfn,
+				 u8 connection_handle)
+{
+	return __ecore_ll2_handle_sanity(p_hwfn, connection_handle,
+					 false, false);
+}
+
+#ifndef LINUX_REMOVE
+/* TODO - is this really being used by anyone? Is it on a future todo list? 
*/ +enum _ecore_status_t +ecore_ll2_get_fragment_of_tx_packet(struct ecore_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t *p_addr, + bool *b_last_fragment) +{ + struct ecore_ll2_tx_packet *p_pkt; + struct ecore_ll2_info *p_ll2_conn; + u16 cur_frag_idx = 0; + + p_ll2_conn = ecore_ll2_handle_sanity(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) + return ECORE_INVAL; + p_pkt = &p_ll2_conn->tx_queue.cur_completing_packet; + + if (!p_ll2_conn->tx_queue.b_completing_packet || !p_addr) + return ECORE_INVAL; + + if (p_ll2_conn->tx_queue.cur_completing_bd_idx == p_pkt->bd_used) + return ECORE_INVAL; + + /* Packet is available and has at least one more frag - provide it */ + cur_frag_idx = p_ll2_conn->tx_queue.cur_completing_bd_idx++; + *p_addr = p_pkt->bds_set[cur_frag_idx].tx_frag; + if (b_last_fragment) + *b_last_fragment = p_pkt->bd_used == + p_ll2_conn->tx_queue.cur_completing_bd_idx; + + return ECORE_SUCCESS; +} +#endif + +static void ecore_ll2_txq_flush(struct ecore_hwfn *p_hwfn, + u8 connection_handle) +{ + bool b_last_packet = false, b_last_frag = false; + struct ecore_ll2_tx_packet *p_pkt = OSAL_NULL; + struct ecore_ll2_info *p_ll2_conn; + struct ecore_ll2_tx_queue *p_tx; + unsigned long flags = 0; + dma_addr_t tx_frag; + + p_ll2_conn = ecore_ll2_handle_sanity_inactive(p_hwfn, + connection_handle); + if (p_ll2_conn == OSAL_NULL) + return; + p_tx = &p_ll2_conn->tx_queue; + + OSAL_SPIN_LOCK_IRQSAVE(&p_tx->lock, flags); + while (!OSAL_LIST_IS_EMPTY(&p_tx->active_descq)) { + p_pkt = OSAL_LIST_FIRST_ENTRY(&p_tx->active_descq, + struct ecore_ll2_tx_packet, + list_entry); + + if (p_pkt == OSAL_NULL) + break; + +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, + &p_tx->active_descq); + b_last_packet = OSAL_LIST_IS_EMPTY(&p_tx->active_descq); + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, + &p_tx->free_descq); + OSAL_SPIN_UNLOCK_IRQSAVE(&p_tx->lock, flags); + if (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_OOO) { + struct ecore_ooo_buffer *p_buffer; + + p_buffer = (struct ecore_ooo_buffer *)p_pkt->cookie; + ecore_ooo_put_free_buffer(p_hwfn->p_ooo_info, p_buffer); + } else { + p_tx->cur_completing_packet = *p_pkt; + p_tx->cur_completing_bd_idx = 1; + b_last_frag = p_tx->cur_completing_bd_idx == + p_pkt->bd_used; + + tx_frag = p_pkt->bds_set[0].tx_frag; + p_ll2_conn->cbs.tx_release_cb(p_ll2_conn->cbs.cookie, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, + b_last_packet); + } + OSAL_SPIN_LOCK_IRQSAVE(&p_tx->lock, flags); + } + OSAL_SPIN_UNLOCK_IRQSAVE(&p_tx->lock, flags); +} + +static enum _ecore_status_t +ecore_ll2_txq_completion(struct ecore_hwfn *p_hwfn, + void *p_cookie) +{ + struct ecore_ll2_info *p_ll2_conn = (struct ecore_ll2_info*)p_cookie; + struct ecore_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + u16 new_idx = 0, num_bds = 0, num_bds_in_packet = 0; + struct ecore_ll2_tx_packet *p_pkt; + bool b_last_frag = false; + unsigned long flags; + enum _ecore_status_t rc = ECORE_INVAL; + + OSAL_SPIN_LOCK_IRQSAVE(&p_tx->lock, flags); + if (p_tx->b_completing_packet) { + /* TODO - this looks completely unnecessary to me - the only + * way we can re-enter is by the DPC calling us again, but this + * would only happen AFTER we return, and we unset this at end + * of the function. 
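+	 * Keeping the check is harmless either way: a re-entrant
+	 * caller simply backs off with ECORE_BUSY below instead of
+	 * racing on the queue state.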
+ */ + rc = ECORE_BUSY; + goto out; + } + + new_idx = OSAL_LE16_TO_CPU(*p_tx->p_fw_cons); + num_bds = ((s16)new_idx - (s16)p_tx->bds_idx); + while (num_bds) { + if (OSAL_LIST_IS_EMPTY(&p_tx->active_descq)) + goto out; + + p_pkt = OSAL_LIST_FIRST_ENTRY(&p_tx->active_descq, + struct ecore_ll2_tx_packet, + list_entry); + if (!p_pkt) + goto out; + + p_tx->b_completing_packet = true; + p_tx->cur_completing_packet = *p_pkt; + num_bds_in_packet = p_pkt->bd_used; +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, + &p_tx->active_descq); + + if (num_bds < num_bds_in_packet) { + DP_NOTICE(p_hwfn, true, + "Rest of BDs does not cover whole packet\n"); + goto out; + } + + num_bds -= num_bds_in_packet; + p_tx->bds_idx += num_bds_in_packet; + while (num_bds_in_packet--) + ecore_chain_consume(&p_tx->txq_chain); + + p_tx->cur_completing_bd_idx = 1; + b_last_frag = p_tx->cur_completing_bd_idx == + p_pkt->bd_used; + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, + &p_tx->free_descq); + + OSAL_SPIN_UNLOCK_IRQSAVE(&p_tx->lock, flags); + + p_ll2_conn->cbs.tx_comp_cb(p_ll2_conn->cbs.cookie, + p_ll2_conn->my_id, + p_pkt->cookie, + p_pkt->bds_set[0].tx_frag, + b_last_frag, + !num_bds); + + OSAL_SPIN_LOCK_IRQSAVE(&p_tx->lock, flags); + } + + p_tx->b_completing_packet = false; + rc = ECORE_SUCCESS; +out: + OSAL_SPIN_UNLOCK_IRQSAVE(&p_tx->lock, flags); + return rc; +} + +static void ecore_ll2_rxq_parse_gsi(union core_rx_cqe_union *p_cqe, + struct ecore_ll2_comp_rx_data *data) +{ + data->parse_flags = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_gsi.parse_flags.flags); + data->length.data_length = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_gsi.data_length); + data->vlan = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_gsi.vlan); + data->opaque_data_0 = + OSAL_LE32_TO_CPU(p_cqe->rx_cqe_gsi.src_mac_addrhi); + data->opaque_data_1 = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_gsi.src_mac_addrlo); + data->u.data_length_error = + p_cqe->rx_cqe_gsi.data_length_error; + data->qp_id = OSAL_LE16_TO_CPU(p_cqe->rx_cqe_gsi.qp_id); + + data->src_qp = OSAL_LE32_TO_CPU(p_cqe->rx_cqe_gsi.src_qp); +} + +static void ecore_ll2_rxq_parse_reg(union core_rx_cqe_union *p_cqe, + struct ecore_ll2_comp_rx_data *data) +{ + data->parse_flags = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_fp.parse_flags.flags); + data->err_flags = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_fp.err_flags.flags); + data->length.packet_length = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_fp.packet_length); + data->vlan = + OSAL_LE16_TO_CPU(p_cqe->rx_cqe_fp.vlan); + data->opaque_data_0 = + OSAL_LE32_TO_CPU(p_cqe->rx_cqe_fp.opaque_data.data[0]); + data->opaque_data_1 = + OSAL_LE32_TO_CPU(p_cqe->rx_cqe_fp.opaque_data.data[1]); + data->u.placement_offset = + p_cqe->rx_cqe_fp.placement_offset; +} + +#if defined(_NTDDK_) +#pragma warning(suppress : 28167 26110) +#endif +static enum _ecore_status_t +ecore_ll2_handle_slowpath(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn, + union core_rx_cqe_union *p_cqe, + unsigned long *p_lock_flags) +{ + struct ecore_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct core_rx_slow_path_cqe *sp_cqe; + + sp_cqe = &p_cqe->rx_cqe_sp; + if (sp_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH) { + DP_NOTICE(p_hwfn, true, + "LL2 - unexpected Rx CQE slowpath ramrod_cmd_id:%d\n", + sp_cqe->ramrod_cmd_id); + return ECORE_INVAL; + } + + if (p_ll2_conn->cbs.slowpath_cb == OSAL_NULL) { + DP_NOTICE(p_hwfn, true, + "LL2 - received RX_QUEUE_FLUSH but no callback was provided\n"); + return ECORE_INVAL; + } + + OSAL_SPIN_UNLOCK_IRQSAVE(&p_rx->lock, 
*p_lock_flags); + + p_ll2_conn->cbs.slowpath_cb(p_ll2_conn->cbs.cookie, + p_ll2_conn->my_id, + OSAL_LE32_TO_CPU(sp_cqe->opaque_data.data[0]), + OSAL_LE32_TO_CPU(sp_cqe->opaque_data.data[1])); + + OSAL_SPIN_LOCK_IRQSAVE(&p_rx->lock, *p_lock_flags); + + return ECORE_SUCCESS; +} + +static enum _ecore_status_t +ecore_ll2_rxq_handle_completion(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn, + union core_rx_cqe_union *p_cqe, + unsigned long *p_lock_flags, + bool b_last_cqe) +{ + struct ecore_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct ecore_ll2_rx_packet *p_pkt = OSAL_NULL; + struct ecore_ll2_comp_rx_data data; + + if (!OSAL_LIST_IS_EMPTY(&p_rx->active_descq)) + p_pkt = OSAL_LIST_FIRST_ENTRY(&p_rx->active_descq, + struct ecore_ll2_rx_packet, + list_entry); + if (!p_pkt) { + DP_NOTICE(p_hwfn, false, + "[%d] LL2 Rx completion but active_descq is empty\n", + p_ll2_conn->input.conn_type); + + return ECORE_IO; + } + + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, &p_rx->active_descq); + + if (p_cqe->rx_cqe_sp.type == CORE_RX_CQE_TYPE_REGULAR) + ecore_ll2_rxq_parse_reg(p_cqe, &data); + else + ecore_ll2_rxq_parse_gsi(p_cqe, &data); + + if (ecore_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd) { + DP_NOTICE(p_hwfn, false, + "Mismatch between active_descq and the LL2 Rx chain\n"); + /* TODO - didn't return error value since this wasn't handled + * before, but this is obviously lacking. + */ + } + + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, &p_rx->free_descq); + + data.connection_handle = p_ll2_conn->my_id; + data.cookie = p_pkt->cookie; + data.rx_buf_addr = p_pkt->rx_buf_addr; + data.b_last_packet = b_last_cqe; + + OSAL_SPIN_UNLOCK_IRQSAVE(&p_rx->lock, *p_lock_flags); + p_ll2_conn->cbs.rx_comp_cb(p_ll2_conn->cbs.cookie, + &data); + + OSAL_SPIN_LOCK_IRQSAVE(&p_rx->lock, *p_lock_flags); + + return ECORE_SUCCESS; +} + +static enum _ecore_status_t ecore_ll2_rxq_completion(struct ecore_hwfn *p_hwfn, + void *cookie) +{ + struct ecore_ll2_info *p_ll2_conn = (struct ecore_ll2_info*)cookie; + struct ecore_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + union core_rx_cqe_union *cqe = OSAL_NULL; + u16 cq_new_idx = 0, cq_old_idx = 0; + unsigned long flags = 0; + enum _ecore_status_t rc = ECORE_SUCCESS; + + OSAL_SPIN_LOCK_IRQSAVE(&p_rx->lock, flags); + cq_new_idx = OSAL_LE16_TO_CPU(*p_rx->p_fw_cons); + cq_old_idx = ecore_chain_get_cons_idx(&p_rx->rcq_chain); + + while (cq_new_idx != cq_old_idx) { + bool b_last_cqe = (cq_new_idx == cq_old_idx); + + cqe = (union core_rx_cqe_union *)ecore_chain_consume(&p_rx->rcq_chain); + cq_old_idx = ecore_chain_get_cons_idx(&p_rx->rcq_chain); + + DP_VERBOSE(p_hwfn, ECORE_MSG_LL2, + "LL2 [sw. cons %04x, fw. 
at %04x] - Got Packet of type %02x\n", + cq_old_idx, cq_new_idx, cqe->rx_cqe_sp.type); + + switch (cqe->rx_cqe_sp.type) { + case CORE_RX_CQE_TYPE_SLOW_PATH: + rc = ecore_ll2_handle_slowpath(p_hwfn, p_ll2_conn, + cqe, &flags); + break; + case CORE_RX_CQE_TYPE_GSI_OFFLOAD: + case CORE_RX_CQE_TYPE_REGULAR: + rc = ecore_ll2_rxq_handle_completion(p_hwfn, p_ll2_conn, + cqe, &flags, + b_last_cqe); + break; + default: + rc = ECORE_IO; + } + } + + OSAL_SPIN_UNLOCK_IRQSAVE(&p_rx->lock, flags); + return rc; +} + +static void ecore_ll2_rxq_flush(struct ecore_hwfn *p_hwfn, + u8 connection_handle) +{ + struct ecore_ll2_info *p_ll2_conn = OSAL_NULL; + struct ecore_ll2_rx_packet *p_pkt = OSAL_NULL; + struct ecore_ll2_rx_queue *p_rx; + unsigned long flags = 0; + + p_ll2_conn = ecore_ll2_handle_sanity_inactive(p_hwfn, + connection_handle); + if (p_ll2_conn == OSAL_NULL) + return; + p_rx = &p_ll2_conn->rx_queue; + + OSAL_SPIN_LOCK_IRQSAVE(&p_rx->lock, flags); + while (!OSAL_LIST_IS_EMPTY(&p_rx->active_descq)) { + bool b_last; + p_pkt = OSAL_LIST_FIRST_ENTRY(&p_rx->active_descq, + struct ecore_ll2_rx_packet, + list_entry); + if (p_pkt == OSAL_NULL) + break; +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, + &p_rx->active_descq); + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, + &p_rx->free_descq); + b_last = OSAL_LIST_IS_EMPTY(&p_rx->active_descq); + OSAL_SPIN_UNLOCK_IRQSAVE(&p_rx->lock, flags); + + if (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_OOO) { + struct ecore_ooo_buffer *p_buffer; + + p_buffer = (struct ecore_ooo_buffer *)p_pkt->cookie; + ecore_ooo_put_free_buffer(p_hwfn->p_ooo_info, p_buffer); + } else { + dma_addr_t rx_buf_addr = p_pkt->rx_buf_addr; + void *cookie = p_pkt->cookie; + + p_ll2_conn->cbs.rx_release_cb(p_ll2_conn->cbs.cookie, + p_ll2_conn->my_id, + cookie, + rx_buf_addr, + b_last); + } + OSAL_SPIN_LOCK_IRQSAVE(&p_rx->lock, flags); + } + OSAL_SPIN_UNLOCK_IRQSAVE(&p_rx->lock, flags); +} + +static bool +ecore_ll2_lb_rxq_handler_slowpath(struct ecore_hwfn *p_hwfn, + struct core_rx_slow_path_cqe *p_cqe) +{ + struct ooo_opaque *iscsi_ooo; + u32 cid; + + if (p_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH) + return false; + + iscsi_ooo = (struct ooo_opaque *)&p_cqe->opaque_data; + if (iscsi_ooo->ooo_opcode != TCP_EVENT_DELETE_ISLES) + return false; + + /* Need to make a flush */ + cid = OSAL_LE32_TO_CPU(iscsi_ooo->cid); + ecore_ooo_release_connection_isles(p_hwfn->p_ooo_info, cid); + + return true; +} + +static enum _ecore_status_t +ecore_ll2_lb_rxq_handler(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + struct ecore_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + u16 packet_length = 0, parse_flags = 0, vlan = 0; + struct ecore_ll2_rx_packet *p_pkt = OSAL_NULL; + u32 num_ooo_add_to_peninsula = 0, cid; + union core_rx_cqe_union *cqe = OSAL_NULL; + u16 cq_new_idx = 0, cq_old_idx = 0; + struct ecore_ooo_buffer *p_buffer; + struct ooo_opaque *iscsi_ooo; + u8 placement_offset = 0; + u8 cqe_type; + + cq_new_idx = OSAL_LE16_TO_CPU(*p_rx->p_fw_cons); + cq_old_idx = ecore_chain_get_cons_idx(&p_rx->rcq_chain); + if (cq_new_idx == cq_old_idx) + return ECORE_SUCCESS; + + while (cq_new_idx != cq_old_idx) { + struct core_rx_fast_path_cqe *p_cqe_fp; + + cqe = (union core_rx_cqe_union *)ecore_chain_consume(&p_rx->rcq_chain); + cq_old_idx = ecore_chain_get_cons_idx(&p_rx->rcq_chain); + cqe_type = cqe->rx_cqe_sp.type; + + if (cqe_type == CORE_RX_CQE_TYPE_SLOW_PATH) + if (ecore_ll2_lb_rxq_handler_slowpath(p_hwfn, + 
&cqe->rx_cqe_sp)) + continue; + + if (cqe_type != CORE_RX_CQE_TYPE_REGULAR) { + DP_NOTICE(p_hwfn, true, + "Got a non-regular LB LL2 completion [type 0x%02x]\n", + cqe_type); + return ECORE_INVAL; + } + p_cqe_fp = &cqe->rx_cqe_fp; + + placement_offset = p_cqe_fp->placement_offset; + parse_flags = OSAL_LE16_TO_CPU(p_cqe_fp->parse_flags.flags); + packet_length = OSAL_LE16_TO_CPU(p_cqe_fp->packet_length); + vlan = OSAL_LE16_TO_CPU(p_cqe_fp->vlan); + iscsi_ooo = (struct ooo_opaque *)&p_cqe_fp->opaque_data; + ecore_ooo_save_history_entry(p_hwfn->p_ooo_info, iscsi_ooo); + cid = OSAL_LE32_TO_CPU(iscsi_ooo->cid); + + /* Process delete isle first*/ + if (iscsi_ooo->drop_size) + ecore_ooo_delete_isles(p_hwfn, p_hwfn->p_ooo_info, cid, + iscsi_ooo->drop_isle, + iscsi_ooo->drop_size); + + if (iscsi_ooo->ooo_opcode == TCP_EVENT_NOP) + continue; + + /* Now process create/add/join isles */ + if (OSAL_LIST_IS_EMPTY(&p_rx->active_descq)) { + DP_NOTICE(p_hwfn, true, + "LL2 OOO RX chain has no submitted buffers\n"); + return ECORE_IO; + } + + p_pkt = OSAL_LIST_FIRST_ENTRY(&p_rx->active_descq, + struct ecore_ll2_rx_packet, + list_entry); + + if ((iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_NEW_ISLE) || + (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_RIGHT) || + (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_LEFT) || + (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_PEN) || + (iscsi_ooo->ooo_opcode == TCP_EVENT_JOIN)) { + if (!p_pkt) { + DP_NOTICE(p_hwfn, true, + "LL2 OOO RX packet is not valid\n"); + return ECORE_IO; + } +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, + &p_rx->active_descq); + p_buffer = (struct ecore_ooo_buffer *)p_pkt->cookie; + p_buffer->packet_length = packet_length; + p_buffer->parse_flags = parse_flags; + p_buffer->vlan = vlan; + p_buffer->placement_offset = placement_offset; + if (ecore_chain_consume(&p_rx->rxq_chain) != + p_pkt->rxq_bd) { + /**/ + } + ecore_ooo_dump_rx_event(p_hwfn, iscsi_ooo, p_buffer); + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, + &p_rx->free_descq); + + switch (iscsi_ooo->ooo_opcode) { + case TCP_EVENT_ADD_NEW_ISLE: + ecore_ooo_add_new_isle(p_hwfn, + p_hwfn->p_ooo_info, + cid, + iscsi_ooo->ooo_isle, + p_buffer); + break; + case TCP_EVENT_ADD_ISLE_RIGHT: + ecore_ooo_add_new_buffer(p_hwfn, + p_hwfn->p_ooo_info, + cid, + iscsi_ooo->ooo_isle, + p_buffer, + ECORE_OOO_RIGHT_BUF); + break; + case TCP_EVENT_ADD_ISLE_LEFT: + ecore_ooo_add_new_buffer(p_hwfn, + p_hwfn->p_ooo_info, + cid, + iscsi_ooo->ooo_isle, + p_buffer, + ECORE_OOO_LEFT_BUF); + break; + case TCP_EVENT_JOIN: + ecore_ooo_add_new_buffer(p_hwfn, + p_hwfn->p_ooo_info, + cid, + iscsi_ooo->ooo_isle + + 1, + p_buffer, + ECORE_OOO_LEFT_BUF); + ecore_ooo_join_isles(p_hwfn, + p_hwfn->p_ooo_info, + cid, + iscsi_ooo->ooo_isle); + break; + case TCP_EVENT_ADD_PEN: + num_ooo_add_to_peninsula++; + ecore_ooo_put_ready_buffer(p_hwfn->p_ooo_info, + p_buffer, true); + break; + } + } else { + DP_NOTICE(p_hwfn, true, + "Unexpected event (%d) TX OOO completion\n", + iscsi_ooo->ooo_opcode); + } + } + + return ECORE_SUCCESS; +} + +static void +ecore_ooo_submit_tx_buffers(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + struct ecore_ll2_tx_pkt_info tx_pkt; + struct ecore_ooo_buffer *p_buffer; + dma_addr_t first_frag; + u16 l4_hdr_offset_w; + u8 bd_flags; + enum _ecore_status_t rc; + + /* Submit Tx buffers here */ + while ((p_buffer = ecore_ooo_get_ready_buffer(p_hwfn->p_ooo_info))) { + l4_hdr_offset_w = 0; + bd_flags = 0; + + first_frag = p_buffer->rx_buffer_phys_addr 
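+			     /* point at the packet data, past the hw
+			      * placement offset
+			      */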
+ + p_buffer->placement_offset; + SET_FIELD(bd_flags, CORE_TX_BD_DATA_FORCE_VLAN_MODE, 1); + SET_FIELD(bd_flags, CORE_TX_BD_DATA_L4_PROTOCOL, 1); + + OSAL_MEM_ZERO(&tx_pkt, sizeof(tx_pkt)); + tx_pkt.num_of_bds = 1; + tx_pkt.vlan = p_buffer->vlan; + tx_pkt.bd_flags = bd_flags; + tx_pkt.l4_hdr_offset_w = l4_hdr_offset_w; + tx_pkt.tx_dest = (enum ecore_ll2_tx_dest)p_ll2_conn->tx_dest; + tx_pkt.first_frag = first_frag; + tx_pkt.first_frag_len = p_buffer->packet_length; + tx_pkt.cookie = p_buffer; + + rc = ecore_ll2_prepare_tx_packet(p_hwfn, p_ll2_conn->my_id, + &tx_pkt, true); + if (rc != ECORE_SUCCESS) { + ecore_ooo_put_ready_buffer(p_hwfn->p_ooo_info, + p_buffer, false); + break; + } + } +} + +static void +ecore_ooo_submit_rx_buffers(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + struct ecore_ooo_buffer *p_buffer; + enum _ecore_status_t rc; + + while ((p_buffer = ecore_ooo_get_free_buffer(p_hwfn->p_ooo_info))) { + rc = ecore_ll2_post_rx_buffer(p_hwfn, + p_ll2_conn->my_id, + p_buffer->rx_buffer_phys_addr, + 0, p_buffer, true); + if (rc != ECORE_SUCCESS) { + ecore_ooo_put_free_buffer(p_hwfn->p_ooo_info, p_buffer); + break; + } + } +} + +static enum _ecore_status_t +ecore_ll2_lb_rxq_completion(struct ecore_hwfn *p_hwfn, + void *p_cookie) +{ + struct ecore_ll2_info *p_ll2_conn = (struct ecore_ll2_info *)p_cookie; + enum _ecore_status_t rc; + + rc = ecore_ll2_lb_rxq_handler(p_hwfn, p_ll2_conn); + if (rc != ECORE_SUCCESS) + return rc; + + ecore_ooo_submit_rx_buffers(p_hwfn, p_ll2_conn); + ecore_ooo_submit_tx_buffers(p_hwfn, p_ll2_conn); + + return 0; +} + +static enum _ecore_status_t +ecore_ll2_lb_txq_completion(struct ecore_hwfn *p_hwfn, + void *p_cookie) +{ + struct ecore_ll2_info *p_ll2_conn = (struct ecore_ll2_info *)p_cookie; + struct ecore_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct ecore_ll2_tx_packet *p_pkt = OSAL_NULL; + struct ecore_ooo_buffer *p_buffer; + bool b_dont_submit_rx = false; + u16 new_idx = 0, num_bds = 0; + enum _ecore_status_t rc; + + new_idx = OSAL_LE16_TO_CPU(*p_tx->p_fw_cons); + num_bds = ((s16)new_idx - (s16)p_tx->bds_idx); + + if (!num_bds) + return ECORE_SUCCESS; + + while (num_bds) { + + if (OSAL_LIST_IS_EMPTY(&p_tx->active_descq)) + return ECORE_INVAL; + + p_pkt = OSAL_LIST_FIRST_ENTRY(&p_tx->active_descq, + struct ecore_ll2_tx_packet, + list_entry); + if (!p_pkt) + return ECORE_INVAL; + + if (p_pkt->bd_used != 1) { + DP_NOTICE(p_hwfn, true, + "Unexpectedly many BDs(%d) in TX OOO completion\n", + p_pkt->bd_used); + return ECORE_INVAL; + } + + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, + &p_tx->active_descq); + + num_bds--; + p_tx->bds_idx++; + ecore_chain_consume(&p_tx->txq_chain); + + p_buffer = (struct ecore_ooo_buffer *)p_pkt->cookie; + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, + &p_tx->free_descq); + + if (b_dont_submit_rx) { + ecore_ooo_put_free_buffer(p_hwfn->p_ooo_info, p_buffer); + continue; + } + + rc = ecore_ll2_post_rx_buffer(p_hwfn, p_ll2_conn->my_id, + p_buffer->rx_buffer_phys_addr, 0, + p_buffer, true); + if (rc != ECORE_SUCCESS) { + ecore_ooo_put_free_buffer(p_hwfn->p_ooo_info, p_buffer); + b_dont_submit_rx = true; + } + } + + ecore_ooo_submit_tx_buffers(p_hwfn, p_ll2_conn); + + return ECORE_SUCCESS; +} + +static enum _ecore_status_t ecore_sp_ll2_rx_queue_start(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn, + u8 action_on_error) +{ + enum ecore_ll2_conn_type conn_type = p_ll2_conn->input.conn_type; + struct ecore_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct core_rx_start_ramrod_data *p_ramrod = 
OSAL_NULL; + struct ecore_spq_entry *p_ent = OSAL_NULL; + struct ecore_sp_init_data init_data; + u16 cqe_pbl_size; + enum _ecore_status_t rc = ECORE_SUCCESS; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_RX_QUEUE_START, + PROTOCOLID_CORE, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.core_rx_queue_start; + + p_ramrod->sb_id = OSAL_CPU_TO_LE16(ecore_int_get_sp_sb_id(p_hwfn)); + p_ramrod->sb_index = p_rx->rx_sb_index; + p_ramrod->complete_event_flg = 1; + + p_ramrod->mtu = OSAL_CPU_TO_LE16(p_ll2_conn->input.mtu); + DMA_REGPAIR_LE(p_ramrod->bd_base, + p_rx->rxq_chain.p_phys_addr); + cqe_pbl_size = (u16)ecore_chain_get_page_cnt(&p_rx->rcq_chain); + p_ramrod->num_of_pbl_pages = OSAL_CPU_TO_LE16(cqe_pbl_size); + DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, + ecore_chain_get_pbl_phys(&p_rx->rcq_chain)); + + p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg; + p_ramrod->inner_vlan_stripping_en = + p_ll2_conn->input.rx_vlan_removal_en; + + if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits) && + (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_FCOE)) + p_ramrod->report_outer_vlan = 1; + p_ramrod->queue_id = p_ll2_conn->queue_id; + p_ramrod->main_func_queue = p_ll2_conn->main_func_queue; + + if (OSAL_TEST_BIT(ECORE_MF_LL2_NON_UNICAST, + &p_hwfn->p_dev->mf_bits) && + p_ramrod->main_func_queue && + ((conn_type != ECORE_LL2_TYPE_ROCE) && + (conn_type != ECORE_LL2_TYPE_IWARP))) { + p_ramrod->mf_si_bcast_accept_all = 1; + p_ramrod->mf_si_mcast_accept_all = 1; + } else { + p_ramrod->mf_si_bcast_accept_all = 0; + p_ramrod->mf_si_mcast_accept_all = 0; + } + + p_ramrod->action_on_error.error_type = action_on_error; + p_ramrod->gsi_offload_flag = p_ll2_conn->input.gsi_enable; + return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); +} + +static enum _ecore_status_t ecore_sp_ll2_tx_queue_start(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + enum ecore_ll2_conn_type conn_type = p_ll2_conn->input.conn_type; + struct ecore_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct core_tx_start_ramrod_data *p_ramrod = OSAL_NULL; + struct ecore_spq_entry *p_ent = OSAL_NULL; + struct ecore_sp_init_data init_data; + u16 pq_id = 0, pbl_size; + enum _ecore_status_t rc = ECORE_NOTIMPL; + + if (!ECORE_LL2_TX_REGISTERED(p_ll2_conn)) + return ECORE_SUCCESS; + + if (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_OOO) + p_ll2_conn->tx_stats_en = 0; + else + p_ll2_conn->tx_stats_en = 1; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_TX_QUEUE_START, + PROTOCOLID_CORE, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.core_tx_queue_start; + + p_ramrod->sb_id = OSAL_CPU_TO_LE16(ecore_int_get_sp_sb_id(p_hwfn)); + p_ramrod->sb_index = p_tx->tx_sb_index; + p_ramrod->mtu = OSAL_CPU_TO_LE16(p_ll2_conn->input.mtu); + p_ramrod->stats_en = p_ll2_conn->tx_stats_en; + p_ramrod->stats_id = p_ll2_conn->tx_stats_id; + + DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, + ecore_chain_get_pbl_phys(&p_tx->txq_chain)); + pbl_size = (u16)ecore_chain_get_page_cnt(&p_tx->txq_chain); + p_ramrod->pbl_size = 
OSAL_CPU_TO_LE16(pbl_size); + + /* TODO RESC_ALLOC pq for ll2 */ + switch (p_ll2_conn->input.tx_tc) { + case PURE_LB_TC: + pq_id = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB); + break; + case PKT_LB_TC: + pq_id = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OOO); + break; + default: + pq_id = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); + } + + p_ramrod->qm_pq_id = OSAL_CPU_TO_LE16(pq_id); + + switch (conn_type) { + case ECORE_LL2_TYPE_FCOE: + p_ramrod->conn_type = PROTOCOLID_FCOE; + break; + case ECORE_LL2_TYPE_ISCSI: + p_ramrod->conn_type = PROTOCOLID_ISCSI; + break; + case ECORE_LL2_TYPE_ROCE: + p_ramrod->conn_type = PROTOCOLID_ROCE; + break; + case ECORE_LL2_TYPE_IWARP: + p_ramrod->conn_type = PROTOCOLID_IWARP; + break; + case ECORE_LL2_TYPE_OOO: + if (p_hwfn->hw_info.personality == ECORE_PCI_ISCSI) { + p_ramrod->conn_type = PROTOCOLID_ISCSI; + } else { + p_ramrod->conn_type = PROTOCOLID_IWARP; + } + break; + default: + p_ramrod->conn_type = PROTOCOLID_ETH; + DP_NOTICE(p_hwfn, false, "Unknown connection type: %d\n", + conn_type); + } + + p_ramrod->gsi_offload_flag = p_ll2_conn->input.gsi_enable; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_db_recovery_add(p_hwfn->p_dev, p_tx->doorbell_addr, + &p_tx->db_msg, DB_REC_WIDTH_32B, + DB_REC_KERNEL); + return rc; +} + +static enum _ecore_status_t ecore_sp_ll2_rx_queue_stop(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + struct core_rx_stop_ramrod_data *p_ramrod = OSAL_NULL; + struct ecore_spq_entry *p_ent = OSAL_NULL; + struct ecore_sp_init_data init_data; + enum _ecore_status_t rc = ECORE_NOTIMPL; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_RX_QUEUE_STOP, + PROTOCOLID_CORE, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.core_rx_queue_stop; + + p_ramrod->complete_event_flg = 1; + p_ramrod->queue_id = p_ll2_conn->queue_id; + + return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); +} + +static enum _ecore_status_t ecore_sp_ll2_tx_queue_stop(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + struct ecore_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct ecore_spq_entry *p_ent = OSAL_NULL; + struct ecore_sp_init_data init_data; + enum _ecore_status_t rc = ECORE_NOTIMPL; + + ecore_db_recovery_del(p_hwfn->p_dev, p_tx->doorbell_addr, + &p_tx->db_msg); + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_TX_QUEUE_STOP, + PROTOCOLID_CORE, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); +} + +static enum _ecore_status_t +ecore_ll2_acquire_connection_rx(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_info) +{ + struct ecore_ll2_rx_packet *p_descq; + u32 capacity; + enum _ecore_status_t rc = ECORE_SUCCESS; + + if (!p_ll2_info->input.rx_num_desc) + goto out; + + rc = ecore_chain_alloc(p_hwfn->p_dev, + ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, + ECORE_CHAIN_MODE_NEXT_PTR, + ECORE_CHAIN_CNT_TYPE_U16, + p_ll2_info->input.rx_num_desc, + sizeof(struct core_rx_bd), + &p_ll2_info->rx_queue.rxq_chain, OSAL_NULL); + if (rc) { + DP_NOTICE(p_hwfn, false, 
+ "Failed to allocate ll2 rxq chain\n"); + goto out; + } + + capacity = ecore_chain_get_capacity(&p_ll2_info->rx_queue.rxq_chain); + p_descq = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + capacity * sizeof(struct ecore_ll2_rx_packet)); + if (!p_descq) { + rc = ECORE_NOMEM; + DP_NOTICE(p_hwfn, false, + "Failed to allocate ll2 Rx desc\n"); + goto out; + } + p_ll2_info->rx_queue.descq_array = p_descq; + + rc = ecore_chain_alloc(p_hwfn->p_dev, + ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U16, + p_ll2_info->input.rx_num_desc, + sizeof(struct core_rx_fast_path_cqe), + &p_ll2_info->rx_queue.rcq_chain, OSAL_NULL); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate ll2 rcq chain\n"); + goto out; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_LL2, + "Allocated LL2 Rxq [Type %08x] with 0x%08x buffers\n", + p_ll2_info->input.conn_type, + p_ll2_info->input.rx_num_desc); + +out: + return rc; +} + +static enum _ecore_status_t +ecore_ll2_acquire_connection_tx(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_info) +{ + struct ecore_ll2_tx_packet *p_descq; + u32 capacity; + enum _ecore_status_t rc = ECORE_SUCCESS; + u32 desc_size; + + if (!p_ll2_info->input.tx_num_desc) + goto out; + + rc = ecore_chain_alloc(p_hwfn->p_dev, + ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U16, + p_ll2_info->input.tx_num_desc, + sizeof(struct core_tx_bd), + &p_ll2_info->tx_queue.txq_chain, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto out; + + capacity = ecore_chain_get_capacity(&p_ll2_info->tx_queue.txq_chain); + desc_size = (sizeof(*p_descq) + + (p_ll2_info->input.tx_max_bds_per_packet - 1) * + sizeof(p_descq->bds_set)); + + p_descq = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + capacity * desc_size); + if (!p_descq) { + rc = ECORE_NOMEM; + goto out; + } + p_ll2_info->tx_queue.descq_array = p_descq; + + DP_VERBOSE(p_hwfn, ECORE_MSG_LL2, + "Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n", + p_ll2_info->input.conn_type, + p_ll2_info->input.tx_num_desc); + +out: + if (rc != ECORE_SUCCESS) + DP_NOTICE(p_hwfn, false, + "Can't allocate memory for Tx LL2 with 0x%08x buffers\n", + p_ll2_info->input.tx_num_desc); + return rc; +} + +static enum _ecore_status_t +ecore_ll2_acquire_connection_ooo(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_info, u16 mtu) +{ + struct ecore_ooo_buffer *p_buf = OSAL_NULL; + u32 rx_buffer_size = 0; + void *p_virt; + u16 buf_idx; + enum _ecore_status_t rc = ECORE_SUCCESS; + + if (p_ll2_info->input.conn_type != ECORE_LL2_TYPE_OOO) + return rc; + + /* Correct number of requested OOO buffers if needed */ + if (!p_ll2_info->input.rx_num_ooo_buffers) { + u16 num_desc = p_ll2_info->input.rx_num_desc; + + if (!num_desc) + return ECORE_INVAL; + p_ll2_info->input.rx_num_ooo_buffers = num_desc * 2; + } + + /* TODO - use some defines for buffer size */ + rx_buffer_size = mtu + 14 + 4 + 8 + ETH_CACHE_LINE_SIZE; + rx_buffer_size = (rx_buffer_size + ETH_CACHE_LINE_SIZE - 1) & + ~(ETH_CACHE_LINE_SIZE - 1); + + for (buf_idx = 0; buf_idx < p_ll2_info->input.rx_num_ooo_buffers; + buf_idx++) { + p_buf = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*p_buf)); + if (!p_buf) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate ooo descriptor\n"); + rc = ECORE_NOMEM; + goto out; + } + + p_buf->rx_buffer_size = rx_buffer_size; + p_virt = OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, + &p_buf->rx_buffer_phys_addr, + p_buf->rx_buffer_size); + if (!p_virt) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate ooo buffer\n"); + 
OSAL_FREE(p_hwfn->p_dev, p_buf); + rc = ECORE_NOMEM; + goto out; + } + p_buf->rx_buffer_virt_addr = p_virt; + ecore_ooo_put_free_buffer(p_hwfn->p_ooo_info, p_buf); + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_LL2, + "Allocated [%04x] LL2 OOO buffers [each of size 0x%08x]\n", + p_ll2_info->input.rx_num_ooo_buffers, rx_buffer_size); + +out: + return rc; +} + +static enum _ecore_status_t +ecore_ll2_set_cbs(struct ecore_ll2_info *p_ll2_info, + const struct ecore_ll2_cbs *cbs) +{ + if (!cbs || (!cbs->rx_comp_cb || + !cbs->rx_release_cb || + !cbs->tx_comp_cb || + !cbs->tx_release_cb || + !cbs->cookie)) + return ECORE_INVAL; + + p_ll2_info->cbs.rx_comp_cb = cbs->rx_comp_cb; + p_ll2_info->cbs.rx_release_cb = cbs->rx_release_cb; + p_ll2_info->cbs.tx_comp_cb = cbs->tx_comp_cb; + p_ll2_info->cbs.tx_release_cb = cbs->tx_release_cb; + p_ll2_info->cbs.slowpath_cb = cbs->slowpath_cb; + p_ll2_info->cbs.cookie = cbs->cookie; + + return ECORE_SUCCESS; +} + +static enum core_error_handle +ecore_ll2_get_error_choice(enum ecore_ll2_error_handle err) +{ + switch (err) { + case ECORE_LL2_DROP_PACKET: + return LL2_DROP_PACKET; + case ECORE_LL2_DO_NOTHING: + return LL2_DO_NOTHING; + case ECORE_LL2_ASSERT: + return LL2_ASSERT; + default: + return LL2_DO_NOTHING; + } +} + +enum _ecore_status_t +ecore_ll2_acquire_connection(void *cxt, + struct ecore_ll2_acquire_data *data) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + ecore_int_comp_cb_t comp_rx_cb, comp_tx_cb; + struct ecore_ll2_info *p_ll2_info = OSAL_NULL; + enum _ecore_status_t rc; + u8 i, *p_tx_max; + + if (!data->p_connection_handle || !p_hwfn->p_ll2_info) { + DP_NOTICE(p_hwfn, false, "Invalid connection handle, ll2_info not allocated\n"); + return ECORE_INVAL; + } + + /* Find a free connection to be used */ + for (i = 0; (i < ECORE_MAX_NUM_OF_LL2_CONNECTIONS); i++) { + OSAL_MUTEX_ACQUIRE(&p_hwfn->p_ll2_info[i].mutex); + if (p_hwfn->p_ll2_info[i].b_active) { + OSAL_MUTEX_RELEASE(&p_hwfn->p_ll2_info[i].mutex); + continue; + } + + p_hwfn->p_ll2_info[i].b_active = true; + p_ll2_info = &p_hwfn->p_ll2_info[i]; + OSAL_MUTEX_RELEASE(&p_hwfn->p_ll2_info[i].mutex); + break; + } + if (p_ll2_info == OSAL_NULL) { + DP_NOTICE(p_hwfn, false, "No available ll2 connection\n"); + return ECORE_BUSY; + } + + OSAL_MEMCPY(&p_ll2_info->input, &data->input, + sizeof(p_ll2_info->input)); + + switch (data->input.tx_dest) { + case ECORE_LL2_TX_DEST_NW: + p_ll2_info->tx_dest = CORE_TX_DEST_NW; + break; + case ECORE_LL2_TX_DEST_LB: + p_ll2_info->tx_dest = CORE_TX_DEST_LB; + break; + case ECORE_LL2_TX_DEST_DROP: + p_ll2_info->tx_dest = CORE_TX_DEST_DROP; + break; + default: + return ECORE_INVAL; + } + + if ((data->input.conn_type == ECORE_LL2_TYPE_OOO) || + data->input.secondary_queue) + p_ll2_info->main_func_queue = false; + else + p_ll2_info->main_func_queue = true; + + /* Correct maximum number of Tx BDs */ + p_tx_max = &p_ll2_info->input.tx_max_bds_per_packet; + if (*p_tx_max == 0) + *p_tx_max = CORE_LL2_TX_MAX_BDS_PER_PACKET; + else + *p_tx_max = OSAL_MIN_T(u8, *p_tx_max, + CORE_LL2_TX_MAX_BDS_PER_PACKET); + + rc = ecore_ll2_set_cbs(p_ll2_info, data->cbs); + if (rc) { + DP_NOTICE(p_hwfn, false, "Invalid callback functions\n"); + goto q_allocate_fail; + } + + rc = ecore_ll2_acquire_connection_rx(p_hwfn, p_ll2_info); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "ll2 acquire rx connection failed\n"); + goto q_allocate_fail; + } + + rc = ecore_ll2_acquire_connection_tx(p_hwfn, p_ll2_info); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "ll2 acquire tx connection 
failed\n"); + goto q_allocate_fail; + } + + rc = ecore_ll2_acquire_connection_ooo(p_hwfn, p_ll2_info, + data->input.mtu); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "ll2 acquire ooo connection failed\n"); + goto q_allocate_fail; + } + + /* Register callbacks for the Rx/Tx queues */ + if (data->input.conn_type == ECORE_LL2_TYPE_OOO) { + comp_rx_cb = ecore_ll2_lb_rxq_completion; + comp_tx_cb = ecore_ll2_lb_txq_completion; + + } else { + comp_rx_cb = ecore_ll2_rxq_completion; + comp_tx_cb = ecore_ll2_txq_completion; + } + + if (data->input.rx_num_desc) { + ecore_int_register_cb(p_hwfn, comp_rx_cb, + &p_hwfn->p_ll2_info[i], + &p_ll2_info->rx_queue.rx_sb_index, + &p_ll2_info->rx_queue.p_fw_cons); + p_ll2_info->rx_queue.b_cb_registred = true; + } + + if (data->input.tx_num_desc) { + ecore_int_register_cb(p_hwfn, + comp_tx_cb, + &p_hwfn->p_ll2_info[i], + &p_ll2_info->tx_queue.tx_sb_index, + &p_ll2_info->tx_queue.p_fw_cons); + p_ll2_info->tx_queue.b_cb_registred = true; + } + + *(data->p_connection_handle) = i; + return rc; + +q_allocate_fail: + ecore_ll2_release_connection(p_hwfn, i); + return ECORE_NOMEM; +} + +static enum _ecore_status_t ecore_ll2_establish_connection_rx(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + enum ecore_ll2_error_handle error_input; + enum core_error_handle error_mode; + u8 action_on_error = 0; + + if (!ECORE_LL2_RX_REGISTERED(p_ll2_conn)) + return ECORE_SUCCESS; + + DIRECT_REG_WR(p_hwfn, p_ll2_conn->rx_queue.set_prod_addr, 0x0); + error_input = p_ll2_conn->input.ai_err_packet_too_big; + error_mode = ecore_ll2_get_error_choice(error_input); + SET_FIELD(action_on_error, + CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG, error_mode); + error_input = p_ll2_conn->input.ai_err_no_buf; + error_mode = ecore_ll2_get_error_choice(error_input); + SET_FIELD(action_on_error, + CORE_RX_ACTION_ON_ERROR_NO_BUFF, error_mode); + + return ecore_sp_ll2_rx_queue_start(p_hwfn, p_ll2_conn, action_on_error); +} + +static void +ecore_ll2_establish_connection_ooo(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + if (p_ll2_conn->input.conn_type != ECORE_LL2_TYPE_OOO) + return; + + ecore_ooo_release_all_isles(p_hwfn->p_ooo_info); + ecore_ooo_submit_rx_buffers(p_hwfn, p_ll2_conn); +} + +enum _ecore_status_t ecore_ll2_establish_connection(void *cxt, + u8 connection_handle) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct e4_core_conn_context *p_cxt; + struct ecore_ll2_info *p_ll2_conn; + struct ecore_cxt_info cxt_info; + struct ecore_ll2_rx_queue *p_rx; + struct ecore_ll2_tx_queue *p_tx; + struct ecore_ll2_tx_packet *p_pkt; + struct ecore_ptt *p_ptt; + enum _ecore_status_t rc = ECORE_NOTIMPL; + u32 i, capacity; + u32 desc_size; + u8 qid; + + p_ptt = ecore_ptt_acquire(p_hwfn); + if (!p_ptt) + return ECORE_AGAIN; + + p_ll2_conn = ecore_ll2_handle_sanity_lock(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) { + rc = ECORE_INVAL; + goto out; + } + + p_rx = &p_ll2_conn->rx_queue; + p_tx = &p_ll2_conn->tx_queue; + + ecore_chain_reset(&p_rx->rxq_chain); + ecore_chain_reset(&p_rx->rcq_chain); + OSAL_LIST_INIT(&p_rx->active_descq); + OSAL_LIST_INIT(&p_rx->free_descq); + OSAL_LIST_INIT(&p_rx->posting_descq); + OSAL_SPIN_LOCK_INIT(&p_rx->lock); + capacity = ecore_chain_get_capacity(&p_rx->rxq_chain); + for (i = 0; i < capacity; i++) + OSAL_LIST_PUSH_TAIL(&p_rx->descq_array[i].list_entry, + &p_rx->free_descq); + *p_rx->p_fw_cons = 0; + + ecore_chain_reset(&p_tx->txq_chain); + OSAL_LIST_INIT(&p_tx->active_descq); + 
OSAL_LIST_INIT(&p_tx->free_descq); + OSAL_LIST_INIT(&p_tx->sending_descq); + OSAL_SPIN_LOCK_INIT(&p_tx->lock); + capacity = ecore_chain_get_capacity(&p_tx->txq_chain); + /* The size of the element in descq_array is flexible */ + desc_size = (sizeof(*p_pkt) + + (p_ll2_conn->input.tx_max_bds_per_packet - 1) * + sizeof(p_pkt->bds_set)); + + for (i = 0; i < capacity; i++) { + p_pkt = (struct ecore_ll2_tx_packet *)((u8 *)p_tx->descq_array + + desc_size*i); + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, + &p_tx->free_descq); + } + p_tx->cur_completing_bd_idx = 0; + p_tx->bds_idx = 0; + p_tx->b_completing_packet = false; + p_tx->cur_send_packet = OSAL_NULL; + p_tx->cur_send_frag_num = 0; + p_tx->cur_completing_frag_num = 0; + *p_tx->p_fw_cons = 0; + + rc = ecore_cxt_acquire_cid(p_hwfn, PROTOCOLID_CORE, &p_ll2_conn->cid); + if (rc) + goto out; + cxt_info.iid = p_ll2_conn->cid; + rc = ecore_cxt_get_cid_info(p_hwfn, &cxt_info); + if (rc) { + DP_NOTICE(p_hwfn, true, "Cannot find context info for cid=%d\n", + p_ll2_conn->cid); + goto out; + } + + p_cxt = cxt_info.p_cxt; + + /* @@@TBD we zero the context until we have ilt_reset implemented. */ + OSAL_MEM_ZERO(p_cxt, sizeof(*p_cxt)); + + qid = ecore_ll2_handle_to_queue_id(p_hwfn, connection_handle); + p_ll2_conn->queue_id = qid; + p_ll2_conn->tx_stats_id = qid; + p_rx->set_prod_addr = (u8 OSAL_IOMEM*)p_hwfn->regview + + GTT_BAR0_MAP_REG_TSDM_RAM + + TSTORM_LL2_RX_PRODS_OFFSET(qid); + p_tx->doorbell_addr = (u8 OSAL_IOMEM*)p_hwfn->doorbells + + DB_ADDR(p_ll2_conn->cid, + DQ_DEMS_LEGACY); + + /* prepare db data */ + SET_FIELD(p_tx->db_msg.params, CORE_DB_DATA_DEST, DB_DEST_XCM); + SET_FIELD(p_tx->db_msg.params, CORE_DB_DATA_AGG_CMD, + DB_AGG_CMD_SET); + SET_FIELD(p_tx->db_msg.params, CORE_DB_DATA_AGG_VAL_SEL, + DQ_XCM_CORE_TX_BD_PROD_CMD); + p_tx->db_msg.agg_flags = DQ_XCM_CORE_DQ_CF_CMD; + + rc = ecore_ll2_establish_connection_rx(p_hwfn, p_ll2_conn); + if (rc) + goto out; + + rc = ecore_sp_ll2_tx_queue_start(p_hwfn, p_ll2_conn); + if (rc) + goto out; + + if (!ECORE_IS_RDMA_PERSONALITY(p_hwfn)) + ecore_wr(p_hwfn, p_ptt, PRS_REG_USE_LIGHT_L2, 1); + + ecore_ll2_establish_connection_ooo(p_hwfn, p_ll2_conn); + + if (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_FCOE) { + if (!OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, + &p_hwfn->p_dev->mf_bits)) + ecore_llh_add_protocol_filter(p_hwfn->p_dev, 0, + ECORE_LLH_FILTER_ETHERTYPE, + 0x8906, 0); + ecore_llh_add_protocol_filter(p_hwfn->p_dev, 0, + ECORE_LLH_FILTER_ETHERTYPE, + 0x8914, 0); + } + +out: + ecore_ptt_release(p_hwfn, p_ptt); + + return rc; +} + +static void ecore_ll2_post_rx_buffer_notify_fw(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_rx_queue *p_rx, + struct ecore_ll2_rx_packet *p_curp) +{ + struct ecore_ll2_rx_packet *p_posting_packet = OSAL_NULL; + struct core_ll2_rx_prod rx_prod = {0, 0, 0}; + bool b_notify_fw = false; + u16 bd_prod, cq_prod; + + /* This handles the flushing of already posted buffers */ + while (!OSAL_LIST_IS_EMPTY(&p_rx->posting_descq)) { + p_posting_packet = OSAL_LIST_FIRST_ENTRY(&p_rx->posting_descq, + struct ecore_ll2_rx_packet, + list_entry); +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_posting_packet->list_entry, &p_rx->posting_descq); + OSAL_LIST_PUSH_TAIL(&p_posting_packet->list_entry, &p_rx->active_descq); + b_notify_fw = true; + } + + /* This handles the supplied packet [if there is one] */ + if (p_curp) { + OSAL_LIST_PUSH_TAIL(&p_curp->list_entry, + &p_rx->active_descq); + b_notify_fw = true; + } + + if (!b_notify_fw) + return; + + 
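+ /* Publish the latest BD and CQE producer indices to the FW; both + * fit in a single 32-bit write to the TSTORM Rx producers address. + */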
bd_prod = ecore_chain_get_prod_idx(&p_rx->rxq_chain); + cq_prod = ecore_chain_get_prod_idx(&p_rx->rcq_chain); + rx_prod.bd_prod = OSAL_CPU_TO_LE16(bd_prod); + rx_prod.cqe_prod = OSAL_CPU_TO_LE16(cq_prod); + DIRECT_REG_WR(p_hwfn, p_rx->set_prod_addr, *((u32 *)&rx_prod)); +} + +enum _ecore_status_t ecore_ll2_post_rx_buffer(void *cxt, + u8 connection_handle, + dma_addr_t addr, + u16 buf_len, + void *cookie, + u8 notify_fw) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct core_rx_bd_with_buff_len *p_curb = OSAL_NULL; + struct ecore_ll2_rx_packet *p_curp = OSAL_NULL; + struct ecore_ll2_info *p_ll2_conn; + struct ecore_ll2_rx_queue *p_rx; + unsigned long flags; + void *p_data; + enum _ecore_status_t rc = ECORE_SUCCESS; + + p_ll2_conn = ecore_ll2_handle_sanity(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) + return ECORE_INVAL; + p_rx = &p_ll2_conn->rx_queue; + if (p_rx->set_prod_addr == OSAL_NULL) + return ECORE_IO; + + OSAL_SPIN_LOCK_IRQSAVE(&p_rx->lock, flags); + if (!OSAL_LIST_IS_EMPTY(&p_rx->free_descq)) + p_curp = OSAL_LIST_FIRST_ENTRY(&p_rx->free_descq, + struct ecore_ll2_rx_packet, + list_entry); + if (p_curp) { + if (ecore_chain_get_elem_left(&p_rx->rxq_chain) && + ecore_chain_get_elem_left(&p_rx->rcq_chain)) { + p_data = ecore_chain_produce(&p_rx->rxq_chain); + p_curb = (struct core_rx_bd_with_buff_len *)p_data; + ecore_chain_produce(&p_rx->rcq_chain); + } + } + + /* If we're lacking entries, let's try to flush buffers to FW */ + if (!p_curp || !p_curb) { + rc = ECORE_BUSY; + p_curp = OSAL_NULL; + goto out_notify; + } + + /* We have an Rx packet we can fill */ + DMA_REGPAIR_LE(p_curb->addr, addr); + p_curb->buff_length = OSAL_CPU_TO_LE16(buf_len); + p_curp->rx_buf_addr = addr; + p_curp->cookie = cookie; + p_curp->rxq_bd = p_curb; + p_curp->buf_length = buf_len; + OSAL_LIST_REMOVE_ENTRY(&p_curp->list_entry, + &p_rx->free_descq); + + /* Check if we only want to enqueue this packet without informing FW */ + if (!notify_fw) { + OSAL_LIST_PUSH_TAIL(&p_curp->list_entry, + &p_rx->posting_descq); + goto out; + } + +out_notify: + ecore_ll2_post_rx_buffer_notify_fw(p_hwfn, p_rx, p_curp); +out: + OSAL_SPIN_UNLOCK_IRQSAVE(&p_rx->lock, flags); + return rc; +} + +static void ecore_ll2_prepare_tx_packet_set(struct ecore_ll2_tx_queue *p_tx, + struct ecore_ll2_tx_packet *p_curp, + struct ecore_ll2_tx_pkt_info *pkt, + u8 notify_fw) +{ + OSAL_LIST_REMOVE_ENTRY(&p_curp->list_entry, + &p_tx->free_descq); + p_curp->cookie = pkt->cookie; + p_curp->bd_used = pkt->num_of_bds; + p_curp->notify_fw = notify_fw; + p_tx->cur_send_packet = p_curp; + p_tx->cur_send_frag_num = 0; + + p_curp->bds_set[p_tx->cur_send_frag_num].tx_frag = pkt->first_frag; + p_curp->bds_set[p_tx->cur_send_frag_num].frag_len = pkt->first_frag_len; + p_tx->cur_send_frag_num++; +} + +static void ecore_ll2_prepare_tx_packet_set_bd( + struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2, + struct ecore_ll2_tx_packet *p_curp, + struct ecore_ll2_tx_pkt_info *pkt) +{ + struct ecore_chain *p_tx_chain = &p_ll2->tx_queue.txq_chain; + u16 prod_idx = ecore_chain_get_prod_idx(p_tx_chain); + struct core_tx_bd *start_bd = OSAL_NULL; + enum core_roce_flavor_type roce_flavor; + enum core_tx_dest tx_dest; + u16 bd_data = 0, frag_idx; + + roce_flavor = (pkt->ecore_roce_flavor == ECORE_LL2_ROCE) ?
+ CORE_ROCE : CORE_RROCE; + + switch (pkt->tx_dest) { + case ECORE_LL2_TX_DEST_NW: + tx_dest = CORE_TX_DEST_NW; + break; + case ECORE_LL2_TX_DEST_LB: + tx_dest = CORE_TX_DEST_LB; + break; + case ECORE_LL2_TX_DEST_DROP: + tx_dest = CORE_TX_DEST_DROP; + break; + default: + tx_dest = CORE_TX_DEST_LB; + break; + } + + start_bd = (struct core_tx_bd*)ecore_chain_produce(p_tx_chain); + + if (ECORE_IS_IWARP_PERSONALITY(p_hwfn) && + (p_ll2->input.conn_type == ECORE_LL2_TYPE_OOO)) { + start_bd->nw_vlan_or_lb_echo = + OSAL_CPU_TO_LE16(IWARP_LL2_IN_ORDER_TX_QUEUE); + } else { + start_bd->nw_vlan_or_lb_echo = OSAL_CPU_TO_LE16(pkt->vlan); + if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits) && + (p_ll2->input.conn_type == ECORE_LL2_TYPE_FCOE)) + pkt->remove_stag = true; + } + + SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W, + OSAL_CPU_TO_LE16(pkt->l4_hdr_offset_w)); + SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest); + bd_data |= pkt->bd_flags; + SET_FIELD(bd_data, CORE_TX_BD_DATA_START_BD, 0x1); + SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, pkt->num_of_bds); + SET_FIELD(bd_data, CORE_TX_BD_DATA_ROCE_FLAV, roce_flavor); + SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_CSUM, !!(pkt->enable_ip_cksum)); + SET_FIELD(bd_data, CORE_TX_BD_DATA_L4_CSUM, !!(pkt->enable_l4_cksum)); + SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_LEN, !!(pkt->calc_ip_len)); + SET_FIELD(bd_data, CORE_TX_BD_DATA_DISABLE_STAG_INSERTION, + !!(pkt->remove_stag)); + + start_bd->bd_data.as_bitfield = OSAL_CPU_TO_LE16(bd_data); + DMA_REGPAIR_LE(start_bd->addr, pkt->first_frag); + start_bd->nbytes = OSAL_CPU_TO_LE16(pkt->first_frag_len); + + DP_VERBOSE(p_hwfn, (ECORE_MSG_TX_QUEUED | ECORE_MSG_LL2), + "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n", + p_ll2->queue_id, p_ll2->cid, p_ll2->input.conn_type, + prod_idx, pkt->first_frag_len, pkt->num_of_bds, + OSAL_LE32_TO_CPU(start_bd->addr.hi), + OSAL_LE32_TO_CPU(start_bd->addr.lo)); + + if (p_ll2->tx_queue.cur_send_frag_num == pkt->num_of_bds) + return; + + /* Need to provide the packet with additional BDs for frags */ + for (frag_idx = p_ll2->tx_queue.cur_send_frag_num; + frag_idx < pkt->num_of_bds; frag_idx++) { + struct core_tx_bd **p_bd = &p_curp->bds_set[frag_idx].txq_bd; + + *p_bd = (struct core_tx_bd *)ecore_chain_produce(p_tx_chain); + (*p_bd)->bd_data.as_bitfield = 0; + (*p_bd)->bitfield1 = 0; + p_curp->bds_set[frag_idx].tx_frag = 0; + p_curp->bds_set[frag_idx].frag_len = 0; + } +} + +/* This should be called while the Txq spinlock is being held */ +static void ecore_ll2_tx_packet_notify(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + bool b_notify = p_ll2_conn->tx_queue.cur_send_packet->notify_fw; + struct ecore_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct ecore_ll2_tx_packet *p_pkt = OSAL_NULL; + u16 bd_prod; + + /* If there are missing BDs, don't do anything now */ + if (p_ll2_conn->tx_queue.cur_send_frag_num != + p_ll2_conn->tx_queue.cur_send_packet->bd_used) + return; + + + /* Push the current packet to the list and clean after it */ + OSAL_LIST_PUSH_TAIL(&p_ll2_conn->tx_queue.cur_send_packet->list_entry, + &p_ll2_conn->tx_queue.sending_descq); + p_ll2_conn->tx_queue.cur_send_packet = OSAL_NULL; + p_ll2_conn->tx_queue.cur_send_frag_num = 0; + + /* Notify FW of packet only if requested to */ + if (!b_notify) + return; + + bd_prod = ecore_chain_get_prod_idx(&p_ll2_conn->tx_queue.txq_chain); + + while (!OSAL_LIST_IS_EMPTY(&p_tx->sending_descq)) { + p_pkt = 
OSAL_LIST_FIRST_ENTRY(&p_tx->sending_descq, + struct ecore_ll2_tx_packet, + list_entry); + if (p_pkt == OSAL_NULL) + break; +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_pkt->list_entry, + &p_tx->sending_descq); + OSAL_LIST_PUSH_TAIL(&p_pkt->list_entry, &p_tx->active_descq); + } + + p_tx->db_msg.spq_prod = OSAL_CPU_TO_LE16(bd_prod); + + /* Make sure the BD data is updated before ringing the doorbell */ + OSAL_WMB(p_hwfn->p_dev); + + //DIRECT_REG_WR(p_hwfn, p_tx->doorbell_addr, *((u32 *)&p_tx->db_msg)); + DIRECT_REG_WR_DB(p_hwfn, p_tx->doorbell_addr, *((u32 *)&p_tx->db_msg)); + + DP_VERBOSE(p_hwfn, (ECORE_MSG_TX_QUEUED | ECORE_MSG_LL2), + "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Doorbelled [producer 0x%04x]\n", + p_ll2_conn->queue_id, p_ll2_conn->cid, + p_ll2_conn->input.conn_type, + p_tx->db_msg.spq_prod); +} + +enum _ecore_status_t ecore_ll2_prepare_tx_packet( + void *cxt, + u8 connection_handle, + struct ecore_ll2_tx_pkt_info *pkt, + bool notify_fw) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_ll2_tx_packet *p_curp = OSAL_NULL; + struct ecore_ll2_info *p_ll2_conn = OSAL_NULL; + struct ecore_ll2_tx_queue *p_tx; + struct ecore_chain *p_tx_chain; + unsigned long flags; + enum _ecore_status_t rc = ECORE_SUCCESS; + + p_ll2_conn = ecore_ll2_handle_sanity(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) + return ECORE_INVAL; + p_tx = &p_ll2_conn->tx_queue; + p_tx_chain = &p_tx->txq_chain; + + if (pkt->num_of_bds > p_ll2_conn->input.tx_max_bds_per_packet) + return ECORE_IO; /* coalescing is required */ + + OSAL_SPIN_LOCK_IRQSAVE(&p_tx->lock, flags); + if (p_tx->cur_send_packet) { + rc = ECORE_EXISTS; + goto out; + } + + /* Get entry, but only if we have tx elements for it */ + if (!OSAL_LIST_IS_EMPTY(&p_tx->free_descq)) + p_curp = OSAL_LIST_FIRST_ENTRY(&p_tx->free_descq, + struct ecore_ll2_tx_packet, + list_entry); + if (p_curp && ecore_chain_get_elem_left(p_tx_chain) < pkt->num_of_bds) + p_curp = OSAL_NULL; + + if (!p_curp) { + rc = ECORE_BUSY; + goto out; + } + + /* Prepare packet and BD, and perhaps send a doorbell to FW */ + ecore_ll2_prepare_tx_packet_set(p_tx, p_curp, pkt, notify_fw); + + ecore_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp, + pkt); + + ecore_ll2_tx_packet_notify(p_hwfn, p_ll2_conn); + +out: + OSAL_SPIN_UNLOCK_IRQSAVE(&p_tx->lock, flags); + return rc; +} + +enum _ecore_status_t ecore_ll2_set_fragment_of_tx_packet(void *cxt, + u8 connection_handle, + dma_addr_t addr, + u16 nbytes) +{ + struct ecore_ll2_tx_packet *p_cur_send_packet = OSAL_NULL; + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_ll2_info *p_ll2_conn = OSAL_NULL; + u16 cur_send_frag_num = 0; + struct core_tx_bd *p_bd; + unsigned long flags; + + p_ll2_conn = ecore_ll2_handle_sanity(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) + return ECORE_INVAL; + + if (!p_ll2_conn->tx_queue.cur_send_packet) + return ECORE_INVAL; + + p_cur_send_packet = p_ll2_conn->tx_queue.cur_send_packet; + cur_send_frag_num = p_ll2_conn->tx_queue.cur_send_frag_num; + + if (cur_send_frag_num >= p_cur_send_packet->bd_used) + return ECORE_INVAL; + + /* Fill the BD information, and possibly notify FW */ + p_bd = p_cur_send_packet->bds_set[cur_send_frag_num].txq_bd; + DMA_REGPAIR_LE(p_bd->addr, addr); + p_bd->nbytes = OSAL_CPU_TO_LE16(nbytes); + p_cur_send_packet->bds_set[cur_send_frag_num].tx_frag = addr; + p_cur_send_packet->bds_set[cur_send_frag_num].frag_len = nbytes; + +
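+ /* Account for the fragment that was just filled; once all bd_used + * fragments are set, the notify below rings the Tx doorbell. + */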
p_ll2_conn->tx_queue.cur_send_frag_num++; + + OSAL_SPIN_LOCK_IRQSAVE(&p_ll2_conn->tx_queue.lock, flags); + ecore_ll2_tx_packet_notify(p_hwfn, p_ll2_conn); + OSAL_SPIN_UNLOCK_IRQSAVE(&p_ll2_conn->tx_queue.lock, flags); + + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_ll2_terminate_connection(void *cxt, + u8 connection_handle) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_ll2_info *p_ll2_conn = OSAL_NULL; + enum _ecore_status_t rc = ECORE_NOTIMPL; + struct ecore_ptt *p_ptt; + + p_ptt = ecore_ptt_acquire(p_hwfn); + if (!p_ptt) + return ECORE_AGAIN; + + p_ll2_conn = ecore_ll2_handle_sanity_lock(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) { + rc = ECORE_INVAL; + goto out; + } + + /* Stop Tx & Rx of connection, if needed */ + if (ECORE_LL2_TX_REGISTERED(p_ll2_conn)) { + rc = ecore_sp_ll2_tx_queue_stop(p_hwfn, p_ll2_conn); + if (rc != ECORE_SUCCESS) + goto out; + ecore_ll2_txq_flush(p_hwfn, connection_handle); + } + + if (ECORE_LL2_RX_REGISTERED(p_ll2_conn)) { + rc = ecore_sp_ll2_rx_queue_stop(p_hwfn, p_ll2_conn); + if (rc) + goto out; + ecore_ll2_rxq_flush(p_hwfn, connection_handle); + } + + if (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_OOO) + ecore_ooo_release_all_isles(p_hwfn->p_ooo_info); + + if (p_ll2_conn->input.conn_type == ECORE_LL2_TYPE_FCOE) { + if (!OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, + &p_hwfn->p_dev->mf_bits)) + ecore_llh_remove_protocol_filter(p_hwfn->p_dev, 0, + ECORE_LLH_FILTER_ETHERTYPE, + 0x8906, 0); + ecore_llh_remove_protocol_filter(p_hwfn->p_dev, 0, + ECORE_LLH_FILTER_ETHERTYPE, + 0x8914, 0); + } + +out: + ecore_ptt_release(p_hwfn, p_ptt); + + return rc; +} + +static void ecore_ll2_release_connection_ooo(struct ecore_hwfn *p_hwfn, + struct ecore_ll2_info *p_ll2_conn) +{ + struct ecore_ooo_buffer *p_buffer; + + if (p_ll2_conn->input.conn_type != ECORE_LL2_TYPE_OOO) + return; + + ecore_ooo_release_all_isles(p_hwfn->p_ooo_info); + while ((p_buffer = ecore_ooo_get_free_buffer(p_hwfn->p_ooo_info))) { + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + p_buffer->rx_buffer_virt_addr, + p_buffer->rx_buffer_phys_addr, + p_buffer->rx_buffer_size); + OSAL_FREE(p_hwfn->p_dev, p_buffer); + } +} + +void ecore_ll2_release_connection(void *cxt, + u8 connection_handle) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_ll2_info *p_ll2_conn = OSAL_NULL; + + p_ll2_conn = ecore_ll2_handle_sanity(p_hwfn, connection_handle); + if (p_ll2_conn == OSAL_NULL) + return; + + if (ECORE_LL2_RX_REGISTERED(p_ll2_conn)) { + p_ll2_conn->rx_queue.b_cb_registred = false; + ecore_int_unregister_cb(p_hwfn, + p_ll2_conn->rx_queue.rx_sb_index); + } + + if (ECORE_LL2_TX_REGISTERED(p_ll2_conn)) { + p_ll2_conn->tx_queue.b_cb_registred = false; + ecore_int_unregister_cb(p_hwfn, + p_ll2_conn->tx_queue.tx_sb_index); + } + + OSAL_FREE(p_hwfn->p_dev, p_ll2_conn->tx_queue.descq_array); + ecore_chain_free(p_hwfn->p_dev, &p_ll2_conn->tx_queue.txq_chain); + + OSAL_FREE(p_hwfn->p_dev, p_ll2_conn->rx_queue.descq_array); + ecore_chain_free(p_hwfn->p_dev, &p_ll2_conn->rx_queue.rxq_chain); + ecore_chain_free(p_hwfn->p_dev, &p_ll2_conn->rx_queue.rcq_chain); + + ecore_cxt_release_cid(p_hwfn, p_ll2_conn->cid); + + ecore_ll2_release_connection_ooo(p_hwfn, p_ll2_conn); + + OSAL_MUTEX_ACQUIRE(&p_ll2_conn->mutex); + p_ll2_conn->b_active = false; + OSAL_MUTEX_RELEASE(&p_ll2_conn->mutex); +} + +/* ECORE LL2: internal functions */ + +enum _ecore_status_t ecore_ll2_alloc(struct ecore_hwfn *p_hwfn) +{ + struct ecore_ll2_info *p_ll2_info; + u8 i; + + /* Allocate LL2's set 
struct */ + p_ll2_info = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(struct ecore_ll2_info) * + ECORE_MAX_NUM_OF_LL2_CONNECTIONS); + if (!p_ll2_info) { + DP_NOTICE(p_hwfn, false, + "Failed to allocate `struct ecore_ll2'\n"); + return ECORE_NOMEM; + } + + p_hwfn->p_ll2_info = p_ll2_info; + + for (i = 0; i < ECORE_MAX_NUM_OF_LL2_CONNECTIONS; i++) { +#ifdef CONFIG_ECORE_LOCK_ALLOC + if (OSAL_MUTEX_ALLOC(p_hwfn, &p_ll2_info[i].mutex)) + goto handle_err; + if (OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_ll2_info[i].rx_queue.lock)) + goto handle_err; + if (OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_ll2_info[i].tx_queue.lock)) + goto handle_err; +#endif + p_ll2_info[i].my_id = i; + } + + return ECORE_SUCCESS; +#ifdef CONFIG_ECORE_LOCK_ALLOC +handle_err: + ecore_ll2_free(p_hwfn); + return ECORE_NOMEM; +#endif +} + +void ecore_ll2_setup(struct ecore_hwfn *p_hwfn) +{ + int i; + + for (i = 0; i < ECORE_MAX_NUM_OF_LL2_CONNECTIONS; i++) + OSAL_MUTEX_INIT(&p_hwfn->p_ll2_info[i].mutex); +} + +void ecore_ll2_free(struct ecore_hwfn *p_hwfn) +{ +#ifdef CONFIG_ECORE_LOCK_ALLOC + int i; +#endif + if (!p_hwfn->p_ll2_info) + return; + +#ifdef CONFIG_ECORE_LOCK_ALLOC + for (i = 0; i < ECORE_MAX_NUM_OF_LL2_CONNECTIONS; i++) { + OSAL_SPIN_LOCK_DEALLOC(&p_hwfn->p_ll2_info[i].rx_queue.lock); + OSAL_SPIN_LOCK_DEALLOC(&p_hwfn->p_ll2_info[i].tx_queue.lock); + OSAL_MUTEX_DEALLOC(&p_hwfn->p_ll2_info[i].mutex); + } +#endif + OSAL_FREE(p_hwfn->p_dev, p_hwfn->p_ll2_info); + p_hwfn->p_ll2_info = OSAL_NULL; +} + +static void _ecore_ll2_get_port_stats(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt, + struct ecore_ll2_stats *p_stats) +{ + struct core_ll2_port_stats port_stats; + + OSAL_MEMSET(&port_stats, 0, sizeof(port_stats)); + ecore_memcpy_from(p_hwfn, p_ptt, &port_stats, + BAR0_MAP_REG_TSDM_RAM + + TSTORM_LL2_PORT_STAT_OFFSET(MFW_PORT(p_hwfn)), + sizeof(port_stats)); + + p_stats->gsi_invalid_hdr += + HILO_64_REGPAIR(port_stats.gsi_invalid_hdr); + p_stats->gsi_invalid_pkt_length += + HILO_64_REGPAIR(port_stats.gsi_invalid_pkt_length); + p_stats->gsi_unsupported_pkt_typ += + HILO_64_REGPAIR(port_stats.gsi_unsupported_pkt_typ); + p_stats->gsi_crcchksm_error += + HILO_64_REGPAIR(port_stats.gsi_crcchksm_error); +} + +static void _ecore_ll2_get_tstats(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt, + struct ecore_ll2_info *p_ll2_conn, + struct ecore_ll2_stats *p_stats) +{ + struct core_ll2_tstorm_per_queue_stat tstats; + u8 qid = p_ll2_conn->queue_id; + u32 tstats_addr; + + OSAL_MEMSET(&tstats, 0, sizeof(tstats)); + tstats_addr = BAR0_MAP_REG_TSDM_RAM + + CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(qid); + ecore_memcpy_from(p_hwfn, p_ptt, &tstats, + tstats_addr, + sizeof(tstats)); + + p_stats->packet_too_big_discard += + HILO_64_REGPAIR(tstats.packet_too_big_discard); + p_stats->no_buff_discard += + HILO_64_REGPAIR(tstats.no_buff_discard); +} + +static void _ecore_ll2_get_ustats(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt, + struct ecore_ll2_info *p_ll2_conn, + struct ecore_ll2_stats *p_stats) +{ + struct core_ll2_ustorm_per_queue_stat ustats; + u8 qid = p_ll2_conn->queue_id; + u32 ustats_addr; + + OSAL_MEMSET(&ustats, 0, sizeof(ustats)); + ustats_addr = BAR0_MAP_REG_USDM_RAM + + CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(qid); + ecore_memcpy_from(p_hwfn, p_ptt, &ustats, + ustats_addr, + sizeof(ustats)); + + p_stats->rcv_ucast_bytes += HILO_64_REGPAIR(ustats.rcv_ucast_bytes); + p_stats->rcv_mcast_bytes += HILO_64_REGPAIR(ustats.rcv_mcast_bytes); + p_stats->rcv_bcast_bytes += HILO_64_REGPAIR(ustats.rcv_bcast_bytes); + 
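+ /* The matching packet counters accumulate the same way. */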
p_stats->rcv_ucast_pkts += HILO_64_REGPAIR(ustats.rcv_ucast_pkts); + p_stats->rcv_mcast_pkts += HILO_64_REGPAIR(ustats.rcv_mcast_pkts); + p_stats->rcv_bcast_pkts += HILO_64_REGPAIR(ustats.rcv_bcast_pkts); +} + +static void _ecore_ll2_get_pstats(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt, + struct ecore_ll2_info *p_ll2_conn, + struct ecore_ll2_stats *p_stats) +{ + struct core_ll2_pstorm_per_queue_stat pstats; + u8 stats_id = p_ll2_conn->tx_stats_id; + u32 pstats_addr; + + OSAL_MEMSET(&pstats, 0, sizeof(pstats)); + pstats_addr = BAR0_MAP_REG_PSDM_RAM + + CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(stats_id); + ecore_memcpy_from(p_hwfn, p_ptt, &pstats, + pstats_addr, + sizeof(pstats)); + + p_stats->sent_ucast_bytes += HILO_64_REGPAIR(pstats.sent_ucast_bytes); + p_stats->sent_mcast_bytes += HILO_64_REGPAIR(pstats.sent_mcast_bytes); + p_stats->sent_bcast_bytes += HILO_64_REGPAIR(pstats.sent_bcast_bytes); + p_stats->sent_ucast_pkts += HILO_64_REGPAIR(pstats.sent_ucast_pkts); + p_stats->sent_mcast_pkts += HILO_64_REGPAIR(pstats.sent_mcast_pkts); + p_stats->sent_bcast_pkts += HILO_64_REGPAIR(pstats.sent_bcast_pkts); +} + +enum _ecore_status_t __ecore_ll2_get_stats(void *cxt, + u8 connection_handle, + struct ecore_ll2_stats *p_stats) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)cxt; + struct ecore_ll2_info *p_ll2_conn = OSAL_NULL; + struct ecore_ptt *p_ptt; + + if ((connection_handle >= ECORE_MAX_NUM_OF_LL2_CONNECTIONS) || + !p_hwfn->p_ll2_info) { + return ECORE_INVAL; + } + + p_ll2_conn = &p_hwfn->p_ll2_info[connection_handle]; + + p_ptt = ecore_ptt_acquire(p_hwfn); + if (!p_ptt) { + DP_ERR(p_hwfn, "Failed to acquire ptt\n"); + return ECORE_INVAL; + } + + if (p_ll2_conn->input.gsi_enable) + _ecore_ll2_get_port_stats(p_hwfn, p_ptt, p_stats); + + _ecore_ll2_get_tstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + + _ecore_ll2_get_ustats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + + if (p_ll2_conn->tx_stats_en) + _ecore_ll2_get_pstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + + ecore_ptt_release(p_hwfn, p_ptt); + + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_ll2_get_stats(void *cxt, + u8 connection_handle, + struct ecore_ll2_stats *p_stats) +{ + OSAL_MEMSET(p_stats, 0, sizeof(*p_stats)); + + return __ecore_ll2_get_stats(cxt, connection_handle, p_stats); +} + +/**/ + +#ifdef _NTDDK_ +#pragma warning(pop) +#endif diff --git a/sys/dev/qlnx/qlnxe/ecore_ooo.c b/sys/dev/qlnx/qlnxe/ecore_ooo.c new file mode 100644 index 000000000000..50359b6e20c6 --- /dev/null +++ b/sys/dev/qlnx/qlnxe/ecore_ooo.c @@ -0,0 +1,603 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * File : ecore_ooo.c + */ +#include +__FBSDID("$FreeBSD$"); + +#include "bcm_osal.h" + +#include "ecore.h" +#include "ecore_status.h" +#include "ecore_ll2.h" +#include "ecore_ooo.h" +#include "ecore_iscsi.h" +#include "ecore_cxt.h" +/* + * Static OOO functions + */ + +static struct ecore_ooo_archipelago * +ecore_ooo_seek_archipelago(struct ecore_ooo_info *p_ooo_info, u32 cid) +{ + u32 idx = (cid & 0xffff) - p_ooo_info->cid_base; + struct ecore_ooo_archipelago *p_archipelago; + + if (idx >= p_ooo_info->max_num_archipelagos) + return OSAL_NULL; + + p_archipelago = &p_ooo_info->p_archipelagos_mem[idx]; + + if (OSAL_LIST_IS_EMPTY(&p_archipelago->isles_list)) + return OSAL_NULL; + + return p_archipelago; +} + +static struct ecore_ooo_isle *ecore_ooo_seek_isle(struct ecore_hwfn *p_hwfn, + struct ecore_ooo_info *p_ooo_info, + u32 cid, u8 isle) +{ + struct ecore_ooo_archipelago *p_archipelago = OSAL_NULL; + struct ecore_ooo_isle *p_isle = OSAL_NULL; + u8 the_num_of_isle = 1; + + p_archipelago = ecore_ooo_seek_archipelago(p_ooo_info, cid); + if (!p_archipelago) { + DP_NOTICE(p_hwfn, true, + "Connection %d is not found in OOO list\n", cid); + return OSAL_NULL; + } + + OSAL_LIST_FOR_EACH_ENTRY(p_isle, + &p_archipelago->isles_list, + list_entry, struct ecore_ooo_isle) { + if (the_num_of_isle == isle) + return p_isle; + the_num_of_isle++; + } + + return OSAL_NULL; +} + +void ecore_ooo_save_history_entry(struct ecore_ooo_info *p_ooo_info, + struct ooo_opaque *p_cqe) +{ + struct ecore_ooo_history *p_history = &p_ooo_info->ooo_history; + + if (p_history->head_idx == p_history->num_of_cqes) + p_history->head_idx = 0; + p_history->p_cqes[p_history->head_idx] = *p_cqe; + p_history->head_idx++; +} + +//#ifdef CONFIG_ECORE_ISCSI +#if defined(CONFIG_ECORE_ISCSI) || defined(CONFIG_ECORE_IWARP) +enum _ecore_status_t ecore_ooo_alloc(struct ecore_hwfn *p_hwfn) +{ + u16 max_num_archipelagos = 0, cid_base; + struct ecore_ooo_info *p_ooo_info; + u16 max_num_isles = 0; + u32 i; + + switch (p_hwfn->hw_info.personality) { + case ECORE_PCI_ISCSI: + max_num_archipelagos = + p_hwfn->pf_params.iscsi_pf_params.num_cons; + cid_base =(u16)ecore_cxt_get_proto_cid_start(p_hwfn, + PROTOCOLID_ISCSI); + break; + case ECORE_PCI_ETH_RDMA: + case ECORE_PCI_ETH_IWARP: + max_num_archipelagos = + (u16)ecore_cxt_get_proto_cid_count(p_hwfn, + PROTOCOLID_IWARP, + OSAL_NULL); + cid_base = (u16)ecore_cxt_get_proto_cid_start(p_hwfn, + PROTOCOLID_IWARP); + break; + default: + DP_NOTICE(p_hwfn, true, + "Failed to allocate ecore_ooo_info: unknown personalization\n"); + return ECORE_INVAL; + } + + max_num_isles = ECORE_MAX_NUM_ISLES + max_num_archipelagos; + + if (!max_num_archipelagos) { + DP_NOTICE(p_hwfn, true, + "Failed to allocate ecore_ooo_info: unknown amount of connections\n"); + return ECORE_INVAL; + } + + p_ooo_info = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(*p_ooo_info)); + if (!p_ooo_info) { + DP_NOTICE(p_hwfn, true, "Failed to allocate ecore_ooo_info\n"); + return ECORE_NOMEM; + } + 
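+ /* Only the low 16 bits of the CID (the icid) index the archipelago + * array; ecore_ooo_seek_archipelago() subtracts cid_base from them. + */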
p_ooo_info->cid_base = cid_base; /* We look only at the icid */ + p_ooo_info->max_num_archipelagos = max_num_archipelagos; + + OSAL_LIST_INIT(&p_ooo_info->free_buffers_list); + OSAL_LIST_INIT(&p_ooo_info->ready_buffers_list); + OSAL_LIST_INIT(&p_ooo_info->free_isles_list); + + p_ooo_info->p_isles_mem = + OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(struct ecore_ooo_isle) * + max_num_isles); + if (!p_ooo_info->p_isles_mem) { + DP_NOTICE(p_hwfn, true, + "Failed to allocate ecore_ooo_info (isles)\n"); + goto no_isles_mem; + } + + for (i = 0; i < max_num_isles; i++) { + OSAL_LIST_INIT(&p_ooo_info->p_isles_mem[i].buffers_list); + OSAL_LIST_PUSH_TAIL(&p_ooo_info->p_isles_mem[i].list_entry, + &p_ooo_info->free_isles_list); + } + + p_ooo_info->p_archipelagos_mem = + OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(struct ecore_ooo_archipelago) * + max_num_archipelagos); + if (!p_ooo_info->p_archipelagos_mem) { + DP_NOTICE(p_hwfn, true, + "Failed to allocate ecore_ooo_info (archipelagos)\n"); + goto no_archipelagos_mem; + } + + for (i = 0; i < max_num_archipelagos; i++) { + OSAL_LIST_INIT(&p_ooo_info->p_archipelagos_mem[i].isles_list); + } + + p_ooo_info->ooo_history.p_cqes = + OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, + sizeof(struct ooo_opaque) * + ECORE_MAX_NUM_OOO_HISTORY_ENTRIES); + if (!p_ooo_info->ooo_history.p_cqes) { + DP_NOTICE(p_hwfn, true, + "Failed to allocate ecore_ooo_info (history)\n"); + goto no_history_mem; + } + p_ooo_info->ooo_history.num_of_cqes = + ECORE_MAX_NUM_OOO_HISTORY_ENTRIES; + + p_hwfn->p_ooo_info = p_ooo_info; + return ECORE_SUCCESS; + +no_history_mem: + OSAL_FREE(p_hwfn->p_dev, p_ooo_info->p_archipelagos_mem); +no_archipelagos_mem: + OSAL_FREE(p_hwfn->p_dev, p_ooo_info->p_isles_mem); +no_isles_mem: + OSAL_FREE(p_hwfn->p_dev, p_ooo_info); + return ECORE_NOMEM; +} +#endif + +void ecore_ooo_release_connection_isles(struct ecore_ooo_info *p_ooo_info, + u32 cid) +{ + struct ecore_ooo_archipelago *p_archipelago; + struct ecore_ooo_buffer *p_buffer; + struct ecore_ooo_isle *p_isle; + + p_archipelago = ecore_ooo_seek_archipelago(p_ooo_info, cid); + if (!p_archipelago) + return; + + while (!OSAL_LIST_IS_EMPTY(&p_archipelago->isles_list)) { + p_isle = OSAL_LIST_FIRST_ENTRY( + &p_archipelago->isles_list, + struct ecore_ooo_isle, list_entry); + +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_isle->list_entry, + &p_archipelago->isles_list); + + while (!OSAL_LIST_IS_EMPTY(&p_isle->buffers_list)) { + p_buffer = + OSAL_LIST_FIRST_ENTRY( + &p_isle->buffers_list, + struct ecore_ooo_buffer, list_entry); + + if (p_buffer == OSAL_NULL) + break; +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_buffer->list_entry, + &p_isle->buffers_list); + OSAL_LIST_PUSH_TAIL(&p_buffer->list_entry, + &p_ooo_info->free_buffers_list); + } + OSAL_LIST_PUSH_TAIL(&p_isle->list_entry, + &p_ooo_info->free_isles_list); + } +} + +void ecore_ooo_release_all_isles(struct ecore_ooo_info *p_ooo_info) +{ + struct ecore_ooo_archipelago *p_archipelago; + struct ecore_ooo_buffer *p_buffer; + struct ecore_ooo_isle *p_isle; + u32 i; + + for (i = 0; i < p_ooo_info->max_num_archipelagos; i++) { + p_archipelago = &(p_ooo_info->p_archipelagos_mem[i]); + +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + while (!OSAL_LIST_IS_EMPTY(&p_archipelago->isles_list)) { + p_isle = OSAL_LIST_FIRST_ENTRY( + &p_archipelago->isles_list, + struct ecore_ooo_isle, list_entry); + +#if defined(_NTDDK_) +#pragma warning(suppress :
6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_isle->list_entry, + &p_archipelago->isles_list); + + while (!OSAL_LIST_IS_EMPTY(&p_isle->buffers_list)) { + p_buffer = + OSAL_LIST_FIRST_ENTRY( + &p_isle->buffers_list , + struct ecore_ooo_buffer, list_entry); + + if (p_buffer == OSAL_NULL) + break; +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_buffer->list_entry, + &p_isle->buffers_list); + OSAL_LIST_PUSH_TAIL(&p_buffer->list_entry, + &p_ooo_info->free_buffers_list); + } + OSAL_LIST_PUSH_TAIL(&p_isle->list_entry, + &p_ooo_info->free_isles_list); + } + } + if (!OSAL_LIST_IS_EMPTY(&p_ooo_info->ready_buffers_list)) { + OSAL_LIST_SPLICE_TAIL_INIT(&p_ooo_info->ready_buffers_list, + &p_ooo_info->free_buffers_list); + } +} + +//#ifdef CONFIG_ECORE_ISCSI +#if defined(CONFIG_ECORE_ISCSI) || defined(CONFIG_ECORE_IWARP) +void ecore_ooo_setup(struct ecore_hwfn *p_hwfn) +{ + ecore_ooo_release_all_isles(p_hwfn->p_ooo_info); + OSAL_MEM_ZERO(p_hwfn->p_ooo_info->ooo_history.p_cqes, + p_hwfn->p_ooo_info->ooo_history.num_of_cqes * + sizeof(struct ooo_opaque)); + p_hwfn->p_ooo_info->ooo_history.head_idx = 0; +} + +void ecore_ooo_free(struct ecore_hwfn *p_hwfn) +{ + struct ecore_ooo_info *p_ooo_info = p_hwfn->p_ooo_info; + struct ecore_ooo_buffer *p_buffer; + + if (!p_ooo_info) + return; + + ecore_ooo_release_all_isles(p_ooo_info); + while (!OSAL_LIST_IS_EMPTY(&p_ooo_info->free_buffers_list)) { + p_buffer = OSAL_LIST_FIRST_ENTRY(&p_ooo_info-> + free_buffers_list, + struct ecore_ooo_buffer, + list_entry); + if (p_buffer == OSAL_NULL) + break; +#if defined(_NTDDK_) +#pragma warning(suppress : 6011 28182) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_buffer->list_entry, + &p_ooo_info->free_buffers_list); + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + p_buffer->rx_buffer_virt_addr, + p_buffer->rx_buffer_phys_addr, + p_buffer->rx_buffer_size); + OSAL_FREE(p_hwfn->p_dev, p_buffer); + } + + OSAL_FREE(p_hwfn->p_dev, p_ooo_info->p_isles_mem); + OSAL_FREE(p_hwfn->p_dev, p_ooo_info->p_archipelagos_mem); + OSAL_FREE(p_hwfn->p_dev, p_ooo_info->ooo_history.p_cqes); + OSAL_FREE(p_hwfn->p_dev, p_ooo_info); + p_hwfn->p_ooo_info = OSAL_NULL; +} +#endif + +void ecore_ooo_put_free_buffer(struct ecore_ooo_info *p_ooo_info, + struct ecore_ooo_buffer *p_buffer) +{ + OSAL_LIST_PUSH_TAIL(&p_buffer->list_entry, + &p_ooo_info->free_buffers_list); +} + +struct ecore_ooo_buffer * +ecore_ooo_get_free_buffer(struct ecore_ooo_info *p_ooo_info) +{ + struct ecore_ooo_buffer *p_buffer = OSAL_NULL; + + if (!OSAL_LIST_IS_EMPTY(&p_ooo_info->free_buffers_list)) { + p_buffer = + OSAL_LIST_FIRST_ENTRY( + &p_ooo_info->free_buffers_list, + struct ecore_ooo_buffer, list_entry); + + OSAL_LIST_REMOVE_ENTRY(&p_buffer->list_entry, + &p_ooo_info->free_buffers_list); + } + + return p_buffer; +} + +void ecore_ooo_put_ready_buffer(struct ecore_ooo_info *p_ooo_info, + struct ecore_ooo_buffer *p_buffer, u8 on_tail) +{ + if (on_tail) { + OSAL_LIST_PUSH_TAIL(&p_buffer->list_entry, + &p_ooo_info->ready_buffers_list); + } else { + OSAL_LIST_PUSH_HEAD(&p_buffer->list_entry, + &p_ooo_info->ready_buffers_list); + } +} + +struct ecore_ooo_buffer * +ecore_ooo_get_ready_buffer(struct ecore_ooo_info *p_ooo_info) +{ + struct ecore_ooo_buffer *p_buffer = OSAL_NULL; + + if (!OSAL_LIST_IS_EMPTY(&p_ooo_info->ready_buffers_list)) { + p_buffer = + OSAL_LIST_FIRST_ENTRY( + &p_ooo_info->ready_buffers_list, + struct ecore_ooo_buffer, list_entry); + + OSAL_LIST_REMOVE_ENTRY(&p_buffer->list_entry, + &p_ooo_info->ready_buffers_list); + } + + return 
p_buffer; +} + +void ecore_ooo_delete_isles(struct ecore_hwfn *p_hwfn, + struct ecore_ooo_info *p_ooo_info, + u32 cid, + u8 drop_isle, + u8 drop_size) +{ + struct ecore_ooo_archipelago *p_archipelago = OSAL_NULL; + struct ecore_ooo_isle *p_isle = OSAL_NULL; + u8 isle_idx; + + p_archipelago = ecore_ooo_seek_archipelago(p_ooo_info, cid); + for (isle_idx = 0; isle_idx < drop_size; isle_idx++) { + p_isle = ecore_ooo_seek_isle(p_hwfn, p_ooo_info, + cid, drop_isle); + if (!p_isle) { + DP_NOTICE(p_hwfn, true, + "Isle %d is not found(cid %d)\n", + drop_isle, cid); + return; + } + if (OSAL_LIST_IS_EMPTY(&p_isle->buffers_list)) { + DP_NOTICE(p_hwfn, true, + "Isle %d is empty(cid %d)\n", + drop_isle, cid); + } else { + OSAL_LIST_SPLICE_TAIL_INIT(&p_isle->buffers_list, + &p_ooo_info->free_buffers_list); + } +#if defined(_NTDDK_) +#pragma warning(suppress : 6011) +#endif + OSAL_LIST_REMOVE_ENTRY(&p_isle->list_entry, + &p_archipelago->isles_list); + p_ooo_info->cur_isles_number--; + OSAL_LIST_PUSH_HEAD(&p_isle->list_entry, + &p_ooo_info->free_isles_list); + } +} + +void ecore_ooo_add_new_isle(struct ecore_hwfn *p_hwfn, + struct ecore_ooo_info *p_ooo_info, + u32 cid, u8 ooo_isle, + struct ecore_ooo_buffer *p_buffer) +{ + struct ecore_ooo_archipelago *p_archipelago = OSAL_NULL; + struct ecore_ooo_isle *p_prev_isle = OSAL_NULL; + struct ecore_ooo_isle *p_isle = OSAL_NULL; + + if (ooo_isle > 1) { + p_prev_isle = ecore_ooo_seek_isle(p_hwfn, p_ooo_info, cid, ooo_isle - 1); + if (!p_prev_isle) { + DP_NOTICE(p_hwfn, true, + "Isle %d is not found(cid %d)\n", + ooo_isle - 1, cid); + return; + } + } + p_archipelago = ecore_ooo_seek_archipelago(p_ooo_info, cid); + if (!p_archipelago && (ooo_isle != 1)) { + DP_NOTICE(p_hwfn, true, + "Connection %d is not found in OOO list\n", cid); + return; + } + + if (!OSAL_LIST_IS_EMPTY(&p_ooo_info->free_isles_list)) { + p_isle = + OSAL_LIST_FIRST_ENTRY( + &p_ooo_info->free_isles_list, + struct ecore_ooo_isle, list_entry); + + OSAL_LIST_REMOVE_ENTRY(&p_isle->list_entry, + &p_ooo_info->free_isles_list); + if (!OSAL_LIST_IS_EMPTY(&p_isle->buffers_list)) { + DP_NOTICE(p_hwfn, true, "Free isle is not empty\n"); + OSAL_LIST_INIT(&p_isle->buffers_list); + } + } else { + DP_NOTICE(p_hwfn, true, "No more free isles\n"); + return; + } + + if (!p_archipelago) { + u32 idx = (cid & 0xffff) - p_ooo_info->cid_base; + + p_archipelago = &p_ooo_info->p_archipelagos_mem[idx]; + } + OSAL_LIST_PUSH_HEAD(&p_buffer->list_entry, &p_isle->buffers_list); + p_ooo_info->cur_isles_number++; + p_ooo_info->gen_isles_number++; + if (p_ooo_info->cur_isles_number > p_ooo_info->max_isles_number) + p_ooo_info->max_isles_number = p_ooo_info->cur_isles_number; + if (!p_prev_isle) { + OSAL_LIST_PUSH_HEAD(&p_isle->list_entry, &p_archipelago->isles_list); + } else { + OSAL_LIST_INSERT_ENTRY_AFTER(&p_isle->list_entry, + &p_prev_isle->list_entry, + &p_archipelago->isles_list); + } +} + +void ecore_ooo_add_new_buffer(struct ecore_hwfn *p_hwfn, + struct ecore_ooo_info *p_ooo_info, + u32 cid, + u8 ooo_isle, + struct ecore_ooo_buffer *p_buffer, + u8 buffer_side) +{ + struct ecore_ooo_isle * p_isle = OSAL_NULL; + p_isle = ecore_ooo_seek_isle(p_hwfn, p_ooo_info, cid, ooo_isle); + if (!p_isle) { + DP_NOTICE(p_hwfn, true, + "Isle %d is not found(cid %d)\n", + ooo_isle, cid); + return; + } + if (buffer_side == ECORE_OOO_LEFT_BUF) { + OSAL_LIST_PUSH_HEAD(&p_buffer->list_entry, + &p_isle->buffers_list); + } else { + OSAL_LIST_PUSH_TAIL(&p_buffer->list_entry, + &p_isle->buffers_list); + } +} + +void ecore_ooo_join_isles(struct 
ecore_hwfn *p_hwfn, + struct ecore_ooo_info *p_ooo_info, + u32 cid, u8 left_isle) +{ + struct ecore_ooo_archipelago *p_archipelago = OSAL_NULL; + struct ecore_ooo_isle *p_right_isle = OSAL_NULL; + struct ecore_ooo_isle *p_left_isle = OSAL_NULL; + + p_right_isle = ecore_ooo_seek_isle(p_hwfn, p_ooo_info, cid, + left_isle + 1); + if (!p_right_isle) { + DP_NOTICE(p_hwfn, true, + "Right isle %d is not found(cid %d)\n", + left_isle + 1, cid); + return; + } + p_archipelago = ecore_ooo_seek_archipelago(p_ooo_info, cid); + OSAL_LIST_REMOVE_ENTRY(&p_right_isle->list_entry, + &p_archipelago->isles_list); + p_ooo_info->cur_isles_number--; + if (left_isle) { + p_left_isle = ecore_ooo_seek_isle(p_hwfn, p_ooo_info, cid, + left_isle); + if (!p_left_isle) { + DP_NOTICE(p_hwfn, true, + "Left isle %d is not found(cid %d)\n", + left_isle, cid); + return; + } + OSAL_LIST_SPLICE_TAIL_INIT(&p_right_isle->buffers_list, + &p_left_isle->buffers_list); + } else { + OSAL_LIST_SPLICE_TAIL_INIT(&p_right_isle->buffers_list, + &p_ooo_info->ready_buffers_list); + } + OSAL_LIST_PUSH_TAIL(&p_right_isle->list_entry, + &p_ooo_info->free_isles_list); +} + +void ecore_ooo_dump_rx_event(struct ecore_hwfn *p_hwfn, + struct ooo_opaque *iscsi_ooo, + struct ecore_ooo_buffer *p_buffer) +{ + int i; + u32 dp_module = ECORE_MSG_OOO; + u32 ph_hi, ph_lo; + u8 *packet_buffer = 0; + + if (p_hwfn->dp_level > ECORE_LEVEL_VERBOSE) + return; + if (!(p_hwfn->dp_module & dp_module)) + return; + + packet_buffer = (u8 *)p_buffer->rx_buffer_virt_addr + + p_buffer->placement_offset; + DP_VERBOSE(p_hwfn, dp_module, + "******************************************************\n"); + ph_hi = DMA_HI(p_buffer->rx_buffer_phys_addr); + ph_lo = DMA_LO(p_buffer->rx_buffer_phys_addr); + DP_VERBOSE(p_hwfn, dp_module, + "0x%x-%x: CID 0x%x, OP 0x%x, ISLE 0x%x\n", + ph_hi, ph_lo, + iscsi_ooo->cid, iscsi_ooo->ooo_opcode, iscsi_ooo->ooo_isle); + for (i = 0; i < 64; i = i + 8) { + DP_VERBOSE(p_hwfn, dp_module, + "0x%x-%x: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", + ph_hi, ph_lo, + packet_buffer[i], + packet_buffer[i + 1], + packet_buffer[i + 2], + packet_buffer[i + 3], + packet_buffer[i + 4], + packet_buffer[i + 5], + packet_buffer[i + 6], + packet_buffer[i + 7]); + } +} diff --git a/sys/dev/qlnx/qlnxe/ecore_rdma.c b/sys/dev/qlnx/qlnxe/ecore_rdma.c new file mode 100644 index 000000000000..eb23aeb5cbfe --- /dev/null +++ b/sys/dev/qlnx/qlnxe/ecore_rdma.c @@ -0,0 +1,2697 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * File : ecore_rdma.c + */ +#include +__FBSDID("$FreeBSD$"); + +#include "bcm_osal.h" +#include "ecore.h" +#include "ecore_status.h" +#include "ecore_sp_commands.h" +#include "ecore_cxt.h" +#include "ecore_rdma.h" +#include "reg_addr.h" +#include "ecore_rt_defs.h" +#include "ecore_init_ops.h" +#include "ecore_hw.h" +#include "ecore_mcp.h" +#include "ecore_init_fw_funcs.h" +#include "ecore_int.h" +#include "pcics_reg_driver.h" +#include "ecore_iro.h" +#include "ecore_gtt_reg_addr.h" +#include "ecore_hsi_iwarp.h" +#include "ecore_ll2.h" +#include "ecore_ooo.h" +#ifndef LINUX_REMOVE +#include "ecore_tcp_ip.h" +#endif + +enum _ecore_status_t ecore_rdma_bmap_alloc(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap, + u32 max_count, + char *name) +{ + u32 size_in_bytes; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "max_count = %08x\n", max_count); + + bmap->max_count = max_count; + + if (!max_count) { + bmap->bitmap = OSAL_NULL; + return ECORE_SUCCESS; + } + + size_in_bytes = sizeof(unsigned long) * + DIV_ROUND_UP(max_count, (sizeof(unsigned long) * 8)); + + bmap->bitmap = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, size_in_bytes); + if (!bmap->bitmap) + { + DP_NOTICE(p_hwfn, false, + "ecore bmap alloc failed: cannot allocate memory (bitmap). rc = %d\n", + ECORE_NOMEM); + return ECORE_NOMEM; + } + + OSAL_SNPRINTF(bmap->name, QEDR_MAX_BMAP_NAME, "%s", name); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ECORE_SUCCESS\n"); + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_rdma_bmap_alloc_id(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap, + u32 *id_num) +{ + *id_num = OSAL_FIND_FIRST_ZERO_BIT(bmap->bitmap, bmap->max_count); + if (*id_num >= bmap->max_count) + return ECORE_INVAL; + + OSAL_SET_BIT(*id_num, bmap->bitmap); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "%s bitmap: allocated id %d\n", + bmap->name, *id_num); + + return ECORE_SUCCESS; +} + +void ecore_bmap_set_id(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap, + u32 id_num) +{ + if (id_num >= bmap->max_count) { + DP_NOTICE(p_hwfn, true, + "%s bitmap: cannot set id %d max is %d\n", + bmap->name, id_num, bmap->max_count); + + return; + } + + OSAL_SET_BIT(id_num, bmap->bitmap); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "%s bitmap: set id %d\n", + bmap->name, id_num); +} + +void ecore_bmap_release_id(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap, + u32 id_num) +{ + bool b_acquired; + + if (id_num >= bmap->max_count) + return; + + b_acquired = OSAL_TEST_AND_CLEAR_BIT(id_num, bmap->bitmap); + if (!b_acquired) + { + DP_NOTICE(p_hwfn, false, "%s bitmap: id %d already released\n", + bmap->name, id_num); + return; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "%s bitmap: released id %d\n", + bmap->name, id_num); +} + +int ecore_bmap_test_id(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap, + u32 id_num) +{ + if (id_num >= bmap->max_count) { + DP_NOTICE(p_hwfn, true, + "%s bitmap: id %d too high. 
max is %d\n", + bmap->name, id_num, bmap->max_count); + return -1; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "%s bitmap: tested id %d\n", + bmap->name, id_num); + + return OSAL_TEST_BIT(id_num, bmap->bitmap); +} + +static bool ecore_bmap_is_empty(struct ecore_bmap *bmap) +{ + return (bmap->max_count == + OSAL_FIND_FIRST_BIT(bmap->bitmap, bmap->max_count)); +} + +#ifndef LINUX_REMOVE +u32 ecore_rdma_get_sb_id(struct ecore_hwfn *p_hwfn, u32 rel_sb_id) +{ + /* first sb id for RoCE is after all the l2 sb */ + return FEAT_NUM(p_hwfn, ECORE_PF_L2_QUE) + rel_sb_id; +} + +u32 ecore_rdma_query_cau_timer_res(void) +{ + return ECORE_CAU_DEF_RX_TIMER_RES; +} +#endif + +enum _ecore_status_t ecore_rdma_info_alloc(struct ecore_hwfn *p_hwfn) +{ + struct ecore_rdma_info *p_rdma_info; + + p_rdma_info = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*p_rdma_info)); + if (!p_rdma_info) { + DP_NOTICE(p_hwfn, false, + "ecore rdma alloc failed: cannot allocate memory (rdma info).\n"); + return ECORE_NOMEM; + } + p_hwfn->p_rdma_info = p_rdma_info; + +#ifdef CONFIG_ECORE_LOCK_ALLOC + if (OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_rdma_info->lock)) { + ecore_rdma_info_free(p_hwfn); + return ECORE_NOMEM; + } +#endif + OSAL_SPIN_LOCK_INIT(&p_rdma_info->lock); + + return ECORE_SUCCESS; +} + +void ecore_rdma_info_free(struct ecore_hwfn *p_hwfn) +{ +#ifdef CONFIG_ECORE_LOCK_ALLOC + OSAL_SPIN_LOCK_DEALLOC(&p_hwfn->p_rdma_info->lock); +#endif + OSAL_FREE(p_hwfn->p_dev, p_hwfn->p_rdma_info); + p_hwfn->p_rdma_info = OSAL_NULL; +} + +static enum _ecore_status_t ecore_rdma_inc_ref_cnt(struct ecore_hwfn *p_hwfn) +{ + enum _ecore_status_t rc = ECORE_INVAL; + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + if (p_hwfn->p_rdma_info->active) { + p_hwfn->p_rdma_info->ref_cnt++; + rc = ECORE_SUCCESS; + } else { + DP_INFO(p_hwfn, "Ref cnt requested for inactive rdma\n"); + } + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + return rc; +} + +static void ecore_rdma_dec_ref_cnt(struct ecore_hwfn *p_hwfn) +{ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + p_hwfn->p_rdma_info->ref_cnt--; + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +static void ecore_rdma_activate(struct ecore_hwfn *p_hwfn) +{ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + p_hwfn->p_rdma_info->active = true; + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +/* Part of deactivating rdma is letting all the relevant flows complete before + * we start shutting down: Currently query-stats which can be called from MCP + * context. + */ +/* The longest time it can take a rdma flow to complete */ +#define ECORE_RDMA_MAX_FLOW_TIME (100) +static enum _ecore_status_t ecore_rdma_deactivate(struct ecore_hwfn *p_hwfn) +{ + int wait_count; + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + p_hwfn->p_rdma_info->active = false; + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + /* We'll give each flow it's time to complete... 
*/ + wait_count = p_hwfn->p_rdma_info->ref_cnt; + + while (p_hwfn->p_rdma_info->ref_cnt) { + OSAL_MSLEEP(ECORE_RDMA_MAX_FLOW_TIME); + if (--wait_count == 0) { + DP_NOTICE(p_hwfn, false, + "Timeout on refcnt=%d\n", + p_hwfn->p_rdma_info->ref_cnt); + return ECORE_TIMEOUT; + } + } + return ECORE_SUCCESS; +} + +static enum _ecore_status_t ecore_rdma_alloc(struct ecore_hwfn *p_hwfn) +{ + struct ecore_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + u32 num_cons, num_tasks; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Allocating RDMA\n"); + + if (!p_rdma_info) + return ECORE_INVAL; + + if (p_hwfn->hw_info.personality == ECORE_PCI_ETH_IWARP) + p_rdma_info->proto = PROTOCOLID_IWARP; + else + p_rdma_info->proto = PROTOCOLID_ROCE; + + num_cons = ecore_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, + OSAL_NULL); + + if (IS_IWARP(p_hwfn)) + p_rdma_info->num_qps = num_cons; + else + p_rdma_info->num_qps = num_cons / 2; + + /* INTERNAL: RoCE & iWARP use the same taskid */ + num_tasks = ecore_cxt_get_proto_tid_count(p_hwfn, PROTOCOLID_ROCE); + + /* Each MR uses a single task */ + p_rdma_info->num_mrs = num_tasks; + + /* Queue zone lines are shared between RoCE and L2 in such a way that + * they can be used by each without obstructing the other. + */ + p_rdma_info->queue_zone_base = (u16) RESC_START(p_hwfn, ECORE_L2_QUEUE); + p_rdma_info->max_queue_zones = (u16) RESC_NUM(p_hwfn, ECORE_L2_QUEUE); + + /* Allocate a struct with device params and fill it */ + p_rdma_info->dev = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*p_rdma_info->dev)); + if (!p_rdma_info->dev) + { + rc = ECORE_NOMEM; + DP_NOTICE(p_hwfn, false, + "ecore rdma alloc failed: cannot allocate memory (rdma info dev). rc = %d\n", + rc); + return rc; + } + + /* Allocate a struct with port params and fill it */ + p_rdma_info->port = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*p_rdma_info->port)); + if (!p_rdma_info->port) + { + DP_NOTICE(p_hwfn, false, + "ecore rdma alloc failed: cannot allocate memory (rdma info port)\n"); + return ECORE_NOMEM; + } + + /* Allocate bit map for pd's */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->pd_map, RDMA_MAX_PDS, + "PD"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate pd_map,rc = %d\n", + rc); + return rc; + } + + /* Allocate bit map for XRC Domains */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->xrcd_map, + ECORE_RDMA_MAX_XRCDS, "XRCD"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate xrcd_map,rc = %d\n", + rc); + return rc; + } + + /* Allocate DPI bitmap */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->dpi_map, + p_hwfn->dpi_count, "DPI"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate DPI bitmap, rc = %d\n", rc); + return rc; + } + + /* Allocate bitmap for cq's. The maximum number of CQs is bounded to + * twice the number of QPs. + */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, + num_cons, "CQ"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate cq bitmap, rc = %d\n", rc); + return rc; + } + + /* Allocate bitmap for toggle bit for cq icids + * We toggle the bit every time we create or resize cq for a given icid. + * The maximum number of CQs is bounded to the number of connections we + * support. (num_qps in iWARP or num_qps/2 in RoCE). 
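+ * A sketch of the flip itself, as performed later by
+ * ecore_rdma_toggle_bit_create_resize_cq() (bmap_id being the icid
+ * made relative to the protocol's first cid):
+ *
+ *	bmap_id = icid - ecore_cxt_get_proto_cid_start(p_hwfn, proto);
+ *	toggle_bit = !OSAL_TEST_AND_FLIP_BIT(bmap_id, toggle_bits.bitmap);
+ *
+ * The returned value is copied into the create/resize CQ ramrod so
+ * firmware can distinguish successive create/resize operations on
+ * the same icid.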
+ */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->toggle_bits, + num_cons, "Toggle"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate toogle bits, rc = %d\n", rc); + return rc; + } + + /* Allocate bitmap for itids */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->tid_map, + p_rdma_info->num_mrs, "MR"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate itids bitmaps, rc = %d\n", rc); + return rc; + } + + /* Allocate bitmap for qps. */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->qp_map, + p_rdma_info->num_qps, "QP"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate qp bitmap, rc = %d\n", rc); + return rc; + } + + /* Allocate bitmap for cids used for responders/requesters. */ + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cid_map, num_cons, + "REAL CID"); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate cid bitmap, rc = %d\n", rc); + return rc; + } + + /* The first SRQ follows the last XRC SRQ. This means that the + * SRQ IDs start from an offset equals to max_xrc_srqs. + */ + p_rdma_info->srq_id_offset = (u16)ecore_cxt_get_xrc_srq_count(p_hwfn); + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->xrc_srq_map, + p_rdma_info->srq_id_offset, "XRC SRQ"); + if (rc != ECORE_SUCCESS) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate xrc srq bitmap, rc = %d\n", rc); + return rc; + } + + /* Allocate bitmap for srqs */ + p_rdma_info->num_srqs = ecore_cxt_get_srq_count(p_hwfn); + rc = ecore_rdma_bmap_alloc(p_hwfn, &p_rdma_info->srq_map, + p_rdma_info->num_srqs, + "SRQ"); + if (rc != ECORE_SUCCESS) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Failed to allocate srq bitmap, rc = %d\n", rc); + + return rc; + } + + if (IS_IWARP(p_hwfn)) + rc = ecore_iwarp_alloc(p_hwfn); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + + return rc; +} + +void ecore_rdma_bmap_free(struct ecore_hwfn *p_hwfn, + struct ecore_bmap *bmap, + bool check) +{ + int weight, line, item, last_line, last_item; + u64 *pmap; + + if (!bmap || !bmap->bitmap) + return; + + if (!check) + goto end; + + weight = OSAL_BITMAP_WEIGHT(bmap->bitmap, bmap->max_count); + if (!weight) + goto end; + + DP_NOTICE(p_hwfn, false, + "%s bitmap not free - size=%d, weight=%d, 512 bits per line\n", + bmap->name, bmap->max_count, weight); + + pmap = (u64 *)bmap->bitmap; + last_line = bmap->max_count / (64*8); + last_item = last_line * 8 + (((bmap->max_count % (64*8)) + 63) / 64); + + /* print aligned non-zero lines, if any */ + for (item = 0, line = 0; line < last_line; line++, item += 8) { + if (OSAL_BITMAP_WEIGHT((unsigned long *)&pmap[item], 64*8)) + DP_NOTICE(p_hwfn, false, + "line 0x%04x: 0x%016llx 0x%016llx 0x%016llx 0x%016llx 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", + line, (unsigned long long)pmap[item], + (unsigned long long)pmap[item+1], + (unsigned long long)pmap[item+2], + (unsigned long long)pmap[item+3], + (unsigned long long)pmap[item+4], + (unsigned long long)pmap[item+5], + (unsigned long long)pmap[item+6], + (unsigned long long)pmap[item+7]); + } + + /* print last unaligned non-zero line, if any */ + if ((bmap->max_count % (64*8)) && + (OSAL_BITMAP_WEIGHT((unsigned long *)&pmap[item], + bmap->max_count-item*64))) { + u8 str_last_line[200] = { 0 }; + int offset; + + offset = OSAL_SPRINTF(str_last_line, "line 0x%04x: ", line); + for (; item < last_item; item++) { + offset += OSAL_SPRINTF(str_last_line+offset, + "0x%016llx ", + (unsigned long 
long)pmap[item]); + } + DP_NOTICE(p_hwfn, false, "%s\n", str_last_line); + } + +end: + OSAL_FREE(p_hwfn->p_dev, bmap->bitmap); + bmap->bitmap = OSAL_NULL; +} + + +void ecore_rdma_resc_free(struct ecore_hwfn *p_hwfn) +{ + if (IS_IWARP(p_hwfn)) + ecore_iwarp_resc_free(p_hwfn); + + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->cid_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->qp_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->pd_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->xrcd_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->cq_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->toggle_bits, 0); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tid_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->srq_map, 1); + ecore_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->xrc_srq_map, 1); + + OSAL_FREE(p_hwfn->p_dev, p_hwfn->p_rdma_info->port); + p_hwfn->p_rdma_info->port = OSAL_NULL; + + OSAL_FREE(p_hwfn->p_dev, p_hwfn->p_rdma_info->dev); + p_hwfn->p_rdma_info->dev = OSAL_NULL; +} + +static OSAL_INLINE void ecore_rdma_free_reserved_lkey(struct ecore_hwfn *p_hwfn) +{ + ecore_rdma_free_tid(p_hwfn, p_hwfn->p_rdma_info->dev->reserved_lkey); +} + +static void ecore_rdma_free_ilt(struct ecore_hwfn *p_hwfn) +{ + /* Free Connection CXT */ + ecore_cxt_free_ilt_range( + p_hwfn, ECORE_ELEM_CXT, + ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto), + ecore_cxt_get_proto_cid_count(p_hwfn, + p_hwfn->p_rdma_info->proto, + OSAL_NULL)); + + /* Free Task CXT ( Intentionally RoCE as task-id is shared between + * RoCE and iWARP + */ + ecore_cxt_free_ilt_range(p_hwfn, ECORE_ELEM_TASK, 0, + ecore_cxt_get_proto_tid_count( + p_hwfn, PROTOCOLID_ROCE)); + + /* Free TSDM CXT */ + ecore_cxt_free_ilt_range(p_hwfn, ECORE_ELEM_SRQ, 0, + ecore_cxt_get_srq_count(p_hwfn)); +} + +static void ecore_rdma_free(struct ecore_hwfn *p_hwfn) +{ + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "\n"); + + ecore_rdma_free_reserved_lkey(p_hwfn); + + ecore_rdma_resc_free(p_hwfn); + + ecore_rdma_free_ilt(p_hwfn); +} + +static void ecore_rdma_get_guid(struct ecore_hwfn *p_hwfn, u8 *guid) +{ + u8 mac_addr[6]; + + OSAL_MEMCPY(&mac_addr[0], &p_hwfn->hw_info.hw_mac_addr[0], ETH_ALEN); + guid[0] = mac_addr[0] ^ 2; + guid[1] = mac_addr[1]; + guid[2] = mac_addr[2]; + guid[3] = 0xff; + guid[4] = 0xfe; + guid[5] = mac_addr[3]; + guid[6] = mac_addr[4]; + guid[7] = mac_addr[5]; +} + + +static void ecore_rdma_init_events( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_start_in_params *params) +{ + struct ecore_rdma_events *events; + + events = &p_hwfn->p_rdma_info->events; + + events->unaffiliated_event = params->events->unaffiliated_event; + events->affiliated_event = params->events->affiliated_event; + events->context = params->events->context; +} + +static void ecore_rdma_init_devinfo( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_start_in_params *params) +{ + struct ecore_rdma_device *dev = p_hwfn->p_rdma_info->dev; + u32 pci_status_control; + + /* Vendor specific information */ + dev->vendor_id = p_hwfn->p_dev->vendor_id; + dev->vendor_part_id = p_hwfn->p_dev->device_id; + dev->hw_ver = 0; + dev->fw_ver = STORM_FW_VERSION; + + ecore_rdma_get_guid(p_hwfn, (u8 *)(&dev->sys_image_guid)); + dev->node_guid = dev->sys_image_guid; + + dev->max_sge = OSAL_MIN_T(u32, RDMA_MAX_SGE_PER_SQ_WQE, + RDMA_MAX_SGE_PER_RQ_WQE); + + if (p_hwfn->p_dev->rdma_max_sge) { + dev->max_sge = OSAL_MIN_T(u32, + 
p_hwfn->p_dev->rdma_max_sge, + dev->max_sge); + } + + /* Set these values according to configuration + * MAX SGE for SRQ is not defined by FW for now + * define it in driver. + * TODO: Get this value from FW. + */ + dev->max_srq_sge = ECORE_RDMA_MAX_SGE_PER_SRQ_WQE; + if (p_hwfn->p_dev->rdma_max_srq_sge) { + dev->max_srq_sge = OSAL_MIN_T(u32, + p_hwfn->p_dev->rdma_max_srq_sge, + dev->max_srq_sge); + } + + dev->max_inline = ROCE_REQ_MAX_INLINE_DATA_SIZE; + dev->max_inline = (p_hwfn->p_dev->rdma_max_inline) ? + OSAL_MIN_T(u32, + p_hwfn->p_dev->rdma_max_inline, + dev->max_inline) : + dev->max_inline; + + dev->max_wqe = ECORE_RDMA_MAX_WQE; + dev->max_cnq = (u8)FEAT_NUM(p_hwfn, ECORE_RDMA_CNQ); + + /* The number of QPs may be higher than ECORE_ROCE_MAX_QPS. because + * it is up-aligned to 16 and then to ILT page size within ecore cxt. + * This is OK in terms of ILT but we don't want to configure the FW + * above its abilities + */ + dev->max_qp = OSAL_MIN_T(u64, ROCE_MAX_QPS, + p_hwfn->p_rdma_info->num_qps); + + /* CQs uses the same icids that QPs use hence they are limited by the + * number of icids. There are two icids per QP. + */ + dev->max_cq = dev->max_qp * 2; + + /* The number of mrs is smaller by 1 since the first is reserved */ + dev->max_mr = p_hwfn->p_rdma_info->num_mrs - 1; + dev->max_mr_size = ECORE_RDMA_MAX_MR_SIZE; + /* The maximum CQE capacity per CQ supported */ + /* max number of cqes will be in two layer pbl, + * 8 is the pointer size in bytes + * 32 is the size of cq element in bytes + */ + if (params->roce.cq_mode == ECORE_RDMA_CQ_MODE_32_BITS) + dev->max_cqe = ECORE_RDMA_MAX_CQE_32_BIT; + else + dev->max_cqe = ECORE_RDMA_MAX_CQE_16_BIT; + + dev->max_mw = 0; + dev->max_fmr = ECORE_RDMA_MAX_FMR; + dev->max_mr_mw_fmr_pbl = (OSAL_PAGE_SIZE/8) * (OSAL_PAGE_SIZE/8); + dev->max_mr_mw_fmr_size = dev->max_mr_mw_fmr_pbl * OSAL_PAGE_SIZE; + dev->max_pkey = ECORE_RDMA_MAX_P_KEY; + /* Right now we dont take any parameters from user + * So assign predefined max_srq to num_srqs. + */ + dev->max_srq = p_hwfn->p_rdma_info->num_srqs; + + /* SRQ WQE size */ + dev->max_srq_wr = ECORE_RDMA_MAX_SRQ_WQE_ELEM; + + dev->max_qp_resp_rd_atomic_resc = RDMA_RING_PAGE_SIZE / + (RDMA_RESP_RD_ATOMIC_ELM_SIZE*2); + dev->max_qp_req_rd_atomic_resc = RDMA_RING_PAGE_SIZE / + RDMA_REQ_RD_ATOMIC_ELM_SIZE; + + dev->max_dev_resp_rd_atomic_resc = + dev->max_qp_resp_rd_atomic_resc * p_hwfn->p_rdma_info->num_qps; + dev->page_size_caps = ECORE_RDMA_PAGE_SIZE_CAPS; + dev->dev_ack_delay = ECORE_RDMA_ACK_DELAY; + dev->max_pd = RDMA_MAX_PDS; + dev->max_ah = dev->max_qp; + dev->max_stats_queues = (u8)RESC_NUM(p_hwfn, ECORE_RDMA_STATS_QUEUE); + + /* Set capablities */ + dev->dev_caps = 0; + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_RNR_NAK, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_PORT_ACTIVE_EVENT, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_PORT_CHANGE_EVENT, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_RESIZE_CQ, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_BASE_MEMORY_EXT, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_BASE_QUEUE_EXT, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_ZBVA, 1); + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_LOCAL_INV_FENCE, 1); + + /* Check atomic operations support in PCI configuration space. 
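+ * (This is the PCIe Device Control 2 register: the capability bit is
+ * only advertised when the AtomicOp Requester Enable bit reads back
+ * as set. A consumer-side sketch, with dev as returned by
+ * ecore_rdma_query_device() and GET_FIELD assumed to be the
+ * mask/shift accessor paired with the SET_FIELD calls above:
+ *
+ *	if (GET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_ATOMIC_OP))
+ *		... atomic WQEs may be posted on this device ...
+ *
+ * otherwise atomic operations should not be attempted.)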
*/ + OSAL_PCI_READ_CONFIG_DWORD(p_hwfn->p_dev, + PCICFG_DEVICE_STATUS_CONTROL_2, + &pci_status_control); + + if (pci_status_control & + PCICFG_DEVICE_STATUS_CONTROL_2_ATOMIC_REQ_ENABLE) + SET_FIELD(dev->dev_caps, ECORE_RDMA_DEV_CAP_ATOMIC_OP, 1); + + if (IS_IWARP(p_hwfn)) + ecore_iwarp_init_devinfo(p_hwfn); +} + +static void ecore_rdma_init_port( + struct ecore_hwfn *p_hwfn) +{ + struct ecore_rdma_port *port = p_hwfn->p_rdma_info->port; + struct ecore_rdma_device *dev = p_hwfn->p_rdma_info->dev; + + port->port_state = p_hwfn->mcp_info->link_output.link_up ? + ECORE_RDMA_PORT_UP : ECORE_RDMA_PORT_DOWN; + + port->max_msg_size = OSAL_MIN_T(u64, + (dev->max_mr_mw_fmr_size * + p_hwfn->p_dev->rdma_max_sge), + ((u64)1 << 31)); + + port->pkey_bad_counter = 0; +} + +static enum _ecore_status_t ecore_rdma_init_hw( + struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt) +{ + u32 ll2_ethertype_en; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Initializing HW\n"); + p_hwfn->b_rdma_enabled_in_prs = false; + + if (IS_IWARP(p_hwfn)) + return ecore_iwarp_init_hw(p_hwfn, p_ptt); + + ecore_wr(p_hwfn, + p_ptt, + PRS_REG_ROCE_DEST_QP_MAX_PF, + 0); + + p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE; + + /* We delay writing to this reg until first cid is allocated. See + * ecore_cxt_dynamic_ilt_alloc function for more details + */ + + ll2_ethertype_en = ecore_rd(p_hwfn, + p_ptt, + PRS_REG_LIGHT_L2_ETHERTYPE_EN); + ecore_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN, + (ll2_ethertype_en | 0x01)); + +#ifndef REAL_ASIC_ONLY + if (ECORE_IS_BB_A0(p_hwfn->p_dev) && ECORE_IS_CMT(p_hwfn->p_dev)) { + ecore_wr(p_hwfn, + p_ptt, + NIG_REG_LLH_ENG_CLS_ENG_ID_TBL, + 0); + ecore_wr(p_hwfn, + p_ptt, + NIG_REG_LLH_ENG_CLS_ENG_ID_TBL + 4, + 0); + } +#endif + + if (ecore_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) + { + DP_NOTICE(p_hwfn, + true, + "The first RoCE's cid should be even\n"); + return ECORE_UNKNOWN_ERROR; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Initializing HW - Done\n"); + return ECORE_SUCCESS; +} + +static enum _ecore_status_t +ecore_rdma_start_fw(struct ecore_hwfn *p_hwfn, +#ifdef CONFIG_DCQCN + struct ecore_ptt *p_ptt, +#else + struct ecore_ptt OSAL_UNUSED *p_ptt, +#endif + struct ecore_rdma_start_in_params *params) +{ + struct rdma_init_func_ramrod_data *p_ramrod; + struct rdma_init_func_hdr *pheader; + struct ecore_rdma_info *p_rdma_info; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + u16 igu_sb_id, sb_id; + u8 ll2_queue_id; + u32 cnq_id; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Starting FW\n"); + + p_rdma_info = p_hwfn->p_rdma_info; + + /* Save the number of cnqs for the function close ramrod */ + p_rdma_info->num_cnqs = params->desired_cnq; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_FUNC_INIT, + p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + if (IS_IWARP(p_hwfn)) { + ecore_iwarp_init_fw_ramrod(p_hwfn, + &p_ent->ramrod.iwarp_init_func); + p_ramrod = &p_ent->ramrod.iwarp_init_func.rdma; + } else { + +#ifdef CONFIG_DCQCN + rc = ecore_roce_dcqcn_cfg(p_hwfn, ¶ms->roce.dcqcn_params, + &p_ent->ramrod.roce_init_func, p_ptt); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, + "Failed to configure DCQCN. 
rc = %d.\n", rc); + return rc; + } +#endif + p_ramrod = &p_ent->ramrod.roce_init_func.rdma; + + /* The ll2_queue_id is used only for UD QPs */ + ll2_queue_id = ecore_ll2_handle_to_queue_id( + p_hwfn, params->roce.ll2_handle); + p_ent->ramrod.roce_init_func.roce.ll2_queue_id = ll2_queue_id; + + } + + pheader = &p_ramrod->params_header; + pheader->cnq_start_offset = (u8)RESC_START(p_hwfn, ECORE_RDMA_CNQ_RAM); + pheader->num_cnqs = params->desired_cnq; + + /* The first SRQ ILT page is used for XRC SRQs and all the following + * pages contain regular SRQs. Hence the first regular SRQ ID is the + * maximum number XRC SRQs. + */ + pheader->first_reg_srq_id = p_rdma_info->srq_id_offset; + pheader->reg_srq_base_addr = + ecore_cxt_get_ilt_page_size(p_hwfn, ILT_CLI_TSDM); + + if (params->roce.cq_mode == ECORE_RDMA_CQ_MODE_16_BITS) + pheader->cq_ring_mode = 1; /* 1=16 bits */ + else + pheader->cq_ring_mode = 0; /* 0=32 bits */ + + for (cnq_id = 0; cnq_id < params->desired_cnq; cnq_id++) + { + sb_id = (u16)OSAL_GET_RDMA_SB_ID(p_hwfn, cnq_id); + igu_sb_id = ecore_get_igu_sb_id(p_hwfn, sb_id); + p_ramrod->cnq_params[cnq_id].sb_num = + OSAL_CPU_TO_LE16(igu_sb_id); + + p_ramrod->cnq_params[cnq_id].sb_index = + p_hwfn->pf_params.rdma_pf_params.gl_pi; + + p_ramrod->cnq_params[cnq_id].num_pbl_pages = + params->cnq_pbl_list[cnq_id].num_pbl_pages; + + p_ramrod->cnq_params[cnq_id].pbl_base_addr.hi = + DMA_HI_LE(params->cnq_pbl_list[cnq_id].pbl_ptr); + p_ramrod->cnq_params[cnq_id].pbl_base_addr.lo = + DMA_LO_LE(params->cnq_pbl_list[cnq_id].pbl_ptr); + + /* we arbitrarily decide that cnq_id will be as qz_offset */ + p_ramrod->cnq_params[cnq_id].queue_zone_num = + OSAL_CPU_TO_LE16(p_rdma_info->queue_zone_base + cnq_id); + } + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + return rc; +} + +enum _ecore_status_t ecore_rdma_alloc_tid(void *rdma_cxt, + u32 *itid) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Allocate TID\n"); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->tid_map, + itid); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "Failed in allocating tid\n"); + goto out; + } + + rc = ecore_cxt_dynamic_ilt_alloc(p_hwfn, ECORE_ELEM_TASK, *itid); +out: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Allocate TID - done, rc = %d\n", rc); + return rc; +} + +static OSAL_INLINE enum _ecore_status_t ecore_rdma_reserve_lkey( + struct ecore_hwfn *p_hwfn) +{ + struct ecore_rdma_device *dev = p_hwfn->p_rdma_info->dev; + + /* Tid 0 will be used as the key for "reserved MR". + * The driver should allocate memory for it so it can be loaded but no + * ramrod should be passed on it. 
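+ * Because ecore_rdma_bmap_alloc_id() always returns the first clear
+ * bit, making this the very first TID allocation is what pins the
+ * reserved key; in outline:
+ *
+ *	ecore_rdma_alloc_tid(p_hwfn, &dev->reserved_lkey);
+ *	(first allocation on a fresh tid_map, so the itid comes out
+ *	 as RDMA_RESERVED_LKEY, which the check below enforces)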
+ */ + ecore_rdma_alloc_tid(p_hwfn, &dev->reserved_lkey); + if (dev->reserved_lkey != RDMA_RESERVED_LKEY) + { + DP_NOTICE(p_hwfn, true, + "Reserved lkey should be equal to RDMA_RESERVED_LKEY\n"); + return ECORE_INVAL; + } + + return ECORE_SUCCESS; +} + +static enum _ecore_status_t ecore_rdma_setup(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt, + struct ecore_rdma_start_in_params *params) +{ + enum _ecore_status_t rc = 0; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "RDMA setup\n"); + + ecore_rdma_init_devinfo(p_hwfn, params); + ecore_rdma_init_port(p_hwfn); + ecore_rdma_init_events(p_hwfn, params); + + rc = ecore_rdma_reserve_lkey(p_hwfn); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_rdma_init_hw(p_hwfn, p_ptt); + if (rc != ECORE_SUCCESS) + return rc; + + if (IS_IWARP(p_hwfn)) { + rc = ecore_iwarp_setup(p_hwfn, params); + if (rc != ECORE_SUCCESS) + return rc; + } else { + rc = ecore_roce_setup(p_hwfn); + if (rc != ECORE_SUCCESS) + return rc; + } + + return ecore_rdma_start_fw(p_hwfn, p_ptt, params); +} + + +enum _ecore_status_t ecore_rdma_stop(void *rdma_cxt) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct rdma_close_func_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + struct ecore_ptt *p_ptt; + u32 ll2_ethertype_en; + enum _ecore_status_t rc = ECORE_TIMEOUT; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "RDMA stop\n"); + + rc = ecore_rdma_deactivate(p_hwfn); + if (rc != ECORE_SUCCESS) + return rc; + + p_ptt = ecore_ptt_acquire(p_hwfn); + if (!p_ptt) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Failed to acquire PTT\n"); + return rc; + } + +#ifdef CONFIG_DCQCN + ecore_roce_stop_rl(p_hwfn); +#endif + + /* Disable RoCE search */ + ecore_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 0); + p_hwfn->b_rdma_enabled_in_prs = false; + + ecore_wr(p_hwfn, + p_ptt, + PRS_REG_ROCE_DEST_QP_MAX_PF, + 0); + + ll2_ethertype_en = ecore_rd(p_hwfn, + p_ptt, + PRS_REG_LIGHT_L2_ETHERTYPE_EN); + + ecore_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN, + (ll2_ethertype_en & 0xFFFE)); + +#ifndef REAL_ASIC_ONLY + /* INTERNAL: In CMT mode, re-initialize nig to direct packets to both + * enginesfor L2 performance, Roce requires all traffic to go just to + * engine 0. + */ + if (ECORE_IS_BB_A0(p_hwfn->p_dev) && ECORE_IS_CMT(p_hwfn->p_dev)) { + DP_ERR(p_hwfn->p_dev, + "On Everest 4 Big Bear Board revision A0 when RoCE driver is loaded L2 performance is sub-optimal (all traffic is routed to engine 0). 
For optimal L2 results either remove RoCE driver or use board revision B0\n"); + + ecore_wr(p_hwfn, + p_ptt, + NIG_REG_LLH_ENG_CLS_ENG_ID_TBL, + 0x55555555); + ecore_wr(p_hwfn, + p_ptt, + NIG_REG_LLH_ENG_CLS_ENG_ID_TBL + 0x4, + 0x55555555); + } +#endif + + if (IS_IWARP(p_hwfn)) { + rc = ecore_iwarp_stop(p_hwfn); + if (rc != ECORE_SUCCESS) { + ecore_ptt_release(p_hwfn, p_ptt); + return 0; + } + } else { + rc = ecore_roce_stop(p_hwfn); + if (rc != ECORE_SUCCESS) { + ecore_ptt_release(p_hwfn, p_ptt); + return 0; + } + } + + ecore_ptt_release(p_hwfn, p_ptt); + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + /* Stop RoCE */ + rc = ecore_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_FUNC_CLOSE, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + goto out; + + p_ramrod = &p_ent->ramrod.rdma_close_func; + + p_ramrod->num_cnqs = p_hwfn->p_rdma_info->num_cnqs; + p_ramrod->cnq_start_offset = (u8)RESC_START(p_hwfn, ECORE_RDMA_CNQ_RAM); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + +out: + ecore_rdma_free(p_hwfn); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "RDMA stop done, rc = %d\n", rc); + return rc; +} + +enum _ecore_status_t ecore_rdma_add_user(void *rdma_cxt, + struct ecore_rdma_add_user_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + u32 dpi_start_offset; + u32 returned_id = 0; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Adding User\n"); + + /* Allocate DPI */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, + &returned_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + if (rc != ECORE_SUCCESS) + DP_NOTICE(p_hwfn, false, "Failed in allocating dpi\n"); + + out_params->dpi = (u16)returned_id; + + /* Calculate the corresponding DPI address */ + dpi_start_offset = p_hwfn->dpi_start_offset; + + out_params->dpi_addr = (u64)(osal_int_ptr_t)((u8 OSAL_IOMEM*)p_hwfn->doorbells + + dpi_start_offset + + ((out_params->dpi) * p_hwfn->dpi_size)); + + out_params->dpi_phys_addr = p_hwfn->db_phys_addr + dpi_start_offset + + out_params->dpi * p_hwfn->dpi_size; + + out_params->dpi_size = p_hwfn->dpi_size; + out_params->wid_count = p_hwfn->wid_count; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Adding user - done, rc = %d\n", rc); + return rc; +} + +struct ecore_rdma_port *ecore_rdma_query_port(void *rdma_cxt) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_rdma_port *p_port = p_hwfn->p_rdma_info->port; + struct ecore_mcp_link_state *p_link_output; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "RDMA Query port\n"); + + /* The link state is saved only for the leading hwfn */ + p_link_output = + &ECORE_LEADING_HWFN(p_hwfn->p_dev)->mcp_info->link_output; + + /* Link may have changed... */ + p_port->port_state = p_link_output->link_up ? 
ECORE_RDMA_PORT_UP + : ECORE_RDMA_PORT_DOWN; + + p_port->link_speed = p_link_output->speed; + + p_port->max_msg_size = RDMA_MAX_DATA_SIZE_IN_WQE; + + return p_port; +} + +struct ecore_rdma_device *ecore_rdma_query_device(void *rdma_cxt) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Query device\n"); + + /* Return struct with device parameters */ + return p_hwfn->p_rdma_info->dev; +} + +void ecore_rdma_free_tid(void *rdma_cxt, + u32 itid) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "itid = %08x\n", itid); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->tid_map, + itid); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +void ecore_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) +{ + struct ecore_hwfn *p_hwfn; + u16 qz_num; + u32 addr; + + p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + if (qz_offset > p_hwfn->p_rdma_info->max_queue_zones) { + DP_NOTICE(p_hwfn, false, + "queue zone offset %d is too large (max is %d)\n", + qz_offset, p_hwfn->p_rdma_info->max_queue_zones); + return; + } + + qz_num = p_hwfn->p_rdma_info->queue_zone_base + qz_offset; + addr = GTT_BAR0_MAP_REG_USDM_RAM + + USTORM_COMMON_QUEUE_CONS_OFFSET(qz_num); + + REG_WR16(p_hwfn, addr, prod); + + /* keep prod updates ordered */ + OSAL_WMB(p_hwfn->p_dev); +} + +enum _ecore_status_t ecore_rdma_alloc_pd(void *rdma_cxt, + u16 *pd) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + u32 returned_id; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Alloc PD\n"); + + /* Allocates an unused protection domain */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->pd_map, + &returned_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + if (rc != ECORE_SUCCESS) + DP_NOTICE(p_hwfn, false, "Failed in allocating pd id\n"); + + *pd = (u16)returned_id; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Alloc PD - done, rc = %d\n", rc); + return rc; +} + +void ecore_rdma_free_pd(void *rdma_cxt, + u16 pd) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "pd = %08x\n", pd); + + /* Returns a previously allocated protection domain for reuse */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->pd_map, pd); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +enum _ecore_status_t ecore_rdma_alloc_xrcd(void *rdma_cxt, + u16 *xrcd_id) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + u32 returned_id; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Alloc XRCD\n"); + + /* Allocates an unused XRC domain */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->xrcd_map, + &returned_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + if (rc != ECORE_SUCCESS) + DP_NOTICE(p_hwfn, false, "Failed in allocating xrcd id\n"); + + *xrcd_id = (u16)returned_id; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Alloc XRCD - done, rc = %d\n", rc); + return rc; +} + +void ecore_rdma_free_xrcd(void *rdma_cxt, + u16 xrcd_id) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "xrcd_id = %08x\n", xrcd_id); + + /* Returns a previously allocated protection domain for reuse */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, 
&p_hwfn->p_rdma_info->xrcd_map, xrcd_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +static enum ecore_rdma_toggle_bit +ecore_rdma_toggle_bit_create_resize_cq(struct ecore_hwfn *p_hwfn, + u16 icid) +{ + struct ecore_rdma_info *p_info = p_hwfn->p_rdma_info; + enum ecore_rdma_toggle_bit toggle_bit; + u32 bmap_id; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", icid); + + /* the function toggle the bit that is related to a given icid + * and returns the new toggle bit's value + */ + bmap_id = icid - ecore_cxt_get_proto_cid_start(p_hwfn, p_info->proto); + + OSAL_SPIN_LOCK(&p_info->lock); + toggle_bit = !OSAL_TEST_AND_FLIP_BIT(bmap_id, p_info->toggle_bits.bitmap); + OSAL_SPIN_UNLOCK(&p_info->lock); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ECORE_RDMA_TOGGLE_BIT_= %d\n", + toggle_bit); + + return toggle_bit; +} + +enum _ecore_status_t ecore_rdma_create_cq(void *rdma_cxt, + struct ecore_rdma_create_cq_in_params *params, + u16 *icid) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_rdma_info *p_info = p_hwfn->p_rdma_info; + struct rdma_create_cq_ramrod_data *p_ramrod; + enum ecore_rdma_toggle_bit toggle_bit; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + u32 returned_id; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "cq_handle = %08x%08x\n", + params->cq_handle_hi, params->cq_handle_lo); + + /* Allocate icid */ + OSAL_SPIN_LOCK(&p_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, &p_info->cq_map, &returned_id); + OSAL_SPIN_UNLOCK(&p_info->lock); + + if (rc != ECORE_SUCCESS) + { + DP_NOTICE(p_hwfn, false, "Can't create CQ, rc = %d\n", rc); + return rc; + } + + *icid = (u16)(returned_id + + ecore_cxt_get_proto_cid_start( + p_hwfn, p_info->proto)); + + /* Check if icid requires a page allocation */ + rc = ecore_cxt_dynamic_ilt_alloc(p_hwfn, ECORE_ELEM_CXT, *icid); + if (rc != ECORE_SUCCESS) + goto err; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = *icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + /* Send create CQ ramrod */ + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_CREATE_CQ, + p_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_create_cq; + + p_ramrod->cq_handle.hi = OSAL_CPU_TO_LE32(params->cq_handle_hi); + p_ramrod->cq_handle.lo = OSAL_CPU_TO_LE32(params->cq_handle_lo); + p_ramrod->dpi = OSAL_CPU_TO_LE16(params->dpi); + p_ramrod->is_two_level_pbl = params->pbl_two_level; + p_ramrod->max_cqes = OSAL_CPU_TO_LE32(params->cq_size); + DMA_REGPAIR_LE(p_ramrod->pbl_addr, params->pbl_ptr); + p_ramrod->pbl_num_pages = OSAL_CPU_TO_LE16(params->pbl_num_pages); + p_ramrod->cnq_id = (u8)RESC_START(p_hwfn, ECORE_RDMA_CNQ_RAM) + + params->cnq_id; + p_ramrod->int_timeout = params->int_timeout; + /* INTERNAL: Two layer PBL is currently not supported, ignoring next line */ + /* INTERNAL: p_ramrod->pbl_log_page_size = params->pbl_page_size_log - 12; */ + + /* toggle the bit for every resize or create cq for a given icid */ + toggle_bit = ecore_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid); + + p_ramrod->toggle_bit = toggle_bit; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) { + /* restore toggle bit */ + ecore_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid); + goto err; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Created CQ, rc = %d\n", rc); + return rc; + +err: + /* release allocated icid */ + 
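/*
+	 * Only the bitmap-relative id goes back into cq_map: returned_id
+	 * was handed out before the protocol cid offset was folded into
+	 * *icid, which is why the release below does not use *icid. A
+	 * destroy path has to undo the offset itself, roughly as
+	 * ecore_rdma_destroy_cq() does (rel_id is illustrative):
+	 *
+	 *	rel_id = icid - ecore_cxt_get_proto_cid_start(p_hwfn, proto);
+	 *	ecore_bmap_release_id(p_hwfn, &p_info->cq_map, rel_id);
+	 */
+ 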
OSAL_SPIN_LOCK(&p_info->lock); + ecore_bmap_release_id(p_hwfn, &p_info->cq_map, returned_id); + OSAL_SPIN_UNLOCK(&p_info->lock); + + DP_NOTICE(p_hwfn, false, "Create CQ failed, rc = %d\n", rc); + + return rc; +} + +enum _ecore_status_t ecore_rdma_destroy_cq(void *rdma_cxt, + struct ecore_rdma_destroy_cq_in_params *in_params, + struct ecore_rdma_destroy_cq_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct rdma_destroy_cq_output_params *p_ramrod_res; + struct rdma_destroy_cq_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + enum _ecore_status_t rc = ECORE_NOMEM; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", in_params->icid); + + p_ramrod_res = (struct rdma_destroy_cq_output_params *) + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, &ramrod_res_phys, + sizeof(struct rdma_destroy_cq_output_params)); + if (!p_ramrod_res) + { + DP_NOTICE(p_hwfn, false, + "ecore destroy cq failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = in_params->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + /* Send destroy CQ ramrod */ + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_DESTROY_CQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_destroy_cq; + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err; + + out_params->num_cq_notif = + OSAL_LE16_TO_CPU(p_ramrod_res->cnq_num); + + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_ramrod_res, ramrod_res_phys, + sizeof(struct rdma_destroy_cq_output_params)); + + /* Free icid */ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + + ecore_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->cq_map, + (in_params->icid - ecore_cxt_get_proto_cid_start( + p_hwfn, p_hwfn->p_rdma_info->proto))); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Destroyed CQ, rc = %d\n", rc); + return rc; + +err: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_ramrod_res, ramrod_res_phys, + sizeof(struct rdma_destroy_cq_output_params)); + + return rc; +} + +void ecore_rdma_set_fw_mac(u16 *p_fw_mac, u8 *p_ecore_mac) +{ + p_fw_mac[0] = OSAL_CPU_TO_LE16((p_ecore_mac[0] << 8) + p_ecore_mac[1]); + p_fw_mac[1] = OSAL_CPU_TO_LE16((p_ecore_mac[2] << 8) + p_ecore_mac[3]); + p_fw_mac[2] = OSAL_CPU_TO_LE16((p_ecore_mac[4] << 8) + p_ecore_mac[5]); +} + + +enum _ecore_status_t ecore_rdma_query_qp(void *rdma_cxt, + struct ecore_rdma_qp *qp, + struct ecore_rdma_query_qp_out_params *out_params) + +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + enum _ecore_status_t rc = ECORE_SUCCESS; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", qp->icid); + + /* The following fields are filled in from qp and not FW as they can't + * be modified by FW + */ + out_params->mtu = qp->mtu; + out_params->dest_qp = qp->dest_qp; + out_params->incoming_atomic_en = qp->incoming_atomic_en; + out_params->e2e_flow_control_en = qp->e2e_flow_control_en; + out_params->incoming_rdma_read_en = qp->incoming_rdma_read_en; + out_params->incoming_rdma_write_en = qp->incoming_rdma_write_en; + out_params->dgid = qp->dgid; + out_params->flow_label = qp->flow_label; + out_params->hop_limit_ttl = qp->hop_limit_ttl; + out_params->traffic_class_tos = 
qp->traffic_class_tos; + out_params->timeout = qp->ack_timeout; + out_params->rnr_retry = qp->rnr_retry_cnt; + out_params->retry_cnt = qp->retry_cnt; + out_params->min_rnr_nak_timer = qp->min_rnr_nak_timer; + out_params->pkey_index = 0; + out_params->max_rd_atomic = qp->max_rd_atomic_req; + out_params->max_dest_rd_atomic = qp->max_rd_atomic_resp; + out_params->sqd_async = qp->sqd_async; + + if (IS_IWARP(p_hwfn)) + rc = ecore_iwarp_query_qp(qp, out_params); + else + rc = ecore_roce_query_qp(p_hwfn, qp, out_params); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Query QP, rc = %d\n", rc); + return rc; +} + + +enum _ecore_status_t ecore_rdma_destroy_qp(void *rdma_cxt, + struct ecore_rdma_qp *qp, + struct ecore_rdma_destroy_qp_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + enum _ecore_status_t rc = ECORE_SUCCESS; + + if (!rdma_cxt || !qp) { + DP_ERR(p_hwfn, + "ecore rdma destroy qp failed: invalid NULL input. rdma_cxt=%p, qp=%p\n", + rdma_cxt, qp); + return ECORE_INVAL; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "QP(0x%x)\n", qp->icid); + + if (IS_IWARP(p_hwfn)) + rc = ecore_iwarp_destroy_qp(p_hwfn, qp); + else + rc = ecore_roce_destroy_qp(p_hwfn, qp, out_params); + + /* free qp params struct */ + OSAL_FREE(p_hwfn->p_dev, qp); + + return rc; +} + + +struct ecore_rdma_qp *ecore_rdma_create_qp(void *rdma_cxt, + struct ecore_rdma_create_qp_in_params *in_params, + struct ecore_rdma_create_qp_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_rdma_qp *qp; + u8 max_stats_queues; + enum _ecore_status_t rc = 0; + + if (!rdma_cxt || !in_params || !out_params || !p_hwfn->p_rdma_info) { + DP_ERR(p_hwfn->p_dev, + "ecore roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", + rdma_cxt, + in_params, + out_params); + return OSAL_NULL; + } + + /* Some sanity checks... */ + max_stats_queues = p_hwfn->p_rdma_info->dev->max_stats_queues; + if (in_params->stats_queue >= max_stats_queues) { + DP_ERR(p_hwfn->p_dev, + "ecore rdma create qp failed due to invalid statistics queue %d. 
maximum is %d\n", + in_params->stats_queue, max_stats_queues); + return OSAL_NULL; + } + + if (IS_IWARP(p_hwfn)) { + if (in_params->sq_num_pages*sizeof(struct regpair) > + IWARP_SHARED_QUEUE_PAGE_SQ_PBL_MAX_SIZE) { + DP_NOTICE(p_hwfn->p_dev, true, "Sq num pages: %d exceeds maximum\n", + in_params->sq_num_pages); + return OSAL_NULL; + } + if (in_params->rq_num_pages*sizeof(struct regpair) > + IWARP_SHARED_QUEUE_PAGE_RQ_PBL_MAX_SIZE) { + DP_NOTICE(p_hwfn->p_dev, true, + "Rq num pages: %d exceeds maximum\n", + in_params->rq_num_pages); + return OSAL_NULL; + } + } + + qp = OSAL_ZALLOC(p_hwfn->p_dev, + GFP_KERNEL, + sizeof(struct ecore_rdma_qp)); + if (!qp) + { + DP_NOTICE(p_hwfn, false, "Failed to allocate ecore_rdma_qp\n"); + return OSAL_NULL; + } + + qp->cur_state = ECORE_ROCE_QP_STATE_RESET; +#ifdef CONFIG_ECORE_IWARP + qp->iwarp_state = ECORE_IWARP_QP_STATE_IDLE; +#endif + qp->qp_handle.hi = OSAL_CPU_TO_LE32(in_params->qp_handle_hi); + qp->qp_handle.lo = OSAL_CPU_TO_LE32(in_params->qp_handle_lo); + qp->qp_handle_async.hi = OSAL_CPU_TO_LE32(in_params->qp_handle_async_hi); + qp->qp_handle_async.lo = OSAL_CPU_TO_LE32(in_params->qp_handle_async_lo); + qp->use_srq = in_params->use_srq; + qp->signal_all = in_params->signal_all; + qp->fmr_and_reserved_lkey = in_params->fmr_and_reserved_lkey; + qp->pd = in_params->pd; + qp->dpi = in_params->dpi; + qp->sq_cq_id = in_params->sq_cq_id; + qp->sq_num_pages = in_params->sq_num_pages; + qp->sq_pbl_ptr = in_params->sq_pbl_ptr; + qp->rq_cq_id = in_params->rq_cq_id; + qp->rq_num_pages = in_params->rq_num_pages; + qp->rq_pbl_ptr = in_params->rq_pbl_ptr; + qp->srq_id = in_params->srq_id; + qp->req_offloaded = false; + qp->resp_offloaded = false; + /* e2e_flow_control cannot be done in case of S-RQ. + * Refer to 9.7.7.2 End-to-End Flow Control section of IB spec + */ + qp->e2e_flow_control_en = qp->use_srq ? 
false : true; + qp->stats_queue = in_params->stats_queue; + qp->qp_type = in_params->qp_type; + qp->xrcd_id = in_params->xrcd_id; + + if (IS_IWARP(p_hwfn)) { + rc = ecore_iwarp_create_qp(p_hwfn, qp, out_params); + qp->qpid = qp->icid; + } else { + rc = ecore_roce_alloc_qp_idx(p_hwfn, &qp->qp_idx); + qp->icid = ECORE_ROCE_QP_TO_ICID(qp->qp_idx); + qp->qpid = ((0xFF << 16) | qp->icid); + } + + if (rc != ECORE_SUCCESS) { + OSAL_FREE(p_hwfn->p_dev, qp); + return OSAL_NULL; + } + + out_params->icid = qp->icid; + out_params->qp_id = qp->qpid; + + /* INTERNAL: max_sq_sges future use only*/ + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Create QP, rc = %d\n", rc); + return qp; +} + +#define ECORE_RDMA_ECN_SHIFT 0 +#define ECORE_RDMA_ECN_MASK 0x3 +#define ECORE_RDMA_DSCP_SHIFT 2 +#define ECORE_RDMA_DSCP_MASK 0x3f +#define ECORE_RDMA_VLAN_PRIO_SHIFT 13 +#define ECORE_RDMA_VLAN_PRIO_MASK 0x7 +enum _ecore_status_t ecore_rdma_modify_qp( + void *rdma_cxt, + struct ecore_rdma_qp *qp, + struct ecore_rdma_modify_qp_in_params *params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + enum ecore_roce_qp_state prev_state; + enum _ecore_status_t rc = ECORE_SUCCESS; + + if (GET_FIELD(params->modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN)) + { + qp->incoming_rdma_read_en = params->incoming_rdma_read_en; + qp->incoming_rdma_write_en = params->incoming_rdma_write_en; + qp->incoming_atomic_en = params->incoming_atomic_en; + } + + /* Update QP structure with the updated values */ + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_ROCE_MODE)) + { + qp->roce_mode = params->roce_mode; + } + if (GET_FIELD(params->modify_flags, ECORE_ROCE_MODIFY_QP_VALID_PKEY)) + { + qp->pkey = params->pkey; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN)) + { + qp->e2e_flow_control_en = params->e2e_flow_control_en; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_DEST_QP)) + { + qp->dest_qp = params->dest_qp; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)) + { + /* Indicates that the following parameters have changed: + * Traffic class, flow label, hop limit, source GID, + * destination GID, loopback indicator + */ + qp->flow_label = params->flow_label; + qp->hop_limit_ttl = params->hop_limit_ttl; + + qp->sgid = params->sgid; + qp->dgid = params->dgid; + qp->udp_src_port = params->udp_src_port; + qp->vlan_id = params->vlan_id; + qp->traffic_class_tos = params->traffic_class_tos; + + /* apply global override values */ + if (p_hwfn->p_rdma_info->glob_cfg.vlan_pri_en) + SET_FIELD(qp->vlan_id, ECORE_RDMA_VLAN_PRIO, + p_hwfn->p_rdma_info->glob_cfg.vlan_pri); + + if (p_hwfn->p_rdma_info->glob_cfg.ecn_en) + SET_FIELD(qp->traffic_class_tos, ECORE_RDMA_ECN, + p_hwfn->p_rdma_info->glob_cfg.ecn); + + if (p_hwfn->p_rdma_info->glob_cfg.dscp_en) + SET_FIELD(qp->traffic_class_tos, ECORE_RDMA_DSCP, + p_hwfn->p_rdma_info->glob_cfg.dscp); + + qp->mtu = params->mtu; + + OSAL_MEMCPY((u8 *)&qp->remote_mac_addr[0], + (u8 *)¶ms->remote_mac_addr[0], ETH_ALEN); + if (params->use_local_mac) { + OSAL_MEMCPY((u8 *)&qp->local_mac_addr[0], + (u8 *)¶ms->local_mac_addr[0], + ETH_ALEN); + } else { + OSAL_MEMCPY((u8 *)&qp->local_mac_addr[0], + (u8 *)&p_hwfn->hw_info.hw_mac_addr, + ETH_ALEN); + } + } + if (GET_FIELD(params->modify_flags, ECORE_ROCE_MODIFY_QP_VALID_RQ_PSN)) + { + qp->rq_psn = params->rq_psn; + } + if (GET_FIELD(params->modify_flags, ECORE_ROCE_MODIFY_QP_VALID_SQ_PSN)) + { + qp->sq_psn = params->sq_psn; + } + if 
(GET_FIELD(params->modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ)) + { + qp->max_rd_atomic_req = params->max_rd_atomic_req; + } + if (GET_FIELD(params->modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP)) + { + qp->max_rd_atomic_resp = params->max_rd_atomic_resp; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT)) + { + qp->ack_timeout = params->ack_timeout; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_RETRY_CNT)) + { + qp->retry_cnt = params->retry_cnt; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT)) + { + qp->rnr_retry_cnt = params->rnr_retry_cnt; + } + if (GET_FIELD(params->modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER)) + { + qp->min_rnr_nak_timer = params->min_rnr_nak_timer; + } + + qp->sqd_async = params->sqd_async; + + prev_state = qp->cur_state; + if (GET_FIELD(params->modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_NEW_STATE)) + { + qp->cur_state = params->new_state; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "qp->cur_state=%d\n", + qp->cur_state); + } + + if (qp->qp_type == ECORE_RDMA_QP_TYPE_XRC_INI) { + qp->has_req = 1; + } else if (qp->qp_type == ECORE_RDMA_QP_TYPE_XRC_TGT) + { + qp->has_resp = 1; + } else { + qp->has_req = 1; + qp->has_resp = 1; + } + + if (IS_IWARP(p_hwfn)) { + enum ecore_iwarp_qp_state new_state = + ecore_roce2iwarp_state(qp->cur_state); + + rc = ecore_iwarp_modify_qp(p_hwfn, qp, new_state, 0); + } else { + rc = ecore_roce_modify_qp(p_hwfn, qp, prev_state, params); + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Modify QP, rc = %d\n", rc); + return rc; +} + +enum _ecore_status_t ecore_rdma_register_tid(void *rdma_cxt, + struct ecore_rdma_register_tid_in_params *params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct rdma_register_tid_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum rdma_tid_type tid_type; + u8 fw_return_code; + enum _ecore_status_t rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "itid = %08x\n", params->itid); + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_REGISTER_MR, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + if (p_hwfn->p_rdma_info->last_tid < params->itid) { + p_hwfn->p_rdma_info->last_tid = params->itid; + } + + p_ramrod = &p_ent->ramrod.rdma_register_tid; + + p_ramrod->flags = 0; + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_TWO_LEVEL_PBL, + params->pbl_two_level); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED, + params->zbva); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR, + params->phy_mr); + + /* Don't initialize D/C field, as it may override other bits. 
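+ * (Assuming SET_FIELD is the usual shift-and-or field writer, i.e.
+ * roughly
+ *
+ *	flags &= ~(FIELD_MASK << FIELD_SHIFT);
+ *	flags |= (u64)val << FIELD_SHIFT;
+ *
+ * a don't-care val wider than FIELD_MASK would spill into the
+ * neighbouring fields, which is why PAGE_SIZE_LOG is simply not
+ * written for FMR and DMA MRs below.)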
*/ + if (!(params->tid_type == ECORE_RDMA_TID_FMR) && + !(params->dma_mr)) + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_PAGE_SIZE_LOG, + params->page_size_log - 12); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_READ, + params->remote_read); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_WRITE, + params->remote_write); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_ATOMIC, + params->remote_atomic); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_WRITE, + params->local_write); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_READ, + params->local_read); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_ENABLE_MW_BIND, + params->mw_bind); + + SET_FIELD(p_ramrod->flags1, + RDMA_REGISTER_TID_RAMROD_DATA_PBL_PAGE_SIZE_LOG, + params->pbl_page_size_log - 12); + + SET_FIELD(p_ramrod->flags2, + RDMA_REGISTER_TID_RAMROD_DATA_DMA_MR, + params->dma_mr); + + switch (params->tid_type) + { + case ECORE_RDMA_TID_REGISTERED_MR: + tid_type = RDMA_TID_REGISTERED_MR; + break; + case ECORE_RDMA_TID_FMR: + tid_type = RDMA_TID_FMR; + break; + case ECORE_RDMA_TID_MW_TYPE1: + tid_type = RDMA_TID_MW_TYPE1; + break; + case ECORE_RDMA_TID_MW_TYPE2A: + tid_type = RDMA_TID_MW_TYPE2A; + break; + default: + rc = ECORE_INVAL; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + SET_FIELD(p_ramrod->flags1, + RDMA_REGISTER_TID_RAMROD_DATA_TID_TYPE, + tid_type); + + p_ramrod->itid = OSAL_CPU_TO_LE32(params->itid); + p_ramrod->key = params->key; + p_ramrod->pd = OSAL_CPU_TO_LE16(params->pd); + p_ramrod->length_hi = (u8)(params->length >> 32); + p_ramrod->length_lo = DMA_LO_LE(params->length); + if (params->zbva) + { + /* Lower 32 bits of the registered MR address. 
+ * In case of zero based MR, will hold FBO + */ + p_ramrod->va.hi = 0; + p_ramrod->va.lo = OSAL_CPU_TO_LE32(params->fbo); + } else { + DMA_REGPAIR_LE(p_ramrod->va, params->vaddr); + } + DMA_REGPAIR_LE(p_ramrod->pbl_base, params->pbl_ptr); + + /* DIF */ + if (params->dif_enabled) { + SET_FIELD(p_ramrod->flags2, + RDMA_REGISTER_TID_RAMROD_DATA_DIF_ON_HOST_FLG, 1); + DMA_REGPAIR_LE(p_ramrod->dif_error_addr, + params->dif_error_addr); + DMA_REGPAIR_LE(p_ramrod->dif_runt_addr, params->dif_runt_addr); + } + + rc = ecore_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc) + return rc; + + if (fw_return_code != RDMA_RETURN_OK) { + DP_NOTICE(p_hwfn, true, "fw_return_code = %d\n", fw_return_code); + return ECORE_UNKNOWN_ERROR; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Register TID, rc = %d\n", rc); + return rc; +} + +static OSAL_INLINE int ecore_rdma_send_deregister_tid_ramrod( + struct ecore_hwfn *p_hwfn, + u32 itid, + u8 *fw_return_code) +{ + struct ecore_sp_init_data init_data; + struct rdma_deregister_tid_ramrod_data *p_ramrod; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_DEREGISTER_MR, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.rdma_deregister_tid; + p_ramrod->itid = OSAL_CPU_TO_LE32(itid); + + rc = ecore_spq_post(p_hwfn, p_ent, fw_return_code); + if (rc != ECORE_SUCCESS) + { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + return rc; +} + +#define ECORE_RDMA_DEREGISTER_TIMEOUT_MSEC (1) + +enum _ecore_status_t ecore_rdma_deregister_tid(void *rdma_cxt, + u32 itid) +{ + enum _ecore_status_t rc; + u8 fw_ret_code; + struct ecore_ptt *p_ptt; + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + /* First attempt */ + rc = ecore_rdma_send_deregister_tid_ramrod(p_hwfn, itid, &fw_ret_code); + if (rc != ECORE_SUCCESS) + return rc; + + if (fw_ret_code != RDMA_RETURN_NIG_DRAIN_REQ) + goto done; + + /* Second attempt, after 1msec, if device still holds data. + * This can occur since 'destroy QP' returns to the caller rather fast. + * The synchronous part of it returns after freeing a few of the + * resources but not all of them, allowing the consumer to continue its + * flow. All of the resources will be freed after the asynchronous part + * of the destroy QP is complete. 
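+ * The resulting retry ladder below: send the ramrod once; if the
+ * firmware answers RDMA_RETURN_NIG_DRAIN_REQ, sleep
+ * ECORE_RDMA_DEREGISTER_TIMEOUT_MSEC and resend; only then pay for a
+ * full NIG drain before the final attempt. In outline (with
+ * send_deregister() standing in for
+ * ecore_rdma_send_deregister_tid_ramrod()):
+ *
+ *	send_deregister();			attempt 1
+ *	OSAL_MSLEEP(1);    send_deregister();	attempt 2
+ *	ecore_mcp_drain(); send_deregister();	attempt 3
+ *
+ * and each attempt ends the ladder unless the drain request is
+ * returned again.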
+ */ + OSAL_MSLEEP(ECORE_RDMA_DEREGISTER_TIMEOUT_MSEC); + rc = ecore_rdma_send_deregister_tid_ramrod(p_hwfn, itid, &fw_ret_code); + if (rc != ECORE_SUCCESS) + return rc; + + if (fw_ret_code != RDMA_RETURN_NIG_DRAIN_REQ) + goto done; + + /* Third and last attempt, perform NIG drain and resend the ramrod */ + p_ptt = ecore_ptt_acquire(p_hwfn); + if (!p_ptt) + return ECORE_TIMEOUT; + + rc = ecore_mcp_drain(p_hwfn, p_ptt); + if (rc != ECORE_SUCCESS) { + ecore_ptt_release(p_hwfn, p_ptt); + return rc; + } + + ecore_ptt_release(p_hwfn, p_ptt); + + rc = ecore_rdma_send_deregister_tid_ramrod(p_hwfn, itid, &fw_ret_code); + if (rc != ECORE_SUCCESS) + return rc; + +done: + if (fw_ret_code == RDMA_RETURN_OK) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "De-registered itid=%d\n", + itid); + return ECORE_SUCCESS; + } else if (fw_ret_code == RDMA_RETURN_DEREGISTER_MR_BAD_STATE_ERR) { + /* INTERNAL: This error is returned in case trying to deregister + * a MR that is not allocated. We define "allocated" as either: + * 1. Registered. + * 2. This is an FMR MR type, which is not currently registered + * but can accept FMR WQEs on SQ. + */ + DP_NOTICE(p_hwfn, false, "itid=%d, fw_ret_code=%d\n", itid, + fw_ret_code); + return ECORE_INVAL; + } else { /* fw_ret_code == RDMA_RETURN_NIG_DRAIN_REQ */ + DP_NOTICE(p_hwfn, true, + "deregister failed after three attempts. itid=%d, fw_ret_code=%d\n", + itid, fw_ret_code); + return ECORE_UNKNOWN_ERROR; + } +} + +static struct ecore_bmap *ecore_rdma_get_srq_bmap(struct ecore_hwfn *p_hwfn, bool is_xrc) +{ + if (is_xrc) + return &p_hwfn->p_rdma_info->xrc_srq_map; + + return &p_hwfn->p_rdma_info->srq_map; +} + +u16 ecore_rdma_get_fw_srq_id(struct ecore_hwfn *p_hwfn, u16 id, bool is_xrc) +{ + if (is_xrc) + return id; + + return id + p_hwfn->p_rdma_info->srq_id_offset; +} + +enum _ecore_status_t +ecore_rdma_modify_srq(void *rdma_cxt, + struct ecore_rdma_modify_srq_in_params *in_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct rdma_srq_modify_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + u16 opaque_fid, fw_srq_id; + enum _ecore_status_t rc; + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + /* Send modify SRQ ramrod */ + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_MODIFY_SRQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.rdma_modify_srq; + + fw_srq_id = ecore_rdma_get_fw_srq_id(p_hwfn, in_params->srq_id, + in_params->is_xrc); + p_ramrod->srq_id.srq_idx = OSAL_CPU_TO_LE16(fw_srq_id); + opaque_fid = p_hwfn->hw_info.opaque_fid; + p_ramrod->srq_id.opaque_fid = OSAL_CPU_TO_LE16(opaque_fid); + p_ramrod->wqe_limit = OSAL_CPU_TO_LE16(in_params->wqe_limit); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + return rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "modified SRQ id = %x, is_xrc=%u\n", + in_params->srq_id, in_params->is_xrc); + + return rc; +} + +enum _ecore_status_t +ecore_rdma_destroy_srq(void *rdma_cxt, + struct ecore_rdma_destroy_srq_in_params *in_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct rdma_srq_destroy_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + u16 opaque_fid, fw_srq_id; + struct ecore_bmap *bmap; + enum _ecore_status_t rc; + + opaque_fid = p_hwfn->hw_info.opaque_fid; + + OSAL_MEMSET(&init_data, 0, 
sizeof(init_data)); + init_data.opaque_fid = opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + /* Send destroy SRQ ramrod */ + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_DESTROY_SRQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.rdma_destroy_srq; + + fw_srq_id = ecore_rdma_get_fw_srq_id(p_hwfn, in_params->srq_id, + in_params->is_xrc); + p_ramrod->srq_id.srq_idx = OSAL_CPU_TO_LE16(fw_srq_id); + p_ramrod->srq_id.opaque_fid = OSAL_CPU_TO_LE16(opaque_fid); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + if (rc != ECORE_SUCCESS) + return rc; + + bmap = ecore_rdma_get_srq_bmap(p_hwfn, in_params->is_xrc); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, bmap, in_params->srq_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "XRC/SRQ destroyed Id = %x, is_xrc=%u\n", + in_params->srq_id, in_params->is_xrc); + + return rc; +} + +enum _ecore_status_t +ecore_rdma_create_srq(void *rdma_cxt, + struct ecore_rdma_create_srq_in_params *in_params, + struct ecore_rdma_create_srq_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct rdma_srq_create_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + enum ecore_cxt_elem_type elem_type; + struct ecore_spq_entry *p_ent; + u16 opaque_fid, fw_srq_id; + struct ecore_bmap *bmap; + u32 returned_id; + enum _ecore_status_t rc; + + /* Allocate XRC/SRQ ID */ + bmap = ecore_rdma_get_srq_bmap(p_hwfn, in_params->is_xrc); + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, bmap, &returned_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, + "failed to allocate xrc/srq id (is_xrc=%u)\n", + in_params->is_xrc); + return rc; + } + /* Allocate XRC/SRQ ILT page */ + elem_type = (in_params->is_xrc) ? 
(ECORE_ELEM_XRC_SRQ) : (ECORE_ELEM_SRQ); + rc = ecore_cxt_dynamic_ilt_alloc(p_hwfn, elem_type, returned_id); + if (rc != ECORE_SUCCESS) + goto err; + + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.opaque_fid = opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + /* Create XRC/SRQ ramrod */ + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_CREATE_SRQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_create_srq; + + p_ramrod->pbl_base_addr.hi = DMA_HI_LE(in_params->pbl_base_addr); + p_ramrod->pbl_base_addr.lo = DMA_LO_LE(in_params->pbl_base_addr); + p_ramrod->pages_in_srq_pbl = OSAL_CPU_TO_LE16(in_params->num_pages); + p_ramrod->pd_id = OSAL_CPU_TO_LE16(in_params->pd_id); + p_ramrod->srq_id.opaque_fid = OSAL_CPU_TO_LE16(opaque_fid); + p_ramrod->page_size = OSAL_CPU_TO_LE16(in_params->page_size); + p_ramrod->producers_addr.hi = DMA_HI_LE(in_params->prod_pair_addr); + p_ramrod->producers_addr.lo = DMA_LO_LE(in_params->prod_pair_addr); + fw_srq_id = ecore_rdma_get_fw_srq_id(p_hwfn, (u16) returned_id, + in_params->is_xrc); + p_ramrod->srq_id.srq_idx = OSAL_CPU_TO_LE16(fw_srq_id); + + if (in_params->is_xrc) { + SET_FIELD(p_ramrod->flags, + RDMA_SRQ_CREATE_RAMROD_DATA_XRC_FLAG, + 1); + SET_FIELD(p_ramrod->flags, + RDMA_SRQ_CREATE_RAMROD_DATA_RESERVED_KEY_EN, + in_params->reserved_key_en); + p_ramrod->xrc_srq_cq_cid = OSAL_CPU_TO_LE32(in_params->cq_cid); + p_ramrod->xrc_domain = OSAL_CPU_TO_LE16(in_params->xrcd_id); + } + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + if (rc != ECORE_SUCCESS) + goto err; + + out_params->srq_id = (u16)returned_id; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "XRC/SRQ created Id = %x (is_xrc=%u)\n", + out_params->srq_id, in_params->is_xrc); + return rc; + +err: + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, bmap, returned_id); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + + return rc; +} + +bool ecore_rdma_allocated_qps(struct ecore_hwfn *p_hwfn) +{ + bool result; + + /* if rdma info has not been allocated, naturally there are no qps */ + if (!p_hwfn->p_rdma_info) + return false; + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + if (!p_hwfn->p_rdma_info->qp_map.bitmap) + result = false; + else + result = !ecore_bmap_is_empty(&p_hwfn->p_rdma_info->qp_map); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); + return result; +} + +enum _ecore_status_t ecore_rdma_resize_cq(void *rdma_cxt, + struct ecore_rdma_resize_cq_in_params *in_params, + struct ecore_rdma_resize_cq_out_params *out_params) +{ + enum _ecore_status_t rc; + enum ecore_rdma_toggle_bit toggle_bit; + struct ecore_spq_entry *p_ent; + struct rdma_resize_cq_ramrod_data *p_ramrod; + u8 fw_return_code; + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + dma_addr_t ramrod_res_phys; + struct rdma_resize_cq_output_params *p_ramrod_res; + struct ecore_sp_init_data init_data; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", in_params->icid); + + /* Send resize CQ ramrod */ + + p_ramrod_res = (struct rdma_resize_cq_output_params *) + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, &ramrod_res_phys, + sizeof(*p_ramrod_res)); + if (!p_ramrod_res) + { + rc = ECORE_NOMEM; + DP_NOTICE(p_hwfn, false, + "ecore resize cq failed: cannot allocate memory (ramrod). 
rc = %d\n", + rc); + return rc; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = in_params->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_RESIZE_CQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_resize_cq; + + p_ramrod->flags = 0; + + /* toggle the bit for every resize or create cq for a given icid */ + toggle_bit = ecore_rdma_toggle_bit_create_resize_cq(p_hwfn, + in_params->icid); + + SET_FIELD(p_ramrod->flags, + RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT, + toggle_bit); + + SET_FIELD(p_ramrod->flags, + RDMA_RESIZE_CQ_RAMROD_DATA_IS_TWO_LEVEL_PBL, + in_params->pbl_two_level); + + p_ramrod->pbl_log_page_size = in_params->pbl_page_size_log - 12; + p_ramrod->pbl_num_pages = OSAL_CPU_TO_LE16(in_params->pbl_num_pages); + p_ramrod->max_cqes = OSAL_CPU_TO_LE32(in_params->cq_size); + p_ramrod->pbl_addr.hi = DMA_HI_LE(in_params->pbl_ptr); + p_ramrod->pbl_addr.lo = DMA_LO_LE(in_params->pbl_ptr); + + p_ramrod->output_params_addr.hi = DMA_HI_LE(ramrod_res_phys); + p_ramrod->output_params_addr.lo = DMA_LO_LE(ramrod_res_phys); + + rc = ecore_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc != ECORE_SUCCESS) + goto err; + + if (fw_return_code != RDMA_RETURN_OK) + { + DP_NOTICE(p_hwfn, fw_return_code != RDMA_RETURN_RESIZE_CQ_ERR, + "fw_return_code = %d\n", fw_return_code); + DP_NOTICE(p_hwfn, + true, "fw_return_code = %d\n", fw_return_code); + rc = ECORE_UNKNOWN_ERROR; + goto err; + } + + out_params->prod = OSAL_LE32_TO_CPU(p_ramrod_res->old_cq_prod); + out_params->cons = OSAL_LE32_TO_CPU(p_ramrod_res->old_cq_cons); + + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_ramrod_res, ramrod_res_phys, + sizeof(*p_ramrod_res)); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + + return rc; + +err: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_ramrod_res, ramrod_res_phys, + sizeof(*p_ramrod_res)); + DP_NOTICE(p_hwfn, false, "rc = %d\n", rc); + + return rc; +} + +enum _ecore_status_t ecore_rdma_start(void *rdma_cxt, + struct ecore_rdma_start_in_params *params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_ptt *p_ptt; + enum _ecore_status_t rc = ECORE_TIMEOUT; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "desired_cnq = %08x\n", params->desired_cnq); + + p_ptt = ecore_ptt_acquire(p_hwfn); + if (!p_ptt) + goto err; + + rc = ecore_rdma_alloc(p_hwfn); + if (rc) + goto err1; + + rc = ecore_rdma_setup(p_hwfn, p_ptt, params); + if (rc) + goto err2; + + ecore_ptt_release(p_hwfn, p_ptt); + + ecore_rdma_activate(p_hwfn); + return rc; + +err2: + ecore_rdma_free(p_hwfn); +err1: + ecore_ptt_release(p_hwfn, p_ptt); +err: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "RDMA start - error, rc = %d\n", rc); + return rc; +} + +enum _ecore_status_t ecore_rdma_query_stats(void *rdma_cxt, u8 stats_queue, + struct ecore_rdma_stats_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + u8 abs_stats_queue, max_stats_queues; + u32 pstats_addr, tstats_addr, addr; + struct ecore_rdma_info *info; + struct ecore_ptt *p_ptt; +#ifdef CONFIG_ECORE_IWARP + u32 xstats_addr; +#endif + enum _ecore_status_t rc = ECORE_SUCCESS; + + if (!p_hwfn) + return ECORE_INVAL; + + if (!p_hwfn->p_rdma_info) { + DP_INFO(p_hwfn->p_dev, "ecore rdma query stats failed due to NULL rdma_info\n"); + return ECORE_INVAL; + } + + info = p_hwfn->p_rdma_info; + + rc = 
ecore_rdma_inc_ref_cnt(p_hwfn); + if (rc != ECORE_SUCCESS) + return rc; + + max_stats_queues = p_hwfn->p_rdma_info->dev->max_stats_queues; + if (stats_queue >= max_stats_queues) { + DP_ERR(p_hwfn->p_dev, + "ecore rdma query stats failed due to invalid statistics queue %d. maximum is %d\n", + stats_queue, max_stats_queues); + rc = ECORE_INVAL; + goto err; + } + + /* Statistics collected in statistics queues (for PF/VF) */ + abs_stats_queue = RESC_START(p_hwfn, ECORE_RDMA_STATS_QUEUE) + + stats_queue; + pstats_addr = BAR0_MAP_REG_PSDM_RAM + + PSTORM_RDMA_QUEUE_STAT_OFFSET(abs_stats_queue); + tstats_addr = BAR0_MAP_REG_TSDM_RAM + + TSTORM_RDMA_QUEUE_STAT_OFFSET(abs_stats_queue); + +#ifdef CONFIG_ECORE_IWARP + /* Statistics per PF ID */ + xstats_addr = BAR0_MAP_REG_XSDM_RAM + + XSTORM_IWARP_RXMIT_STATS_OFFSET(p_hwfn->rel_pf_id); +#endif + + OSAL_MEMSET(&info->rdma_sent_pstats, 0, sizeof(info->rdma_sent_pstats)); + OSAL_MEMSET(&info->rdma_rcv_tstats, 0, sizeof(info->rdma_rcv_tstats)); + OSAL_MEMSET(&info->roce.event_stats, 0, sizeof(info->roce.event_stats)); + OSAL_MEMSET(&info->roce.dcqcn_rx_stats, 0,sizeof(info->roce.dcqcn_rx_stats)); + OSAL_MEMSET(&info->roce.dcqcn_tx_stats, 0,sizeof(info->roce.dcqcn_tx_stats)); +#ifdef CONFIG_ECORE_IWARP + OSAL_MEMSET(&info->iwarp.stats, 0, sizeof(info->iwarp.stats)); +#endif + + p_ptt = ecore_ptt_acquire(p_hwfn); + + if (!p_ptt) { + rc = ECORE_TIMEOUT; + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + goto err; + } + + ecore_memcpy_from(p_hwfn, p_ptt, &info->rdma_sent_pstats, + pstats_addr, sizeof(struct rdma_sent_stats)); + + ecore_memcpy_from(p_hwfn, p_ptt, &info->rdma_rcv_tstats, + tstats_addr, sizeof(struct rdma_rcv_stats)); + + addr = BAR0_MAP_REG_TSDM_RAM + + TSTORM_ROCE_EVENTS_STAT_OFFSET(p_hwfn->rel_pf_id); + ecore_memcpy_from(p_hwfn, p_ptt, &info->roce.event_stats, addr, + sizeof(struct roce_events_stats)); + + addr = BAR0_MAP_REG_YSDM_RAM + + YSTORM_ROCE_DCQCN_RECEIVED_STATS_OFFSET(p_hwfn->rel_pf_id); + ecore_memcpy_from(p_hwfn, p_ptt, &info->roce.dcqcn_rx_stats, addr, + sizeof(struct roce_dcqcn_received_stats)); + + addr = BAR0_MAP_REG_PSDM_RAM + + PSTORM_ROCE_DCQCN_SENT_STATS_OFFSET(p_hwfn->rel_pf_id); + ecore_memcpy_from(p_hwfn, p_ptt, &info->roce.dcqcn_tx_stats, addr, + sizeof(struct roce_dcqcn_sent_stats)); + +#ifdef CONFIG_ECORE_IWARP + ecore_memcpy_from(p_hwfn, p_ptt, &info->iwarp.stats, + xstats_addr, sizeof(struct iwarp_rxmit_stats_drv)); +#endif + + ecore_ptt_release(p_hwfn, p_ptt); + + OSAL_MEMSET(out_params, 0, sizeof(*out_params)); + + out_params->sent_bytes = + HILO_64_REGPAIR(info->rdma_sent_pstats.sent_bytes); + out_params->sent_pkts = + HILO_64_REGPAIR(info->rdma_sent_pstats.sent_pkts); + out_params->rcv_bytes = + HILO_64_REGPAIR(info->rdma_rcv_tstats.rcv_bytes); + out_params->rcv_pkts = + HILO_64_REGPAIR(info->rdma_rcv_tstats.rcv_pkts); + + out_params->silent_drops = + OSAL_LE16_TO_CPU(info->roce.event_stats.silent_drops); + out_params->rnr_nacks_sent = + OSAL_LE16_TO_CPU(info->roce.event_stats.rnr_naks_sent); + out_params->icrc_errors = + OSAL_LE32_TO_CPU(info->roce.event_stats.icrc_error_count); + out_params->retransmit_events = + OSAL_LE32_TO_CPU(info->roce.event_stats.retransmit_count); + out_params->ecn_pkt_rcv = + HILO_64_REGPAIR(info->roce.dcqcn_rx_stats.ecn_pkt_rcv); + out_params->cnp_pkt_rcv = + HILO_64_REGPAIR(info->roce.dcqcn_rx_stats.cnp_pkt_rcv); + out_params->cnp_pkt_sent = + HILO_64_REGPAIR(info->roce.dcqcn_tx_stats.cnp_pkt_sent); + +#ifdef CONFIG_ECORE_IWARP + out_params->iwarp_tx_fast_rxmit_cnt = + 
HILO_64_REGPAIR(info->iwarp.stats.tx_fast_retransmit_event_cnt); + out_params->iwarp_tx_slow_start_cnt = + HILO_64_REGPAIR( + info->iwarp.stats.tx_go_to_slow_start_event_cnt); + out_params->unalign_rx_comp = info->iwarp.unalign_rx_comp; +#endif + +err: + ecore_rdma_dec_ref_cnt(p_hwfn); + + return rc; +} + +enum _ecore_status_t +ecore_rdma_query_counters(void *rdma_cxt, + struct ecore_rdma_counters_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + unsigned long *bitmap; + unsigned int nbits; + + if (!p_hwfn->p_rdma_info) + return ECORE_INVAL; + + OSAL_MEMSET(out_params, 0, sizeof(*out_params)); + + bitmap = p_hwfn->p_rdma_info->pd_map.bitmap; + nbits = p_hwfn->p_rdma_info->pd_map.max_count; + out_params->pd_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_pd = nbits; + + bitmap = p_hwfn->p_rdma_info->dpi_map.bitmap; + nbits = p_hwfn->p_rdma_info->dpi_map.max_count; + out_params->dpi_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_dpi = nbits; + + bitmap = p_hwfn->p_rdma_info->cq_map.bitmap; + nbits = p_hwfn->p_rdma_info->cq_map.max_count; + out_params->cq_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_cq = nbits; + + bitmap = p_hwfn->p_rdma_info->qp_map.bitmap; + nbits = p_hwfn->p_rdma_info->qp_map.max_count; + out_params->qp_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_qp = nbits; + + bitmap = p_hwfn->p_rdma_info->tid_map.bitmap; + nbits = p_hwfn->p_rdma_info->tid_map.max_count; + out_params->tid_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_tid = nbits; + + bitmap = p_hwfn->p_rdma_info->srq_map.bitmap; + nbits = p_hwfn->p_rdma_info->srq_map.max_count; + out_params->srq_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_srq = nbits; + + bitmap = p_hwfn->p_rdma_info->xrc_srq_map.bitmap; + nbits = p_hwfn->p_rdma_info->xrc_srq_map.max_count; + out_params->xrc_srq_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_xrc_srq = nbits; + + bitmap = p_hwfn->p_rdma_info->xrcd_map.bitmap; + nbits = p_hwfn->p_rdma_info->xrcd_map.max_count; + out_params->xrcd_count = OSAL_BITMAP_WEIGHT(bitmap, nbits); + out_params->max_xrcd = nbits; + + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_rdma_resize_cnq(void *rdma_cxt, + struct ecore_rdma_resize_cnq_in_params *params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "cnq_id = %08x\n", params->cnq_id); + + /* @@@TBD: waiting for fw (there is no ramrod yet) */ + return ECORE_NOTIMPL; +} + +void ecore_rdma_remove_user(void *rdma_cxt, + u16 dpi) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "dpi = %08x\n", dpi); + + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, dpi); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +#ifndef LINUX_REMOVE +enum _ecore_status_t +ecore_rdma_set_glob_cfg(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_glob_cfg *in_params, + u32 glob_cfg_bits) +{ + struct ecore_rdma_glob_cfg glob_cfg; + enum _ecore_status_t rc = ECORE_SUCCESS; + + DP_VERBOSE(p_hwfn->p_dev, ECORE_MSG_RDMA, + "dscp %d dscp en %d ecn %d ecn en %d vlan pri %d vlan_pri_en %d\n", + in_params->dscp, in_params->dscp_en, + in_params->ecn, in_params->ecn_en, in_params->vlan_pri, + in_params->vlan_pri_en); + + /* Read global cfg to local */ + OSAL_MEMCPY(&glob_cfg, &p_hwfn->p_rdma_info->glob_cfg, + sizeof(glob_cfg)); + + if (glob_cfg_bits & ECORE_RDMA_DCSP_BIT_MASK) { + 
if (in_params->dscp > MAX_DSCP) { + DP_ERR(p_hwfn->p_dev, "invalid glob dscp %d\n", + in_params->dscp); + return ECORE_INVAL; + } + glob_cfg.dscp = in_params->dscp; + } + + if (glob_cfg_bits & ECORE_RDMA_DCSP_EN_BIT_MASK) { + if (in_params->dscp_en > 1) { + DP_ERR(p_hwfn->p_dev, "invalid glob_dscp_en %d\n", + in_params->dscp_en); + return ECORE_INVAL; + } + glob_cfg.dscp_en = in_params->dscp_en; + } + + if (glob_cfg_bits & ECORE_RDMA_ECN_BIT_MASK) { + if (in_params->ecn > INET_ECN_ECT_0) { + DP_ERR(p_hwfn->p_dev, "invalid glob ecn %d\n", + in_params->ecn); + return ECORE_INVAL; + } + glob_cfg.ecn = in_params->ecn; + } + + if (glob_cfg_bits & ECORE_RDMA_ECN_EN_BIT_MASK) { + if (in_params->ecn_en > 1) { + DP_ERR(p_hwfn->p_dev, "invalid glob ecn en %d\n", + in_params->ecn_en); + return ECORE_INVAL; + } + glob_cfg.ecn_en = in_params->ecn_en; + } + + if (glob_cfg_bits & ECORE_RDMA_VLAN_PRIO_BIT_MASK) { + if (in_params->vlan_pri > MAX_VLAN_PRIO) { + DP_ERR(p_hwfn->p_dev, "invalid glob vlan pri %d\n", + in_params->vlan_pri); + return ECORE_INVAL; + } + glob_cfg.vlan_pri = in_params->vlan_pri; + } + + if (glob_cfg_bits & ECORE_RDMA_VLAN_PRIO_EN_BIT_MASK) { + if (in_params->vlan_pri_en > 1) { + DP_ERR(p_hwfn->p_dev, "invalid glob vlan pri en %d\n", + in_params->vlan_pri_en); + return ECORE_INVAL; + } + glob_cfg.vlan_pri_en = in_params->vlan_pri_en; + } + + /* Write back local cfg to global */ + OSAL_MEMCPY(&p_hwfn->p_rdma_info->glob_cfg, &glob_cfg, + sizeof(glob_cfg)); + + return rc; +} + +enum _ecore_status_t +ecore_rdma_get_glob_cfg(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_glob_cfg *out_params) +{ + OSAL_MEMCPY(out_params, &p_hwfn->p_rdma_info->glob_cfg, + sizeof(struct ecore_rdma_glob_cfg)); + + return ECORE_SUCCESS; +} +#endif /* LINUX_REMOVE */ diff --git a/sys/dev/qlnx/qlnxe/ecore_roce.c b/sys/dev/qlnx/qlnxe/ecore_roce.c new file mode 100644 index 000000000000..7a5d1f6c38e9 --- /dev/null +++ b/sys/dev/qlnx/qlnxe/ecore_roce.c @@ -0,0 +1,1579 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * File : ecore_roce.c + */ +#include +__FBSDID("$FreeBSD$"); + +#include "bcm_osal.h" +#include "ecore.h" +#include "ecore_status.h" +#include "ecore_sp_commands.h" +#include "ecore_cxt.h" +#include "ecore_rdma.h" +#include "reg_addr.h" +#include "ecore_rt_defs.h" +#include "ecore_init_ops.h" +#include "ecore_hw.h" +#include "ecore_mcp.h" +#include "ecore_init_fw_funcs.h" +#include "ecore_int.h" +#include "pcics_reg_driver.h" +#include "ecore_iro.h" +#include "ecore_gtt_reg_addr.h" +#ifndef LINUX_REMOVE +#include "ecore_tcp_ip.h" +#endif + +#ifdef _NTDDK_ +#pragma warning(push) +#pragma warning(disable : 28167) +#pragma warning(disable : 28123) +#pragma warning(disable : 28182) +#pragma warning(disable : 6011) +#endif + +static void ecore_roce_free_icid(struct ecore_hwfn *p_hwfn, u16 icid); + +static enum _ecore_status_t +ecore_roce_async_event(struct ecore_hwfn *p_hwfn, + u8 fw_event_code, + u16 OSAL_UNUSED echo, + union event_ring_data *data, + u8 OSAL_UNUSED fw_return_code) +{ + if (fw_event_code == ROCE_ASYNC_EVENT_DESTROY_QP_DONE) { + u16 icid = (u16)OSAL_LE32_TO_CPU( + data->rdma_data.rdma_destroy_qp_data.cid); + + /* icid release in this async event can occur only if the icid + * was offloaded to the FW. In case it wasn't offloaded this is + * handled in ecore_roce_sp_destroy_qp. + */ + ecore_roce_free_icid(p_hwfn, icid); + } else + p_hwfn->p_rdma_info->events.affiliated_event( + p_hwfn->p_rdma_info->events.context, + fw_event_code, + (void *)&data->rdma_data.async_handle); + + return ECORE_SUCCESS; +} + + + +#ifdef CONFIG_DCQCN +static enum _ecore_status_t ecore_roce_start_rl( + struct ecore_hwfn *p_hwfn, + struct ecore_roce_dcqcn_params *dcqcn_params) +{ + struct ecore_rl_update_params params; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "\n"); + OSAL_MEMSET(¶ms, 0, sizeof(params)); + + params.rl_id_first = (u8)RESC_START(p_hwfn, ECORE_RL); + params.rl_id_last = RESC_START(p_hwfn, ECORE_RL) + + ecore_init_qm_get_num_pf_rls(p_hwfn); + params.dcqcn_update_param_flg = 1; + params.rl_init_flg = 1; + params.rl_start_flg = 1; + params.rl_stop_flg = 0; + params.rl_dc_qcn_flg = 1; + + params.rl_bc_rate = dcqcn_params->rl_bc_rate; + params.rl_max_rate = dcqcn_params->rl_max_rate; + params.rl_r_ai = dcqcn_params->rl_r_ai; + params.rl_r_hai = dcqcn_params->rl_r_hai; + params.dcqcn_gd = dcqcn_params->dcqcn_gd; + params.dcqcn_k_us = dcqcn_params->dcqcn_k_us; + params.dcqcn_timeuot_us = dcqcn_params->dcqcn_timeout_us; + + return ecore_sp_rl_update(p_hwfn, ¶ms); +} + +enum _ecore_status_t ecore_roce_stop_rl(struct ecore_hwfn *p_hwfn) +{ + struct ecore_rl_update_params params; + + if (!p_hwfn->p_rdma_info->roce.dcqcn_reaction_point) + return ECORE_SUCCESS; + + OSAL_MEMSET(¶ms, 0, sizeof(params)); + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "\n"); + + params.rl_id_first = (u8)RESC_START(p_hwfn, ECORE_RL); + params.rl_id_last = RESC_START(p_hwfn, ECORE_RL) + + ecore_init_qm_get_num_pf_rls(p_hwfn); + params.rl_stop_flg = 1; + + return ecore_sp_rl_update(p_hwfn, ¶ms); +} + +#define NIG_REG_ROCE_DUPLICATE_TO_HOST_BTH 2 +#define NIG_REG_ROCE_DUPLICATE_TO_HOST_ECN 1 + +enum _ecore_status_t ecore_roce_dcqcn_cfg( + struct ecore_hwfn *p_hwfn, + struct ecore_roce_dcqcn_params *params, + struct roce_init_func_ramrod_data *p_ramrod, + struct ecore_ptt *p_ptt) +{ + u32 val = 0; + enum _ecore_status_t rc = ECORE_SUCCESS; + + if (!p_hwfn->pf_params.rdma_pf_params.enable_dcqcn || + p_hwfn->p_rdma_info->proto == PROTOCOLID_IWARP) + return rc; + + p_hwfn->p_rdma_info->roce.dcqcn_enabled = 0; + if 
(params->notification_point) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Configuring dcqcn notification point: timeout = 0x%x\n", + params->cnp_send_timeout); + p_ramrod->roce.cnp_send_timeout = params->cnp_send_timeout; + p_hwfn->p_rdma_info->roce.dcqcn_enabled = 1; + /* Configure NIG to duplicate to host and storm when: + * - (ECN == 2'b11 (notification point) + */ + val |= 1 << NIG_REG_ROCE_DUPLICATE_TO_HOST_ECN; + } + + if (params->reaction_point) { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, + "Configuring dcqcn reaction point\n"); + p_hwfn->p_rdma_info->roce.dcqcn_enabled = 1; + p_hwfn->p_rdma_info->roce.dcqcn_reaction_point = 1; + /* Configure NIG to duplicate to host and storm when: + * - BTH opcode equals bth_hdr_flow_ctrl_opcode_2 + * (reaction point) + */ + val |= 1 << NIG_REG_ROCE_DUPLICATE_TO_HOST_BTH; + + rc = ecore_roce_start_rl(p_hwfn, params); + } + + if (rc) + return rc; + + p_ramrod->roce.cnp_dscp = params->cnp_dscp; + p_ramrod->roce.cnp_vlan_priority = params->cnp_vlan_priority; + + ecore_wr(p_hwfn, + p_ptt, + NIG_REG_ROCE_DUPLICATE_TO_HOST, + val); + + return rc; +} +#endif + + +enum _ecore_status_t ecore_roce_stop(struct ecore_hwfn *p_hwfn) +{ + struct ecore_bmap *cid_map = &p_hwfn->p_rdma_info->cid_map; + int wait_count = 0; + + /* when destroying a_RoCE QP the control is returned to the + * user after the synchronous part. The asynchronous part may + * take a little longer. We delay for a short while if an + * asyn destroy QP is still expected. Beyond the added delay + * we clear the bitmap anyway. + */ + while (OSAL_BITMAP_WEIGHT(cid_map->bitmap, cid_map->max_count)) { + OSAL_MSLEEP(100); + if (wait_count++ > 20) { + DP_NOTICE(p_hwfn, false, + "cid bitmap wait timed out\n"); + break; + } + } + + ecore_spq_unregister_async_cb(p_hwfn, PROTOCOLID_ROCE); + + return ECORE_SUCCESS; +} + + +static void ecore_rdma_copy_gids(struct ecore_rdma_qp *qp, __le32 *src_gid, + __le32 *dst_gid) { + u32 i; + + if (qp->roce_mode == ROCE_V2_IPV4) { + /* The IPv4 addresses shall be aligned to the highest word. + * The lower words must be zero. 
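+		 * Only src/dst_gid[3] carries the IPv4 address in this
+		 * layout; words 0..2 stay zero.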
+ */ + OSAL_MEMSET(src_gid, 0, sizeof(union ecore_gid)); + OSAL_MEMSET(dst_gid, 0, sizeof(union ecore_gid)); + src_gid[3] = OSAL_CPU_TO_LE32(qp->sgid.ipv4_addr); + dst_gid[3] = OSAL_CPU_TO_LE32(qp->dgid.ipv4_addr); + } else { + /* RoCE, and RoCE v2 - IPv6: GIDs and IPv6 addresses coincide in + * location and size + */ + for (i = 0; i < OSAL_ARRAY_SIZE(qp->sgid.dwords); i++) { + src_gid[i] = OSAL_CPU_TO_LE32(qp->sgid.dwords[i]); + dst_gid[i] = OSAL_CPU_TO_LE32(qp->dgid.dwords[i]); + } + } +} + +static enum roce_flavor ecore_roce_mode_to_flavor(enum roce_mode roce_mode) +{ + enum roce_flavor flavor; + + switch (roce_mode) { + case ROCE_V1: + flavor = PLAIN_ROCE; + break; + case ROCE_V2_IPV4: + flavor = RROCE_IPV4; + break; + case ROCE_V2_IPV6: + flavor = (enum roce_flavor)ROCE_V2_IPV6; + break; + default: + flavor = (enum roce_flavor)MAX_ROCE_MODE; + break; + } + return flavor; +} + +#if 0 +static void ecore_roce_free_cid_pair(struct ecore_hwfn *p_hwfn, u16 cid) +{ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->qp_map, cid); + ecore_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->qp_map, cid + 1); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} +#endif + +static void ecore_roce_free_qp(struct ecore_hwfn *p_hwfn, u16 qp_idx) +{ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->qp_map, qp_idx); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +#define ECORE_ROCE_CREATE_QP_ATTEMPTS (20) +#define ECORE_ROCE_CREATE_QP_MSLEEP (10) + +static enum _ecore_status_t ecore_roce_wait_free_cids(struct ecore_hwfn *p_hwfn, u32 qp_idx) +{ + struct ecore_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + bool cids_free = false; + u32 icid, iter = 0; + int req, resp; + + icid = ECORE_ROCE_QP_TO_ICID(qp_idx); + + /* Make sure that the cids that were used by the QP index are free. + * This is necessary because the destroy flow returns to the user before + * the device finishes clean up. + * It can happen in the following flows: + * (1) ib_destroy_qp followed by an ib_create_qp + * (2) ib_modify_qp to RESET followed (not immediately), by an + * ib_modify_qp to RTR + */ + + do { + OSAL_SPIN_LOCK(&p_rdma_info->lock); + resp = ecore_bmap_test_id(p_hwfn, &p_rdma_info->cid_map, icid); + req = ecore_bmap_test_id(p_hwfn, &p_rdma_info->cid_map, icid + 1); + if (!resp && !req) + cids_free = true; + + OSAL_SPIN_UNLOCK(&p_rdma_info->lock); + + if (!cids_free) { + OSAL_MSLEEP(ECORE_ROCE_CREATE_QP_MSLEEP); + iter++; + } + } while (!cids_free && iter < ECORE_ROCE_CREATE_QP_ATTEMPTS); + + if (!cids_free) { + DP_ERR(p_hwfn->p_dev, + "responder and/or requester CIDs are still in use. 
resp=%d, req=%d\n", + resp, req); + return ECORE_AGAIN; + } + + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_roce_alloc_qp_idx( + struct ecore_hwfn *p_hwfn, u16 *qp_idx16) +{ + struct ecore_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + u32 start_cid, icid, cid, qp_idx; + enum _ecore_status_t rc; + + OSAL_SPIN_LOCK(&p_rdma_info->lock); + rc = ecore_rdma_bmap_alloc_id(p_hwfn, &p_rdma_info->qp_map, &qp_idx); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "failed to allocate qp\n"); + OSAL_SPIN_UNLOCK(&p_rdma_info->lock); + return rc; + } + + OSAL_SPIN_UNLOCK(&p_rdma_info->lock); + + /* Verify the cid bits that of this qp index are clear */ + rc = ecore_roce_wait_free_cids(p_hwfn, qp_idx); + if (rc) { + rc = ECORE_UNKNOWN_ERROR; + goto err; + } + + /* Allocate a DMA-able context for an ILT page, if not existing, for the + * associated iids. + * Note: If second allocation fails there's no need to free the first as + * it will be used in the future. + */ + icid = ECORE_ROCE_QP_TO_ICID(qp_idx); + start_cid = ecore_cxt_get_proto_cid_start(p_hwfn, p_rdma_info->proto); + cid = start_cid + icid; + + rc = ecore_cxt_dynamic_ilt_alloc(p_hwfn, ECORE_ELEM_CXT, cid); + if (rc != ECORE_SUCCESS) + goto err; + + rc = ecore_cxt_dynamic_ilt_alloc(p_hwfn, ECORE_ELEM_CXT, cid + 1); + if (rc != ECORE_SUCCESS) + goto err; + + /* qp index is under 2^16 */ + *qp_idx16 = (u16)qp_idx; + + return ECORE_SUCCESS; + +err: + ecore_roce_free_qp(p_hwfn, (u16)qp_idx); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + + return rc; +} + +static void ecore_roce_set_cid(struct ecore_hwfn *p_hwfn, + u32 cid) +{ + OSAL_SPIN_LOCK(&p_hwfn->p_rdma_info->lock); + ecore_bmap_set_id(p_hwfn, + &p_hwfn->p_rdma_info->cid_map, + cid); + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +static enum _ecore_status_t ecore_roce_sp_create_responder( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp) +{ + struct roce_create_qp_resp_ramrod_data *p_ramrod; + u16 regular_latency_queue, low_latency_queue; + struct ecore_sp_init_data init_data; + enum roce_flavor roce_flavor; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + u32 cid_start; + u16 fw_srq_id; + bool is_xrc; + + if (!qp->has_resp) + return ECORE_SUCCESS; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "qp_idx = %08x\n", qp->qp_idx); + + /* Allocate DMA-able memory for IRQ */ + qp->irq_num_pages = 1; + qp->irq = OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, + &qp->irq_phys_addr, + RDMA_RING_PAGE_SIZE); + if (!qp->irq) { + rc = ECORE_NOMEM; + DP_NOTICE(p_hwfn, false, + "ecore create responder failed: cannot allocate memory (irq). 
rc = %d\n", + rc); + return rc; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_CREATE_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.roce_create_qp_resp; + + p_ramrod->flags = 0; + + roce_flavor = ecore_roce_mode_to_flavor(qp->roce_mode); + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_ROCE_FLAVOR, + roce_flavor); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RDMA_RD_EN, + qp->incoming_rdma_read_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RDMA_WR_EN, + qp->incoming_rdma_write_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_ATOMIC_EN, + qp->incoming_atomic_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN, + qp->e2e_flow_control_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_SRQ_FLG, + qp->use_srq); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN, + qp->fmr_and_reserved_lkey); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_XRC_FLAG, + ecore_rdma_is_xrc_qp(qp)); + + /* TBD: future use only + * #define ROCE_CREATE_QP_RESP_RAMROD_DATA_PRI_MASK + * #define ROCE_CREATE_QP_RESP_RAMROD_DATA_PRI_SHIFT + */ + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER, + qp->min_rnr_nak_timer); + + p_ramrod->max_ird = + qp->max_rd_atomic_resp; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->irq_num_pages = qp->irq_num_pages; + p_ramrod->p_key = OSAL_CPU_TO_LE16(qp->pkey); + p_ramrod->flow_label = OSAL_CPU_TO_LE32(qp->flow_label); + p_ramrod->dst_qp_id = OSAL_CPU_TO_LE32(qp->dest_qp); + p_ramrod->mtu = OSAL_CPU_TO_LE16(qp->mtu); + p_ramrod->initial_psn = OSAL_CPU_TO_LE32(qp->rq_psn); + p_ramrod->pd = OSAL_CPU_TO_LE16(qp->pd); + p_ramrod->rq_num_pages = OSAL_CPU_TO_LE16(qp->rq_num_pages); + DMA_REGPAIR_LE(p_ramrod->rq_pbl_addr, qp->rq_pbl_ptr); + DMA_REGPAIR_LE(p_ramrod->irq_pbl_addr, qp->irq_phys_addr); + ecore_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + p_ramrod->qp_handle_for_async.hi = + OSAL_CPU_TO_LE32(qp->qp_handle_async.hi); + p_ramrod->qp_handle_for_async.lo = + OSAL_CPU_TO_LE32(qp->qp_handle_async.lo); + p_ramrod->qp_handle_for_cqe.hi = OSAL_CPU_TO_LE32(qp->qp_handle.hi); + p_ramrod->qp_handle_for_cqe.lo = OSAL_CPU_TO_LE32(qp->qp_handle.lo); + p_ramrod->cq_cid = OSAL_CPU_TO_LE32((p_hwfn->hw_info.opaque_fid << 16) | qp->rq_cq_id); + p_ramrod->xrc_domain = OSAL_CPU_TO_LE16(qp->xrcd_id); + +#ifdef CONFIG_DCQCN + /* when dcqcn is enabled physical queues are determined accoridng to qp id */ + if (p_hwfn->p_rdma_info->roce.dcqcn_enabled) + regular_latency_queue = + ecore_get_cm_pq_idx_rl(p_hwfn, + (qp->icid >> 1) % + ROCE_DCQCN_RP_MAX_QPS); + else +#endif + regular_latency_queue = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); + low_latency_queue = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LLT); + + p_ramrod->regular_latency_phy_queue = OSAL_CPU_TO_LE16(regular_latency_queue); + p_ramrod->low_latency_phy_queue = OSAL_CPU_TO_LE16(low_latency_queue); + p_ramrod->dpi = OSAL_CPU_TO_LE16(qp->dpi); + + ecore_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr); + ecore_rdma_set_fw_mac(p_ramrod->local_mac_addr, qp->local_mac_addr); + + 
p_ramrod->udp_src_port = qp->udp_src_port; + p_ramrod->vlan_id = OSAL_CPU_TO_LE16(qp->vlan_id); + is_xrc = ecore_rdma_is_xrc_qp(qp); + fw_srq_id = ecore_rdma_get_fw_srq_id(p_hwfn, qp->srq_id, is_xrc); + p_ramrod->srq_id.srq_idx = OSAL_CPU_TO_LE16(fw_srq_id); + p_ramrod->srq_id.opaque_fid = OSAL_CPU_TO_LE16(p_hwfn->hw_info.opaque_fid); + + p_ramrod->stats_counter_id = RESC_START(p_hwfn, ECORE_RDMA_STATS_QUEUE) + + qp->stats_queue; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d regular physical queue = 0x%x, low latency physical queue 0x%x\n", + rc, regular_latency_queue, low_latency_queue); + + if (rc != ECORE_SUCCESS) + goto err; + + qp->resp_offloaded = true; + qp->cq_prod.resp = 0; + + cid_start = ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + ecore_roce_set_cid(p_hwfn, qp->icid - cid_start); + + return rc; + +err: + DP_NOTICE(p_hwfn, false, "create responder - failed, rc = %d\n", rc); + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + qp->irq, + qp->irq_phys_addr, + qp->irq_num_pages * + RDMA_RING_PAGE_SIZE); + + return rc; +} + +static enum _ecore_status_t ecore_roce_sp_create_requester( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp) +{ + struct roce_create_qp_req_ramrod_data *p_ramrod; + u16 regular_latency_queue, low_latency_queue; + struct ecore_sp_init_data init_data; + enum roce_flavor roce_flavor; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + u32 cid_start; + + if (!qp->has_req) + return ECORE_SUCCESS; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", qp->icid); + + /* Allocate DMA-able memory for ORQ */ + qp->orq_num_pages = 1; + qp->orq = OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, + &qp->orq_phys_addr, + RDMA_RING_PAGE_SIZE); + if (!qp->orq) + { + rc = ECORE_NOMEM; + DP_NOTICE(p_hwfn, false, + "ecore create requester failed: cannot allocate memory (orq). 
rc = %d\n", + rc); + return rc; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + ROCE_RAMROD_CREATE_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.roce_create_qp_req; + + p_ramrod->flags = 0; + + roce_flavor = ecore_roce_mode_to_flavor(qp->roce_mode); + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_ROCE_FLAVOR, + roce_flavor); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_FMR_AND_RESERVED_EN, + qp->fmr_and_reserved_lkey); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_SIGNALED_COMP, + qp->signal_all); + + /* TBD: + * future use only + * #define ROCE_CREATE_QP_REQ_RAMROD_DATA_PRI_MASK + * #define ROCE_CREATE_QP_REQ_RAMROD_DATA_PRI_SHIFT + */ + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT, + qp->retry_cnt); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_RNR_NAK_CNT, + qp->rnr_retry_cnt); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_XRC_FLAG, + ecore_rdma_is_xrc_qp(qp)); + + p_ramrod->max_ord = qp->max_rd_atomic_req; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->orq_num_pages = qp->orq_num_pages; + p_ramrod->p_key = OSAL_CPU_TO_LE16(qp->pkey); + p_ramrod->flow_label = OSAL_CPU_TO_LE32(qp->flow_label); + p_ramrod->dst_qp_id = OSAL_CPU_TO_LE32(qp->dest_qp); + p_ramrod->ack_timeout_val = OSAL_CPU_TO_LE32(qp->ack_timeout); + p_ramrod->mtu = OSAL_CPU_TO_LE16(qp->mtu); + p_ramrod->initial_psn = OSAL_CPU_TO_LE32(qp->sq_psn); + p_ramrod->pd = OSAL_CPU_TO_LE16(qp->pd); + p_ramrod->sq_num_pages = OSAL_CPU_TO_LE16(qp->sq_num_pages); + DMA_REGPAIR_LE(p_ramrod->sq_pbl_addr, qp->sq_pbl_ptr); + DMA_REGPAIR_LE(p_ramrod->orq_pbl_addr, qp->orq_phys_addr); + ecore_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + p_ramrod->qp_handle_for_async.hi = + OSAL_CPU_TO_LE32(qp->qp_handle_async.hi); + p_ramrod->qp_handle_for_async.lo = + OSAL_CPU_TO_LE32(qp->qp_handle_async.lo); + p_ramrod->qp_handle_for_cqe.hi = OSAL_CPU_TO_LE32(qp->qp_handle.hi); + p_ramrod->qp_handle_for_cqe.lo = OSAL_CPU_TO_LE32(qp->qp_handle.lo); + p_ramrod->cq_cid = OSAL_CPU_TO_LE32((p_hwfn->hw_info.opaque_fid << 16) | + qp->sq_cq_id); + +#ifdef CONFIG_DCQCN + /* when dcqcn is enabled physical queues are determined accoridng to qp id */ + if (p_hwfn->p_rdma_info->roce.dcqcn_enabled) + regular_latency_queue = + ecore_get_cm_pq_idx_rl(p_hwfn, + (qp->icid >> 1) % + ROCE_DCQCN_RP_MAX_QPS); + else +#endif + regular_latency_queue = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); + low_latency_queue = ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LLT); + + p_ramrod->regular_latency_phy_queue = OSAL_CPU_TO_LE16(regular_latency_queue); + p_ramrod->low_latency_phy_queue = OSAL_CPU_TO_LE16(low_latency_queue); + p_ramrod->dpi = OSAL_CPU_TO_LE16(qp->dpi); + + ecore_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr); + ecore_rdma_set_fw_mac(p_ramrod->local_mac_addr, qp->local_mac_addr); + + p_ramrod->udp_src_port = qp->udp_src_port; + p_ramrod->vlan_id = OSAL_CPU_TO_LE16(qp->vlan_id); + p_ramrod->stats_counter_id = RESC_START(p_hwfn, ECORE_RDMA_STATS_QUEUE) + + qp->stats_queue; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "rc = %d\n", rc); + + if (rc != ECORE_SUCCESS) + goto 
err; + + qp->req_offloaded = true; + qp->cq_prod.req = 0; + + cid_start = ecore_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + ecore_roce_set_cid(p_hwfn, qp->icid + 1 - cid_start); + + return rc; + +err: + DP_NOTICE(p_hwfn, false, "Create requested - failed, rc = %d\n", rc); + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + qp->orq, + qp->orq_phys_addr, + qp->orq_num_pages * + RDMA_RING_PAGE_SIZE); + return rc; +} + +static enum _ecore_status_t ecore_roce_sp_modify_responder( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + bool move_to_err, + u32 modify_flags) +{ + struct roce_modify_qp_resp_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + + if (!qp->has_resp) + return ECORE_SUCCESS; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (move_to_err && !qp->resp_offloaded) + return ECORE_SUCCESS; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + ROCE_EVENT_MODIFY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + { + DP_NOTICE(p_hwfn, false, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.roce_modify_qp_resp; + + p_ramrod->flags = 0; + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MOVE_TO_ERR_FLG, + move_to_err); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_RD_EN, + qp->incoming_rdma_read_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_WR_EN, + qp->incoming_rdma_write_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_ATOMIC_EN, + qp->incoming_atomic_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN, + qp->e2e_flow_control_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_OPS_EN_FLG, + GET_FIELD(modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_P_KEY_FLG, + GET_FIELD(modify_flags, ECORE_ROCE_MODIFY_QP_VALID_PKEY)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_ADDRESS_VECTOR_FLG, + GET_FIELD(modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MAX_IRD_FLG, + GET_FIELD(modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP)); + + /* TBD: future use only + * #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_PRI_FLG_MASK + * #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_PRI_FLG_SHIFT + */ + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_FLG, + GET_FIELD(modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER)); + + p_ramrod->fields = 0; + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER, + qp->min_rnr_nak_timer); + + p_ramrod->max_ird = qp->max_rd_atomic_resp; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->p_key = OSAL_CPU_TO_LE16(qp->pkey); + p_ramrod->flow_label = OSAL_CPU_TO_LE32(qp->flow_label); + p_ramrod->mtu = OSAL_CPU_TO_LE16(qp->mtu); + ecore_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Modify responder, rc = %d\n", rc); + return rc; +} + +static enum _ecore_status_t ecore_roce_sp_modify_requester( + struct 
ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + bool move_to_sqd, + bool move_to_err, + u32 modify_flags) +{ + struct roce_modify_qp_req_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + + if (!qp->has_req) + return ECORE_SUCCESS; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (move_to_err && !(qp->req_offloaded)) + return ECORE_SUCCESS; + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + ROCE_EVENT_MODIFY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) { + DP_NOTICE(p_hwfn, false, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.roce_modify_qp_req; + + p_ramrod->flags = 0; + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_ERR_FLG, + move_to_err); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_SQD_FLG, + move_to_sqd); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_EN_SQD_ASYNC_NOTIFY, + qp->sqd_async); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_P_KEY_FLG, + GET_FIELD(modify_flags, ECORE_ROCE_MODIFY_QP_VALID_PKEY)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ADDRESS_VECTOR_FLG, + GET_FIELD(modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MAX_ORD_FLG, + GET_FIELD(modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_RNR_NAK_CNT_FLG, + GET_FIELD(modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT_FLG, + GET_FIELD(modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_RETRY_CNT)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ACK_TIMEOUT_FLG, + GET_FIELD(modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT)); + + /* TBD: future use only + * #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PRI_FLG_MASK + * #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PRI_FLG_SHIFT + */ + + p_ramrod->fields = 0; + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT, + qp->retry_cnt); + + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_RNR_NAK_CNT, + qp->rnr_retry_cnt); + + p_ramrod->max_ord = qp->max_rd_atomic_req; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->p_key = OSAL_CPU_TO_LE16(qp->pkey); + p_ramrod->flow_label = OSAL_CPU_TO_LE32(qp->flow_label); + p_ramrod->ack_timeout_val = OSAL_CPU_TO_LE32(qp->ack_timeout); + p_ramrod->mtu = OSAL_CPU_TO_LE16(qp->mtu); + ecore_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Modify requester, rc = %d\n", rc); + return rc; +} + +static enum _ecore_status_t ecore_roce_sp_destroy_qp_responder( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + u32 *num_invalidated_mw, + u32 *cq_prod) +{ + struct roce_destroy_qp_resp_output_params *p_ramrod_res; + struct roce_destroy_qp_resp_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + enum _ecore_status_t rc; + + if (!qp->has_resp) { + *num_invalidated_mw = 0; + *cq_prod = 0; + return ECORE_SUCCESS; + } 
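+
+	/*
+	 * If the responder was never offloaded to the firmware, only the
+	 * cached CQ producer is returned; otherwise the destroy ramrod
+	 * below reports the final CQ producer and the number of
+	 * invalidated memory windows.
+	 */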
+ + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", qp->icid); + + *num_invalidated_mw = 0; + + if (!qp->resp_offloaded) { + *cq_prod = qp->cq_prod.resp; + return ECORE_SUCCESS; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, + ROCE_RAMROD_DESTROY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + return rc; + + p_ramrod = &p_ent->ramrod.roce_destroy_qp_resp; + + p_ramrod_res = (struct roce_destroy_qp_resp_output_params *)OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, + &ramrod_res_phys, sizeof(*p_ramrod_res)); + + if (!p_ramrod_res) + { + rc = ECORE_NOMEM; + DP_NOTICE(p_hwfn, false, + "ecore destroy responder failed: cannot allocate memory (ramrod). rc = %d\n", + rc); + return rc; + } + + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err; + + *num_invalidated_mw + = OSAL_LE32_TO_CPU(p_ramrod_res->num_invalidated_mw); + *cq_prod = OSAL_LE32_TO_CPU(p_ramrod_res->cq_prod); + qp->cq_prod.resp = *cq_prod; + + /* Free IRQ - only if ramrod succeeded, in case FW is still using it */ + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + qp->irq, + qp->irq_phys_addr, + qp->irq_num_pages * + RDMA_RING_PAGE_SIZE); + + qp->resp_offloaded = false; + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Destroy responder, rc = %d\n", rc); + + /* "fall through" */ + +err: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_ramrod_res, ramrod_res_phys, + sizeof(*p_ramrod_res)); + + return rc; +} + +static enum _ecore_status_t ecore_roce_sp_destroy_qp_requester( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + u32 *num_bound_mw, + u32 *cq_prod) +{ + struct roce_destroy_qp_req_output_params *p_ramrod_res; + struct roce_destroy_qp_req_ramrod_data *p_ramrod; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + enum _ecore_status_t rc; + + if (!qp->has_req) { + *num_bound_mw = 0; + *cq_prod = 0; + return ECORE_SUCCESS; + } + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (!qp->req_offloaded) { + *cq_prod = qp->cq_prod.req; + return ECORE_SUCCESS; + } + + p_ramrod_res = (struct roce_destroy_qp_req_output_params *) + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, &ramrod_res_phys, + sizeof(*p_ramrod_res)); + if (!p_ramrod_res) + { + DP_NOTICE(p_hwfn, false, + "ecore destroy requester failed: cannot allocate memory (ramrod)\n"); + return ECORE_NOMEM; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + + rc = ecore_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_DESTROY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_ramrod = &p_ent->ramrod.roce_destroy_qp_req; + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err; + + *num_bound_mw = OSAL_LE32_TO_CPU(p_ramrod_res->num_bound_mw); + *cq_prod = OSAL_LE32_TO_CPU(p_ramrod_res->cq_prod); + qp->cq_prod.req = *cq_prod; + + /* Free ORQ - only if ramrod succeeded, in case FW is still using it */ + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, + qp->orq, + qp->orq_phys_addr, + qp->orq_num_pages * + RDMA_RING_PAGE_SIZE); + + qp->req_offloaded = false; 
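+
+	/*
+	 * Success and failure share the cleanup at the err label: the
+	 * DMA-able ramrod result buffer is freed in either case.
+	 */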
+ + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "Destroy requester, rc = %d\n", rc); + + /* "fall through" */ + +err: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_ramrod_res, ramrod_res_phys, + sizeof(*p_ramrod_res)); + + return rc; +} + +static OSAL_INLINE enum _ecore_status_t ecore_roce_sp_query_responder( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + struct ecore_rdma_query_qp_out_params *out_params) +{ + struct roce_query_qp_resp_output_params *p_resp_ramrod_res; + struct roce_query_qp_resp_ramrod_data *p_resp_ramrod; + struct ecore_sp_init_data init_data; + dma_addr_t resp_ramrod_res_phys; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc = ECORE_SUCCESS; + bool error_flag; + + if (!qp->resp_offloaded) { + /* Don't send query qp for the responder */ + out_params->rq_psn = qp->rq_psn; + + return ECORE_SUCCESS; + } + + /* Send a query responder ramrod to the FW */ + p_resp_ramrod_res = (struct roce_query_qp_resp_output_params *) + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, &resp_ramrod_res_phys, + sizeof(*p_resp_ramrod_res)); + if (!p_resp_ramrod_res) + { + DP_NOTICE(p_hwfn, false, + "ecore query qp failed: cannot allocate memory (ramrod)\n"); + return ECORE_NOMEM; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + rc = ecore_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_QUERY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_resp_ramrod = &p_ent->ramrod.roce_query_qp_resp; + DMA_REGPAIR_LE(p_resp_ramrod->output_params_addr, resp_ramrod_res_phys); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err; + + out_params->rq_psn = OSAL_LE32_TO_CPU(p_resp_ramrod_res->psn); + error_flag = GET_FIELD( + OSAL_LE32_TO_CPU(p_resp_ramrod_res->err_flag), + ROCE_QUERY_QP_RESP_OUTPUT_PARAMS_ERROR_FLG); + if (error_flag) + qp->cur_state = ECORE_ROCE_QP_STATE_ERR; + +err: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_resp_ramrod_res, + resp_ramrod_res_phys, + sizeof(*p_resp_ramrod_res)); + + return rc; +} + +static OSAL_INLINE enum _ecore_status_t ecore_roce_sp_query_requester( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + struct ecore_rdma_query_qp_out_params *out_params, + bool *sq_draining) +{ + struct roce_query_qp_req_output_params *p_req_ramrod_res; + struct roce_query_qp_req_ramrod_data *p_req_ramrod; + struct ecore_sp_init_data init_data; + dma_addr_t req_ramrod_res_phys; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc = ECORE_SUCCESS; + bool error_flag; + + if (!qp->req_offloaded) + { + /* Don't send query qp for the requester */ + out_params->sq_psn = qp->sq_psn; + out_params->draining = false; + + *sq_draining = 0; + + return ECORE_SUCCESS; + } + + /* Send a query requester ramrod to the FW */ + p_req_ramrod_res = (struct roce_query_qp_req_output_params *) + OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev, &req_ramrod_res_phys, + sizeof(*p_req_ramrod_res)); + if (!p_req_ramrod_res) + { + DP_NOTICE(p_hwfn, false, + "ecore query qp failed: cannot allocate memory (ramrod). 
rc = %d\n", + rc); + return ECORE_NOMEM; + } + + /* Get SPQ entry */ + init_data.cid = qp->icid + 1; + rc = ecore_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_QUERY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + goto err; + + p_req_ramrod = &p_ent->ramrod.roce_query_qp_req; + DMA_REGPAIR_LE(p_req_ramrod->output_params_addr, req_ramrod_res_phys); + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err; + + out_params->sq_psn = OSAL_LE32_TO_CPU(p_req_ramrod_res->psn); + error_flag = GET_FIELD(OSAL_LE32_TO_CPU(p_req_ramrod_res->flags), + ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_ERR_FLG); + if (error_flag) + qp->cur_state = ECORE_ROCE_QP_STATE_ERR; + else + *sq_draining = GET_FIELD( + OSAL_LE32_TO_CPU(p_req_ramrod_res->flags), + ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_SQ_DRAINING_FLG); + +err: + OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_req_ramrod_res, + req_ramrod_res_phys, sizeof(*p_req_ramrod_res)); + + return rc; +} + +enum _ecore_status_t ecore_roce_query_qp( + struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + struct ecore_rdma_query_qp_out_params *out_params) +{ + enum _ecore_status_t rc; + + rc = ecore_roce_sp_query_responder(p_hwfn, qp, out_params); + if (rc) + return rc; + + rc = ecore_roce_sp_query_requester(p_hwfn, qp, out_params, + &out_params->draining); + if (rc) + return rc; + + out_params->state = qp->cur_state; + + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_roce_destroy_qp(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + struct ecore_rdma_destroy_qp_out_params *out_params) +{ + u32 cq_prod_resp = qp->cq_prod.resp, cq_prod_req = qp->cq_prod.req; + u32 num_invalidated_mw = 0; + u32 num_bound_mw = 0; + enum _ecore_status_t rc; + + /* Destroys the specified QP + * Note: if qp state != RESET/ERR/INIT then upper driver first need to + * call modify qp to move the qp to ERR state + */ + if ((qp->cur_state != ECORE_ROCE_QP_STATE_RESET) && + (qp->cur_state != ECORE_ROCE_QP_STATE_ERR) && + (qp->cur_state != ECORE_ROCE_QP_STATE_INIT)) + { + DP_NOTICE(p_hwfn, + true, + "QP must be in error, reset or init state before destroying it\n"); + return ECORE_INVAL; + } + + if (qp->cur_state != ECORE_ROCE_QP_STATE_RESET) { + rc = ecore_roce_sp_destroy_qp_responder(p_hwfn, + qp, + &num_invalidated_mw, + &cq_prod_resp); + if (rc != ECORE_SUCCESS) + return rc; + + /* Send destroy requester ramrod */ + rc = ecore_roce_sp_destroy_qp_requester(p_hwfn, qp, + &num_bound_mw, + &cq_prod_req); + if (rc != ECORE_SUCCESS) + return rc; + + /* resp_ofload was true, num_invalidated_mw is valid */ + if (num_invalidated_mw != num_bound_mw) { + DP_NOTICE(p_hwfn, + true, + "number of invalidate memory windows is different from bounded ones\n"); + return ECORE_INVAL; + } + } + + ecore_roce_free_qp(p_hwfn, qp->qp_idx); + + out_params->rq_cq_prod = cq_prod_resp; + out_params->sq_cq_prod = cq_prod_req; + + return ECORE_SUCCESS; +} + +enum _ecore_status_t ecore_roce_destroy_ud_qp(void *rdma_cxt, u16 cid) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + + if (!rdma_cxt) { + DP_ERR(p_hwfn->p_dev, + "destroy ud qp failed due to NULL rdma_cxt\n"); + return ECORE_INVAL; + } + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + rc = ecore_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_DESTROY_UD_QP, + PROTOCOLID_ROCE, 
&init_data); + if (rc != ECORE_SUCCESS) + goto err; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err; + + ecore_roce_free_qp(p_hwfn, ECORE_ROCE_ICID_TO_QP(cid)); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "freed a ud qp with cid=%d\n", cid); + + return ECORE_SUCCESS; + +err: + DP_ERR(p_hwfn, "failed destroying a ud qp with cid=%d\n", cid); + + return rc; +} + + +enum _ecore_status_t ecore_roce_create_ud_qp(void *rdma_cxt, + struct ecore_rdma_create_qp_out_params *out_params) +{ + struct ecore_hwfn *p_hwfn = (struct ecore_hwfn *)rdma_cxt; + struct ecore_sp_init_data init_data; + struct ecore_spq_entry *p_ent; + enum _ecore_status_t rc; + u16 icid, qp_idx; + + if (!rdma_cxt || !out_params) { + DP_ERR(p_hwfn->p_dev, + "ecore roce create ud qp failed due to NULL entry (rdma_cxt=%p, out=%p)\n", + rdma_cxt, out_params); + return ECORE_INVAL; + } + + rc = ecore_roce_alloc_qp_idx(p_hwfn, &qp_idx); + if (rc != ECORE_SUCCESS) + goto err; + + icid = ECORE_ROCE_QP_TO_ICID(qp_idx); + + /* Get SPQ entry */ + OSAL_MEMSET(&init_data, 0, sizeof(init_data)); + init_data.cid = icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = ECORE_SPQ_MODE_EBLOCK; + rc = ecore_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_CREATE_UD_QP, + PROTOCOLID_ROCE, &init_data); + if (rc != ECORE_SUCCESS) + goto err1; + + rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL); + if (rc != ECORE_SUCCESS) + goto err1; + + out_params->icid = icid; + out_params->qp_id = ((0xFF << 16) | icid); + + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "created a ud qp with icid=%d\n", + icid); + + return ECORE_SUCCESS; + +err1: + ecore_roce_free_qp(p_hwfn, qp_idx); + +err: + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "failed creating a ud qp\n"); + + return rc; +} + + +enum _ecore_status_t +ecore_roce_modify_qp(struct ecore_hwfn *p_hwfn, + struct ecore_rdma_qp *qp, + enum ecore_roce_qp_state prev_state, + struct ecore_rdma_modify_qp_in_params *params) +{ + u32 num_invalidated_mw = 0, num_bound_mw = 0; + enum _ecore_status_t rc = ECORE_SUCCESS; + + /* Perform additional operations according to the current state and the + * next state + */ + if (((prev_state == ECORE_ROCE_QP_STATE_INIT) || + (prev_state == ECORE_ROCE_QP_STATE_RESET)) && + (qp->cur_state == ECORE_ROCE_QP_STATE_RTR)) + { + /* Init->RTR or Reset->RTR */ + + /* Verify the cid bits that of this qp index are clear */ + rc = ecore_roce_wait_free_cids(p_hwfn, qp->qp_idx); + if (rc) + return rc; + + rc = ecore_roce_sp_create_responder(p_hwfn, qp); + return rc; + + } else if ((prev_state == ECORE_ROCE_QP_STATE_RTR) && + (qp->cur_state == ECORE_ROCE_QP_STATE_RTS)) + { + /* RTR-> RTS */ + rc = ecore_roce_sp_create_requester(p_hwfn, qp); + if (rc != ECORE_SUCCESS) + return rc; + + /* Send modify responder ramrod */ + rc = ecore_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + return rc; + + } else if ((prev_state == ECORE_ROCE_QP_STATE_RTS) && + (qp->cur_state == ECORE_ROCE_QP_STATE_RTS)) + { + /* RTS->RTS */ + rc = ecore_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + return rc; + + } else if ((prev_state == ECORE_ROCE_QP_STATE_RTS) && + (qp->cur_state == ECORE_ROCE_QP_STATE_SQD)) + { + /* RTS->SQD */ + rc = ecore_roce_sp_modify_requester(p_hwfn, qp, true, false, + params->modify_flags); + return rc; + + } else if ((prev_state == ECORE_ROCE_QP_STATE_SQD) && + (qp->cur_state == 
ECORE_ROCE_QP_STATE_SQD)) + { + /* SQD->SQD */ + rc = ecore_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + return rc; + + } else if ((prev_state == ECORE_ROCE_QP_STATE_SQD) && + (qp->cur_state == ECORE_ROCE_QP_STATE_RTS)) + { + /* SQD->RTS */ + rc = ecore_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + + return rc; + } else if (qp->cur_state == ECORE_ROCE_QP_STATE_ERR) { + /* ->ERR */ + rc = ecore_roce_sp_modify_responder(p_hwfn, qp, true, + params->modify_flags); + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_roce_sp_modify_requester(p_hwfn, qp, false, true, + params->modify_flags); + return rc; + + } else if (qp->cur_state == ECORE_ROCE_QP_STATE_RESET) { + /* Any state -> RESET */ + + /* Send destroy responder ramrod */ + rc = ecore_roce_sp_destroy_qp_responder(p_hwfn, qp, + &num_invalidated_mw, + &qp->cq_prod.resp); + + if (rc != ECORE_SUCCESS) + return rc; + + rc = ecore_roce_sp_destroy_qp_requester(p_hwfn, qp, + &num_bound_mw, + &qp->cq_prod.req); + + + if (rc != ECORE_SUCCESS) + return rc; + + if (num_invalidated_mw != num_bound_mw) { + DP_NOTICE(p_hwfn, + true, + "number of invalidate memory windows is different from bounded ones\n"); + return ECORE_INVAL; + } + } else { + DP_VERBOSE(p_hwfn, ECORE_MSG_RDMA, "ECORE_SUCCESS\n"); + } + + return rc; +} + +static void ecore_roce_free_icid(struct ecore_hwfn *p_hwfn, u16 icid) +{ + struct ecore_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + u32 start_cid, cid; + + start_cid = ecore_cxt_get_proto_cid_start(p_hwfn, p_rdma_info->proto); + cid = icid - start_cid; + + OSAL_SPIN_LOCK(&p_rdma_info->lock); + + ecore_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, cid); + + OSAL_SPIN_UNLOCK(&p_hwfn->p_rdma_info->lock); +} + +static void ecore_rdma_dpm_conf(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt) +{ + u32 val; + + val = (p_hwfn->dcbx_no_edpm || p_hwfn->db_bar_no_edpm) ? 0 : 1; + + ecore_wr(p_hwfn, p_ptt, DORQ_REG_PF_DPM_ENABLE, val); + DP_VERBOSE(p_hwfn, (ECORE_MSG_DCB | ECORE_MSG_RDMA), + "Changing DPM_EN state to %d (DCBX=%d, DB_BAR=%d)\n", + val, p_hwfn->dcbx_no_edpm, p_hwfn->db_bar_no_edpm); +} + +/* This function disables EDPM due to DCBx considerations */ +void ecore_roce_dpm_dcbx(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt) +{ + u8 val; + + /* if any QPs are already active, we want to disable DPM, since their + * context information contains information from before the latest DCBx + * update. Otherwise enable it. + */ + val = (ecore_rdma_allocated_qps(p_hwfn)) ? 
true : false; + p_hwfn->dcbx_no_edpm = (u8)val; + + ecore_rdma_dpm_conf(p_hwfn, p_ptt); +} + +/* This function disables EDPM due to doorbell bar considerations */ +void ecore_rdma_dpm_bar(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt) +{ + p_hwfn->db_bar_no_edpm = true; + + ecore_rdma_dpm_conf(p_hwfn, p_ptt); +} + +enum _ecore_status_t ecore_roce_setup(struct ecore_hwfn *p_hwfn) +{ + return ecore_spq_register_async_cb(p_hwfn, PROTOCOLID_ROCE, + ecore_roce_async_event); +} + +#ifdef _NTDDK_ +#pragma warning(pop) +#endif diff --git a/sys/dev/qlnx/qlnxe/qlnx_rdma.c b/sys/dev/qlnx/qlnxe/qlnx_rdma.c new file mode 100644 index 000000000000..dc105e1e9e45 --- /dev/null +++ b/sys/dev/qlnx/qlnxe/qlnx_rdma.c @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * File : qlnx_rdma.c + * Author: David C Somayajulu + */ +#include +__FBSDID("$FreeBSD$"); + + +#include "qlnx_os.h" +#include "bcm_osal.h" + +#include "reg_addr.h" +#include "ecore_gtt_reg_addr.h" +#include "ecore.h" +#include "ecore_chain.h" +#include "ecore_status.h" +#include "ecore_hw.h" +#include "ecore_rt_defs.h" +#include "ecore_init_ops.h" +#include "ecore_int.h" +#include "ecore_cxt.h" +#include "ecore_spq.h" +#include "ecore_init_fw_funcs.h" +#include "ecore_sp_commands.h" +#include "ecore_dev_api.h" +#include "ecore_l2_api.h" +#ifdef CONFIG_ECORE_SRIOV +#include "ecore_sriov.h" +#include "ecore_vf.h" +#endif +#ifdef CONFIG_ECORE_LL2 +#include "ecore_ll2.h" +#endif +#ifdef CONFIG_ECORE_FCOE +#include "ecore_fcoe.h" +#endif +#ifdef CONFIG_ECORE_ISCSI +#include "ecore_iscsi.h" +#endif +#include "ecore_mcp.h" +#include "ecore_hw_defs.h" +#include "mcp_public.h" + +#ifdef CONFIG_ECORE_RDMA +#include "ecore_rdma.h" +#endif + +#ifdef CONFIG_ECORE_ROCE +#include "ecore_roce.h" +#endif + +#ifdef CONFIG_ECORE_IWARP +#include "ecore_iwarp.h" +#endif + +#include "ecore_iro.h" +#include "nvm_cfg.h" +#include "ecore_dev_api.h" +#include "ecore_dbg_fw_funcs.h" + +#include "qlnx_ioctl.h" +#include "qlnx_def.h" +#include "qlnx_rdma.h" +#include "qlnx_ver.h" +#include + +struct mtx qlnx_rdma_dev_lock; +struct qlnx_rdma_if *qlnx_rdma_if = NULL; + +qlnx_host_t *qlnx_host_list = NULL; + +void +qlnx_rdma_init(void) +{ + if (!mtx_initialized(&qlnx_rdma_dev_lock)) { + mtx_init(&qlnx_rdma_dev_lock, "qlnx_rdma_dev_lock", NULL, MTX_DEF); + } + return; +} + +void +qlnx_rdma_deinit(void) +{ + if (mtx_initialized(&qlnx_rdma_dev_lock) && (qlnx_host_list == NULL)) { + mtx_destroy(&qlnx_rdma_dev_lock); + } + return; +} + +static void +_qlnx_rdma_dev_add(struct qlnx_host *ha) +{ + QL_DPRINT12(ha, "enter ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if); + + if (qlnx_rdma_if == NULL) + return; + + if (ha->personality != ECORE_PCI_ETH_IWARP && + ha->personality != ECORE_PCI_ETH_ROCE) + return; + + ha->qlnx_rdma = qlnx_rdma_if->add(ha); + + QL_DPRINT12(ha, "exit (ha = %p, qlnx_rdma = %p)\n", ha, ha->qlnx_rdma); + return; +} + +void +qlnx_rdma_dev_add(struct qlnx_host *ha) +{ + QL_DPRINT12(ha, "enter ha = %p\n", ha); + + if (ha->personality != ECORE_PCI_ETH_IWARP && + ha->personality != ECORE_PCI_ETH_ROCE) + return; + + mtx_lock(&qlnx_rdma_dev_lock); + + if (qlnx_host_list == NULL) { + qlnx_host_list = ha; + ha->next = NULL; + } else { + ha->next = qlnx_host_list; + qlnx_host_list = ha; + } + + mtx_unlock(&qlnx_rdma_dev_lock); + + _qlnx_rdma_dev_add(ha); + + QL_DPRINT12(ha, "exit (%p)\n", ha); + + return; +} + +static int +_qlnx_rdma_dev_remove(struct qlnx_host *ha) +{ + int ret = 0; + + QL_DPRINT12(ha, "enter ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if); + + if (qlnx_rdma_if == NULL) + return (ret); + + if (ha->personality != ECORE_PCI_ETH_IWARP && + ha->personality != ECORE_PCI_ETH_ROCE) + return (ret); + + ret = qlnx_rdma_if->remove(ha, ha->qlnx_rdma); + + QL_DPRINT12(ha, "exit ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if); + return (ret); +} + +int +qlnx_rdma_dev_remove(struct qlnx_host *ha) +{ + int ret = 0; + qlnx_host_t *ha_prev; + qlnx_host_t *ha_cur; + + QL_DPRINT12(ha, "enter ha = %p\n", ha); + + if ((qlnx_host_list == NULL) || (ha == NULL)) + return (ret); + + if (ha->personality != ECORE_PCI_ETH_IWARP && + ha->personality != ECORE_PCI_ETH_ROCE) + return (ret); + + ret = _qlnx_rdma_dev_remove(ha); + + if (ret) + return (ret); + + mtx_lock(&qlnx_rdma_dev_lock); + + if (qlnx_host_list == 
ha) {
+		qlnx_host_list = ha->next;
+		ha->next = NULL;
+		mtx_unlock(&qlnx_rdma_dev_lock);
+		QL_DPRINT12(ha, "exit0 ha = %p\n", ha);
+		return (ret);
+	}
+
+	ha_prev = ha_cur = qlnx_host_list;
+
+	while ((ha_cur != ha) && (ha_cur != NULL)) {
+		ha_prev = ha_cur;
+		ha_cur = ha_cur->next;
+	}
+
+	if (ha_cur == ha) {
+		/* unlink ha from the list; ha_prev must be repointed at
+		 * ha's successor, not overwritten with it */
+		ha_prev->next = ha->next;
+		ha->next = NULL;
+	}
+
+	mtx_unlock(&qlnx_rdma_dev_lock);
+
+	QL_DPRINT12(ha, "exit1 ha = %p\n", ha);
+	return (ret);
+}
+
+int
+qlnx_rdma_register_if(qlnx_rdma_if_t *rdma_if)
+{
+	qlnx_host_t *ha;
+
+	if (mtx_initialized(&qlnx_rdma_dev_lock)) {
+
+		mtx_lock(&qlnx_rdma_dev_lock);
+		qlnx_rdma_if = rdma_if;
+
+		ha = qlnx_host_list;
+
+		while (ha != NULL) {
+			_qlnx_rdma_dev_add(ha);
+			ha = ha->next;
+		}
+
+		mtx_unlock(&qlnx_rdma_dev_lock);
+
+		return (0);
+	}
+
+	return (-1);
+}
+
+int
+qlnx_rdma_deregister_if(qlnx_rdma_if_t *rdma_if)
+{
+	int ret = 0;
+	qlnx_host_t *ha;
+
+	printf("%s: enter rdma_if = %p\n", __func__, rdma_if);
+
+	if (mtx_initialized(&qlnx_rdma_dev_lock)) {
+
+		mtx_lock(&qlnx_rdma_dev_lock);
+
+		ha = qlnx_host_list;
+
+		while (ha != NULL) {
+
+			mtx_unlock(&qlnx_rdma_dev_lock);
+
+			if (ha->dbg_level & 0xF000)
+				ret = EBUSY;
+			else
+				ret = _qlnx_rdma_dev_remove(ha);
+
+			device_printf(ha->pci_dev, "%s [%d]: ret = 0x%x\n",
+				__func__, __LINE__, ret);
+			if (ret)
+				return (ret);
+
+			mtx_lock(&qlnx_rdma_dev_lock);
+
+			ha->qlnx_rdma = NULL;
+
+			ha = ha->next;
+		}
+
+		if (!ret)
+			qlnx_rdma_if = NULL;
+
+		mtx_unlock(&qlnx_rdma_dev_lock);
+
+	}
+	printf("%s: exit rdma_if = %p\n", __func__, rdma_if);
+
+	return (ret);
+}
+
+
+void
+qlnx_rdma_dev_open(struct qlnx_host *ha)
+{
+	QL_DPRINT12(ha, "enter ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if);
+
+	if (qlnx_rdma_if == NULL)
+		return;
+
+	if (ha->personality != ECORE_PCI_ETH_IWARP &&
+		ha->personality != ECORE_PCI_ETH_ROCE)
+		return;
+
+	qlnx_rdma_if->notify(ha, ha->qlnx_rdma, QLNX_ETHDEV_UP);
+
+	QL_DPRINT12(ha, "exit ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if);
+	return;
+}
+
+
+void
+qlnx_rdma_dev_close(struct qlnx_host *ha)
+{
+	QL_DPRINT12(ha, "enter ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if);
+
+	if (qlnx_rdma_if == NULL)
+		return;
+
+	if (ha->personality != ECORE_PCI_ETH_IWARP &&
+		ha->personality != ECORE_PCI_ETH_ROCE)
+		return;
+
+	qlnx_rdma_if->notify(ha, ha->qlnx_rdma, QLNX_ETHDEV_DOWN);
+
+	QL_DPRINT12(ha, "exit ha = %p qlnx_rdma_if = %p\n", ha, qlnx_rdma_if);
+	return;
+}
+
+int
+qlnx_rdma_get_num_irqs(struct qlnx_host *ha)
+{
+	return (QLNX_NUM_CNQ + ecore_rdma_get_sb_id(&ha->cdev.hwfns[0], 0) + 2);
+}
+
+
diff --git a/sys/dev/qlnx/qlnxe/qlnx_rdma.h b/sys/dev/qlnx/qlnxe/qlnx_rdma.h
new file mode 100644
index 000000000000..9b3526a9e8d7
--- /dev/null
+++ b/sys/dev/qlnx/qlnxe/qlnx_rdma.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018-2019 Cavium, Inc.
+ * All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + + +/* + * File: qlnx_rdma.h + * Author: David C Somayajulu + */ + +#ifndef _QLNX_RDMA_H_ +#define _QLNX_RDMA_H_ + +enum qlnx_rdma_event { + QLNX_ETHDEV_UP = 0x10, + QLNX_ETHDEV_DOWN = 0x11, + QLNX_ETHDEV_CHANGE_ADDR = 0x12 +}; + +struct qlnx_rdma_if { + void * (*add)(void *ha); + int (*remove)(void *ha, void *qlnx_rdma_dev); + void (*notify)(void *ha, void *qlnx_rdma_dev, enum qlnx_rdma_event); +}; +typedef struct qlnx_rdma_if qlnx_rdma_if_t; + +extern int qlnx_rdma_register_if(qlnx_rdma_if_t *rdma_if); +extern int qlnx_rdma_deregister_if(qlnx_rdma_if_t *rdma_if); +extern int qlnx_rdma_ll2_set_mac_filter(void *rdma_ctx, uint8_t *old_mac_address, + uint8_t *new_mac_address); + +#define QLNX_NUM_CNQ 1 + +extern int qlnx_rdma_get_num_irqs(struct qlnx_host *ha); +extern void qlnx_rdma_dev_add(struct qlnx_host *ha); +extern void qlnx_rdma_dev_open(struct qlnx_host *ha); +extern void qlnx_rdma_dev_close(struct qlnx_host *ha); +extern int qlnx_rdma_dev_remove(struct qlnx_host *ha); +extern void qlnx_rdma_changeaddr(struct qlnx_host *ha); + +extern void qlnx_rdma_init(void); +extern void qlnx_rdma_deinit(void); + +#endif /* #ifndef _QLNX_RDMA_H_ */ diff --git a/sys/dev/qlnx/qlnxr/qlnxr_cm.c b/sys/dev/qlnx/qlnxr/qlnxr_cm.c new file mode 100644 index 000000000000..23c8c3000765 --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_cm.c @@ -0,0 +1,887 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "qlnxr_def.h" +#include "rdma_common.h" +#include "qlnxr_cm.h" + +void +qlnxr_inc_sw_gsi_cons(struct qlnxr_qp_hwq_info *info) +{ + info->gsi_cons = (info->gsi_cons + 1) % info->max_wr; +} + +void +qlnxr_store_gsi_qp_cq(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ib_qp_init_attr *attrs) +{ + QL_DPRINT12(dev->ha, "enter\n"); + + dev->gsi_qp_created = 1; + dev->gsi_sqcq = get_qlnxr_cq((attrs->send_cq)); + dev->gsi_rqcq = get_qlnxr_cq((attrs->recv_cq)); + dev->gsi_qp = qp; + + QL_DPRINT12(dev->ha, "exit\n"); + + return; +} + +void +qlnxr_ll2_complete_tx_packet(void *cxt, + uint8_t connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, + bool b_last_packet) +{ + struct qlnxr_dev *dev = (struct qlnxr_dev *)cxt; + struct ecore_roce_ll2_packet *pkt = cookie; + struct qlnxr_cq *cq = dev->gsi_sqcq; + struct qlnxr_qp *qp = dev->gsi_qp; + unsigned long flags; + + QL_DPRINT12(dev->ha, "enter\n"); + + qlnx_dma_free_coherent(&dev->ha->cdev, pkt->header.vaddr, + pkt->header.baddr, pkt->header.len); + kfree(pkt); + + spin_lock_irqsave(&qp->q_lock, flags); + + qlnxr_inc_sw_gsi_cons(&qp->sq); + + spin_unlock_irqrestore(&qp->q_lock, flags); + + if (cq->ibcq.comp_handler) + (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); + + QL_DPRINT12(dev->ha, "exit\n"); + + return; +} + +void +qlnxr_ll2_complete_rx_packet(void *cxt, + struct ecore_ll2_comp_rx_data *data) +{ + struct qlnxr_dev *dev = (struct qlnxr_dev *)cxt; + struct qlnxr_cq *cq = dev->gsi_rqcq; + // struct qlnxr_qp *qp = dev->gsi_qp; + struct qlnxr_qp *qp = NULL; + unsigned long flags; + uint32_t qp_num = 0; + // uint32_t delay_count = 0, gsi_cons = 0; + //void * dest_va; + + QL_DPRINT12(dev->ha, "enter\n"); + + if (data->u.data_length_error) { + /* TODO: add statistic */ + } + + if (data->cookie == NULL) { + QL_DPRINT12(dev->ha, "cookie is NULL, bad sign\n"); + } + + qp_num = (0xFF << 16) | data->qp_id; + + if (data->qp_id == 1) { + qp = dev->gsi_qp; + } else { + /* TODO: This will be needed for UD QP support */ + /* For RoCEv1 this is invalid */ + QL_DPRINT12(dev->ha, "invalid QP\n"); + return; + } + /* note: currently only one recv sg is supported */ + QL_DPRINT12(dev->ha, "MAD received on QP : %x\n", data->rx_buf_addr); + + spin_lock_irqsave(&qp->q_lock, flags); + + qp->rqe_wr_id[qp->rq.gsi_cons].rc = + data->u.data_length_error ? -EINVAL : 0; + qp->rqe_wr_id[qp->rq.gsi_cons].vlan_id = data->vlan; + /* note: length stands for data length i.e. 
GRH is excluded */ + qp->rqe_wr_id[qp->rq.gsi_cons].sg_list[0].length = + data->length.data_length; + *((u32 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[0]) = + ntohl(data->opaque_data_0); + *((u16 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[4]) = + ntohs((u16)data->opaque_data_1); + + qlnxr_inc_sw_gsi_cons(&qp->rq); + + spin_unlock_irqrestore(&qp->q_lock, flags); + + if (cq->ibcq.comp_handler) + (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); + + QL_DPRINT12(dev->ha, "exit\n"); + + return; +} + +void qlnxr_ll2_release_rx_packet(void *cxt, + u8 connection_handle, + void *cookie, + dma_addr_t rx_buf_addr, + bool b_last_packet) +{ + /* Do nothing... */ +} + +static void +qlnxr_destroy_gsi_cq(struct qlnxr_dev *dev, + struct ib_qp_init_attr *attrs) +{ + struct ecore_rdma_destroy_cq_in_params iparams; + struct ecore_rdma_destroy_cq_out_params oparams; + struct qlnxr_cq *cq; + + QL_DPRINT12(dev->ha, "enter\n"); + + cq = get_qlnxr_cq((attrs->send_cq)); + iparams.icid = cq->icid; + ecore_rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); + ecore_chain_free(&dev->ha->cdev, &cq->pbl); + + cq = get_qlnxr_cq((attrs->recv_cq)); + /* if a dedicated recv_cq was used, delete it too */ + if (iparams.icid != cq->icid) { + iparams.icid = cq->icid; + ecore_rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); + ecore_chain_free(&dev->ha->cdev, &cq->pbl); + } + + QL_DPRINT12(dev->ha, "exit\n"); + + return; +} + +static inline int +qlnxr_check_gsi_qp_attrs(struct qlnxr_dev *dev, + struct ib_qp_init_attr *attrs) +{ + QL_DPRINT12(dev->ha, "enter\n"); + + if (attrs->cap.max_recv_sge > QLNXR_GSI_MAX_RECV_SGE) { + QL_DPRINT11(dev->ha, + "(attrs->cap.max_recv_sge > QLNXR_GSI_MAX_RECV_SGE)\n"); + return -EINVAL; + } + + if (attrs->cap.max_recv_wr > QLNXR_GSI_MAX_RECV_WR) { + QL_DPRINT11(dev->ha, + "(attrs->cap.max_recv_wr > QLNXR_GSI_MAX_RECV_WR)\n"); + return -EINVAL; + } + + if (attrs->cap.max_send_wr > QLNXR_GSI_MAX_SEND_WR) { + QL_DPRINT11(dev->ha, + "(attrs->cap.max_send_wr > QLNXR_GSI_MAX_SEND_WR)\n"); + return -EINVAL; + } + + QL_DPRINT12(dev->ha, "exit\n"); + + return 0; +} + + +static int +qlnxr_ll2_post_tx(struct qlnxr_dev *dev, struct ecore_roce_ll2_packet *pkt) +{ + enum ecore_ll2_roce_flavor_type roce_flavor; + struct ecore_ll2_tx_pkt_info ll2_tx_pkt; + int rc; + int i; + + QL_DPRINT12(dev->ha, "enter\n"); + + memset(&ll2_tx_pkt, 0, sizeof(ll2_tx_pkt)); + + if (pkt->roce_mode != ROCE_V1) { + QL_DPRINT11(dev->ha, "roce_mode != ROCE_V1\n"); + return (-1); + } + + roce_flavor = (pkt->roce_mode == ROCE_V1) ? + ECORE_LL2_ROCE : ECORE_LL2_RROCE; + + ll2_tx_pkt.num_of_bds = 1 /* hdr */ + pkt->n_seg; + ll2_tx_pkt.vlan = 0; /* ??? 
*/
+	ll2_tx_pkt.tx_dest = ECORE_LL2_TX_DEST_NW;
+	ll2_tx_pkt.ecore_roce_flavor = roce_flavor;
+	ll2_tx_pkt.first_frag = pkt->header.baddr;
+	ll2_tx_pkt.first_frag_len = pkt->header.len;
+	ll2_tx_pkt.cookie = pkt;
+	ll2_tx_pkt.enable_ip_cksum = 1; /* only for RoCEv2:IPv4 */
+
+	/* tx header */
+	rc = ecore_ll2_prepare_tx_packet(dev->rdma_ctx,
+			dev->gsi_ll2_handle,
+			&ll2_tx_pkt,
+			1);
+	if (rc) {
+
+		QL_DPRINT11(dev->ha, "ecore_ll2_prepare_tx_packet failed\n");
+
+		/* TX failed while posting header - release resources */
+		qlnx_dma_free_coherent(&dev->ha->cdev,
+			pkt->header.vaddr,
+			pkt->header.baddr,
+			pkt->header.len);
+
+		kfree(pkt);
+
+		return rc;
+	}
+
+	/* tx payload */
+	for (i = 0; i < pkt->n_seg; i++) {
+		rc = ecore_ll2_set_fragment_of_tx_packet(dev->rdma_ctx,
+						       dev->gsi_ll2_handle,
+						       pkt->payload[i].baddr,
+						       pkt->payload[i].len);
+		if (rc) {
+			/* If this fails there is not much to do: part of
+			 * the packet has already been posted and we cannot
+			 * free the memory, so wait for the completion.
+			 */
+			QL_DPRINT11(dev->ha,
+				"ecore_ll2_set_fragment_of_tx_packet failed\n");
+			return rc;
+		}
+	}
+	struct ecore_ll2_stats stats = {0};
+	rc = ecore_ll2_get_stats(dev->rdma_ctx, dev->gsi_ll2_handle, &stats);
+	if (rc) {
+		QL_DPRINT11(dev->ha, "failed to obtain ll2 stats\n");
+	}
+	QL_DPRINT12(dev->ha, "exit\n");
+
+	return 0;
+}
+
+int
+qlnxr_ll2_stop(struct qlnxr_dev *dev)
+{
+	int rc;
+
+	QL_DPRINT12(dev->ha, "enter\n");
+
+	if (dev->gsi_ll2_handle == 0xFF)
+		return 0;
+
+	/* remove LL2 MAC address filter */
+	rc = qlnx_rdma_ll2_set_mac_filter(dev->rdma_ctx,
+			dev->gsi_ll2_mac_address, NULL);
+
+	rc = ecore_ll2_terminate_connection(dev->rdma_ctx,
+			dev->gsi_ll2_handle);
+
+	ecore_ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
+
+	dev->gsi_ll2_handle = 0xFF;
+
+	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
+	return rc;
+}
+
+int qlnxr_ll2_start(struct qlnxr_dev *dev,
+		   struct ib_qp_init_attr *attrs,
+		   struct qlnxr_qp *qp)
+{
+	struct ecore_ll2_acquire_data data;
+	struct ecore_ll2_cbs cbs;
+	int rc;
+
+	QL_DPRINT12(dev->ha, "enter\n");
+
+	/* configure and start LL2 */
+	cbs.rx_comp_cb = qlnxr_ll2_complete_rx_packet;
+	cbs.tx_comp_cb = qlnxr_ll2_complete_tx_packet;
+	cbs.rx_release_cb = qlnxr_ll2_release_rx_packet;
+	cbs.tx_release_cb = qlnxr_ll2_complete_tx_packet;
+	cbs.cookie = dev;
+	dev->gsi_ll2_handle = 0xFF;
+
+	memset(&data, 0, sizeof(data));
+	data.input.conn_type = ECORE_LL2_TYPE_ROCE;
+	data.input.mtu = dev->ha->ifp->if_mtu;
+	data.input.rx_num_desc = 8 * 1024;
+	data.input.rx_drop_ttl0_flg = 1;
+	data.input.rx_vlan_removal_en = 0;
+	data.input.tx_num_desc = 8 * 1024;
+	data.input.tx_tc = 0;
+	data.input.tx_dest = ECORE_LL2_TX_DEST_NW;
+	data.input.ai_err_packet_too_big = ECORE_LL2_DROP_PACKET;
+	data.input.ai_err_no_buf = ECORE_LL2_DROP_PACKET;
+	data.input.gsi_enable = 1;
+	data.p_connection_handle = &dev->gsi_ll2_handle;
+	data.cbs = &cbs;
+
+	rc = ecore_ll2_acquire_connection(dev->rdma_ctx, &data);
+
+	if (rc) {
+		QL_DPRINT11(dev->ha,
+			"ecore_ll2_acquire_connection failed: %d\n",
+			rc);
+		return rc;
+	}
+
+	QL_DPRINT11(dev->ha,
+		"ll2 connection acquired successfully\n");
+	rc = ecore_ll2_establish_connection(dev->rdma_ctx,
+		dev->gsi_ll2_handle);
+
+	if (rc) {
+		QL_DPRINT11(dev->ha,
+			"ecore_ll2_establish_connection failed rc = %d\n", rc);
+		goto err1;
+	}
+
+	QL_DPRINT11(dev->ha,
+		"ll2 connection established successfully\n");
+	rc = qlnx_rdma_ll2_set_mac_filter(dev->rdma_ctx, NULL,
+			dev->ha->primary_mac);
+	if (rc) {
+		QL_DPRINT11(dev->ha,
+			"qlnx_rdma_ll2_set_mac_filter failed rc = %d\n", rc);
+		goto err2;
+	}
+
+	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
+	return 0;
+
+err2:
+	ecore_ll2_terminate_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
+err1:
+	ecore_ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
+
+	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
+	return rc;
+}
+
+struct ib_qp*
+qlnxr_create_gsi_qp(struct qlnxr_dev *dev,
+		    struct ib_qp_init_attr *attrs,
+		    struct qlnxr_qp *qp)
+{
+	int rc;
+
+	QL_DPRINT12(dev->ha, "enter\n");
+
+	rc = qlnxr_check_gsi_qp_attrs(dev, attrs);
+
+	if (rc) {
+		QL_DPRINT11(dev->ha, "qlnxr_check_gsi_qp_attrs failed\n");
+		return ERR_PTR(rc);
+	}
+
+	rc = qlnxr_ll2_start(dev, attrs, qp);
+	if (rc) {
+		QL_DPRINT11(dev->ha, "qlnxr_ll2_start failed\n");
+		return ERR_PTR(rc);
+	}
+
+	/* create QP */
+	qp->ibqp.qp_num = 1;
+	qp->rq.max_wr = attrs->cap.max_recv_wr;
+	qp->sq.max_wr = attrs->cap.max_send_wr;
+
+	qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
+				GFP_KERNEL);
+	if (!qp->rqe_wr_id) {
+		QL_DPRINT11(dev->ha, "(!qp->rqe_wr_id)\n");
+		goto err;
+	}
+
+	qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
+				GFP_KERNEL);
+	if (!qp->wqe_wr_id) {
+		QL_DPRINT11(dev->ha, "(!qp->wqe_wr_id)\n");
+		goto err;
+	}
+
+	qlnxr_store_gsi_qp_cq(dev, qp, attrs);
+	memcpy(dev->gsi_ll2_mac_address, dev->ha->primary_mac, ETH_ALEN);
+
+	/* the GSI CQ is handled by the driver so remove it from the FW */
+	qlnxr_destroy_gsi_cq(dev, attrs);
+	dev->gsi_sqcq->cq_type = QLNXR_CQ_TYPE_GSI;
+	dev->gsi_rqcq->cq_type = QLNXR_CQ_TYPE_GSI;
+
+	QL_DPRINT12(dev->ha, "exit &qp->ibqp = %p\n", &qp->ibqp);
+
+	return &qp->ibqp;
+err:
+	kfree(qp->rqe_wr_id);
+
+	rc = qlnxr_ll2_stop(dev);
+
+	QL_DPRINT12(dev->ha, "exit with error\n");
+
+	return ERR_PTR(-ENOMEM);
+}
+
+int
+qlnxr_destroy_gsi_qp(struct qlnxr_dev *dev)
+{
+	int rc = 0;
+
+	QL_DPRINT12(dev->ha, "enter\n");
+
+	rc = qlnxr_ll2_stop(dev);
+
+	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
+	return (rc);
+}
+
+
+static inline bool
+qlnxr_get_vlan_id_gsi(struct ib_ah_attr *ah_attr, u16 *vlan_id)
+{
+	u16 tmp_vlan_id;
+	union ib_gid *dgid = &ah_attr->grh.dgid;
+
+	tmp_vlan_id = (dgid->raw[11] << 8) | dgid->raw[12];
+	if (tmp_vlan_id < 0x1000) {
+		*vlan_id = tmp_vlan_id;
+		return true;
+	} else {
+		*vlan_id = 0;
+		return false;
+	}
+}
+
+#define QLNXR_MAX_UD_HEADER_SIZE	(100)
+#define QLNXR_GSI_QPN			(1)
+static inline int
+qlnxr_gsi_build_header(struct qlnxr_dev *dev,
+		struct qlnxr_qp *qp,
+		struct ib_send_wr *swr,
+		struct ib_ud_header *udh,
+		int *roce_mode)
+{
+	bool has_vlan = false, has_grh_ipv6 = true;
+	struct ib_ah_attr *ah_attr = &get_qlnxr_ah((ud_wr(swr)->ah))->attr;
+	struct ib_global_route *grh = &ah_attr->grh;
+	union ib_gid sgid;
+	int send_size = 0;
+	u16 vlan_id = 0;
+	u16 ether_type;
+
+#if __FreeBSD_version >= 1102000
+	int rc = 0;
+	int ip_ver = 0;
+	bool has_udp = false;
+#endif /* #if __FreeBSD_version >= 1102000 */
+
+
+#if !DEFINE_IB_AH_ATTR_WITH_DMAC
+	u8 mac[ETH_ALEN];
+#endif
+	int i;
+
+	send_size = 0;
+	for (i = 0; i < swr->num_sge; ++i)
+		send_size += swr->sg_list[i].length;
+
+	has_vlan = qlnxr_get_vlan_id_gsi(ah_attr, &vlan_id);
+	ether_type = ETH_P_ROCE;
+	*roce_mode = ROCE_V1;
+	if (grh->sgid_index < QLNXR_MAX_SGID)
+		sgid = dev->sgid_tbl[grh->sgid_index];
+	else
+		sgid = dev->sgid_tbl[0];
+
+#if __FreeBSD_version >= 1102000
+
+	rc = ib_ud_header_init(send_size, false /* LRH */, true /* ETH */,
+			has_vlan, has_grh_ipv6, ip_ver, has_udp,
+			0 /* immediate */, udh);
+
+	if (rc) {
+		QL_DPRINT11(dev->ha, "gsi post send: failed to init header\n");
+		return rc;
+	}
+
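+	/*
+	 * Editorial note, inferred from the call above rather than from
+	 * vendor documentation: for RoCE v1 the UD header assembled here is
+	 * Ethernet (plus an optional 802.1Q tag) followed by a GRH/IPv6
+	 * header, BTH and DETH.  There is no LRH, which is why
+	 * ib_ud_header_init() is passed false for LRH and true for ETH, and
+	 * ip_ver/has_udp stay 0/false because this path never builds the
+	 * RoCE v2 (UDP/IPv4) variant; *roce_mode was set to ROCE_V1 earlier
+	 * in this function.
+	 */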
+#else + ib_ud_header_init(send_size, false /* LRH */, true /* ETH */, + has_vlan, has_grh_ipv6, 0 /* immediate */, udh); + +#endif /* #if __FreeBSD_version >= 1102000 */ + + /* ENET + VLAN headers*/ +#if DEFINE_IB_AH_ATTR_WITH_DMAC + memcpy(udh->eth.dmac_h, ah_attr->dmac, ETH_ALEN); +#else + qlnxr_get_dmac(dev, ah_attr, mac); + memcpy(udh->eth.dmac_h, mac, ETH_ALEN); +#endif + memcpy(udh->eth.smac_h, dev->ha->primary_mac, ETH_ALEN); + if (has_vlan) { + udh->eth.type = htons(ETH_P_8021Q); + udh->vlan.tag = htons(vlan_id); + udh->vlan.type = htons(ether_type); + } else { + udh->eth.type = htons(ether_type); + } + + for (int j = 0; j < 4; j++) { + QL_DPRINT12(dev->ha, "destination mac: %x\n", + udh->eth.dmac_h[j]); + } + for (int j = 0; j < 4; j++) { + QL_DPRINT12(dev->ha, "source mac: %x\n", + udh->eth.smac_h[j]); + } + + QL_DPRINT12(dev->ha, "QP: %p, opcode: %d, wq: %lx, roce: %x, hops:%d," + "imm : %d, vlan :%d, AH: %p\n", + qp, swr->opcode, swr->wr_id, *roce_mode, grh->hop_limit, + 0, has_vlan, get_qlnxr_ah((ud_wr(swr)->ah))); + + if (has_grh_ipv6) { + /* GRH / IPv6 header */ + udh->grh.traffic_class = grh->traffic_class; + udh->grh.flow_label = grh->flow_label; + udh->grh.hop_limit = grh->hop_limit; + udh->grh.destination_gid = grh->dgid; + memcpy(&udh->grh.source_gid.raw, &sgid.raw, + sizeof(udh->grh.source_gid.raw)); + QL_DPRINT12(dev->ha, "header: tc: %x, flow_label : %x, " + "hop_limit: %x \n", udh->grh.traffic_class, + udh->grh.flow_label, udh->grh.hop_limit); + for (i = 0; i < 16; i++) { + QL_DPRINT12(dev->ha, "udh dgid = %x\n", udh->grh.destination_gid.raw[i]); + } + for (i = 0; i < 16; i++) { + QL_DPRINT12(dev->ha, "udh sgid = %x\n", udh->grh.source_gid.raw[i]); + } + udh->grh.next_header = 0x1b; + } +#ifdef DEFINE_IB_UD_HEADER_INIT_UDP_PRESENT + /* This is for RoCEv2 */ + else { + /* IPv4 header */ + u32 ipv4_addr; + + udh->ip4.protocol = IPPROTO_UDP; + udh->ip4.tos = htonl(grh->flow_label); + udh->ip4.frag_off = htons(IP_DF); + udh->ip4.ttl = grh->hop_limit; + + ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw); + udh->ip4.saddr = ipv4_addr; + ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw); + udh->ip4.daddr = ipv4_addr; + /* note: checksum is calculated by the device */ + } +#endif + + /* BTH */ + udh->bth.solicited_event = !!(swr->send_flags & IB_SEND_SOLICITED); + udh->bth.pkey = QLNXR_ROCE_PKEY_DEFAULT;/* TODO: ib_get_cahced_pkey?! */ + //udh->bth.destination_qpn = htonl(ud_wr(swr)->remote_qpn); + udh->bth.destination_qpn = OSAL_CPU_TO_BE32(ud_wr(swr)->remote_qpn); + //udh->bth.psn = htonl((qp->sq_psn++) & ((1 << 24) - 1)); + udh->bth.psn = OSAL_CPU_TO_BE32((qp->sq_psn++) & ((1 << 24) - 1)); + udh->bth.opcode = IB_OPCODE_UD_SEND_ONLY; + + /* DETH */ + //udh->deth.qkey = htonl(0x80010000); /* qp->qkey */ /* TODO: what is?! */ + //udh->deth.source_qpn = htonl(QLNXR_GSI_QPN); + udh->deth.qkey = OSAL_CPU_TO_BE32(0x80010000); /* qp->qkey */ /* TODO: what is?! 
*/ + udh->deth.source_qpn = OSAL_CPU_TO_BE32(QLNXR_GSI_QPN); + QL_DPRINT12(dev->ha, "exit\n"); + return 0; +} + +static inline int +qlnxr_gsi_build_packet(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, struct ib_send_wr *swr, + struct ecore_roce_ll2_packet **p_packet) +{ + u8 ud_header_buffer[QLNXR_MAX_UD_HEADER_SIZE]; + struct ecore_roce_ll2_packet *packet; + int roce_mode, header_size; + struct ib_ud_header udh; + int i, rc; + + QL_DPRINT12(dev->ha, "enter\n"); + + *p_packet = NULL; + + rc = qlnxr_gsi_build_header(dev, qp, swr, &udh, &roce_mode); + if (rc) { + QL_DPRINT11(dev->ha, + "qlnxr_gsi_build_header failed rc = %d\n", rc); + return rc; + } + + header_size = ib_ud_header_pack(&udh, &ud_header_buffer); + + packet = kzalloc(sizeof(*packet), GFP_ATOMIC); + if (!packet) { + QL_DPRINT11(dev->ha, "packet == NULL\n"); + return -ENOMEM; + } + + packet->header.vaddr = qlnx_dma_alloc_coherent(&dev->ha->cdev, + &packet->header.baddr, + header_size); + if (!packet->header.vaddr) { + QL_DPRINT11(dev->ha, "packet->header.vaddr == NULL\n"); + kfree(packet); + return -ENOMEM; + } + + if (memcmp(udh.eth.smac_h, udh.eth.dmac_h, ETH_ALEN)) + packet->tx_dest = ECORE_ROCE_LL2_TX_DEST_NW; + else + packet->tx_dest = ECORE_ROCE_LL2_TX_DEST_LB; + + packet->roce_mode = roce_mode; + memcpy(packet->header.vaddr, ud_header_buffer, header_size); + packet->header.len = header_size; + packet->n_seg = swr->num_sge; + qp->wqe_wr_id[qp->sq.prod].bytes_len = IB_GRH_BYTES; //RDMA_GRH_BYTES + for (i = 0; i < packet->n_seg; i++) { + packet->payload[i].baddr = swr->sg_list[i].addr; + packet->payload[i].len = swr->sg_list[i].length; + qp->wqe_wr_id[qp->sq.prod].bytes_len += + packet->payload[i].len; + QL_DPRINT11(dev->ha, "baddr: %p, len: %d\n", + packet->payload[i].baddr, + packet->payload[i].len); + } + + *p_packet = packet; + + QL_DPRINT12(dev->ha, "exit, packet->n_seg: %d\n", packet->n_seg); + return 0; +} + +int +qlnxr_gsi_post_send(struct ib_qp *ibqp, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct ecore_roce_ll2_packet *pkt = NULL; + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + struct qlnxr_dev *dev = qp->dev; + unsigned long flags; + int rc; + + QL_DPRINT12(dev->ha, "exit\n"); + + if (qp->state != ECORE_ROCE_QP_STATE_RTS) { + QL_DPRINT11(dev->ha, + "(qp->state != ECORE_ROCE_QP_STATE_RTS)\n"); + *bad_wr = wr; + return -EINVAL; + } + + if (wr->num_sge > RDMA_MAX_SGE_PER_SQ_WQE) { + QL_DPRINT11(dev->ha, + "(wr->num_sge > RDMA_MAX_SGE_PER_SQ_WQE)\n"); + rc = -EINVAL; + goto err; + } + + if (wr->opcode != IB_WR_SEND) { + QL_DPRINT11(dev->ha, "(wr->opcode > IB_WR_SEND)\n"); + rc = -EINVAL; + goto err; + } + + spin_lock_irqsave(&qp->q_lock, flags); + + rc = qlnxr_gsi_build_packet(dev, qp, wr, &pkt); + if(rc) { + spin_unlock_irqrestore(&qp->q_lock, flags); + QL_DPRINT11(dev->ha, "qlnxr_gsi_build_packet failed\n"); + goto err; + } + + rc = qlnxr_ll2_post_tx(dev, pkt); + + if (!rc) { + qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id; + qp->wqe_wr_id[qp->sq.prod].signaled = + !!(wr->send_flags & IB_SEND_SIGNALED); + qp->wqe_wr_id[qp->sq.prod].opcode = IB_WC_SEND; + qlnxr_inc_sw_prod(&qp->sq); + QL_DPRINT11(dev->ha, "packet sent over gsi qp\n"); + } else { + QL_DPRINT11(dev->ha, "qlnxr_ll2_post_tx failed\n"); + rc = -EAGAIN; + *bad_wr = wr; + } + + spin_unlock_irqrestore(&qp->q_lock, flags); + + if (wr->next != NULL) { + *bad_wr = wr->next; + rc=-EINVAL; + } + + QL_DPRINT12(dev->ha, "exit\n"); + return rc; + +err: + *bad_wr = wr; + QL_DPRINT12(dev->ha, "exit error\n"); + return rc; +} + +#define 
QLNXR_LL2_RX_BUFFER_SIZE (4 * 1024) +int +qlnxr_gsi_post_recv(struct ib_qp *ibqp, + struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct qlnxr_dev *dev = get_qlnxr_dev((ibqp->device)); + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + unsigned long flags; + int rc = 0; + + QL_DPRINT12(dev->ha, "enter, wr: %p\n", wr); + + if ((qp->state != ECORE_ROCE_QP_STATE_RTR) && + (qp->state != ECORE_ROCE_QP_STATE_RTS)) { + *bad_wr = wr; + QL_DPRINT11(dev->ha, "exit 0\n"); + return -EINVAL; + } + + spin_lock_irqsave(&qp->q_lock, flags); + + while (wr) { + if (wr->num_sge > QLNXR_GSI_MAX_RECV_SGE) { + QL_DPRINT11(dev->ha, "exit 1\n"); + goto err; + } + + rc = ecore_ll2_post_rx_buffer(dev->rdma_ctx, + dev->gsi_ll2_handle, + wr->sg_list[0].addr, + wr->sg_list[0].length, + 0 /* cookie */, + 1 /* notify_fw */); + if (rc) { + QL_DPRINT11(dev->ha, "exit 2\n"); + goto err; + } + + memset(&qp->rqe_wr_id[qp->rq.prod], 0, + sizeof(qp->rqe_wr_id[qp->rq.prod])); + qp->rqe_wr_id[qp->rq.prod].sg_list[0] = wr->sg_list[0]; + qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id; + + qlnxr_inc_sw_prod(&qp->rq); + + wr = wr->next; + } + + spin_unlock_irqrestore(&qp->q_lock, flags); + + QL_DPRINT12(dev->ha, "exit rc = %d\n", rc); + return rc; +err: + + spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; + + QL_DPRINT12(dev->ha, "exit with -ENOMEM\n"); + return -ENOMEM; +} + +int +qlnxr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct qlnxr_dev *dev = get_qlnxr_dev((ibcq->device)); + struct qlnxr_cq *cq = get_qlnxr_cq(ibcq); + struct qlnxr_qp *qp = dev->gsi_qp; + unsigned long flags; + int i = 0; + + QL_DPRINT12(dev->ha, "enter\n"); + + spin_lock_irqsave(&cq->cq_lock, flags); + + while (i < num_entries && qp->rq.cons != qp->rq.gsi_cons) { + memset(&wc[i], 0, sizeof(*wc)); + + wc[i].qp = &qp->ibqp; + wc[i].wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; + wc[i].opcode = IB_WC_RECV; + wc[i].pkey_index = 0; + wc[i].status = (qp->rqe_wr_id[qp->rq.cons].rc)? + IB_WC_GENERAL_ERR:IB_WC_SUCCESS; + /* 0 - currently only one recv sg is supported */ + wc[i].byte_len = qp->rqe_wr_id[qp->rq.cons].sg_list[0].length; + wc[i].wc_flags |= IB_WC_GRH | IB_WC_IP_CSUM_OK; + +#if __FreeBSD_version >= 1100000 + memcpy(&wc[i].smac, qp->rqe_wr_id[qp->rq.cons].smac, ETH_ALEN); + wc[i].wc_flags |= IB_WC_WITH_SMAC; + + if (qp->rqe_wr_id[qp->rq.cons].vlan_id) { + wc[i].wc_flags |= IB_WC_WITH_VLAN; + wc[i].vlan_id = qp->rqe_wr_id[qp->rq.cons].vlan_id; + } + +#endif + qlnxr_inc_sw_cons(&qp->rq); + i++; + } + + while (i < num_entries && qp->sq.cons != qp->sq.gsi_cons) { + memset(&wc[i], 0, sizeof(*wc)); + + wc[i].qp = &qp->ibqp; + wc[i].wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id; + wc[i].opcode = IB_WC_SEND; + wc[i].status = IB_WC_SUCCESS; + + qlnxr_inc_sw_cons(&qp->sq); + i++; + } + + spin_unlock_irqrestore(&cq->cq_lock, flags); + + QL_DPRINT12(dev->ha, "exit i = %d\n", i); + return i; +} + diff --git a/sys/dev/qlnx/qlnxr/qlnxr_cm.h b/sys/dev/qlnx/qlnxr/qlnxr_cm.h new file mode 100644 index 000000000000..79afc547362d --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_cm.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + + + +#ifndef __QLNXR_CM_H__ +#define __QLNXR_CM_H__ + + +/* ECORE LL2 has a limit to the number of buffers it can handle. + * FYI, OFED used 512 and 128 for recv and send. + */ +#define QLNXR_GSI_MAX_RECV_WR (4096) +#define QLNXR_GSI_MAX_SEND_WR (4096) + +#define QLNXR_GSI_MAX_RECV_SGE (1) /* LL2 FW limitation */ + +/* future OFED/kernel will have these */ +#define ETH_P_ROCE (0x8915) +#define QLNXR_ROCE_V2_UDP_SPORT (0000) + +#if __FreeBSD_version >= 1102000 + +#define rdma_wr(_wr) rdma_wr(_wr) +#define ud_wr(_wr) ud_wr(_wr) +#define atomic_wr(_wr) atomic_wr(_wr) + +#else + +#define rdma_wr(_wr) (&(_wr->wr.rdma)) +#define ud_wr(_wr) (&(_wr->wr.ud)) +#define atomic_wr(_wr) (&(_wr->wr.atomic)) + +#endif /* #if __FreeBSD_version >= 1102000 */ + +static inline u32 qlnxr_get_ipv4_from_gid(u8 *gid) +{ + return *(u32 *)(void *)&gid[12]; +} + +struct ecore_roce_ll2_header { + void *vaddr; + dma_addr_t baddr; + size_t len; +}; + +struct ecore_roce_ll2_buffer { + dma_addr_t baddr; + size_t len; +}; + +struct ecore_roce_ll2_packet { + struct ecore_roce_ll2_header header; + int n_seg; + struct ecore_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE]; + int roce_mode; + enum ecore_roce_ll2_tx_dest tx_dest; +}; + +/* RDMA CM */ + +extern int qlnxr_gsi_poll_cq(struct ib_cq *ibcq, + int num_entries, + struct ib_wc *wc); + +extern int qlnxr_gsi_post_recv(struct ib_qp *ibqp, + struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); + +extern int qlnxr_gsi_post_send(struct ib_qp *ibqp, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); + +extern struct ib_qp* qlnxr_create_gsi_qp(struct qlnxr_dev *dev, + struct ib_qp_init_attr *attrs, + struct qlnxr_qp *qp); + +extern void qlnxr_store_gsi_qp_cq(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ib_qp_init_attr *attrs); + +extern void qlnxr_inc_sw_gsi_cons(struct qlnxr_qp_hwq_info *info); + +extern int qlnxr_destroy_gsi_qp(struct qlnxr_dev *dev); + +#endif /* #ifndef __QLNXR_CM_H__ */ diff --git a/sys/dev/qlnx/qlnxr/qlnxr_def.h b/sys/dev/qlnx/qlnxr/qlnxr_def.h new file mode 100644 index 000000000000..8e4394d4c43e --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_def.h @@ -0,0 +1,924 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+
+/*
+ * File: qlnxr_def.h
+ * Author: David C Somayajulu
+ */
+
+#ifndef __QLNX_DEF_H_
+#define __QLNX_DEF_H_
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#if __FreeBSD_version < 1100000
+#undef MODULE_VERSION
+#endif
+
+#include "qlnx_os.h"
+#include "bcm_osal.h"
+
+#include "reg_addr.h"
+#include "ecore_gtt_reg_addr.h"
+#include "ecore.h"
+#include "ecore_chain.h"
+#include "ecore_status.h"
+#include "ecore_hw.h"
+#include "ecore_rt_defs.h"
+#include "ecore_init_ops.h"
+#include "ecore_int.h"
+#include "ecore_cxt.h"
+#include "ecore_spq.h"
+#include "ecore_init_fw_funcs.h"
+#include "ecore_sp_commands.h"
+#include "ecore_dev_api.h"
+#include "ecore_l2_api.h"
+#ifdef CONFIG_ECORE_SRIOV
+#include "ecore_sriov.h"
+#include "ecore_vf.h"
+#endif
+#ifdef CONFIG_ECORE_LL2
+#include "ecore_ll2.h"
+#endif
+#ifdef CONFIG_ECORE_FCOE
+#include "ecore_fcoe.h"
+#endif
+#ifdef CONFIG_ECORE_ISCSI
+#include "ecore_iscsi.h"
+#endif
+#include "ecore_mcp.h"
+#include "ecore_hw_defs.h"
+#include "mcp_public.h"
+
+#ifdef CONFIG_ECORE_RDMA
+#include "ecore_rdma.h"
+#include "ecore_rdma_api.h"
+#endif
+
+#ifdef CONFIG_ECORE_ROCE
+#include "ecore_roce.h"
+#endif
+
+#ifdef CONFIG_ECORE_IWARP
+#include "ecore_iwarp.h"
+#endif
+
+#include "ecore_iro.h"
+#include "nvm_cfg.h"
+
+#include "ecore_dbg_fw_funcs.h"
+#include "rdma_common.h"
+
+#include "qlnx_ioctl.h"
+#include "qlnx_def.h"
+#include "qlnx_rdma.h"
+#include "qlnxr_verbs.h"
+#include "qlnxr_user.h"
+#include "qlnx_ver.h"
+#include
+
+#define QLNXR_ROCE_INTERFACE_VERSION	1801
+
+#define QLNXR_MODULE_VERSION	"8.18.1.0"
+#define QLNXR_NODE_DESC "QLogic 579xx RoCE HCA"
+
+#define OC_SKH_DEVICE_PF	0x720
+#define OC_SKH_DEVICE_VF	0x728
+#define QLNXR_MAX_AH		512
+
+/* QLNXR Limitations */
+
+/* SQ/RQ Limitations
+ * An S/RQ PBL contains a list of pointers to pages. Each page contains S/RQE
+ * elements. Several such elements make up a single S/RQE, up to a certain
+ * maximum that differs between SQ and RQ. The size of the PBL was chosen so
+ * as not to limit the MAX_WR supported by ECORE, and rounded up to a power
+ * of two.
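+ *
+ * Illustrative arithmetic (the concrete sizes below are assumptions made
+ * for the sake of the example, not values taken from the firmware headers):
+ * with 8-byte page pointers, a 64 kB pointer table yields 0x10000 / 8 =
+ * 8192 PBL entries; if RDMA_RING_PAGE_SIZE were 4096 and an SQE element
+ * (struct rdma_sq_sge) were 16 bytes, each ring page would hold 256
+ * elements, and QLNXR_MAX_SQE below would evaluate to
+ * 8192 * 4096 / 16 / QLNXR_MAX_SQE_ELEMENTS_PER_SQE work requests.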
+ */
+/* SQ */
+#define QLNXR_MAX_SQ_PBL	(0x8000)	/* 2^15 bytes */
+#define QLNXR_MAX_SQ_PBL_ENTRIES (0x10000 / sizeof(void *))	/* number */
+#define QLNXR_SQE_ELEMENT_SIZE	(sizeof(struct rdma_sq_sge))	/* bytes */
+#define QLNXR_MAX_SQE_ELEMENTS_PER_SQE	(ROCE_REQ_MAX_SINGLE_SQ_WQE_SIZE / \
+		QLNXR_SQE_ELEMENT_SIZE)	/* number */
+#define QLNXR_MAX_SQE_ELEMENTS_PER_PAGE	((RDMA_RING_PAGE_SIZE) / \
+		QLNXR_SQE_ELEMENT_SIZE)	/* number */
+#define QLNXR_MAX_SQE	((QLNXR_MAX_SQ_PBL_ENTRIES) * (RDMA_RING_PAGE_SIZE) / \
+		(QLNXR_SQE_ELEMENT_SIZE) / (QLNXR_MAX_SQE_ELEMENTS_PER_SQE))
+/* RQ */
+#define QLNXR_MAX_RQ_PBL	(0x2000)	/* 2^13 bytes */
+#define QLNXR_MAX_RQ_PBL_ENTRIES (0x10000 / sizeof(void *))	/* number */
+#define QLNXR_RQE_ELEMENT_SIZE	(sizeof(struct rdma_rq_sge))	/* bytes */
+#define QLNXR_MAX_RQE_ELEMENTS_PER_RQE	(RDMA_MAX_SGE_PER_RQ_WQE)	/* number */
+#define QLNXR_MAX_RQE_ELEMENTS_PER_PAGE	((RDMA_RING_PAGE_SIZE) / \
+		QLNXR_RQE_ELEMENT_SIZE)	/* number */
+#define QLNXR_MAX_RQE	((QLNXR_MAX_RQ_PBL_ENTRIES) * (RDMA_RING_PAGE_SIZE) / \
+		(QLNXR_RQE_ELEMENT_SIZE) / (QLNXR_MAX_RQE_ELEMENTS_PER_RQE))
+
+/* CQE Limitation
+ * Although the FW supports a two-layer PBL, we use a single layer since it
+ * is more than enough. For that layer we use a maximum size of 512 kB,
+ * again, because it reaches the maximum number of page pointers. Note the
+ * '-1' in the calculation; it comes from having a u16 for the number of
+ * pages, i.e. 0xffff is the maximum number of pages (in a single layer).
+ */
+#define QLNXR_CQE_SIZE	(sizeof(union rdma_cqe))
+#define QLNXR_MAX_CQE_PBL_SIZE (512 * 1024)	/* 512kB */
+#define QLNXR_MAX_CQE_PBL_ENTRIES (((QLNXR_MAX_CQE_PBL_SIZE) / \
+		sizeof(u64)) - 1)	/* 64k - 1 */
+#define QLNXR_MAX_CQES ((u32)((QLNXR_MAX_CQE_PBL_ENTRIES) * (ECORE_CHAIN_PAGE_SIZE)\
+		/ QLNXR_CQE_SIZE))	/* 8M -4096/32 = 8,388,480 */
+
+/* CNQ size Limitation
+ * The maximum CNQ size is not reachable because the FW supports a chain of
+ * u16 (specifically 64k-1). The FW can buffer CNQ elements to avoid an
+ * overflow, at the expense of performance. Hence we set it to an arbitrarily
+ * smaller value than the maximum.
+ */
+#define QLNXR_ROCE_MAX_CNQ_SIZE		(0x4000)	/* 2^14 */
+
+#define QLNXR_MAX_PORT			(1)
+#define QLNXR_PORT			(1)
+
+#define QLNXR_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
+
+#define convert_to_64bit(lo, hi) ((u64)hi << 32 | (u64)lo)
+
+/* The following number is used to determine if a handle received from the FW
+ * actually points to a CQ/QP.
+ */
+#define QLNXR_CQ_MAGIC_NUMBER	(0x11223344)
+#define QLNXR_QP_MAGIC_NUMBER	(0x77889900)
+
+/* Fast path debug prints */
+#define FP_DP_VERBOSE(...)
+/* #define FP_DP_VERBOSE(...) DP_VERBOSE(__VA_ARGS__) */
+
+#define FW_PAGE_SIZE	(RDMA_RING_PAGE_SIZE)
+
+#define QLNXR_MSG_INIT		0x10000
+#define QLNXR_MSG_FAIL		0x10000
+#define QLNXR_MSG_CQ		0x20000
+#define QLNXR_MSG_RQ		0x40000
+#define QLNXR_MSG_SQ		0x80000
+#define QLNXR_MSG_QP		(QLNXR_MSG_SQ | QLNXR_MSG_RQ)
+#define QLNXR_MSG_MR		0x100000
+#define QLNXR_MSG_GSI		0x200000
+#define QLNXR_MSG_MISC		0x400000
+#define QLNXR_MSG_SRQ		0x800000
+#define QLNXR_MSG_IWARP		0x1000000
+
+#define QLNXR_ROCE_PKEY_MAX 1
+#define QLNXR_ROCE_PKEY_TABLE_LEN 1
+#define QLNXR_ROCE_PKEY_DEFAULT 0xffff
+
+#define QLNXR_MAX_SGID 128 /* TBD - add more source gids... 
*/
+
+#define QLNXR_ENET_STATE_BIT	(0)
+
+#define QLNXR_MAX_MSIX		(16)
+
+
+struct qlnxr_cnq {
+	struct qlnxr_dev	*dev;
+	struct ecore_chain	pbl;
+	struct ecore_sb_info	*sb;
+	char			name[32];
+	u64			n_comp;
+	__le16			*hw_cons_ptr;
+	u8			index;
+	int			irq_rid;
+	struct resource		*irq;
+	void			*irq_handle;
+};
+
+struct qlnxr_device_attr {
+	/* Vendor specific information */
+	u32	vendor_id;
+	u32	vendor_part_id;
+	u32	hw_ver;
+	u64	fw_ver;
+
+	u64	node_guid;	/* node GUID */
+	u64	sys_image_guid;	/* System image GUID */
+
+	u8	max_cnq;
+	u8	max_sge;	/* Maximum # of scatter/gather entries
+				 * per Work Request supported
+				 */
+	u16	max_inline;
+	u32	max_sqe;	/* Maximum number of outstanding send work
+				 * requests on any Work Queue supported
+				 */
+	u32	max_rqe;	/* Maximum number of outstanding receive
+				 * work requests on any Work Queue supported
+				 */
+	u8	max_qp_resp_rd_atomic_resc;	/* Maximum number of RDMA Reads
+						 * & atomic operations that can
+						 * be outstanding per QP
+						 */
+
+	u8	max_qp_req_rd_atomic_resc;	/* The maximum depth per QP for
+						 * initiation of RDMA Read
+						 * & atomic operations
+						 */
+	u64	max_dev_resp_rd_atomic_resc;
+	u32	max_cq;
+	u32	max_qp;
+	u32	max_mr;		/* Maximum # of MRs supported */
+	u64	max_mr_size;	/* Size (in bytes) of largest contiguous memory
+				 * block that can be registered by this device
+				 */
+	u32	max_cqe;
+	u32	max_mw;		/* Maximum # of memory windows supported */
+	u32	max_fmr;
+	u32	max_mr_mw_fmr_pbl;
+	u64	max_mr_mw_fmr_size;
+	u32	max_pd;		/* Maximum # of protection domains supported */
+	u32	max_ah;
+	u8	max_pkey;
+	u32	max_srq;	/* Maximum number of SRQs */
+	u32	max_srq_wr;	/* Maximum number of WRs per SRQ */
+	u8	max_srq_sge;	/* Maximum number of SGE per WQE */
+	u8	max_stats_queues; /* Maximum number of statistics queues */
+	u32	dev_caps;
+
+	/* Ability to support RNR-NAK generation */
+
+#define QLNXR_ROCE_DEV_CAP_RNR_NAK_MASK			0x1
+#define QLNXR_ROCE_DEV_CAP_RNR_NAK_SHIFT		0
+	/* Ability to support shutdown port */
+#define QLNXR_ROCE_DEV_CAP_SHUTDOWN_PORT_MASK		0x1
+#define QLNXR_ROCE_DEV_CAP_SHUTDOWN_PORT_SHIFT		1
+	/* Ability to support port active event */
+#define QLNXR_ROCE_DEV_CAP_PORT_ACTIVE_EVENT_MASK	0x1
+#define QLNXR_ROCE_DEV_CAP_PORT_ACTIVE_EVENT_SHIFT	2
+	/* Ability to support port change event */
+#define QLNXR_ROCE_DEV_CAP_PORT_CHANGE_EVENT_MASK	0x1
+#define QLNXR_ROCE_DEV_CAP_PORT_CHANGE_EVENT_SHIFT	3
+	/* Ability to support system image GUID */
+#define QLNXR_ROCE_DEV_CAP_SYS_IMAGE_MASK		0x1
+#define QLNXR_ROCE_DEV_CAP_SYS_IMAGE_SHIFT		4
+	/* Ability to support a bad P_Key counter */
+#define QLNXR_ROCE_DEV_CAP_BAD_PKEY_CNT_MASK		0x1
+#define QLNXR_ROCE_DEV_CAP_BAD_PKEY_CNT_SHIFT		5
+	/* Ability to support atomic operations */
+#define QLNXR_ROCE_DEV_CAP_ATOMIC_OP_MASK		0x1
+#define QLNXR_ROCE_DEV_CAP_ATOMIC_OP_SHIFT		6
+#define QLNXR_ROCE_DEV_CAP_RESIZE_CQ_MASK		0x1
+#define QLNXR_ROCE_DEV_CAP_RESIZE_CQ_SHIFT		7
+	/* Ability to support modifying the maximum number of
+	 * outstanding work requests per QP
+	 */
+#define QLNXR_ROCE_DEV_CAP_RESIZE_MAX_WR_MASK		0x1
+#define QLNXR_ROCE_DEV_CAP_RESIZE_MAX_WR_SHIFT		8
+
+	/* Ability to support automatic path migration */
+#define QLNXR_ROCE_DEV_CAP_AUTO_PATH_MIG_MASK		0x1
+#define QLNXR_ROCE_DEV_CAP_AUTO_PATH_MIG_SHIFT		9
+	/* Ability to support the base memory management extensions */
+#define QLNXR_ROCE_DEV_CAP_BASE_MEMORY_EXT_MASK		0x1
+#define QLNXR_ROCE_DEV_CAP_BASE_MEMORY_EXT_SHIFT	10
+#define QLNXR_ROCE_DEV_CAP_BASE_QUEUE_EXT_MASK		0x1
+#define QLNXR_ROCE_DEV_CAP_BASE_QUEUE_EXT_SHIFT		11
+	/* Ability to support multiple page sizes per memory region */
+#define QLNXR_ROCE_DEV_CAP_MULTI_PAGE_PER_MR_EXT_MASK	0x1
+#define QLNXR_ROCE_DEV_CAP_MULTI_PAGE_PER_MR_EXT_SHIFT	12
+	/* Ability to support block list physical buffer list */
+#define QLNXR_ROCE_DEV_CAP_BLOCK_MODE_MASK		0x1
+#define QLNXR_ROCE_DEV_CAP_BLOCK_MODE_SHIFT		13
+	/* Ability to support zero based virtual addresses */
+#define QLNXR_ROCE_DEV_CAP_ZBVA_MASK			0x1
+#define QLNXR_ROCE_DEV_CAP_ZBVA_SHIFT			14
+	/* Ability to support local invalidate fencing */
+#define QLNXR_ROCE_DEV_CAP_LOCAL_INV_FENCE_MASK		0x1
+#define QLNXR_ROCE_DEV_CAP_LOCAL_INV_FENCE_SHIFT	15
+	/* Ability to support Loopback on QP */
+#define QLNXR_ROCE_DEV_CAP_LB_INDICATOR_MASK		0x1
+#define QLNXR_ROCE_DEV_CAP_LB_INDICATOR_SHIFT		16
+	u64	page_size_caps;
+	u8	dev_ack_delay;
+	u32	reserved_lkey;	 /* Value of reserved L_key */
+	u32	bad_pkey_counter;/* Bad P_key counter support
+				  * indicator
+				  */
+	struct ecore_rdma_events events;
+};
+
+struct qlnxr_dev {
+	struct ib_device	ibdev;
+	qlnx_host_t		*ha;
+	struct ecore_dev	*cdev;
+
+	/* Added to extend Applications Support */
+	struct pci_dev		*pdev;
+	uint32_t		dp_module;
+	uint8_t			dp_level;
+
+	void			*rdma_ctx;
+
+	struct mtx		idr_lock;
+	struct idr		qpidr;
+
+	uint32_t		wq_multiplier;
+	int			num_cnq;
+
+	struct ecore_sb_info	sb_array[QLNXR_MAX_MSIX];
+	struct qlnxr_cnq	cnq_array[QLNXR_MAX_MSIX];
+
+	int			sb_start;
+
+	int			gsi_qp_created;
+	struct qlnxr_cq		*gsi_sqcq;
+	struct qlnxr_cq		*gsi_rqcq;
+	struct qlnxr_qp		*gsi_qp;
+
+	/* TBD: we'll need an array of these probably per DPI... */
+	void __iomem		*db_addr;
+	uint64_t		db_phys_addr;
+	uint32_t		db_size;
+	uint16_t		dpi;
+
+	uint64_t		guid;
+	enum ib_atomic_cap	atomic_cap;
+
+	union ib_gid		sgid_tbl[QLNXR_MAX_SGID];
+	struct mtx		sgid_lock;
+	struct notifier_block	nb_inet;
+	struct notifier_block	nb_inet6;
+
+	uint8_t			mr_key;
+	struct list_head	entry;
+
+	struct dentry		*dbgfs;
+
+	uint8_t			gsi_ll2_mac_address[ETH_ALEN];
+	uint8_t			gsi_ll2_handle;
+
+	unsigned long		enet_state;
+
+	struct workqueue_struct *iwarp_wq;
+
+	volatile uint32_t	pd_count;
+	struct qlnxr_device_attr attr;
+	uint8_t			user_dpm_enabled;
+};
+
+typedef struct qlnxr_dev qlnxr_dev_t;
+
+
+struct qlnxr_pd {
+	struct ib_pd ibpd;
+	u32 pd_id;
+	struct qlnxr_ucontext *uctx;
+};
+
+struct qlnxr_ucontext {
+	struct ib_ucontext ibucontext;
+	struct qlnxr_dev *dev;
+	struct qlnxr_pd *pd;
+	u64 dpi_addr;
+	u64 dpi_phys_addr;
+	u32 dpi_size;
+	u16 dpi;
+
+	struct list_head mm_head;
+	struct mutex mm_list_lock;
+};
+
+
+
+struct qlnxr_dev_attr {
+	struct ib_device_attr ib_attr;
+};
+
+struct qlnxr_dma_mem {
+	void *va;
+	dma_addr_t pa;
+	u32 size;
+};
+
+struct qlnxr_pbl {
+	struct list_head list_entry;
+	void *va;
+	dma_addr_t pa;
+};
+
+struct qlnxr_queue_info {
+	void *va;
+	dma_addr_t dma;
+	u32 size;
+	u16 len;
+	u16 entry_size;		/* Size of an element in the queue */
+	u16 id;			/* qid, where to ring the doorbell. 
*/ + u16 head, tail; + bool created; +}; + +struct qlnxr_eq { + struct qlnxr_queue_info q; + u32 vector; + int cq_cnt; + struct qlnxr_dev *dev; + char irq_name[32]; +}; + +struct qlnxr_mq { + struct qlnxr_queue_info sq; + struct qlnxr_queue_info cq; + bool rearm_cq; +}; + +struct phy_info { + u16 auto_speeds_supported; + u16 fixed_speeds_supported; + u16 phy_type; + u16 interface_type; +}; + +union db_prod64 { + struct rdma_pwm_val32_data data; + u64 raw; +}; + +enum qlnxr_cq_type { + QLNXR_CQ_TYPE_GSI, + QLNXR_CQ_TYPE_KERNEL, + QLNXR_CQ_TYPE_USER +}; + +struct qlnxr_pbl_info { + u32 num_pbls; + u32 num_pbes; + u32 pbl_size; + u32 pbe_size; + bool two_layered; +}; + +struct qlnxr_userq { + struct ib_umem *umem; + struct qlnxr_pbl_info pbl_info; + struct qlnxr_pbl *pbl_tbl; + u64 buf_addr; + size_t buf_len; +}; + +struct qlnxr_cq { + struct ib_cq ibcq; /* must be first */ + + enum qlnxr_cq_type cq_type; + uint32_t sig; + uint16_t icid; + + /* relevant to cqs created from kernel space only (ULPs) */ + spinlock_t cq_lock; + uint8_t arm_flags; + struct ecore_chain pbl; + + void __iomem *db_addr; /* db address for cons update*/ + union db_prod64 db; + + uint8_t pbl_toggle; + union rdma_cqe *latest_cqe; + union rdma_cqe *toggle_cqe; + + /* TODO: remove since it is redundant with 32 bit chains */ + uint32_t cq_cons; + + /* relevant to cqs created from user space only (applications) */ + struct qlnxr_userq q; + + /* destroy-IRQ handler race prevention */ + uint8_t destroyed; + uint16_t cnq_notif; +}; + + +struct qlnxr_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; +}; + +union db_prod32 { + struct rdma_pwm_val16_data data; + u32 raw; +}; + +struct qlnxr_qp_hwq_info { + /* WQE Elements*/ + struct ecore_chain pbl; + u64 p_phys_addr_tbl; + u32 max_sges; + + /* WQE */ + u16 prod; /* WQE prod index for SW ring */ + u16 cons; /* WQE cons index for SW ring */ + u16 wqe_cons; + u16 gsi_cons; /* filled in by GSI implementation */ + u16 max_wr; + + /* DB */ + void __iomem *db; /* Doorbell address */ + union db_prod32 db_data; /* Doorbell data */ + + /* Required for iwarp_only */ + void __iomem *iwarp_db2; /* Doorbell address */ + union db_prod32 iwarp_db2_data; /* Doorbell data */ +}; + +#define QLNXR_INC_SW_IDX(p_info, index) \ + do { \ + p_info->index = (p_info->index + 1) & \ + ecore_chain_get_capacity(p_info->pbl) \ + } while (0) + +struct qlnxr_srq_hwq_info { + u32 max_sges; + u32 max_wr; + struct ecore_chain pbl; + u64 p_phys_addr_tbl; + u32 wqe_prod; /* WQE prod index in HW ring */ + u32 sge_prod; /* SGE prod index in HW ring */ + u32 wr_prod_cnt; /* wr producer count */ + u32 wr_cons_cnt; /* wr consumer count */ + u32 num_elems; + + u32 *virt_prod_pair_addr; /* producer pair virtual address */ + dma_addr_t phy_prod_pair_addr; /* producer pair physical address */ +}; + +struct qlnxr_srq { + struct ib_srq ibsrq; + struct qlnxr_dev *dev; + /* relevant to cqs created from user space only (applications) */ + struct qlnxr_userq usrq; + struct qlnxr_srq_hwq_info hw_srq; + struct ib_umem *prod_umem; + u16 srq_id; + /* lock to protect srq recv post */ + spinlock_t lock; +}; + +enum qlnxr_qp_err_bitmap { + QLNXR_QP_ERR_SQ_FULL = 1 << 0, + QLNXR_QP_ERR_RQ_FULL = 1 << 1, + QLNXR_QP_ERR_BAD_SR = 1 << 2, + QLNXR_QP_ERR_BAD_RR = 1 << 3, + QLNXR_QP_ERR_SQ_PBL_FULL = 1 << 4, + QLNXR_QP_ERR_RQ_PBL_FULL = 1 << 5, +}; + +struct mr_info { + struct qlnxr_pbl *pbl_table; + struct qlnxr_pbl_info pbl_info; + struct list_head free_pbl_list; + struct list_head inuse_pbl_list; + u32 completed; + u32 completed_handled; +}; 
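+
+/*
+ * Usage sketch for mr_info (a plausible reading of the fields above, not a
+ * verbatim excerpt from this driver): PBLs move from free_pbl_list to
+ * inuse_pbl_list when a fast-register work request consumes them, and the
+ * gap between 'completed' and 'completed_handled' tells the completion path
+ * how many FRMR/invalidate completions still have PBLs waiting to be
+ * recycled, e.g.:
+ *
+ *	while (info->completed_handled != info->completed) {
+ *		(move one PBL from inuse_pbl_list back to free_pbl_list)
+ *		info->completed_handled++;
+ *	}
+ */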
+ +#if __FreeBSD_version < 1102000 +#define DEFINE_IB_FAST_REG +#else +#define DEFINE_ALLOC_MR +#endif + +#ifdef DEFINE_IB_FAST_REG +struct qlnxr_fast_reg_page_list { + struct ib_fast_reg_page_list ibfrpl; + struct qlnxr_dev *dev; + struct mr_info info; +}; +#endif +struct qlnxr_qp { + struct ib_qp ibqp; /* must be first */ + struct qlnxr_dev *dev; + struct qlnxr_iw_ep *ep; + struct qlnxr_qp_hwq_info sq; + struct qlnxr_qp_hwq_info rq; + + u32 max_inline_data; + +#if __FreeBSD_version >= 1100000 + spinlock_t q_lock ____cacheline_aligned; +#else + spinlock_t q_lock; +#endif + + struct qlnxr_cq *sq_cq; + struct qlnxr_cq *rq_cq; + struct qlnxr_srq *srq; + enum ecore_roce_qp_state state; /* QP state */ + u32 id; + struct qlnxr_pd *pd; + enum ib_qp_type qp_type; + struct ecore_rdma_qp *ecore_qp; + u32 qp_id; + u16 icid; + u16 mtu; + int sgid_idx; + u32 rq_psn; + u32 sq_psn; + u32 qkey; + u32 dest_qp_num; + u32 sig; /* unique siganture to identify valid QP */ + + /* relevant to qps created from kernel space only (ULPs) */ + u8 prev_wqe_size; + u16 wqe_cons; + u32 err_bitmap; + bool signaled; + /* SQ shadow */ + struct { + u64 wr_id; + enum ib_wc_opcode opcode; + u32 bytes_len; + u8 wqe_size; + bool signaled; + dma_addr_t icrc_mapping; + u32 *icrc; +#ifdef DEFINE_IB_FAST_REG + struct qlnxr_fast_reg_page_list *frmr; +#endif + struct qlnxr_mr *mr; + } *wqe_wr_id; + + /* RQ shadow */ + struct { + u64 wr_id; + struct ib_sge sg_list[RDMA_MAX_SGE_PER_RQ_WQE]; + uint8_t wqe_size; + + /* for GSI only */ + u8 smac[ETH_ALEN]; + u16 vlan_id; + int rc; + } *rqe_wr_id; + + /* relevant to qps created from user space only (applications) */ + struct qlnxr_userq usq; + struct qlnxr_userq urq; + atomic_t refcnt; + bool destroyed; +}; + +enum qlnxr_mr_type { + QLNXR_MR_USER, + QLNXR_MR_KERNEL, + QLNXR_MR_DMA, + QLNXR_MR_FRMR +}; + + +struct qlnxr_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + + struct ecore_rdma_register_tid_in_params hw_mr; + enum qlnxr_mr_type type; + + struct qlnxr_dev *dev; + struct mr_info info; + + u64 *pages; + u32 npages; + + u64 *iova_start; /* valid only for kernel_mr */ +}; + + +struct qlnxr_mm { + struct { + u64 phy_addr; + unsigned long len; + } key; + struct list_head entry; +}; + +struct qlnxr_iw_listener { + struct qlnxr_dev *dev; + struct iw_cm_id *cm_id; + int backlog; + void *ecore_handle; +}; + +struct qlnxr_iw_ep { + struct qlnxr_dev *dev; + struct iw_cm_id *cm_id; + struct qlnxr_qp *qp; + void *ecore_context; + u8 during_connect; +}; + +static inline void +qlnxr_inc_sw_cons(struct qlnxr_qp_hwq_info *info) +{ + info->cons = (info->cons + 1) % info->max_wr; + info->wqe_cons++; +} + +static inline void +qlnxr_inc_sw_prod(struct qlnxr_qp_hwq_info *info) +{ + info->prod = (info->prod + 1) % info->max_wr; +} + +static inline struct qlnxr_dev * +get_qlnxr_dev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct qlnxr_dev, ibdev); +} + +static inline struct qlnxr_ucontext * +get_qlnxr_ucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct qlnxr_ucontext, ibucontext); +} + +static inline struct qlnxr_pd * +get_qlnxr_pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct qlnxr_pd, ibpd); +} + +static inline struct qlnxr_cq * +get_qlnxr_cq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct qlnxr_cq, ibcq); +} + +static inline struct qlnxr_qp * +get_qlnxr_qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct qlnxr_qp, ibqp); +} + +static inline struct qlnxr_mr * +get_qlnxr_mr(struct ib_mr *ibmr) +{ + return 
container_of(ibmr, struct qlnxr_mr, ibmr); +} + +static inline struct qlnxr_ah * +get_qlnxr_ah(struct ib_ah *ibah) +{ + return container_of(ibah, struct qlnxr_ah, ibah); +} + +static inline struct qlnxr_srq * +get_qlnxr_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct qlnxr_srq, ibsrq); +} + +static inline bool qlnxr_qp_has_srq(struct qlnxr_qp *qp) +{ + return !!qp->srq; +} + +static inline bool qlnxr_qp_has_sq(struct qlnxr_qp *qp) +{ + if (qp->qp_type == IB_QPT_GSI) + return 0; + + return 1; +} + +static inline bool qlnxr_qp_has_rq(struct qlnxr_qp *qp) +{ + if (qp->qp_type == IB_QPT_GSI || qlnxr_qp_has_srq(qp)) + return 0; + + return 1; +} + + +#ifdef DEFINE_IB_FAST_REG +static inline struct qlnxr_fast_reg_page_list *get_qlnxr_frmr_list( + struct ib_fast_reg_page_list *ifrpl) +{ + return container_of(ifrpl, struct qlnxr_fast_reg_page_list, ibfrpl); +} +#endif + +#define SET_FIELD2(value, name, flag) \ + do { \ + (value) |= ((flag) << (name ## _SHIFT)); \ + } while (0) + +#define QLNXR_RESP_IMM (RDMA_CQE_RESPONDER_IMM_FLG_MASK << \ + RDMA_CQE_RESPONDER_IMM_FLG_SHIFT) +#define QLNXR_RESP_RDMA (RDMA_CQE_RESPONDER_RDMA_FLG_MASK << \ + RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT) +#define QLNXR_RESP_INV (RDMA_CQE_RESPONDER_INV_FLG_MASK << \ + RDMA_CQE_RESPONDER_INV_FLG_SHIFT) + +#define QLNXR_RESP_RDMA_IMM (QLNXR_RESP_IMM | QLNXR_RESP_RDMA) + +static inline int +qlnxr_get_dmac(struct qlnxr_dev *dev, struct ib_ah_attr *ah_attr, u8 *mac_addr) +{ +#ifdef DEFINE_NO_IP_BASED_GIDS + u8 *guid = &ah_attr->grh.dgid.raw[8]; /* GID's 64 MSBs are the GUID */ +#endif + union ib_gid zero_sgid = { { 0 } }; + struct in6_addr in6; + + if (!memcmp(&ah_attr->grh.dgid, &zero_sgid, sizeof(union ib_gid))) { + memset(mac_addr, 0x00, ETH_ALEN); + return -EINVAL; + } + + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); + +#ifdef DEFINE_NO_IP_BASED_GIDS + /* get the MAC address from the GUID i.e. EUI-64 to MAC address */ + mac_addr[0] = guid[0] ^ 2; /* toggle the local/universal bit to local */ + mac_addr[1] = guid[1]; + mac_addr[2] = guid[2]; + mac_addr[3] = guid[5]; + mac_addr[4] = guid[6]; + mac_addr[5] = guid[7]; +#else + memcpy(mac_addr, ah_attr->dmac, ETH_ALEN); +#endif + return 0; +} + +extern int qlnx_rdma_ll2_set_mac_filter(void *rdma_ctx, uint8_t *old_mac_address, + uint8_t *new_mac_address); + + +#define QLNXR_ROCE_PKEY_MAX 1 +#define QLNXR_ROCE_PKEY_TABLE_LEN 1 +#define QLNXR_ROCE_PKEY_DEFAULT 0xffff + +#if __FreeBSD_version < 1100000 +#define DEFINE_IB_AH_ATTR_WITH_DMAC (0) +#define DEFINE_IB_UMEM_WITH_CHUNK (1) +#else +#define DEFINE_IB_AH_ATTR_WITH_DMAC (1) +#endif + +#define QLNX_IS_IWARP(rdev) IS_IWARP(ECORE_LEADING_HWFN(rdev->cdev)) +#define QLNX_IS_ROCE(rdev) IS_ROCE(ECORE_LEADING_HWFN(rdev->cdev)) + +#define MAX_RXMIT_CONNS 16 + +#endif /* #ifndef __QLNX_DEF_H_ */ diff --git a/sys/dev/qlnx/qlnxr/qlnxr_os.c b/sys/dev/qlnx/qlnxr/qlnxr_os.c new file mode 100644 index 000000000000..a9e426e1ab18 --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_os.c @@ -0,0 +1,1366 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +/* + * File: qlnxr_os.c + */ +#include +__FBSDID("$FreeBSD$"); + +#include "qlnxr_def.h" + +SYSCTL_NODE(_dev, OID_AUTO, qnxr, CTLFLAG_RW, 0, "Qlogic RDMA module"); + +uint32_t delayed_ack = 0; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, delayed_ack, CTLFLAG_RW, &delayed_ack, 1, + "iWARP: Delayed Ack: 0 - Disabled 1 - Enabled. Default: Disabled"); + +uint32_t timestamp = 1; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, timestamp, CTLFLAG_RW, ×tamp, 1, + "iWARP: Timestamp: 0 - Disabled 1 - Enabled. Default:Enabled"); + +uint32_t rcv_wnd_size = 0; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, rcv_wnd_size, CTLFLAG_RW, &rcv_wnd_size, 1, + "iWARP: Receive Window Size in K. Default 1M"); + +uint32_t crc_needed = 1; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, crc_needed, CTLFLAG_RW, &crc_needed, 1, + "iWARP: CRC needed 0 - Disabled 1 - Enabled. Default:Enabled"); + +uint32_t peer2peer = 1; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, peer2peer, CTLFLAG_RW, &peer2peer, 1, + "iWARP: Support peer2peer ULPs 0 - Disabled 1 - Enabled. Default:Enabled"); + +uint32_t mpa_enhanced = 1; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, mpa_enhanced, CTLFLAG_RW, &mpa_enhanced, 1, + "iWARP: MPA Enhanced mode. Default:1"); + +uint32_t rtr_type = 7; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, rtr_type, CTLFLAG_RW, &rtr_type, 1, + "iWARP: RDMAP opcode to use for the RTR message: BITMAP 1: RDMA_SEND 2: RDMA_WRITE 4: RDMA_READ. 
Default: 7"); + + +#define QNXR_WQ_MULTIPLIER_MIN (1) +#define QNXR_WQ_MULTIPLIER_MAX (7) +#define QNXR_WQ_MULTIPLIER_DFT (3) + +uint32_t wq_multiplier= QNXR_WQ_MULTIPLIER_DFT; +SYSCTL_UINT(_dev_qnxr, OID_AUTO, wq_multiplier, CTLFLAG_RW, &wq_multiplier, 1, + " When creating a WQ the actual number of WQE created will" + " be multiplied by this number (default is 3)."); +static ssize_t +show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct qlnxr_dev *dev = dev_get_drvdata(device); + + return sprintf(buf, "0x%x\n", dev->cdev->vendor_id); +} + +static ssize_t +show_hca_type(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qlnxr_dev *dev = dev_get_drvdata(device); + return sprintf(buf, "QLogic0x%x\n", dev->cdev->device_id); +} + +static ssize_t +show_fw_ver(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qlnxr_dev *dev = dev_get_drvdata(device); + uint32_t fw_ver = (uint32_t) dev->attr.fw_ver; + + return sprintf(buf, "%d.%d.%d\n", + (fw_ver >> 24) & 0xff, (fw_ver >> 16) & 0xff, + (fw_ver >> 8) & 0xff); +} +static ssize_t +show_board(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qlnxr_dev *dev = dev_get_drvdata(device); + return sprintf(buf, "%x\n", dev->cdev->device_id); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct device_attribute *qlnxr_class_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_hca_type, + &dev_attr_fw_ver, + &dev_attr_board_id +}; + +static void +qlnxr_ib_dispatch_event(qlnxr_dev_t *dev, uint8_t port_num, + enum ib_event_type type) +{ + struct ib_event ibev; + + QL_DPRINT12(dev->ha, "enter\n"); + + ibev.device = &dev->ibdev; + ibev.element.port_num = port_num; + ibev.event = type; + + ib_dispatch_event(&ibev); + + QL_DPRINT12(dev->ha, "exit\n"); +} + +static int +__qlnxr_iw_destroy_listen(struct iw_cm_id *cm_id) +{ + qlnxr_iw_destroy_listen(cm_id); + + return (0); +} + +static int +qlnxr_register_device(qlnxr_dev_t *dev) +{ + struct ib_device *ibdev; + struct iw_cm_verbs *iwcm; + int ret; + + QL_DPRINT12(dev->ha, "enter\n"); + + ibdev = &dev->ibdev; + + strlcpy(ibdev->name, "qlnxr%d", IB_DEVICE_NAME_MAX); + + memset(&ibdev->node_guid, 0, sizeof(ibdev->node_guid)); + memcpy(&ibdev->node_guid, dev->ha->primary_mac, ETHER_ADDR_LEN); + + memcpy(ibdev->node_desc, QLNXR_NODE_DESC, sizeof(QLNXR_NODE_DESC)); + + ibdev->owner = THIS_MODULE; + ibdev->uverbs_abi_ver = 7; + ibdev->local_dma_lkey = 0; + + ibdev->uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV); + + if (QLNX_IS_IWARP(dev)) { + ibdev->node_type = RDMA_NODE_RNIC; + 
ibdev->query_gid = qlnxr_iw_query_gid; + } else { + ibdev->node_type = RDMA_NODE_IB_CA; + ibdev->query_gid = qlnxr_query_gid; + ibdev->uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); + ibdev->create_srq = qlnxr_create_srq; + ibdev->destroy_srq = qlnxr_destroy_srq; + ibdev->modify_srq = qlnxr_modify_srq; + ibdev->query_srq = qlnxr_query_srq; + ibdev->post_srq_recv = qlnxr_post_srq_recv; + } + + ibdev->phys_port_cnt = 1; + ibdev->num_comp_vectors = dev->num_cnq; + + /* mandatory verbs. */ + ibdev->query_device = qlnxr_query_device; + ibdev->query_port = qlnxr_query_port; + ibdev->modify_port = qlnxr_modify_port; + + ibdev->alloc_ucontext = qlnxr_alloc_ucontext; + ibdev->dealloc_ucontext = qlnxr_dealloc_ucontext; + /* mandatory to support user space verbs consumer. */ + ibdev->mmap = qlnxr_mmap; + + ibdev->alloc_pd = qlnxr_alloc_pd; + ibdev->dealloc_pd = qlnxr_dealloc_pd; + + ibdev->create_cq = qlnxr_create_cq; + ibdev->destroy_cq = qlnxr_destroy_cq; + ibdev->resize_cq = qlnxr_resize_cq; + ibdev->req_notify_cq = qlnxr_arm_cq; + + ibdev->create_qp = qlnxr_create_qp; + ibdev->modify_qp = qlnxr_modify_qp; + ibdev->query_qp = qlnxr_query_qp; + ibdev->destroy_qp = qlnxr_destroy_qp; + + ibdev->query_pkey = qlnxr_query_pkey; + ibdev->create_ah = qlnxr_create_ah; + ibdev->destroy_ah = qlnxr_destroy_ah; + ibdev->query_ah = qlnxr_query_ah; + ibdev->modify_ah = qlnxr_modify_ah; + ibdev->get_dma_mr = qlnxr_get_dma_mr; + ibdev->dereg_mr = qlnxr_dereg_mr; + ibdev->reg_user_mr = qlnxr_reg_user_mr; + +#if __FreeBSD_version >= 1102000 + ibdev->alloc_mr = qlnxr_alloc_mr; + ibdev->map_mr_sg = qlnxr_map_mr_sg; + ibdev->get_port_immutable = qlnxr_get_port_immutable; +#else + ibdev->reg_phys_mr = qlnxr_reg_kernel_mr; + ibdev->alloc_fast_reg_mr = qlnxr_alloc_frmr; + ibdev->alloc_fast_reg_page_list = qlnxr_alloc_frmr_page_list; + ibdev->free_fast_reg_page_list = qlnxr_free_frmr_page_list; +#endif /* #if __FreeBSD_version >= 1102000 */ + + ibdev->poll_cq = qlnxr_poll_cq; + ibdev->post_send = qlnxr_post_send; + ibdev->post_recv = qlnxr_post_recv; + ibdev->process_mad = qlnxr_process_mad; + + + + ibdev->dma_device = &dev->pdev->dev; + + ibdev->get_link_layer = qlnxr_link_layer; + + if (QLNX_IS_IWARP(dev)) { + iwcm = kmalloc(sizeof(*iwcm), GFP_KERNEL); + + device_printf(dev->ha->pci_dev, "device is IWARP\n"); + if (iwcm == NULL) + return (-ENOMEM); + + ibdev->iwcm = iwcm; + + iwcm->connect = qlnxr_iw_connect; + iwcm->accept = qlnxr_iw_accept; + iwcm->reject = qlnxr_iw_reject; + +#if (__FreeBSD_version >= 1004000) && (__FreeBSD_version < 1102000) + + iwcm->create_listen_ep = qlnxr_iw_create_listen; + iwcm->destroy_listen_ep = qlnxr_iw_destroy_listen; +#else + iwcm->create_listen = qlnxr_iw_create_listen; + iwcm->destroy_listen = __qlnxr_iw_destroy_listen; +#endif + iwcm->add_ref = qlnxr_iw_qp_add_ref; + iwcm->rem_ref = qlnxr_iw_qp_rem_ref; + iwcm->get_qp = qlnxr_iw_get_qp; + } + + ret = ib_register_device(ibdev, NULL); + if (ret) { + kfree(iwcm); + } + + QL_DPRINT12(dev->ha, "exit\n"); + return ret; +} + +#define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo)) + +static void +qlnxr_intr(void *handle) +{ + struct qlnxr_cnq *cnq = handle; + struct qlnxr_cq *cq; + struct regpair *cq_handle; + u16 hw_comp_cons, sw_comp_cons; + qlnx_host_t *ha; + + ha = cnq->dev->ha; + + QL_DPRINT12(ha, "enter cnq = %p\n", handle); + + ecore_sb_ack(cnq->sb, 
IGU_INT_DISABLE, 0 /*do not update*/); + + ecore_sb_update_sb_idx(cnq->sb); + + hw_comp_cons = le16_to_cpu(*cnq->hw_cons_ptr); + sw_comp_cons = ecore_chain_get_cons_idx(&cnq->pbl); + + rmb(); + + QL_DPRINT12(ha, "enter cnq = %p hw_comp_cons = 0x%x sw_comp_cons = 0x%x\n", + handle, hw_comp_cons, sw_comp_cons); + + while (sw_comp_cons != hw_comp_cons) { + cq_handle = (struct regpair *)ecore_chain_consume(&cnq->pbl); + cq = (struct qlnxr_cq *)(uintptr_t)HILO_U64(cq_handle->hi, + cq_handle->lo); + + if (cq == NULL) { + QL_DPRINT11(ha, "cq == NULL\n"); + break; + } + + if (cq->sig != QLNXR_CQ_MAGIC_NUMBER) { + QL_DPRINT11(ha, + "cq->sig = 0x%x QLNXR_CQ_MAGIC_NUMBER = 0x%x\n", + cq->sig, QLNXR_CQ_MAGIC_NUMBER); + break; + } + cq->arm_flags = 0; + + if (!cq->destroyed && cq->ibcq.comp_handler) { + QL_DPRINT11(ha, "calling comp_handler = %p " + "ibcq = %p cq_context = 0x%x\n", + &cq->ibcq, cq->ibcq.cq_context); + + (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); + } + cq->cnq_notif++; + + sw_comp_cons = ecore_chain_get_cons_idx(&cnq->pbl); + + cnq->n_comp++; + } + + ecore_rdma_cnq_prod_update(cnq->dev->rdma_ctx, cnq->index, sw_comp_cons); + + ecore_sb_ack(cnq->sb, IGU_INT_ENABLE, 1 /*update*/); + + QL_DPRINT12(ha, "exit cnq = %p\n", handle); + return; +} + +static void +qlnxr_release_irqs(struct qlnxr_dev *dev) +{ + int i; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + for (i = 0; i < dev->num_cnq; i++) { + if (dev->cnq_array[i].irq_handle) + (void)bus_teardown_intr(dev->ha->pci_dev, + dev->cnq_array[i].irq, + dev->cnq_array[i].irq_handle); + + if (dev->cnq_array[i].irq) + (void) bus_release_resource(dev->ha->pci_dev, + SYS_RES_IRQ, + dev->cnq_array[i].irq_rid, + dev->cnq_array[i].irq); + } + QL_DPRINT12(ha, "exit\n"); + return; +} + +static int +qlnxr_setup_irqs(struct qlnxr_dev *dev) +{ + int start_irq_rid; + int i; + qlnx_host_t *ha; + + ha = dev->ha; + + start_irq_rid = dev->sb_start + 2; + + QL_DPRINT12(ha, "enter start_irq_rid = %d num_rss = %d\n", + start_irq_rid, dev->ha->num_rss); + + + for (i = 0; i < dev->num_cnq; i++) { + + dev->cnq_array[i].irq_rid = start_irq_rid + i; + + dev->cnq_array[i].irq = bus_alloc_resource_any(dev->ha->pci_dev, + SYS_RES_IRQ, + &dev->cnq_array[i].irq_rid, + (RF_ACTIVE | RF_SHAREABLE)); + + if (dev->cnq_array[i].irq == NULL) { + + QL_DPRINT11(ha, + "bus_alloc_resource_any failed irq_rid = %d\n", + dev->cnq_array[i].irq_rid); + + goto qlnxr_setup_irqs_err; + } + + if (bus_setup_intr(dev->ha->pci_dev, + dev->cnq_array[i].irq, + (INTR_TYPE_NET | INTR_MPSAFE), + NULL, qlnxr_intr, &dev->cnq_array[i], + &dev->cnq_array[i].irq_handle)) { + + QL_DPRINT11(ha, "bus_setup_intr failed\n"); + goto qlnxr_setup_irqs_err; + } + QL_DPRINT12(ha, "irq_rid = %d irq = %p irq_handle = %p\n", + dev->cnq_array[i].irq_rid, dev->cnq_array[i].irq, + dev->cnq_array[i].irq_handle); + } + + QL_DPRINT12(ha, "exit\n"); + return (0); + +qlnxr_setup_irqs_err: + qlnxr_release_irqs(dev); + + QL_DPRINT12(ha, "exit -1\n"); + return (-1); +} + +static void +qlnxr_free_resources(struct qlnxr_dev *dev) +{ + int i; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter dev->num_cnq = %d\n", dev->num_cnq); + + if (QLNX_IS_IWARP(dev)) { + if (dev->iwarp_wq != NULL) + destroy_workqueue(dev->iwarp_wq); + } + + for (i = 0; i < dev->num_cnq; i++) { + qlnx_free_mem_sb(dev->ha, &dev->sb_array[i]); + ecore_chain_free(&dev->ha->cdev, &dev->cnq_array[i].pbl); + } + + bzero(dev->cnq_array, (sizeof(struct qlnxr_cnq) * QLNXR_MAX_MSIX)); + bzero(dev->sb_array, 
(sizeof(struct ecore_sb_info) * QLNXR_MAX_MSIX)); + bzero(dev->sgid_tbl, (sizeof(union ib_gid) * QLNXR_MAX_SGID)); + + if (mtx_initialized(&dev->idr_lock)) + mtx_destroy(&dev->idr_lock); + + if (mtx_initialized(&dev->sgid_lock)) + mtx_destroy(&dev->sgid_lock); + + QL_DPRINT12(ha, "exit\n"); + return; +} + + +static int +qlnxr_alloc_resources(struct qlnxr_dev *dev) +{ + uint16_t n_entries; + int i, rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + bzero(dev->sgid_tbl, (sizeof (union ib_gid) * QLNXR_MAX_SGID)); + + mtx_init(&dev->idr_lock, "idr_lock", NULL, MTX_DEF); + mtx_init(&dev->sgid_lock, "sgid_lock", NULL, MTX_DEF); + + idr_init(&dev->qpidr); + + bzero(dev->sb_array, (sizeof (struct ecore_sb_info) * QLNXR_MAX_MSIX)); + bzero(dev->cnq_array, (sizeof (struct qlnxr_cnq) * QLNXR_MAX_MSIX)); + + dev->sb_start = ecore_rdma_get_sb_id(dev->rdma_ctx, 0); + + QL_DPRINT12(ha, "dev->sb_start = 0x%x\n", dev->sb_start); + + /* Allocate CNQ PBLs */ + + n_entries = min_t(u32, ECORE_RDMA_MAX_CNQ_SIZE, QLNXR_ROCE_MAX_CNQ_SIZE); + + for (i = 0; i < dev->num_cnq; i++) { + rc = qlnx_alloc_mem_sb(dev->ha, &dev->sb_array[i], + dev->sb_start + i); + if (rc) + goto qlnxr_alloc_resources_exit; + + rc = ecore_chain_alloc(&dev->ha->cdev, + ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U16, + n_entries, + sizeof(struct regpair *), + &dev->cnq_array[i].pbl, + NULL); + + /* configure cnq, except name since ibdev.name is still NULL */ + dev->cnq_array[i].dev = dev; + dev->cnq_array[i].sb = &dev->sb_array[i]; + dev->cnq_array[i].hw_cons_ptr = + &(dev->sb_array[i].sb_virt->pi_array[ECORE_ROCE_PROTOCOL_INDEX]); + dev->cnq_array[i].index = i; + sprintf(dev->cnq_array[i].name, "qlnxr%d@pci:%d", + i, (dev->ha->pci_func)); + + } + + QL_DPRINT12(ha, "exit\n"); + return 0; + +qlnxr_alloc_resources_exit: + + qlnxr_free_resources(dev); + + QL_DPRINT12(ha, "exit -ENOMEM\n"); + return -ENOMEM; +} + +void +qlnxr_affiliated_event(void *context, u8 e_code, void *fw_handle) +{ +#define EVENT_TYPE_NOT_DEFINED 0 +#define EVENT_TYPE_CQ 1 +#define EVENT_TYPE_QP 2 +#define EVENT_TYPE_GENERAL 3 + + struct qlnxr_dev *dev = (struct qlnxr_dev *)context; + struct regpair *async_handle = (struct regpair *)fw_handle; + u64 roceHandle64 = ((u64)async_handle->hi << 32) + async_handle->lo; + struct qlnxr_cq *cq = (struct qlnxr_cq *)(uintptr_t)roceHandle64; + struct qlnxr_qp *qp = (struct qlnxr_qp *)(uintptr_t)roceHandle64; + u8 event_type = EVENT_TYPE_NOT_DEFINED; + struct ib_event event; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter context = %p e_code = 0x%x fw_handle = %p\n", + context, e_code, fw_handle); + + if (QLNX_IS_IWARP(dev)) { + switch (e_code) { + + case ECORE_IWARP_EVENT_CQ_OVERFLOW: + event.event = IB_EVENT_CQ_ERR; + event_type = EVENT_TYPE_CQ; + break; + + default: + QL_DPRINT12(ha, + "unsupported event %d on handle=%llx\n", + e_code, roceHandle64); + break; + } + } else { + switch (e_code) { + + case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR: + event.event = IB_EVENT_CQ_ERR; + event_type = EVENT_TYPE_CQ; + break; + + case ROCE_ASYNC_EVENT_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + event_type = EVENT_TYPE_QP; + break; + + case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR: + event.event = IB_EVENT_QP_FATAL; + event_type = EVENT_TYPE_QP; + break; + + case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR: + event.event = IB_EVENT_QP_REQ_ERR; + event_type = EVENT_TYPE_QP; + break; + + case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR: + event.event = IB_EVENT_QP_ACCESS_ERR; + 
event_type = EVENT_TYPE_QP; + break; + + /* NOTE the following are not implemented in FW + * ROCE_ASYNC_EVENT_CQ_ERR + * ROCE_ASYNC_EVENT_COMM_EST + */ + /* TODO associate the following events - + * ROCE_ASYNC_EVENT_SRQ_LIMIT + * ROCE_ASYNC_EVENT_LAST_WQE_REACHED + * ROCE_ASYNC_EVENT_LOCAL_CATASTROPHIC_ERR (un-affiliated) + */ + default: + QL_DPRINT12(ha, + "unsupported event 0x%x on fw_handle = %p\n", + e_code, fw_handle); + break; + } + } + + switch (event_type) { + + case EVENT_TYPE_CQ: + if (cq && cq->sig == QLNXR_CQ_MAGIC_NUMBER) { + struct ib_cq *ibcq = &cq->ibcq; + + if (ibcq->event_handler) { + event.device = ibcq->device; + event.element.cq = ibcq; + ibcq->event_handler(&event, ibcq->cq_context); + } + } else { + QL_DPRINT11(ha, + "CQ event with invalid CQ pointer" + " Handle = %llx\n", roceHandle64); + } + QL_DPRINT12(ha, + "CQ event 0x%x on handle = %p\n", e_code, cq); + break; + + case EVENT_TYPE_QP: + if (qp && qp->sig == QLNXR_QP_MAGIC_NUMBER) { + struct ib_qp *ibqp = &qp->ibqp; + + if (ibqp->event_handler) { + event.device = ibqp->device; + event.element.qp = ibqp; + ibqp->event_handler(&event, ibqp->qp_context); + } + } else { + QL_DPRINT11(ha, + "QP event 0x%x with invalid QP pointer" + " qp handle = %p\n", + e_code, roceHandle64); + } + QL_DPRINT12(ha, "QP event 0x%x on qp handle = %p\n", + e_code, qp); + break; + + case EVENT_TYPE_GENERAL: + break; + + default: + break; + + } + + QL_DPRINT12(ha, "exit\n"); + + return; +} + +void +qlnxr_unaffiliated_event(void *context, u8 e_code) +{ + struct qlnxr_dev *dev = (struct qlnxr_dev *)context; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter/exit \n"); + return; +} + + +static int +qlnxr_set_device_attr(struct qlnxr_dev *dev) +{ + struct ecore_rdma_device *ecore_attr; + struct qlnxr_device_attr *attr; + u32 page_size; + + ecore_attr = ecore_rdma_query_device(dev->rdma_ctx); + + page_size = ~dev->attr.page_size_caps + 1; + if(page_size > PAGE_SIZE) { + QL_DPRINT12(dev->ha, "Kernel page size : %ld is smaller than" + " minimum page size : %ld required by qlnxr\n", + PAGE_SIZE, page_size); + return -ENODEV; + } + attr = &dev->attr; + attr->vendor_id = ecore_attr->vendor_id; + attr->vendor_part_id = ecore_attr->vendor_part_id; + + QL_DPRINT12(dev->ha, "in qlnxr_set_device_attr, vendor : %x device : %x\n", + attr->vendor_id, attr->vendor_part_id); + + attr->hw_ver = ecore_attr->hw_ver; + attr->fw_ver = ecore_attr->fw_ver; + attr->node_guid = ecore_attr->node_guid; + attr->sys_image_guid = ecore_attr->sys_image_guid; + attr->max_cnq = ecore_attr->max_cnq; + attr->max_sge = ecore_attr->max_sge; + attr->max_inline = ecore_attr->max_inline; + attr->max_sqe = min_t(u32, ecore_attr->max_wqe, QLNXR_MAX_SQE); + attr->max_rqe = min_t(u32, ecore_attr->max_wqe, QLNXR_MAX_RQE); + attr->max_qp_resp_rd_atomic_resc = ecore_attr->max_qp_resp_rd_atomic_resc; + attr->max_qp_req_rd_atomic_resc = ecore_attr->max_qp_req_rd_atomic_resc; + attr->max_dev_resp_rd_atomic_resc = + ecore_attr->max_dev_resp_rd_atomic_resc; + attr->max_cq = ecore_attr->max_cq; + attr->max_qp = ecore_attr->max_qp; + attr->max_mr = ecore_attr->max_mr; + attr->max_mr_size = ecore_attr->max_mr_size; + attr->max_cqe = min_t(u64, ecore_attr->max_cqe, QLNXR_MAX_CQES); + attr->max_mw = ecore_attr->max_mw; + attr->max_fmr = ecore_attr->max_fmr; + attr->max_mr_mw_fmr_pbl = ecore_attr->max_mr_mw_fmr_pbl; + attr->max_mr_mw_fmr_size = ecore_attr->max_mr_mw_fmr_size; + attr->max_pd = ecore_attr->max_pd; + attr->max_ah = ecore_attr->max_ah; + attr->max_pkey = 
ecore_attr->max_pkey; + attr->max_srq = ecore_attr->max_srq; + attr->max_srq_wr = ecore_attr->max_srq_wr; + //attr->dev_caps = ecore_attr->dev_caps; + attr->page_size_caps = ecore_attr->page_size_caps; + attr->dev_ack_delay = ecore_attr->dev_ack_delay; + attr->reserved_lkey = ecore_attr->reserved_lkey; + attr->bad_pkey_counter = ecore_attr->bad_pkey_counter; + attr->max_stats_queues = ecore_attr->max_stats_queues; + + return 0; +} + + +static int +qlnxr_init_hw(struct qlnxr_dev *dev) +{ + struct ecore_rdma_events events; + struct ecore_rdma_add_user_out_params out_params; + struct ecore_rdma_cnq_params *cur_pbl; + struct ecore_rdma_start_in_params *in_params; + dma_addr_t p_phys_table; + u32 page_cnt; + int rc = 0; + int i; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + in_params = kzalloc(sizeof(*in_params), GFP_KERNEL); + if (!in_params) { + rc = -ENOMEM; + goto out; + } + + bzero(&out_params, sizeof(struct ecore_rdma_add_user_out_params)); + bzero(&events, sizeof(struct ecore_rdma_events)); + + in_params->desired_cnq = dev->num_cnq; + + for (i = 0; i < dev->num_cnq; i++) { + cur_pbl = &in_params->cnq_pbl_list[i]; + + page_cnt = ecore_chain_get_page_cnt(&dev->cnq_array[i].pbl); + cur_pbl->num_pbl_pages = page_cnt; + + p_phys_table = ecore_chain_get_pbl_phys(&dev->cnq_array[i].pbl); + cur_pbl->pbl_ptr = (u64)p_phys_table; + } + + events.affiliated_event = qlnxr_affiliated_event; + events.unaffiliated_event = qlnxr_unaffiliated_event; + events.context = dev; + + in_params->events = &events; + in_params->roce.cq_mode = ECORE_RDMA_CQ_MODE_32_BITS; + in_params->max_mtu = dev->ha->max_frame_size; + + + if (QLNX_IS_IWARP(dev)) { + if (delayed_ack) + in_params->iwarp.flags |= ECORE_IWARP_DA_EN; + + if (timestamp) + in_params->iwarp.flags |= ECORE_IWARP_TS_EN; + + in_params->iwarp.rcv_wnd_size = rcv_wnd_size*1024; + in_params->iwarp.crc_needed = crc_needed; + in_params->iwarp.ooo_num_rx_bufs = + (MAX_RXMIT_CONNS * in_params->iwarp.rcv_wnd_size) / + in_params->max_mtu; + + in_params->iwarp.mpa_peer2peer = peer2peer; + in_params->iwarp.mpa_rev = + mpa_enhanced ? 
ECORE_MPA_REV2 : ECORE_MPA_REV1; + in_params->iwarp.mpa_rtr = rtr_type; + } + + memcpy(&in_params->mac_addr[0], dev->ha->primary_mac, ETH_ALEN); + + rc = ecore_rdma_start(dev->rdma_ctx, in_params); + if (rc) + goto out; + + rc = ecore_rdma_add_user(dev->rdma_ctx, &out_params); + if (rc) + goto out; + + dev->db_addr = (void *)(uintptr_t)out_params.dpi_addr; + dev->db_phys_addr = out_params.dpi_phys_addr; + dev->db_size = out_params.dpi_size; + dev->dpi = out_params.dpi; + + qlnxr_set_device_attr(dev); + + QL_DPRINT12(ha, + "cdev->doorbells = %p, db_phys_addr = %p db_size = 0x%x\n", + (void *)ha->cdev.doorbells, + (void *)ha->cdev.db_phys_addr, ha->cdev.db_size); + + QL_DPRINT12(ha, + "db_addr = %p db_phys_addr = %p db_size = 0x%x dpi = 0x%x\n", + (void *)dev->db_addr, (void *)dev->db_phys_addr, + dev->db_size, dev->dpi); +out: + kfree(in_params); + + QL_DPRINT12(ha, "exit\n"); + return rc; +} + +static void +qlnxr_build_sgid_mac(union ib_gid *sgid, unsigned char *mac_addr, + bool is_vlan, u16 vlan_id) +{ + sgid->global.subnet_prefix = OSAL_CPU_TO_BE64(0xfe80000000000000LL); + sgid->raw[8] = mac_addr[0] ^ 2; + sgid->raw[9] = mac_addr[1]; + sgid->raw[10] = mac_addr[2]; + if (is_vlan) { + sgid->raw[11] = vlan_id >> 8; + sgid->raw[12] = vlan_id & 0xff; + } else { + sgid->raw[11] = 0xff; + sgid->raw[12] = 0xfe; + } + sgid->raw[13] = mac_addr[3]; + sgid->raw[14] = mac_addr[4]; + sgid->raw[15] = mac_addr[5]; +} +static bool +qlnxr_add_sgid(struct qlnxr_dev *dev, union ib_gid *new_sgid); + +static void +qlnxr_add_ip_based_gid(struct qlnxr_dev *dev, struct ifnet *ifp) +{ + struct ifaddr *ifa; + union ib_gid gid; + + CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) { + + QL_DPRINT12(dev->ha, "IP address : %x\n", ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr); + ipv6_addr_set_v4mapped( + ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr, + (struct in6_addr *)&gid); + QL_DPRINT12(dev->ha, "gid generated : %llx\n", gid); + + qlnxr_add_sgid(dev, &gid); + } + } + for (int i = 0; i < 16; i++) { + QL_DPRINT12(dev->ha, "gid generated : %x\n", gid.raw[i]); + } +} + +static bool +qlnxr_add_sgid(struct qlnxr_dev *dev, union ib_gid *new_sgid) +{ + union ib_gid zero_sgid = { { 0 } }; + int i; + //unsigned long flags; + mtx_lock(&dev->sgid_lock); + for (i = 0; i < QLNXR_MAX_SGID; i++) { + if (!memcmp(&dev->sgid_tbl[i], &zero_sgid, + sizeof(union ib_gid))) { + /* found free entry */ + memcpy(&dev->sgid_tbl[i], new_sgid, + sizeof(union ib_gid)); + QL_DPRINT12(dev->ha, "copying sgid : %llx\n", + *new_sgid); + mtx_unlock(&dev->sgid_lock); + //TODO ib_dispatch event here? 
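+			/* e.g. via the existing wrapper (sketch, not
+			 * wired up):
+			 *
+			 *	qlnxr_ib_dispatch_event(dev, QLNXR_PORT,
+			 *	    IB_EVENT_GID_CHANGE);
+			 */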
+ return true; + } else if (!memcmp(&dev->sgid_tbl[i], new_sgid, + sizeof(union ib_gid))) { + /* entry already present, no addition required */ + mtx_unlock(&dev->sgid_lock); + QL_DPRINT12(dev->ha, "sgid present : %llx\n", + *new_sgid); + return false; + } + } + if (i == QLNXR_MAX_SGID) { + QL_DPRINT12(dev->ha, "didn't find an empty entry in sgid_tbl\n"); + } + mtx_unlock(&dev->sgid_lock); + return false; +} + +static bool qlnxr_del_sgid(struct qlnxr_dev *dev, union ib_gid *gid) +{ + int found = false; + int i; + //unsigned long flags; + + QL_DPRINT12(dev->ha, "removing gid %llx %llx\n", + gid->global.interface_id, + gid->global.subnet_prefix); + mtx_lock(&dev->sgid_lock); + /* first is the default sgid which cannot be deleted */ + for (i = 1; i < QLNXR_MAX_SGID; i++) { + if (!memcmp(&dev->sgid_tbl[i], gid, sizeof(union ib_gid))) { + /* found matching entry */ + memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid)); + found = true; + break; + } + } + mtx_unlock(&dev->sgid_lock); + + return found; +} + +#if __FreeBSD_version < 1100000 + +static inline int +is_vlan_dev(struct ifnet *ifp) +{ + return (ifp->if_type == IFT_L2VLAN); +} + +static inline uint16_t +vlan_dev_vlan_id(struct ifnet *ifp) +{ + uint16_t vtag; + + if (VLAN_TAG(ifp, &vtag) == 0) + return (vtag); + + return (0); +} + +#endif /* #if __FreeBSD_version < 1100000 */ + +static void +qlnxr_add_sgids(struct qlnxr_dev *dev) +{ + qlnx_host_t *ha = dev->ha; + u16 vlan_id; + bool is_vlan; + union ib_gid vgid; + + qlnxr_add_ip_based_gid(dev, ha->ifp); + /* MAC/VLAN base GIDs */ + is_vlan = is_vlan_dev(ha->ifp); + vlan_id = (is_vlan) ? vlan_dev_vlan_id(ha->ifp) : 0; + qlnxr_build_sgid_mac(&vgid, ha->primary_mac, is_vlan, vlan_id); + qlnxr_add_sgid(dev, &vgid); +} + +static int +qlnxr_add_default_sgid(struct qlnxr_dev *dev) +{ + /* GID Index 0 - Invariant manufacturer-assigned EUI-64 */ + union ib_gid *sgid = &dev->sgid_tbl[0]; + struct ecore_rdma_device *qattr; + qlnx_host_t *ha; + ha = dev->ha; + + qattr = ecore_rdma_query_device(dev->rdma_ctx); + if(sgid == NULL) + QL_DPRINT12(ha, "sgid = NULL?\n"); + + sgid->global.subnet_prefix = OSAL_CPU_TO_BE64(0xfe80000000000000LL); + QL_DPRINT12(ha, "node_guid = %llx", dev->attr.node_guid); + memcpy(&sgid->raw[8], &qattr->node_guid, + sizeof(qattr->node_guid)); + //memcpy(&sgid->raw[8], &dev->attr.node_guid, + // sizeof(dev->attr.node_guid)); + QL_DPRINT12(ha, "DEFAULT sgid=[%x][%x][%x][%x][%x][%x][%x][%x][%x][%x][%x][%x][%x][%x][%x][%x]\n", + sgid->raw[0], sgid->raw[1], sgid->raw[2], sgid->raw[3], sgid->raw[4], sgid->raw[5], + sgid->raw[6], sgid->raw[7], sgid->raw[8], sgid->raw[9], sgid->raw[10], sgid->raw[11], + sgid->raw[12], sgid->raw[13], sgid->raw[14], sgid->raw[15]); + return 0; +} + +static int qlnxr_addr_event (struct qlnxr_dev *dev, + unsigned long event, + struct ifnet *ifp, + union ib_gid *gid) +{ + bool is_vlan = false; + union ib_gid vgid; + u16 vlan_id = 0xffff; + + QL_DPRINT12(dev->ha, "Link event occured\n"); + is_vlan = is_vlan_dev(dev->ha->ifp); + vlan_id = (is_vlan) ? 
vlan_dev_vlan_id(dev->ha->ifp) : 0; + + switch (event) { + case NETDEV_UP : + qlnxr_add_sgid(dev, gid); + if (is_vlan) { + qlnxr_build_sgid_mac(&vgid, dev->ha->primary_mac, is_vlan, vlan_id); + qlnxr_add_sgid(dev, &vgid); + } + break; + case NETDEV_DOWN : + qlnxr_del_sgid(dev, gid); + if (is_vlan) { + qlnxr_build_sgid_mac(&vgid, dev->ha->primary_mac, is_vlan, vlan_id); + qlnxr_del_sgid(dev, &vgid); + } + break; + default : + break; + } + return 1; +} + +static int qlnxr_inetaddr_event(struct notifier_block *notifier, + unsigned long event, void *ptr) +{ + struct ifaddr *ifa = ptr; + union ib_gid gid; + struct qlnxr_dev *dev = container_of(notifier, struct qlnxr_dev, nb_inet); + qlnx_host_t *ha = dev->ha; + + ipv6_addr_set_v4mapped( + ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr, + (struct in6_addr *)&gid); + return qlnxr_addr_event(dev, event, ha->ifp, &gid); +} + +static int +qlnxr_register_inet(struct qlnxr_dev *dev) +{ + int ret; + dev->nb_inet.notifier_call = qlnxr_inetaddr_event; + ret = register_inetaddr_notifier(&dev->nb_inet); + if (ret) { + QL_DPRINT12(dev->ha, "Failed to register inetaddr\n"); + return ret; + } + /* TODO : add for CONFIG_IPV6) */ + return 0; +} + +static int +qlnxr_build_sgid_tbl(struct qlnxr_dev *dev) +{ + qlnxr_add_default_sgid(dev); + qlnxr_add_sgids(dev); + return 0; +} + +static struct qlnx_rdma_if qlnxr_drv; + +static void * +qlnxr_add(void *eth_dev) +{ + struct qlnxr_dev *dev; + int ret; + //device_t pci_dev; + qlnx_host_t *ha; + + ha = eth_dev; + + QL_DPRINT12(ha, "enter [ha = %p]\n", ha); + + dev = (struct qlnxr_dev *)ib_alloc_device(sizeof(struct qlnxr_dev)); + + if (dev == NULL) + return (NULL); + + dev->ha = eth_dev; + dev->cdev = &ha->cdev; + /* Added to extend Application support */ + dev->pdev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL); + + dev->pdev->dev = *(dev->ha->pci_dev); + dev->pdev->device = pci_get_device(dev->ha->pci_dev); + dev->pdev->vendor = pci_get_vendor(dev->ha->pci_dev); + + dev->rdma_ctx = &ha->cdev.hwfns[0]; + dev->wq_multiplier = wq_multiplier; + dev->num_cnq = QLNX_NUM_CNQ; + + QL_DPRINT12(ha, + "ha = %p dev = %p ha->cdev = %p\n", + ha, dev, &ha->cdev); + QL_DPRINT12(ha, + "dev->cdev = %p dev->rdma_ctx = %p\n", + dev->cdev, dev->rdma_ctx); + + ret = qlnxr_alloc_resources(dev); + + if (ret) + goto qlnxr_add_err; + + ret = qlnxr_setup_irqs(dev); + + if (ret) { + qlnxr_free_resources(dev); + goto qlnxr_add_err; + } + + ret = qlnxr_init_hw(dev); + + if (ret) { + qlnxr_release_irqs(dev); + qlnxr_free_resources(dev); + goto qlnxr_add_err; + } + + qlnxr_register_device(dev); + for (int i = 0; i < ARRAY_SIZE(qlnxr_class_attributes); ++i) { + if (device_create_file(&dev->ibdev.dev, qlnxr_class_attributes[i])) + goto sysfs_err; + } + qlnxr_build_sgid_tbl(dev); + //ret = qlnxr_register_inet(dev); + QL_DPRINT12(ha, "exit\n"); + if (!test_and_set_bit(QLNXR_ENET_STATE_BIT, &dev->enet_state)) { + QL_DPRINT12(ha, "dispatching IB_PORT_ACITVE event\n"); + qlnxr_ib_dispatch_event(dev, QLNXR_PORT, + IB_EVENT_PORT_ACTIVE); + } + + return (dev); +sysfs_err: + for (int i = 0; i < ARRAY_SIZE(qlnxr_class_attributes); ++i) { + device_remove_file(&dev->ibdev.dev, qlnxr_class_attributes[i]); + } + ib_unregister_device(&dev->ibdev); + +qlnxr_add_err: + ib_dealloc_device(&dev->ibdev); + + QL_DPRINT12(ha, "exit failed\n"); + return (NULL); +} + +static void +qlnxr_remove_sysfiles(struct qlnxr_dev *dev) +{ + int i; + for (i = 0; i < ARRAY_SIZE(qlnxr_class_attributes); ++i) + device_remove_file(&dev->ibdev.dev, qlnxr_class_attributes[i]); +} + 
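+/*
+ * Teardown mirrors qlnxr_add() in reverse: report the port down first,
+ * refuse the detach while protection domains are still allocated
+ * (pd_count != 0), unregister from ibcore, then stop the ecore RDMA
+ * engine and release the interrupts and queue memory.
+ */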
+static int +qlnxr_remove(void *eth_dev, void *qlnx_rdma_dev) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = qlnx_rdma_dev; + ha = eth_dev; + + if ((ha == NULL) || (dev == NULL)) + return (0); + + QL_DPRINT12(ha, "enter ha = %p qlnx_rdma_dev = %p pd_count = %d\n", + ha, qlnx_rdma_dev, dev->pd_count); + + qlnxr_ib_dispatch_event(dev, QLNXR_PORT, + IB_EVENT_PORT_ERR); + + if (QLNX_IS_IWARP(dev)) { + if (dev->pd_count) + return (EBUSY); + } + + ib_unregister_device(&dev->ibdev); + + if (QLNX_IS_ROCE(dev)) { + if (dev->pd_count) + return (EBUSY); + } + + ecore_rdma_remove_user(dev->rdma_ctx, dev->dpi); + ecore_rdma_stop(dev->rdma_ctx); + + qlnxr_release_irqs(dev); + + qlnxr_free_resources(dev); + + qlnxr_remove_sysfiles(dev); + ib_dealloc_device(&dev->ibdev); + + QL_DPRINT12(ha, "exit ha = %p qlnx_rdma_dev = %p\n", ha, qlnx_rdma_dev); + return (0); +} + +int +qlnx_rdma_ll2_set_mac_filter(void *rdma_ctx, uint8_t *old_mac_address, + uint8_t *new_mac_address) +{ + struct ecore_hwfn *p_hwfn = rdma_ctx; + struct qlnx_host *ha; + int ret = 0; + + ha = (struct qlnx_host *)(p_hwfn->p_dev); + QL_DPRINT2(ha, "enter rdma_ctx (%p)\n", rdma_ctx); + + if (old_mac_address) + ecore_llh_remove_mac_filter(p_hwfn->p_dev, 0, old_mac_address); + + if (new_mac_address) + ret = ecore_llh_add_mac_filter(p_hwfn->p_dev, 0, new_mac_address); + + QL_DPRINT2(ha, "exit rdma_ctx (%p)\n", rdma_ctx); + return (ret); +} + +static void +qlnxr_mac_address_change(struct qlnxr_dev *dev) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter/exit\n"); + + return; +} + +static void +qlnxr_notify(void *eth_dev, void *qlnx_rdma_dev, enum qlnx_rdma_event event) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = qlnx_rdma_dev; + + if (dev == NULL) + return; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter (%p, %d)\n", qlnx_rdma_dev, event); + + switch (event) { + + case QLNX_ETHDEV_UP: + if (!test_and_set_bit(QLNXR_ENET_STATE_BIT, &dev->enet_state)) + qlnxr_ib_dispatch_event(dev, QLNXR_PORT, + IB_EVENT_PORT_ACTIVE); + break; + + case QLNX_ETHDEV_CHANGE_ADDR: + qlnxr_mac_address_change(dev); + break; + + case QLNX_ETHDEV_DOWN: + if (test_and_set_bit(QLNXR_ENET_STATE_BIT, &dev->enet_state)) + qlnxr_ib_dispatch_event(dev, QLNXR_PORT, + IB_EVENT_PORT_ERR); + break; + } + + QL_DPRINT12(ha, "exit (%p, %d)\n", qlnx_rdma_dev, event); + return; +} + +static int +qlnxr_mod_load(void) +{ + int ret; + + + qlnxr_drv.add = qlnxr_add; + qlnxr_drv.remove = qlnxr_remove; + qlnxr_drv.notify = qlnxr_notify; + + ret = qlnx_rdma_register_if(&qlnxr_drv); + + return (0); +} + +static int +qlnxr_mod_unload(void) +{ + int ret; + + ret = qlnx_rdma_deregister_if(&qlnxr_drv); + return (ret); +} + +static int +qlnxr_event_handler(module_t mod, int event, void *arg) +{ + + int ret = 0; + + switch (event) { + + case MOD_LOAD: + ret = qlnxr_mod_load(); + break; + + case MOD_UNLOAD: + ret = qlnxr_mod_unload(); + break; + + default: + break; + } + + return (ret); +} + +static moduledata_t qlnxr_mod_info = { + .name = "qlnxr", + .evhand = qlnxr_event_handler, +}; + +MODULE_VERSION(qlnxr, 1); +MODULE_DEPEND(qlnxr, if_qlnxe, 1, 1, 1); +MODULE_DEPEND(qlnxr, ibcore, 1, 1, 1); + +#if __FreeBSD_version >= 1100000 +MODULE_DEPEND(qlnxr, linuxkpi, 1, 1, 1); +#endif /* #if __FreeBSD_version >= 1100000 */ + +DECLARE_MODULE(qlnxr, qlnxr_mod_info, SI_SUB_LAST, SI_ORDER_ANY); + diff --git a/sys/dev/qlnx/qlnxr/qlnxr_roce.h b/sys/dev/qlnx/qlnxr/qlnxr_roce.h new file mode 100644 index 000000000000..9a39cb5d18db --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_roce.h 
@@ -0,0 +1,675 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef __QLNXR_ROCE_H__ +#define __QLNXR_ROCE_H__ + + +/* + * roce completion notification queue element + */ +struct roce_cnqe { + struct regpair cq_handle; +}; + + +struct roce_cqe_responder { + struct regpair srq_wr_id; + struct regpair qp_handle; + __le32 imm_data_or_inv_r_Key; + __le32 length; + __le32 reserved0; + __le16 rq_cons; + u8 flags; +#define ROCE_CQE_RESPONDER_TOGGLE_BIT_MASK 0x1 +#define ROCE_CQE_RESPONDER_TOGGLE_BIT_SHIFT 0 +#define ROCE_CQE_RESPONDER_TYPE_MASK 0x3 +#define ROCE_CQE_RESPONDER_TYPE_SHIFT 1 +#define ROCE_CQE_RESPONDER_INV_FLG_MASK 0x1 +#define ROCE_CQE_RESPONDER_INV_FLG_SHIFT 3 +#define ROCE_CQE_RESPONDER_IMM_FLG_MASK 0x1 +#define ROCE_CQE_RESPONDER_IMM_FLG_SHIFT 4 +#define ROCE_CQE_RESPONDER_RDMA_FLG_MASK 0x1 +#define ROCE_CQE_RESPONDER_RDMA_FLG_SHIFT 5 +#define ROCE_CQE_RESPONDER_RESERVED2_MASK 0x3 +#define ROCE_CQE_RESPONDER_RESERVED2_SHIFT 6 + u8 status; +}; + +struct roce_cqe_requester { + __le16 sq_cons; + __le16 reserved0; + __le32 reserved1; + struct regpair qp_handle; + struct regpair reserved2; + __le32 reserved3; + __le16 reserved4; + u8 flags; +#define ROCE_CQE_REQUESTER_TOGGLE_BIT_MASK 0x1 +#define ROCE_CQE_REQUESTER_TOGGLE_BIT_SHIFT 0 +#define ROCE_CQE_REQUESTER_TYPE_MASK 0x3 +#define ROCE_CQE_REQUESTER_TYPE_SHIFT 1 +#define ROCE_CQE_REQUESTER_RESERVED5_MASK 0x1F +#define ROCE_CQE_REQUESTER_RESERVED5_SHIFT 3 + u8 status; +}; + +struct roce_cqe_common { + struct regpair reserved0; + struct regpair qp_handle; + __le16 reserved1[7]; + u8 flags; +#define ROCE_CQE_COMMON_TOGGLE_BIT_MASK 0x1 +#define ROCE_CQE_COMMON_TOGGLE_BIT_SHIFT 0 +#define ROCE_CQE_COMMON_TYPE_MASK 0x3 +#define ROCE_CQE_COMMON_TYPE_SHIFT 1 +#define ROCE_CQE_COMMON_RESERVED2_MASK 0x1F +#define ROCE_CQE_COMMON_RESERVED2_SHIFT 3 + u8 status; +}; + +/* + * roce completion queue element + */ +union roce_cqe { + struct roce_cqe_responder resp; + struct roce_cqe_requester req; + struct roce_cqe_common cmn; +}; + + + + +/* + * CQE requester status enumeration + */ +enum roce_cqe_requester_status_enum { + ROCE_CQE_REQ_STS_OK, + ROCE_CQE_REQ_STS_BAD_RESPONSE_ERR, + 
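+	/* (these statuses track enum ib_wc_status; the CQ poll path
+	 *  presumably maps them to the corresponding IB_WC_* codes) */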
ROCE_CQE_REQ_STS_LOCAL_LENGTH_ERR, + ROCE_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR, + ROCE_CQE_REQ_STS_LOCAL_PROTECTION_ERR, + ROCE_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR, + ROCE_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR, + ROCE_CQE_REQ_STS_REMOTE_ACCESS_ERR, + ROCE_CQE_REQ_STS_REMOTE_OPERATION_ERR, + ROCE_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR, + ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR, + ROCE_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR, + MAX_ROCE_CQE_REQUESTER_STATUS_ENUM +}; + + + +/* + * CQE responder status enumeration + */ +enum roce_cqe_responder_status_enum { + ROCE_CQE_RESP_STS_OK, + ROCE_CQE_RESP_STS_LOCAL_ACCESS_ERR, + ROCE_CQE_RESP_STS_LOCAL_LENGTH_ERR, + ROCE_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR, + ROCE_CQE_RESP_STS_LOCAL_PROTECTION_ERR, + ROCE_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR, + ROCE_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR, + ROCE_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR, + MAX_ROCE_CQE_RESPONDER_STATUS_ENUM +}; + + +/* + * CQE type enumeration + */ +enum roce_cqe_type { + ROCE_CQE_TYPE_REQUESTER, + ROCE_CQE_TYPE_RESPONDER_RQ, + ROCE_CQE_TYPE_RESPONDER_SRQ, + ROCE_CQE_TYPE_INVALID, + MAX_ROCE_CQE_TYPE +}; + + +/* + * memory window type enumeration + */ +enum roce_mw_type { + ROCE_MW_TYPE_1, + ROCE_MW_TYPE_2A, + MAX_ROCE_MW_TYPE +}; + + +struct roce_rq_sge { + struct regpair addr; + __le32 length; + __le32 flags; +#define ROCE_RQ_SGE_L_KEY_MASK 0x3FFFFFF +#define ROCE_RQ_SGE_L_KEY_SHIFT 0 +#define ROCE_RQ_SGE_NUM_SGES_MASK 0x7 +#define ROCE_RQ_SGE_NUM_SGES_SHIFT 26 +#define ROCE_RQ_SGE_RESERVED0_MASK 0x7 +#define ROCE_RQ_SGE_RESERVED0_SHIFT 29 +}; + + +struct roce_sq_atomic_wqe { + struct regpair remote_va; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define ROCE_SQ_ATOMIC_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_ATOMIC_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_ATOMIC_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_ATOMIC_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_ATOMIC_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_ATOMIC_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_ATOMIC_WQE_RESERVED0_SHIFT 5 + u8 reserved1; + u8 prev_wqe_size; + struct regpair swap_data; + __le32 r_key; + __le32 reserved2; + struct regpair cmp_data; + struct regpair reserved3; +}; + + +/* + * First element (16 bytes) of atomic wqe + */ +struct roce_sq_atomic_wqe_1st { + struct regpair remote_va; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define ROCE_SQ_ATOMIC_WQE_1ST_COMP_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_1ST_COMP_FLG_SHIFT 0 +#define ROCE_SQ_ATOMIC_WQE_1ST_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_1ST_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_ATOMIC_WQE_1ST_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_1ST_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_ATOMIC_WQE_1ST_SE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_1ST_SE_FLG_SHIFT 3 +#define ROCE_SQ_ATOMIC_WQE_1ST_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_ATOMIC_WQE_1ST_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_ATOMIC_WQE_1ST_RESERVED0_MASK 0x7 +#define ROCE_SQ_ATOMIC_WQE_1ST_RESERVED0_SHIFT 5 + u8 reserved1; + u8 prev_wqe_size; +}; + + +/* + * Second element (16 bytes) of atomic wqe + */ +struct roce_sq_atomic_wqe_2nd { + struct regpair swap_data; + __le32 r_key; + __le32 reserved2; +}; + + +/* + * Third element (16 bytes) of atomic wqe + */ +struct roce_sq_atomic_wqe_3rd { + struct regpair cmp_data; + struct regpair reserved3; +}; + + +struct roce_sq_bind_wqe { + 
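+	/* memory-window bind request: two 16B SQ elements, also
+	 * available split as roce_sq_bind_wqe_1st/_2nd below */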
struct regpair addr; + __le32 l_key; + u8 req_type; + u8 flags; +#define ROCE_SQ_BIND_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_BIND_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_BIND_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_BIND_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_BIND_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_BIND_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_BIND_WQE_RESERVED0_SHIFT 5 + u8 access_ctrl; +#define ROCE_SQ_BIND_WQE_REMOTE_READ_MASK 0x1 +#define ROCE_SQ_BIND_WQE_REMOTE_READ_SHIFT 0 +#define ROCE_SQ_BIND_WQE_REMOTE_WRITE_MASK 0x1 +#define ROCE_SQ_BIND_WQE_REMOTE_WRITE_SHIFT 1 +#define ROCE_SQ_BIND_WQE_ENABLE_ATOMIC_MASK 0x1 +#define ROCE_SQ_BIND_WQE_ENABLE_ATOMIC_SHIFT 2 +#define ROCE_SQ_BIND_WQE_LOCAL_READ_MASK 0x1 +#define ROCE_SQ_BIND_WQE_LOCAL_READ_SHIFT 3 +#define ROCE_SQ_BIND_WQE_LOCAL_WRITE_MASK 0x1 +#define ROCE_SQ_BIND_WQE_LOCAL_WRITE_SHIFT 4 +#define ROCE_SQ_BIND_WQE_RESERVED1_MASK 0x7 +#define ROCE_SQ_BIND_WQE_RESERVED1_SHIFT 5 + u8 prev_wqe_size; + u8 bind_ctrl; +#define ROCE_SQ_BIND_WQE_ZERO_BASED_MASK 0x1 +#define ROCE_SQ_BIND_WQE_ZERO_BASED_SHIFT 0 +#define ROCE_SQ_BIND_WQE_MW_TYPE_MASK 0x1 +#define ROCE_SQ_BIND_WQE_MW_TYPE_SHIFT 1 +#define ROCE_SQ_BIND_WQE_RESERVED2_MASK 0x3F +#define ROCE_SQ_BIND_WQE_RESERVED2_SHIFT 2 + u8 reserved3[2]; + u8 length_hi; + __le32 length_lo; + __le32 parent_l_key; + __le32 reserved6; +}; + + +/* + * First element (16 bytes) of bind wqe + */ +struct roce_sq_bind_wqe_1st { + struct regpair addr; + __le32 l_key; + u8 req_type; + u8 flags; +#define ROCE_SQ_BIND_WQE_1ST_COMP_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_COMP_FLG_SHIFT 0 +#define ROCE_SQ_BIND_WQE_1ST_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_BIND_WQE_1ST_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_BIND_WQE_1ST_SE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_SE_FLG_SHIFT 3 +#define ROCE_SQ_BIND_WQE_1ST_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_BIND_WQE_1ST_RESERVED0_MASK 0x7 +#define ROCE_SQ_BIND_WQE_1ST_RESERVED0_SHIFT 5 + u8 access_ctrl; +#define ROCE_SQ_BIND_WQE_1ST_REMOTE_READ_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_REMOTE_READ_SHIFT 0 +#define ROCE_SQ_BIND_WQE_1ST_REMOTE_WRITE_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_REMOTE_WRITE_SHIFT 1 +#define ROCE_SQ_BIND_WQE_1ST_ENABLE_ATOMIC_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_ENABLE_ATOMIC_SHIFT 2 +#define ROCE_SQ_BIND_WQE_1ST_LOCAL_READ_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_LOCAL_READ_SHIFT 3 +#define ROCE_SQ_BIND_WQE_1ST_LOCAL_WRITE_MASK 0x1 +#define ROCE_SQ_BIND_WQE_1ST_LOCAL_WRITE_SHIFT 4 +#define ROCE_SQ_BIND_WQE_1ST_RESERVED1_MASK 0x7 +#define ROCE_SQ_BIND_WQE_1ST_RESERVED1_SHIFT 5 + u8 prev_wqe_size; +}; + + +/* + * Second element (16 bytes) of bind wqe + */ +struct roce_sq_bind_wqe_2nd { + u8 bind_ctrl; +#define ROCE_SQ_BIND_WQE_2ND_ZERO_BASED_MASK 0x1 +#define ROCE_SQ_BIND_WQE_2ND_ZERO_BASED_SHIFT 0 +#define ROCE_SQ_BIND_WQE_2ND_MW_TYPE_MASK 0x1 +#define ROCE_SQ_BIND_WQE_2ND_MW_TYPE_SHIFT 1 +#define ROCE_SQ_BIND_WQE_2ND_RESERVED2_MASK 0x3F +#define ROCE_SQ_BIND_WQE_2ND_RESERVED2_SHIFT 2 + u8 reserved3[2]; + u8 length_hi; + __le32 length_lo; + __le32 parent_l_key; + __le32 reserved6; +}; + + +/* + * Structure with only the SQ WQE common fields. 
Size is of one SQ element (16B) + */ +struct roce_sq_common_wqe { + __le32 reserved1[3]; + u8 req_type; + u8 flags; +#define ROCE_SQ_COMMON_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_COMMON_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_COMMON_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_COMMON_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_COMMON_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_COMMON_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_COMMON_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_COMMON_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_COMMON_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_COMMON_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_COMMON_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_COMMON_WQE_RESERVED0_SHIFT 5 + u8 reserved2; + u8 prev_wqe_size; +}; + + +struct roce_sq_fmr_wqe { + struct regpair addr; + __le32 l_key; + u8 req_type; + u8 flags; +#define ROCE_SQ_FMR_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_FMR_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_FMR_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_FMR_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_FMR_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_FMR_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_FMR_WQE_RESERVED0_SHIFT 5 + u8 access_ctrl; +#define ROCE_SQ_FMR_WQE_REMOTE_READ_MASK 0x1 +#define ROCE_SQ_FMR_WQE_REMOTE_READ_SHIFT 0 +#define ROCE_SQ_FMR_WQE_REMOTE_WRITE_MASK 0x1 +#define ROCE_SQ_FMR_WQE_REMOTE_WRITE_SHIFT 1 +#define ROCE_SQ_FMR_WQE_ENABLE_ATOMIC_MASK 0x1 +#define ROCE_SQ_FMR_WQE_ENABLE_ATOMIC_SHIFT 2 +#define ROCE_SQ_FMR_WQE_LOCAL_READ_MASK 0x1 +#define ROCE_SQ_FMR_WQE_LOCAL_READ_SHIFT 3 +#define ROCE_SQ_FMR_WQE_LOCAL_WRITE_MASK 0x1 +#define ROCE_SQ_FMR_WQE_LOCAL_WRITE_SHIFT 4 +#define ROCE_SQ_FMR_WQE_RESERVED1_MASK 0x7 +#define ROCE_SQ_FMR_WQE_RESERVED1_SHIFT 5 + u8 prev_wqe_size; + u8 fmr_ctrl; +#define ROCE_SQ_FMR_WQE_PAGE_SIZE_LOG_MASK 0x1F +#define ROCE_SQ_FMR_WQE_PAGE_SIZE_LOG_SHIFT 0 +#define ROCE_SQ_FMR_WQE_ZERO_BASED_MASK 0x1 +#define ROCE_SQ_FMR_WQE_ZERO_BASED_SHIFT 5 +#define ROCE_SQ_FMR_WQE_BIND_EN_MASK 0x1 +#define ROCE_SQ_FMR_WQE_BIND_EN_SHIFT 6 +#define ROCE_SQ_FMR_WQE_RESERVED2_MASK 0x1 +#define ROCE_SQ_FMR_WQE_RESERVED2_SHIFT 7 + u8 reserved3[2]; + u8 length_hi; + __le32 length_lo; + struct regpair pbl_addr; +}; + + +/* + * First element (16 bytes) of fmr wqe + */ +struct roce_sq_fmr_wqe_1st { + struct regpair addr; + __le32 l_key; + u8 req_type; + u8 flags; +#define ROCE_SQ_FMR_WQE_1ST_COMP_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_COMP_FLG_SHIFT 0 +#define ROCE_SQ_FMR_WQE_1ST_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_FMR_WQE_1ST_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_FMR_WQE_1ST_SE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_SE_FLG_SHIFT 3 +#define ROCE_SQ_FMR_WQE_1ST_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_FMR_WQE_1ST_RESERVED0_MASK 0x7 +#define ROCE_SQ_FMR_WQE_1ST_RESERVED0_SHIFT 5 + u8 access_ctrl; +#define ROCE_SQ_FMR_WQE_1ST_REMOTE_READ_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_REMOTE_READ_SHIFT 0 +#define ROCE_SQ_FMR_WQE_1ST_REMOTE_WRITE_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_REMOTE_WRITE_SHIFT 1 +#define ROCE_SQ_FMR_WQE_1ST_ENABLE_ATOMIC_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_ENABLE_ATOMIC_SHIFT 2 +#define ROCE_SQ_FMR_WQE_1ST_LOCAL_READ_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_LOCAL_READ_SHIFT 3 +#define 
ROCE_SQ_FMR_WQE_1ST_LOCAL_WRITE_MASK 0x1 +#define ROCE_SQ_FMR_WQE_1ST_LOCAL_WRITE_SHIFT 4 +#define ROCE_SQ_FMR_WQE_1ST_RESERVED1_MASK 0x7 +#define ROCE_SQ_FMR_WQE_1ST_RESERVED1_SHIFT 5 + u8 prev_wqe_size; +}; + + +/* + * Second element (16 bytes) of fmr wqe + */ +struct roce_sq_fmr_wqe_2nd { + u8 fmr_ctrl; +#define ROCE_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG_MASK 0x1F +#define ROCE_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG_SHIFT 0 +#define ROCE_SQ_FMR_WQE_2ND_ZERO_BASED_MASK 0x1 +#define ROCE_SQ_FMR_WQE_2ND_ZERO_BASED_SHIFT 5 +#define ROCE_SQ_FMR_WQE_2ND_BIND_EN_MASK 0x1 +#define ROCE_SQ_FMR_WQE_2ND_BIND_EN_SHIFT 6 +#define ROCE_SQ_FMR_WQE_2ND_RESERVED2_MASK 0x1 +#define ROCE_SQ_FMR_WQE_2ND_RESERVED2_SHIFT 7 + u8 reserved3[2]; + u8 length_hi; + __le32 length_lo; + struct regpair pbl_addr; +}; + + +struct roce_sq_local_inv_wqe { + struct regpair reserved; + __le32 inv_l_key; + u8 req_type; + u8 flags; +#define ROCE_SQ_LOCAL_INV_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_LOCAL_INV_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_LOCAL_INV_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_LOCAL_INV_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_LOCAL_INV_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_LOCAL_INV_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_LOCAL_INV_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_LOCAL_INV_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_LOCAL_INV_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_LOCAL_INV_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_LOCAL_INV_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_LOCAL_INV_WQE_RESERVED0_SHIFT 5 + u8 reserved1; + u8 prev_wqe_size; +}; + + +struct roce_sq_rdma_wqe { + __le32 imm_data; + __le32 length; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define ROCE_SQ_RDMA_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_RDMA_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_RDMA_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_RDMA_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_RDMA_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_RDMA_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_RDMA_WQE_RESERVED0_SHIFT 5 + u8 wqe_size; + u8 prev_wqe_size; + struct regpair remote_va; + __le32 r_key; + __le32 reserved1; +}; + + +/* + * First element (16 bytes) of rdma wqe + */ +struct roce_sq_rdma_wqe_1st { + __le32 imm_data; + __le32 length; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define ROCE_SQ_RDMA_WQE_1ST_COMP_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_1ST_COMP_FLG_SHIFT 0 +#define ROCE_SQ_RDMA_WQE_1ST_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_1ST_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_RDMA_WQE_1ST_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_1ST_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_RDMA_WQE_1ST_SE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_1ST_SE_FLG_SHIFT 3 +#define ROCE_SQ_RDMA_WQE_1ST_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_RDMA_WQE_1ST_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_RDMA_WQE_1ST_RESERVED0_MASK 0x7 +#define ROCE_SQ_RDMA_WQE_1ST_RESERVED0_SHIFT 5 + u8 wqe_size; + u8 prev_wqe_size; +}; + + +/* + * Second element (16 bytes) of rdma wqe + */ +struct roce_sq_rdma_wqe_2nd { + struct regpair remote_va; + __le32 r_key; + __le32 reserved1; +}; + + +/* + * SQ WQE req type enumeration + */ +enum roce_sq_req_type { + ROCE_SQ_REQ_TYPE_SEND, + ROCE_SQ_REQ_TYPE_SEND_WITH_IMM, + ROCE_SQ_REQ_TYPE_SEND_WITH_INVALIDATE, + ROCE_SQ_REQ_TYPE_RDMA_WR, + ROCE_SQ_REQ_TYPE_RDMA_WR_WITH_IMM, + ROCE_SQ_REQ_TYPE_RDMA_RD, + ROCE_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP, + 
ROCE_SQ_REQ_TYPE_ATOMIC_ADD, + ROCE_SQ_REQ_TYPE_LOCAL_INVALIDATE, + ROCE_SQ_REQ_TYPE_FAST_MR, + ROCE_SQ_REQ_TYPE_BIND, + ROCE_SQ_REQ_TYPE_INVALID, + MAX_ROCE_SQ_REQ_TYPE +}; + + +struct roce_sq_send_wqe { + __le32 inv_key_or_imm_data; + __le32 length; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define ROCE_SQ_SEND_WQE_COMP_FLG_MASK 0x1 +#define ROCE_SQ_SEND_WQE_COMP_FLG_SHIFT 0 +#define ROCE_SQ_SEND_WQE_RD_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_SEND_WQE_RD_FENCE_FLG_SHIFT 1 +#define ROCE_SQ_SEND_WQE_INV_FENCE_FLG_MASK 0x1 +#define ROCE_SQ_SEND_WQE_INV_FENCE_FLG_SHIFT 2 +#define ROCE_SQ_SEND_WQE_SE_FLG_MASK 0x1 +#define ROCE_SQ_SEND_WQE_SE_FLG_SHIFT 3 +#define ROCE_SQ_SEND_WQE_INLINE_FLG_MASK 0x1 +#define ROCE_SQ_SEND_WQE_INLINE_FLG_SHIFT 4 +#define ROCE_SQ_SEND_WQE_RESERVED0_MASK 0x7 +#define ROCE_SQ_SEND_WQE_RESERVED0_SHIFT 5 + u8 wqe_size; + u8 prev_wqe_size; +}; + + +struct roce_sq_sge { + __le32 length; + struct regpair addr; + __le32 l_key; +}; + + +struct roce_srq_prod { + __le16 prod; +}; + + +struct roce_srq_sge { + struct regpair addr; + __le32 length; + __le32 l_key; + struct regpair wr_id; + u8 flags; +#define ROCE_SRQ_SGE_NUM_SGES_MASK 0x3 +#define ROCE_SRQ_SGE_NUM_SGES_SHIFT 0 +#define ROCE_SRQ_SGE_RESERVED0_MASK 0x3F +#define ROCE_SRQ_SGE_RESERVED0_SHIFT 2 + u8 reserved1; + __le16 reserved2; + __le32 reserved3; +}; + + +/* + * RoCE doorbell data for SQ and RQ + */ +struct roce_pwm_val16_data { + __le16 icid; + __le16 prod_val; +}; + + +union roce_pwm_val16_data_union { + struct roce_pwm_val16_data as_struct; + __le32 as_dword; +}; + + +/* + * RoCE doorbell data for CQ + */ +struct roce_pwm_val32_data { + __le16 icid; + u8 agg_flags; + u8 params; +#define ROCE_PWM_VAL32_DATA_AGG_CMD_MASK 0x3 +#define ROCE_PWM_VAL32_DATA_AGG_CMD_SHIFT 0 +#define ROCE_PWM_VAL32_DATA_BYPASS_EN_MASK 0x1 +#define ROCE_PWM_VAL32_DATA_BYPASS_EN_SHIFT 2 +#define ROCE_PWM_VAL32_DATA_RESERVED_MASK 0x1F +#define ROCE_PWM_VAL32_DATA_RESERVED_SHIFT 3 + __le32 cq_cons_val; +}; + + +union roce_pwm_val32_data_union { + struct roce_pwm_val32_data as_struct; + struct regpair as_repair; +}; + +#endif /* __QLNXR_ROCE_H__ */ diff --git a/sys/dev/qlnx/qlnxr/qlnxr_user.h b/sys/dev/qlnx/qlnxr/qlnxr_user.h new file mode 100644 index 000000000000..ac6755188468 --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_user.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef __QLNXR_USER_H__ +#define __QLNXR_USER_H__ + +#define QLNXR_ABI_VERSION (7) +#define QLNXR_BE_ROCE_ABI_VERSION (1) + +/* user kernel communication data structures. */ + +struct qlnxr_alloc_ucontext_resp { + u64 db_pa; + u32 db_size; + + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_srq_wr; + uint32_t sges_per_send_wr; + uint32_t sges_per_recv_wr; + uint32_t sges_per_srq_wr; + int max_cqes; + uint8_t dpm_enabled; + uint8_t wids_enabled; + uint16_t wid_count; +}; + +struct qlnxr_alloc_pd_ureq { + u64 rsvd1; +}; + +struct qlnxr_alloc_pd_uresp { + u32 pd_id; +}; + +struct qlnxr_create_cq_ureq { + uint64_t addr; /* user space virtual address of CQ buffer */ + size_t len; /* size of CQ buffer */ +}; + +struct qlnxr_create_cq_uresp { + u32 db_offset; + u16 icid; +}; + +struct qlnxr_create_qp_ureq { + u32 qp_handle_hi; + u32 qp_handle_lo; + + /* SQ */ + uint64_t sq_addr; /* user space virtual address of SQ buffer */ + size_t sq_len; /* length of SQ buffer */ + + /* RQ */ + uint64_t rq_addr; /* user space virtual address of RQ buffer */ + size_t rq_len; /* length of RQ buffer */ +}; + +struct qlnxr_create_qp_uresp { + u32 qp_id; + int atomic_supported; + + /* SQ*/ + u32 sq_db_offset; + u16 sq_icid; + + /* RQ */ + u32 rq_db_offset; + u16 rq_icid; + + u32 rq_db2_offset; +}; + +struct qlnxr_create_srq_ureq { + /* user space virtual address of producer pair */ + uint64_t prod_pair_addr; + uint64_t srq_addr; /* user space virtual address of SQ buffer */ + size_t srq_len; /* length of SQ buffer */ +}; + +struct qlnxr_create_srq_uresp { + u16 srq_id; +}; + +#endif /* #ifndef __QLNXR_USER_H__ */ diff --git a/sys/dev/qlnx/qlnxr/qlnxr_verbs.c b/sys/dev/qlnx/qlnxr/qlnxr_verbs.c new file mode 100644 index 000000000000..7f551647101b --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_verbs.c @@ -0,0 +1,7306 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +/* + * File: qlnxr_verbs.c + */ +#include +__FBSDID("$FreeBSD$"); + +#include "qlnxr_def.h" +#include "rdma_common.h" +#include "qlnxr_roce.h" +#include "qlnxr_cm.h" + +#define upper_32_bits(x) (uint32_t)(x >> 32) +#define lower_32_bits(x) (uint32_t)(x) +#define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo)) + +#define TYPEPTR_ADDR_SET(type_ptr, field, vaddr) \ + do { \ + (type_ptr)->field.hi = cpu_to_le32(upper_32_bits(vaddr));\ + (type_ptr)->field.lo = cpu_to_le32(lower_32_bits(vaddr));\ + } while (0) + + +#define RQ_SGE_SET(sge, vaddr, vlength, vflags) \ + do { \ + TYPEPTR_ADDR_SET(sge, addr, vaddr); \ + (sge)->length = cpu_to_le32(vlength); \ + (sge)->flags = cpu_to_le32(vflags); \ + } while (0) + +#define SRQ_HDR_SET(hdr, vwr_id, num_sge) \ + do { \ + TYPEPTR_ADDR_SET(hdr, wr_id, vwr_id); \ + (hdr)->num_sges = num_sge; \ + } while (0) + +#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey) \ + do { \ + TYPEPTR_ADDR_SET(sge, addr, vaddr); \ + (sge)->length = cpu_to_le32(vlength); \ + (sge)->l_key = cpu_to_le32(vlkey); \ + } while (0) + +#define NIPQUAD(addr) \ + ((unsigned char *)&addr)[0], \ + ((unsigned char *)&addr)[1], \ + ((unsigned char *)&addr)[2], \ + ((unsigned char *)&addr)[3] + +struct ib_srq *qlnxr_create_srq(struct ib_pd *, + struct ib_srq_init_attr *, + struct ib_udata *); + +int qlnxr_destroy_srq(struct ib_srq *); + +int qlnxr_modify_srq(struct ib_srq *, + struct ib_srq_attr *, + enum ib_srq_attr_mask, + struct ib_udata *); +static int +qlnxr_check_srq_params(struct ib_pd *ibpd, + struct qlnxr_dev *dev, + struct ib_srq_init_attr *attrs); + +static int +qlnxr_init_srq_user_params(struct ib_ucontext *ib_ctx, + struct qlnxr_srq *srq, + struct qlnxr_create_srq_ureq *ureq, + int access, int dmasync); + +static int +qlnxr_alloc_srq_kernel_params(struct qlnxr_srq *srq, + struct qlnxr_dev *dev, + struct ib_srq_init_attr *init_attr); + +extern enum _ecore_status_t +ecore_rdma_modify_srq(void *rdma_cxt, + struct ecore_rdma_modify_srq_in_params *in_params); + +extern enum _ecore_status_t +ecore_rdma_destroy_srq(void *rdma_cxt, + struct ecore_rdma_destroy_srq_in_params *in_params); + +extern enum _ecore_status_t +ecore_rdma_create_srq(void *rdma_cxt, + struct ecore_rdma_create_srq_in_params *in_params, + struct ecore_rdma_create_srq_out_params *out_params); + + +static int +qlnxr_copy_srq_uresp(struct qlnxr_dev *dev, + struct qlnxr_srq *srq, + struct ib_udata *udata); + +static void +qlnxr_free_srq_user_params(struct qlnxr_srq *srq); + +static void +qlnxr_free_srq_kernel_params(struct qlnxr_srq *srq); + + +static u32 +qlnxr_srq_elem_left(struct qlnxr_srq_hwq_info *hw_srq); + +int +qlnxr_iw_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *sgid) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memset(sgid->raw, 0, sizeof(sgid->raw)); + + memcpy(sgid->raw, dev->ha->primary_mac, sizeof (dev->ha->primary_mac)); + + QL_DPRINT12(ha, "exit\n"); + + 
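	/*
	 * Note: iWARP has no real GID table here; the "GID" handed back
	 * is simply the primary MAC copied into the low bytes of raw[]
	 * above, with the remaining bytes zeroed.
	 */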
return 0; +} + +int +qlnxr_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *sgid) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + QL_DPRINT12(ha, "enter index: %d\n", index); +#if 0 + int ret = 0; + /* @@@: if DEFINE_ROCE_GID_TABLE to be used here */ + //if (!rdma_cap_roce_gid_table(ibdev, port)) { + if (!(rdma_protocol_roce(ibdev, port) && + ibdev->add_gid && ibdev->del_gid)) { + QL_DPRINT11(ha, "acquire gid failed\n"); + return -ENODEV; + } + + ret = ib_get_cached_gid(ibdev, port, index, sgid, NULL); + if (ret == -EAGAIN) { + memcpy(sgid, &zgid, sizeof(*sgid)); + return 0; + } +#endif + if ((index >= QLNXR_MAX_SGID) || (index < 0)) { + QL_DPRINT12(ha, "invalid gid index %d\n", index); + memset(sgid, 0, sizeof(*sgid)); + return -EINVAL; + } + memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid)); + + QL_DPRINT12(ha, "exit : %p\n", sgid); + + return 0; +} + +struct ib_srq * +qlnxr_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + struct ecore_rdma_destroy_srq_in_params destroy_in_params; + struct ecore_rdma_create_srq_out_params out_params; + struct ecore_rdma_create_srq_in_params in_params; + u64 pbl_base_addr, phy_prod_pair_addr; + struct qlnxr_pd *pd = get_qlnxr_pd(ibpd); + struct ib_ucontext *ib_ctx = NULL; + struct qlnxr_srq_hwq_info *hw_srq; + struct qlnxr_ucontext *ctx = NULL; + struct qlnxr_create_srq_ureq ureq; + u32 page_cnt, page_size; + struct qlnxr_srq *srq; + int ret = 0; + + dev = get_qlnxr_dev((ibpd->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + ret = qlnxr_check_srq_params(ibpd, dev, init_attr); + + srq = kzalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) { + QL_DPRINT11(ha, "cannot allocate memory for srq\n"); + return NULL; //@@@ : TODO what to return here? 
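		/*
		 * A hedged answer to the TODO above: callers of the
		 * ib_create_srq() verb test the result with IS_ERR(),
		 * so returning ERR_PTR(-ENOMEM) here, matching the
		 * ERR_PTR(-EFAULT) used by the error paths at the end
		 * of this function, would be the safer choice.
		 */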
+ } + + srq->dev = dev; + hw_srq = &srq->hw_srq; + spin_lock_init(&srq->lock); + memset(&in_params, 0, sizeof(in_params)); + + if (udata && ibpd->uobject && ibpd->uobject->context) { + ib_ctx = ibpd->uobject->context; + ctx = get_qlnxr_ucontext(ib_ctx); + + memset(&ureq, 0, sizeof(ureq)); + if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), + udata->inlen))) { + QL_DPRINT11(ha, "problem" + " copying data from user space\n"); + goto err0; + } + + ret = qlnxr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0); + if (ret) + goto err0; + + page_cnt = srq->usrq.pbl_info.num_pbes; + pbl_base_addr = srq->usrq.pbl_tbl->pa; + phy_prod_pair_addr = hw_srq->phy_prod_pair_addr; + // @@@ : if DEFINE_IB_UMEM_PAGE_SHIFT + // page_size = BIT(srq->usrq.umem->page_shift); + // else + page_size = srq->usrq.umem->page_size; + } else { + struct ecore_chain *pbl; + ret = qlnxr_alloc_srq_kernel_params(srq, dev, init_attr); + if (ret) + goto err0; + pbl = &hw_srq->pbl; + + page_cnt = ecore_chain_get_page_cnt(pbl); + pbl_base_addr = ecore_chain_get_pbl_phys(pbl); + phy_prod_pair_addr = hw_srq->phy_prod_pair_addr; + page_size = pbl->elem_per_page << 4; + } + + in_params.pd_id = pd->pd_id; + in_params.pbl_base_addr = pbl_base_addr; + in_params.prod_pair_addr = phy_prod_pair_addr; + in_params.num_pages = page_cnt; + in_params.page_size = page_size; + + ret = ecore_rdma_create_srq(dev->rdma_ctx, &in_params, &out_params); + if (ret) + goto err1; + + srq->srq_id = out_params.srq_id; + + if (udata) { + ret = qlnxr_copy_srq_uresp(dev, srq, udata); + if (ret) + goto err2; + } + + QL_DPRINT12(ha, "created srq with srq_id = 0x%0x\n", srq->srq_id); + return &srq->ibsrq; +err2: + memset(&in_params, 0, sizeof(in_params)); + destroy_in_params.srq_id = srq->srq_id; + ecore_rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params); + +err1: + if (udata) + qlnxr_free_srq_user_params(srq); + else + qlnxr_free_srq_kernel_params(srq); + +err0: + kfree(srq); + return ERR_PTR(-EFAULT); +} + +int +qlnxr_destroy_srq(struct ib_srq *ibsrq) +{ + struct qlnxr_dev *dev; + struct qlnxr_srq *srq; + qlnx_host_t *ha; + struct ecore_rdma_destroy_srq_in_params in_params; + + srq = get_qlnxr_srq(ibsrq); + dev = srq->dev; + ha = dev->ha; + + memset(&in_params, 0, sizeof(in_params)); + in_params.srq_id = srq->srq_id; + + ecore_rdma_destroy_srq(dev->rdma_ctx, &in_params); + + if (ibsrq->pd->uobject && ibsrq->pd->uobject->context) + qlnxr_free_srq_user_params(srq); + else + qlnxr_free_srq_kernel_params(srq); + + QL_DPRINT12(ha, "destroyed srq_id=0x%0x\n", srq->srq_id); + kfree(srq); + return 0; +} + +int +qlnxr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct qlnxr_dev *dev; + struct qlnxr_srq *srq; + qlnx_host_t *ha; + struct ecore_rdma_modify_srq_in_params in_params; + int ret = 0; + + srq = get_qlnxr_srq(ibsrq); + dev = srq->dev; + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + if (attr_mask & IB_SRQ_MAX_WR) { + QL_DPRINT12(ha, "invalid attribute mask=0x%x" + " specified for %p\n", attr_mask, srq); + return -EINVAL; + } + + if (attr_mask & IB_SRQ_LIMIT) { + if (attr->srq_limit >= srq->hw_srq.max_wr) { + QL_DPRINT12(ha, "invalid srq_limit=0x%x" + " (max_srq_limit = 0x%x)\n", + attr->srq_limit, srq->hw_srq.max_wr); + return -EINVAL; + } + memset(&in_params, 0, sizeof(in_params)); + in_params.srq_id = srq->srq_id; + in_params.wqe_limit = attr->srq_limit; + ret = ecore_rdma_modify_srq(dev->rdma_ctx, &in_params); + if (ret) + return ret; + } + + QL_DPRINT12(ha, "modified srq with 
srq_id = 0x%0x\n", srq->srq_id); + return 0; +} + +int +qlnxr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct qlnxr_dev *dev; + struct qlnxr_srq *srq; + qlnx_host_t *ha; + struct ecore_rdma_device *qattr; + srq = get_qlnxr_srq(ibsrq); + dev = srq->dev; + ha = dev->ha; + //qattr = &dev->attr; + qattr = ecore_rdma_query_device(dev->rdma_ctx); + QL_DPRINT12(ha, "enter\n"); + + if (!dev->rdma_ctx) { + QL_DPRINT12(ha, "called with invalid params" + " rdma_ctx is NULL\n"); + return -EINVAL; + } + + srq_attr->srq_limit = qattr->max_srq; + srq_attr->max_wr = qattr->max_srq_wr; + srq_attr->max_sge = qattr->max_sge; + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +/* Increment srq wr producer by one */ +static +void qlnxr_inc_srq_wr_prod (struct qlnxr_srq_hwq_info *info) +{ + info->wr_prod_cnt++; +} + +/* Increment srq wr consumer by one */ +static +void qlnxr_inc_srq_wr_cons(struct qlnxr_srq_hwq_info *info) +{ + info->wr_cons_cnt++; +} + +/* get_port_immutable verb is not available in FreeBSD */ +#if 0 +int +qlnxr_roce_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "entered but not implemented!!!\n"); +} +#endif + +int +qlnxr_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct qlnxr_dev *dev; + struct qlnxr_srq *srq; + qlnx_host_t *ha; + struct qlnxr_srq_hwq_info *hw_srq; + struct ecore_chain *pbl; + unsigned long flags; + int status = 0; + u32 num_sge, offset; + + srq = get_qlnxr_srq(ibsrq); + dev = srq->dev; + ha = dev->ha; + hw_srq = &srq->hw_srq; + + QL_DPRINT12(ha, "enter\n"); + spin_lock_irqsave(&srq->lock, flags); + + pbl = &srq->hw_srq.pbl; + while (wr) { + struct rdma_srq_wqe_header *hdr; + int i; + + if (!qlnxr_srq_elem_left(hw_srq) || + wr->num_sge > srq->hw_srq.max_sges) { + QL_DPRINT11(ha, "WR cannot be posted" + " (%d, %d) || (%d > %d)\n", + hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, + wr->num_sge, srq->hw_srq.max_sges); + status = -ENOMEM; + *bad_wr = wr; + break; + } + + hdr = ecore_chain_produce(pbl); + num_sge = wr->num_sge; + /* Set number of sge and WR id in header */ + SRQ_HDR_SET(hdr, wr->wr_id, num_sge); + + /* PBL is maintained in case of WR granularity. + * So increment WR producer in case we post a WR. + */ + qlnxr_inc_srq_wr_prod(hw_srq); + hw_srq->wqe_prod++; + hw_srq->sge_prod++; + + QL_DPRINT12(ha, "SRQ WR : SGEs: %d with wr_id[%d] = %llx\n", + wr->num_sge, hw_srq->wqe_prod, wr->wr_id); + + for (i = 0; i < wr->num_sge; i++) { + struct rdma_srq_sge *srq_sge = + ecore_chain_produce(pbl); + /* Set SGE length, lkey and address */ + SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr, + wr->sg_list[i].length, wr->sg_list[i].lkey); + + QL_DPRINT12(ha, "[%d]: len %d, key %x, addr %x:%x\n", + i, srq_sge->length, srq_sge->l_key, + srq_sge->addr.hi, srq_sge->addr.lo); + hw_srq->sge_prod++; + } + wmb(); + /* + * SRQ prod is 8 bytes. Need to update SGE prod in index + * in first 4 bytes and need to update WQE prod in next + * 4 bytes. 
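	 *
	 * A sketch of the producer-pair layout this update assumes
	 * (illustrative only; the authoritative definition lives in
	 * the firmware HSI headers):
	 *
	 *   struct rdma_srq_producers {
	 *       __le32 sge_prod;   -- written first, low 4 bytes
	 *       __le32 wqe_prod;   -- written second, via offsetof()
	 *   };
	 *
	 * Note that the wqe_prod store below goes through a u8 pointer
	 * and therefore writes only the low byte; a full 32-bit store
	 * would match the "next 4 bytes" description above.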
+ */ + *(srq->hw_srq.virt_prod_pair_addr) = hw_srq->sge_prod; + offset = offsetof(struct rdma_srq_producers, wqe_prod); + *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) = + hw_srq->wqe_prod; + /* Flush prod after updating it */ + wmb(); + wr = wr->next; + } + + QL_DPRINT12(ha, "Elements in SRQ: %d\n", + ecore_chain_get_elem_left(pbl)); + + spin_unlock_irqrestore(&srq->lock, flags); + QL_DPRINT12(ha, "exit\n"); + return status; +} + +int +#if __FreeBSD_version < 1102000 +qlnxr_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) +#else +qlnxr_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, + struct ib_udata *udata) +#endif /* #if __FreeBSD_version < 1102000 */ + +{ + struct qlnxr_dev *dev; + struct ecore_rdma_device *qattr; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + +#if __FreeBSD_version > 1102000 + if (udata->inlen || udata->outlen) + return -EINVAL; +#endif /* #if __FreeBSD_version > 1102000 */ + + if (dev->rdma_ctx == NULL) { + return -EINVAL; + } + + qattr = ecore_rdma_query_device(dev->rdma_ctx); + + memset(attr, 0, sizeof *attr); + + attr->fw_ver = qattr->fw_ver; + attr->sys_image_guid = qattr->sys_image_guid; + attr->max_mr_size = qattr->max_mr_size; + attr->page_size_cap = qattr->page_size_caps; + attr->vendor_id = qattr->vendor_id; + attr->vendor_part_id = qattr->vendor_part_id; + attr->hw_ver = qattr->hw_ver; + attr->max_qp = qattr->max_qp; + attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD | + IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_MGT_EXTENSIONS; + + attr->max_sge = qattr->max_sge; + attr->max_sge_rd = qattr->max_sge; + attr->max_cq = qattr->max_cq; + attr->max_cqe = qattr->max_cqe; + attr->max_mr = qattr->max_mr; + attr->max_mw = qattr->max_mw; + attr->max_pd = qattr->max_pd; + attr->atomic_cap = dev->atomic_cap; + attr->max_fmr = qattr->max_fmr; + attr->max_map_per_fmr = 16; /* TBD: FMR */ + + /* There is an implicit assumption in some of the ib_xxx apps that the + * qp_rd_atom is smaller than the qp_init_rd_atom. Specifically, in + * communication the qp_rd_atom is passed to the other side and used as + * init_rd_atom without check device capabilities for init_rd_atom. + * for this reason, we set the qp_rd_atom to be the minimum between the + * two...There is an additional assumption in mlx4 driver that the + * values are power of two, fls is performed on the value - 1, which + * in fact gives a larger power of two for values which are not a power + * of two. This should be fixed in mlx4 driver, but until then -> + * we provide a value that is a power of two in our code. 
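	 *
	 * Worked example of the rounding applied below, assuming the
	 * usual 1-based find-last-set semantics for fls(): for a
	 * firmware value of 24 (0b11000), fls(24) = 5, so
	 * 1 << (fls(24) - 1) = 16, i.e. the capability is rounded
	 * down to the nearest power of two.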
+ */ + attr->max_qp_init_rd_atom = + 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1); + attr->max_qp_rd_atom = + min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1), + attr->max_qp_init_rd_atom); + + attr->max_srq = qattr->max_srq; + attr->max_srq_sge = qattr->max_srq_sge; + attr->max_srq_wr = qattr->max_srq_wr; + + /* TODO: R&D to more properly configure the following */ + attr->local_ca_ack_delay = qattr->dev_ack_delay; + attr->max_fast_reg_page_list_len = qattr->max_mr/8; + attr->max_pkeys = QLNXR_ROCE_PKEY_MAX; + attr->max_ah = qattr->max_ah; + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +static inline void +get_link_speed_and_width(int speed, uint8_t *ib_speed, uint8_t *ib_width) +{ + switch (speed) { + case 1000: + *ib_speed = IB_SPEED_SDR; + *ib_width = IB_WIDTH_1X; + break; + case 10000: + *ib_speed = IB_SPEED_QDR; + *ib_width = IB_WIDTH_1X; + break; + + case 20000: + *ib_speed = IB_SPEED_DDR; + *ib_width = IB_WIDTH_4X; + break; + + case 25000: + *ib_speed = IB_SPEED_EDR; + *ib_width = IB_WIDTH_1X; + break; + + case 40000: + *ib_speed = IB_SPEED_QDR; + *ib_width = IB_WIDTH_4X; + break; + + case 50000: + *ib_speed = IB_SPEED_QDR; + *ib_width = IB_WIDTH_4X; // TODO doesn't add up to 50... + break; + + case 100000: + *ib_speed = IB_SPEED_EDR; + *ib_width = IB_WIDTH_4X; + break; + + default: + /* Unsupported */ + *ib_speed = IB_SPEED_SDR; + *ib_width = IB_WIDTH_1X; + } + return; +} + +int +qlnxr_query_port(struct ib_device *ibdev, uint8_t port, + struct ib_port_attr *attr) +{ + struct qlnxr_dev *dev; + struct ecore_rdma_port *rdma_port; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (port > 1) { + QL_DPRINT12(ha, "port [%d] > 1 \n", port); + return -EINVAL; + } + + if (dev->rdma_ctx == NULL) { + QL_DPRINT12(ha, "rdma_ctx == NULL\n"); + return -EINVAL; + } + + rdma_port = ecore_rdma_query_port(dev->rdma_ctx); + memset(attr, 0, sizeof *attr); + + if (rdma_port->port_state == ECORE_RDMA_PORT_UP) { + attr->state = IB_PORT_ACTIVE; + attr->phys_state = 5; + } else { + attr->state = IB_PORT_DOWN; + attr->phys_state = 3; + } + + attr->max_mtu = IB_MTU_4096; + attr->active_mtu = iboe_get_mtu(dev->ha->ifp->if_mtu); + attr->lid = 0; + attr->lmc = 0; + attr->sm_lid = 0; + attr->sm_sl = 0; + attr->port_cap_flags = 0; + + if (QLNX_IS_IWARP(dev)) { + attr->gid_tbl_len = 1; + attr->pkey_tbl_len = 1; + } else { + attr->gid_tbl_len = QLNXR_MAX_SGID; + attr->pkey_tbl_len = QLNXR_ROCE_PKEY_TABLE_LEN; + } + + attr->bad_pkey_cntr = rdma_port->pkey_bad_counter; + attr->qkey_viol_cntr = 0; + + get_link_speed_and_width(rdma_port->link_speed, + &attr->active_speed, &attr->active_width); + + attr->max_msg_sz = rdma_port->max_msg_size; + attr->max_vl_num = 4; /* TODO -> figure this one out... 
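	 * (a guess: 4 would mean VL0-VL3; RoCE devices
	 * typically report a small fixed virtual-lane count)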
*/ + + QL_DPRINT12(ha, "state = %d phys_state = %d " + " link_speed = %d active_speed = %d active_width = %d" + " attr->gid_tbl_len = %d attr->pkey_tbl_len = %d" + " max_msg_sz = 0x%x max_vl_num = 0x%x \n", + attr->state, attr->phys_state, + rdma_port->link_speed, attr->active_speed, + attr->active_width, attr->gid_tbl_len, attr->pkey_tbl_len, + attr->max_msg_sz, attr->max_vl_num); + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +int +qlnxr_modify_port(struct ib_device *ibdev, uint8_t port, int mask, + struct ib_port_modify *props) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (port > 1) { + QL_DPRINT12(ha, "port (%d) > 1\n", port); + return -EINVAL; + } + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +enum rdma_link_layer +qlnxr_link_layer(struct ib_device *ibdev, uint8_t port_num) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "ibdev = %p port_num = 0x%x\n", ibdev, port_num); + + return IB_LINK_LAYER_ETHERNET; +} + +struct ib_pd * +qlnxr_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct qlnxr_pd *pd = NULL; + u16 pd_id; + int rc; + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "ibdev = %p context = %p" + " udata = %p enter\n", ibdev, context, udata); + + if (dev->rdma_ctx == NULL) { + QL_DPRINT11(ha, "dev->rdma_ctx = NULL\n"); + rc = -1; + goto err; + } + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) { + rc = -ENOMEM; + QL_DPRINT11(ha, "kzalloc(pd) = NULL\n"); + goto err; + } + + rc = ecore_rdma_alloc_pd(dev->rdma_ctx, &pd_id); + if (rc) { + QL_DPRINT11(ha, "ecore_rdma_alloc_pd failed\n"); + goto err; + } + + pd->pd_id = pd_id; + + if (udata && context) { + + rc = ib_copy_to_udata(udata, &pd->pd_id, sizeof(pd->pd_id)); + if (rc) { + QL_DPRINT11(ha, "ib_copy_to_udata failed\n"); + ecore_rdma_free_pd(dev->rdma_ctx, pd_id); + goto err; + } + + pd->uctx = get_qlnxr_ucontext(context); + pd->uctx->pd = pd; + } + + atomic_add_rel_32(&dev->pd_count, 1); + QL_DPRINT12(ha, "exit [pd, pd_id, pd_count] = [%p, 0x%x, %d]\n", + pd, pd_id, dev->pd_count); + + return &pd->ibpd; + +err: + kfree(pd); + QL_DPRINT12(ha, "exit -1\n"); + return ERR_PTR(rc); +} + +int +qlnxr_dealloc_pd(struct ib_pd *ibpd) +{ + struct qlnxr_pd *pd; + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + pd = get_qlnxr_pd(ibpd); + dev = get_qlnxr_dev((ibpd->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (pd == NULL) { + QL_DPRINT11(ha, "pd = NULL\n"); + } else { + ecore_rdma_free_pd(dev->rdma_ctx, pd->pd_id); + kfree(pd); + atomic_subtract_rel_32(&dev->pd_count, 1); + QL_DPRINT12(ha, "exit [pd, pd_id, pd_count] = [%p, 0x%x, %d]\n", + pd, pd->pd_id, dev->pd_count); + } + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +#define ROCE_WQE_ELEM_SIZE sizeof(struct rdma_sq_sge) +#define RDMA_MAX_SGE_PER_SRQ (4) /* Should be part of HSI */ +/* Should be part of HSI */ +#define RDMA_MAX_SRQ_WQE_SIZE (RDMA_MAX_SGE_PER_SRQ + 1) /* +1 for header */ +#define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) + +static void qlnxr_cleanup_user(struct qlnxr_dev *, struct qlnxr_qp *); +static void qlnxr_cleanup_kernel(struct qlnxr_dev *, struct qlnxr_qp *); + +int +qlnxr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter 
index = 0x%x\n", index); + + if (index > QLNXR_ROCE_PKEY_TABLE_LEN) + return -EINVAL; + + *pkey = QLNXR_ROCE_PKEY_DEFAULT; + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + + +static inline bool +qlnxr_get_vlan_id_qp(qlnx_host_t *ha, struct ib_qp_attr *attr, int attr_mask, + u16 *vlan_id) +{ + bool ret = false; + + QL_DPRINT12(ha, "enter \n"); + + *vlan_id = 0; + +#if __FreeBSD_version >= 1100000 + u16 tmp_vlan_id; + +#if __FreeBSD_version >= 1102000 + union ib_gid *dgid; + + dgid = &attr->ah_attr.grh.dgid; + tmp_vlan_id = (dgid->raw[11] << 8) | dgid->raw[12]; + + if (!(tmp_vlan_id & ~EVL_VLID_MASK)) { + *vlan_id = tmp_vlan_id; + ret = true; + } +#else + tmp_vlan_id = attr->vlan_id; + + if ((attr_mask & IB_QP_VID) && (!(tmp_vlan_id & ~EVL_VLID_MASK))) { + *vlan_id = tmp_vlan_id; + ret = true; + } + +#endif /* #if __FreeBSD_version > 1102000 */ + +#else + ret = true; + +#endif /* #if __FreeBSD_version >= 1100000 */ + + QL_DPRINT12(ha, "exit vlan_id = 0x%x ret = %d \n", *vlan_id, ret); + + return (ret); +} + +static inline void +get_gid_info(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, + struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ecore_rdma_modify_qp_in_params *qp_params) +{ + int i; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memcpy(&qp_params->sgid.bytes[0], + &dev->sgid_tbl[qp->sgid_idx].raw[0], + sizeof(qp_params->sgid.bytes)); + memcpy(&qp_params->dgid.bytes[0], + &attr->ah_attr.grh.dgid.raw[0], + sizeof(qp_params->dgid)); + + qlnxr_get_vlan_id_qp(ha, attr, attr_mask, &qp_params->vlan_id); + + for (i = 0; i < (sizeof(qp_params->sgid.dwords)/sizeof(uint32_t)); i++) { + qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]); + qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]); + } + + QL_DPRINT12(ha, "exit\n"); + return; +} + + + +static int +qlnxr_add_mmap(struct qlnxr_ucontext *uctx, u64 phy_addr, unsigned long len) +{ + struct qlnxr_mm *mm; + qlnx_host_t *ha; + + ha = uctx->dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + mm = kzalloc(sizeof(*mm), GFP_KERNEL); + if (mm == NULL) { + QL_DPRINT11(ha, "mm = NULL\n"); + return -ENOMEM; + } + + mm->key.phy_addr = phy_addr; + + /* This function might be called with a length which is not a multiple + * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel + * forces this granularity by increasing the requested size if needed. + * When qedr_mmap is called, it will search the list with the updated + * length as a key. To prevent search failures, the length is rounded up + * in advance to PAGE_SIZE. 
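	 *
	 * Example with 4 KiB pages: a request of len = 5000 is stored
	 * with key.len = roundup(5000, PAGE_SIZE) = 8192, so a later
	 * qlnxr_search_mmap() of the page-granular mapping length
	 * will still match.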
+ */ + mm->key.len = roundup(len, PAGE_SIZE); + INIT_LIST_HEAD(&mm->entry); + + mutex_lock(&uctx->mm_list_lock); + list_add(&mm->entry, &uctx->mm_head); + mutex_unlock(&uctx->mm_list_lock); + + QL_DPRINT12(ha, "added (addr=0x%llx,len=0x%lx) for ctx=%p\n", + (unsigned long long)mm->key.phy_addr, + (unsigned long)mm->key.len, uctx); + + return 0; +} + +static bool +qlnxr_search_mmap(struct qlnxr_ucontext *uctx, u64 phy_addr, unsigned long len) +{ + bool found = false; + struct qlnxr_mm *mm; + qlnx_host_t *ha; + + ha = uctx->dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + mutex_lock(&uctx->mm_list_lock); + list_for_each_entry(mm, &uctx->mm_head, entry) { + if (len != mm->key.len || phy_addr != mm->key.phy_addr) + continue; + + found = true; + break; + } + mutex_unlock(&uctx->mm_list_lock); + + QL_DPRINT12(ha, + "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, found=%d\n", + mm->key.phy_addr, mm->key.len, uctx, found); + + return found; +} + +struct +ib_ucontext *qlnxr_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + int rc; + struct qlnxr_ucontext *ctx; + struct qlnxr_alloc_ucontext_resp uresp; + struct qlnxr_dev *dev = get_qlnxr_dev(ibdev); + qlnx_host_t *ha = dev->ha; + struct ecore_rdma_add_user_out_params oparams; + + if (!udata) { + return ERR_PTR(-EFAULT); + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + rc = ecore_rdma_add_user(dev->rdma_ctx, &oparams); + if (rc) { + QL_DPRINT12(ha, + "Failed to allocate a DPI for a new RoCE application " + ",rc = %d. To overcome this, consider to increase " + "the number of DPIs, increase the doorbell BAR size " + "or just close unnecessary RoCE applications. In " + "order to increase the number of DPIs consult the " + "README\n", rc); + goto err; + } + + ctx->dpi = oparams.dpi; + ctx->dpi_addr = oparams.dpi_addr; + ctx->dpi_phys_addr = oparams.dpi_phys_addr; + ctx->dpi_size = oparams.dpi_size; + INIT_LIST_HEAD(&ctx->mm_head); + mutex_init(&ctx->mm_list_lock); + + memset(&uresp, 0, sizeof(uresp)); + uresp.dpm_enabled = offsetof(struct qlnxr_alloc_ucontext_resp, dpm_enabled) + < udata->outlen ? dev->user_dpm_enabled : 0; //TODO: figure this out + uresp.wids_enabled = offsetof(struct qlnxr_alloc_ucontext_resp, wids_enabled) + < udata->outlen ? 1 : 0; //TODO: figure this out + uresp.wid_count = offsetof(struct qlnxr_alloc_ucontext_resp, wid_count) + < udata->outlen ? 
oparams.wid_count : 0; //TODO: figure this out + uresp.db_pa = ctx->dpi_phys_addr; + uresp.db_size = ctx->dpi_size; + uresp.max_send_wr = dev->attr.max_sqe; + uresp.max_recv_wr = dev->attr.max_rqe; + uresp.max_srq_wr = dev->attr.max_srq_wr; + uresp.sges_per_send_wr = QLNXR_MAX_SQE_ELEMENTS_PER_SQE; + uresp.sges_per_recv_wr = QLNXR_MAX_RQE_ELEMENTS_PER_RQE; + uresp.sges_per_srq_wr = dev->attr.max_srq_sge; + uresp.max_cqes = QLNXR_MAX_CQES; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rc) + goto err; + + ctx->dev = dev; + + rc = qlnxr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size); + if (rc) + goto err; + QL_DPRINT12(ha, "Allocated user context %p\n", + &ctx->ibucontext); + + return &ctx->ibucontext; +err: + kfree(ctx); + return ERR_PTR(rc); +} + +int +qlnxr_dealloc_ucontext(struct ib_ucontext *ibctx) +{ + struct qlnxr_ucontext *uctx = get_qlnxr_ucontext(ibctx); + struct qlnxr_dev *dev = uctx->dev; + qlnx_host_t *ha = dev->ha; + struct qlnxr_mm *mm, *tmp; + int status = 0; + + QL_DPRINT12(ha, "Deallocating user context %p\n", + uctx); + + if (dev) { + ecore_rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi); + } + + list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { + QL_DPRINT12(ha, "deleted addr= 0x%llx, len = 0x%lx for" + " ctx=%p\n", + mm->key.phy_addr, mm->key.len, uctx); + list_del(&mm->entry); + kfree(mm); + } + kfree(uctx); + return status; +} + +int +qlnxr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct qlnxr_ucontext *ucontext = get_qlnxr_ucontext(context); + struct qlnxr_dev *dev = get_qlnxr_dev((context->device)); + unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; + u64 unmapped_db; + unsigned long len = (vma->vm_end - vma->vm_start); + int rc = 0; + bool found; + qlnx_host_t *ha; + + ha = dev->ha; + +#if __FreeBSD_version > 1102000 + unmapped_db = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size); +#else + unmapped_db = dev->db_phys_addr; +#endif /* #if __FreeBSD_version > 1102000 */ + + QL_DPRINT12(ha, "qedr_mmap enter vm_page=0x%lx" + " vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n", + vm_page, vma->vm_pgoff, unmapped_db, + dev->db_size, len); + + if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) { + QL_DPRINT11(ha, "Vma_start not page aligned " + "vm_start = %ld vma_end = %ld\n", vma->vm_start, + vma->vm_end); + return -EINVAL; + } + + found = qlnxr_search_mmap(ucontext, vm_page, len); + if (!found) { + QL_DPRINT11(ha, "Vma_pgoff not found in mapped array = %ld\n", + vma->vm_pgoff); + return -EINVAL; + } + + QL_DPRINT12(ha, "Mapping doorbell bar\n"); + +#if __FreeBSD_version > 1102000 + + if ((vm_page < unmapped_db) || + ((vm_page + len) > (unmapped_db + ucontext->dpi_size))) { + QL_DPRINT11(ha, "failed pages are outside of dpi;" + "page address=0x%lx, unmapped_db=0x%lx, dpi_size=0x%x\n", + vm_page, unmapped_db, ucontext->dpi_size); + return -EINVAL; + } + + if (vma->vm_flags & VM_READ) { + QL_DPRINT11(ha, "failed mmap, cannot map doorbell bar for read\n"); + return -EINVAL; + } + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, + vma->vm_page_prot); + +#else + + if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db + + dev->db_size))) { + + QL_DPRINT12(ha, "Mapping doorbell bar\n"); + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + + rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + PAGE_SIZE, vma->vm_page_prot); + } else { + QL_DPRINT12(ha, "Mapping chains\n"); + rc = 
io_remap_pfn_range(vma, vma->vm_start, + vma->vm_pgoff, len, vma->vm_page_prot); + } + +#endif /* #if __FreeBSD_version > 1102000 */ + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +struct ib_mr * +qlnxr_get_dma_mr(struct ib_pd *ibpd, int acc) +{ + struct qlnxr_mr *mr; + struct qlnxr_dev *dev = get_qlnxr_dev((ibpd->device)); + struct qlnxr_pd *pd = get_qlnxr_pd(ibpd); + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (acc & IB_ACCESS_MW_BIND) { + QL_DPRINT12(ha, "Unsupported access flags received for dma mr\n"); + } + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + rc = -ENOMEM; + QL_DPRINT12(ha, "kzalloc(mr) failed %d\n", rc); + goto err0; + } + + mr->type = QLNXR_MR_DMA; + + rc = ecore_rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + if (rc) { + QL_DPRINT12(ha, "ecore_rdma_alloc_tid failed %d\n", rc); + goto err1; + } + + /* index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = ECORE_RDMA_TID_REGISTERED_MR; + mr->hw_mr.pd = pd->pd_id; + mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hw_mr.dma_mr = true; + + rc = ecore_rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + QL_DPRINT12(ha, "ecore_rdma_register_tid failed %d\n", rc); + goto err2; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + + if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || + mr->hw_mr.remote_atomic) { + mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + } + + QL_DPRINT12(ha, "lkey = %x\n", mr->ibmr.lkey); + + return &mr->ibmr; + +err2: + ecore_rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err1: + kfree(mr); +err0: + QL_DPRINT12(ha, "exit [%d]\n", rc); + + return ERR_PTR(rc); +} + +static void +qlnxr_free_pbl(struct qlnxr_dev *dev, struct qlnxr_pbl_info *pbl_info, + struct qlnxr_pbl *pbl) +{ + int i; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + for (i = 0; i < pbl_info->num_pbls; i++) { + if (!pbl[i].va) + continue; + qlnx_dma_free_coherent(&dev->ha->cdev, pbl[i].va, pbl[i].pa, + pbl_info->pbl_size); + } + kfree(pbl); + + QL_DPRINT12(ha, "exit\n"); + return; +} + +#define MIN_FW_PBL_PAGE_SIZE (4*1024) +#define MAX_FW_PBL_PAGE_SIZE (64*1024) + +#define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64)) +#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE) +#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE*MAX_PBES_ON_PAGE) + +static struct qlnxr_pbl * +qlnxr_alloc_pbl_tbl(struct qlnxr_dev *dev, + struct qlnxr_pbl_info *pbl_info, gfp_t flags) +{ + void *va; + dma_addr_t pa; + dma_addr_t *pbl_main_tbl; + struct qlnxr_pbl *pbl_table; + int i, rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + pbl_table = kzalloc(sizeof(*pbl_table) * pbl_info->num_pbls, flags); + + if (!pbl_table) { + QL_DPRINT12(ha, "pbl_table = NULL\n"); + return NULL; + } + + for (i = 0; i < pbl_info->num_pbls; i++) { + va = qlnx_dma_alloc_coherent(&dev->ha->cdev, &pa, pbl_info->pbl_size); + if (!va) { + QL_DPRINT11(ha, "Failed to allocate pbl#%d\n", i); + rc = -ENOMEM; + goto err; + } + memset(va, 0, pbl_info->pbl_size); + pbl_table[i].va = va; + pbl_table[i].pa = pa; + } + + /* Two-Layer PBLs, if we have more than one pbl we need to initialize + * the first one with physical pointers to all of the rest + */ + pbl_main_tbl = (dma_addr_t 
*)pbl_table[0].va; + for (i = 0; i < pbl_info->num_pbls - 1; i++) + pbl_main_tbl[i] = pbl_table[i + 1].pa; + + QL_DPRINT12(ha, "exit\n"); + return pbl_table; + +err: + qlnxr_free_pbl(dev, pbl_info, pbl_table); + + QL_DPRINT12(ha, "exit with error\n"); + return NULL; +} + +static int +qlnxr_prepare_pbl_tbl(struct qlnxr_dev *dev, + struct qlnxr_pbl_info *pbl_info, + u32 num_pbes, + int two_layer_capable) +{ + u32 pbl_capacity; + u32 pbl_size; + u32 num_pbls; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) { + if (num_pbes > MAX_PBES_TWO_LAYER) { + QL_DPRINT11(ha, "prepare pbl table: too many pages %d\n", + num_pbes); + return -EINVAL; + } + + /* calculate required pbl page size */ + pbl_size = MIN_FW_PBL_PAGE_SIZE; + pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) * + NUM_PBES_ON_PAGE(pbl_size); + + while (pbl_capacity < num_pbes) { + pbl_size *= 2; + pbl_capacity = pbl_size / sizeof(u64); + pbl_capacity = pbl_capacity * pbl_capacity; + } + + num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size)); + num_pbls++; /* One for the layer0 ( points to the pbls) */ + pbl_info->two_layered = true; + } else { + /* One layered PBL */ + num_pbls = 1; + pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE, \ + roundup_pow_of_two((num_pbes * sizeof(u64)))); + pbl_info->two_layered = false; + } + + pbl_info->num_pbls = num_pbls; + pbl_info->pbl_size = pbl_size; + pbl_info->num_pbes = num_pbes; + + QL_DPRINT12(ha, "prepare pbl table: num_pbes=%d, num_pbls=%d pbl_size=%d\n", + pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size); + + return 0; +} + +#define upper_32_bits(x) (uint32_t)(x >> 32) +#define lower_32_bits(x) (uint32_t)(x) + +static void +qlnxr_populate_pbls(struct qlnxr_dev *dev, struct ib_umem *umem, + struct qlnxr_pbl *pbl, struct qlnxr_pbl_info *pbl_info) +{ + struct regpair *pbe; + struct qlnxr_pbl *pbl_tbl; + struct scatterlist *sg; + int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0; + qlnx_host_t *ha; + +#ifdef DEFINE_IB_UMEM_WITH_CHUNK + int i; + struct ib_umem_chunk *chunk = NULL; +#else + int entry; +#endif + + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (!pbl_info) { + QL_DPRINT11(ha, "PBL_INFO not initialized\n"); + return; + } + + if (!pbl_info->num_pbes) { + QL_DPRINT11(ha, "pbl_info->num_pbes == 0\n"); + return; + } + + /* If we have a two layered pbl, the first pbl points to the rest + * of the pbls and the first entry lays on the second pbl in the table + */ + if (pbl_info->two_layered) + pbl_tbl = &pbl[1]; + else + pbl_tbl = pbl; + + pbe = (struct regpair *)pbl_tbl->va; + if (!pbe) { + QL_DPRINT12(ha, "pbe is NULL\n"); + return; + } + + pbe_cnt = 0; + + shift = ilog2(umem->page_size); + +#ifndef DEFINE_IB_UMEM_WITH_CHUNK + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + +#else + list_for_each_entry(chunk, &umem->chunk_list, list) { + /* get all the dma regions from the chunk. 
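		 * Each pbe filled in below is a regpair holding the low
		 * and high 32 bits of a page's DMA address; the inner
		 * loop steps through every page_size-sized page of the
		 * current scatterlist segment.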
*/ + for (i = 0; i < chunk->nmap; i++) { + sg = &chunk->page_list[i]; +#endif + pages = sg_dma_len(sg) >> shift; + for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { + /* store the page address in pbe */ + pbe->lo = + cpu_to_le32(sg_dma_address(sg) + + (umem->page_size * pg_cnt)); + pbe->hi = + cpu_to_le32(upper_32_bits + ((sg_dma_address(sg) + + umem->page_size * pg_cnt))); + + QL_DPRINT12(ha, + "Populate pbl table:" + " pbe->addr=0x%x:0x%x " + " pbe_cnt = %d total_num_pbes=%d" + " pbe=%p\n", pbe->lo, pbe->hi, pbe_cnt, + total_num_pbes, pbe); + + pbe_cnt ++; + total_num_pbes ++; + pbe++; + + if (total_num_pbes == pbl_info->num_pbes) + return; + + /* if the given pbl is full storing the pbes, + * move to next pbl. + */ + if (pbe_cnt == + (pbl_info->pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct regpair *)pbl_tbl->va; + pbe_cnt = 0; + } + } +#ifdef DEFINE_IB_UMEM_WITH_CHUNK + } +#endif + } + QL_DPRINT12(ha, "exit\n"); + return; +} + +static void +free_mr_info(struct qlnxr_dev *dev, struct mr_info *info) +{ + struct qlnxr_pbl *pbl, *tmp; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (info->pbl_table) + list_add_tail(&info->pbl_table->list_entry, + &info->free_pbl_list); + + if (!list_empty(&info->inuse_pbl_list)) + list_splice(&info->inuse_pbl_list, &info->free_pbl_list); + + list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) { + list_del(&pbl->list_entry); + qlnxr_free_pbl(dev, &info->pbl_info, pbl); + } + QL_DPRINT12(ha, "exit\n"); + + return; +} + +static int +qlnxr_init_mr_info(struct qlnxr_dev *dev, struct mr_info *info, + size_t page_list_len, bool two_layered) +{ + int rc; + struct qlnxr_pbl *tmp; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + INIT_LIST_HEAD(&info->free_pbl_list); + INIT_LIST_HEAD(&info->inuse_pbl_list); + + rc = qlnxr_prepare_pbl_tbl(dev, &info->pbl_info, + page_list_len, two_layered); + if (rc) { + QL_DPRINT11(ha, "qlnxr_prepare_pbl_tbl [%d]\n", rc); + goto done; + } + + info->pbl_table = qlnxr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); + + if (!info->pbl_table) { + rc = -ENOMEM; + QL_DPRINT11(ha, "qlnxr_alloc_pbl_tbl returned NULL\n"); + goto done; + } + + QL_DPRINT12(ha, "pbl_table_pa = %pa\n", &info->pbl_table->pa); + + /* in usual case we use 2 PBLs, so we add one to free + * list and allocating another one + */ + tmp = qlnxr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); + + if (!tmp) { + QL_DPRINT11(ha, "Extra PBL is not allocated\n"); + goto done; /* it's OK if second allocation fails, so rc = 0*/ + } + + list_add_tail(&tmp->list_entry, &info->free_pbl_list); + + QL_DPRINT12(ha, "extra pbl_table_pa = %pa\n", &tmp->pa); + +done: + if (rc) + free_mr_info(dev, info); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + + return rc; +} + + +struct ib_mr * +#if __FreeBSD_version >= 1102000 +qlnxr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, + u64 usr_addr, int acc, struct ib_udata *udata) +#else +qlnxr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, + u64 usr_addr, int acc, struct ib_udata *udata, int mr_id) +#endif /* #if __FreeBSD_version >= 1102000 */ +{ + int rc = -ENOMEM; + struct qlnxr_dev *dev = get_qlnxr_dev((ibpd->device)); + struct qlnxr_mr *mr; + struct qlnxr_pd *pd; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + pd = get_qlnxr_pd(ibpd); + + QL_DPRINT12(ha, "qedr_register user mr pd = %d" + " start = %lld, len = %lld, usr_addr = %lld, acc = %d\n", + pd->pd_id, start, len, usr_addr, acc); + + if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & 
IB_ACCESS_LOCAL_WRITE)) { + QL_DPRINT11(ha, + "(acc & IB_ACCESS_REMOTE_WRITE &&" + " !(acc & IB_ACCESS_LOCAL_WRITE))\n"); + return ERR_PTR(-EINVAL); + } + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + QL_DPRINT11(ha, "kzalloc(mr) failed\n"); + return ERR_PTR(rc); + } + + mr->type = QLNXR_MR_USER; + + mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0); + if (IS_ERR(mr->umem)) { + rc = -EFAULT; + QL_DPRINT11(ha, "ib_umem_get failed [%p]\n", mr->umem); + goto err0; + } + + rc = qlnxr_init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1); + if (rc) { + QL_DPRINT11(ha, + "qlnxr_init_mr_info failed [%d]\n", rc); + goto err1; + } + + qlnxr_populate_pbls(dev, mr->umem, mr->info.pbl_table, + &mr->info.pbl_info); + + rc = ecore_rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + + if (rc) { + QL_DPRINT11(ha, "roce alloc tid returned an error %d\n", rc); + goto err1; + } + + /* index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = ECORE_RDMA_TID_REGISTERED_MR; + mr->hw_mr.key = 0; + mr->hw_mr.pd = pd->pd_id; + mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hw_mr.mw_bind = false; /* TBD MW BIND */ + mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa; + mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; + mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); + mr->hw_mr.page_size_log = ilog2(mr->umem->page_size); /* for the MR pages */ + +#if __FreeBSD_version >= 1102000 + mr->hw_mr.fbo = ib_umem_offset(mr->umem); +#else + mr->hw_mr.fbo = mr->umem->offset; +#endif + mr->hw_mr.length = len; + mr->hw_mr.vaddr = usr_addr; + mr->hw_mr.zbva = false; /* TBD figure when this should be true */ + mr->hw_mr.phy_mr = false; /* Fast MR - True, Regular Register False */ + mr->hw_mr.dma_mr = false; + + rc = ecore_rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + QL_DPRINT11(ha, "roce register tid returned an error %d\n", rc); + goto err2; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || + mr->hw_mr.remote_atomic) + mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + + QL_DPRINT12(ha, "register user mr lkey: %x\n", mr->ibmr.lkey); + + return (&mr->ibmr); + +err2: + ecore_rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err1: + qlnxr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); +err0: + kfree(mr); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return (ERR_PTR(rc)); +} + +int +qlnxr_dereg_mr(struct ib_mr *ib_mr) +{ + struct qlnxr_mr *mr = get_qlnxr_mr(ib_mr); + struct qlnxr_dev *dev = get_qlnxr_dev((ib_mr->device)); + int rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if ((mr->type != QLNXR_MR_DMA) && (mr->type != QLNXR_MR_FRMR)) + qlnxr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); + + /* it could be user registered memory. 
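	 * (only QLNXR_MR_USER registrations take an ib_umem reference;
	 * DMA and fast-reg MRs leave mr->umem NULL, so the check below
	 * is what keeps this path safe for them)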
*/ + if (mr->umem) + ib_umem_release(mr->umem); + + kfree(mr->pages); + + kfree(mr); + + QL_DPRINT12(ha, "exit\n"); + return rc; +} + +static int +qlnxr_copy_cq_uresp(struct qlnxr_dev *dev, + struct qlnxr_cq *cq, struct ib_udata *udata) +{ + struct qlnxr_create_cq_uresp uresp; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memset(&uresp, 0, sizeof(uresp)); + + uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + uresp.icid = cq->icid; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + + if (rc) { + QL_DPRINT12(ha, "ib_copy_to_udata error cqid=0x%x[%d]\n", + cq->icid, rc); + } + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +static void +consume_cqe(struct qlnxr_cq *cq) +{ + + if (cq->latest_cqe == cq->toggle_cqe) + cq->pbl_toggle ^= RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT_MASK; + + cq->latest_cqe = ecore_chain_consume(&cq->pbl); +} + +static inline int +qlnxr_align_cq_entries(int entries) +{ + u64 size, aligned_size; + + /* We allocate an extra entry that we don't report to the FW. + * Why? + * The CQE size is 32 bytes but the FW writes in chunks of 64 bytes + * (for performance purposes). Allocating an extra entry and telling + * the FW we have less prevents overwriting the first entry in case of + * a wrap i.e. when the FW writes the last entry and the application + * hasn't read the first one. + */ + size = (entries + 1) * QLNXR_CQE_SIZE; + + /* We align to PAGE_SIZE. + * Why? + * Since the CQ is going to be mapped and the mapping is anyhow in whole + * kernel pages we benefit from the possibly extra CQEs. + */ + aligned_size = ALIGN(size, PAGE_SIZE); + + /* note: for CQs created in user space the result of this function + * should match the size mapped in user space + */ + return (aligned_size / QLNXR_CQE_SIZE); +} + +static inline int +qlnxr_init_user_queue(struct ib_ucontext *ib_ctx, struct qlnxr_dev *dev, + struct qlnxr_userq *q, u64 buf_addr, size_t buf_len, + int access, int dmasync, int alloc_and_init) +{ + int page_cnt; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + q->buf_addr = buf_addr; + q->buf_len = buf_len; + + QL_DPRINT12(ha, "buf_addr : %llx, buf_len : %x, access : %x" + " dmasync : %x\n", q->buf_addr, q->buf_len, + access, dmasync); + + q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync); + + if (IS_ERR(q->umem)) { + QL_DPRINT11(ha, "ib_umem_get failed [%lx]\n", PTR_ERR(q->umem)); + return PTR_ERR(q->umem); + } + + page_cnt = ib_umem_page_count(q->umem); + rc = qlnxr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, + 0 /* SQ and RQ don't support dual layer pbl. + * CQ may, but this is yet uncoded. 
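		 * (the 0 passed as the two_layer_capable argument
		 * forces a single-layer PBL for these queues)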
+ */); + if (rc) { + QL_DPRINT11(ha, "qlnxr_prepare_pbl_tbl failed [%d]\n", rc); + goto err; + } + + if (alloc_and_init) { + q->pbl_tbl = qlnxr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL); + + if (!q->pbl_tbl) { + QL_DPRINT11(ha, "qlnxr_alloc_pbl_tbl failed\n"); + rc = -ENOMEM; + goto err; + } + + qlnxr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info); + } else { + q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL); + + if (!q->pbl_tbl) { + QL_DPRINT11(ha, "qlnxr_alloc_pbl_tbl failed\n"); + rc = -ENOMEM; + goto err; + } + } + + QL_DPRINT12(ha, "exit\n"); + return 0; + +err: + ib_umem_release(q->umem); + q->umem = NULL; + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +#if __FreeBSD_version >= 1102000 + +struct ib_cq * +qlnxr_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata) + +#else + +#if __FreeBSD_version >= 1100000 + +struct ib_cq * +qlnxr_create_cq(struct ib_device *ibdev, + struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata) + +#else + +struct ib_cq * +qlnxr_create_cq(struct ib_device *ibdev, + int entries, + int vector, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata) +#endif /* #if __FreeBSD_version >= 1100000 */ + +#endif /* #if __FreeBSD_version >= 1102000 */ +{ + struct qlnxr_ucontext *ctx; + struct ecore_rdma_destroy_cq_out_params destroy_oparams; + struct ecore_rdma_destroy_cq_in_params destroy_iparams; + struct qlnxr_dev *dev; + struct ecore_rdma_create_cq_in_params params; + struct qlnxr_create_cq_ureq ureq; + +#if __FreeBSD_version >= 1100000 + int vector = attr->comp_vector; + int entries = attr->cqe; +#endif + struct qlnxr_cq *cq; + int chain_entries, rc, page_cnt; + u64 pbl_ptr; + u16 icid; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "called from %s. entries = %d, " + "vector = %d\n", + (udata ? "User Lib" : "Kernel"), entries, vector); + + memset(¶ms, 0, sizeof(struct ecore_rdma_create_cq_in_params)); + memset(&destroy_iparams, 0, sizeof(struct ecore_rdma_destroy_cq_in_params)); + memset(&destroy_oparams, 0, sizeof(struct ecore_rdma_destroy_cq_out_params)); + + if (entries > QLNXR_MAX_CQES) { + QL_DPRINT11(ha, + "the number of entries %d is too high. 
" + "Must be equal or below %d.\n", + entries, QLNXR_MAX_CQES); + return ERR_PTR(-EINVAL); + } + chain_entries = qlnxr_align_cq_entries(entries); + chain_entries = min_t(int, chain_entries, QLNXR_MAX_CQES); + + cq = qlnx_zalloc((sizeof(struct qlnxr_cq))); + + if (!cq) + return ERR_PTR(-ENOMEM); + + if (udata) { + memset(&ureq, 0, sizeof(ureq)); + + if (ib_copy_from_udata(&ureq, udata, + min(sizeof(ureq), udata->inlen))) { + QL_DPRINT11(ha, "ib_copy_from_udata failed\n"); + goto err0; + } + + if (!ureq.len) { + QL_DPRINT11(ha, "ureq.len == 0\n"); + goto err0; + } + + cq->cq_type = QLNXR_CQ_TYPE_USER; + + qlnxr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr, ureq.len, + IB_ACCESS_LOCAL_WRITE, 1, 1); + + pbl_ptr = cq->q.pbl_tbl->pa; + page_cnt = cq->q.pbl_info.num_pbes; + cq->ibcq.cqe = chain_entries; + } else { + cq->cq_type = QLNXR_CQ_TYPE_KERNEL; + + rc = ecore_chain_alloc(&dev->ha->cdev, + ECORE_CHAIN_USE_TO_CONSUME, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U32, + chain_entries, + sizeof(union roce_cqe), + &cq->pbl, NULL); + + if (rc) + goto err1; + + page_cnt = ecore_chain_get_page_cnt(&cq->pbl); + pbl_ptr = ecore_chain_get_pbl_phys(&cq->pbl); + cq->ibcq.cqe = cq->pbl.capacity; + } + + params.cq_handle_hi = upper_32_bits((uintptr_t)cq); + params.cq_handle_lo = lower_32_bits((uintptr_t)cq); + params.cnq_id = vector; + params.cq_size = chain_entries - 1; + params.pbl_num_pages = page_cnt; + params.pbl_ptr = pbl_ptr; + params.pbl_two_level = 0; + + if (ib_ctx != NULL) { + ctx = get_qlnxr_ucontext(ib_ctx); + params.dpi = ctx->dpi; + } else { + params.dpi = dev->dpi; + } + + rc = ecore_rdma_create_cq(dev->rdma_ctx, ¶ms, &icid); + if (rc) + goto err2; + + cq->icid = icid; + cq->sig = QLNXR_CQ_MAGIC_NUMBER; + spin_lock_init(&cq->cq_lock); + + if (ib_ctx) { + rc = qlnxr_copy_cq_uresp(dev, cq, udata); + if (rc) + goto err3; + } else { + /* Generate doorbell address. + * Configure bits 3-9 with DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT. + * TODO: consider moving to device scope as it is a function of + * the device. + * TODO: add ifdef if plan to support 16 bit. 
+ */ + cq->db_addr = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + cq->db.data.icid = cq->icid; + cq->db.data.params = DB_AGG_CMD_SET << + RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT; + + /* point to the very last element, passing it we will toggle */ + cq->toggle_cqe = ecore_chain_get_last_elem(&cq->pbl); + cq->pbl_toggle = RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT_MASK; + + /* must be different from pbl_toggle */ + cq->latest_cqe = NULL; + consume_cqe(cq); + cq->cq_cons = ecore_chain_get_cons_idx_u32(&cq->pbl); + } + + QL_DPRINT12(ha, "exit icid = 0x%0x, addr = %p," + " number of entries = 0x%x\n", + cq->icid, cq, params.cq_size); + QL_DPRINT12(ha,"cq_addr = %p\n", cq); + return &cq->ibcq; + +err3: + destroy_iparams.icid = cq->icid; + ecore_rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams, &destroy_oparams); +err2: + if (udata) + qlnxr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); + else + ecore_chain_free(&dev->ha->cdev, &cq->pbl); +err1: + if (udata) + ib_umem_release(cq->q.umem); +err0: + kfree(cq); + + QL_DPRINT12(ha, "exit error\n"); + + return ERR_PTR(-EINVAL); +} + +int qlnxr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) +{ + int status = 0; + struct qlnxr_dev *dev = get_qlnxr_dev((ibcq->device)); + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter/exit\n"); + + return status; +} + +int +qlnxr_destroy_cq(struct ib_cq *ibcq) +{ + struct qlnxr_dev *dev = get_qlnxr_dev((ibcq->device)); + struct ecore_rdma_destroy_cq_out_params oparams; + struct ecore_rdma_destroy_cq_in_params iparams; + struct qlnxr_cq *cq = get_qlnxr_cq(ibcq); + int rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter cq_id = %d\n", cq->icid); + + cq->destroyed = 1; + + /* TODO: Syncronize irq of the CNQ the CQ belongs to for validation + * that all completions with notification are dealt with. 
The rest + * of the completions are not interesting + */ + + /* GSIs CQs are handled by driver, so they don't exist in the FW */ + + if (cq->cq_type != QLNXR_CQ_TYPE_GSI) { + + iparams.icid = cq->icid; + + rc = ecore_rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); + + if (rc) { + QL_DPRINT12(ha, "ecore_rdma_destroy_cq failed cq_id = %d\n", + cq->icid); + return rc; + } + + QL_DPRINT12(ha, "free cq->pbl cq_id = %d\n", cq->icid); + ecore_chain_free(&dev->ha->cdev, &cq->pbl); + } + + if (ibcq->uobject && ibcq->uobject->context) { + qlnxr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); + ib_umem_release(cq->q.umem); + } + + cq->sig = ~cq->sig; + + kfree(cq); + + QL_DPRINT12(ha, "exit cq_id = %d\n", cq->icid); + + return rc; +} + +static int +qlnxr_check_qp_attrs(struct ib_pd *ibpd, + struct qlnxr_dev *dev, + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) +{ + struct ecore_rdma_device *qattr; + qlnx_host_t *ha; + + qattr = ecore_rdma_query_device(dev->rdma_ctx); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + QL_DPRINT12(ha, "attrs->sq_sig_type = %d\n", attrs->sq_sig_type); + QL_DPRINT12(ha, "attrs->qp_type = %d\n", attrs->qp_type); + QL_DPRINT12(ha, "attrs->create_flags = %d\n", attrs->create_flags); + +#if __FreeBSD_version < 1102000 + QL_DPRINT12(ha, "attrs->qpg_type = %d\n", attrs->qpg_type); +#endif + + QL_DPRINT12(ha, "attrs->port_num = %d\n", attrs->port_num); + QL_DPRINT12(ha, "attrs->cap.max_send_wr = 0x%x\n", attrs->cap.max_send_wr); + QL_DPRINT12(ha, "attrs->cap.max_recv_wr = 0x%x\n", attrs->cap.max_recv_wr); + QL_DPRINT12(ha, "attrs->cap.max_send_sge = 0x%x\n", attrs->cap.max_send_sge); + QL_DPRINT12(ha, "attrs->cap.max_recv_sge = 0x%x\n", attrs->cap.max_recv_sge); + QL_DPRINT12(ha, "attrs->cap.max_inline_data = 0x%x\n", + attrs->cap.max_inline_data); + +#if __FreeBSD_version < 1102000 + QL_DPRINT12(ha, "attrs->cap.qpg_tss_mask_sz = 0x%x\n", + attrs->cap.qpg_tss_mask_sz); +#endif + + QL_DPRINT12(ha, "\n\nqattr->vendor_id = 0x%x\n", qattr->vendor_id); + QL_DPRINT12(ha, "qattr->vendor_part_id = 0x%x\n", qattr->vendor_part_id); + QL_DPRINT12(ha, "qattr->hw_ver = 0x%x\n", qattr->hw_ver); + QL_DPRINT12(ha, "qattr->fw_ver = %p\n", (void *)qattr->fw_ver); + QL_DPRINT12(ha, "qattr->node_guid = %p\n", (void *)qattr->node_guid); + QL_DPRINT12(ha, "qattr->sys_image_guid = %p\n", + (void *)qattr->sys_image_guid); + QL_DPRINT12(ha, "qattr->max_cnq = 0x%x\n", qattr->max_cnq); + QL_DPRINT12(ha, "qattr->max_sge = 0x%x\n", qattr->max_sge); + QL_DPRINT12(ha, "qattr->max_srq_sge = 0x%x\n", qattr->max_srq_sge); + QL_DPRINT12(ha, "qattr->max_inline = 0x%x\n", qattr->max_inline); + QL_DPRINT12(ha, "qattr->max_wqe = 0x%x\n", qattr->max_wqe); + QL_DPRINT12(ha, "qattr->max_srq_wqe = 0x%x\n", qattr->max_srq_wqe); + QL_DPRINT12(ha, "qattr->max_qp_resp_rd_atomic_resc = 0x%x\n", + qattr->max_qp_resp_rd_atomic_resc); + QL_DPRINT12(ha, "qattr->max_qp_req_rd_atomic_resc = 0x%x\n", + qattr->max_qp_req_rd_atomic_resc); + QL_DPRINT12(ha, "qattr->max_dev_resp_rd_atomic_resc = 0x%x\n", + qattr->max_dev_resp_rd_atomic_resc); + QL_DPRINT12(ha, "qattr->max_cq = 0x%x\n", qattr->max_cq); + QL_DPRINT12(ha, "qattr->max_qp = 0x%x\n", qattr->max_qp); + QL_DPRINT12(ha, "qattr->max_srq = 0x%x\n", qattr->max_srq); + QL_DPRINT12(ha, "qattr->max_mr = 0x%x\n", qattr->max_mr); + QL_DPRINT12(ha, "qattr->max_mr_size = %p\n", (void *)qattr->max_mr_size); + QL_DPRINT12(ha, "qattr->max_cqe = 0x%x\n", qattr->max_cqe); + QL_DPRINT12(ha, "qattr->max_mw = 0x%x\n", qattr->max_mw); + QL_DPRINT12(ha, "qattr->max_fmr = 0x%x\n", 
qattr->max_fmr); + QL_DPRINT12(ha, "qattr->max_mr_mw_fmr_pbl = 0x%x\n", + qattr->max_mr_mw_fmr_pbl); + QL_DPRINT12(ha, "qattr->max_mr_mw_fmr_size = %p\n", + (void *)qattr->max_mr_mw_fmr_size); + QL_DPRINT12(ha, "qattr->max_pd = 0x%x\n", qattr->max_pd); + QL_DPRINT12(ha, "qattr->max_ah = 0x%x\n", qattr->max_ah); + QL_DPRINT12(ha, "qattr->max_pkey = 0x%x\n", qattr->max_pkey); + QL_DPRINT12(ha, "qattr->max_srq_wr = 0x%x\n", qattr->max_srq_wr); + QL_DPRINT12(ha, "qattr->max_stats_queues = 0x%x\n", + qattr->max_stats_queues); + //QL_DPRINT12(ha, "qattr->dev_caps = 0x%x\n", qattr->dev_caps); + QL_DPRINT12(ha, "qattr->page_size_caps = %p\n", + (void *)qattr->page_size_caps); + QL_DPRINT12(ha, "qattr->dev_ack_delay = 0x%x\n", qattr->dev_ack_delay); + QL_DPRINT12(ha, "qattr->reserved_lkey = 0x%x\n", qattr->reserved_lkey); + QL_DPRINT12(ha, "qattr->bad_pkey_counter = 0x%x\n", + qattr->bad_pkey_counter); + + if ((attrs->qp_type == IB_QPT_GSI) && udata) { + QL_DPRINT12(ha, "unexpected udata when creating GSI QP\n"); + return -EINVAL; + } + + if (udata && !(ibpd->uobject && ibpd->uobject->context)) { + QL_DPRINT12(ha, "called from user without context\n"); + return -EINVAL; + } + + /* QP0... attrs->qp_type == IB_QPT_GSI */ + if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) { + QL_DPRINT12(ha, "unsupported qp type=0x%x requested\n", + attrs->qp_type); + return -EINVAL; + } + if (attrs->qp_type == IB_QPT_GSI && attrs->srq) { + QL_DPRINT12(ha, "cannot create GSI qp with SRQ\n"); + return -EINVAL; + } + /* Skip the check for QP1 to support CM size of 128 */ + if (attrs->cap.max_send_wr > qattr->max_wqe) { + QL_DPRINT12(ha, "cannot create a SQ with %d elements " + " (max_send_wr=0x%x)\n", + attrs->cap.max_send_wr, qattr->max_wqe); + return -EINVAL; + } + if (!attrs->srq && (attrs->cap.max_recv_wr > qattr->max_wqe)) { + QL_DPRINT12(ha, "cannot create a RQ with %d elements" + " (max_recv_wr=0x%x)\n", + attrs->cap.max_recv_wr, qattr->max_wqe); + return -EINVAL; + } + if (attrs->cap.max_inline_data > qattr->max_inline) { + QL_DPRINT12(ha, + "unsupported inline data size=0x%x " + "requested (max_inline=0x%x)\n", + attrs->cap.max_inline_data, qattr->max_inline); + return -EINVAL; + } + if (attrs->cap.max_send_sge > qattr->max_sge) { + QL_DPRINT12(ha, + "unsupported send_sge=0x%x " + "requested (max_send_sge=0x%x)\n", + attrs->cap.max_send_sge, qattr->max_sge); + return -EINVAL; + } + if (attrs->cap.max_recv_sge > qattr->max_sge) { + QL_DPRINT12(ha, + "unsupported recv_sge=0x%x requested " + " (max_recv_sge=0x%x)\n", + attrs->cap.max_recv_sge, qattr->max_sge); + return -EINVAL; + } + /* unprivileged user space cannot create special QP */ + if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { + QL_DPRINT12(ha, + "userspace can't create special QPs of type=0x%x\n", + attrs->qp_type); + return -EINVAL; + } + /* allow creating only one GSI type of QP */ + if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) { + QL_DPRINT12(ha, + "create qp: GSI special QPs already created.\n"); + return -EINVAL; + } + + /* verify consumer QPs are not trying to use GSI QP's CQ */ + if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) { + struct qlnxr_cq *send_cq = get_qlnxr_cq(attrs->send_cq); + struct qlnxr_cq *recv_cq = get_qlnxr_cq(attrs->recv_cq); + + if ((send_cq->cq_type == QLNXR_CQ_TYPE_GSI) || + (recv_cq->cq_type == QLNXR_CQ_TYPE_GSI)) { + QL_DPRINT11(ha, "consumer QP cannot use GSI CQs.\n"); + return -EINVAL; + } + } + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +static int 
+qlnxr_copy_srq_uresp(struct qlnxr_dev *dev, + struct qlnxr_srq *srq, + struct ib_udata *udata) +{ + struct qlnxr_create_srq_uresp uresp; + qlnx_host_t *ha; + int rc; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memset(&uresp, 0, sizeof(uresp)); + + uresp.srq_id = srq->srq_id; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +static void +qlnxr_copy_rq_uresp(struct qlnxr_dev *dev, + struct qlnxr_create_qp_uresp *uresp, + struct qlnxr_qp *qp) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + /* Return if QP is associated with SRQ instead of RQ */ + QL_DPRINT12(ha, "enter qp->srq = %p\n", qp->srq); + + if (qp->srq) + return; + + /* iWARP requires two doorbells per RQ. */ + if (QLNX_IS_IWARP(dev)) { + + uresp->rq_db_offset = + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); + uresp->rq_db2_offset = + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); + + QL_DPRINT12(ha, "uresp->rq_db_offset = 0x%x " + "uresp->rq_db2_offset = 0x%x\n", + uresp->rq_db_offset, uresp->rq_db2_offset); + } else { + uresp->rq_db_offset = + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); + } + uresp->rq_icid = qp->icid; + + QL_DPRINT12(ha, "exit\n"); + return; +} + +static void +qlnxr_copy_sq_uresp(struct qlnxr_dev *dev, + struct qlnxr_create_qp_uresp *uresp, + struct qlnxr_qp *qp) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); + + /* iWARP uses the same cid for rq and sq*/ + if (QLNX_IS_IWARP(dev)) { + uresp->sq_icid = qp->icid; + QL_DPRINT12(ha, "uresp->sq_icid = 0x%x\n", uresp->sq_icid); + } else + uresp->sq_icid = qp->icid + 1; + + QL_DPRINT12(ha, "exit\n"); + return; +} + +static int +qlnxr_copy_qp_uresp(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ib_udata *udata) +{ + int rc; + struct qlnxr_create_qp_uresp uresp; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter qp->icid =0x%x\n", qp->icid); + + memset(&uresp, 0, sizeof(uresp)); + qlnxr_copy_sq_uresp(dev, &uresp, qp); + qlnxr_copy_rq_uresp(dev, &uresp, qp); + + uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; + uresp.qp_id = qp->qp_id; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + + +static void +qlnxr_set_common_qp_params(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_pd *pd, + struct ib_qp_init_attr *attrs) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + spin_lock_init(&qp->q_lock); + + atomic_set(&qp->refcnt, 1); + qp->pd = pd; + qp->sig = QLNXR_QP_MAGIC_NUMBER; + qp->qp_type = attrs->qp_type; + qp->max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE; + qp->sq.max_sges = attrs->cap.max_send_sge; + qp->state = ECORE_ROCE_QP_STATE_RESET; + qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; + qp->sq_cq = get_qlnxr_cq(attrs->send_cq); + qp->rq_cq = get_qlnxr_cq(attrs->recv_cq); + qp->dev = dev; + + if (!attrs->srq) { + /* QP is associated with RQ instead of SRQ */ + qp->rq.max_sges = attrs->cap.max_recv_sge; + QL_DPRINT12(ha, "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n", + qp->rq.max_sges, qp->rq_cq->icid); + } else { + qp->srq = get_qlnxr_srq(attrs->srq); + } + + QL_DPRINT12(ha, + "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d," + " state = %d, signaled = %d, use_srq=%d\n", + pd->pd_id, qp->qp_type, qp->max_inline_data, + qp->state, qp->signaled, ((attrs->srq) ? 
1 : 0)); + QL_DPRINT12(ha, "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n", + qp->sq.max_sges, qp->sq_cq->icid); + return; +} + +static int +qlnxr_check_srq_params(struct ib_pd *ibpd, + struct qlnxr_dev *dev, + struct ib_srq_init_attr *attrs) +{ + struct ecore_rdma_device *qattr; + qlnx_host_t *ha; + + ha = dev->ha; + qattr = ecore_rdma_query_device(dev->rdma_ctx); + + QL_DPRINT12(ha, "enter\n"); + + if (attrs->attr.max_wr > qattr->max_srq_wqe) { + QL_DPRINT12(ha, "unsupported srq_wr=0x%x" + " requested (max_srq_wr=0x%x)\n", + attrs->attr.max_wr, qattr->max_srq_wr); + return -EINVAL; + } + + if (attrs->attr.max_sge > qattr->max_sge) { + QL_DPRINT12(ha, + "unsupported sge=0x%x requested (max_srq_sge=0x%x)\n", + attrs->attr.max_sge, qattr->max_sge); + return -EINVAL; + } + + if (attrs->attr.srq_limit > attrs->attr.max_wr) { + QL_DPRINT12(ha, + "unsupported srq_limit=0x%x requested" + " (max_srq_limit=0x%x)\n", + attrs->attr.srq_limit, attrs->attr.srq_limit); + return -EINVAL; + } + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + + +static void +qlnxr_free_srq_user_params(struct qlnxr_srq *srq) +{ + struct qlnxr_dev *dev = srq->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + qlnxr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); + ib_umem_release(srq->usrq.umem); + ib_umem_release(srq->prod_umem); + + QL_DPRINT12(ha, "exit\n"); + return; +} + +static void +qlnxr_free_srq_kernel_params(struct qlnxr_srq *srq) +{ + struct qlnxr_srq_hwq_info *hw_srq = &srq->hw_srq; + struct qlnxr_dev *dev = srq->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + ecore_chain_free(dev->cdev, &hw_srq->pbl); + + qlnx_dma_free_coherent(&dev->cdev, + hw_srq->virt_prod_pair_addr, + hw_srq->phy_prod_pair_addr, + sizeof(struct rdma_srq_producers)); + + QL_DPRINT12(ha, "exit\n"); + + return; +} + +static int +qlnxr_init_srq_user_params(struct ib_ucontext *ib_ctx, + struct qlnxr_srq *srq, + struct qlnxr_create_srq_ureq *ureq, + int access, int dmasync) +{ +#ifdef DEFINE_IB_UMEM_WITH_CHUNK + struct ib_umem_chunk *chunk; +#endif + struct scatterlist *sg; + int rc; + struct qlnxr_dev *dev = srq->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + rc = qlnxr_init_user_queue(ib_ctx, srq->dev, &srq->usrq, ureq->srq_addr, + ureq->srq_len, access, dmasync, 1); + if (rc) + return rc; + + srq->prod_umem = ib_umem_get(ib_ctx, ureq->prod_pair_addr, + sizeof(struct rdma_srq_producers), + access, dmasync); + if (IS_ERR(srq->prod_umem)) { + + qlnxr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); + ib_umem_release(srq->usrq.umem); + + QL_DPRINT12(ha, "ib_umem_get failed for producer [%p]\n", + PTR_ERR(srq->prod_umem)); + + return PTR_ERR(srq->prod_umem); + } + +#ifdef DEFINE_IB_UMEM_WITH_CHUNK + chunk = container_of((&srq->prod_umem->chunk_list)->next, + typeof(*chunk), list); + sg = &chunk->page_list[0]; +#else + sg = srq->prod_umem->sg_head.sgl; +#endif + srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg); + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + + +static int +qlnxr_alloc_srq_kernel_params(struct qlnxr_srq *srq, + struct qlnxr_dev *dev, + struct ib_srq_init_attr *init_attr) +{ + struct qlnxr_srq_hwq_info *hw_srq = &srq->hw_srq; + dma_addr_t phy_prod_pair_addr; + u32 num_elems, max_wr; + void *va; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + va = qlnx_dma_alloc_coherent(&dev->cdev, + &phy_prod_pair_addr, + sizeof(struct rdma_srq_producers)); + if (!va) { + QL_DPRINT11(ha, 
"qlnx_dma_alloc_coherent failed for produceer\n"); + return -ENOMEM; + } + + hw_srq->phy_prod_pair_addr = phy_prod_pair_addr; + hw_srq->virt_prod_pair_addr = va; + + max_wr = init_attr->attr.max_wr; + + num_elems = max_wr * RDMA_MAX_SRQ_WQE_SIZE; + + rc = ecore_chain_alloc(dev->cdev, + ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U32, + num_elems, + ECORE_RDMA_SRQ_WQE_ELEM_SIZE, + &hw_srq->pbl, NULL); + + if (rc) { + QL_DPRINT11(ha, "ecore_chain_alloc failed [%d]\n", rc); + goto err0; + } + + hw_srq->max_wr = max_wr; + hw_srq->num_elems = num_elems; + hw_srq->max_sges = RDMA_MAX_SGE_PER_SRQ; + + QL_DPRINT12(ha, "exit\n"); + return 0; + +err0: + qlnx_dma_free_coherent(&dev->cdev, va, phy_prod_pair_addr, + sizeof(struct rdma_srq_producers)); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +static inline void +qlnxr_init_common_qp_in_params(struct qlnxr_dev *dev, + struct qlnxr_pd *pd, + struct qlnxr_qp *qp, + struct ib_qp_init_attr *attrs, + bool fmr_and_reserved_lkey, + struct ecore_rdma_create_qp_in_params *params) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + /* QP handle to be written in an async event */ + params->qp_handle_async_lo = lower_32_bits((uintptr_t)qp); + params->qp_handle_async_hi = upper_32_bits((uintptr_t)qp); + + params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR); + params->fmr_and_reserved_lkey = fmr_and_reserved_lkey; + params->pd = pd->pd_id; + params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; + params->sq_cq_id = get_qlnxr_cq(attrs->send_cq)->icid; + params->stats_queue = 0; + + params->rq_cq_id = get_qlnxr_cq(attrs->recv_cq)->icid; + + if (qp->srq) { + /* QP is associated with SRQ instead of RQ */ + params->srq_id = qp->srq->srq_id; + params->use_srq = true; + QL_DPRINT11(ha, "exit srq_id = 0x%x use_srq = 0x%x\n", + params->srq_id, params->use_srq); + return; + } + + params->srq_id = 0; + params->use_srq = false; + + QL_DPRINT12(ha, "exit\n"); + return; +} + + +static inline void +qlnxr_qp_user_print( struct qlnxr_dev *dev, + struct qlnxr_qp *qp) +{ + QL_DPRINT12((dev->ha), "qp=%p. 
sq_addr=0x%llx, sq_len=%zd, " + "rq_addr=0x%llx, rq_len=%zd\n", + qp, qp->usq.buf_addr, qp->usq.buf_len, qp->urq.buf_addr, + qp->urq.buf_len); + return; +} + +static int +qlnxr_idr_add(struct qlnxr_dev *dev, void *ptr, u32 id) +{ + u32 newid; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (!QLNX_IS_IWARP(dev)) + return 0; + + do { + if (!idr_pre_get(&dev->qpidr, GFP_KERNEL)) { + QL_DPRINT11(ha, "idr_pre_get failed\n"); + return -ENOMEM; + } + + mtx_lock(&dev->idr_lock); + + rc = idr_get_new_above(&dev->qpidr, ptr, id, &newid); + + mtx_unlock(&dev->idr_lock); + + } while (rc == -EAGAIN); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + + return rc; +} + +static void +qlnxr_idr_remove(struct qlnxr_dev *dev, u32 id) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (!QLNX_IS_IWARP(dev)) + return; + + mtx_lock(&dev->idr_lock); + idr_remove(&dev->qpidr, id); + mtx_unlock(&dev->idr_lock); + + QL_DPRINT12(ha, "exit \n"); + + return; +} + +static inline void +qlnxr_iwarp_populate_user_qp(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ecore_rdma_create_qp_out_params *out_params) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + qp->usq.pbl_tbl->va = out_params->sq_pbl_virt; + qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys; + + qlnxr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl, + &qp->usq.pbl_info); + + if (qp->srq) { + QL_DPRINT11(ha, "qp->srq = %p\n", qp->srq); + return; + } + + qp->urq.pbl_tbl->va = out_params->rq_pbl_virt; + qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys; + + qlnxr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl, + &qp->urq.pbl_info); + + QL_DPRINT12(ha, "exit\n"); + return; +} + +static int +qlnxr_create_user_qp(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ib_pd *ibpd, + struct ib_udata *udata, + struct ib_qp_init_attr *attrs) +{ + struct ecore_rdma_destroy_qp_out_params d_out_params; + struct ecore_rdma_create_qp_in_params in_params; + struct ecore_rdma_create_qp_out_params out_params; + struct qlnxr_pd *pd = get_qlnxr_pd(ibpd); + struct ib_ucontext *ib_ctx = NULL; + struct qlnxr_ucontext *ctx = NULL; + struct qlnxr_create_qp_ureq ureq; + int alloc_and_init = QLNX_IS_ROCE(dev); + int rc = -EINVAL; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + ib_ctx = ibpd->uobject->context; + ctx = get_qlnxr_ucontext(ib_ctx); + + memset(&ureq, 0, sizeof(ureq)); + rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); + + if (rc) { + QL_DPRINT11(ha, "ib_copy_from_udata failed [%d]\n", rc); + return rc; + } + + /* SQ - read access only (0), dma sync not required (0) */ + rc = qlnxr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr, + ureq.sq_len, 0, 0, + alloc_and_init); + if (rc) { + QL_DPRINT11(ha, "qlnxr_init_user_queue failed [%d]\n", rc); + return rc; + } + + if (!qp->srq) { + /* RQ - read access only (0), dma sync not required (0) */ + rc = qlnxr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr, + ureq.rq_len, 0, 0, + alloc_and_init); + + if (rc) { + QL_DPRINT11(ha, "qlnxr_init_user_queue failed [%d]\n", rc); + return rc; + } + } + + memset(&in_params, 0, sizeof(in_params)); + qlnxr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params); + in_params.qp_handle_lo = ureq.qp_handle_lo; + in_params.qp_handle_hi = ureq.qp_handle_hi; + in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; + in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; + + if (!qp->srq) { + in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; + in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; 
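+		/*
+		 * As with the SQ above, only the page count and the physical
+		 * base of the PBL that qlnxr_init_user_queue() built for the
+		 * pinned user RQ are passed in; with an SRQ attached this
+		 * block is skipped and receives go through the shared queue
+		 * selected in qlnxr_init_common_qp_in_params() instead.
+		 */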
+ } + + qp->ecore_qp = ecore_rdma_create_qp(dev->rdma_ctx, &in_params, &out_params); + + if (!qp->ecore_qp) { + rc = -ENOMEM; + QL_DPRINT11(ha, "ecore_rdma_create_qp failed\n"); + goto err1; + } + + if (QLNX_IS_IWARP(dev)) + qlnxr_iwarp_populate_user_qp(dev, qp, &out_params); + + qp->qp_id = out_params.qp_id; + qp->icid = out_params.icid; + + rc = qlnxr_copy_qp_uresp(dev, qp, udata); + + if (rc) { + QL_DPRINT11(ha, "qlnxr_copy_qp_uresp failed\n"); + goto err; + } + + qlnxr_qp_user_print(dev, qp); + + QL_DPRINT12(ha, "exit\n"); + return 0; +err: + rc = ecore_rdma_destroy_qp(dev->rdma_ctx, qp->ecore_qp, &d_out_params); + + if (rc) + QL_DPRINT12(ha, "fatal fault\n"); + +err1: + qlnxr_cleanup_user(dev, qp); + + QL_DPRINT12(ha, "exit[%d]\n", rc); + return rc; +} + +static void +qlnxr_set_roce_db_info(struct qlnxr_dev *dev, + struct qlnxr_qp *qp) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter qp = %p qp->srq %p\n", qp, qp->srq); + + qp->sq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); + qp->sq.db_data.data.icid = qp->icid + 1; + + if (!qp->srq) { + qp->rq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); + qp->rq.db_data.data.icid = qp->icid; + } + + QL_DPRINT12(ha, "exit\n"); + return; +} + +static void +qlnxr_set_iwarp_db_info(struct qlnxr_dev *dev, + struct qlnxr_qp *qp) + +{ + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter qp = %p qp->srq %p\n", qp, qp->srq); + + qp->sq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); + qp->sq.db_data.data.icid = qp->icid; + + if (!qp->srq) { + qp->rq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); + qp->rq.db_data.data.icid = qp->icid; + + qp->rq.iwarp_db2 = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); + qp->rq.iwarp_db2_data.data.icid = qp->icid; + qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD; + } + + QL_DPRINT12(ha, + "qp->sq.db = %p qp->sq.db_data.data.icid =0x%x\n" + "\t\t\tqp->rq.db = %p qp->rq.db_data.data.icid =0x%x\n" + "\t\t\tqp->rq.iwarp_db2 = %p qp->rq.iwarp_db2.data.icid =0x%x" + " qp->rq.iwarp_db2.data.prod_val =0x%x\n", + qp->sq.db, qp->sq.db_data.data.icid, + qp->rq.db, qp->rq.db_data.data.icid, + qp->rq.iwarp_db2, qp->rq.iwarp_db2_data.data.icid, + qp->rq.iwarp_db2_data.data.value); + + QL_DPRINT12(ha, "exit\n"); + return; +} + +static int +qlnxr_roce_create_kernel_qp(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct ecore_rdma_create_qp_in_params *in_params, + u32 n_sq_elems, + u32 n_rq_elems) +{ + struct ecore_rdma_create_qp_out_params out_params; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + rc = ecore_chain_alloc( + dev->cdev, + ECORE_CHAIN_USE_TO_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U32, + n_sq_elems, + QLNXR_SQE_ELEMENT_SIZE, + &qp->sq.pbl, + NULL); + + if (rc) { + QL_DPRINT11(ha, "ecore_chain_alloc qp->sq.pbl failed[%d]\n", rc); + return rc; + } + + in_params->sq_num_pages = ecore_chain_get_page_cnt(&qp->sq.pbl); + in_params->sq_pbl_ptr = ecore_chain_get_pbl_phys(&qp->sq.pbl); + + if (!qp->srq) { + + rc = ecore_chain_alloc( + dev->cdev, + ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, + ECORE_CHAIN_MODE_PBL, + ECORE_CHAIN_CNT_TYPE_U32, + n_rq_elems, + QLNXR_RQE_ELEMENT_SIZE, + &qp->rq.pbl, + NULL); + + if (rc) { + QL_DPRINT11(ha, + "ecore_chain_alloc qp->rq.pbl failed[%d]\n", rc); + return rc; + } + + in_params->rq_num_pages = ecore_chain_get_page_cnt(&qp->rq.pbl); + in_params->rq_pbl_ptr = ecore_chain_get_pbl_phys(&qp->rq.pbl); + } + + 
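+	/*
+	 * RoCE: the driver allocated both chains above and recorded their
+	 * PBL geometry in in_params before this call; contrast with the
+	 * iWARP path below, where ecore_rdma_create_qp() runs first and the
+	 * chains are then attached to the firmware-supplied PBLs via ext_pbl.
+	 */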
qp->ecore_qp = ecore_rdma_create_qp(dev->rdma_ctx, in_params, &out_params);
+
+	if (!qp->ecore_qp) {
+		QL_DPRINT11(ha, "qp->ecore_qp == NULL\n");
+		return -EINVAL;
+	}
+
+	qp->qp_id = out_params.qp_id;
+	qp->icid = out_params.icid;
+
+	qlnxr_set_roce_db_info(dev, qp);
+
+	QL_DPRINT12(ha, "exit\n");
+	return 0;
+}
+
+static int
+qlnxr_iwarp_create_kernel_qp(struct qlnxr_dev *dev,
+	struct qlnxr_qp *qp,
+	struct ecore_rdma_create_qp_in_params *in_params,
+	u32 n_sq_elems,
+	u32 n_rq_elems)
+{
+	struct ecore_rdma_destroy_qp_out_params d_out_params;
+	struct ecore_rdma_create_qp_out_params out_params;
+	struct ecore_chain_ext_pbl ext_pbl;
+	int rc;
+	qlnx_host_t *ha;
+
+	ha = dev->ha;
+
+	QL_DPRINT12(ha, "enter\n");
+
+	in_params->sq_num_pages = ECORE_CHAIN_PAGE_CNT(n_sq_elems,
+		QLNXR_SQE_ELEMENT_SIZE,
+		ECORE_CHAIN_MODE_PBL);
+	in_params->rq_num_pages = ECORE_CHAIN_PAGE_CNT(n_rq_elems,
+		QLNXR_RQE_ELEMENT_SIZE,
+		ECORE_CHAIN_MODE_PBL);
+
+	QL_DPRINT12(ha, "n_sq_elems = 0x%x"
+		" n_rq_elems = 0x%x in_params\n"
+		"\t\t\tqp_handle_lo\t\t= 0x%08x\n"
+		"\t\t\tqp_handle_hi\t\t= 0x%08x\n"
+		"\t\t\tqp_handle_async_lo\t\t= 0x%08x\n"
+		"\t\t\tqp_handle_async_hi\t\t= 0x%08x\n"
+		"\t\t\tuse_srq\t\t\t= 0x%x\n"
+		"\t\t\tsignal_all\t\t= 0x%x\n"
+		"\t\t\tfmr_and_reserved_lkey\t= 0x%x\n"
+		"\t\t\tpd\t\t\t= 0x%x\n"
+		"\t\t\tdpi\t\t\t= 0x%x\n"
+		"\t\t\tsq_cq_id\t\t\t= 0x%x\n"
+		"\t\t\tsq_num_pages\t\t= 0x%x\n"
+		"\t\t\tsq_pbl_ptr\t\t= %p\n"
+		"\t\t\tmax_sq_sges\t\t= 0x%x\n"
+		"\t\t\trq_cq_id\t\t\t= 0x%x\n"
+		"\t\t\trq_num_pages\t\t= 0x%x\n"
+		"\t\t\trq_pbl_ptr\t\t= %p\n"
+		"\t\t\tsrq_id\t\t\t= 0x%x\n"
+		"\t\t\tstats_queue\t\t= 0x%x\n",
+		n_sq_elems, n_rq_elems,
+		in_params->qp_handle_lo,
+		in_params->qp_handle_hi,
+		in_params->qp_handle_async_lo,
+		in_params->qp_handle_async_hi,
+		in_params->use_srq,
+		in_params->signal_all,
+		in_params->fmr_and_reserved_lkey,
+		in_params->pd,
+		in_params->dpi,
+		in_params->sq_cq_id,
+		in_params->sq_num_pages,
+		(void *)in_params->sq_pbl_ptr,
+		in_params->max_sq_sges,
+		in_params->rq_cq_id,
+		in_params->rq_num_pages,
+		(void *)in_params->rq_pbl_ptr,
+		in_params->srq_id,
+		in_params->stats_queue );
+
+	memset(&out_params, 0, sizeof (struct ecore_rdma_create_qp_out_params));
+	memset(&ext_pbl, 0, sizeof (struct ecore_chain_ext_pbl));
+
+	qp->ecore_qp = ecore_rdma_create_qp(dev->rdma_ctx, in_params, &out_params);
+
+	if (!qp->ecore_qp) {
+		QL_DPRINT11(ha, "ecore_rdma_create_qp failed\n");
+		return -EINVAL;
+	}
+
+	/* Now we allocate the chain */
+	ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
+	ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
+
+	QL_DPRINT12(ha, "ext_pbl.p_pbl_virt = %p "
+		"ext_pbl.p_pbl_phys = %p\n",
+		ext_pbl.p_pbl_virt, ext_pbl.p_pbl_phys);
+
+	rc = ecore_chain_alloc(
+		dev->cdev,
+		ECORE_CHAIN_USE_TO_PRODUCE,
+		ECORE_CHAIN_MODE_PBL,
+		ECORE_CHAIN_CNT_TYPE_U32,
+		n_sq_elems,
+		QLNXR_SQE_ELEMENT_SIZE,
+		&qp->sq.pbl,
+		&ext_pbl);
+
+	if (rc) {
+		QL_DPRINT11(ha,
+			"ecore_chain_alloc qp->sq.pbl failed rc = %d\n", rc);
+		goto err;
+	}
+
+	ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
+	ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
+
+	QL_DPRINT12(ha, "ext_pbl.p_pbl_virt = %p "
+		"ext_pbl.p_pbl_phys = %p\n",
+		ext_pbl.p_pbl_virt, ext_pbl.p_pbl_phys);
+
+	if (!qp->srq) {
+
+		rc = ecore_chain_alloc(
+			dev->cdev,
+			ECORE_CHAIN_USE_TO_CONSUME_PRODUCE,
+			ECORE_CHAIN_MODE_PBL,
+			ECORE_CHAIN_CNT_TYPE_U32,
+			n_rq_elems,
+			QLNXR_RQE_ELEMENT_SIZE,
+			&qp->rq.pbl,
+			&ext_pbl);
+
+		if (rc) {
+			QL_DPRINT11(ha, "ecore_chain_alloc qp->rq.pbl"
+				" failed rc = %d\n", rc);
+			goto err;
+		}
+	}
+
+	QL_DPRINT12(ha, "qp_id = 0x%x icid =0x%x\n",
+		out_params.qp_id, out_params.icid);
+
+	qp->qp_id = out_params.qp_id;
+	qp->icid = out_params.icid;
+
+	qlnxr_set_iwarp_db_info(dev, qp);
+
+	QL_DPRINT12(ha, "exit\n");
+	return 0;
+
+err:
+	ecore_rdma_destroy_qp(dev->rdma_ctx, qp->ecore_qp, &d_out_params);
+
+	QL_DPRINT12(ha, "exit rc = %d\n", rc);
+	return rc;
+}
+
+static int
+qlnxr_create_kernel_qp(struct qlnxr_dev *dev,
+	struct qlnxr_qp *qp,
+	struct ib_pd *ibpd,
+	struct ib_qp_init_attr *attrs)
+{
+	struct ecore_rdma_create_qp_in_params in_params;
+	struct qlnxr_pd *pd = get_qlnxr_pd(ibpd);
+	int rc = -EINVAL;
+	u32 n_rq_elems;
+	u32 n_sq_elems;
+	u32 n_sq_entries;
+	struct ecore_rdma_device *qattr = ecore_rdma_query_device(dev->rdma_ctx);
+	qlnx_host_t *ha;
+
+	ha = dev->ha;
+
+	QL_DPRINT12(ha, "enter\n");
+
+	memset(&in_params, 0, sizeof(in_params));
+
+	/* A single work request may take up to MAX_SQ_WQE_SIZE elements in
+	 * the ring. The ring should allow at least a single WR, even if the
+	 * user requested none, due to allocation issues.
+	 * We should add an extra WR since the prod and cons indices of
+	 * wqe_wr_id are managed in such a way that the WQ is considered full
+	 * when (prod+1)%max_wr==cons. We currently don't do that because we
+	 * double the number of entries due to an iSER issue that pushes far
+	 * more WRs than indicated. If we decline its ib_post_send() then we
+	 * get error prints in dmesg we'd like to avoid.
+	 */
+	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
+		qattr->max_wqe);
+
+	qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
+		GFP_KERNEL);
+	if (!qp->wqe_wr_id) {
+		QL_DPRINT11(ha, "failed SQ shadow memory allocation\n");
+		return -ENOMEM;
+	}
+
+	/* QP handle to be written in CQE */
+	in_params.qp_handle_lo = lower_32_bits((uintptr_t)qp);
+	in_params.qp_handle_hi = upper_32_bits((uintptr_t)qp);
+
+	/* A single work request may take up to MAX_RQ_WQE_SIZE elements in
+	 * the ring. The ring should allow at least a single WR, even if the
+	 * user requested none, due to allocation issues.
+	 */
+	qp->rq.max_wr = (u16)max_t(u32, attrs->cap.max_recv_wr, 1);
+
+	/* Allocate driver internal RQ array */
+	if (!qp->srq) {
+		qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
+			GFP_KERNEL);
+		if (!qp->rqe_wr_id) {
+			QL_DPRINT11(ha, "failed RQ shadow memory allocation\n");
+			kfree(qp->wqe_wr_id);
+			return -ENOMEM;
+		}
+	}
+
+	//qlnxr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
+
+	in_params.qp_handle_async_lo = lower_32_bits((uintptr_t)qp);
+	in_params.qp_handle_async_hi = upper_32_bits((uintptr_t)qp);
+
+	in_params.signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
+	in_params.fmr_and_reserved_lkey = true;
+	in_params.pd = pd->pd_id;
+	in_params.dpi = pd->uctx ?
pd->uctx->dpi : dev->dpi; + in_params.sq_cq_id = get_qlnxr_cq(attrs->send_cq)->icid; + in_params.stats_queue = 0; + + in_params.rq_cq_id = get_qlnxr_cq(attrs->recv_cq)->icid; + + if (qp->srq) { + /* QP is associated with SRQ instead of RQ */ + in_params.srq_id = qp->srq->srq_id; + in_params.use_srq = true; + QL_DPRINT11(ha, "exit srq_id = 0x%x use_srq = 0x%x\n", + in_params.srq_id, in_params.use_srq); + } else { + in_params.srq_id = 0; + in_params.use_srq = false; + } + + n_sq_entries = attrs->cap.max_send_wr; + n_sq_entries = min_t(u32, n_sq_entries, qattr->max_wqe); + n_sq_entries = max_t(u32, n_sq_entries, 1); + n_sq_elems = n_sq_entries * QLNXR_MAX_SQE_ELEMENTS_PER_SQE; + + n_rq_elems = qp->rq.max_wr * QLNXR_MAX_RQE_ELEMENTS_PER_RQE; + + if (QLNX_IS_ROCE(dev)) { + rc = qlnxr_roce_create_kernel_qp(dev, qp, &in_params, + n_sq_elems, n_rq_elems); + } else { + rc = qlnxr_iwarp_create_kernel_qp(dev, qp, &in_params, + n_sq_elems, n_rq_elems); + } + + if (rc) + qlnxr_cleanup_kernel(dev, qp); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +struct ib_qp * +qlnxr_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) +{ + struct qlnxr_dev *dev = get_qlnxr_dev(ibpd->device); + struct qlnxr_pd *pd = get_qlnxr_pd(ibpd); + struct qlnxr_qp *qp; + int rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + rc = qlnxr_check_qp_attrs(ibpd, dev, attrs, udata); + if (rc) { + QL_DPRINT11(ha, "qlnxr_check_qp_attrs failed [%d]\n", rc); + return ERR_PTR(rc); + } + + QL_DPRINT12(ha, "called from %s, event_handle=%p," + " eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n", + (udata ? "user library" : "kernel"), + attrs->event_handler, pd, + get_qlnxr_cq(attrs->send_cq), + get_qlnxr_cq(attrs->send_cq)->icid, + get_qlnxr_cq(attrs->recv_cq), + get_qlnxr_cq(attrs->recv_cq)->icid); + + qp = qlnx_zalloc(sizeof(struct qlnxr_qp)); + + if (!qp) { + QL_DPRINT11(ha, "kzalloc(qp) failed\n"); + return ERR_PTR(-ENOMEM); + } + + qlnxr_set_common_qp_params(dev, qp, pd, attrs); + + if (attrs->qp_type == IB_QPT_GSI) { + QL_DPRINT11(ha, "calling qlnxr_create_gsi_qp\n"); + return qlnxr_create_gsi_qp(dev, attrs, qp); + } + + if (udata) { + rc = qlnxr_create_user_qp(dev, qp, ibpd, udata, attrs); + + if (rc) { + QL_DPRINT11(ha, "qlnxr_create_user_qp failed\n"); + goto err; + } + } else { + rc = qlnxr_create_kernel_qp(dev, qp, ibpd, attrs); + + if (rc) { + QL_DPRINT11(ha, "qlnxr_create_kernel_qp failed\n"); + goto err; + } + } + + qp->ibqp.qp_num = qp->qp_id; + + rc = qlnxr_idr_add(dev, qp, qp->qp_id); + + if (rc) { + QL_DPRINT11(ha, "qlnxr_idr_add failed\n"); + goto err; + } + + QL_DPRINT12(ha, "exit [%p]\n", &qp->ibqp); + + return &qp->ibqp; +err: + kfree(qp); + + QL_DPRINT12(ha, "failed exit\n"); + return ERR_PTR(-EFAULT); +} + + +static enum ib_qp_state +qlnxr_get_ibqp_state(enum ecore_roce_qp_state qp_state) +{ + enum ib_qp_state state = IB_QPS_ERR; + + switch (qp_state) { + case ECORE_ROCE_QP_STATE_RESET: + state = IB_QPS_RESET; + break; + + case ECORE_ROCE_QP_STATE_INIT: + state = IB_QPS_INIT; + break; + + case ECORE_ROCE_QP_STATE_RTR: + state = IB_QPS_RTR; + break; + + case ECORE_ROCE_QP_STATE_RTS: + state = IB_QPS_RTS; + break; + + case ECORE_ROCE_QP_STATE_SQD: + state = IB_QPS_SQD; + break; + + case ECORE_ROCE_QP_STATE_ERR: + state = IB_QPS_ERR; + break; + + case ECORE_ROCE_QP_STATE_SQE: + state = IB_QPS_SQE; + break; + } + return state; +} + +static enum ecore_roce_qp_state +qlnxr_get_state_from_ibqp( enum ib_qp_state qp_state) +{ + enum 
ecore_roce_qp_state ecore_qp_state; + + ecore_qp_state = ECORE_ROCE_QP_STATE_ERR; + + switch (qp_state) { + case IB_QPS_RESET: + ecore_qp_state = ECORE_ROCE_QP_STATE_RESET; + break; + + case IB_QPS_INIT: + ecore_qp_state = ECORE_ROCE_QP_STATE_INIT; + break; + + case IB_QPS_RTR: + ecore_qp_state = ECORE_ROCE_QP_STATE_RTR; + break; + + case IB_QPS_RTS: + ecore_qp_state = ECORE_ROCE_QP_STATE_RTS; + break; + + case IB_QPS_SQD: + ecore_qp_state = ECORE_ROCE_QP_STATE_SQD; + break; + + case IB_QPS_ERR: + ecore_qp_state = ECORE_ROCE_QP_STATE_ERR; + break; + + default: + ecore_qp_state = ECORE_ROCE_QP_STATE_ERR; + break; + } + + return (ecore_qp_state); +} + +static void +qlnxr_reset_qp_hwq_info(struct qlnxr_qp_hwq_info *qph) +{ + ecore_chain_reset(&qph->pbl); + qph->prod = qph->cons = 0; + qph->wqe_cons = 0; + qph->db_data.data.value = cpu_to_le16(0); + + return; +} + +static int +qlnxr_update_qp_state(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + enum ecore_roce_qp_state new_state) +{ + int status = 0; + uint32_t reg_addr; + struct ecore_dev *cdev; + qlnx_host_t *ha; + + ha = dev->ha; + cdev = &ha->cdev; + + QL_DPRINT12(ha, "enter qp = %p new_state = 0x%x qp->state = 0x%x\n", + qp, new_state, qp->state); + + if (new_state == qp->state) { + return 0; + } + + switch (qp->state) { + case ECORE_ROCE_QP_STATE_RESET: + switch (new_state) { + case ECORE_ROCE_QP_STATE_INIT: + qp->prev_wqe_size = 0; + qlnxr_reset_qp_hwq_info(&qp->sq); + if (!(qp->srq)) + qlnxr_reset_qp_hwq_info(&qp->rq); + break; + default: + status = -EINVAL; + break; + }; + break; + case ECORE_ROCE_QP_STATE_INIT: + /* INIT->XXX */ + switch (new_state) { + case ECORE_ROCE_QP_STATE_RTR: + /* Update doorbell (in case post_recv was done before move to RTR) */ + if (qp->srq) + break; + wmb(); + //writel(qp->rq.db_data.raw, qp->rq.db); + //if (QLNX_IS_IWARP(dev)) + // writel(qp->rq.iwarp_db2_data.raw, + // qp->rq.iwarp_db2); + + reg_addr = (uint32_t)((uint8_t *)qp->rq.db - + (uint8_t *)cdev->doorbells); + + bus_write_4(ha->pci_dbells, reg_addr, qp->rq.db_data.raw); + bus_barrier(ha->pci_dbells, 0, 0, BUS_SPACE_BARRIER_READ); + + if (QLNX_IS_IWARP(dev)) { + reg_addr = (uint32_t)((uint8_t *)qp->rq.iwarp_db2 - + (uint8_t *)cdev->doorbells); + bus_write_4(ha->pci_dbells, reg_addr,\ + qp->rq.iwarp_db2_data.raw); + bus_barrier(ha->pci_dbells, 0, 0,\ + BUS_SPACE_BARRIER_READ); + } + + + mmiowb(); + break; + case ECORE_ROCE_QP_STATE_ERR: + /* TBD:flush qps... */ + break; + default: + /* invalid state change. */ + status = -EINVAL; + break; + }; + break; + case ECORE_ROCE_QP_STATE_RTR: + /* RTR->XXX */ + switch (new_state) { + case ECORE_ROCE_QP_STATE_RTS: + break; + case ECORE_ROCE_QP_STATE_ERR: + break; + default: + /* invalid state change. */ + status = -EINVAL; + break; + }; + break; + case ECORE_ROCE_QP_STATE_RTS: + /* RTS->XXX */ + switch (new_state) { + case ECORE_ROCE_QP_STATE_SQD: + break; + case ECORE_ROCE_QP_STATE_ERR: + break; + default: + /* invalid state change. */ + status = -EINVAL; + break; + }; + break; + case ECORE_ROCE_QP_STATE_SQD: + /* SQD->XXX */ + switch (new_state) { + case ECORE_ROCE_QP_STATE_RTS: + case ECORE_ROCE_QP_STATE_ERR: + break; + default: + /* invalid state change. 
*/ + status = -EINVAL; + break; + }; + break; + case ECORE_ROCE_QP_STATE_ERR: + /* ERR->XXX */ + switch (new_state) { + case ECORE_ROCE_QP_STATE_RESET: + if ((qp->rq.prod != qp->rq.cons) || + (qp->sq.prod != qp->sq.cons)) { + QL_DPRINT11(ha, + "Error->Reset with rq/sq " + "not empty rq.prod=0x%x rq.cons=0x%x" + " sq.prod=0x%x sq.cons=0x%x\n", + qp->rq.prod, qp->rq.cons, + qp->sq.prod, qp->sq.cons); + status = -EINVAL; + } + break; + default: + status = -EINVAL; + break; + }; + break; + default: + status = -EINVAL; + break; + }; + + QL_DPRINT12(ha, "exit\n"); + return status; +} + +int +qlnxr_modify_qp(struct ib_qp *ibqp, + struct ib_qp_attr *attr, + int attr_mask, + struct ib_udata *udata) +{ + int rc = 0; + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + struct qlnxr_dev *dev = get_qlnxr_dev(&qp->dev->ibdev); + struct ecore_rdma_modify_qp_in_params qp_params = { 0 }; + enum ib_qp_state old_qp_state, new_qp_state; + struct ecore_rdma_device *qattr = ecore_rdma_query_device(dev->rdma_ctx); + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, + "enter qp = %p attr_mask = 0x%x, state = %d udata = %p\n", + qp, attr_mask, attr->qp_state, udata); + + old_qp_state = qlnxr_get_ibqp_state(qp->state); + if (attr_mask & IB_QP_STATE) + new_qp_state = attr->qp_state; + else + new_qp_state = old_qp_state; + + if (QLNX_IS_ROCE(dev)) { +#if __FreeBSD_version >= 1100000 + if (!ib_modify_qp_is_ok(old_qp_state, + new_qp_state, + ibqp->qp_type, + attr_mask, + IB_LINK_LAYER_ETHERNET)) { + QL_DPRINT12(ha, + "invalid attribute mask=0x%x" + " specified for qpn=0x%x of type=0x%x \n" + " old_qp_state=0x%x, new_qp_state=0x%x\n", + attr_mask, qp->qp_id, ibqp->qp_type, + old_qp_state, new_qp_state); + rc = -EINVAL; + goto err; + } +#else + if (!ib_modify_qp_is_ok(old_qp_state, + new_qp_state, + ibqp->qp_type, + attr_mask )) { + QL_DPRINT12(ha, + "invalid attribute mask=0x%x" + " specified for qpn=0x%x of type=0x%x \n" + " old_qp_state=0x%x, new_qp_state=0x%x\n", + attr_mask, qp->qp_id, ibqp->qp_type, + old_qp_state, new_qp_state); + rc = -EINVAL; + goto err; + } + +#endif /* #if __FreeBSD_version >= 1100000 */ + } + /* translate the masks... */ + if (attr_mask & IB_QP_STATE) { + SET_FIELD(qp_params.modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_NEW_STATE, 1); + qp_params.new_state = qlnxr_get_state_from_ibqp(attr->qp_state); + } + + // TBD consider changing ecore to be a flag as well... + if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) + qp_params.sqd_async = true; + + if (attr_mask & IB_QP_PKEY_INDEX) { + SET_FIELD(qp_params.modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_PKEY, + 1); + if (attr->pkey_index >= QLNXR_ROCE_PKEY_TABLE_LEN) { + rc = -EINVAL; + goto err; + } + + qp_params.pkey = QLNXR_ROCE_PKEY_DEFAULT; + } + + if (attr_mask & IB_QP_QKEY) { + qp->qkey = attr->qkey; + } + + /* tbd consider splitting in ecore.. 
 */
+	if (attr_mask & IB_QP_ACCESS_FLAGS) {
+		SET_FIELD(qp_params.modify_flags,
+			ECORE_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
+		qp_params.incoming_rdma_read_en =
+			attr->qp_access_flags & IB_ACCESS_REMOTE_READ;
+		qp_params.incoming_rdma_write_en =
+			attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE;
+		qp_params.incoming_atomic_en =
+			attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC;
+	}
+
+	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
+		if (attr_mask & IB_QP_PATH_MTU) {
+			if (attr->path_mtu < IB_MTU_256 ||
+			    attr->path_mtu > IB_MTU_4096) {
+
+				QL_DPRINT12(ha,
+					"Only MTU sizes of 256, 512, 1024,"
+					" 2048 and 4096 are supported;"
+					" attr->path_mtu = [%d]\n",
+					attr->path_mtu);
+
+				rc = -EINVAL;
+				goto err;
+			}
+			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
+				ib_mtu_enum_to_int(
+					iboe_get_mtu(dev->ha->ifp->if_mtu)));
+		}
+
+		if (qp->mtu == 0) {
+			qp->mtu = ib_mtu_enum_to_int(
+				iboe_get_mtu(dev->ha->ifp->if_mtu));
+			QL_DPRINT12(ha, "fixing zeroed MTU to qp->mtu = %d\n",
+				qp->mtu);
+		}
+
+		SET_FIELD(qp_params.modify_flags,
+			ECORE_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR,
+			1);
+
+		qp_params.traffic_class_tos = attr->ah_attr.grh.traffic_class;
+		qp_params.flow_label = attr->ah_attr.grh.flow_label;
+		qp_params.hop_limit_ttl = attr->ah_attr.grh.hop_limit;
+
+		qp->sgid_idx = attr->ah_attr.grh.sgid_index;
+
+		get_gid_info(ibqp, attr, attr_mask, dev, qp, &qp_params);
+
+		rc = qlnxr_get_dmac(dev, &attr->ah_attr, qp_params.remote_mac_addr);
+		if (rc)
+			return rc;
+
+		qp_params.use_local_mac = true;
+		memcpy(qp_params.local_mac_addr, dev->ha->primary_mac, ETH_ALEN);
+
+		QL_DPRINT12(ha, "dgid=0x%x:0x%x:0x%x:0x%x\n",
+			qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
+			qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
+		QL_DPRINT12(ha, "sgid=0x%x:0x%x:0x%x:0x%x\n",
+			qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
+			qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
+		QL_DPRINT12(ha,
+			"remote_mac=[0x%x:0x%x:0x%x:0x%x:0x%x:0x%x]\n",
+			qp_params.remote_mac_addr[0],
+			qp_params.remote_mac_addr[1],
+			qp_params.remote_mac_addr[2],
+			qp_params.remote_mac_addr[3],
+			qp_params.remote_mac_addr[4],
+			qp_params.remote_mac_addr[5]);
+
+		qp_params.mtu = qp->mtu;
+	}
+
+	if (qp_params.mtu == 0) {
+		/* stay with current MTU */
+		if (qp->mtu) {
+			qp_params.mtu = qp->mtu;
+		} else {
+			qp_params.mtu = ib_mtu_enum_to_int(
+				iboe_get_mtu(dev->ha->ifp->if_mtu));
+		}
+	}
+
+	if (attr_mask & IB_QP_TIMEOUT) {
+		SET_FIELD(qp_params.modify_flags, \
+			ECORE_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
+
+		qp_params.ack_timeout = attr->timeout;
+		if (attr->timeout) {
+			u32 temp;
+
+			/* 12.7.34 LOCAL ACK TIMEOUT
+			 * Value representing the transport (ACK) timeout for
+			 * use by the remote, expressed as
+			 * 4.096 us * 2^(Local ACK Timeout).
+			 */
+			/* We use 1UL since the temporal value may overflow
+			 * 32 bits
+			 */
+			temp = 4096 * (1UL << attr->timeout) / 1000 / 1000;
+			qp_params.ack_timeout = temp; /* FW requires [msec] */
+		}
+		else
+			qp_params.ack_timeout = 0; /* infinite */
+	}
+	if (attr_mask & IB_QP_RETRY_CNT) {
+		SET_FIELD(qp_params.modify_flags,\
+			ECORE_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
+		qp_params.retry_cnt = attr->retry_cnt;
+	}
+
+	if (attr_mask & IB_QP_RNR_RETRY) {
+		SET_FIELD(qp_params.modify_flags,
+			ECORE_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT,
+			1);
+		qp_params.rnr_retry_cnt = attr->rnr_retry;
+	}
+
+	if (attr_mask & IB_QP_RQ_PSN) {
+		SET_FIELD(qp_params.modify_flags,
+			ECORE_ROCE_MODIFY_QP_VALID_RQ_PSN,
+			1);
+		qp_params.rq_psn = attr->rq_psn;
+		qp->rq_psn = attr->rq_psn;
+	}
+
+	if (attr_mask &
IB_QP_MAX_QP_RD_ATOMIC) { + if (attr->max_rd_atomic > qattr->max_qp_req_rd_atomic_resc) { + rc = -EINVAL; + QL_DPRINT12(ha, + "unsupported max_rd_atomic=%d, supported=%d\n", + attr->max_rd_atomic, + qattr->max_qp_req_rd_atomic_resc); + goto err; + } + + SET_FIELD(qp_params.modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, + 1); + qp_params.max_rd_atomic_req = attr->max_rd_atomic; + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + SET_FIELD(qp_params.modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, + 1); + qp_params.min_rnr_nak_timer = attr->min_rnr_timer; + } + + if (attr_mask & IB_QP_SQ_PSN) { + SET_FIELD(qp_params.modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_SQ_PSN, + 1); + qp_params.sq_psn = attr->sq_psn; + qp->sq_psn = attr->sq_psn; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + if (attr->max_dest_rd_atomic > + qattr->max_qp_resp_rd_atomic_resc) { + QL_DPRINT12(ha, + "unsupported max_dest_rd_atomic=%d, " + "supported=%d\n", + attr->max_dest_rd_atomic, + qattr->max_qp_resp_rd_atomic_resc); + + rc = -EINVAL; + goto err; + } + + SET_FIELD(qp_params.modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, + 1); + qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic; + } + + if (attr_mask & IB_QP_DEST_QPN) { + SET_FIELD(qp_params.modify_flags, + ECORE_ROCE_MODIFY_QP_VALID_DEST_QP, + 1); + + qp_params.dest_qp = attr->dest_qp_num; + qp->dest_qp_num = attr->dest_qp_num; + } + + /* + * Update the QP state before the actual ramrod to prevent a race with + * fast path. Modifying the QP state to error will cause the device to + * flush the CQEs and while polling the flushed CQEs will considered as + * a potential issue if the QP isn't in error state. + */ + if ((attr_mask & IB_QP_STATE) && (qp->qp_type != IB_QPT_GSI) && + (!udata) && (qp_params.new_state == ECORE_ROCE_QP_STATE_ERR)) + qp->state = ECORE_ROCE_QP_STATE_ERR; + + if (qp->qp_type != IB_QPT_GSI) + rc = ecore_rdma_modify_qp(dev->rdma_ctx, qp->ecore_qp, &qp_params); + + if (attr_mask & IB_QP_STATE) { + if ((qp->qp_type != IB_QPT_GSI) && (!udata)) + rc = qlnxr_update_qp_state(dev, qp, qp_params.new_state); + qp->state = qp_params.new_state; + } + +err: + QL_DPRINT12(ha, "exit\n"); + return rc; +} + +static int +qlnxr_to_ib_qp_acc_flags(struct ecore_rdma_query_qp_out_params *params) +{ + int ib_qp_acc_flags = 0; + + if (params->incoming_rdma_write_en) + ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE; + if (params->incoming_rdma_read_en) + ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ; + if (params->incoming_atomic_en) + ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC; + if (true) /* FIXME -> local write ?? 
 */
+		ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
+
+	return ib_qp_acc_flags;
+}
+
+static enum ib_mtu
+qlnxr_mtu_int_to_enum(u16 mtu)
+{
+	enum ib_mtu ib_mtu_size;
+
+	switch (mtu) {
+	case 256:
+		ib_mtu_size = IB_MTU_256;
+		break;
+
+	case 512:
+		ib_mtu_size = IB_MTU_512;
+		break;
+
+	case 1024:
+		ib_mtu_size = IB_MTU_1024;
+		break;
+
+	case 2048:
+		ib_mtu_size = IB_MTU_2048;
+		break;
+
+	case 4096:
+		ib_mtu_size = IB_MTU_4096;
+		break;
+
+	default:
+		ib_mtu_size = IB_MTU_1024;
+		break;
+	}
+	return (ib_mtu_size);
+}
+
+int
+qlnxr_query_qp(struct ib_qp *ibqp,
+	struct ib_qp_attr *qp_attr,
+	int attr_mask,
+	struct ib_qp_init_attr *qp_init_attr)
+{
+	int rc = 0;
+	struct ecore_rdma_query_qp_out_params params;
+	struct qlnxr_qp *qp = get_qlnxr_qp(ibqp);
+	struct qlnxr_dev *dev = qp->dev;
+	qlnx_host_t *ha;
+
+	ha = dev->ha;
+
+	QL_DPRINT12(ha, "enter\n");
+
+	memset(&params, 0, sizeof(params));
+
+	rc = ecore_rdma_query_qp(dev->rdma_ctx, qp->ecore_qp, &params);
+	if (rc)
+		goto err;
+
+	memset(qp_attr, 0, sizeof(*qp_attr));
+	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+	qp_attr->qp_state = qlnxr_get_ibqp_state(params.state);
+	qp_attr->cur_qp_state = qlnxr_get_ibqp_state(params.state);
+
+	/* In some cases in iWARP qelr will ask for the state only */
+	if (QLNX_IS_IWARP(dev) && (attr_mask == IB_QP_STATE)) {
+		QL_DPRINT11(ha, "only state requested\n");
+		return 0;
+	}
+
+	qp_attr->path_mtu = qlnxr_mtu_int_to_enum(params.mtu);
+	qp_attr->path_mig_state = IB_MIG_MIGRATED;
+	qp_attr->rq_psn = params.rq_psn;
+	qp_attr->sq_psn = params.sq_psn;
+	qp_attr->dest_qp_num = params.dest_qp;
+
+	qp_attr->qp_access_flags = qlnxr_to_ib_qp_acc_flags(&params);
+
+	QL_DPRINT12(ha, "qp_state = 0x%x cur_qp_state = 0x%x "
+		"path_mtu = %d qp_access_flags = 0x%x\n",
+		qp_attr->qp_state, qp_attr->cur_qp_state, qp_attr->path_mtu,
+		qp_attr->qp_access_flags);
+
+	qp_attr->cap.max_send_wr = qp->sq.max_wr;
+	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
+	qp_attr->cap.max_send_sge = qp->sq.max_sges;
+	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
+	qp_attr->cap.max_inline_data = qp->max_inline_data;
+	qp_init_attr->cap = qp_attr->cap;
+
+	memcpy(&qp_attr->ah_attr.grh.dgid.raw[0], &params.dgid.bytes[0],
+		sizeof(qp_attr->ah_attr.grh.dgid.raw));
+
+	qp_attr->ah_attr.grh.flow_label = params.flow_label;
+	qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
+	qp_attr->ah_attr.grh.hop_limit = params.hop_limit_ttl;
+	qp_attr->ah_attr.grh.traffic_class = params.traffic_class_tos;
+
+	qp_attr->ah_attr.ah_flags = IB_AH_GRH;
+	qp_attr->ah_attr.port_num = 1; /* FIXME -> check this */
+	qp_attr->ah_attr.sl = 0; /* FIXME -> check this */
+	qp_attr->timeout = params.timeout;
+	qp_attr->rnr_retry = params.rnr_retry;
+	qp_attr->retry_cnt = params.retry_cnt;
+	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
+	qp_attr->pkey_index = params.pkey_index;
+	qp_attr->port_num = 1; /* FIXME -> check this */
+	qp_attr->ah_attr.src_path_bits = 0;
+	qp_attr->ah_attr.static_rate = 0;
+	qp_attr->alt_pkey_index = 0;
+	qp_attr->alt_port_num = 0;
+	qp_attr->alt_timeout = 0;
+	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
+
+	qp_attr->sq_draining = (params.state == ECORE_ROCE_QP_STATE_SQD) ? 1 : 0;
+	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
+	qp_attr->max_rd_atomic = params.max_rd_atomic;
+	qp_attr->en_sqd_async_notify = (params.sqd_async)?
1 : 0;
+
+	QL_DPRINT12(ha, "max_inline_data=%d\n",
+		qp_attr->cap.max_inline_data);
+
+err:
+	QL_DPRINT12(ha, "exit\n");
+	return rc;
+}
+
+
+static void
+qlnxr_cleanup_user(struct qlnxr_dev *dev, struct qlnxr_qp *qp)
+{
+	qlnx_host_t *ha;
+
+	ha = dev->ha;
+
+	QL_DPRINT12(ha, "enter\n");
+
+	if (qp->usq.umem)
+		ib_umem_release(qp->usq.umem);
+
+	qp->usq.umem = NULL;
+
+	if (qp->urq.umem)
+		ib_umem_release(qp->urq.umem);
+
+	qp->urq.umem = NULL;
+
+	QL_DPRINT12(ha, "exit\n");
+	return;
+}
+
+static void
+qlnxr_cleanup_kernel(struct qlnxr_dev *dev, struct qlnxr_qp *qp)
+{
+	qlnx_host_t *ha;
+
+	ha = dev->ha;
+
+	QL_DPRINT12(ha, "enter\n");
+
+	if (qlnxr_qp_has_sq(qp)) {
+		QL_DPRINT12(ha, "freeing SQ\n");
+		ha->qlnxr_debug = 1;
+//		ecore_chain_free(dev->cdev, &qp->sq.pbl);
+		ha->qlnxr_debug = 0;
+		kfree(qp->wqe_wr_id);
+	}
+
+	if (qlnxr_qp_has_rq(qp)) {
+		QL_DPRINT12(ha, "freeing RQ\n");
+		ha->qlnxr_debug = 1;
+		// ecore_chain_free(dev->cdev, &qp->rq.pbl);
+		ha->qlnxr_debug = 0;
+		kfree(qp->rqe_wr_id);
+	}
+
+	QL_DPRINT12(ha, "exit\n");
+	return;
+}
+
+int
+qlnxr_free_qp_resources(struct qlnxr_dev *dev,
+	struct qlnxr_qp *qp)
+{
+	int rc = 0;
+	qlnx_host_t *ha;
+	struct ecore_rdma_destroy_qp_out_params d_out_params;
+
+	ha = dev->ha;
+
+	QL_DPRINT12(ha, "enter\n");
+
+#if 0
+	if (qp->qp_type != IB_QPT_GSI) {
+		rc = ecore_rdma_destroy_qp(dev->rdma_ctx, qp->ecore_qp,
+				&d_out_params);
+		if (rc)
+			return rc;
+	}
+
+	if (qp->ibqp.uobject && qp->ibqp.uobject->context)
+		qlnxr_cleanup_user(dev, qp);
+	else
+		qlnxr_cleanup_kernel(dev, qp);
+#endif
+
+	if (qp->ibqp.uobject && qp->ibqp.uobject->context)
+		qlnxr_cleanup_user(dev, qp);
+	else
+		qlnxr_cleanup_kernel(dev, qp);
+
+	if (qp->qp_type != IB_QPT_GSI) {
+		rc = ecore_rdma_destroy_qp(dev->rdma_ctx, qp->ecore_qp,
+				&d_out_params);
+		if (rc)
+			return rc;
+	}
+
+	QL_DPRINT12(ha, "exit\n");
+	return 0;
+}
+
+int
+qlnxr_destroy_qp(struct ib_qp *ibqp)
+{
+	struct qlnxr_qp *qp = get_qlnxr_qp(ibqp);
+	struct qlnxr_dev *dev = qp->dev;
+	int rc = 0;
+	struct ib_qp_attr attr;
+	int attr_mask = 0;
+	qlnx_host_t *ha;
+
+	ha = dev->ha;
+
+	QL_DPRINT12(ha, "enter qp = %p, qp_type=%d\n", qp, qp->qp_type);
+
+	qp->destroyed = 1;
+
+	if (QLNX_IS_ROCE(dev) &&
+	    (qp->state != ECORE_ROCE_QP_STATE_RESET) &&
+	    (qp->state != ECORE_ROCE_QP_STATE_ERR) &&
+	    (qp->state != ECORE_ROCE_QP_STATE_INIT)) {
+
+		attr.qp_state = IB_QPS_ERR;
+		attr_mask |= IB_QP_STATE;
+
+		/* change the QP state to ERROR */
+		qlnxr_modify_qp(ibqp, &attr, attr_mask, NULL);
+	}
+
+	if (qp->qp_type == IB_QPT_GSI)
+		qlnxr_destroy_gsi_qp(dev);
+
+	qp->sig = ~qp->sig;
+
+	qlnxr_free_qp_resources(dev, qp);
+
+	if (atomic_dec_and_test(&qp->refcnt)) {
+		/* TODO: only for iWARP?
*/ + qlnxr_idr_remove(dev, qp->qp_id); + kfree(qp); + } + + QL_DPRINT12(ha, "exit\n"); + return rc; +} + +static inline int +qlnxr_wq_is_full(struct qlnxr_qp_hwq_info *wq) +{ + return (((wq->prod + 1) % wq->max_wr) == wq->cons); +} + +static int +sge_data_len(struct ib_sge *sg_list, int num_sge) +{ + int i, len = 0; + for (i = 0; i < num_sge; i++) + len += sg_list[i].length; + return len; +} + +static void +swap_wqe_data64(u64 *p) +{ + int i; + + for (i = 0; i < QLNXR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++) + *p = cpu_to_be64(cpu_to_le64(*p)); +} + + +static u32 +qlnxr_prepare_sq_inline_data(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + u8 *wqe_size, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr, + u8 *bits, + u8 bit) +{ + int i, seg_siz; + char *seg_prt, *wqe; + u32 data_size = sge_data_len(wr->sg_list, wr->num_sge); + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter[%d]\n", data_size); + + if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) { + QL_DPRINT12(ha, + "Too much inline data in WR:[%d, %d]\n", + data_size, ROCE_REQ_MAX_INLINE_DATA_SIZE); + *bad_wr = wr; + return 0; + } + + if (!data_size) + return data_size; + + /* set the bit */ + *bits |= bit; + + seg_prt = wqe = NULL; + seg_siz = 0; + + /* copy data inline */ + for (i = 0; i < wr->num_sge; i++) { + u32 len = wr->sg_list[i].length; + void *src = (void *)(uintptr_t)wr->sg_list[i].addr; + + while (len > 0) { + u32 cur; + + /* new segment required */ + if (!seg_siz) { + wqe = (char *)ecore_chain_produce(&qp->sq.pbl); + seg_prt = wqe; + seg_siz = sizeof(struct rdma_sq_common_wqe); + (*wqe_size)++; + } + + /* calculate currently allowed length */ + cur = MIN(len, seg_siz); + + memcpy(seg_prt, src, cur); + + /* update segment variables */ + seg_prt += cur; + seg_siz -= cur; + /* update sge variables */ + src += cur; + len -= cur; + + /* swap fully-completed segments */ + if (!seg_siz) + swap_wqe_data64((u64 *)wqe); + } + } + + /* swap last not completed segment */ + if (seg_siz) + swap_wqe_data64((u64 *)wqe); + + QL_DPRINT12(ha, "exit\n"); + return data_size; +} + +static u32 +qlnxr_prepare_sq_sges(struct qlnxr_dev *dev, struct qlnxr_qp *qp, + u8 *wqe_size, struct ib_send_wr *wr) +{ + int i; + u32 data_size = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter wr->num_sge = %d \n", wr->num_sge); + + for (i = 0; i < wr->num_sge; i++) { + struct rdma_sq_sge *sge = ecore_chain_produce(&qp->sq.pbl); + + TYPEPTR_ADDR_SET(sge, addr, wr->sg_list[i].addr); + sge->l_key = cpu_to_le32(wr->sg_list[i].lkey); + sge->length = cpu_to_le32(wr->sg_list[i].length); + data_size += wr->sg_list[i].length; + } + + if (wqe_size) + *wqe_size += wr->num_sge; + + QL_DPRINT12(ha, "exit data_size = %d\n", data_size); + return data_size; +} + +static u32 +qlnxr_prepare_sq_rdma_data(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct rdma_sq_rdma_wqe_1st *rwqe, + struct rdma_sq_rdma_wqe_2nd *rwqe2, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + qlnx_host_t *ha; + u32 ret = 0; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey); + TYPEPTR_ADDR_SET(rwqe2, remote_va, rdma_wr(wr)->remote_addr); + + if (wr->send_flags & IB_SEND_INLINE) { + u8 flags = 0; + SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1); + return qlnxr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, + wr, bad_wr, &rwqe->flags, flags); + } + + ret = qlnxr_prepare_sq_sges(dev, qp, &rwqe->wqe_size, wr); + + QL_DPRINT12(ha, "exit ret = 0x%x\n", ret); + + return (ret); +} + +static u32 
+qlnxr_prepare_sq_send_data(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct rdma_sq_send_wqe *swqe, + struct rdma_sq_send_wqe *swqe2, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + qlnx_host_t *ha; + u32 ret = 0; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memset(swqe2, 0, sizeof(*swqe2)); + + if (wr->send_flags & IB_SEND_INLINE) { + u8 flags = 0; + SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1); + return qlnxr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, + wr, bad_wr, &swqe->flags, flags); + } + + ret = qlnxr_prepare_sq_sges(dev, qp, &swqe->wqe_size, wr); + + QL_DPRINT12(ha, "exit ret = 0x%x\n", ret); + + return (ret); +} + +static void +qlnx_handle_completed_mrs(struct qlnxr_dev *dev, struct mr_info *info) +{ + qlnx_host_t *ha; + + ha = dev->ha; + + int work = info->completed - info->completed_handled - 1; + + QL_DPRINT12(ha, "enter [%d]\n", work); + + while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) { + struct qlnxr_pbl *pbl; + + /* Free all the page list that are possible to be freed + * (all the ones that were invalidated), under the assumption + * that if an FMR was completed successfully that means that + * if there was an invalidate operation before it also ended + */ + pbl = list_first_entry(&info->inuse_pbl_list, + struct qlnxr_pbl, + list_entry); + list_del(&pbl->list_entry); + list_add_tail(&pbl->list_entry, &info->free_pbl_list); + info->completed_handled++; + } + + QL_DPRINT12(ha, "exit\n"); + return; +} + +#if __FreeBSD_version >= 1102000 + +static int qlnxr_prepare_reg(struct qlnxr_qp *qp, + struct rdma_sq_fmr_wqe_1st *fwqe1, + struct ib_reg_wr *wr) +{ + struct qlnxr_mr *mr = get_qlnxr_mr(wr->mr); + struct rdma_sq_fmr_wqe_2nd *fwqe2; + + fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)ecore_chain_produce(&qp->sq.pbl); + fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova); + fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova); + fwqe1->l_key = wr->key; + + fwqe2->access_ctrl = 0; + + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ, + !!(wr->access & IB_ACCESS_REMOTE_READ)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE, + !!(wr->access & IB_ACCESS_REMOTE_WRITE)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC, + !!(wr->access & IB_ACCESS_REMOTE_ATOMIC)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE, + !!(wr->access & IB_ACCESS_LOCAL_WRITE)); + fwqe2->fmr_ctrl = 0; + + SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG, + ilog2(mr->ibmr.page_size) - 12); + + fwqe2->length_hi = 0; /* TODO - figure out why length is only 32bit.. 
*/ + fwqe2->length_lo = mr->ibmr.length; + fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa); + fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa); + + qp->wqe_wr_id[qp->sq.prod].mr = mr; + + return 0; +} + +#else + +static void +build_frmr_pbes(struct qlnxr_dev *dev, struct ib_send_wr *wr, + struct mr_info *info) +{ + int i; + u64 buf_addr = 0; + int num_pbes, total_num_pbes = 0; + struct regpair *pbe; + struct qlnxr_pbl *pbl_tbl = info->pbl_table; + struct qlnxr_pbl_info *pbl_info = &info->pbl_info; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + pbe = (struct regpair *)pbl_tbl->va; + num_pbes = 0; + + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + buf_addr = wr->wr.fast_reg.page_list->page_list[i]; + pbe->lo = cpu_to_le32((u32)buf_addr); + pbe->hi = cpu_to_le32((u32)upper_32_bits(buf_addr)); + + num_pbes += 1; + pbe++; + total_num_pbes++; + + if (total_num_pbes == pbl_info->num_pbes) + return; + + /* if the given pbl is full storing the pbes, + * move to next pbl. + */ + if (num_pbes == + (pbl_info->pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct regpair *)pbl_tbl->va; + num_pbes = 0; + } + } + QL_DPRINT12(ha, "exit\n"); + + return; +} + +static int +qlnxr_prepare_safe_pbl(struct qlnxr_dev *dev, struct mr_info *info) +{ + int rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (info->completed == 0) { + //DP_VERBOSE(dev, QLNXR_MSG_MR, "First FMR\n"); + /* first fmr */ + return 0; + } + + qlnx_handle_completed_mrs(dev, info); + + list_add_tail(&info->pbl_table->list_entry, &info->inuse_pbl_list); + + if (list_empty(&info->free_pbl_list)) { + info->pbl_table = qlnxr_alloc_pbl_tbl(dev, &info->pbl_info, + GFP_ATOMIC); + } else { + info->pbl_table = list_first_entry(&info->free_pbl_list, + struct qlnxr_pbl, + list_entry); + list_del(&info->pbl_table->list_entry); + } + + if (!info->pbl_table) + rc = -ENOMEM; + + QL_DPRINT12(ha, "exit\n"); + return rc; +} + +static inline int +qlnxr_prepare_fmr(struct qlnxr_qp *qp, + struct rdma_sq_fmr_wqe_1st *fwqe1, + struct ib_send_wr *wr) +{ + struct qlnxr_dev *dev = qp->dev; + u64 fbo; + struct qlnxr_fast_reg_page_list *frmr_list = + get_qlnxr_frmr_list(wr->wr.fast_reg.page_list); + struct rdma_sq_fmr_wqe *fwqe2 = + (struct rdma_sq_fmr_wqe *)ecore_chain_produce(&qp->sq.pbl); + int rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (wr->wr.fast_reg.page_list_len == 0) + BUG(); + + rc = qlnxr_prepare_safe_pbl(dev, &frmr_list->info); + if (rc) + return rc; + + fwqe1->addr.hi = upper_32_bits(wr->wr.fast_reg.iova_start); + fwqe1->addr.lo = lower_32_bits(wr->wr.fast_reg.iova_start); + fwqe1->l_key = wr->wr.fast_reg.rkey; + + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_REMOTE_READ, + !!(wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_REMOTE_WRITE, + !!(wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_ENABLE_ATOMIC, + !!(wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_ATOMIC)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_LOCAL_READ, 1); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_LOCAL_WRITE, + !!(wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE)); + + fwqe2->fmr_ctrl = 0; + + SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG, + ilog2(1 << wr->wr.fast_reg.page_shift) - 12); + SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_ZERO_BASED, 0); + + fwqe2->length_hi = 0; /* Todo - figure this out... 
why length is only 32bit.. */ + fwqe2->length_lo = wr->wr.fast_reg.length; + fwqe2->pbl_addr.hi = upper_32_bits(frmr_list->info.pbl_table->pa); + fwqe2->pbl_addr.lo = lower_32_bits(frmr_list->info.pbl_table->pa); + + /* produce another wqe for fwqe3 */ + ecore_chain_produce(&qp->sq.pbl); + + fbo = wr->wr.fast_reg.iova_start - + (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK); + + QL_DPRINT12(ha, "wr.fast_reg.iova_start = %p rkey=%x addr=%x:%x" + " length = %x pbl_addr %x:%x\n", + wr->wr.fast_reg.iova_start, wr->wr.fast_reg.rkey, + fwqe1->addr.hi, fwqe1->addr.lo, fwqe2->length_lo, + fwqe2->pbl_addr.hi, fwqe2->pbl_addr.lo); + + build_frmr_pbes(dev, wr, &frmr_list->info); + + qp->wqe_wr_id[qp->sq.prod].frmr = frmr_list; + + QL_DPRINT12(ha, "exit\n"); + return 0; +} + +#endif /* #if __FreeBSD_version >= 1102000 */ + +static enum ib_wc_opcode +qlnxr_ib_to_wc_opcode(enum ib_wr_opcode opcode) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + return IB_WC_RDMA_WRITE; + case IB_WR_SEND_WITH_IMM: + case IB_WR_SEND: + case IB_WR_SEND_WITH_INV: + return IB_WC_SEND; + case IB_WR_RDMA_READ: + return IB_WC_RDMA_READ; + case IB_WR_ATOMIC_CMP_AND_SWP: + return IB_WC_COMP_SWAP; + case IB_WR_ATOMIC_FETCH_AND_ADD: + return IB_WC_FETCH_ADD; + +#if __FreeBSD_version >= 1102000 + case IB_WR_REG_MR: + return IB_WC_REG_MR; +#else + case IB_WR_FAST_REG_MR: + return IB_WC_FAST_REG_MR; +#endif /* #if __FreeBSD_version >= 1102000 */ + + case IB_WR_LOCAL_INV: + return IB_WC_LOCAL_INV; + default: + return IB_WC_SEND; + } +} +static inline bool +qlnxr_can_post_send(struct qlnxr_qp *qp, struct ib_send_wr *wr) +{ + int wq_is_full, err_wr, pbl_is_full; + struct qlnxr_dev *dev = qp->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter[qp, wr] = [%p,%p]\n", qp, wr); + + /* prevent SQ overflow and/or processing of a bad WR */ + err_wr = wr->num_sge > qp->sq.max_sges; + wq_is_full = qlnxr_wq_is_full(&qp->sq); + pbl_is_full = ecore_chain_get_elem_left_u32(&qp->sq.pbl) < + QLNXR_MAX_SQE_ELEMENTS_PER_SQE; + if (wq_is_full || err_wr || pbl_is_full) { + if (wq_is_full && + !(qp->err_bitmap & QLNXR_QP_ERR_SQ_FULL)) { + + qp->err_bitmap |= QLNXR_QP_ERR_SQ_FULL; + + QL_DPRINT12(ha, + "error: WQ is full. Post send on QP failed" + " (this error appears only once) " + "[qp, wr, qp->err_bitmap]=[%p, %p, 0x%x]\n", + qp, wr, qp->err_bitmap); + } + + if (err_wr && + !(qp->err_bitmap & QLNXR_QP_ERR_BAD_SR)) { + + qp->err_bitmap |= QLNXR_QP_ERR_BAD_SR; + + QL_DPRINT12(ha, + "error: WQ is bad. Post send on QP failed" + " (this error appears only once) " + "[qp, wr, qp->err_bitmap]=[%p, %p, 0x%x]\n", + qp, wr, qp->err_bitmap); + } + + if (pbl_is_full && + !(qp->err_bitmap & QLNXR_QP_ERR_SQ_PBL_FULL)) { + + qp->err_bitmap |= QLNXR_QP_ERR_SQ_PBL_FULL; + + QL_DPRINT12(ha, + "error: WQ PBL is full. 
Post send on QP failed" + " (this error appears only once) " + "[qp, wr, qp->err_bitmap]=[%p, %p, 0x%x]\n", + qp, wr, qp->err_bitmap); + } + return false; + } + QL_DPRINT12(ha, "exit[qp, wr] = [%p,%p]\n", qp, wr); + return true; +} + +int +qlnxr_post_send(struct ib_qp *ibqp, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct qlnxr_dev *dev = get_qlnxr_dev(ibqp->device); + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + unsigned long flags; + int status = 0, rc = 0; + bool comp; + qlnx_host_t *ha; + uint32_t reg_addr; + + *bad_wr = NULL; + ha = dev->ha; + + QL_DPRINT12(ha, "exit[ibqp, wr, bad_wr] = [%p, %p, %p]\n", + ibqp, wr, bad_wr); + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + if (qp->qp_type == IB_QPT_GSI) + return qlnxr_gsi_post_send(ibqp, wr, bad_wr); + + spin_lock_irqsave(&qp->q_lock, flags); + + if (QLNX_IS_ROCE(dev) && (qp->state != ECORE_ROCE_QP_STATE_RTS) && + (qp->state != ECORE_ROCE_QP_STATE_ERR) && + (qp->state != ECORE_ROCE_QP_STATE_SQD)) { + spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; + QL_DPRINT11(ha, "QP in wrong state! QP icid=0x%x state %d\n", + qp->icid, qp->state); + return -EINVAL; + } + + if (!wr) { + QL_DPRINT11(ha, "Got an empty post send???\n"); + } + + while (wr) { + struct rdma_sq_common_wqe *wqe; + struct rdma_sq_send_wqe *swqe; + struct rdma_sq_send_wqe *swqe2; + struct rdma_sq_rdma_wqe_1st *rwqe; + struct rdma_sq_rdma_wqe_2nd *rwqe2; + struct rdma_sq_local_inv_wqe *iwqe; + struct rdma_sq_atomic_wqe *awqe1; + struct rdma_sq_atomic_wqe *awqe2; + struct rdma_sq_atomic_wqe *awqe3; + struct rdma_sq_fmr_wqe_1st *fwqe1; + + if (!qlnxr_can_post_send(qp, wr)) { + status = -ENOMEM; + *bad_wr = wr; + break; + } + + wqe = ecore_chain_produce(&qp->sq.pbl); + + qp->wqe_wr_id[qp->sq.prod].signaled = + !!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled; + + /* common fields */ + wqe->flags = 0; + wqe->flags |= (RDMA_SQ_SEND_WQE_COMP_FLG_MASK << + RDMA_SQ_SEND_WQE_COMP_FLG_SHIFT); + + SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG, \ + !!(wr->send_flags & IB_SEND_SOLICITED)); + + comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || + (qp->signaled); + + SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp); + SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG, \ + !!(wr->send_flags & IB_SEND_FENCE)); + + wqe->prev_wqe_size = qp->prev_wqe_size; + + qp->wqe_wr_id[qp->sq.prod].opcode = qlnxr_ib_to_wc_opcode(wr->opcode); + + + switch (wr->opcode) { + + case IB_WR_SEND_WITH_IMM: + + wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM; + swqe = (struct rdma_sq_send_wqe *)wqe; + swqe->wqe_size = 2; + swqe2 = (struct rdma_sq_send_wqe *) + ecore_chain_produce(&qp->sq.pbl); + swqe->inv_key_or_imm_data = + cpu_to_le32(wr->ex.imm_data); + swqe->length = cpu_to_le32( + qlnxr_prepare_sq_send_data(dev, + qp, swqe, swqe2, wr, + bad_wr)); + + qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; + qp->prev_wqe_size = swqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; + + QL_DPRINT12(ha, "SEND w/ IMM length = %d imm data=%x\n", + swqe->length, wr->ex.imm_data); + + break; + + case IB_WR_SEND: + + wqe->req_type = RDMA_SQ_REQ_TYPE_SEND; + swqe = (struct rdma_sq_send_wqe *)wqe; + + swqe->wqe_size = 2; + swqe2 = (struct rdma_sq_send_wqe *) + ecore_chain_produce(&qp->sq.pbl); + swqe->length = cpu_to_le32( + qlnxr_prepare_sq_send_data(dev, + qp, swqe, swqe2, wr, + bad_wr)); + qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; + qp->prev_wqe_size = swqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; + + 
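+		/* As in the IB_WR_SEND_WITH_IMM case above, a second SQ
+		 * element is produced up front; qlnxr_prepare_sq_send_data()
+		 * then fills in either inline data or the SGEs and updates
+		 * swqe->wqe_size accordingly.
+		 */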
QL_DPRINT12(ha, "SEND w/o IMM length = %d\n", + swqe->length); + + break; + + case IB_WR_SEND_WITH_INV: + + wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE; + swqe = (struct rdma_sq_send_wqe *)wqe; + swqe2 = (struct rdma_sq_send_wqe *) + ecore_chain_produce(&qp->sq.pbl); + swqe->wqe_size = 2; + swqe->inv_key_or_imm_data = + cpu_to_le32(wr->ex.invalidate_rkey); + swqe->length = cpu_to_le32(qlnxr_prepare_sq_send_data(dev, + qp, swqe, swqe2, wr, bad_wr)); + qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; + qp->prev_wqe_size = swqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; + + QL_DPRINT12(ha, "SEND w INVALIDATE length = %d\n", + swqe->length); + break; + + case IB_WR_RDMA_WRITE_WITH_IMM: + + wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM; + rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; + + rwqe->wqe_size = 2; + rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data)); + rwqe2 = (struct rdma_sq_rdma_wqe_2nd *) + ecore_chain_produce(&qp->sq.pbl); + rwqe->length = cpu_to_le32(qlnxr_prepare_sq_rdma_data(dev, + qp, rwqe, rwqe2, wr, bad_wr)); + qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; + qp->prev_wqe_size = rwqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; + + QL_DPRINT12(ha, + "RDMA WRITE w/ IMM length = %d imm data=%x\n", + rwqe->length, rwqe->imm_data); + + break; + + case IB_WR_RDMA_WRITE: + + wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR; + rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; + + rwqe->wqe_size = 2; + rwqe2 = (struct rdma_sq_rdma_wqe_2nd *) + ecore_chain_produce(&qp->sq.pbl); + rwqe->length = cpu_to_le32(qlnxr_prepare_sq_rdma_data(dev, + qp, rwqe, rwqe2, wr, bad_wr)); + qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; + qp->prev_wqe_size = rwqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; + + QL_DPRINT12(ha, + "RDMA WRITE w/o IMM length = %d\n", + rwqe->length); + + break; + + case IB_WR_RDMA_READ_WITH_INV: + + QL_DPRINT12(ha, + "RDMA READ WITH INVALIDATE not supported\n"); + + *bad_wr = wr; + rc = -EINVAL; + + break; + + case IB_WR_RDMA_READ: + + wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD; + rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; + + rwqe->wqe_size = 2; + rwqe2 = (struct rdma_sq_rdma_wqe_2nd *) + ecore_chain_produce(&qp->sq.pbl); + rwqe->length = cpu_to_le32(qlnxr_prepare_sq_rdma_data(dev, + qp, rwqe, rwqe2, wr, bad_wr)); + + qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; + qp->prev_wqe_size = rwqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; + + QL_DPRINT12(ha, "RDMA READ length = %d\n", + rwqe->length); + + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + + QL_DPRINT12(ha, + "ATOMIC operation = %s\n", + ((wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) ? 
+ "IB_WR_ATOMIC_CMP_AND_SWP" : + "IB_WR_ATOMIC_FETCH_AND_ADD")); + + awqe1 = (struct rdma_sq_atomic_wqe *)wqe; + awqe1->prev_wqe_size = 4; + + awqe2 = (struct rdma_sq_atomic_wqe *) + ecore_chain_produce(&qp->sq.pbl); + + TYPEPTR_ADDR_SET(awqe2, remote_va, \ + atomic_wr(wr)->remote_addr); + + awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey); + + awqe3 = (struct rdma_sq_atomic_wqe *) + ecore_chain_produce(&qp->sq.pbl); + + if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { + wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD; + TYPEPTR_ADDR_SET(awqe3, swap_data, + atomic_wr(wr)->compare_add); + } else { + wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP; + TYPEPTR_ADDR_SET(awqe3, swap_data, + atomic_wr(wr)->swap); + TYPEPTR_ADDR_SET(awqe3, cmp_data, + atomic_wr(wr)->compare_add); + } + + qlnxr_prepare_sq_sges(dev, qp, NULL, wr); + + qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->prev_wqe_size; + qp->prev_wqe_size = awqe1->prev_wqe_size; + + break; + + case IB_WR_LOCAL_INV: + + QL_DPRINT12(ha, + "INVALIDATE length (IB_WR_LOCAL_INV)\n"); + + iwqe = (struct rdma_sq_local_inv_wqe *)wqe; + iwqe->prev_wqe_size = 1; + + iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE; + iwqe->inv_l_key = wr->ex.invalidate_rkey; + qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->prev_wqe_size; + qp->prev_wqe_size = iwqe->prev_wqe_size; + + break; + +#if __FreeBSD_version >= 1102000 + + case IB_WR_REG_MR: + + QL_DPRINT12(ha, "IB_WR_REG_MR\n"); + + wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR; + fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe; + fwqe1->wqe_size = 2; + + rc = qlnxr_prepare_reg(qp, fwqe1, reg_wr(wr)); + if (rc) { + QL_DPRINT11(ha, "IB_WR_REG_MR failed rc=%d\n", rc); + *bad_wr = wr; + break; + } + + qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size; + qp->prev_wqe_size = fwqe1->wqe_size; + + break; +#else + case IB_WR_FAST_REG_MR: + + QL_DPRINT12(ha, "FAST_MR (IB_WR_FAST_REG_MR)\n"); + + wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR; + fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe; + fwqe1->prev_wqe_size = 3; + + rc = qlnxr_prepare_fmr(qp, fwqe1, wr); + + if (rc) { + QL_DPRINT12(ha, + "FAST_MR (IB_WR_FAST_REG_MR) failed" + " rc = %d\n", rc); + *bad_wr = wr; + break; + } + + qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->prev_wqe_size; + qp->prev_wqe_size = fwqe1->prev_wqe_size; + + break; +#endif /* #if __FreeBSD_version >= 1102000 */ + + default: + + QL_DPRINT12(ha, "Invalid Opcode 0x%x!\n", wr->opcode); + + rc = -EINVAL; + *bad_wr = wr; + break; + } + + if (*bad_wr) { + /* + * restore prod to its position before this WR was processed + */ + ecore_chain_set_prod(&qp->sq.pbl, + le16_to_cpu(qp->sq.db_data.data.value), + wqe); + /* restore prev_wqe_size */ + qp->prev_wqe_size = wqe->prev_wqe_size; + status = rc; + + QL_DPRINT12(ha, "failed *bad_wr = %p\n", *bad_wr); + break; /* out of the loop */ + } + + qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id; + + qlnxr_inc_sw_prod(&qp->sq); + + qp->sq.db_data.data.value++; + + wr = wr->next; + } + + /* Trigger doorbell + * If there was a failure in the first WR then it will be triggered in + * vane. However this is not harmful (as long as the producer value is + * unchanged). For performance reasons we avoid checking for this + * redundant doorbell. 
+ */ + wmb(); + //writel(qp->sq.db_data.raw, qp->sq.db); + + reg_addr = (uint32_t)((uint8_t *)qp->sq.db - (uint8_t *)ha->cdev.doorbells); + bus_write_4(ha->pci_dbells, reg_addr, qp->sq.db_data.raw); + bus_barrier(ha->pci_dbells, 0, 0, BUS_SPACE_BARRIER_READ); + + mmiowb(); + + spin_unlock_irqrestore(&qp->q_lock, flags); + + QL_DPRINT12(ha, "exit[ibqp, wr, bad_wr] = [%p, %p, %p]\n", + ibqp, wr, bad_wr); + + return status; +} + +static u32 +qlnxr_srq_elem_left(struct qlnxr_srq_hwq_info *hw_srq) +{ + u32 used; + + /* Calculate number of elements used based on producer + * count and consumer count and subtract it from max + * work request supported so that we get elements left. + */ + used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt; + + return hw_srq->max_wr - used; +} + + +int +qlnxr_post_recv(struct ib_qp *ibqp, + struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + struct qlnxr_dev *dev = qp->dev; + unsigned long flags; + int status = 0; + qlnx_host_t *ha; + uint32_t reg_addr; + + ha = dev->ha; + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + QL_DPRINT12(ha, "enter\n"); + + if (qp->qp_type == IB_QPT_GSI) { + QL_DPRINT12(ha, "(qp->qp_type = IB_QPT_GSI)\n"); + return qlnxr_gsi_post_recv(ibqp, wr, bad_wr); + } + + if (qp->srq) { + QL_DPRINT11(ha, "qp->srq [%p]" + " QP is associated with SRQ, cannot post RQ buffers\n", + qp->srq); + return -EINVAL; + } + + spin_lock_irqsave(&qp->q_lock, flags); + + if (qp->state == ECORE_ROCE_QP_STATE_RESET) { + spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; + + QL_DPRINT11(ha, "qp->qp_type = ECORE_ROCE_QP_STATE_RESET\n"); + + return -EINVAL; + } + + while (wr) { + int i; + + if ((ecore_chain_get_elem_left_u32(&qp->rq.pbl) < + QLNXR_MAX_RQE_ELEMENTS_PER_RQE) || + (wr->num_sge > qp->rq.max_sges)) { + status = -ENOMEM; + *bad_wr = wr; + break; + } + for (i = 0; i < wr->num_sge; i++) { + u32 flags = 0; + struct rdma_rq_sge *rqe = ecore_chain_produce(&qp->rq.pbl); + + /* first one must include the number of SGE in the list */ + if (!i) + SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, wr->num_sge); + + SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, wr->sg_list[i].lkey); + + RQ_SGE_SET(rqe, wr->sg_list[i].addr, \ + wr->sg_list[i].length, flags); + } + /* Special case of no sges. FW requires between 1-4 sges... + * in this case we need to post 1 sge with length zero. this is + * because rdma write with immediate consumes an RQ. 
*/
+		if (!wr->num_sge) {
+			u32 flags = 0;
+			struct rdma_rq_sge *rqe = ecore_chain_produce(&qp->rq.pbl);
+
+			/* first one must include the number of SGE in the list */
+			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
+			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
+
+			//RQ_SGE_SET(rqe, 0, 0, flags);
+			rqe->addr.hi = 0;
+			rqe->addr.lo = 0;
+
+			rqe->length = 0;
+			rqe->flags = cpu_to_le32(flags);
+
+			i = 1;
+		}
+
+		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
+		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
+
+		qlnxr_inc_sw_prod(&qp->rq);
+
+		wmb();
+
+		qp->rq.db_data.data.value++;
+
+	//	writel(qp->rq.db_data.raw, qp->rq.db);
+		mmiowb();
+	//	if (QLNX_IS_IWARP(dev)) {
+	//		writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
+	//		mmiowb();	/* for second doorbell */
+	//	}
+
+		reg_addr = (uint32_t)((uint8_t *)qp->rq.db -
+				(uint8_t *)ha->cdev.doorbells);
+
+		bus_write_4(ha->pci_dbells, reg_addr, qp->rq.db_data.raw);
+		bus_barrier(ha->pci_dbells, 0, 0, BUS_SPACE_BARRIER_READ);
+
+		if (QLNX_IS_IWARP(dev)) {
+			reg_addr = (uint32_t)((uint8_t *)qp->rq.iwarp_db2 -
+						(uint8_t *)ha->cdev.doorbells);
+			bus_write_4(ha->pci_dbells, reg_addr, \
+				qp->rq.iwarp_db2_data.raw);
+			bus_barrier(ha->pci_dbells, 0, 0, \
+				BUS_SPACE_BARRIER_READ);
+		}
+
+		wr = wr->next;
+	}
+
+	spin_unlock_irqrestore(&qp->q_lock, flags);
+
+	QL_DPRINT12(ha, "exit status = 0x%x\n", status);
+
+	return status;
+}
+
+/* For FMR we need to increase the "completed" counter of the FMR, which the
+ * FMR algorithm uses to determine whether a PBL can be freed or not.
+ * This must be done whether or not the work request was signaled; for that
+ * reason this function is called from the condition that checks if a WR
+ * should be skipped, so that an unsignaled FMR operation is not missed.
+ */
+static inline void
+qlnxr_chk_if_fmr(struct qlnxr_qp *qp)
+{
+#if __FreeBSD_version >= 1102000
+
+	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
+		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
+#else
+	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_FAST_REG_MR)
+		qp->wqe_wr_id[qp->sq.cons].frmr->info.completed++;
+
+#endif /* #if __FreeBSD_version >= 1102000 */
+}
+
+static int
+process_req(struct qlnxr_dev *dev,
+	struct qlnxr_qp *qp,
+	struct qlnxr_cq *cq,
+	int num_entries,
+	struct ib_wc *wc,
+	u16 hw_cons,
+	enum ib_wc_status status,
+	int force)
+{
+	u16 cnt = 0;
+	qlnx_host_t *ha = dev->ha;
+
+	QL_DPRINT12(ha, "enter\n");
+
+	while (num_entries && qp->sq.wqe_cons != hw_cons) {
+		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
+			qlnxr_chk_if_fmr(qp);
+			/* skip WC */
+			goto next_cqe;
+		}
+
+		/* fill WC */
+		wc->status = status;
+		wc->vendor_err = 0;
+		wc->wc_flags = 0;
+		wc->src_qp = qp->id;
+		wc->qp = &qp->ibqp;
+
+		// common section
+		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
+		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
+
+		switch (wc->opcode) {
+
+		case IB_WC_RDMA_WRITE:
+
+			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
+
+			QL_DPRINT12(ha,
+				"opcode = IB_WC_RDMA_WRITE bytes = %d\n",
+				qp->wqe_wr_id[qp->sq.cons].bytes_len);
+			break;
+
+		case IB_WC_COMP_SWAP:
+		case IB_WC_FETCH_ADD:
+			wc->byte_len = 8;
+			break;
+
+#if __FreeBSD_version >= 1102000
+		case IB_WC_REG_MR:
+			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
+			break;
+#else
+		case IB_WC_FAST_REG_MR:
+			qp->wqe_wr_id[qp->sq.cons].frmr->info.completed++;
+			break;
+#endif /* #if __FreeBSD_version >= 1102000 */
+
+		case IB_WC_RDMA_READ:
+		case IB_WC_SEND:
+
+			QL_DPRINT12(ha, "opcode = 0x%x \n", wc->opcode);
+			break;
+		default:
+			;//DP_ERR("TBD
ERROR"); + } + + num_entries--; + wc++; + cnt++; +next_cqe: + while (qp->wqe_wr_id[qp->sq.cons].wqe_size--) + ecore_chain_consume(&qp->sq.pbl); + qlnxr_inc_sw_cons(&qp->sq); + } + + QL_DPRINT12(ha, "exit cnt = 0x%x\n", cnt); + return cnt; +} + +static int +qlnxr_poll_cq_req(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_cq *cq, + int num_entries, + struct ib_wc *wc, + struct rdma_cqe_requester *req) +{ + int cnt = 0; + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter req->status = 0x%x\n", req->status); + + switch (req->status) { + + case RDMA_CQE_REQ_STS_OK: + + cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, + IB_WC_SUCCESS, 0); + break; + + case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR: + + if (qp->state != ECORE_ROCE_QP_STATE_ERR) + cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, + IB_WC_WR_FLUSH_ERR, 1); + break; + + default: /* other errors case */ + + /* process all WQE before the cosumer */ + qp->state = ECORE_ROCE_QP_STATE_ERR; + cnt = process_req(dev, qp, cq, num_entries, wc, + req->sq_cons - 1, IB_WC_SUCCESS, 0); + wc += cnt; + /* if we have extra WC fill it with actual error info */ + + if (cnt < num_entries) { + enum ib_wc_status wc_status; + + switch (req->status) { + case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR: + wc_status = IB_WC_BAD_RESP_ERR; + break; + case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR: + wc_status = IB_WC_LOC_LEN_ERR; + break; + case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR: + wc_status = IB_WC_LOC_QP_OP_ERR; + break; + case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR: + wc_status = IB_WC_LOC_PROT_ERR; + break; + case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR: + wc_status = IB_WC_MW_BIND_ERR; + break; + case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR: + wc_status = IB_WC_REM_INV_REQ_ERR; + break; + case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR: + wc_status = IB_WC_REM_ACCESS_ERR; + break; + case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR: + wc_status = IB_WC_REM_OP_ERR; + break; + case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR: + wc_status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR: + wc_status = IB_WC_RETRY_EXC_ERR; + break; + default: + wc_status = IB_WC_GENERAL_ERR; + } + + cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons, + wc_status, 1 /* force use of WC */); + } + } + + QL_DPRINT12(ha, "exit cnt = %d\n", cnt); + return cnt; +} + +static void +__process_resp_one(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_cq *cq, + struct ib_wc *wc, + struct rdma_cqe_responder *resp, + u64 wr_id) +{ + enum ib_wc_status wc_status = IB_WC_SUCCESS; +#if __FreeBSD_version < 1102000 + u8 flags; +#endif + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter qp = %p resp->status = 0x%x\n", + qp, resp->status); + + wc->opcode = IB_WC_RECV; + wc->wc_flags = 0; + + switch (resp->status) { + + case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR: + wc_status = IB_WC_LOC_ACCESS_ERR; + break; + + case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR: + wc_status = IB_WC_LOC_LEN_ERR; + break; + + case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR: + wc_status = IB_WC_LOC_QP_OP_ERR; + break; + + case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR: + wc_status = IB_WC_LOC_PROT_ERR; + break; + + case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR: + wc_status = IB_WC_MW_BIND_ERR; + break; + + case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR: + wc_status = IB_WC_REM_INV_RD_REQ_ERR; + break; + + case RDMA_CQE_RESP_STS_OK: + +#if __FreeBSD_version >= 1102000 + if (resp->flags & QLNXR_RESP_IMM) { + wc->ex.imm_data = + 
le32_to_cpu(resp->imm_data_or_inv_r_Key); + wc->wc_flags |= IB_WC_WITH_IMM; + + if (resp->flags & QLNXR_RESP_RDMA) + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + + if (resp->flags & QLNXR_RESP_INV) { + QL_DPRINT11(ha, + "Invalid flags QLNXR_RESP_INV [0x%x]" + "qp = %p qp->id = 0x%x cq = %p" + " cq->icid = 0x%x\n", + resp->flags, qp, qp->id, cq, cq->icid ); + } + } else if (resp->flags & QLNXR_RESP_INV) { + wc->ex.imm_data = + le32_to_cpu(resp->imm_data_or_inv_r_Key); + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + + if (resp->flags & QLNXR_RESP_RDMA) { + QL_DPRINT11(ha, + "Invalid flags QLNXR_RESP_RDMA [0x%x]" + "qp = %p qp->id = 0x%x cq = %p" + " cq->icid = 0x%x\n", + resp->flags, qp, qp->id, cq, cq->icid ); + } + } else if (resp->flags & QLNXR_RESP_RDMA) { + QL_DPRINT11(ha, "Invalid flags QLNXR_RESP_RDMA [0x%x]" + "qp = %p qp->id = 0x%x cq = %p cq->icid = 0x%x\n", + resp->flags, qp, qp->id, cq, cq->icid ); + } +#else + wc_status = IB_WC_SUCCESS; + wc->byte_len = le32_to_cpu(resp->length); + + flags = resp->flags & QLNXR_RESP_RDMA_IMM; + + switch (flags) { + + case QLNXR_RESP_RDMA_IMM: + /* update opcode */ + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + /* fall to set imm data */ + case QLNXR_RESP_IMM: + wc->ex.imm_data = + le32_to_cpu(resp->imm_data_or_inv_r_Key); + wc->wc_flags |= IB_WC_WITH_IMM; + break; + case QLNXR_RESP_RDMA: + QL_DPRINT11(ha, "Invalid flags QLNXR_RESP_RDMA [0x%x]" + "qp = %p qp->id = 0x%x cq = %p cq->icid = 0x%x\n", + resp->flags, qp, qp->id, cq, cq->icid ); + break; + default: + /* valid configuration, but nothing todo here */ + ; + } +#endif /* #if __FreeBSD_version >= 1102000 */ + + break; + default: + wc_status = IB_WC_GENERAL_ERR; + } + + /* fill WC */ + wc->status = wc_status; + wc->vendor_err = 0; + wc->src_qp = qp->id; + wc->qp = &qp->ibqp; + wc->wr_id = wr_id; + + QL_DPRINT12(ha, "exit status = 0x%x\n", wc_status); + + return; +} + +static int +process_resp_one_srq(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_cq *cq, + struct ib_wc *wc, + struct rdma_cqe_responder *resp) +{ + struct qlnxr_srq *srq = qp->srq; + u64 wr_id; + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + wr_id = HILO_U64(resp->srq_wr_id.hi, resp->srq_wr_id.lo); + + if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = 0; + wc->wr_id = wr_id; + wc->byte_len = 0; + wc->src_qp = qp->id; + wc->qp = &qp->ibqp; + wc->wr_id = wr_id; + } else { + __process_resp_one(dev, qp, cq, wc, resp, wr_id); + } + + /* PBL is maintained in case of WR granularity. 
+ * So increment WR consumer after consuming WR + */ + srq->hw_srq.wr_cons_cnt++; + + QL_DPRINT12(ha, "exit\n"); + return 1; +} + +static int +process_resp_one(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_cq *cq, + struct ib_wc *wc, + struct rdma_cqe_responder *resp) +{ + qlnx_host_t *ha = dev->ha; + u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; + + QL_DPRINT12(ha, "enter\n"); + + __process_resp_one(dev, qp, cq, wc, resp, wr_id); + + while (qp->rqe_wr_id[qp->rq.cons].wqe_size--) + ecore_chain_consume(&qp->rq.pbl); + qlnxr_inc_sw_cons(&qp->rq); + + QL_DPRINT12(ha, "exit\n"); + return 1; +} + +static int +process_resp_flush(struct qlnxr_qp *qp, + int num_entries, + struct ib_wc *wc, + u16 hw_cons) +{ + u16 cnt = 0; + qlnx_host_t *ha = qp->dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + while (num_entries && qp->rq.wqe_cons != hw_cons) { + /* fill WC */ + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = 0; + wc->wc_flags = 0; + wc->src_qp = qp->id; + wc->byte_len = 0; + wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; + wc->qp = &qp->ibqp; + num_entries--; + wc++; + cnt++; + while (qp->rqe_wr_id[qp->rq.cons].wqe_size--) + ecore_chain_consume(&qp->rq.pbl); + qlnxr_inc_sw_cons(&qp->rq); + } + + QL_DPRINT12(ha, "exit cnt = 0x%x\n", cnt); + return cnt; +} + +static void +try_consume_resp_cqe(struct qlnxr_cq *cq, + struct qlnxr_qp *qp, + struct rdma_cqe_responder *resp, + int *update) +{ + if (le16_to_cpu(resp->rq_cons) == qp->rq.wqe_cons) { + consume_cqe(cq); + *update |= 1; + } +} + +static int +qlnxr_poll_cq_resp_srq(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_cq *cq, + int num_entries, + struct ib_wc *wc, + struct rdma_cqe_responder *resp, + int *update) +{ + int cnt; + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + cnt = process_resp_one_srq(dev, qp, cq, wc, resp); + consume_cqe(cq); + *update |= 1; + + QL_DPRINT12(ha, "exit cnt = 0x%x\n", cnt); + return cnt; +} + +static int +qlnxr_poll_cq_resp(struct qlnxr_dev *dev, + struct qlnxr_qp *qp, + struct qlnxr_cq *cq, + int num_entries, + struct ib_wc *wc, + struct rdma_cqe_responder *resp, + int *update) +{ + int cnt; + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { + cnt = process_resp_flush(qp, num_entries, wc, + resp->rq_cons); + try_consume_resp_cqe(cq, qp, resp, update); + } else { + cnt = process_resp_one(dev, qp, cq, wc, resp); + consume_cqe(cq); + *update |= 1; + } + + QL_DPRINT12(ha, "exit cnt = 0x%x\n", cnt); + return cnt; +} + +static void +try_consume_req_cqe(struct qlnxr_cq *cq, struct qlnxr_qp *qp, + struct rdma_cqe_requester *req, int *update) +{ + if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) { + consume_cqe(cq); + *update |= 1; + } +} + +static void +doorbell_cq(struct qlnxr_dev *dev, struct qlnxr_cq *cq, u32 cons, u8 flags) +{ + uint64_t reg_addr; + qlnx_host_t *ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + wmb(); + cq->db.data.agg_flags = flags; + cq->db.data.value = cpu_to_le32(cons); + + reg_addr = (uint64_t)((uint8_t *)cq->db_addr - + (uint8_t *)(ha->cdev.doorbells)); + + bus_write_8(ha->pci_dbells, reg_addr, cq->db.raw); + bus_barrier(ha->pci_dbells, 0, 0, BUS_SPACE_BARRIER_READ); + + QL_DPRINT12(ha, "exit\n"); + return; + +//#ifdef __LP64__ +// writeq(cq->db.raw, cq->db_addr); +//#else + /* Note that since the FW allows 64 bit write only, in 32bit systems + * the value of db_addr must be low enough. This is currently not + * enforced. 
+	 */
+//	writel(cq->db.raw & 0xffffffff, cq->db_addr);
+//	mmiowb();
+//#endif
+}
+
+
+static int
+is_valid_cqe(struct qlnxr_cq *cq, union rdma_cqe *cqe)
+{
+	struct rdma_cqe_requester *resp_cqe = &cqe->req;
+	return (resp_cqe->flags & RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT_MASK) ==
+			cq->pbl_toggle;
+}
+
+int
+qlnxr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+	struct qlnxr_cq *cq = get_qlnxr_cq(ibcq);
+	struct qlnxr_dev *dev = get_qlnxr_dev((ibcq->device));
+	int done = 0;
+	union rdma_cqe *cqe = cq->latest_cqe;
+	int update = 0;
+	u32 old_cons, new_cons;
+	unsigned long flags;
+	qlnx_host_t *ha = dev->ha;
+
+	QL_DPRINT12(ha, "enter\n");
+
+	if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING))
+		return -EINVAL;
+
+	if (cq->destroyed) {
+		QL_DPRINT11(ha, "called after destroy for cq %p (icid=%d)\n",
+			cq, cq->icid);
+		return 0;
+	}
+
+	if (cq->cq_type == QLNXR_CQ_TYPE_GSI)
+		return qlnxr_gsi_poll_cq(ibcq, num_entries, wc);
+
+	spin_lock_irqsave(&cq->cq_lock, flags);
+
+	old_cons = ecore_chain_get_cons_idx_u32(&cq->pbl);
+
+	while (num_entries && is_valid_cqe(cq, cqe)) {
+		int cnt = 0;
+		struct qlnxr_qp *qp;
+		struct rdma_cqe_requester *resp_cqe;
+		enum rdma_cqe_type cqe_type;
+
+		/* prevent speculative reads of any field of CQE */
+		rmb();
+
+		resp_cqe = &cqe->req;
+		qp = (struct qlnxr_qp *)(uintptr_t)HILO_U64(resp_cqe->qp_handle.hi,
+						resp_cqe->qp_handle.lo);
+
+		if (!qp) {
+			QL_DPRINT11(ha, "qp = NULL\n");
+			break;
+		}
+
+		wc->qp = &qp->ibqp;
+
+		cqe_type = GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
+
+		switch (cqe_type) {
+		case RDMA_CQE_TYPE_REQUESTER:
+			cnt = qlnxr_poll_cq_req(dev, qp, cq, num_entries,
+					wc, &cqe->req);
+			try_consume_req_cqe(cq, qp, &cqe->req, &update);
+			break;
+		case RDMA_CQE_TYPE_RESPONDER_RQ:
+			cnt = qlnxr_poll_cq_resp(dev, qp, cq, num_entries,
+					wc, &cqe->resp, &update);
+			break;
+		case RDMA_CQE_TYPE_RESPONDER_SRQ:
+			cnt = qlnxr_poll_cq_resp_srq(dev, qp, cq, num_entries,
+					wc, &cqe->resp, &update);
+			break;
+		case RDMA_CQE_TYPE_INVALID:
+		default:
+			QL_DPRINT11(ha, "cqe type [0x%x] invalid\n", cqe_type);
+			break;
+		}
+		num_entries -= cnt;
+		wc += cnt;
+		done += cnt;
+
+		cqe = cq->latest_cqe;
+	}
+	new_cons = ecore_chain_get_cons_idx_u32(&cq->pbl);
+
+	cq->cq_cons += new_cons - old_cons;
+
+	if (update) {
+		/* the doorbell notifies about the latest VALID entry,
+		 * but the chain already points to the next INVALID one
+		 */
+		doorbell_cq(dev, cq, cq->cq_cons - 1, cq->arm_flags);
+		QL_DPRINT12(ha, "cq = %p cons = 0x%x "
+			"arm_flags = 0x%x db.icid = 0x%x\n", cq,
+			(cq->cq_cons - 1), cq->arm_flags, cq->db.data.icid);
+	}
+
+	spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+	QL_DPRINT12(ha, "exit\n");
+
+	return done;
+}
+
+
+int
+qlnxr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+	struct qlnxr_cq *cq = get_qlnxr_cq(ibcq);
+	unsigned long sflags;
+	struct qlnxr_dev *dev;
+	qlnx_host_t *ha;
+
+	dev = get_qlnxr_dev((ibcq->device));
+	ha = dev->ha;
+
+	QL_DPRINT12(ha, "enter ibcq = %p flags = 0x%x "
+		"cq = %p cons = 0x%x cq_type = 0x%x\n", ibcq,
+		flags, cq, cq->cq_cons, cq->cq_type);
+
+	if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING))
+		return -EINVAL;
+
+	if (cq->destroyed) {
+		QL_DPRINT11(ha, "cq was already destroyed cq = %p icid=%d\n",
+			cq, cq->icid);
+		return -EINVAL;
+	}
+
+	if (cq->cq_type == QLNXR_CQ_TYPE_GSI) {
+		return 0;
+	}
+
+	spin_lock_irqsave(&cq->cq_lock, sflags);
+
+	cq->arm_flags = 0;
+
+	if (flags & IB_CQ_SOLICITED) {
+		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
+	}
+	if (flags & IB_CQ_NEXT_COMP) {
+		cq->arm_flags
|= DQ_UCM_ROCE_CQ_ARM_CF_CMD; + } + + doorbell_cq(dev, cq, (cq->cq_cons - 1), cq->arm_flags); + + spin_unlock_irqrestore(&cq->cq_lock, sflags); + + QL_DPRINT12(ha, "exit ibcq = %p flags = 0x%x\n", ibcq, flags); + return 0; +} + + +static struct qlnxr_mr * +__qlnxr_alloc_mr(struct ib_pd *ibpd, int max_page_list_len) +{ + struct qlnxr_pd *pd = get_qlnxr_pd(ibpd); + struct qlnxr_dev *dev = get_qlnxr_dev((ibpd->device)); + struct qlnxr_mr *mr; + int rc = -ENOMEM; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter ibpd = %p pd = %p " + " pd_id = %d max_page_list_len = %d\n", + ibpd, pd, pd->pd_id, max_page_list_len); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + QL_DPRINT11(ha, "kzalloc(mr) failed\n"); + return ERR_PTR(rc); + } + + mr->dev = dev; + mr->type = QLNXR_MR_FRMR; + + rc = qlnxr_init_mr_info(dev, &mr->info, max_page_list_len, + 1 /* allow dual layer pbl */); + if (rc) { + QL_DPRINT11(ha, "qlnxr_init_mr_info failed\n"); + goto err0; + } + + rc = ecore_rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + if (rc) { + QL_DPRINT11(ha, "ecore_rdma_alloc_tid failed\n"); + goto err0; + } + + /* index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = ECORE_RDMA_TID_FMR; + mr->hw_mr.key = 0; + mr->hw_mr.pd = pd->pd_id; + mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = 0; + mr->hw_mr.remote_read = 0; + mr->hw_mr.remote_write = 0; + mr->hw_mr.remote_atomic = 0; + mr->hw_mr.mw_bind = false; /* TBD MW BIND */ + mr->hw_mr.pbl_ptr = 0; /* Will be supplied during post */ + mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; + mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); + mr->hw_mr.fbo = 0; + mr->hw_mr.length = 0; + mr->hw_mr.vaddr = 0; + mr->hw_mr.zbva = false; /* TBD figure when this should be true */ + mr->hw_mr.phy_mr = true; /* Fast MR - True, Regular Register False */ + mr->hw_mr.dma_mr = false; + + rc = ecore_rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + QL_DPRINT11(ha, "ecore_rdma_register_tid failed\n"); + goto err1; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + mr->ibmr.rkey = mr->ibmr.lkey; + + QL_DPRINT12(ha, "exit mr = %p mr->ibmr.lkey = 0x%x\n", + mr, mr->ibmr.lkey); + + return mr; + +err1: + ecore_rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err0: + kfree(mr); + + QL_DPRINT12(ha, "exit\n"); + + return ERR_PTR(rc); +} + +#if __FreeBSD_version >= 1102000 + +struct ib_mr * +qlnxr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, u32 max_num_sg) +{ + struct qlnxr_dev *dev; + struct qlnxr_mr *mr; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibpd->device); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (mr_type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EINVAL); + + mr = __qlnxr_alloc_mr(ibpd, max_num_sg); + + if (IS_ERR(mr)) + return ERR_PTR(-EINVAL); + + QL_DPRINT12(ha, "exit mr = %p &mr->ibmr = %p\n", mr, &mr->ibmr); + + return &mr->ibmr; +} + +static int +qlnxr_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct qlnxr_mr *mr = get_qlnxr_mr(ibmr); + struct qlnxr_pbl *pbl_table; + struct regpair *pbe; + struct qlnxr_dev *dev; + qlnx_host_t *ha; + u32 pbes_in_page; + + dev = mr->dev; + ha = dev->ha; + + if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) { + QL_DPRINT12(ha, "fails mr->npages %d\n", mr->npages); + return -ENOMEM; + } + + QL_DPRINT12(ha, "mr->npages %d addr = %p enter\n", mr->npages, + ((void *)addr)); + + pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64); + pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page); + pbe = (struct regpair *)pbl_table->va; 
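+	/* two-level lookup: the division above selects the PBL page and
+	 * the remainder below selects the PBE entry within that page.
+	 */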
+ pbe += mr->npages % pbes_in_page; + pbe->lo = cpu_to_le32((u32)addr); + pbe->hi = cpu_to_le32((u32)upper_32_bits(addr)); + + mr->npages++; + + QL_DPRINT12(ha, "mr->npages %d addr = %p exit \n", mr->npages, + ((void *)addr)); + return 0; +} + +int +qlnxr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset) +{ + int ret; + struct qlnxr_mr *mr = get_qlnxr_mr(ibmr); + qlnx_host_t *ha; + + if (mr == NULL) + return (-1); + + if (mr->dev == NULL) + return (-1); + + ha = mr->dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + mr->npages = 0; + qlnx_handle_completed_mrs(mr->dev, &mr->info); + + ret = ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qlnxr_set_page); + + QL_DPRINT12(ha, "exit ret = %d\n", ret); + + return (ret); +} + +#else + +struct ib_mr * +qlnxr_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len) +{ + struct qlnxr_dev *dev; + struct qlnxr_mr *mr; + qlnx_host_t *ha; + struct ib_mr *ibmr = NULL; + + dev = get_qlnxr_dev((ibpd->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + mr = __qlnxr_alloc_mr(ibpd, max_page_list_len); + + if (IS_ERR(mr)) { + ibmr = ERR_PTR(-EINVAL); + } else { + ibmr = &mr->ibmr; + } + + QL_DPRINT12(ha, "exit %p\n", ibmr); + return (ibmr); +} + +void +qlnxr_free_frmr_page_list(struct ib_fast_reg_page_list *page_list) +{ + struct qlnxr_fast_reg_page_list *frmr_list; + + frmr_list = get_qlnxr_frmr_list(page_list); + + free_mr_info(frmr_list->dev, &frmr_list->info); + + kfree(frmr_list->ibfrpl.page_list); + kfree(frmr_list); + + return; +} + +struct ib_fast_reg_page_list * +qlnxr_alloc_frmr_page_list(struct ib_device *ibdev, int page_list_len) +{ + struct qlnxr_fast_reg_page_list *frmr_list = NULL; + struct qlnxr_dev *dev; + int size = page_list_len * sizeof(u64); + int rc = -ENOMEM; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + frmr_list = kzalloc(sizeof(*frmr_list), GFP_KERNEL); + if (!frmr_list) { + QL_DPRINT11(ha, "kzalloc(frmr_list) failed\n"); + goto err; + } + + frmr_list->dev = dev; + frmr_list->ibfrpl.page_list = kzalloc(size, GFP_KERNEL); + if (!frmr_list->ibfrpl.page_list) { + QL_DPRINT11(ha, "frmr_list->ibfrpl.page_list = NULL failed\n"); + goto err0; + } + + rc = qlnxr_init_mr_info(dev, &frmr_list->info, page_list_len, + 1 /* allow dual layer pbl */); + if (rc) + goto err1; + + QL_DPRINT12(ha, "exit %p\n", &frmr_list->ibfrpl); + + return &frmr_list->ibfrpl; + +err1: + kfree(frmr_list->ibfrpl.page_list); +err0: + kfree(frmr_list); +err: + QL_DPRINT12(ha, "exit with error\n"); + + return ERR_PTR(rc); +} + +static int +qlnxr_validate_phys_buf_list(qlnx_host_t *ha, struct ib_phys_buf *buf_list, + int buf_cnt, uint64_t *total_size) +{ + u64 size = 0; + + *total_size = 0; + + if (!buf_cnt || buf_list == NULL) { + QL_DPRINT11(ha, + "failed buf_list = %p buf_cnt = %d\n", buf_list, buf_cnt); + return (-1); + } + + size = buf_list->size; + + if (!size) { + QL_DPRINT11(ha, + "failed buf_list = %p buf_cnt = %d" + " buf_list->size = 0\n", buf_list, buf_cnt); + return (-1); + } + + while (buf_cnt) { + + *total_size += buf_list->size; + + if (buf_list->size != size) { + QL_DPRINT11(ha, + "failed buf_list = %p buf_cnt = %d" + " all buffers should have same size\n", + buf_list, buf_cnt); + return (-1); + } + + buf_list++; + buf_cnt--; + } + return (0); +} + +static size_t +qlnxr_get_num_pages(qlnx_host_t *ha, struct ib_phys_buf *buf_list, + int buf_cnt) +{ + int i; + size_t num_pages = 0; + u64 size; + + for (i = 0; i < buf_cnt; i++) { + + size = 0; + while (size < 
buf_list->size) { + size += PAGE_SIZE; + num_pages++; + } + buf_list++; + } + return (num_pages); +} + +static void +qlnxr_populate_phys_mem_pbls(struct qlnxr_dev *dev, + struct ib_phys_buf *buf_list, int buf_cnt, + struct qlnxr_pbl *pbl, struct qlnxr_pbl_info *pbl_info) +{ + struct regpair *pbe; + struct qlnxr_pbl *pbl_tbl; + int pg_cnt, pages, pbe_cnt, total_num_pbes = 0; + qlnx_host_t *ha; + int i; + u64 pbe_addr; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (!pbl_info) { + QL_DPRINT11(ha, "PBL_INFO not initialized\n"); + return; + } + + if (!pbl_info->num_pbes) { + QL_DPRINT11(ha, "pbl_info->num_pbes == 0\n"); + return; + } + + /* If we have a two layered pbl, the first pbl points to the rest + * of the pbls and the first entry lays on the second pbl in the table + */ + if (pbl_info->two_layered) + pbl_tbl = &pbl[1]; + else + pbl_tbl = pbl; + + pbe = (struct regpair *)pbl_tbl->va; + if (!pbe) { + QL_DPRINT12(ha, "pbe is NULL\n"); + return; + } + + pbe_cnt = 0; + + for (i = 0; i < buf_cnt; i++) { + + pages = buf_list->size >> PAGE_SHIFT; + + for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { + /* store the page address in pbe */ + + pbe_addr = buf_list->addr + (PAGE_SIZE * pg_cnt); + + pbe->lo = cpu_to_le32((u32)pbe_addr); + pbe->hi = cpu_to_le32(((u32)(pbe_addr >> 32))); + + QL_DPRINT12(ha, "Populate pbl table:" + " pbe->addr=0x%x:0x%x " + " pbe_cnt = %d total_num_pbes=%d" + " pbe=%p\n", pbe->lo, pbe->hi, pbe_cnt, + total_num_pbes, pbe); + + pbe_cnt ++; + total_num_pbes ++; + pbe++; + + if (total_num_pbes == pbl_info->num_pbes) + return; + + /* if the given pbl is full storing the pbes, + * move to next pbl. */ + + if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct regpair *)pbl_tbl->va; + pbe_cnt = 0; + } + } + buf_list++; + } + QL_DPRINT12(ha, "exit\n"); + return; +} + +struct ib_mr * +qlnxr_reg_kernel_mr(struct ib_pd *ibpd, + struct ib_phys_buf *buf_list, + int buf_cnt, int acc, u64 *iova_start) +{ + int rc = -ENOMEM; + struct qlnxr_dev *dev = get_qlnxr_dev((ibpd->device)); + struct qlnxr_mr *mr; + struct qlnxr_pd *pd; + qlnx_host_t *ha; + size_t num_pages = 0; + uint64_t length; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + pd = get_qlnxr_pd(ibpd); + + QL_DPRINT12(ha, "pd = %d buf_list = %p, buf_cnt = %d," + " iova_start = %p, acc = %d\n", + pd->pd_id, buf_list, buf_cnt, iova_start, acc); + + //if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) { + // QL_DPRINT11(ha, "(acc & IB_ACCESS_REMOTE_WRITE &&" + // " !(acc & IB_ACCESS_LOCAL_WRITE))\n"); + // return ERR_PTR(-EINVAL); + //} + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + QL_DPRINT11(ha, "kzalloc(mr) failed\n"); + return ERR_PTR(rc); + } + + mr->type = QLNXR_MR_KERNEL; + mr->iova_start = iova_start; + + rc = qlnxr_validate_phys_buf_list(ha, buf_list, buf_cnt, &length); + if (rc) + goto err0; + + num_pages = qlnxr_get_num_pages(ha, buf_list, buf_cnt); + if (!num_pages) + goto err0; + + rc = qlnxr_init_mr_info(dev, &mr->info, num_pages, 1); + if (rc) { + QL_DPRINT11(ha, + "qlnxr_init_mr_info failed [%d]\n", rc); + goto err1; + } + + qlnxr_populate_phys_mem_pbls(dev, buf_list, buf_cnt, mr->info.pbl_table, + &mr->info.pbl_info); + + rc = ecore_rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + + if (rc) { + QL_DPRINT11(ha, "roce alloc tid returned an error %d\n", rc); + goto err1; + } + + /* index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = ECORE_RDMA_TID_REGISTERED_MR; + mr->hw_mr.key = 0; + mr->hw_mr.pd = pd->pd_id; + 
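+	/* translate the ib_access flags into the ecore TID permission bits;
+	 * local read access is always granted.
+	 */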
mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hw_mr.mw_bind = false; /* TBD MW BIND */ + mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa; + mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; + mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); + mr->hw_mr.page_size_log = ilog2(PAGE_SIZE); /* for the MR pages */ + + mr->hw_mr.fbo = 0; + + mr->hw_mr.length = length; + mr->hw_mr.vaddr = (uint64_t)iova_start; + mr->hw_mr.zbva = false; /* TBD figure when this should be true */ + mr->hw_mr.phy_mr = false; /* Fast MR - True, Regular Register False */ + mr->hw_mr.dma_mr = false; + + rc = ecore_rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + QL_DPRINT11(ha, "roce register tid returned an error %d\n", rc); + goto err2; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || + mr->hw_mr.remote_atomic) + mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + + QL_DPRINT12(ha, "lkey: %x\n", mr->ibmr.lkey); + + return (&mr->ibmr); + +err2: + ecore_rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err1: + qlnxr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); +err0: + kfree(mr); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return (ERR_PTR(rc)); +} + +#endif /* #if __FreeBSD_version >= 1102000 */ + +struct ib_ah * +#if __FreeBSD_version >= 1102000 +qlnxr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) +#else +qlnxr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +#endif /* #if __FreeBSD_version >= 1102000 */ +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + struct qlnxr_ah *ah; + + dev = get_qlnxr_dev((ibpd->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "in create_ah\n"); + + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); + if (!ah) { + QL_DPRINT12(ha, "no address handle can be allocated\n"); + return ERR_PTR(-ENOMEM); + } + + ah->attr = *attr; + + return &ah->ibah; +} + +int +qlnxr_destroy_ah(struct ib_ah *ibah) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + struct qlnxr_ah *ah = get_qlnxr_ah(ibah); + + dev = get_qlnxr_dev((ibah->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "in destroy_ah\n"); + + kfree(ah); + return 0; +} + +int +qlnxr_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev((ibah->device)); + ha = dev->ha; + QL_DPRINT12(ha, "Query AH not supported\n"); + return -EINVAL; +} + +int +qlnxr_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev((ibah->device)); + ha = dev->ha; + QL_DPRINT12(ha, "Modify AH not supported\n"); + return -ENOSYS; +} + +#if __FreeBSD_version >= 1102000 +int +qlnxr_process_mad(struct ib_device *ibdev, + int process_mad_flags, + u8 port_num, + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *mad_hdr, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index) + +#else + +int +qlnxr_process_mad(struct ib_device *ibdev, + int process_mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad) + +#endif /* #if __FreeBSD_version >= 1102000 */ +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + + dev = get_qlnxr_dev(ibdev); 
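+	/* MAD processing is not supported by this driver; this stub only
+	 * logs the call and returns -ENOSYS.
+	 */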
+ ha = dev->ha; + QL_DPRINT12(ha, "process mad not supported\n"); + + return -ENOSYS; +// QL_DPRINT12(ha, "qlnxr_process_mad in_mad %x %x %x %x %x %x %x %x\n", +// in_mad->mad_hdr.attr_id, in_mad->mad_hdr.base_version, +// in_mad->mad_hdr.attr_mod, in_mad->mad_hdr.class_specific, +// in_mad->mad_hdr.class_version, in_mad->mad_hdr.method, +// in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.status); + +// return IB_MAD_RESULT_SUCCESS; +} + + +#if __FreeBSD_version >= 1102000 +int +qlnxr_get_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct qlnxr_dev *dev; + qlnx_host_t *ha; + struct ib_port_attr attr; + int err; + + dev = get_qlnxr_dev(ibdev); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + err = qlnxr_query_port(ibdev, port_num, &attr); + if (err) + return err; + + if (QLNX_IS_IWARP(dev)) { + immutable->pkey_tbl_len = 1; + immutable->gid_tbl_len = 1; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + immutable->max_mad_size = 0; + } else { + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + } + + QL_DPRINT12(ha, "exit\n"); + return 0; +} +#endif /* #if __FreeBSD_version > 1102000 */ + + +/***** iWARP related functions *************/ + + +static void +qlnxr_iw_mpa_request(void *context, + struct ecore_iwarp_cm_event_params *params) +{ + struct qlnxr_iw_listener *listener = (struct qlnxr_iw_listener *)context; + struct qlnxr_dev *dev = listener->dev; + struct qlnxr_iw_ep *ep; + struct iw_cm_event event; + struct sockaddr_in *laddr; + struct sockaddr_in *raddr; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (params->cm_info->ip_version != ECORE_TCP_IPV4) { + QL_DPRINT11(ha, "only IPv4 supported [0x%x]\n", + params->cm_info->ip_version); + return; + } + + ep = kzalloc(sizeof(*ep), GFP_ATOMIC); + + if (!ep) { + QL_DPRINT11(ha, "kzalloc{ep) failed\n"); + return; + } + + ep->dev = dev; + ep->ecore_context = params->ep_context; + + memset(&event, 0, sizeof(event)); + + event.event = IW_CM_EVENT_CONNECT_REQUEST; + event.status = params->status; + + laddr = (struct sockaddr_in *)&event.local_addr; + raddr = (struct sockaddr_in *)&event.remote_addr; + + laddr->sin_family = AF_INET; + raddr->sin_family = AF_INET; + + laddr->sin_port = htons(params->cm_info->local_port); + raddr->sin_port = htons(params->cm_info->remote_port); + + laddr->sin_addr.s_addr = htonl(params->cm_info->local_ip[0]); + raddr->sin_addr.s_addr = htonl(params->cm_info->remote_ip[0]); + + event.provider_data = (void *)ep; + event.private_data = (void *)params->cm_info->private_data; + event.private_data_len = (u8)params->cm_info->private_data_len; + +#if __FreeBSD_version >= 1100000 + event.ord = params->cm_info->ord; + event.ird = params->cm_info->ird; +#endif /* #if __FreeBSD_version >= 1100000 */ + + listener->cm_id->event_handler(listener->cm_id, &event); + + QL_DPRINT12(ha, "exit\n"); + + return; +} + +static void +qlnxr_iw_issue_event(void *context, + struct ecore_iwarp_cm_event_params *params, + enum iw_cm_event_type event_type, + char *str) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + struct iw_cm_event event; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + memset(&event, 0, sizeof(event)); + event.status = params->status; + event.event = event_type; + + if (params->cm_info != NULL) { +#if __FreeBSD_version >= 1100000 + 
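+		/* the ird/ord (inbound/outbound RDMA read depths) fields of
+		 * struct iw_cm_event are only available on FreeBSD 11 and
+		 * later, hence the version guard above.
+		 */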
event.ird = params->cm_info->ird; + event.ord = params->cm_info->ord; + QL_DPRINT12(ha, "ord=[%d] \n", event.ord); + QL_DPRINT12(ha, "ird=[%d] \n", event.ird); +#endif /* #if __FreeBSD_version >= 1100000 */ + + event.private_data_len = params->cm_info->private_data_len; + event.private_data = (void *)params->cm_info->private_data; + QL_DPRINT12(ha, "private_data_len=[%d] \n", + event.private_data_len); + } + + QL_DPRINT12(ha, "event=[%d] %s\n", event.event, str); + QL_DPRINT12(ha, "status=[%d] \n", event.status); + + if (ep) { + if (ep->cm_id) + ep->cm_id->event_handler(ep->cm_id, &event); + else + QL_DPRINT11(ha, "ep->cm_id == NULL \n"); + } else { + QL_DPRINT11(ha, "ep == NULL \n"); + } + + QL_DPRINT12(ha, "exit\n"); + + return; +} + +static void +qlnxr_iw_close_event(void *context, + struct ecore_iwarp_cm_event_params *params) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (ep->cm_id) { + qlnxr_iw_issue_event(context, + params, + IW_CM_EVENT_CLOSE, + "IW_CM_EVENT_EVENT_CLOSE"); + ep->cm_id->rem_ref(ep->cm_id); + ep->cm_id = NULL; + } + + QL_DPRINT12(ha, "exit\n"); + + return; +} + +#if __FreeBSD_version >= 1102000 + +static void +qlnxr_iw_passive_complete(void *context, + struct ecore_iwarp_cm_event_params *params) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + /* We will only reach the following state if MPA_REJECT was called on + * passive. In this case there will be no associated QP. + */ + if ((params->status == -ECONNREFUSED) && (ep->qp == NULL)) { + QL_DPRINT11(ha, "PASSIVE connection refused releasing ep...\n"); + kfree(ep); + return; + } + + /* We always issue an established event, however, ofed does not look + * at event code for established. So if there was a failure, we follow + * with close... + */ + qlnxr_iw_issue_event(context, + params, + IW_CM_EVENT_ESTABLISHED, + "IW_CM_EVENT_ESTABLISHED"); + + if (params->status < 0) { + qlnxr_iw_close_event(context, params); + } + + return; +} + +struct qlnxr_discon_work { + struct work_struct work; + struct qlnxr_iw_ep *ep; + enum ecore_iwarp_event_type event; + int status; +}; + +static void +qlnxr_iw_disconnect_worker(struct work_struct *work) +{ + struct qlnxr_discon_work *dwork = + container_of(work, struct qlnxr_discon_work, work); + struct ecore_rdma_modify_qp_in_params qp_params = { 0 }; + struct qlnxr_iw_ep *ep = dwork->ep; + struct qlnxr_dev *dev = ep->dev; + struct qlnxr_qp *qp = ep->qp; + struct iw_cm_event event; + + if (qp->destroyed) { + kfree(dwork); + qlnxr_iw_qp_rem_ref(&qp->ibqp); + return; + } + + memset(&event, 0, sizeof(event)); + event.status = dwork->status; + event.event = IW_CM_EVENT_DISCONNECT; + + /* Success means graceful disconnect was requested. modifying + * to SQD is translated to graceful disconnect. 
O/w reset is sent + */ + if (dwork->status) + qp_params.new_state = ECORE_ROCE_QP_STATE_ERR; + else + qp_params.new_state = ECORE_ROCE_QP_STATE_SQD; + + kfree(dwork); + + if (ep->cm_id) + ep->cm_id->event_handler(ep->cm_id, &event); + + SET_FIELD(qp_params.modify_flags, + ECORE_RDMA_MODIFY_QP_VALID_NEW_STATE, 1); + + ecore_rdma_modify_qp(dev->rdma_ctx, qp->ecore_qp, &qp_params); + + qlnxr_iw_qp_rem_ref(&qp->ibqp); + + return; +} + +void +qlnxr_iw_disconnect_event(void *context, + struct ecore_iwarp_cm_event_params *params) +{ + struct qlnxr_discon_work *work; + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + struct qlnxr_qp *qp = ep->qp; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + qlnxr_iw_qp_add_ref(&qp->ibqp); + work->ep = ep; + work->event = params->event; + work->status = params->status; + + INIT_WORK(&work->work, qlnxr_iw_disconnect_worker); + queue_work(dev->iwarp_wq, &work->work); + + return; +} + +#endif /* #if __FreeBSD_version >= 1102000 */ + +static int +qlnxr_iw_mpa_reply(void *context, + struct ecore_iwarp_cm_event_params *params) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + struct ecore_iwarp_send_rtr_in rtr_in; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + bzero(&rtr_in, sizeof(struct ecore_iwarp_send_rtr_in)); + rtr_in.ep_context = params->ep_context; + + rc = ecore_iwarp_send_rtr(dev->rdma_ctx, &rtr_in); + + QL_DPRINT12(ha, "exit rc = %d\n", rc); + return rc; +} + + +void +qlnxr_iw_qp_event(void *context, + struct ecore_iwarp_cm_event_params *params, + enum ib_event_type ib_event, + char *str) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + struct ib_qp *ibqp = &(ep->qp->ibqp); + struct ib_event event; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, + "[context, event, event_handler] = [%p, 0x%x, %s, %p] enter\n", + context, params->event, str, ibqp->event_handler); + + if (ibqp->event_handler) { + event.event = ib_event; + event.device = ibqp->device; + event.element.qp = ibqp; + ibqp->event_handler(&event, ibqp->qp_context); + } + + return; +} + +int +qlnxr_iw_event_handler(void *context, + struct ecore_iwarp_cm_event_params *params) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + struct qlnxr_dev *dev = ep->dev; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "[context, event] = [%p, 0x%x] " + "enter\n", context, params->event); + + switch (params->event) { + + /* Passive side request received */ + case ECORE_IWARP_EVENT_MPA_REQUEST: + qlnxr_iw_mpa_request(context, params); + break; + + case ECORE_IWARP_EVENT_ACTIVE_MPA_REPLY: + qlnxr_iw_mpa_reply(context, params); + break; + + /* Passive side established ( ack on mpa response ) */ + case ECORE_IWARP_EVENT_PASSIVE_COMPLETE: + +#if __FreeBSD_version >= 1102000 + + ep->during_connect = 0; + qlnxr_iw_passive_complete(context, params); + +#else + qlnxr_iw_issue_event(context, + params, + IW_CM_EVENT_ESTABLISHED, + "IW_CM_EVENT_ESTABLISHED"); +#endif /* #if __FreeBSD_version >= 1102000 */ + break; + + /* Active side reply received */ + case ECORE_IWARP_EVENT_ACTIVE_COMPLETE: + ep->during_connect = 0; + qlnxr_iw_issue_event(context, + params, + IW_CM_EVENT_CONNECT_REPLY, + "IW_CM_EVENT_CONNECT_REPLY"); + if (params->status < 0) { + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)context; + + 
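+			/* the connect attempt failed: drop the cm_id
+			 * reference taken in qlnxr_iw_connect() and detach
+			 * it from the endpoint.
+			 */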
ep->cm_id->rem_ref(ep->cm_id); + ep->cm_id = NULL; + } + break; + + case ECORE_IWARP_EVENT_DISCONNECT: + +#if __FreeBSD_version >= 1102000 + qlnxr_iw_disconnect_event(context, params); +#else + qlnxr_iw_issue_event(context, + params, + IW_CM_EVENT_DISCONNECT, + "IW_CM_EVENT_DISCONNECT"); + qlnxr_iw_close_event(context, params); +#endif /* #if __FreeBSD_version >= 1102000 */ + break; + + case ECORE_IWARP_EVENT_CLOSE: + ep->during_connect = 0; + qlnxr_iw_close_event(context, params); + break; + + case ECORE_IWARP_EVENT_RQ_EMPTY: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "IWARP_EVENT_RQ_EMPTY"); + break; + + case ECORE_IWARP_EVENT_IRQ_FULL: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "IWARP_EVENT_IRQ_FULL"); + break; + + case ECORE_IWARP_EVENT_LLP_TIMEOUT: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "IWARP_EVENT_LLP_TIMEOUT"); + break; + + case ECORE_IWARP_EVENT_REMOTE_PROTECTION_ERROR: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_ACCESS_ERR, + "IWARP_EVENT_REMOTE_PROTECTION_ERROR"); + break; + + case ECORE_IWARP_EVENT_CQ_OVERFLOW: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "QED_IWARP_EVENT_CQ_OVERFLOW"); + break; + + case ECORE_IWARP_EVENT_QP_CATASTROPHIC: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "QED_IWARP_EVENT_QP_CATASTROPHIC"); + break; + + case ECORE_IWARP_EVENT_LOCAL_ACCESS_ERROR: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_ACCESS_ERR, + "IWARP_EVENT_LOCAL_ACCESS_ERROR"); + break; + + case ECORE_IWARP_EVENT_REMOTE_OPERATION_ERROR: + qlnxr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "IWARP_EVENT_REMOTE_OPERATION_ERROR"); + break; + + case ECORE_IWARP_EVENT_TERMINATE_RECEIVED: + QL_DPRINT12(ha, "Got terminate message" + " ECORE_IWARP_EVENT_TERMINATE_RECEIVED\n"); + break; + + default: + QL_DPRINT12(ha, + "Unknown event [0x%x] received \n", params->event); + break; + }; + + QL_DPRINT12(ha, "[context, event] = [%p, 0x%x] " + "exit\n", context, params->event); + return 0; +} + +static int +qlnxr_addr4_resolve(struct qlnxr_dev *dev, + struct sockaddr_in *src_in, + struct sockaddr_in *dst_in, + u8 *dst_mac) +{ + int rc; + +#if __FreeBSD_version >= 1100000 + rc = arpresolve(dev->ha->ifp, 0, NULL, (struct sockaddr *)dst_in, + dst_mac, NULL, NULL); +#else + struct llentry *lle; + + rc = arpresolve(dev->ha->ifp, NULL, NULL, (struct sockaddr *)dst_in, + dst_mac, &lle); +#endif + + QL_DPRINT12(dev->ha, "rc = %d " + "sa_len = 0x%x sa_family = 0x%x IP Address = %d.%d.%d.%d " + "Dest MAC %02x:%02x:%02x:%02x:%02x:%02x\n", rc, + dst_in->sin_len, dst_in->sin_family, + NIPQUAD((dst_in->sin_addr.s_addr)), + dst_mac[0], dst_mac[1], dst_mac[2], + dst_mac[3], dst_mac[4], dst_mac[5]); + + return rc; +} + +int +qlnxr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + struct qlnxr_dev *dev; + struct ecore_iwarp_connect_out out_params; + struct ecore_iwarp_connect_in in_params; + struct qlnxr_iw_ep *ep; + struct qlnxr_qp *qp; + struct sockaddr_in *laddr; + struct sockaddr_in *raddr; + int rc = 0; + qlnx_host_t *ha; + + dev = get_qlnxr_dev((cm_id->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "[cm_id, conn_param] = [%p, %p] " + "enter \n", cm_id, conn_param); + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + qp = idr_find(&dev->qpidr, conn_param->qpn); + + laddr = (struct sockaddr_in *)&cm_id->local_addr; + raddr = (struct sockaddr_in *)&cm_id->remote_addr; + + QL_DPRINT12(ha, + "local = [%d.%d.%d.%d, %d] remote = [%d.%d.%d.%d, %d]\n", + NIPQUAD((laddr->sin_addr.s_addr)), 
laddr->sin_port, + NIPQUAD((raddr->sin_addr.s_addr)), raddr->sin_port); + + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (!ep) { + QL_DPRINT11(ha, "struct qlnxr_iw_ep " + "alloc memory failed\n"); + return -ENOMEM; + } + + ep->dev = dev; + ep->qp = qp; + cm_id->add_ref(cm_id); + ep->cm_id = cm_id; + + memset(&in_params, 0, sizeof (struct ecore_iwarp_connect_in)); + memset(&out_params, 0, sizeof (struct ecore_iwarp_connect_out)); + + in_params.event_cb = qlnxr_iw_event_handler; + in_params.cb_context = ep; + + in_params.cm_info.ip_version = ECORE_TCP_IPV4; + + in_params.cm_info.remote_ip[0] = ntohl(raddr->sin_addr.s_addr); + in_params.cm_info.local_ip[0] = ntohl(laddr->sin_addr.s_addr); + in_params.cm_info.remote_port = ntohs(raddr->sin_port); + in_params.cm_info.local_port = ntohs(laddr->sin_port); + in_params.cm_info.vlan = 0; + in_params.mss = dev->ha->ifp->if_mtu - 40; + + QL_DPRINT12(ha, "remote_ip = [%d.%d.%d.%d] " + "local_ip = [%d.%d.%d.%d] remote_port = %d local_port = %d " + "vlan = %d\n", + NIPQUAD((in_params.cm_info.remote_ip[0])), + NIPQUAD((in_params.cm_info.local_ip[0])), + in_params.cm_info.remote_port, in_params.cm_info.local_port, + in_params.cm_info.vlan); + + rc = qlnxr_addr4_resolve(dev, laddr, raddr, (u8 *)in_params.remote_mac_addr); + + if (rc) { + QL_DPRINT11(ha, "qlnxr_addr4_resolve failed\n"); + goto err; + } + + QL_DPRINT12(ha, "ord = %d ird=%d private_data=%p" + " private_data_len=%d rq_psn=%d\n", + conn_param->ord, conn_param->ird, conn_param->private_data, + conn_param->private_data_len, qp->rq_psn); + + in_params.cm_info.ord = conn_param->ord; + in_params.cm_info.ird = conn_param->ird; + in_params.cm_info.private_data = conn_param->private_data; + in_params.cm_info.private_data_len = conn_param->private_data_len; + in_params.qp = qp->ecore_qp; + + memcpy(in_params.local_mac_addr, dev->ha->primary_mac, ETH_ALEN); + + rc = ecore_iwarp_connect(dev->rdma_ctx, &in_params, &out_params); + + if (rc) { + QL_DPRINT12(ha, "ecore_iwarp_connect failed\n"); + goto err; + } + + QL_DPRINT12(ha, "exit\n"); + + return rc; + +err: + cm_id->rem_ref(cm_id); + kfree(ep); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +int +qlnxr_iw_create_listen(struct iw_cm_id *cm_id, int backlog) +{ + struct qlnxr_dev *dev; + struct qlnxr_iw_listener *listener; + struct ecore_iwarp_listen_in iparams; + struct ecore_iwarp_listen_out oparams; + struct sockaddr_in *laddr; + qlnx_host_t *ha; + int rc; + + dev = get_qlnxr_dev((cm_id->device)); + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + laddr = (struct sockaddr_in *)&cm_id->local_addr; + + listener = kzalloc(sizeof(*listener), GFP_KERNEL); + + if (listener == NULL) { + QL_DPRINT11(ha, "listener memory alloc failed\n"); + return -ENOMEM; + } + + listener->dev = dev; + cm_id->add_ref(cm_id); + listener->cm_id = cm_id; + listener->backlog = backlog; + + memset(&iparams, 0, sizeof (struct ecore_iwarp_listen_in)); + memset(&oparams, 0, sizeof (struct ecore_iwarp_listen_out)); + + iparams.cb_context = listener; + iparams.event_cb = qlnxr_iw_event_handler; + iparams.max_backlog = backlog; + + iparams.ip_version = ECORE_TCP_IPV4; + + iparams.ip_addr[0] = ntohl(laddr->sin_addr.s_addr); + iparams.port = ntohs(laddr->sin_port); + iparams.vlan = 0; + + QL_DPRINT12(ha, "[%d.%d.%d.%d, %d] iparamsport=%d\n", + NIPQUAD((laddr->sin_addr.s_addr)), + laddr->sin_port, iparams.port); + + rc = ecore_iwarp_create_listen(dev->rdma_ctx, &iparams, &oparams); + if (rc) { + QL_DPRINT11(ha, + 
"ecore_iwarp_create_listen failed rc = %d\n", rc); + goto err; + } + + listener->ecore_handle = oparams.handle; + cm_id->provider_data = listener; + + QL_DPRINT12(ha, "exit\n"); + return rc; + +err: + cm_id->rem_ref(cm_id); + kfree(listener); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return rc; +} + +void +qlnxr_iw_destroy_listen(struct iw_cm_id *cm_id) +{ + struct qlnxr_iw_listener *listener = cm_id->provider_data; + struct qlnxr_dev *dev = get_qlnxr_dev((cm_id->device)); + int rc = 0; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter\n"); + + if (listener->ecore_handle) + rc = ecore_iwarp_destroy_listen(dev->rdma_ctx, + listener->ecore_handle); + + cm_id->rem_ref(cm_id); + + QL_DPRINT12(ha, "exit [%d]\n", rc); + return; +} + +int +qlnxr_iw_accept(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param) +{ + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)cm_id->provider_data; + struct qlnxr_dev *dev = ep->dev; + struct qlnxr_qp *qp; + struct ecore_iwarp_accept_in params; + int rc; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter qpid=%d\n", conn_param->qpn); + + if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) + return -EINVAL; + + qp = idr_find(&dev->qpidr, conn_param->qpn); + if (!qp) { + QL_DPRINT11(ha, "idr_find failed invalid qpn = %d\n", + conn_param->qpn); + return -EINVAL; + } + ep->qp = qp; + qp->ep = ep; + cm_id->add_ref(cm_id); + ep->cm_id = cm_id; + + params.ep_context = ep->ecore_context; + params.cb_context = ep; + params.qp = ep->qp->ecore_qp; + params.private_data = conn_param->private_data; + params.private_data_len = conn_param->private_data_len; + params.ird = conn_param->ird; + params.ord = conn_param->ord; + + rc = ecore_iwarp_accept(dev->rdma_ctx, ¶ms); + if (rc) { + QL_DPRINT11(ha, "ecore_iwarp_accept failed %d\n", rc); + goto err; + } + + QL_DPRINT12(ha, "exit\n"); + return 0; +err: + cm_id->rem_ref(cm_id); + QL_DPRINT12(ha, "exit rc = %d\n", rc); + return rc; +} + +int +qlnxr_iw_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ +#if __FreeBSD_version >= 1102000 + + struct qlnxr_iw_ep *ep = (struct qlnxr_iw_ep *)cm_id->provider_data; + struct qlnxr_dev *dev = ep->dev; + struct ecore_iwarp_reject_in params; + int rc; + + params.ep_context = ep->ecore_context; + params.cb_context = ep; + params.private_data = pdata; + params.private_data_len = pdata_len; + ep->qp = NULL; + + rc = ecore_iwarp_reject(dev->rdma_ctx, ¶ms); + + return rc; + +#else + + printf("iWARP reject_cr not implemented\n"); + return -EINVAL; + +#endif /* #if __FreeBSD_version >= 1102000 */ +} + +void +qlnxr_iw_qp_add_ref(struct ib_qp *ibqp) +{ + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + qlnx_host_t *ha; + + ha = qp->dev->ha; + + QL_DPRINT12(ha, "enter ibqp = %p\n", ibqp); + + atomic_inc(&qp->refcnt); + + QL_DPRINT12(ha, "exit \n"); + return; +} + +void +qlnxr_iw_qp_rem_ref(struct ib_qp *ibqp) +{ + struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); + qlnx_host_t *ha; + + ha = qp->dev->ha; + + QL_DPRINT12(ha, "enter ibqp = %p qp = %p\n", ibqp, qp); + + if (atomic_dec_and_test(&qp->refcnt)) { + qlnxr_idr_remove(qp->dev, qp->qp_id); + kfree(qp); + } + + QL_DPRINT12(ha, "exit \n"); + return; +} + +struct ib_qp * +qlnxr_iw_get_qp(struct ib_device *ibdev, int qpn) +{ + struct qlnxr_dev *dev = get_qlnxr_dev(ibdev); + struct ib_qp *qp; + qlnx_host_t *ha; + + ha = dev->ha; + + QL_DPRINT12(ha, "enter dev = %p ibdev = %p qpn = %d\n", dev, ibdev, qpn); + + qp = idr_find(&dev->qpidr, qpn); + + QL_DPRINT12(ha, "exit qp = %p\n", qp); + + return (qp); +} diff --git 
a/sys/dev/qlnx/qlnxr/qlnxr_verbs.h b/sys/dev/qlnx/qlnxr/qlnxr_verbs.h new file mode 100644 index 000000000000..c202f4457b85 --- /dev/null +++ b/sys/dev/qlnx/qlnxr/qlnxr_verbs.h @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2018-2019 Cavium, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + + +#ifndef __QLNXR_VERBS_H__ +#define __QLNXR_VERBS_H__ + +extern int qlnxr_iw_query_gid(struct ib_device *, + uint8_t port, + int index, + union ib_gid *gid); + +extern int qlnxr_query_gid(struct ib_device *, + u8 port, + int index, + union ib_gid *gid); + +extern struct ib_srq *qlnxr_create_srq(struct ib_pd *, + struct ib_srq_init_attr *, + struct ib_udata *); + +extern int qlnxr_destroy_srq(struct ib_srq *); + + +extern int qlnxr_modify_srq(struct ib_srq *, + struct ib_srq_attr *, + enum ib_srq_attr_mask, + struct ib_udata *); + +extern int qlnxr_query_srq(struct ib_srq *, + struct ib_srq_attr *); + +extern int qlnxr_post_srq_recv(struct ib_srq *, + struct ib_recv_wr *, + struct ib_recv_wr **bad_recv_wr); + +#if __FreeBSD_version < 1102000 +extern int qlnxr_query_device(struct ib_device *, struct ib_device_attr *); +#else +extern int qlnxr_query_device(struct ib_device *, struct ib_device_attr *, + struct ib_udata *); +extern int qlnxr_get_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable); +#endif + +extern int qlnxr_query_port(struct ib_device *, + u8 port, + struct ib_port_attr *props); + +extern int qlnxr_modify_port(struct ib_device *, + u8 port, + int mask, + struct ib_port_modify *props); + +extern enum rdma_link_layer qlnxr_link_layer(struct ib_device *device, + uint8_t port_num); + +struct ib_pd *qlnxr_alloc_pd(struct ib_device *, + struct ib_ucontext *, + struct ib_udata *); + +extern int qlnxr_dealloc_pd(struct ib_pd *pd); + +#if __FreeBSD_version >= 1102000 +extern struct ib_cq *qlnxr_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata); +#else +#if __FreeBSD_version >= 1100000 +extern struct ib_cq *qlnxr_create_cq(struct ib_device *ibdev, + struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata); +#else +extern struct ib_cq *qlnxr_create_cq(struct 
ib_device *ibdev, + int cqe, + int comp_vector, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata); +#endif +#endif /* #if __FreeBSD_version >= 1102000 */ + +extern int qlnxr_destroy_cq(struct ib_cq *); + +extern int qlnxr_resize_cq(struct ib_cq *, + int cqe, + struct ib_udata *); + +extern int qlnxr_poll_cq(struct ib_cq *, + int num_entries, + struct ib_wc *wc); + + +extern struct ib_qp *qlnxr_create_qp(struct ib_pd *, + struct ib_qp_init_attr *attrs, + struct ib_udata *); + +extern int qlnxr_modify_qp(struct ib_qp *, + struct ib_qp_attr *attr, + int attr_mask, + struct ib_udata *udata); + +extern int qlnxr_query_qp(struct ib_qp *, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *); + +extern int qlnxr_destroy_qp(struct ib_qp *); + +extern int qlnxr_query_pkey(struct ib_device *, + u8 port, + u16 index, + u16 *pkey); + +#if __FreeBSD_version >= 1102000 +extern struct ib_ah *qlnxr_create_ah(struct ib_pd *ibpd, + struct ib_ah_attr *attr, struct ib_udata *udata); +#else +extern struct ib_ah *qlnxr_create_ah(struct ib_pd *ibpd, + struct ib_ah_attr *attr); +#endif /* #if __FreeBSD_version >= 1102000 */ + +extern int qlnxr_destroy_ah(struct ib_ah *ibah); + +extern int qlnxr_query_ah(struct ib_ah *ibah, + struct ib_ah_attr *attr); + +extern int qlnxr_modify_ah(struct ib_ah *ibah, + struct ib_ah_attr *attr); + +#if __FreeBSD_version >= 1102000 +extern int qlnxr_process_mad(struct ib_device *ibdev, + int process_mad_flags, + u8 port_num, + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *mad_hdr, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index); +#else +extern int qlnxr_process_mad(struct ib_device *ibdev, + int process_mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad); +#endif /* #if __FreeBSD_version >= 1102000 */ + +extern int qlnxr_post_send(struct ib_qp *, + struct ib_send_wr *, + struct ib_send_wr **bad_wr); + +extern int qlnxr_post_recv(struct ib_qp *, + struct ib_recv_wr *, + struct ib_recv_wr **bad_wr); + +extern int qlnxr_arm_cq(struct ib_cq *, + enum ib_cq_notify_flags flags); + +extern struct ib_mr *qlnxr_get_dma_mr(struct ib_pd *, + int acc); + +#if __FreeBSD_version < 1102000 +extern struct ib_mr *qlnxr_reg_kernel_mr(struct ib_pd *, + struct ib_phys_buf *buffer_list, + int num_phys_buf, + int acc, + u64 *iova_start); +#endif /* #if __FreeBSD_version < 1102000 */ + +extern int qlnxr_dereg_mr(struct ib_mr *); + +#if __FreeBSD_version >= 1102000 +extern struct ib_mr *qlnxr_reg_user_mr(struct ib_pd *, + u64 start, + u64 length, + u64 virt, + int acc, + struct ib_udata *); +#else +extern struct ib_mr *qlnxr_reg_user_mr(struct ib_pd *, + u64 start, + u64 length, + u64 virt, + int acc, + struct ib_udata *, + int mr_id); +#endif /* #if __FreeBSD_version >= 1102000 */ + +#if __FreeBSD_version >= 1102000 + +extern struct ib_mr *qlnxr_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, u32 max_num_sg); +extern int qlnxr_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset); +#else + +extern struct ib_mr *qlnxr_alloc_frmr(struct ib_pd *pd, + int max_page_list_len); + + +extern struct ib_fast_reg_page_list *qlnxr_alloc_frmr_page_list( + struct ib_device *ibdev, + int page_list_len); + +extern void qlnxr_free_frmr_page_list(struct ib_fast_reg_page_list *page_list); + +#endif /* #if __FreeBSD_version >= 1102000 */ + +extern struct ib_ucontext 
*qlnxr_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata); + +extern int qlnxr_dealloc_ucontext(struct ib_ucontext *ibctx); + + +extern int qlnxr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); + +extern int qlnxr_iw_connect(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param); + +extern int qlnxr_iw_create_listen(struct iw_cm_id *cm_id, int backlog); + +void qlnxr_iw_destroy_listen(struct iw_cm_id *cm_id); + +extern int qlnxr_iw_accept(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param); + +extern int qlnxr_iw_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); + +extern void qlnxr_iw_qp_add_ref(struct ib_qp *qp); + +extern void qlnxr_iw_qp_rem_ref(struct ib_qp *qp); + +extern struct ib_qp *qlnxr_iw_get_qp(struct ib_device *dev, int qpn); + +#endif /* #ifndef __QLNXR_VERBS_H__ */ + diff --git a/sys/modules/qlnx/Makefile b/sys/modules/qlnx/Makefile index 0a846999d472..e5dbaa01ba76 100644 --- a/sys/modules/qlnx/Makefile +++ b/sys/modules/qlnx/Makefile @@ -37,5 +37,6 @@ SYSDIR?=${SRCTOP}/sys SUBDIR=qlnxe SUBDIR+=qlnxev +SUBDIR+=qlnxr .include diff --git a/sys/modules/qlnx/qlnxe/Makefile b/sys/modules/qlnx/qlnxe/Makefile index 9034b0eaa112..9bdd80ca153e 100644 --- a/sys/modules/qlnx/qlnxe/Makefile +++ b/sys/modules/qlnx/qlnxe/Makefile @@ -47,6 +47,17 @@ SRCS+=ecore_mng_tlv.c SRCS+=ecore_sriov.c SRCS+=ecore_vf.c +#roce/iwarp files. Compilation can be turned off if roce/iwarp are not required. +# In other words if you don't need RDMA please comment out SRCS adds for +# ecore_rdma.c ecore_roce.c ecore_iwarp.c ecore_ooo.c ecore_ll2.c qlnx_rdma.c +SRCS+=ecore_rdma.c +SRCS+=ecore_roce.c +SRCS+=ecore_iwarp.c +SRCS+=ecore_ooo.c +SRCS+=ecore_ll2.c +SRCS+=qlnx_rdma.c + + SRCS+=qlnx_ioctl.c SRCS+=qlnx_os.c @@ -75,4 +86,13 @@ CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include CFLAGS += -DCONFIG_ECORE_SRIOV +# For roce/iwarp files. Compilation can be turned off if roce/iwarp are not required. +# In other words if you don't need RDMA please comment out the CFLAGS which define +# CONFIG_ECORE_LL2 CONFIG_ECORE_ROCE CONFIG_ECORE_IWARP QLNX_ENABLE_IWARP +CFLAGS += -DCONFIG_ECORE_LL2 +CFLAGS += -DCONFIG_ECORE_ROCE +CFLAGS += -DCONFIG_ECORE_IWARP +CFLAGS += -DCONFIG_ECORE_RDMA +CFLAGS += -DQLNX_ENABLE_IWARP + CWARNFLAGS+= -Wno-cast-qual diff --git a/sys/modules/qlnx/qlnxr/Makefile b/sys/modules/qlnx/qlnxr/Makefile new file mode 100644 index 000000000000..c8e1103599cb --- /dev/null +++ b/sys/modules/qlnx/qlnxr/Makefile @@ -0,0 +1,85 @@ +#/* +# * Copyright (c) 2017-2018 Cavium, Inc. +# * All rights reserved. +# * +# * Redistribution and use in source and binary forms, with or without +# * modification, are permitted provided that the following conditions +# * are met: +# * +# * 1. Redistributions of source code must retain the above copyright +# * notice, this list of conditions and the following disclaimer. +# * 2. Redistributions in binary form must reproduce the above copyright +# * notice, this list of conditions and the following disclaimer in the +# * documentation and/or other materials provided with the distribution. +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# * POSSIBILITY OF SUCH DAMAGE. +# */ +#/* +# * File : Makefile +# * Author : David C Somayajulu, Cavium, Inc., San Jose, CA 95131. +# */ +# +# $FreeBSD$ +# + +#.PATH: ${.CURDIR} +#OFEDDIR= /usr/src/sys +#ETHDRVR=${.CURDIR}/../qlnxe + +.PATH: ${SRCTOP}/sys/dev/qlnx/qlnxr +OFEDDIR=${SRCTOP}/sys +ETHDRVR=${SRCTOP}/sys/dev/qlnx/qlnxe + +KMOD= qlnxr +SRCS= device_if.h bus_if.h vnode_if.h pci_if.h \ + opt_inet.h opt_inet6.h \ + qlnxr_os.c\ + qlnxr_cm.c\ + qlnxr_verbs.c + +.include + +CFLAGS+= -I${.CURDIR} +CFLAGS+= -I${ETHDRVR} +CFLAGS+= -I${OFEDDIR}/ofed/include +CFLAGS+= -I${OFEDDIR}/ofed/include/uapi +CFLAGS+= -I${OFEDDIR}/compat/linuxkpi/common/include + +CFLAGS+= -DLINUX_TYPES_DEFINED +CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM +CFLAGS+= -DINET6 -DINET +#CFLAGS+= -DDEFINE_NO_IP_BASED_GIDS + +CWARNEXTRA += -Wno-cast-qual +CWARNEXTRA += -Wno-unused-function +CWARNEXTRA += -Wno-gnu-variable-sized-type-not-at-end +CWARNEXTRA += -Wno-missing-prototypes +CWARNEXTRA += -Wno-constant-conversion +CWARNEXTRA += -Wno-format + +CWARNEXTRA += -Wno-shift-sign-overflow +CWARNEXTRA += -Wno-empty-body + +CFLAGS += -DQLNX_DEBUG +CFLAGS += -DECORE_PACKAGE +CFLAGS += -DCONFIG_ECORE_L2 +CFLAGS += -DCONFIG_ECORE_LL2 +CFLAGS += -DCONFIG_ECORE_ROCE +CFLAGS += -DCONFIG_ECORE_IWARP +CFLAGS += -DCONFIG_ECORE_RDMA +CFLAGS += -DECORE_CONFIG_DIRECT_HWFN +CFLAGS += -g -fno-inline +CFLAGS += -DQLNX_RDMA + +CFLAGS+= -Wno-cast-qual -Wno-pointer-arith + From 9303f819551d1692ac934e1e3f8be457dd67b04f Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Thu, 31 Jan 2019 02:49:24 +0000 Subject: [PATCH 23/90] libc/tests: Add test case for jemalloc/libthr bug fixed in r343566 Submitted by: Andrew Gierth (original reproducer; kevans massaged for atf) Reviewed by: kib MFC after: 2 weeks X-MFC-with: r343566 (or after) Differential Revision: https://reviews.freebsd.org/D19027 --- lib/libc/tests/stdlib/Makefile | 3 + lib/libc/tests/stdlib/dynthr_mod/Makefile | 11 +++ lib/libc/tests/stdlib/dynthr_mod/dynthr_mod.c | 71 ++++++++++++++ lib/libc/tests/stdlib/dynthr_test.c | 93 +++++++++++++++++++ 4 files changed, 178 insertions(+) create mode 100644 lib/libc/tests/stdlib/dynthr_mod/Makefile create mode 100644 lib/libc/tests/stdlib/dynthr_mod/dynthr_mod.c create mode 100644 lib/libc/tests/stdlib/dynthr_test.c diff --git a/lib/libc/tests/stdlib/Makefile b/lib/libc/tests/stdlib/Makefile index bb9542b185ed..9f7afa112491 100644 --- a/lib/libc/tests/stdlib/Makefile +++ b/lib/libc/tests/stdlib/Makefile @@ -2,6 +2,7 @@ .include +ATF_TESTS_C+= dynthr_test ATF_TESTS_C+= heapsort_test ATF_TESTS_C+= mergesort_test ATF_TESTS_C+= qsort_test @@ -62,4 +63,6 @@ LIBADD.${t}+= netbsd util LIBADD.strtod_test+= m +SUBDIR+= dynthr_mod + .include diff --git a/lib/libc/tests/stdlib/dynthr_mod/Makefile b/lib/libc/tests/stdlib/dynthr_mod/Makefile new file mode 100644 index 000000000000..b2a93b3af6d6 --- /dev/null +++ b/lib/libc/tests/stdlib/dynthr_mod/Makefile @@ -0,0 +1,11 @@ +# $FreeBSD$ + +SHLIB_NAME= dynthr_mod.so +SHLIBDIR= ${TESTSDIR} +SRCS= dynthr_mod.c 
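+# mod_main() creates and joins a thread from the dlopen(3)'d module, so the module must link against libthr: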
+LIBADD= pthread + +TESTSDIR:= ${TESTSBASE}/${RELDIR:C/libc\/tests/libc/:H} + + +.include diff --git a/lib/libc/tests/stdlib/dynthr_mod/dynthr_mod.c b/lib/libc/tests/stdlib/dynthr_mod/dynthr_mod.c new file mode 100644 index 000000000000..c455808f6b92 --- /dev/null +++ b/lib/libc/tests/stdlib/dynthr_mod/dynthr_mod.c @@ -0,0 +1,71 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (C) 2019 Andrew Gierth + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Though this file is initially distributed under the 2-clause BSD license, + * the author grants permission for its redistribution under alternative + * licenses as set forth at . + * This paragraph and the RELICENSE.txt file are not part of the license and + * may be omitted in redistributions. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +static pthread_t thr; + +static void * +mod_thread(void *ptr) +{ + char *volatile dummy; + + dummy = malloc(500); + return (NULL); +} + +void +mod_main(int op) +{ + int rc; + + switch (op) { + case 1: + rc = pthread_create(&thr, NULL, mod_thread, NULL); + if (rc != 0) + _exit(1); + break; + case 0: + pthread_join(thr, NULL); + break; + } +} + diff --git a/lib/libc/tests/stdlib/dynthr_test.c b/lib/libc/tests/stdlib/dynthr_test.c new file mode 100644 index 000000000000..d72ee280ccb2 --- /dev/null +++ b/lib/libc/tests/stdlib/dynthr_test.c @@ -0,0 +1,93 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (C) 2019 Andrew Gierth + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Though this file is initially distributed under the 2-clause BSD license, + * the author grants permission for its redistribution under alternative + * licenses as set forth at . + * This paragraph and the RELICENSE.txt file are not part of the license and + * may be omitted in redistributions. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include + +typedef void (modfunc_t)(int op); + +/* + * Minimal test case for PR 235158; mutual dependencies between jemalloc and + * libthr causing issues in thread creation. Specifically to this case, libthr + * uses calloc to initialize pthread mutexes, and jemalloc uses pthread mutexes. + * + * Deferred initialization provided by jemalloc proved to be fragile, causing + * issues like in the referenced PR where thread creation in a shared object + * loaded via dlopen(3) would stall unless the calling application also linked + * against pthread. + */ +ATF_TC(maintc); +ATF_TC_HEAD(maintc, tc) +{ + + atf_tc_set_md_var(tc, "timeout", "3"); +} + +ATF_TC_BODY(maintc, tc) +{ + char *libpath; + modfunc_t *func; + void *mod_handle; + const char *srcdir; + dlfunc_t rawfunc; + + srcdir = atf_tc_get_config_var(tc, "srcdir"); + if (asprintf(&libpath, "%s/dynthr_mod.so", srcdir) < 0) + atf_tc_fail("failed to construct path to libthr"); + mod_handle = dlopen(libpath, RTLD_LOCAL); + free(libpath); + if (mod_handle == NULL) + atf_tc_fail("failed to open dynthr_mod.so: %s", dlerror()); + rawfunc = dlfunc(mod_handle, "mod_main"); + if (rawfunc == NULL) + atf_tc_fail("failed to resolve function mod_main"); + func = (modfunc_t *)rawfunc; + func(1); + func(0); +} + +ATF_TP_ADD_TCS(tp) +{ + + ATF_TP_ADD_TC(tp, maintc); + return (atf_no_error()); +} From 43f5d5a277f798d598a9320abef7478cd6a2c18d Mon Sep 17 00:00:00 2001 From: Cy Schubert Date: Thu, 31 Jan 2019 04:16:52 +0000 Subject: [PATCH 24/90] Document the instance context pointer. 
MFC after: 3 days --- sys/contrib/ipfilter/netinet/fil.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/contrib/ipfilter/netinet/fil.c b/sys/contrib/ipfilter/netinet/fil.c index 2370d9479322..7413061bb1c5 100644 --- a/sys/contrib/ipfilter/netinet/fil.c +++ b/sys/contrib/ipfilter/netinet/fil.c @@ -2815,7 +2815,8 @@ ipf_firewall(fin, passp) /* -2 == requires authentication */ /* Kernel: */ /* > 0 == filter error # for packet */ -/* Parameters: ip(I) - pointer to start of IPv4/6 packet */ +/* Parameters: ctx(I) - pointer to the instance context */ +/* ip(I) - pointer to start of IPv4/6 packet */ /* hlen(I) - length of header */ /* ifp(I) - pointer to interface this packet is on */ /* out(I) - 0 == packet going in, 1 == packet going out */ From 6cbda6d943864818aeda19356b570865baa11872 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Thu, 31 Jan 2019 05:20:11 +0000 Subject: [PATCH 25/90] install(1): Fix relative path calculation with partial common dest/src For example, from the referenced PR [1]: $ mkdir /tmp/lib/ /tmp/libexec $ touch /tmp/lib/foo.so $ install -lrs /tmp/lib/foo.so /tmp/libexec/ The common path identification bits terminate src at /tmp/lib/ and the destination at /tmp/libe. The subsequent backtracking is then incorrect, as it traverses the destination and backtraces exactly one level while eating the 'libexec' because it was previously (falsely) identified as common with 'lib'. The obvious fix would be to make sure we've actually terminated just after directory separators and rewind a character if we haven't. In the above example, we would end up rewinding to /tmp/ and subsequently doing the right thing. Test case added. PR: 235330 [1] MFC after: 1 week --- usr.bin/xinstall/tests/install_test.sh | 24 ++++++++++++++++++++++++ usr.bin/xinstall/xinstall.c | 15 +++++++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/usr.bin/xinstall/tests/install_test.sh b/usr.bin/xinstall/tests/install_test.sh index 4332f9b8268c..92044f34c20b 100755 --- a/usr.bin/xinstall/tests/install_test.sh +++ b/usr.bin/xinstall/tests/install_test.sh @@ -377,6 +377,29 @@ mkdir_simple_body() { atf_check install -d dir1/dir2/dir3 } +atf_test_case symbolic_link_relative_absolute_common +symbolic_link_relative_absolute_common_head() { + atf_set "descr" "Verify -l rs with absolute paths having common components" +} +symbolic_link_relative_absolute_common_body() { + filename=foo.so + src_path=lib + src_path_prefixed=$PWD/$src_path + dest_path=$PWD/libexec/ + src_file=$src_path_prefixed/$filename + dest_file=$dest_path/$filename + + atf_check mkdir $src_path_prefixed $dest_path + atf_check touch $src_file + atf_check install -l sr $src_file $dest_path + + dest_path_relative=$(readlink $dest_file) + src_path_relative="../lib/$filename" + if [ "$src_path_relative" != "$dest_path_relative" ]; then + atf_fail "unexpected symlink contents ('$src_path_relative' != '$dest_path_relative')" + fi +} + atf_init_test_cases() { atf_add_test_case copy_to_nonexistent atf_add_test_case copy_to_nonexistent_safe @@ -415,5 +438,6 @@ atf_init_test_cases() { atf_add_test_case symbolic_link_relative_absolute_source_and_dest1 atf_add_test_case symbolic_link_relative_absolute_source_and_dest1_double_slash atf_add_test_case symbolic_link_relative_absolute_source_and_dest2 + atf_add_test_case symbolic_link_relative_absolute_common atf_add_test_case mkdir_simple } diff --git a/usr.bin/xinstall/xinstall.c b/usr.bin/xinstall/xinstall.c index 880766b34623..d9aca00d8efc 100644 --- 
a/usr.bin/xinstall/xinstall.c +++ b/usr.bin/xinstall/xinstall.c @@ -673,7 +673,7 @@ makelink(const char *from_name, const char *to_name, } if (dolink & LN_RELATIVE) { - char *to_name_copy, *cp, *d, *s; + char *to_name_copy, *cp, *d, *ld, *ls, *s; if (*from_name != '/') { /* this is already a relative link */ @@ -709,8 +709,19 @@ makelink(const char *from_name, const char *to_name, free(to_name_copy); /* Trim common path components. */ - for (s = src, d = dst; *s == *d; s++, d++) + ls = ld = NULL; + for (s = src, d = dst; *s == *d; ls = s, ld = d, s++, d++) continue; + /* + * If we didn't end after a directory separator, then we've + * falsely matched the last component. For example, if one + * invoked install -lrs /lib/foo.so /libexec/ then the source + * would terminate just after the separator while the + * destination would terminate in the middle of 'libexec', + * leading to a full directory getting falsely eaten. + */ + if ((ls != NULL && *ls != '/') || (ld != NULL && *ld != '/')) + s--, d--; while (*s != '/') s--, d--; From 9b1a29716a1a32d1a03b295fa037ffdf97435e61 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Thu, 31 Jan 2019 10:44:00 +0000 Subject: [PATCH 26/90] ipw(4): reuse ieee80211_tx_complete function This should partially fix 'netstat -b -I wlan0' output MFC after: 1 week --- sys/dev/ipw/if_ipw.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sys/dev/ipw/if_ipw.c b/sys/dev/ipw/if_ipw.c index d6ab94c1b5d8..49150f9151bf 100644 --- a/sys/dev/ipw/if_ipw.c +++ b/sys/dev/ipw/if_ipw.c @@ -1326,10 +1326,7 @@ ipw_release_sbd(struct ipw_softc *sc, struct ipw_soft_bd *sbd) bus_dmamap_unload(sc->txbuf_dmat, sbuf->map); SLIST_INSERT_HEAD(&sc->free_sbuf, sbuf, next); - if (sbuf->m->m_flags & M_TXCB) - ieee80211_process_callback(sbuf->ni, sbuf->m, 0/*XXX*/); - m_freem(sbuf->m); - ieee80211_free_node(sbuf->ni); + ieee80211_tx_complete(sbuf->ni, sbuf->m, 0/*XXX*/); sc->sc_tx_timer = 0; break; From 838b61c1f0029564d6bafbef2a1259144614622e Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Thu, 31 Jan 2019 11:12:31 +0000 Subject: [PATCH 27/90] bwn(4): reuse ieee80211_tx_complete function. MFC after: 1 week --- sys/dev/bwn/if_bwn.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/sys/dev/bwn/if_bwn.c b/sys/dev/bwn/if_bwn.c index 6f8bac67bc72..d6a1926282ef 100644 --- a/sys/dev/bwn/if_bwn.c +++ b/sys/dev/bwn/if_bwn.c @@ -6211,20 +6211,15 @@ bwn_pio_handle_txeof(struct bwn_mac *mac, tq->tq_used -= roundup(tp->tp_m->m_pkthdr.len + BWN_HDRSIZE(mac), 4); tq->tq_free++; - /* XXX ieee80211_tx_complete()? */ if (tp->tp_ni != NULL) { /* * Do any tx complete callback. Note this must * be done before releasing the node reference. */ - bwn_ratectl_tx_complete(tp->tp_ni, status); - if (tp->tp_m->m_flags & M_TXCB) - ieee80211_process_callback(tp->tp_ni, tp->tp_m, 0); - ieee80211_free_node(tp->tp_ni); - tp->tp_ni = NULL; } - m_freem(tp->tp_m); + ieee80211_tx_complete(tp->tp_ni, tp->tp_m, 0); + tp->tp_ni = NULL; tp->tp_m = NULL; TAILQ_INSERT_TAIL(&tq->tq_pktlist, tp, tp_list); From ca5efb62ee5af78c01703bbb9e794a2a21df5747 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Thu, 31 Jan 2019 15:07:32 +0000 Subject: [PATCH 28/90] Enable lld as the system linker by default on i386 The migration to LLVM's lld linker has been in progress for quite some time - I opened an LLVM tracking bug (23214) in April 2015 to track issues using lld as FreeBSD's linker, and requested the first exp-run using lld as /usr/bin/ld in November 2016. 
In 12.0 LLD is the system linker on amd64, arm64, and armv7. i386 was not switched initially as there were additional ports failures not found on amd64. Those have largely been addressed now, although there are a small number of issues that are still being worked on. In some of these cases having lld as the system linker makes it easier for developers and third parties to investigate failures. Thanks to antoine@ for handling the exp-runs and to everyone in the FreeBSD and LLVM communities who have fixed issues with lld to get us to this point. PR: 214864 Relnotes: Yes Sponsored by: The FreeBSD Foundation --- share/mk/src.opts.mk | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/share/mk/src.opts.mk b/share/mk/src.opts.mk index 7aa53f35e5f4..be1d4efb8dde 100644 --- a/share/mk/src.opts.mk +++ b/share/mk/src.opts.mk @@ -321,11 +321,9 @@ __DEFAULT_YES_OPTIONS+=LLVM_LIBUNWIND .else __DEFAULT_NO_OPTIONS+=LLVM_LIBUNWIND .endif -.if ${__T} == "aarch64" || ${__T} == "amd64" || ${__T} == "armv7" +.if ${__T} == "aarch64" || ${__T} == "amd64" || ${__T} == "armv7" || \ + ${__T} == "i386" __DEFAULT_YES_OPTIONS+=LLD_BOOTSTRAP LLD_IS_LD -.elif ${__T} == "i386" -__DEFAULT_YES_OPTIONS+=LLD_BOOTSTRAP -__DEFAULT_NO_OPTIONS+=LLD_IS_LD .else __DEFAULT_NO_OPTIONS+=LLD_BOOTSTRAP LLD_IS_LD .endif From 75fe717698fb52c2f03fadd32c526a62ad774ae0 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 31 Jan 2019 15:44:49 +0000 Subject: [PATCH 29/90] Reserve a bit in the FreeBSD feature control note for marking the image as not compatible with ASLR. Requested by: emaste Sponsored by: The FreeBSD Foundation MFC after: 3 days Differential revision: https://reviews.freebsd.org/D5603 --- sys/sys/elf_common.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sys/sys/elf_common.h b/sys/sys/elf_common.h index 618a5e2e1acb..2d61e953401a 100644 --- a/sys/sys/elf_common.h +++ b/sys/sys/elf_common.h @@ -762,6 +762,9 @@ typedef struct { #define NT_FREEBSD_ARCH_TAG 3 #define NT_FREEBSD_FEATURE_CTL 4 +/* NT_FREEBSD_FEATURE_CTL desc[0] bits */ +#define NT_FREEBSD_FCTL_ASLR_DISABLE 0x00000001 + /* Values for n_type. Used in core files. */ #define NT_PRSTATUS 1 /* Process status. */ #define NT_FPREGSET 2 /* Floating point registers. */ From 1f7d14836820370003db65c5d8ddb6d8c3104ce9 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Thu, 31 Jan 2019 15:50:11 +0000 Subject: [PATCH 30/90] regen src.conf.5 after r343606 --- share/man/man5/src.conf.5 | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index 59374785d793..a4cb42b41e68 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,6 +1,6 @@ .\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman. .\" $FreeBSD$ -.Dd December 15, 2018 +.Dd January 31, 2019 .Dt SRC.CONF 5 .Os .Sh NAME @@ -335,6 +335,8 @@ When set, it enforces these options: .It .Va WITHOUT_CTF .It +.Va WITHOUT_LOADER_ZFS +.It .Va WITHOUT_ZFS .El .It Va WITHOUT_CLANG @@ -1045,12 +1047,12 @@ amd64/amd64, arm/armv7, arm64/aarch64 and i386/i386. Set to use GNU binutils ld as the system linker, instead of LLVM's LLD. .Pp This is a default setting on -arm/arm, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64.
+arm/arm, arm/armv6, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLD_IS_LD Set to use LLVM's LLD as the system linker, instead of GNU binutils ld. .Pp This is a default setting on -amd64/amd64, arm/armv7 and arm64/aarch64. +amd64/amd64, arm/armv7, arm64/aarch64 and i386/i386. .It Va WITHOUT_LLVM_COV Set to not build the .Xr llvm-cov 1 @@ -1082,7 +1084,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -riscv/riscv64 and sparc64/sparc64. +arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_AARCH64 Set to build LLVM target support for AArch64. The @@ -1090,7 +1092,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. +amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLVM_TARGET_ALL Set to only build the required LLVM target support. This option is preferred to specific target support options. @@ -1156,7 +1158,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -riscv/riscv64 and sparc64/sparc64. +arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_MIPS Set to build LLVM target support for MIPS. The @@ -1164,7 +1166,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. +amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLVM_TARGET_POWERPC Set to not build LLVM target support for PowerPC. The @@ -1172,7 +1174,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -riscv/riscv64 and sparc64/sparc64. +arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_POWERPC Set to build LLVM target support for PowerPC. The @@ -1180,7 +1182,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. +amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLVM_TARGET_SPARC Set to not build LLVM target support for SPARC. 
The @@ -1188,7 +1190,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -riscv/riscv64 and sparc64/sparc64. +arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_SPARC Set to build LLVM target support for SPARC. The @@ -1196,7 +1198,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. +amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_LLVM_TARGET_X86 Set to not build LLVM target support for X86. The @@ -1204,7 +1206,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -riscv/riscv64 and sparc64/sparc64. +arm/arm, arm/armv6, riscv/riscv64 and sparc64/sparc64. .It Va WITH_LLVM_TARGET_X86 Set to build LLVM target support for X86. The @@ -1212,7 +1214,7 @@ The option should be used rather than this in most cases. .Pp This is a default setting on -amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. +amd64/amd64, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITH_LOADER_FIREWIRE Enable firewire support in /boot/loader on x86. This option is a nop on all other platforms. @@ -1259,6 +1261,13 @@ Set to build ubldr. .Pp This is a default setting on arm/arm, arm/armv6, arm/armv7, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. +.It Va WITH_LOADER_VERBOSE +Set to build with extra verbose debugging in the loader. +May explode already nearly too large loader over the limit. +Use with care. + +.It Va WITHOUT_LOADER_ZFS +Set to not build ZFS file system boot loader support. .It Va WITHOUT_LOCALES Set to not build localization files; see .Xr locale 1 . @@ -1890,7 +1899,7 @@ without support for the IEEE 802.1X protocol and without support for EAP-PEAP, EAP-TLS, EAP-LEAP, and EAP-TTLS protocols (usable only via 802.1X). .It Va WITHOUT_ZFS -Set to not build ZFS file system. +Set to not build ZFS file system kernel module, libraries, and user commands. .It Va WITHOUT_ZONEINFO Set to not build the timezone database. 
When set, it enforces these options: From 0f663f7258ba4f5ce92fabea8f669ff0b6811655 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Thu, 31 Jan 2019 16:11:15 +0000 Subject: [PATCH 31/90] elfdump: whitespace fixup in advance of other changes --- usr.bin/elfdump/elfdump.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/usr.bin/elfdump/elfdump.c b/usr.bin/elfdump/elfdump.c index aadf1e84088d..23e3fa0acf72 100644 --- a/usr.bin/elfdump/elfdump.c +++ b/usr.bin/elfdump/elfdump.c @@ -320,7 +320,7 @@ static const char *p_flags[] = { /* http://www.sco.com/developers/gabi/latest/ch4.sheader.html#sh_type */ static const char * sh_types(uint64_t machine, uint64_t sht) { - static char unknown_buf[64]; + static char unknown_buf[64]; if (sht < 0x60000000) { switch (sht) { @@ -1068,11 +1068,11 @@ elf_print_note(Elf32_Ehdr *e, void *sh) name = elf_get_word(e, sh, SH_NAME); n = (char *)e + offset; fprintf(out, "\nnote (%s):\n", shstrtab + name); - while (n < ((char *)e + offset + size)) { + while (n < ((char *)e + offset + size)) { namesz = elf_get_word(e, n, N_NAMESZ); descsz = elf_get_word(e, n, N_DESCSZ); - s = n + sizeof(Elf_Note); - desc = elf_get_word(e, n + sizeof(Elf_Note) + namesz, 0); + s = n + sizeof(Elf_Note); + desc = elf_get_word(e, n + sizeof(Elf_Note) + namesz, 0); fprintf(out, "\t%s %d\n", s, desc); n += sizeof(Elf_Note) + namesz + descsz; } From 2bc7b0242ff04f81fb7b942b0b69b7897fb89935 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Thu, 31 Jan 2019 16:19:04 +0000 Subject: [PATCH 32/90] elfdump: include note type names Based on a patch submitted by Dan McGregor. PR: 228290 MFC after: 1 week Sponsored by: The FreeBSD Foundation --- usr.bin/elfdump/elfdump.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/usr.bin/elfdump/elfdump.c b/usr.bin/elfdump/elfdump.c index 23e3fa0acf72..46c94c651366 100644 --- a/usr.bin/elfdump/elfdump.c +++ b/usr.bin/elfdump/elfdump.c @@ -317,6 +317,11 @@ static const char *p_flags[] = { "PF_X|PF_W|PF_R" }; +static const char *nt_types[] = { + "", "NT_FREEBSD_ABI_TAG", "NT_FREEBSD_NOINIT_TAG", + "NT_FREEBSD_ARCH_TAG", "NT_FREEBSD_FEATURE_CTL" +}; + /* http://www.sco.com/developers/gabi/latest/ch4.sheader.html#sh_type */ static const char * sh_types(uint64_t machine, uint64_t sht) { @@ -1071,9 +1076,14 @@ elf_print_note(Elf32_Ehdr *e, void *sh) while (n < ((char *)e + offset + size)) { namesz = elf_get_word(e, n, N_NAMESZ); descsz = elf_get_word(e, n, N_DESCSZ); + type = elf_get_word(e, n, N_TYPE); + if (type < nitems(nt_types)) + nt_type = nt_types[type]; + else + nt_type = "Unknown type"; s = n + sizeof(Elf_Note); desc = elf_get_word(e, n + sizeof(Elf_Note) + namesz, 0); - fprintf(out, "\t%s %d\n", s, desc); + fprintf(out, "\t%s %d (%s)\n", s, desc, nt_type); n += sizeof(Elf_Note) + namesz + descsz; } } From 8ae9aa2772f8febf64634f72d78451d324ee25ed Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Thu, 31 Jan 2019 16:21:09 +0000 Subject: [PATCH 33/90] elfdump: fix build after r343610 One patch hunk did not survive the trip from git to svn. 
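(The hunk that went missing is the declaration of the 'type' and 'nt_type' variables used by r343610's note type lookup; it is restored below.)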
PR: 228290 MFC with: r343610 --- usr.bin/elfdump/elfdump.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/usr.bin/elfdump/elfdump.c b/usr.bin/elfdump/elfdump.c index 46c94c651366..235376e2910f 100644 --- a/usr.bin/elfdump/elfdump.c +++ b/usr.bin/elfdump/elfdump.c @@ -1066,7 +1066,9 @@ elf_print_note(Elf32_Ehdr *e, void *sh) u_int32_t namesz; u_int32_t descsz; u_int32_t desc; + u_int32_t type; char *n, *s; + const char *nt_type; offset = elf_get_off(e, sh, SH_OFFSET); size = elf_get_size(e, sh, SH_SIZE); From 97d368d62b44c605320e2eea60d554bd6cb58b76 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Thu, 31 Jan 2019 16:49:06 +0000 Subject: [PATCH 34/90] elfdump: use designated array initialization for note types This ensures the note type name is in the correct slot. PR: 228290 Submitted by: kib MFC with: 343610 Sponsored by: The FreeBSD Foundation --- usr.bin/elfdump/elfdump.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/usr.bin/elfdump/elfdump.c b/usr.bin/elfdump/elfdump.c index 235376e2910f..2bdf98830088 100644 --- a/usr.bin/elfdump/elfdump.c +++ b/usr.bin/elfdump/elfdump.c @@ -317,9 +317,13 @@ static const char *p_flags[] = { "PF_X|PF_W|PF_R" }; +#define NT_ELEM(x) [x] = #x, static const char *nt_types[] = { - "", "NT_FREEBSD_ABI_TAG", "NT_FREEBSD_NOINIT_TAG", - "NT_FREEBSD_ARCH_TAG", "NT_FREEBSD_FEATURE_CTL" + "", + NT_ELEM(NT_FREEBSD_ABI_TAG) + NT_ELEM(NT_FREEBSD_NOINIT_TAG) + NT_ELEM(NT_FREEBSD_ARCH_TAG) + NT_ELEM(NT_FREEBSD_FEATURE_CTL) }; /* http://www.sco.com/developers/gabi/latest/ch4.sheader.html#sh_type */ @@ -1079,7 +1083,7 @@ elf_print_note(Elf32_Ehdr *e, void *sh) namesz = elf_get_word(e, n, N_NAMESZ); descsz = elf_get_word(e, n, N_DESCSZ); type = elf_get_word(e, n, N_TYPE); - if (type < nitems(nt_types)) + if (type < nitems(nt_types) && nt_types[type] != NULL) nt_type = nt_types[type]; else nt_type = "Unknown type"; From 675f752cc56a23d7f7852d450b9ee450ba5c8a8b Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Thu, 31 Jan 2019 17:04:55 +0000 Subject: [PATCH 35/90] readelf: dump elf note data Output format is compatible with GNU readelf's handling of unknown note types (modulo a GNU char signedness bug); future changes will add type-specific decoding.
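A minimal sketch of the record walk the hunk below performs (illustrative only: note_hdr, dump_notes and ROUNDUP2 are stand-in names for Elf_Note, dump_notes_content and roundup2; both the name and the descriptor are padded to 4-byte boundaries):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define ROUNDUP2(x, y)	(((x) + ((y) - 1)) & ~(uint32_t)((y) - 1))

struct note_hdr {
	uint32_t n_namesz;	/* name length, including the NUL */
	uint32_t n_descsz;	/* descriptor (payload) length */
	uint32_t n_type;	/* type, interpreted relative to the name */
};

static void
dump_notes(const char *buf, size_t sz)
{
	const char *end = buf + sz;

	while (buf + sizeof(struct note_hdr) <= end) {
		struct note_hdr nh;
		const char *name, *desc;
		uint32_t i;

		memcpy(&nh, buf, sizeof(nh));
		name = buf + sizeof(nh);	/* name follows the header */
		desc = name + ROUNDUP2(nh.n_namesz, 4);	/* then the padded name */
		if (desc + nh.n_descsz > end)	/* malformed note, bail */
			break;
		printf(" %-13.*s %#010x\n  description data:",
		    (int)nh.n_namesz, name, nh.n_descsz);
		for (i = 0; i < nh.n_descsz; i++)
			printf(" %02x", (unsigned char)desc[i]);
		printf("\n");
		buf = desc + ROUNDUP2(nh.n_descsz, 4);	/* padded descriptor */
	}
}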
Reviewed by: kib MFC after: 1 week Relnotes: Yes Sponsored by: The FreeBSD Foundation --- contrib/elftoolchain/readelf/readelf.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/contrib/elftoolchain/readelf/readelf.c b/contrib/elftoolchain/readelf/readelf.c index c186de9b93e2..0b7106eb22c9 100644 --- a/contrib/elftoolchain/readelf/readelf.c +++ b/contrib/elftoolchain/readelf/readelf.c @@ -3567,6 +3567,7 @@ dump_notes_content(struct readelf *re, const char *buf, size_t sz, off_t off) { Elf_Note *note; const char *end, *name; + uint32_t i; printf("\nNotes at offset %#010jx with length %#010jx:\n", (uintmax_t) off, (uintmax_t) sz); @@ -3578,7 +3579,9 @@ dump_notes_content(struct readelf *re, const char *buf, size_t sz, off_t off) return; } note = (Elf_Note *)(uintptr_t) buf; - name = (char *)(uintptr_t)(note + 1); + buf += sizeof(Elf_Note); + name = buf; + buf += roundup2(note->n_namesz, 4); /* * The name field is required to be nul-terminated, and * n_namesz includes the terminating nul in observed @@ -3596,8 +3599,11 @@ dump_notes_content(struct readelf *re, const char *buf, size_t sz, off_t off) printf(" %-13s %#010jx", name, (uintmax_t) note->n_descsz); printf(" %s\n", note_type(name, re->ehdr.e_type, note->n_type)); - buf += sizeof(Elf_Note) + roundup2(note->n_namesz, 4) + - roundup2(note->n_descsz, 4); + printf(" description data:"); + for (i = 0; i < note->n_descsz; i++) + printf(" %02x", (unsigned char)buf[i]); + printf("\n"); + buf += roundup2(note->n_descsz, 4); } } From 37125720b93e20b2fb05da7b2a41bf33adbbbcac Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Thu, 31 Jan 2019 17:52:48 +0000 Subject: [PATCH 36/90] In zone_alloc_bucket() max argument was calculated based on uz_count. Then bucket_alloc() also selects bucket size based on uz_count. However, since zone lock is dropped, uz_count may reduce. In this case max may be greater than ub_entries and that would result in writing beyond the end of the allocation. Reported by: pho --- sys/vm/uma_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 63196404e53d..809bb7c2393f 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -2844,7 +2844,7 @@ zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags, int max) return (NULL); bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket, - max, domain, flags); + MIN(max, bucket->ub_entries), domain, flags); /* * Initialize the memory if necessary. From c75f49f7d82bff402ff18d749ace25f69d3ee069 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 31 Jan 2019 19:05:56 +0000 Subject: [PATCH 37/90] Make iflib a loadable module. iflib is already a module, but it is unconditionally compiled into the kernel. There are drivers which do not need iflib(4), and there are situations where somebody might not want iflib in the kernel because the corresponding driver is used as a module.
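For example (an illustrative sketch; em(4) stands in for any iflib-based driver), a statically configured kernel now needs both lines in its config file:

	device iflib
	device em

while loading the same driver as a module keeps working unchanged, since the driver module's MODULE_DEPEND on iflib (added for ixv(4) in one of the hunks below) pulls iflib.ko in automatically.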
Reviewed by: marius Discussed with: erj Sponsored by: The FreeBSD Foundation MFC after: 2 weeks Differential revision: https://reviews.freebsd.org/D19041 --- UPDATING | 7 +++++++ share/man/man4/bnxt.4 | 3 ++- share/man/man4/em.4 | 5 +++-- share/man/man4/iavf.4 | 3 ++- share/man/man4/ixgbe.4 | 5 +++-- share/man/man4/ixl.4 | 3 ++- share/man/man4/vmx.4 | 5 +++-- sys/amd64/conf/GENERIC | 13 +++++++------ sys/arm64/conf/GENERIC | 7 +++++-- sys/conf/NOTES | 9 ++++++--- sys/conf/files | 8 ++++---- sys/dev/ixgbe/if_ixv.c | 4 +--- sys/i386/conf/GENERIC | 9 +++++---- sys/mips/conf/OCTEON1 | 2 ++ sys/mips/conf/std.XLP | 1 + sys/modules/Makefile | 1 + sys/modules/iflib/Makefile | 13 +++++++++++++ sys/powerpc/conf/GENERIC64 | 2 ++ sys/powerpc/conf/MPC85XX | 1 + sys/powerpc/conf/MPC85XXSPE | 1 + sys/powerpc/conf/QORIQ64 | 1 + sys/powerpc/conf/dpaa/DPAA | 1 + sys/sparc64/conf/GENERIC | 2 ++ 23 files changed, 75 insertions(+), 31 deletions(-) create mode 100644 sys/modules/iflib/Makefile diff --git a/UPDATING b/UPDATING index f720ed4e8ad2..53d8622be575 100644 --- a/UPDATING +++ b/UPDATING @@ -31,6 +31,13 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 13.x IS SLOW: disable the most expensive debugging functionality run "ln -s 'abort:false,junk:false' /etc/malloc.conf".) +20190131: + Iflib is no longer unconditionally compiled into the kernel. Drivers + using iflib and statically compiled into the kernel, now require + the 'device iflib' config option. For the same drivers loaded as + modules on kernels not having 'device iflib', the iflib.ko module + is loaded automatically. + 20181230: r342635 changes the way efibootmgr(8) works by requiring users to add the -b (bootnum) parameter for commands where the bootnum was previously diff --git a/share/man/man4/bnxt.4 b/share/man/man4/bnxt.4 index 12c2f9a0626a..915e35cc55d8 100644 --- a/share/man/man4/bnxt.4 +++ b/share/man/man4/bnxt.4 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 20, 2018 +.Dd January 30, 2019 .Dt BNXT 4 .Os .Sh NAME @@ -36,6 +36,7 @@ To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent +.Cd "device iflib" .Cd "device bnxt" .Ed .Pp diff --git a/share/man/man4/em.4 b/share/man/man4/em.4 index 7f84be1fbbf4..c79e88cc9ed2 100644 --- a/share/man/man4/em.4 +++ b/share/man/man4/em.4 @@ -31,7 +31,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 20, 2018 +.Dd January 30, 2019 .Dt EM 4 .Os .Sh NAME @@ -39,9 +39,10 @@ .Nd "Intel(R) PRO/1000 Gigabit Ethernet adapter driver" .Sh SYNOPSIS To compile this driver into the kernel, -place the following line in your +place the following lines in your kernel configuration file: .Bd -ragged -offset indent +.Cd "device iflib" .Cd "device em" .Ed .Pp diff --git a/share/man/man4/iavf.4 b/share/man/man4/iavf.4 index 0531c89ed8b3..c870284f2836 100644 --- a/share/man/man4/iavf.4 +++ b/share/man/man4/iavf.4 @@ -31,7 +31,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 5, 2018 +.Dd January 30, 2019 .Dt IAVF 4 .Os .Sh NAME @@ -41,6 +41,7 @@ To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent +.Cd "device iflib" .Cd "device iavf" .Ed .Pp diff --git a/share/man/man4/ixgbe.4 b/share/man/man4/ixgbe.4 index 99c1cc7f8e27..f51ac8dbf262 100644 --- a/share/man/man4/ixgbe.4 +++ b/share/man/man4/ixgbe.4 @@ -31,7 +31,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 19, 2010 +.Dd January 30, 2019 .Dt IXGBE 4 .Os .Sh NAME @@ -39,9 +39,10 @@ .Nd "Intel(R) 10Gb Ethernet driver for the FreeBSD operating system" 
.Sh SYNOPSIS To compile this driver into the kernel, -place the following line in your +place the following lines in your kernel configuration file: .Bd -ragged -offset indent +.Cd "device iflib" .Cd "device ixgbe" .Ed .Pp diff --git a/share/man/man4/ixl.4 b/share/man/man4/ixl.4 index d98ffd365c96..c8e674332f42 100644 --- a/share/man/man4/ixl.4 +++ b/share/man/man4/ixl.4 @@ -31,7 +31,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 5, 2018 +.Dd January 30, 2019 .Dt IXL 4 .Os .Sh NAME @@ -41,6 +41,7 @@ To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent +.Cd "device iflib" .Cd "device ixl" .Ed .Pp diff --git a/share/man/man4/vmx.4 b/share/man/man4/vmx.4 index 974c3f840a90..09795c3e7fbc 100644 --- a/share/man/man4/vmx.4 +++ b/share/man/man4/vmx.4 @@ -17,7 +17,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 17, 2014 +.Dd January 30, 2019 .Dt VMX 4 .Os .Sh NAME @@ -25,9 +25,10 @@ .Nd VMware VMXNET3 Virtual Interface Controller device .Sh SYNOPSIS To compile this driver into the kernel, -place the following line in your +place the following lines in your kernel configuration file: .Bd -ragged -offset indent +.Cd "device iflib" .Cd "device vmx" .Ed .Pp diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 2a6d980d4ffe..9a8532ba6f03 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -234,14 +234,18 @@ device ppi # Parallel port interface device device puc # Multi I/O cards and multi-channel UARTs -# PCI Ethernet NICs. -device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE -device de # DEC/Intel DC21x4x (``Tulip'') +# PCI/PCI-X/PCIe Ethernet NICs that use iflib infrastructure +device iflib device em # Intel PRO/1000 Gigabit Ethernet Family device ix # Intel PRO/10GbE PCIE PF Ethernet device ixv # Intel PRO/10GbE PCIE VF Ethernet device ixl # Intel 700 Series Physical Function device iavf # Intel Adaptive Virtual Function +device vmx # VMware VMXNET3 Ethernet + +# PCI Ethernet NICs. 
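+# (the drivers below keep their own legacy, non-iflib data paths)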
+device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE +device de # DEC/Intel DC21x4x (``Tulip'') device le # AMD Am7900 LANCE and Am79C9xx PCnet device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') @@ -369,9 +373,6 @@ device hyperv # HyperV drivers options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver -# VMware support -device vmx # VMware VMXNET3 Ethernet - # Netmap provides direct access to TX/RX rings on supported NICs device netmap # netmap(4) support diff --git a/sys/arm64/conf/GENERIC b/sys/arm64/conf/GENERIC index 5178455c364c..641ad1cac97e 100644 --- a/sys/arm64/conf/GENERIC +++ b/sys/arm64/conf/GENERIC @@ -145,14 +145,17 @@ device al_pci # Annapurna Alpine PCI-E options PCI_HP # PCI-Express native HotPlug options PCI_IOV # PCI SR-IOV support +# PCI/PCI-X/PCIe Ethernet NICs that use iflib infrastructure +device iflib +device em # Intel PRO/1000 Gigabit Ethernet Family +device ix # Intel 10Gb Ethernet Family + # Ethernet NICs device mdio device mii device miibus # MII bus support device awg # Allwinner EMAC Gigabit Ethernet device axgbe # AMD Opteron A1100 integrated NIC -device em # Intel PRO/1000 Gigabit Ethernet Family -device ix # Intel 10Gb Ethernet Family device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device neta # Marvell Armada 370/38x/XP/3700 NIC device smc # SMSC LAN91C111 diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 9a43bd26164d..97ec484ae0d9 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2092,15 +2092,18 @@ device vte # DM&P Vortex86 RDC R6040 Fast Ethernet device wb # Winbond W89C840F device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') +# PCI/PCI-X/PCIe Ethernet NICs that use iflib infrastructure +device iflib +device em # Intel Pro/1000 Gigabit Ethernet +device ix # Intel Pro/10Gbe PCIE Ethernet +device ixv # Intel Pro/10Gbe PCIE Ethernet VF + # PCI Ethernet NICs. 
device cxgb # Chelsio T3 10 Gigabit Ethernet device cxgb_t3fw # Chelsio T3 10 Gigabit Ethernet firmware device cxgbe # Chelsio T4-T6 1/10/25/40/100 Gigabit Ethernet device cxgbev # Chelsio T4-T6 Virtual Functions device de # DEC/Intel DC21x4x (``Tulip'') -device em # Intel Pro/1000 Gigabit Ethernet -device ix # Intel Pro/10Gbe PCIE Ethernet -device ixv # Intel Pro/10Gbe PCIE Ethernet VF device le # AMD Am7900 LANCE and Am79C9xx PCnet device mxge # Myricom Myri-10G 10GbE NIC device oce # Emulex 10 GbE (OneConnect Ethernet) diff --git a/sys/conf/files b/sys/conf/files index 92a3068664a7..c7c15cd3738c 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4132,10 +4132,10 @@ net/if_tun.c optional tun net/if_tap.c optional tap net/if_vlan.c optional vlan net/if_vxlan.c optional vxlan inet | vxlan inet6 -net/ifdi_if.m optional ether pci -net/iflib.c optional ether pci -net/iflib_clone.c optional ether pci -net/mp_ring.c optional ether +net/ifdi_if.m optional ether pci iflib +net/iflib.c optional ether pci iflib +net/iflib_clone.c optional ether pci iflib +net/mp_ring.c optional ether iflib net/mppcc.c optional netgraph_mppc_compression net/mppcd.c optional netgraph_mppc_compression net/netisr.c standard diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c index 2409a4e33992..b81ffe8f2ec7 100644 --- a/sys/dev/ixgbe/if_ixv.c +++ b/sys/dev/ixgbe/if_ixv.c @@ -144,11 +144,9 @@ static driver_t ixv_driver = { devclass_t ixv_devclass; DRIVER_MODULE(ixv, pci, ixv_driver, ixv_devclass, 0, 0); IFLIB_PNP_INFO(pci, ixv_driver, ixv_vendor_info_array); +MODULE_DEPEND(ixv, iflib, 1, 1, 1); MODULE_DEPEND(ixv, pci, 1, 1, 1); MODULE_DEPEND(ixv, ether, 1, 1, 1); -#ifdef DEV_NETMAP -MODULE_DEPEND(ixv, netmap, 1, 1, 1); -#endif /* DEV_NETMAP */ static device_method_t ixv_if_methods[] = { DEVMETHOD(ifdi_attach_pre, ixv_if_attach_pre), diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index f352b9eb5d42..0536fd43c621 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -220,10 +220,14 @@ device ppi # Parallel port interface device device puc # Multi I/O cards and multi-channel UARTs +# PCI/PCI-X/PCIe Ethernet NICs that use iflib infrastructure +device iflib +device em # Intel PRO/1000 Gigabit Ethernet Family +device vmx # VMware VMXNET3 Ethernet + # PCI Ethernet NICs. device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE device de # DEC/Intel DC21x4x (``Tulip'') -device em # Intel PRO/1000 Gigabit Ethernet Family device le # AMD Am7900 LANCE and Am79C9xx PCnet device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') @@ -362,9 +366,6 @@ device hyperv # HyperV drivers options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver -# VMware support -device vmx # VMware VMXNET3 Ethernet - # evdev interface options EVDEV_SUPPORT # evdev support in legacy drivers device evdev # input event device support diff --git a/sys/mips/conf/OCTEON1 b/sys/mips/conf/OCTEON1 index 4a61198952b0..89bcf2cc7ff1 100644 --- a/sys/mips/conf/OCTEON1 +++ b/sys/mips/conf/OCTEON1 @@ -155,6 +155,8 @@ device octm # physical port, but may eventually provide support for DSA or similar instead. #device mv88e61xxphy # Marvell 88E61XX +device iflib + # PCI Ethernet NICs. 
device em # Intel PRO/1000 Gigabit Ethernet Family device ix # Intel PRO/10GbE PF PCIE Ethernet Family diff --git a/sys/mips/conf/std.XLP b/sys/mips/conf/std.XLP index 753c18b0c8e3..979b8ad5501d 100644 --- a/sys/mips/conf/std.XLP +++ b/sys/mips/conf/std.XLP @@ -75,6 +75,7 @@ device ether device xlpge #device re device msk +device iflib device em # Disks diff --git a/sys/modules/Makefile b/sys/modules/Makefile index e94995825a86..d34d260e9fa8 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -169,6 +169,7 @@ SUBDIR= \ if_tun \ if_vlan \ if_vxlan \ + iflib \ ${_iir} \ imgact_binmisc \ ${_intelspi} \ diff --git a/sys/modules/iflib/Makefile b/sys/modules/iflib/Makefile new file mode 100644 index 000000000000..9cae25a6a0a9 --- /dev/null +++ b/sys/modules/iflib/Makefile @@ -0,0 +1,13 @@ +# $FreeBSD$ + +.PATH: ${SRCTOP}/sys/net + +KMOD= iflib +SRCS= \ + iflib.c \ + iflib_clone.c \ + mp_ring.c +SRCS+= ifdi_if.c +SRCS+= device_if.h bus_if.h pci_if.h ifdi_if.h + +.include diff --git a/sys/powerpc/conf/GENERIC64 b/sys/powerpc/conf/GENERIC64 index 34a2f3636f85..05447f451e13 100644 --- a/sys/powerpc/conf/GENERIC64 +++ b/sys/powerpc/conf/GENERIC64 @@ -159,6 +159,8 @@ device scc device uart device uart_z8530 +device iflib + # Ethernet hardware device em # Intel PRO/1000 Gigabit Ethernet Family device ix # Intel PRO/10GbE PCIE PF Ethernet Family diff --git a/sys/powerpc/conf/MPC85XX b/sys/powerpc/conf/MPC85XX index cdf48ddbb6f4..402107ba9868 100644 --- a/sys/powerpc/conf/MPC85XX +++ b/sys/powerpc/conf/MPC85XX @@ -74,6 +74,7 @@ device cryptodev device da device ds1307 device ds1553 +device iflib device em device alc device ether diff --git a/sys/powerpc/conf/MPC85XXSPE b/sys/powerpc/conf/MPC85XXSPE index 5d7dc3d5fd4e..08b1eee4342f 100644 --- a/sys/powerpc/conf/MPC85XXSPE +++ b/sys/powerpc/conf/MPC85XXSPE @@ -74,6 +74,7 @@ device cryptodev device da device ds1307 device ds1553 +device iflib device em device alc device ether diff --git a/sys/powerpc/conf/QORIQ64 b/sys/powerpc/conf/QORIQ64 index 57cd4437aff7..4493c30d5afa 100644 --- a/sys/powerpc/conf/QORIQ64 +++ b/sys/powerpc/conf/QORIQ64 @@ -81,6 +81,7 @@ device cryptodev device da device ds1307 device ds1553 +device iflib device em device alc device dpaa diff --git a/sys/powerpc/conf/dpaa/DPAA b/sys/powerpc/conf/dpaa/DPAA index d7e784731cbc..2cbc908ec120 100644 --- a/sys/powerpc/conf/dpaa/DPAA +++ b/sys/powerpc/conf/dpaa/DPAA @@ -74,6 +74,7 @@ device sdhci # Network devices device miibus # MII bus support +device iflib device em diff --git a/sys/sparc64/conf/GENERIC b/sys/sparc64/conf/GENERIC index 92d76773d17e..cb768e39a55c 100644 --- a/sys/sparc64/conf/GENERIC +++ b/sys/sparc64/conf/GENERIC @@ -181,6 +181,8 @@ device uart # Multi-uart driver #device ppi # Parallel port interface device #device vpo # Requires scbus and da +device iflib + # PCI Ethernet NICs. #device de # DEC/Intel DC21x4x (``Tulip'') device em # Intel PRO/1000 adapter Gigabit Ethernet Card From f8d49128a955b85b0e2f8775f97748e6044cc524 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 31 Jan 2019 20:04:18 +0000 Subject: [PATCH 38/90] Make iflib a loadable module: add seemingly missed header. Reported by: CI (i.e. 
it is not reproducible in my local builds) Sponsored by: The FreeBSD Foundation MFC after: 2 weeks --- sys/modules/iflib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/modules/iflib/Makefile b/sys/modules/iflib/Makefile index 9cae25a6a0a9..c8311dd19e3c 100644 --- a/sys/modules/iflib/Makefile +++ b/sys/modules/iflib/Makefile @@ -8,6 +8,6 @@ SRCS= \ iflib_clone.c \ mp_ring.c SRCS+= ifdi_if.c -SRCS+= device_if.h bus_if.h pci_if.h ifdi_if.h +SRCS+= device_if.h bus_if.h pci_if.h pci_iov_if.h ifdi_if.h .include From f712b16127bdd9ff7c8f4d6c0d6a8b31fbbe32d5 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Thu, 31 Jan 2019 21:04:50 +0000 Subject: [PATCH 39/90] Revert r316461: Remove "IPFW static rules" rmlock, and use pfil's global lock. The pfil(9) system is about to be converted to epoch(9) synchronization, so we need to [temporarily] go back to ipfw internal locking. Discussed with: ae --- sys/net/pfil.c | 1 + sys/net/pfil.h | 4 ---- sys/netpfil/ipfw/ip_fw2.c | 1 + sys/netpfil/ipfw/ip_fw_dynamic.c | 1 - sys/netpfil/ipfw/ip_fw_iface.c | 1 - sys/netpfil/ipfw/ip_fw_nat.c | 1 - sys/netpfil/ipfw/ip_fw_private.h | 20 ++++++++++++-------- sys/netpfil/ipfw/ip_fw_sockopt.c | 1 - sys/netpfil/ipfw/ip_fw_table.c | 1 - sys/netpfil/ipfw/ip_fw_table_value.c | 1 - 10 files changed, 14 insertions(+), 18 deletions(-) diff --git a/sys/net/pfil.c b/sys/net/pfil.c index 19b930a32e35..96069123a935 100644 --- a/sys/net/pfil.c +++ b/sys/net/pfil.c @@ -64,6 +64,7 @@ LIST_HEAD(pfilheadhead, pfil_head); VNET_DEFINE(struct pfilheadhead, pfil_head_list); #define V_pfil_head_list VNET(pfil_head_list) VNET_DEFINE(struct rmlock, pfil_lock); +#define V_pfil_lock VNET(pfil_lock) #define PFIL_LOCK_INIT_REAL(l, t) \ rm_init_flags(l, "PFil " t " rmlock", RM_RECURSE) diff --git a/sys/net/pfil.h b/sys/net/pfil.h index 8fdaf5a69119..bfe108a1f1fe 100644 --- a/sys/net/pfil.h +++ b/sys/net/pfil.h @@ -40,7 +40,6 @@ #include #include #include -#include struct mbuf; struct ifnet; @@ -101,9 +100,6 @@ struct pfil_head { LIST_ENTRY(pfil_head) ph_list; }; -VNET_DECLARE(struct rmlock, pfil_lock); -#define V_pfil_lock VNET(pfil_lock) - /* Public functions for pfil hook management by packet filters. */ struct pfil_head *pfil_head_get(int, u_long); int pfil_add_hook_flags(pfil_func_flags_t, void *, int, struct pfil_head *); diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index 5c91b76c2fd6..833f6b6ecebf 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -1404,6 +1404,7 @@ ipfw_chk(struct ip_fw_args *args) int is_ipv4 = 0; int done = 0; /* flag to exit the outer loop */ + IPFW_RLOCK_TRACKER; if (m->m_flags & M_SKIP_FIREWALL || (!
V_ipfw_vnet_ready)) return (IP_FW_PASS); /* accept */ diff --git a/sys/netpfil/ipfw/ip_fw_dynamic.c b/sys/netpfil/ipfw/ip_fw_dynamic.c index d48af280f105..473db72454d4 100644 --- a/sys/netpfil/ipfw/ip_fw_dynamic.c +++ b/sys/netpfil/ipfw/ip_fw_dynamic.c @@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include diff --git a/sys/netpfil/ipfw/ip_fw_iface.c b/sys/netpfil/ipfw/ip_fw_iface.c index beb3b9115aac..b2aa7d3205db 100644 --- a/sys/netpfil/ipfw/ip_fw_iface.c +++ b/sys/netpfil/ipfw/ip_fw_iface.c @@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include diff --git a/sys/netpfil/ipfw/ip_fw_nat.c b/sys/netpfil/ipfw/ip_fw_nat.c index 7cd1d33d5910..9702d9dadaae 100644 --- a/sys/netpfil/ipfw/ip_fw_nat.c +++ b/sys/netpfil/ipfw/ip_fw_nat.c @@ -45,7 +45,6 @@ __FBSDID("$FreeBSD$"); #include #include -#include #include #include #include diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h index dcd38eb8e8eb..e0e24122168b 100644 --- a/sys/netpfil/ipfw/ip_fw_private.h +++ b/sys/netpfil/ipfw/ip_fw_private.h @@ -312,6 +312,8 @@ struct ip_fw_chain { void **srvstate; /* runtime service mappings */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t rwmtx; +#else + struct rmlock rwmtx; #endif int static_len; /* total len of static rules (v0) */ uint32_t gencnt; /* NAT generation count */ @@ -452,23 +454,25 @@ struct ipfw_ifc { #define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) #else /* FreeBSD */ #define IPFW_LOCK_INIT(_chain) do { \ + rm_init_flags(&(_chain)->rwmtx, "IPFW static rules", RM_RECURSE); \ rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \ } while (0) #define IPFW_LOCK_DESTROY(_chain) do { \ + rm_destroy(&(_chain)->rwmtx); \ rw_destroy(&(_chain)->uh_lock); \ } while (0) -#define IPFW_RLOCK_ASSERT(_chain) rm_assert(&V_pfil_lock, RA_RLOCKED) -#define IPFW_WLOCK_ASSERT(_chain) rm_assert(&V_pfil_lock, RA_WLOCKED) +#define IPFW_RLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_RLOCKED) +#define IPFW_WLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_WLOCKED) #define IPFW_RLOCK_TRACKER struct rm_priotracker _tracker -#define IPFW_RLOCK(p) rm_rlock(&V_pfil_lock, &_tracker) -#define IPFW_RUNLOCK(p) rm_runlock(&V_pfil_lock, &_tracker) -#define IPFW_WLOCK(p) rm_wlock(&V_pfil_lock) -#define IPFW_WUNLOCK(p) rm_wunlock(&V_pfil_lock) -#define IPFW_PF_RLOCK(p) -#define IPFW_PF_RUNLOCK(p) +#define IPFW_RLOCK(p) rm_rlock(&(p)->rwmtx, &_tracker) +#define IPFW_RUNLOCK(p) rm_runlock(&(p)->rwmtx, &_tracker) +#define IPFW_WLOCK(p) rm_wlock(&(p)->rwmtx) +#define IPFW_WUNLOCK(p) rm_wunlock(&(p)->rwmtx) +#define IPFW_PF_RLOCK(p) IPFW_RLOCK(p) +#define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) #endif #define IPFW_UH_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_RLOCKED) diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c index edbd96a91283..a83e75447633 100644 --- a/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -60,7 +60,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c index 67593aa9e2fc..7d34977c224f 100644 --- a/sys/netpfil/ipfw/ip_fw_table.c +++ b/sys/netpfil/ipfw/ip_fw_table.c @@ -55,7 +55,6 @@ __FBSDID("$FreeBSD$"); #include #include #include /* ip_fw.h requires IFNAMSIZ */ -#include #include #include /* struct ipfw_rule_ref */ diff --git a/sys/netpfil/ipfw/ip_fw_table_value.c b/sys/netpfil/ipfw/ip_fw_table_value.c index 
d60fc34f39ea..e92a5a979fb8 100644 --- a/sys/netpfil/ipfw/ip_fw_table_value.c +++ b/sys/netpfil/ipfw/ip_fw_table_value.c @@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$"); #include #include #include /* ip_fw.h requires IFNAMSIZ */ -#include #include #include /* struct ipfw_rule_ref */ From 829c56fc089d9e43580bf862d41dc1680d610fdc Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 31 Jan 2019 21:35:37 +0000 Subject: [PATCH 40/90] Don't set IFCAP_TXRTLMT during lagg_clone_create(). lagg_capabilities() will set the capability once interfaces supporting the feature are added to the lagg. Setting it on a lagg without any interfaces is pointless as the if_snd_tag_alloc call will always fail in that case. Reviewed by: hselasky, gallatin MFC after: 2 weeks Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D19040 --- sys/net/if_lagg.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index a1e7908f55ed..3bea2e95d69b 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -514,10 +514,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; #ifdef RATELIMIT ifp->if_snd_tag_alloc = lagg_snd_tag_alloc; - ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS | IFCAP_TXRTLMT; -#else - ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; #endif + ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; /* * Attach as an ordinary ethernet device, children will be attached From b2c1e8e6204924586f74d21d19c16b55301c85cc Mon Sep 17 00:00:00 2001 From: Eric Joyner Date: Thu, 31 Jan 2019 21:44:33 +0000 Subject: [PATCH 41/90] ix(4): Run {mod,msf,mbx,fdir,phy}_task in if_update_admin_status From Piotr: This patch introduces adapter->task_requests register responsible for recording requests for mod_task, msf_task, mbx_task, fdir_task and phy_task calls. Instead of enqueueing these tasks with GROUPTASK_ENQUEUE, handlers will be called directly from ixgbe_if_update_admin_status() while holding ctx lock. SIOCGIFXMEDIA ioctl() call reads adapter->media list. The list is deleted and rewritten in ixgbe_handle_msf() task without holding ctx lock. This change is needed to maintain data coherency when sharing adapter info via ioctl() calls. Patch co-authored by Krzysztof Galazka . 
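Condensed from the hunks below, the resulting pattern is: the MSI-X link filter only records which handlers need to run and asks for the admin task, which later invokes them directly while holding the ctx lock:

	/* In ixgbe_msix_link(): record work instead of enqueueing a task. */
	adapter->task_requests |= IXGBE_REQUEST_TASK_MOD;
	...
	return (adapter->task_requests != 0) ?
	    FILTER_SCHEDULE_THREAD : FILTER_HANDLED;

	/* In ixgbe_if_update_admin_status(), with the ctx lock held: */
	if (adapter->task_requests & IXGBE_REQUEST_TASK_MOD)
		ixgbe_handle_mod(ctx);
	if (adapter->task_requests & IXGBE_REQUEST_TASK_MSF)
		ixgbe_handle_msf(ctx);
	adapter->task_requests = 0;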
PR: 221317 Submitted by: Piotr Pietruszewski Reviewed by: sbruno@, IntelNetworking Sponsored by: Intel Corporation Differential Revision: https://reviews.freebsd.org/D18468 --- sys/dev/ixgbe/if_ix.c | 104 ++++++++++++++++++------------------- sys/dev/ixgbe/ixgbe.h | 7 +-- sys/dev/ixgbe/ixgbe_type.h | 7 +++ 3 files changed, 59 insertions(+), 59 deletions(-) diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index ed93bf33661a..7f7902e29fdc 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -120,6 +120,7 @@ static int ixgbe_if_resume(if_ctx_t ctx); static void ixgbe_if_stop(if_ctx_t ctx); void ixgbe_if_enable_intr(if_ctx_t ctx); static void ixgbe_if_disable_intr(if_ctx_t ctx); +static void ixgbe_link_intr_enable(if_ctx_t ctx); static int ixgbe_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); static void ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr); static int ixgbe_if_media_change(if_ctx_t ctx); @@ -173,7 +174,7 @@ static void ixgbe_init_device_features(struct adapter *adapter); static void ixgbe_check_fan_failure(struct adapter *, u32, bool); static void ixgbe_add_media_types(if_ctx_t ctx); static void ixgbe_update_stats_counters(struct adapter *adapter); -static void ixgbe_config_link(struct adapter *adapter); +static void ixgbe_config_link(if_ctx_t ctx); static void ixgbe_get_slot_info(struct adapter *); static void ixgbe_check_wol_support(struct adapter *adapter); static void ixgbe_enable_rx_drop(struct adapter *); @@ -254,6 +255,7 @@ static device_method_t ixgbe_if_methods[] = { DEVMETHOD(ifdi_msix_intr_assign, ixgbe_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, ixgbe_if_enable_intr), DEVMETHOD(ifdi_intr_disable, ixgbe_if_disable_intr), + DEVMETHOD(ifdi_link_intr_enable, ixgbe_link_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ixgbe_if_rx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, ixgbe_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, ixgbe_if_tx_queues_alloc), @@ -446,19 +448,6 @@ ixgbe_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, } - iflib_config_gtask_init(ctx, &adapter->mod_task, ixgbe_handle_mod, - "mod_task"); - iflib_config_gtask_init(ctx, &adapter->msf_task, ixgbe_handle_msf, - "msf_task"); - iflib_config_gtask_init(ctx, &adapter->phy_task, ixgbe_handle_phy, - "phy_task"); - if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) - iflib_config_gtask_init(ctx, &adapter->mbx_task, - ixgbe_handle_mbx, "mbx_task"); - if (adapter->feat_en & IXGBE_FEATURE_FDIR) - iflib_config_gtask_init(ctx, &adapter->fdir_task, - ixgbe_reinit_fdir, "fdir_task"); - device_printf(iflib_get_dev(ctx), "allocated for %d queues\n", adapter->num_tx_queues); @@ -1362,8 +1351,9 @@ ixgbe_is_sfp(struct ixgbe_hw *hw) * ixgbe_config_link ************************************************************************/ static void -ixgbe_config_link(struct adapter *adapter) +ixgbe_config_link(if_ctx_t ctx) { + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; u32 autoneg, err = 0; bool sfp, negotiate; @@ -1371,7 +1361,8 @@ ixgbe_config_link(struct adapter *adapter) sfp = ixgbe_is_sfp(hw); if (sfp) { - GROUPTASK_ENQUEUE(&adapter->mod_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MOD; + iflib_admin_intr_deferred(ctx); } else { if (hw->mac.ops.check_link) err = ixgbe_check_link(hw, &adapter->link_speed, @@ -1388,7 +1379,6 @@ ixgbe_config_link(struct adapter *adapter) err = hw->mac.ops.setup_link(hw, autoneg, adapter->link_up); } - } /* ixgbe_config_link */ 
/************************************************************************ @@ -2096,8 +2086,6 @@ ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) INIT_DEBUGOUT("ixgbe_if_media_status: begin"); - iflib_admin_intr_deferred(ctx); - ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; @@ -2386,7 +2374,7 @@ ixgbe_msix_link(void *arg) /* Link status change */ if (eicr & IXGBE_EICR_LSC) { IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC); - iflib_admin_intr_deferred(adapter->ctx); + adapter->task_requests |= IXGBE_REQUEST_TASK_LSC; } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { @@ -2397,7 +2385,7 @@ ixgbe_msix_link(void *arg) return (FILTER_HANDLED); /* Disable the interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR); - GROUPTASK_ENQUEUE(&adapter->fdir_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_FDIR; } else if (eicr & IXGBE_EICR_ECC) { device_printf(iflib_get_dev(adapter->ctx), @@ -2441,7 +2429,7 @@ ixgbe_msix_link(void *arg) /* Check for VF message */ if ((adapter->feat_en & IXGBE_FEATURE_SRIOV) && (eicr & IXGBE_EICR_MAILBOX)) - GROUPTASK_ENQUEUE(&adapter->mbx_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MBX; } if (ixgbe_is_sfp(hw)) { @@ -2453,16 +2441,14 @@ ixgbe_msix_link(void *arg) if (eicr & eicr_mask) { IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask); - if (atomic_cmpset_acq_int(&adapter->sfp_reinit, 0, 1)) - GROUPTASK_ENQUEUE(&adapter->mod_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MOD; } if ((hw->mac.type == ixgbe_mac_82599EB) && (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); - if (atomic_cmpset_acq_int(&adapter->sfp_reinit, 0, 1)) - GROUPTASK_ENQUEUE(&adapter->msf_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MSF; } } @@ -2476,13 +2462,10 @@ ixgbe_msix_link(void *arg) if ((hw->phy.type == ixgbe_phy_x550em_ext_t) && (eicr & IXGBE_EICR_GPI_SDP0_X540)) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540); - GROUPTASK_ENQUEUE(&adapter->phy_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_PHY; } - /* Re-enable other interrupts */ - IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_OTHER); - - return (FILTER_HANDLED); + return (adapter->task_requests != 0) ? 
FILTER_SCHEDULE_THREAD : FILTER_HANDLED; } /* ixgbe_msix_link */ /************************************************************************ @@ -2646,12 +2629,6 @@ ixgbe_if_detach(if_ctx_t ctx) return (EBUSY); } - iflib_config_gtask_deinit(&adapter->mod_task); - iflib_config_gtask_deinit(&adapter->msf_task); - iflib_config_gtask_deinit(&adapter->phy_task); - if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) - iflib_config_gtask_deinit(&adapter->mbx_task); - ixgbe_setup_low_power_mode(ctx); /* let hardware know driver is unloading */ @@ -2910,6 +2887,12 @@ ixgbe_if_init(if_ctx_t ctx) /* Configure RX settings */ ixgbe_initialize_receive_units(ctx); + /* + * Initialize variable holding task enqueue requests + * from MSI-X interrupts + */ + adapter->task_requests = 0; + /* Enable SDP & MSI-X interrupts based on adapter */ ixgbe_config_gpie(adapter); @@ -3011,7 +2994,7 @@ ixgbe_if_init(if_ctx_t ctx) ixgbe_set_phy_power(hw, TRUE); /* Config/Enable Link */ - ixgbe_config_link(adapter); + ixgbe_config_link(ctx); /* Hardware Packet Buffer & Flow Control setup */ ixgbe_config_delay_values(adapter); @@ -3374,7 +3357,6 @@ ixgbe_handle_mod(void *context) device_t dev = iflib_get_dev(ctx); u32 err, cage_full = 0; - adapter->sfp_reinit = 1; if (adapter->hw.need_crosstalk_fix) { switch (hw->mac.type) { case ixgbe_mac_82599EB: @@ -3411,11 +3393,11 @@ ixgbe_handle_mod(void *context) "Setup failure - unsupported SFP+ module type.\n"); goto handle_mod_out; } - GROUPTASK_ENQUEUE(&adapter->msf_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MSF; return; handle_mod_out: - adapter->sfp_reinit = 0; + adapter->task_requests &= ~(IXGBE_REQUEST_TASK_MSF); } /* ixgbe_handle_mod */ @@ -3431,9 +3413,6 @@ ixgbe_handle_msf(void *context) u32 autoneg; bool negotiate; - if (adapter->sfp_reinit != 1) - return; - /* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */ adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); @@ -3447,8 +3426,6 @@ ixgbe_handle_msf(void *context) ifmedia_removeall(adapter->media); ixgbe_add_media_types(adapter->ctx); ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); - - adapter->sfp_reinit = 0; } /* ixgbe_handle_msf */ /************************************************************************ @@ -3543,10 +3520,20 @@ ixgbe_if_update_admin_status(if_ctx_t ctx) } } - ixgbe_update_stats_counters(adapter); + /* Handle task requests from msix_link() */ + if (adapter->task_requests & IXGBE_REQUEST_TASK_MOD) + ixgbe_handle_mod(ctx); + if (adapter->task_requests & IXGBE_REQUEST_TASK_MSF) + ixgbe_handle_msf(ctx); + if (adapter->task_requests & IXGBE_REQUEST_TASK_MBX) + ixgbe_handle_mbx(ctx); + if (adapter->task_requests & IXGBE_REQUEST_TASK_FDIR) + ixgbe_reinit_fdir(ctx); + if (adapter->task_requests & IXGBE_REQUEST_TASK_PHY) + ixgbe_handle_phy(ctx); + adapter->task_requests = 0; - /* Re-enable link interrupts */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_LSC); + ixgbe_update_stats_counters(adapter); } /* ixgbe_if_update_admin_status */ /************************************************************************ @@ -3681,6 +3668,18 @@ ixgbe_if_disable_intr(if_ctx_t ctx) } /* ixgbe_if_disable_intr */ +/************************************************************************ + * ixgbe_link_intr_enable + ************************************************************************/ +static void +ixgbe_link_intr_enable(if_ctx_t ctx) +{ + struct ixgbe_hw *hw = &((struct adapter *)iflib_get_softc(ctx))->hw; + + /* Re-enable other interrupts */ + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_OTHER 
| IXGBE_EIMS_LSC); +} /* ixgbe_link_intr_enable */ + /************************************************************************ * ixgbe_if_rx_queue_intr_enable ************************************************************************/ @@ -3784,22 +3783,21 @@ ixgbe_intr(void *arg) if (eicr & eicr_mask) { IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask); - GROUPTASK_ENQUEUE(&adapter->mod_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MOD; } if ((hw->mac.type == ixgbe_mac_82599EB) && (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); - if (atomic_cmpset_acq_int(&adapter->sfp_reinit, 0, 1)) - GROUPTASK_ENQUEUE(&adapter->msf_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_MSF; } } /* External PHY interrupt */ if ((hw->phy.type == ixgbe_phy_x550em_ext_t) && (eicr & IXGBE_EICR_GPI_SDP0_X540)) - GROUPTASK_ENQUEUE(&adapter->phy_task); + adapter->task_requests |= IXGBE_REQUEST_TASK_PHY; return (FILTER_SCHEDULE_THREAD); } /* ixgbe_intr */ diff --git a/sys/dev/ixgbe/ixgbe.h b/sys/dev/ixgbe/ixgbe.h index 714c740a5ba5..afc55dccd4ef 100644 --- a/sys/dev/ixgbe/ixgbe.h +++ b/sys/dev/ixgbe/ixgbe.h @@ -428,16 +428,11 @@ struct adapter { /* Support for pluggable optics */ bool sfp_probe; - struct grouptask mod_task; /* SFP tasklet */ - struct grouptask msf_task; /* Multispeed Fiber */ - struct grouptask mbx_task; /* VF -> PF mailbox interrupt */ - int sfp_reinit; /* Flow Director */ int fdir_reinit; - struct grouptask fdir_task; - struct grouptask phy_task; /* PHY intr tasklet */ + u32 task_requests; /* * Queues: diff --git a/sys/dev/ixgbe/ixgbe_type.h b/sys/dev/ixgbe/ixgbe_type.h index 36101dac2961..fc5f191ee65e 100644 --- a/sys/dev/ixgbe/ixgbe_type.h +++ b/sys/dev/ixgbe/ixgbe_type.h @@ -4427,4 +4427,11 @@ struct ixgbe_bypass_eeprom { #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD \ (0x1F << IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT) +#define IXGBE_REQUEST_TASK_MOD 0x01 +#define IXGBE_REQUEST_TASK_MSF 0x02 +#define IXGBE_REQUEST_TASK_MBX 0x04 +#define IXGBE_REQUEST_TASK_FDIR 0x08 +#define IXGBE_REQUEST_TASK_PHY 0x10 +#define IXGBE_REQUEST_TASK_LSC 0x20 + #endif /* _IXGBE_TYPE_H_ */ From 7aad1f4edc99797d41d2a4a90d4ebdd91bf3a4a2 Mon Sep 17 00:00:00 2001 From: Eric Joyner Date: Thu, 31 Jan 2019 21:53:03 +0000 Subject: [PATCH 42/90] ix(4),ixv(4): Fix TSO offloads when TXCSUM is disabled This patch and commit message are based on r340256 created by Jacob Keller: The iflib stack does not disable TSO automatically when TXCSUM is disabled, instead assuming that the driver will correctly handle TSOs even when CSUM_IP is not set. This results in iflib calling ixgbe_isc_txd_encap with packets which have CSUM_IP_TSO, but do not have CSUM_IP or CSUM_IP_TCP set. Because of this, ixgbe_tx_ctx_setup will not setup the IPv4 checksum offloading. This results in bad TSO packets being sent if a user disables TXCSUM without disabling TSO. Fix this by updating the ixgbe_tx_ctx_setup function to check both CSUM_IP and CSUM_IP_TSO when deciding whether to enable checksums. Once this is corrected, another issue for TSO packets is revealed. The driver sets IFLIB_NEED_ZERO_CSUM in order to enable a work around that causes the ip->sum field to be zero'd. This is necessary for ix hardware to correctly perform TSOs. However, if TXCSUM is disabled, then the work around is not enabled, as CSUM_IP will not be set when the iflib stack checks to see if it should clear the sum field. Fix this by adding IFLIB_TSO_INIT_IP to the iflib flags for the ix and ixv interface files. 
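For reference, the first of the two fixes amounts to widening the checksum test in ixgbe_tx_ctx_setup() so that TSO packets qualify even when CSUM_IP_TCP is absent (taken from the ix_txrx.c hunk below; CSUM_TSO covers both the IPv4 and IPv6 TSO flags):

	case IPPROTO_TCP:
		if (pi->ipi_csum_flags &
		    (CSUM_IP_TCP | CSUM_IP6_TCP | CSUM_TSO))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
		else
			offload = FALSE;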
Once both of these changes are made, the ix and ixv drivers should correctly offload TSO packets when TSO offload is enabled, regardless of whether TXCSUM is enabled or disabled. Submitted by: Piotr Pietruszewski Reviewed by: IntelNetworking Sponsored by: Intel Corporation Differential Revision: https://reviews.freebsd.org/D18470 --- sys/dev/ixgbe/if_ix.c | 1 + sys/dev/ixgbe/if_ixv.c | 1 + sys/dev/ixgbe/ix_txrx.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index 7f7902e29fdc..b4b13892957f 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -379,6 +379,7 @@ static struct if_shared_ctx ixgbe_sctx_init = { .isc_vendor_info = ixgbe_vendor_info_array, .isc_driver_version = ixgbe_driver_version, .isc_driver = &ixgbe_if_driver, + .isc_flags = IFLIB_TSO_INIT_IP, .isc_nrxd_min = {MIN_RXD}, .isc_ntxd_min = {MIN_TXD}, diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c index b81ffe8f2ec7..79b7d78d4c73 100644 --- a/sys/dev/ixgbe/if_ixv.c +++ b/sys/dev/ixgbe/if_ixv.c @@ -220,6 +220,7 @@ static struct if_shared_ctx ixv_sctx_init = { .isc_vendor_info = ixv_vendor_info_array, .isc_driver_version = ixv_driver_version, .isc_driver = &ixv_if_driver, + .isc_flags = IFLIB_TSO_INIT_IP, .isc_nrxd_min = {MIN_RXD}, .isc_ntxd_min = {MIN_TXD}, diff --git a/sys/dev/ixgbe/ix_txrx.c b/sys/dev/ixgbe/ix_txrx.c index 2e021a120f20..78316b4c4659 100644 --- a/sys/dev/ixgbe/ix_txrx.c +++ b/sys/dev/ixgbe/ix_txrx.c @@ -131,7 +131,7 @@ ixgbe_tx_ctx_setup(struct ixgbe_adv_tx_context_desc *TXD, if_pkt_info_t pi) switch (pi->ipi_ipproto) { case IPPROTO_TCP: - if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) + if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP | CSUM_TSO)) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; else offload = FALSE; From 919e7b5359a7e7346c175b32022b5d698fde17bc Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Thu, 31 Jan 2019 22:27:39 +0000 Subject: [PATCH 43/90] Prevent some kobj memory allocation failures from panicking the system. Parts of the kobj(9) KPI assume a non-sleepable context for the purpose of internal memory allocations, but currently have no way to signal an allocation failure to the caller, so they just panic in this case. This can occur even when kobj_create() is called with M_WAITOK. Fix some instances of the problem by plumbing wait flags from kobj_create() through internal subroutines. Change kobj_class_compile() to assume a sleepable context when called externally, since all existing callers use it in a sleepable context. To fix the problem fully the kobj_init() KPI must be changed. Reported and tested by: pho Reviewed by: kib (previous version) MFC after: 1 week Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D19023 --- sys/kern/subr_kobj.c | 95 ++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 43 deletions(-) diff --git a/sys/kern/subr_kobj.c b/sys/kern/subr_kobj.c index 1a68c8d5aef7..8cf8d549dfb1 100644 --- a/sys/kern/subr_kobj.c +++ b/sys/kern/subr_kobj.c @@ -125,35 +125,40 @@ kobj_class_compile_common(kobj_class_t cls, kobj_ops_t ops) cls->ops = ops; } -void -kobj_class_compile(kobj_class_t cls) +static int +kobj_class_compile1(kobj_class_t cls, int mflags) { kobj_ops_t ops; KOBJ_ASSERT(MA_NOTOWNED); - /* - * Allocate space for the compiled ops table. 
- */ - ops = malloc(sizeof(struct kobj_ops), M_KOBJ, M_NOWAIT); - if (!ops) - panic("%s: out of memory", __func__); + ops = malloc(sizeof(struct kobj_ops), M_KOBJ, mflags); + if (ops == NULL) + return (ENOMEM); - KOBJ_LOCK(); - /* * We may have lost a race for kobj_class_compile here - check * to make sure someone else hasn't already compiled this * class. */ + KOBJ_LOCK(); if (cls->ops) { KOBJ_UNLOCK(); free(ops, M_KOBJ); - return; + return (0); } - kobj_class_compile_common(cls, ops); KOBJ_UNLOCK(); + return (0); +} + +void +kobj_class_compile(kobj_class_t cls) +{ + int error; + + error = kobj_class_compile1(cls, M_WAITOK); + KASSERT(error == 0, ("kobj_class_compile1 returned %d", error)); } void @@ -254,24 +259,6 @@ kobj_class_free(kobj_class_t cls) free(ops, M_KOBJ); } -kobj_t -kobj_create(kobj_class_t cls, - struct malloc_type *mtype, - int mflags) -{ - kobj_t obj; - - /* - * Allocate and initialise the new object. - */ - obj = malloc(cls->size, mtype, mflags | M_ZERO); - if (!obj) - return NULL; - kobj_init(obj, cls); - - return obj; -} - static void kobj_init_common(kobj_t obj, kobj_class_t cls) { @@ -280,30 +267,52 @@ kobj_init_common(kobj_t obj, kobj_class_t cls) cls->refs++; } -void -kobj_init(kobj_t obj, kobj_class_t cls) +static int +kobj_init1(kobj_t obj, kobj_class_t cls, int mflags) { - KOBJ_ASSERT(MA_NOTOWNED); - retry: - KOBJ_LOCK(); + int error; - /* - * Consider compiling the class' method table. - */ - if (!cls->ops) { + KOBJ_LOCK(); + while (cls->ops == NULL) { /* * kobj_class_compile doesn't want the lock held * because of the call to malloc - we drop the lock * and re-try. */ KOBJ_UNLOCK(); - kobj_class_compile(cls); - goto retry; + error = kobj_class_compile1(cls, mflags); + if (error != 0) + return (error); + KOBJ_LOCK(); } - kobj_init_common(obj, cls); - KOBJ_UNLOCK(); + return (0); +} + +kobj_t +kobj_create(kobj_class_t cls, struct malloc_type *mtype, int mflags) +{ + kobj_t obj; + + obj = malloc(cls->size, mtype, mflags | M_ZERO); + if (obj == NULL) + return (NULL); + if (kobj_init1(obj, cls, mflags) != 0) { + free(obj, mtype); + return (NULL); + } + return (obj); +} + +void +kobj_init(kobj_t obj, kobj_class_t cls) +{ + int error; + + error = kobj_init1(obj, cls, M_NOWAIT); + if (error != 0) + panic("kobj_init1 failed: error %d", error); } void From eec189c70ba5bed05b2831426bb04e8a0b38fdb2 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Thu, 31 Jan 2019 22:37:28 +0000 Subject: [PATCH 44/90] Add new m_ext type for data for M_NOFREE mbufs, which doesn't actually do anything except several assertions. This type is going to be used for temporary on stack mbufs, that point into data in receive ring of a NIC, that shall not be freed. Such mbuf can not be stored or reallocated, its life time is current context. 
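As a loose sketch of the intended use (hypothetical driver code, not part of this commit; ring_buf and len are assumed names), a receive path would build such a throw-away mbuf roughly like this:

	struct mbuf m;			/* lives on the stack */

	m_init(&m, M_NOWAIT, MT_DATA, M_PKTHDR | M_NOFREE);
	m.m_flags |= M_EXT;
	m.m_ext.ext_buf = ring_buf;	/* memory owned by the RX ring */
	m.m_ext.ext_size = len;
	m.m_ext.ext_type = EXT_RXRING;	/* nothing to free */
	m.m_ext.ext_free = NULL;	/* mb_free_ext() asserts this */
	m.m_data = ring_buf;
	m.m_len = m.m_pkthdr.len = len;
	/* Hand &m up the stack synchronously; it must never be queued. */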
--- sys/kern/kern_mbuf.c | 7 ++++++- sys/sys/mbuf.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c index 9339590be7ec..b929106f6132 100644 --- a/sys/kern/kern_mbuf.c +++ b/sys/kern/kern_mbuf.c @@ -847,7 +847,8 @@ mb_free_ext(struct mbuf *m) */ if (m->m_flags & M_NOFREE) { freembuf = 0; - KASSERT(m->m_ext.ext_type == EXT_EXTREF, + KASSERT(m->m_ext.ext_type == EXT_EXTREF || + m->m_ext.ext_type == EXT_RXRING, ("%s: no-free mbuf %p has wrong type", __func__, m)); } else freembuf = 1; @@ -891,6 +892,10 @@ mb_free_ext(struct mbuf *m) ("%s: ext_free not set", __func__)); m->m_ext.ext_free(m); break; + case EXT_RXRING: + KASSERT(m->m_ext.ext_free == NULL, + ("%s: ext_free is set", __func__)); + break; default: KASSERT(m->m_ext.ext_type == 0, ("%s: unknown ext_type", __func__)); diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 827a01e3ba79..75217ed65c37 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -443,6 +443,7 @@ struct mbuf { #define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference */ +#define EXT_RXRING 8 /* data in NIC receive ring */ #define EXT_VENDOR1 224 /* for vendor-internal use */ #define EXT_VENDOR2 225 /* for vendor-internal use */ From 7dcbca8d672cab1f7ce445e2ecfe53f09c45cf1c Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 31 Jan 2019 22:43:20 +0000 Subject: [PATCH 45/90] nvdimm: enumerate NVDIMM SPA ranges from the root device Move the enumeration of NVDIMM SPA ranges from the spa GEOM class initializer into the NVDIMM root device. This will be necessary for a later change where NVDIMM namespaces require NVDIMM device enumeration to be reliably ordered before SPA enumeration. 
Submitted by: D Scott Phillips Sponsored by: Intel Corporation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D18734 --- sys/dev/nvdimm/nvdimm.c | 45 ++++++++++++++ sys/dev/nvdimm/nvdimm_spa.c | 115 +++--------------------------------- sys/dev/nvdimm/nvdimm_var.h | 11 +++- 3 files changed, 60 insertions(+), 111 deletions(-) diff --git a/sys/dev/nvdimm/nvdimm.c b/sys/dev/nvdimm/nvdimm.c index bdc4f57ab2dd..b93e50e88080 100644 --- a/sys/dev/nvdimm/nvdimm.c +++ b/sys/dev/nvdimm/nvdimm.c @@ -227,6 +227,31 @@ nvdimm_resume(device_t dev) return (0); } +static int +nvdimm_root_create_spa(void *nfitsubtbl, void *arg) +{ + enum SPA_mapping_type spa_type; + ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr; + struct SPA_mapping *spa; + struct nvdimm_root_dev *dev; + int error; + + nfitaddr = nfitsubtbl; + dev = arg; + spa_type = nvdimm_spa_type_from_uuid( + (struct uuid *)nfitaddr->RangeGuid); + if (spa_type == SPA_TYPE_UNKNOWN) + return (0); + spa = malloc(sizeof(struct SPA_mapping), M_NVDIMM, M_WAITOK | M_ZERO); + error = nvdimm_spa_init(spa, nfitaddr, spa_type); + if (error != 0) { + nvdimm_spa_fini(spa); + free(spa, M_NVDIMM); + } + SLIST_INSERT_HEAD(&dev->spas, spa, link); + return (0); +} + static ACPI_STATUS nvdimm_root_create_dev(ACPI_HANDLE handle, UINT32 nesting_level, void *context, void **return_value) @@ -276,6 +301,7 @@ nvdimm_root_attach(device_t dev) { ACPI_HANDLE handle; ACPI_STATUS status; + ACPI_TABLE_NFIT *nfitbl; int error; handle = acpi_get_handle(dev); @@ -284,15 +310,33 @@ nvdimm_root_attach(device_t dev) if (ACPI_FAILURE(status)) device_printf(dev, "failed adding children\n"); error = bus_generic_attach(dev); + if (error != 0) + return (error); + status = AcpiGetTable(ACPI_SIG_NFIT, 1, (ACPI_TABLE_HEADER **)&nfitbl); + if (ACPI_FAILURE(status)) { + device_printf(dev, "cannot get NFIT\n"); + return (ENXIO); + } + error = nvdimm_iterate_nfit(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS, + nvdimm_root_create_spa, device_get_softc(dev)); + AcpiPutTable(&nfitbl->Header); return (error); } static int nvdimm_root_detach(device_t dev) { + struct nvdimm_root_dev *root; + struct SPA_mapping *spa, *next; device_t *children; int i, error, num_children; + root = device_get_softc(dev); + SLIST_FOREACH_SAFE(spa, &root->spas, link, next) { + nvdimm_spa_fini(spa); + SLIST_REMOVE_HEAD(&root->spas, link); + free(spa, M_NVDIMM); + } error = bus_generic_detach(dev); if (error != 0) return (error); @@ -356,6 +400,7 @@ static device_method_t nvdimm_root_methods[] = { static driver_t nvdimm_root_driver = { "nvdimm_root", nvdimm_root_methods, + sizeof(struct nvdimm_root_dev), }; DRIVER_MODULE(nvdimm_root, acpi, nvdimm_root_driver, nvdimm_root_devclass, NULL, diff --git a/sys/dev/nvdimm/nvdimm_spa.c b/sys/dev/nvdimm/nvdimm_spa.c index b621193f951d..0bbb41746d85 100644 --- a/sys/dev/nvdimm/nvdimm_spa.c +++ b/sys/dev/nvdimm/nvdimm_spa.c @@ -82,19 +82,6 @@ __FBSDID("$FreeBSD$"); #define UUID_INITIALIZER_PERSISTENT_VIRTUAL_CD \ {0x08018188,0x42cd,0xbb48,0x10,0x0f,{0x53,0x87,0xd5,0x3d,0xed,0x3d}} -struct SPA_mapping *spa_mappings; -int spa_mappings_cnt; - -static int -nvdimm_spa_count(void *nfitsubtbl __unused, void *arg) -{ - int *cnt; - - cnt = arg; - (*cnt)++; - return (0); -} - static struct nvdimm_SPA_uuid_list_elm { const char *u_name; struct uuid u_id; @@ -419,22 +406,17 @@ nvdimm_spa_g_access(struct g_provider *pp, int r, int w, int e) return (0); } -static g_init_t nvdimm_spa_g_init; -static g_fini_t nvdimm_spa_g_fini; - struct g_class nvdimm_spa_g_class = { .name = "SPA", .version = G_VERSION, 
.start = nvdimm_spa_g_start, .access = nvdimm_spa_g_access, - .init = nvdimm_spa_g_init, - .fini = nvdimm_spa_g_fini, }; DECLARE_GEOM_CLASS(nvdimm_spa_g_class, g_spa); -static int -nvdimm_spa_init_one(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr, - int spa_type) +int +nvdimm_spa_init(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr, + enum SPA_mapping_type spa_type) { struct make_dev_args mda; struct sglist *spa_sg; @@ -512,7 +494,7 @@ nvdimm_spa_init_one(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr, if (error1 == 0) error1 = error; } else { - g_topology_assert(); + g_topology_lock(); spa->spa_g = g_new_geomf(&nvdimm_spa_g_class, "spa%d", spa->spa_nfit_idx); spa->spa_g->softc = spa; @@ -526,12 +508,13 @@ nvdimm_spa_init_one(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr, spa->spa_g_devstat = devstat_new_entry("spa", spa->spa_nfit_idx, DEV_BSIZE, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); + g_topology_unlock(); } return (error1); } -static void -nvdimm_spa_fini_one(struct SPA_mapping *spa) +void +nvdimm_spa_fini(struct SPA_mapping *spa) { mtx_lock(&spa->spa_g_mtx); @@ -563,87 +546,3 @@ nvdimm_spa_fini_one(struct SPA_mapping *spa) mtx_destroy(&spa->spa_g_mtx); mtx_destroy(&spa->spa_g_stat_mtx); } - -static int -nvdimm_spa_parse(void *nfitsubtbl, void *arg) -{ - ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr; - struct SPA_mapping *spa; - enum SPA_mapping_type spa_type; - int error, *i; - - i = arg; - spa = &spa_mappings[(*i)++]; - nfitaddr = nfitsubtbl; - spa_type = nvdimm_spa_type_from_uuid( - (struct uuid *)&nfitaddr->RangeGuid); - if (spa_type == SPA_TYPE_UNKNOWN) { - printf("Unknown SPA UUID %d ", nfitaddr->RangeIndex); - printf_uuid((struct uuid *)&nfitaddr->RangeGuid); - printf("\n"); - return (0); - } - error = nvdimm_spa_init_one(spa, nfitaddr, spa_type); - if (error != 0) - nvdimm_spa_fini_one(spa); - return (0); -} - -static int -nvdimm_spa_init1(ACPI_TABLE_NFIT *nfitbl) -{ - int error, i; - - error = nvdimm_iterate_nfit(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS, - nvdimm_spa_count, &spa_mappings_cnt); - if (error != 0) - return (error); - spa_mappings = malloc(sizeof(struct SPA_mapping) * spa_mappings_cnt, - M_NVDIMM, M_WAITOK | M_ZERO); - i = 0; - error = nvdimm_iterate_nfit(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS, - nvdimm_spa_parse, &i); - if (error != 0) { - free(spa_mappings, M_NVDIMM); - spa_mappings = NULL; - return (error); - } - return (0); -} - -static void -nvdimm_spa_g_init(struct g_class *mp __unused) -{ - ACPI_TABLE_NFIT *nfitbl; - ACPI_STATUS status; - int error; - - spa_mappings_cnt = 0; - spa_mappings = NULL; - if (acpi_disabled("nvdimm")) - return; - status = AcpiGetTable(ACPI_SIG_NFIT, 1, (ACPI_TABLE_HEADER **)&nfitbl); - if (ACPI_FAILURE(status)) { - if (bootverbose) - printf("nvdimm_spa_g_init: cannot find NFIT\n"); - return; - } - error = nvdimm_spa_init1(nfitbl); - if (error != 0) - printf("nvdimm_spa_g_init: error %d\n", error); - AcpiPutTable(&nfitbl->Header); -} - -static void -nvdimm_spa_g_fini(struct g_class *mp __unused) -{ - int i; - - if (spa_mappings == NULL) - return; - for (i = 0; i < spa_mappings_cnt; i++) - nvdimm_spa_fini_one(&spa_mappings[i]); - free(spa_mappings, M_NVDIMM); - spa_mappings = NULL; - spa_mappings_cnt = 0; -} diff --git a/sys/dev/nvdimm/nvdimm_var.h b/sys/dev/nvdimm/nvdimm_var.h index c70476e38b98..6e627c24918a 100644 --- a/sys/dev/nvdimm/nvdimm_var.h +++ b/sys/dev/nvdimm/nvdimm_var.h @@ -44,6 +44,10 @@ __BUS_ACCESSOR(nvdimm_root, acpi_handle, NVDIMM_ROOT, ACPI_HANDLE, 
ACPI_HANDLE) __BUS_ACCESSOR(nvdimm_root, device_handle, NVDIMM_ROOT, DEVICE_HANDLE, nfit_handle_t) +struct nvdimm_root_dev { + SLIST_HEAD(, SPA_mapping) spas; +}; + struct nvdimm_dev { device_t nv_dev; nfit_handle_t nv_handle; @@ -64,6 +68,7 @@ enum SPA_mapping_type { }; struct SPA_mapping { + SLIST_ENTRY(SPA_mapping) link; enum SPA_mapping_type spa_type; int spa_domain; int spa_nfit_idx; @@ -84,14 +89,14 @@ struct SPA_mapping { bool spa_g_proc_exiting; }; -extern struct SPA_mapping *spa_mappings; -extern int spa_mappings_cnt; - MALLOC_DECLARE(M_NVDIMM); enum SPA_mapping_type nvdimm_spa_type_from_uuid(struct uuid *); struct nvdimm_dev *nvdimm_find_by_handle(nfit_handle_t nv_handle); int nvdimm_iterate_nfit(ACPI_TABLE_NFIT *nfitbl, enum AcpiNfitType type, int (*cb)(void *, void *), void *arg); +int nvdimm_spa_init(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr, + enum SPA_mapping_type spa_type); +void nvdimm_spa_fini(struct SPA_mapping *spa); #endif /* __DEV_NVDIMM_VAR_H__ */ From 7674dce0a4b7d49efee2adfdd8b9cbd0ee50c688 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 31 Jan 2019 22:47:04 +0000 Subject: [PATCH 46/90] nvdimm: only enumerate present nvdimm devices Not all child devices of the NVDIMM root device represent DIMM devices which are present in the system. The spec says (ACPI 6.2, sec 9.20.2): For each NVDIMM present or intended to be supported by platform, platform firmware also exposes an NVDIMM device ... under the NVDIMM root device. Present NVDIMM devices are found by walking all of the NFIT table's SPA ranges, then walking the NVDIMM regions mentioned by those SPA ranges. A set of NFIT walking helper functions are introduced to avoid the need to splat the enumeration logic across several disparate callbacks. Submitted by: D Scott Phillips Sponsored by: Intel Corporation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D18439 --- sys/dev/nvdimm/nvdimm.c | 238 +++++++++++++---------------------- sys/dev/nvdimm/nvdimm_nfit.c | 203 ++++++++++++++++++++++++++++++ sys/dev/nvdimm/nvdimm_var.h | 14 ++- sys/modules/nvdimm/Makefile | 1 + 4 files changed, 305 insertions(+), 151 deletions(-) create mode 100644 sys/dev/nvdimm/nvdimm_nfit.c diff --git a/sys/dev/nvdimm/nvdimm.c b/sys/dev/nvdimm/nvdimm.c index b93e50e88080..d4c22dff2409 100644 --- a/sys/dev/nvdimm/nvdimm.c +++ b/sys/dev/nvdimm/nvdimm.c @@ -76,99 +76,6 @@ nvdimm_find_by_handle(nfit_handle_t nv_handle) return (res); } -static int -nvdimm_parse_flush_addr(void *nfitsubtbl, void *arg) -{ - ACPI_NFIT_FLUSH_ADDRESS *nfitflshaddr; - struct nvdimm_dev *nv; - int i; - - nfitflshaddr = nfitsubtbl; - nv = arg; - if (nfitflshaddr->DeviceHandle != nv->nv_handle) - return (0); - - MPASS(nv->nv_flush_addr == NULL && nv->nv_flush_addr_cnt == 0); - nv->nv_flush_addr = mallocarray(nfitflshaddr->HintCount, - sizeof(uint64_t *), M_NVDIMM, M_WAITOK); - for (i = 0; i < nfitflshaddr->HintCount; i++) - nv->nv_flush_addr[i] = (uint64_t *)nfitflshaddr->HintAddress[i]; - nv->nv_flush_addr_cnt = nfitflshaddr->HintCount; - return (0); -} - -int -nvdimm_iterate_nfit(ACPI_TABLE_NFIT *nfitbl, enum AcpiNfitType type, - int (*cb)(void *, void *), void *arg) -{ - ACPI_NFIT_HEADER *nfithdr; - ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr; - ACPI_NFIT_MEMORY_MAP *nfitmap; - ACPI_NFIT_INTERLEAVE *nfitintrl; - ACPI_NFIT_SMBIOS *nfitsmbios; - ACPI_NFIT_CONTROL_REGION *nfitctlreg; - ACPI_NFIT_DATA_REGION *nfitdtreg; - ACPI_NFIT_FLUSH_ADDRESS *nfitflshaddr; - char *ptr; - int error; - - error = 0; - for (ptr = (char *)(nfitbl + 1); 
- ptr < (char *)nfitbl + nfitbl->Header.Length; - ptr += nfithdr->Length) { - nfithdr = (ACPI_NFIT_HEADER *)ptr; - if (nfithdr->Type != type) - continue; - switch (nfithdr->Type) { - case ACPI_NFIT_TYPE_SYSTEM_ADDRESS: - nfitaddr = __containerof(nfithdr, - ACPI_NFIT_SYSTEM_ADDRESS, Header); - error = cb(nfitaddr, arg); - break; - case ACPI_NFIT_TYPE_MEMORY_MAP: - nfitmap = __containerof(nfithdr, - ACPI_NFIT_MEMORY_MAP, Header); - error = cb(nfitmap, arg); - break; - case ACPI_NFIT_TYPE_INTERLEAVE: - nfitintrl = __containerof(nfithdr, - ACPI_NFIT_INTERLEAVE, Header); - error = cb(nfitintrl, arg); - break; - case ACPI_NFIT_TYPE_SMBIOS: - nfitsmbios = __containerof(nfithdr, - ACPI_NFIT_SMBIOS, Header); - error = cb(nfitsmbios, arg); - break; - case ACPI_NFIT_TYPE_CONTROL_REGION: - nfitctlreg = __containerof(nfithdr, - ACPI_NFIT_CONTROL_REGION, Header); - error = cb(nfitctlreg, arg); - break; - case ACPI_NFIT_TYPE_DATA_REGION: - nfitdtreg = __containerof(nfithdr, - ACPI_NFIT_DATA_REGION, Header); - error = cb(nfitdtreg, arg); - break; - case ACPI_NFIT_TYPE_FLUSH_ADDRESS: - nfitflshaddr = __containerof(nfithdr, - ACPI_NFIT_FLUSH_ADDRESS, Header); - error = cb(nfitflshaddr, arg); - break; - case ACPI_NFIT_TYPE_RESERVED: - default: - if (bootverbose) - printf("NFIT subtype %d unknown\n", - nfithdr->Type); - error = 0; - break; - } - if (error != 0) - break; - } - return (error); -} - static int nvdimm_probe(device_t dev) { @@ -197,8 +104,8 @@ nvdimm_attach(device_t dev) device_printf(dev, "cannot get NFIT\n"); return (ENXIO); } - nvdimm_iterate_nfit(nfitbl, ACPI_NFIT_TYPE_FLUSH_ADDRESS, - nvdimm_parse_flush_addr, nv); + acpi_nfit_get_flush_addrs(nfitbl, nv->nv_handle, &nv->nv_flush_addr, + &nv->nv_flush_addr_cnt); AcpiPutTable(&nfitbl->Header); return (0); } @@ -227,57 +134,92 @@ nvdimm_resume(device_t dev) return (0); } -static int -nvdimm_root_create_spa(void *nfitsubtbl, void *arg) +static ACPI_STATUS +find_dimm(ACPI_HANDLE handle, UINT32 nesting_level, void *context, + void **return_value) { - enum SPA_mapping_type spa_type; - ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr; - struct SPA_mapping *spa; - struct nvdimm_root_dev *dev; - int error; + ACPI_DEVICE_INFO *device_info; + ACPI_STATUS status; - nfitaddr = nfitsubtbl; - dev = arg; - spa_type = nvdimm_spa_type_from_uuid( - (struct uuid *)nfitaddr->RangeGuid); - if (spa_type == SPA_TYPE_UNKNOWN) - return (0); - spa = malloc(sizeof(struct SPA_mapping), M_NVDIMM, M_WAITOK | M_ZERO); - error = nvdimm_spa_init(spa, nfitaddr, spa_type); - if (error != 0) { - nvdimm_spa_fini(spa); - free(spa, M_NVDIMM); + status = AcpiGetObjectInfo(handle, &device_info); + if (ACPI_FAILURE(status)) + return_ACPI_STATUS(AE_ERROR); + if (device_info->Address == (uintptr_t)context) { + *(ACPI_HANDLE *)return_value = handle; + return_ACPI_STATUS(AE_CTRL_TERMINATE); } - SLIST_INSERT_HEAD(&dev->spas, spa, link); + return_ACPI_STATUS(AE_OK); +} + +static ACPI_HANDLE +get_dimm_acpi_handle(ACPI_HANDLE root_handle, nfit_handle_t adr) +{ + ACPI_HANDLE res; + ACPI_STATUS status; + + res = NULL; + status = AcpiWalkNamespace(ACPI_TYPE_DEVICE, root_handle, 1, find_dimm, + NULL, (void *)(uintptr_t)adr, &res); + if (ACPI_FAILURE(status)) + res = NULL; + return (res); +} + +static int +nvdimm_root_create_devs(device_t dev, ACPI_TABLE_NFIT *nfitbl) +{ + ACPI_HANDLE root_handle, dimm_handle; + device_t child; + nfit_handle_t *dimm_ids, *dimm; + uintptr_t *ivars; + int num_dimm_ids; + + root_handle = acpi_get_handle(dev); + acpi_nfit_get_dimm_ids(nfitbl, &dimm_ids, &num_dimm_ids); + for (dimm = 
dimm_ids; dimm < dimm_ids + num_dimm_ids; dimm++) { + dimm_handle = get_dimm_acpi_handle(root_handle, *dimm); + child = BUS_ADD_CHILD(dev, 100, "nvdimm", -1); + if (child == NULL) { + device_printf(dev, "failed to create nvdimm\n"); + return (ENXIO); + } + ivars = mallocarray(NVDIMM_ROOT_IVAR_MAX, sizeof(uintptr_t), + M_NVDIMM, M_ZERO | M_WAITOK); + device_set_ivars(child, ivars); + nvdimm_root_set_acpi_handle(child, dimm_handle); + nvdimm_root_set_device_handle(child, *dimm); + } + free(dimm_ids, M_NVDIMM); return (0); } -static ACPI_STATUS -nvdimm_root_create_dev(ACPI_HANDLE handle, UINT32 nesting_level, void *context, - void **return_value) +static int +nvdimm_root_create_spas(struct nvdimm_root_dev *dev, ACPI_TABLE_NFIT *nfitbl) { - ACPI_STATUS status; - ACPI_DEVICE_INFO *device_info; - device_t parent, child; - uintptr_t *ivars; + ACPI_NFIT_SYSTEM_ADDRESS **spas, **spa; + struct SPA_mapping *spa_mapping; + enum SPA_mapping_type spa_type; + int error, num_spas; - parent = context; - child = BUS_ADD_CHILD(parent, 100, "nvdimm", -1); - if (child == NULL) { - device_printf(parent, "failed to create nvdimm\n"); - return_ACPI_STATUS(AE_ERROR); + error = 0; + acpi_nfit_get_spa_ranges(nfitbl, &spas, &num_spas); + for (spa = spas; spa < spas + num_spas; spa++) { + spa_type = nvdimm_spa_type_from_uuid( + (struct uuid *)(*spa)->RangeGuid); + if (spa_type == SPA_TYPE_UNKNOWN) + continue; + spa_mapping = malloc(sizeof(struct SPA_mapping), M_NVDIMM, + M_WAITOK | M_ZERO); + error = nvdimm_spa_init(spa_mapping, *spa, spa_type); + if (error != 0) { + nvdimm_spa_fini(spa_mapping); + free(spa, M_NVDIMM); + break; + } + SLIST_INSERT_HEAD(&dev->spas, spa_mapping, link); } - status = AcpiGetObjectInfo(handle, &device_info); - if (ACPI_FAILURE(status)) { - device_printf(parent, "failed to get nvdimm device info\n"); - return_ACPI_STATUS(AE_ERROR); - } - ivars = mallocarray(NVDIMM_ROOT_IVAR_MAX - 1, sizeof(uintptr_t), - M_NVDIMM, M_ZERO | M_WAITOK); - device_set_ivars(child, ivars); - nvdimm_root_set_acpi_handle(child, handle); - nvdimm_root_set_device_handle(child, device_info->Address); - return_ACPI_STATUS(AE_OK); + free(spas, M_NVDIMM); + return (error); } static char *nvdimm_root_id[] = {"ACPI0012", NULL}; @@ -299,26 +241,24 @@ nvdimm_root_probe(device_t dev) static int nvdimm_root_attach(device_t dev) { - ACPI_HANDLE handle; - ACPI_STATUS status; + struct nvdimm_root_dev *root; ACPI_TABLE_NFIT *nfitbl; + ACPI_STATUS status; int error; - handle = acpi_get_handle(dev); - status = AcpiWalkNamespace(ACPI_TYPE_DEVICE, handle, 1, - nvdimm_root_create_dev, NULL, dev, NULL); - if (ACPI_FAILURE(status)) - device_printf(dev, "failed adding children\n"); - error = bus_generic_attach(dev); - if (error != 0) - return (error); status = AcpiGetTable(ACPI_SIG_NFIT, 1, (ACPI_TABLE_HEADER **)&nfitbl); if (ACPI_FAILURE(status)) { device_printf(dev, "cannot get NFIT\n"); return (ENXIO); } - error = nvdimm_iterate_nfit(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS, - nvdimm_root_create_spa, device_get_softc(dev)); + error = nvdimm_root_create_devs(dev, nfitbl); + if (error != 0) + return (error); + error = bus_generic_attach(dev); + if (error != 0) + return (error); + root = device_get_softc(dev); + error = nvdimm_root_create_spas(root, nfitbl); AcpiPutTable(&nfitbl->Header); return (error); } diff --git a/sys/dev/nvdimm/nvdimm_nfit.c b/sys/dev/nvdimm/nvdimm_nfit.c new file mode 100644 index 000000000000..ae00b88f8018 --- /dev/null +++ b/sys/dev/nvdimm/nvdimm_nfit.c @@ -0,0 +1,203 @@ +/*- + * Copyright (c) 2018 Intel Corporation + 
* All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include +#include + +static int +uint32_t_compare(const void *a, const void *b) +{ + + return (*(const uint32_t *)a - *(const uint32_t *)b); +} + +static int +find_matches(ACPI_TABLE_NFIT *nfitbl, uint16_t type, uint16_t offset, + uint64_t mask, uint64_t value, void **ptrs, int ptrs_len) +{ + ACPI_NFIT_HEADER *h, *end; + uint64_t val; + size_t load_size; + int count; + + h = (ACPI_NFIT_HEADER *)(nfitbl + 1); + end = (ACPI_NFIT_HEADER *)((char *)nfitbl + + nfitbl->Header.Length); + load_size = roundup2(flsl(mask), 8) / 8; + count = 0; + + while (h < end) { + if (h->Type == type) { + bcopy((char *)h + offset, &val, load_size); + val &= mask; + if (val == value) { + if (ptrs_len > 0) { + ptrs[count] = h; + ptrs_len--; + } + count++; + } + } + if (h->Length == 0) + break; + h = (ACPI_NFIT_HEADER *)((char *)h + h->Length); + } + return (count); +} + +static void +malloc_find_matches(ACPI_TABLE_NFIT *nfitbl, uint16_t type, uint16_t offset, + uint64_t mask, uint64_t value, void ***ptrs, int *ptrs_len) +{ + int count; + + count = find_matches(nfitbl, type, offset, mask, value, NULL, 0); + *ptrs_len = count; + if (count == 0) { + *ptrs = NULL; + return; + } + *ptrs = mallocarray(count, sizeof(void *), M_NVDIMM, M_WAITOK); + find_matches(nfitbl, type, offset, mask, value, *ptrs, *ptrs_len); +} + +void +acpi_nfit_get_dimm_ids(ACPI_TABLE_NFIT *nfitbl, nfit_handle_t **listp, + int *countp) +{ + ACPI_NFIT_SYSTEM_ADDRESS **spas; + ACPI_NFIT_MEMORY_MAP ***regions; + int i, j, k, maxids, num_spas, *region_counts; + + acpi_nfit_get_spa_ranges(nfitbl, &spas, &num_spas); + if (num_spas == 0) { + *listp = NULL; + *countp = 0; + return; + } + regions = mallocarray(num_spas, sizeof(uint16_t *), M_NVDIMM, + M_WAITOK); + region_counts = mallocarray(num_spas, sizeof(int), M_NVDIMM, M_WAITOK); + for (i = 0; i < num_spas; i++) { + acpi_nfit_get_region_mappings_by_spa_range(nfitbl, + spas[i]->RangeIndex, ®ions[i], ®ion_counts[i]); + } + maxids = 0; + for (i = 0; i < num_spas; i++) { + maxids += region_counts[i]; + } + *listp = mallocarray(maxids, sizeof(nfit_handle_t), M_NVDIMM, M_WAITOK); + k = 0; + for (i = 0; i < 
num_spas; i++) { + for (j = 0; j < region_counts[i]; j++) + (*listp)[k++] = regions[i][j]->DeviceHandle; + } + qsort((*listp), maxids, sizeof(uint32_t), uint32_t_compare); + i = 0; + for (j = 1; j < maxids; j++) { + if ((*listp)[i] != (*listp)[j]) + (*listp)[++i] = (*listp)[j]; + } + *countp = i + 1; + free(region_counts, M_NVDIMM); + for (i = 0; i < num_spas; i++) + free(regions[i], M_NVDIMM); + free(regions, M_NVDIMM); + free(spas, M_NVDIMM); +} + +void +acpi_nfit_get_spa_range(ACPI_TABLE_NFIT *nfitbl, uint16_t range_index, + ACPI_NFIT_SYSTEM_ADDRESS **spa) +{ + + *spa = NULL; + find_matches(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS, + offsetof(ACPI_NFIT_SYSTEM_ADDRESS, RangeIndex), UINT16_MAX, + range_index, (void **)spa, 1); +} + +void +acpi_nfit_get_spa_ranges(ACPI_TABLE_NFIT *nfitbl, + ACPI_NFIT_SYSTEM_ADDRESS ***listp, int *countp) +{ + + malloc_find_matches(nfitbl, ACPI_NFIT_TYPE_SYSTEM_ADDRESS, 0, 0, 0, + (void ***)listp, countp); +} + +void +acpi_nfit_get_region_mappings_by_spa_range(ACPI_TABLE_NFIT *nfitbl, + uint16_t spa_range_index, ACPI_NFIT_MEMORY_MAP ***listp, int *countp) +{ + + malloc_find_matches(nfitbl, ACPI_NFIT_TYPE_MEMORY_MAP, + offsetof(ACPI_NFIT_MEMORY_MAP, RangeIndex), UINT16_MAX, + spa_range_index, (void ***)listp, countp); +} + +void acpi_nfit_get_control_region(ACPI_TABLE_NFIT *nfitbl, + uint16_t control_region_index, ACPI_NFIT_CONTROL_REGION **out) +{ + + *out = NULL; + find_matches(nfitbl, ACPI_NFIT_TYPE_CONTROL_REGION, + offsetof(ACPI_NFIT_CONTROL_REGION, RegionIndex), UINT16_MAX, + control_region_index, (void **)out, 1); +} + +void +acpi_nfit_get_flush_addrs(ACPI_TABLE_NFIT *nfitbl, nfit_handle_t dimm, + uint64_t ***listp, int *countp) +{ + ACPI_NFIT_FLUSH_ADDRESS *subtable; + int i; + + subtable = NULL; + find_matches(nfitbl, ACPI_NFIT_TYPE_FLUSH_ADDRESS, + offsetof(ACPI_NFIT_FLUSH_ADDRESS, DeviceHandle), UINT32_MAX, + dimm, (void **)&subtable, 1); + if (subtable == NULL || subtable->HintCount == 0) { + *listp = NULL; + *countp = 0; + return; + } + *countp = subtable->HintCount; + *listp = mallocarray(subtable->HintCount, sizeof(uint64_t *), M_NVDIMM, + M_WAITOK); + for (i = 0; i < subtable->HintCount; i++) + (*listp)[i] = (uint64_t *)(intptr_t)subtable->HintAddress[i]; +} diff --git a/sys/dev/nvdimm/nvdimm_var.h b/sys/dev/nvdimm/nvdimm_var.h index 6e627c24918a..c4f61988cc39 100644 --- a/sys/dev/nvdimm/nvdimm_var.h +++ b/sys/dev/nvdimm/nvdimm_var.h @@ -91,10 +91,20 @@ struct SPA_mapping { MALLOC_DECLARE(M_NVDIMM); +void acpi_nfit_get_dimm_ids(ACPI_TABLE_NFIT *nfitbl, nfit_handle_t **listp, + int *countp); +void acpi_nfit_get_spa_range(ACPI_TABLE_NFIT *nfitbl, uint16_t range_index, + ACPI_NFIT_SYSTEM_ADDRESS **spa); +void acpi_nfit_get_spa_ranges(ACPI_TABLE_NFIT *nfitbl, + ACPI_NFIT_SYSTEM_ADDRESS ***listp, int *countp); +void acpi_nfit_get_region_mappings_by_spa_range(ACPI_TABLE_NFIT *nfitbl, + uint16_t spa_range_index, ACPI_NFIT_MEMORY_MAP ***listp, int *countp); +void acpi_nfit_get_control_region(ACPI_TABLE_NFIT *nfitbl, + uint16_t control_region_index, ACPI_NFIT_CONTROL_REGION **out); +void acpi_nfit_get_flush_addrs(ACPI_TABLE_NFIT *nfitbl, nfit_handle_t dimm, + uint64_t ***listp, int *countp); enum SPA_mapping_type nvdimm_spa_type_from_uuid(struct uuid *); struct nvdimm_dev *nvdimm_find_by_handle(nfit_handle_t nv_handle); -int nvdimm_iterate_nfit(ACPI_TABLE_NFIT *nfitbl, enum AcpiNfitType type, - int (*cb)(void *, void *), void *arg); int nvdimm_spa_init(struct SPA_mapping *spa, ACPI_NFIT_SYSTEM_ADDRESS *nfitaddr, enum SPA_mapping_type spa_type); void 
nvdimm_spa_fini(struct SPA_mapping *spa); diff --git a/sys/modules/nvdimm/Makefile b/sys/modules/nvdimm/Makefile index 5381345a9486..77fa6453ecbc 100644 --- a/sys/modules/nvdimm/Makefile +++ b/sys/modules/nvdimm/Makefile @@ -4,6 +4,7 @@ KMOD= nvdimm SRCS= nvdimm.c \ + nvdimm_nfit.c \ nvdimm_spa.c SRCS+= acpi_if.h bus_if.h device_if.h From 90f2d5012a44f34fc23a7d1743336266960510b7 Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Thu, 31 Jan 2019 22:58:17 +0000 Subject: [PATCH 47/90] Regen after r342190. Differential Revision: https://reviews.freebsd.org/D18444 --- sys/kern/systrace_args.c | 4 ++-- sys/sys/sysproto.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c index d570f0a50fbc..196d13491a98 100644 --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -972,7 +972,7 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) uarg[1] = p->namelen; /* u_int */ uarg[2] = (intptr_t) p->old; /* void * */ uarg[3] = (intptr_t) p->oldlenp; /* size_t * */ - uarg[4] = (intptr_t) p->new; /* void * */ + uarg[4] = (intptr_t) p->new; /* const void * */ uarg[5] = p->newlen; /* size_t */ *n_args = 6; break; @@ -4867,7 +4867,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) p = "userland size_t *"; break; case 4: - p = "userland void *"; + p = "userland const void *"; break; case 5: p = "size_t"; diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h index 58bbd715d061..d16a92a3b68c 100644 --- a/sys/sys/sysproto.h +++ b/sys/sys/sysproto.h @@ -560,7 +560,7 @@ struct sysctl_args { char namelen_l_[PADL_(u_int)]; u_int namelen; char namelen_r_[PADR_(u_int)]; char old_l_[PADL_(void *)]; void * old; char old_r_[PADR_(void *)]; char oldlenp_l_[PADL_(size_t *)]; size_t * oldlenp; char oldlenp_r_[PADR_(size_t *)]; - char new_l_[PADL_(void *)]; void * new; char new_r_[PADR_(void *)]; + char new_l_[PADL_(const void *)]; const void * new; char new_r_[PADR_(const void *)]; char newlen_l_[PADL_(size_t)]; size_t newlen; char newlen_r_[PADR_(size_t)]; }; struct mlock_args { From b252313f0b3a4659c02e61d3a0bba471c89bcfa9 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Thu, 31 Jan 2019 23:01:03 +0000 Subject: [PATCH 48/90] New pfil(9) KPI together with newborn pfil API and control utility. The KPI has been reviewed and cleansed of features that were planned back 20 years ago and never implemented. The pfil(9) internals have been made opaque to protocols with only returned types and function declarations exposed. The KPI is made more strict, but at the same time more extensible, as the kernel uses the same command structures that the userland ioctl uses. In a nutshell the [KA]PI is about declaring filtering points, declaring filters and linking and unlinking them together. The new [KA]PI makes it possible to change the pfil(9) configuration: change the order of hooks, rehook a filter from one filtering point to another, disconnect a hook on output leaving it on input only, prepend/append a filter to the existing list of filters. Now it is possible for a single packet filter to provide multiple rulesets that may be linked to different points. Think of per-interface ACLs in Cisco or Juniper. None of the existing packet filters supports that yet, however limited usage is already possible, e.g. the default ruleset can be moved to a single interface, as soon as interfaces provide their filtering points. Another future feature is the possibility to create pfil heads that provide not an mbuf pointer but just a memory pointer with a length.
That would allow filtering at very early stages of a packet's lifecycle, e.g. when a packet has just been received by a NIC and no mbuf has been allocated yet. Differential Revision: https://reviews.freebsd.org/D18951 --- ObsoleteFiles.inc | 6 + sbin/Makefile | 1 + sbin/pfilctl/Makefile | 9 + sbin/pfilctl/pfilctl.8 | 117 +++ sbin/pfilctl/pfilctl.c | 230 +++++ share/man/man9/Makefile | 6 +- share/man/man9/pfil.9 | 293 ++---- sys/contrib/ipfilter/netinet/ip_fil_freebsd.c | 115 ++- sys/net/if_bridge.c | 104 +- sys/net/if_enc.c | 10 +- sys/net/if_ethersubr.c | 41 +- sys/net/if_var.h | 5 +- sys/net/pfil.c | 892 +++++++++++------- sys/net/pfil.h | 213 +++-- sys/netinet/ip_fastfwd.c | 16 +- sys/netinet/ip_input.c | 25 +- sys/netinet/ip_output.c | 15 +- sys/netinet/ip_var.h | 5 +- sys/netinet/siftr.c | 70 +- sys/netinet6/ip6_fastfwd.c | 12 +- sys/netinet6/ip6_forward.c | 9 +- sys/netinet6/ip6_input.c | 25 +- sys/netinet6/ip6_output.c | 15 +- sys/netinet6/ip6_var.h | 6 +- sys/netpfil/ipfw/ip_fw_eaction.c | 2 +- sys/netpfil/ipfw/ip_fw_pfil.c | 169 ++-- sys/netpfil/pf/pf_ioctl.c | 160 ++-- 27 files changed, 1576 insertions(+), 995 deletions(-) create mode 100644 sbin/pfilctl/Makefile create mode 100644 sbin/pfilctl/pfilctl.8 create mode 100644 sbin/pfilctl/pfilctl.c diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc index 3a872c3ca3ff..1cdb447200a3 100644 --- a/ObsoleteFiles.inc +++ b/ObsoleteFiles.inc @@ -38,6 +38,12 @@ # xargs -n1 | sort | uniq -d; # done +# 20190131: pfil(9) changed +OLD_FILES+=usr/share/man/man9/pfil_hook_get.9 +OLD_FILES+=usr/share/man/man9/pfil_rlock.9 +OLD_FILES+=usr/share/man/man9/pfil_runlock.9 +OLD_FILES+=usr/share/man/man9/pfil_wlock.9 +OLD_FILES+=usr/share/man/man9/pfil_wunlock.9 # 20190126: adv(4) / adw(4) removal OLD_FILES+=usr/share/man/man4/adv.4.gz OLD_FILES+=usr/share/man/man4/adw.4.gz diff --git a/sbin/Makefile b/sbin/Makefile index 4f08a82fe572..2c4b042d91ec 100644 --- a/sbin/Makefile +++ b/sbin/Makefile @@ -52,6 +52,7 @@ SUBDIR=adjkerntz \ newfs_msdos \ nfsiod \ nos-tun \ + pfilctl \ ping \ rcorder \ reboot \ diff --git a/sbin/pfilctl/Makefile b/sbin/pfilctl/Makefile new file mode 100644 index 000000000000..04f0a622ce14 --- /dev/null +++ b/sbin/pfilctl/Makefile @@ -0,0 +1,9 @@ +# $FreeBSD$ + +PROG= pfilctl +SRCS= pfilctl.c +WARNS?= 6 + +MAN= pfilctl.8 + +.include <bsd.prog.mk> diff --git a/sbin/pfilctl/pfilctl.8 b/sbin/pfilctl/pfilctl.8 new file mode 100644 index 000000000000..d0a50e489a03 --- /dev/null +++ b/sbin/pfilctl/pfilctl.8 @@ -0,0 +1,117 @@ +.\" Copyright (c) 2019 Gleb Smirnoff +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd January 28, 2019 +.Dt PFILCTL 8 +.Os +.Sh NAME +.Nm pfilctl +.Nd pfil(9) control utility +.Sh SYNOPSIS +.Nm +.Cm heads +.Nm +.Cm hooks +.Nm +.Cm link +.Aq Fl i | Fl o +.Op Fl a +.Ar hook Ar head +.Nm +.Cm unlink +.Aq Fl i | Fl o +.Ar hook Ar head +.Sh DESCRIPTION +The +.Nm +utility is intended to view and change the configuration of the +.Xr pfil 9 +packet filtering points and the filters attached to them. +.Sh COMMANDS +.Bl -tag -width "unlink" +.It Cm heads +List available packet filtering points. +.It Cm hooks +List available packet filters. +.It Xo +.Cm link +.Aq Fl i | Fl o +.Op Fl a +.Ar hook Ar head +.Xc +Link +.Ar hook +to +.Ar head . +With the +.Fl i +flag the hook will be connected as an input hook and with +.Fl o +as an output hook. +At least one of +.Fl i +or +.Fl o +is required. +By default +.Nm +will prepend the hook in front of any other hooks present: +the new hook will be as close to the wire as possible, so that on input +it will be the first filter and on output it will be the last. +Adding the +.Fl a +flag switches to appending the new hook instead of prepending it. +.It Xo +.Cm unlink +.Aq Fl i | Fl o +.Ar hook Ar head +.Xc +Unlink +.Ar hook +from +.Ar head . +At least one of +.Fl i +or +.Fl o +is required. +With the +.Fl i +flag the hook will be removed from the input list of hooks +and with +.Fl o +from the output list. +.El +.Sh SEE ALSO +.Xr ipfilter 4 , +.Xr ipfw 4 , +.Xr pf 4 , +.Xr pfil 9 +.Sh AUTHORS +.An -nosplit +The +.Nm +utility was written by +.An Gleb Smirnoff Aq Mt glebius@FreeBSD.org . diff --git a/sbin/pfilctl/pfilctl.c b/sbin/pfilctl/pfilctl.c new file mode 100644 index 000000000000..363feabca116 --- /dev/null +++ b/sbin/pfilctl/pfilctl.c @@ -0,0 +1,230 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Gleb Smirnoff + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static int dev; + +static const char * const typenames[] = { + [PFIL_TYPE_IP4] = "IPv4", + [PFIL_TYPE_IP6] = "IPv6", + [PFIL_TYPE_ETHERNET] = "Ethernet", +}; + +static void listheads(int argc, char *argv[]); +static void listhooks(int argc, char *argv[]); +static void hook(int argc, char *argv[]); +static void help(void); + +static const struct cmd { + const char *cmd_name; + void (*cmd_func)(int argc, char *argv[]); +} cmds[] = { + { "heads", listheads }, + { "hooks", listhooks }, + { "link", hook }, + { "unlink", hook }, + { NULL, NULL }, +}; + +int +main(int argc __unused, char *argv[] __unused) +{ + int cmd = -1; + + if (--argc == 0) + help(); + argv++; + + for (int i = 0; cmds[i].cmd_name != NULL; i++) + if (!strncmp(argv[0], cmds[i].cmd_name, strlen(argv[0]))) { + if (cmd != -1) + errx(1, "ambiguous command: %s", argv[0]); + cmd = i; + } + if (cmd == -1) + errx(1, "unknown command: %s", argv[0]); + + dev = open("/dev/" PFILDEV, O_RDWR); + if (dev == -1) + err(1, "open(%s)", "/dev/" PFILDEV); + + (*cmds[cmd].cmd_func)(argc, argv); + + return (0); +} + +static void +help(void) +{ + extern char *__progname; + + fprintf(stderr, "usage: %s (heads|hooks|link|unlink)\n", __progname); + exit(0); +} + +static void +listheads(int argc __unused, char *argv[] __unused) +{ + struct pfilioc_list plh; + u_int nheads, nhooks, i; + int j, h; + + plh.pio_nheads = 0; + plh.pio_nhooks = 0; + if (ioctl(dev, PFILIOC_LISTHEADS, &plh) != 0) + err(1, "ioctl(PFILIOC_LISTHEADS)"); + +retry: + plh.pio_heads = calloc(plh.pio_nheads, sizeof(struct pfilioc_head)); + if (plh.pio_heads == NULL) + err(1, "malloc"); + plh.pio_hooks = calloc(plh.pio_nhooks, sizeof(struct pfilioc_hook)); + if (plh.pio_hooks == NULL) + err(1, "malloc"); + + nheads = plh.pio_nheads; + nhooks = plh.pio_nhooks; + + if (ioctl(dev, PFILIOC_LISTHEADS, &plh) != 0) + err(1, "ioctl(PFILIOC_LISTHEADS)"); + + if (plh.pio_nheads > nheads || plh.pio_nhooks > nhooks) { + free(plh.pio_heads); + free(plh.pio_hooks); + goto retry; + } + +#define FMTHD "%16s %8s\n" +#define FMTHK "%29s %16s %16s\n" + printf(FMTHD, "Intercept point", "Type"); + for (i = 0, h = 0; i < plh.pio_nheads; i++) { + printf(FMTHD, plh.pio_heads[i].pio_name, + typenames[plh.pio_heads[i].pio_type]); + for (j = 0; j < plh.pio_heads[i].pio_nhooksin; j++, h++) + printf(FMTHK, "In", plh.pio_hooks[h].pio_module, + plh.pio_hooks[h].pio_ruleset); + for (j = 0; j < plh.pio_heads[i].pio_nhooksout; j++, h++) + printf(FMTHK, "Out", plh.pio_hooks[h].pio_module, + plh.pio_hooks[h].pio_ruleset); + } +} + +static void +listhooks(int argc __unused, char *argv[] __unused) +{ + struct pfilioc_list plh; + u_int nhooks, i; + + plh.pio_nhooks = 0; + if (ioctl(dev, PFILIOC_LISTHEADS, &plh) != 0) + err(1, "ioctl(PFILIOC_LISTHEADS)"); +retry: + plh.pio_hooks = calloc(plh.pio_nhooks, sizeof(struct pfilioc_hook)); + if (plh.pio_hooks == 
NULL) + err(1, "malloc"); + + nhooks = plh.pio_nhooks; + + if (ioctl(dev, PFILIOC_LISTHOOKS, &plh) != 0) + err(1, "ioctl(PFILIOC_LISTHOOKS)"); + + if (plh.pio_nhooks > nhooks) { + free(plh.pio_hooks); + goto retry; + } + + printf("Available hooks:\n"); + for (i = 0; i < plh.pio_nhooks; i++) { + printf("\t%s:%s %s\n", plh.pio_hooks[i].pio_module, + plh.pio_hooks[i].pio_ruleset, + typenames[plh.pio_hooks[i].pio_type]); + } +} + +static void +hook(int argc, char *argv[]) +{ + struct pfilioc_link req; + int c; + char *ruleset; + + if (argv[0][0] == 'u') + req.pio_flags = PFIL_UNLINK; + else + req.pio_flags = 0; + + while ((c = getopt(argc, argv, "ioa")) != -1) + switch (c) { + case 'i': + req.pio_flags |= PFIL_IN; + break; + case 'o': + req.pio_flags |= PFIL_OUT; + break; + case 'a': + req.pio_flags |= PFIL_APPEND; + break; + default: + help(); + } + + if (!PFIL_DIR(req.pio_flags)) + help(); + + argc -= optind; + argv += optind; + + if (argc != 2) + help(); + + /* link mod:ruleset head */ + if ((ruleset = strchr(argv[0], ':')) == NULL) + help(); + *ruleset = '\0'; + ruleset++; + + strlcpy(req.pio_name, argv[1], sizeof(req.pio_name)); + strlcpy(req.pio_module, argv[0], sizeof(req.pio_module)); + strlcpy(req.pio_ruleset, ruleset, sizeof(req.pio_ruleset)); + + if (ioctl(dev, PFILIOC_LINK, &req) != 0) + err(1, "ioctl(PFILIOC_LINK)"); +} diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index 3034c24c6adb..bd816bf519fa 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -1635,13 +1635,9 @@ MLINKS+=pci_iov_schema.9 pci_iov_schema_alloc_node.9 \ MLINKS+=pfil.9 pfil_add_hook.9 \ pfil.9 pfil_head_register.9 \ pfil.9 pfil_head_unregister.9 \ - pfil.9 pfil_hook_get.9 \ pfil.9 pfil_remove_hook.9 \ - pfil.9 pfil_rlock.9 \ pfil.9 pfil_run_hooks.9 \ - pfil.9 pfil_runlock.9 \ - pfil.9 pfil_wlock.9 \ - pfil.9 pfil_wunlock.9 + pfil.9 pfil_link.9 MLINKS+=pfind.9 zpfind.9 MLINKS+=PHOLD.9 PRELE.9 \ PHOLD.9 _PHOLD.9 \ diff --git a/share/man/man9/pfil.9 b/share/man/man9/pfil.9 index 843191e0b4ab..c2186cf1b540 100644 --- a/share/man/man9/pfil.9 +++ b/share/man/man9/pfil.9 @@ -1,5 +1,6 @@ .\" $NetBSD: pfil.9,v 1.22 2003/07/01 13:04:06 wiz Exp $ .\" +.\" Copyright (c) 2019 Gleb Smirnoff .\" Copyright (c) 1996 Matthew R. Green .\" All rights reserved. 
.\" @@ -28,194 +29,127 @@ .\" .\" $FreeBSD$ .\" -.Dd March 10, 2018 +.Dd January 28, 2019 .Dt PFIL 9 .Os .Sh NAME .Nm pfil , .Nm pfil_head_register , .Nm pfil_head_unregister , -.Nm pfil_head_get , -.Nm pfil_add_hook , -.Nm pfil_add_hook_flags , -.Nm pfil_remove_hook , -.Nm pfil_remove_hook_flags , -.Nm pfil_run_hooks , -.Nm pfil_rlock , -.Nm pfil_runlock , -.Nm pfil_wlock , -.Nm pfil_wunlock +.Nm pfil_link , +.Nm pfil_run_hooks .Nd packet filter interface .Sh SYNOPSIS .In sys/param.h .In sys/mbuf.h -.In net/if.h .In net/pfil.h -.Bd -literal -typedef int (*pfil_func_t)(void *arg, struct mbuf **mp, struct ifnet *, int dir, struct inpcb); -.Bd -literal -typedef int (*pfil_func_flags_t)(void *arg, struct mbuf **mp, struct ifnet *, int dir, int flags, struct inpcb); -.Ft int -.Fn pfil_head_register "struct pfil_head *head" -.Ft int -.Fn pfil_head_unregister "struct pfil_head *head" -.Ft "struct pfil_head *" -.Fn pfil_head_get "int af" "u_long dlt" -.Ft int -.Fn pfil_add_hook "pfil_func_t" "void *arg" "struct pfil_head *" -.Ft int -.Fn pfil_add_hook_flags "pfil_func_flags_t" "void *arg" "int flags" "struct pfil_head *" -.Ft int -.Fn pfil_remove_hook "pfil_func_t" "void *arg" "struct pfil_head *" -.Ft int -.Fn pfil_remove_hook_flags "pfil_func_flags_t" "void *arg" "int flags" "struct pfil_head *" -.Ft int -.Fn pfil_run_hooks "struct pfil_head *head" "struct mbuf **mp" "struct ifnet *" "int dir" "int flags" "struct inpcb *" +.Ft pfil_head_t +.Fn pfil_head_register "struct pfil_head_args *args" .Ft void -.Fn pfil_rlock "struct pfil_head *" "struct rm_priotracker *" +.Fn pfil_head_unregister "struct pfil_head_t *head" +.Ft pfil_hook_t +.Fn pfil_add_hook "struct pfil_hook_args *" .Ft void -.Fn pfil_runlock "struct pfil_head *" "struct rm_priotracker *" -.Ft void -.Fn pfil_wlock "struct pfil_head *" -.Ft void -.Fn pfil_wunlock "struct pfil_head *" -.Ed +.Fn pfil_remove_hook "pfil_hook_t" +.Ft int +.Fn pfil_link "struct pfil_link_args *args" +.Ft int +.Fn pfil_run_hooks "phil_head_t *" "pfil_packet_t" "struct ifnet *" "int" "struct inpcb *" .Sh DESCRIPTION The .Nm -framework allows for a specified function to be invoked for every -incoming or outgoing packet for a particular network I/O stream. +framework allows for a specified function or a list of functions +to be invoked for every incoming or outgoing packet for a particular +network I/O stream. These hooks may be used to implement a firewall or perform packet transformations. .Pp -Packet filtering points are registered with +Packet filtering points, for historical reasons named +.Em heads , +are registered with .Fn pfil_head_register . -Filtering points are identified by a key -.Pq Vt "void *" -and a data link type -.Pq Vt int -in the -.Vt pfil_head -structure. -Packet filters use the key and data link type to look up the filtering -point with which they register themselves. -The key is unique to the filtering point. -The data link type is a -.Xr bpf 4 -DLT constant indicating what kind of header is present on the packet -at the filtering point. -Each filtering point uses common per-VNET rmlock by default. -This can be changed by specifying -.Vt PFIL_FLAG_PRIVATE_LOCK -as -.Vt "flags" -field in the -.Vt pfil_head -structure. -Note that specifying private lock can break filters sharing the same -ruleset and/or state between different data link types. -Filtering points may be unregistered with the -.Fn pfil_head_unregister -function. 
+The function is supplied with a special versioned +.Vt struct pfil_head_args +structure that specifies the type and features of the head as well as a +human-readable name. +If the filtering point is ever to be destroyed, the subsystem that +created it must unregister it with a call to +.Fn pfil_head_unregister . .Pp -Packet filters register/unregister themselves with a filtering point -with the +Packet filtering systems may register an arbitrary number of filters, +for historical reasons named +.Em hooks . +To register a new hook, call .Fn pfil_add_hook -and +with a special versioned +.Vt struct pfil_hook_args +structure. +The structure specifies the type and features of the hook, a pointer to +the actual filtering function, and human-readable names for the filtering +module and its ruleset. +Hooks can later be removed with .Fn pfil_remove_hook -functions, respectively. -.I -The head is looked up using the -.Fn pfil_head_get -function, which takes the key and data link type that the packet filter -expects. -Filters may provide an argument to be passed to the filter when -invoked on a packet. -.Pp -When a filter is invoked, the packet appears just as if it -.Dq came off the wire . -That is, all protocol fields are in network byte order. -The filter is called with its specified argument, the pointer to the -pointer to the -.Vt mbuf -containing the packet, the pointer to the network -interface that the packet is traversing, and the direction -.Dv ( PFIL_IN -or -.Dv PFIL_OUT ) -that the packet is traveling. -The -.Vt flags -argument will indicate if an outgoing packet is simply being forwarded with the -value PFIL_FWD. -The filter may change which mbuf the -.Vt "mbuf\ **" -argument references. -The filter returns an error (errno) if the packet processing is to stop, or 0 -if the processing is to continue. -If the packet processing is to stop, it is the responsibility of the -filter to free the packet. -.Pp -Every filter hook is called with -.Nm -read lock held. -All heads uses the same lock within the same VNET instance. -Packet filter can use this lock instead of own locking model to -improve performance. -Since -.Nm -uses -.Xr rmlock 9 -.Fn pfil_rlock -and -.Fn pfil_runlock -require -.Va struct rm_priotracker -to be passed as argument. -Filter can acquire and release writer lock via -.Fn pfil_wlock -and -.Fn pfil_wunlock functions. -See -.Xr rmlock 9 -for more details. -.Sh FILTERING POINTS -Currently, filtering points are implemented for the following link types: .Pp -.Bl -tag -width "AF_INET6" -offset XXX -compact -.It AF_INET +To connect an existing +.Em hook +to an existing +.Em head +the +.Fn pfil_link +function shall be used. +The function is supplied with a versioned +.Vt struct pfil_link_args +structure that specifies either literal names of the hook and head or +pointers to them. +Typically +.Fn pfil_link +is called by filtering modules to automatically link their default ruleset +to the default filtering points. +It also serves the kernel side of +.Xr ioctl 2 +when the user changes the +.Nm +configuration with the help of the +.Xr pfilctl 8 +utility. +.Pp +For every packet traveling through a +.Em head +the latter shall invoke +.Fn pfil_run_hooks . +The function accepts either a +.Vt struct mbuf * +pointer or a +.Vt void * +pointer and a length. +If a hooked filtering module cannot understand the +.Vt void * +pointer, +.Nm +will provide it with a fake mbuf. +All calls to +.Fn pfil_run_hooks +are performed in the network +.Xr epoch 9 .
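A minimal end-to-end sketch of the new KPI (illustrative only: the module name "example", its drop-everything-inbound policy and the function names are hypothetical; the structure fields and calls follow the ipfilter and ethernet conversions later in this patch):

	static pfil_return_t
	example_check(struct mbuf **mp, struct ifnet *ifp, int flags,
	    void *ruleset __unused, struct inpcb *inp __unused)
	{
		/* Drop all inbound packets; pass everything else. */
		if (flags & PFIL_IN) {
			m_freem(*mp);	/* a dropping filter frees the packet */
			*mp = NULL;
			return (PFIL_DROPPED);
		}
		return (PFIL_PASS);
	}

	static void
	example_init(void)
	{
		struct pfil_hook_args pha;
		struct pfil_link_args pla;
		pfil_hook_t hook;

		pha.pa_version = PFIL_VERSION;
		pha.pa_flags = PFIL_IN | PFIL_OUT;
		pha.pa_type = PFIL_TYPE_IP4;
		pha.pa_func = example_check;
		pha.pa_ruleset = NULL;
		pha.pa_modname = "example";
		pha.pa_rulname = "default";
		hook = pfil_add_hook(&pha);

		pla.pa_version = PFIL_VERSION;
		pla.pa_flags = PFIL_IN | PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
		pla.pa_head = V_inet_pfil_head;
		pla.pa_hook = hook;
		if (pfil_link(&pla) != 0)
			printf("example: cannot link to the inet head\n");
	}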
+.Sh HEADS (filtering points) +By default the kernel creates the following heads: +.Bl -tag -width "ethernet" +.It inet IPv4 packets. -.It AF_INET6 +.It inet6 IPv6 packets. -.It AF_LINK +.It ethernet Link-layer packets. .El -.Sh RETURN VALUES -If successful, -.Fn pfil_head_get -returns the -.Vt pfil_head -structure for the given key/dlt. -The -.Fn pfil_add_hook -and -.Fn pfil_remove_hook -functions -return 0 if successful. -If called with flag -.Dv PFIL_WAITOK , -.Fn pfil_remove_hook -is expected to always succeed. .Pp -The -.Fn pfil_head_unregister -function -might sleep! +Default rulesets are automatically linked to these heads to preserve +historical behavior. .Sh SEE ALSO -.Xr bpf 4 , -.Xr if_bridge 4 , -.Xr rmlock 9 +.Xr ipfilter 4 , +.Xr ipfw 4 , +.Xr pf 4 , +.Xr pfilctl 8 .Sh HISTORY The .Nm @@ -223,45 +157,8 @@ interface first appeared in .Nx 1.3 . The .Nm -input and output lists were originally implemented as -.In sys/queue.h -.Dv LIST -structures; -however this was changed in -.Nx 1.4 -to -.Dv TAILQ -structures. -This change was to allow the input and output filters to be processed in -reverse order, to allow the same path to be taken, in or out of the kernel. -.Pp -The -.Nm -interface was changed in 1.4T to accept a 3rd parameter to both -.Fn pfil_add_hook -and -.Fn pfil_remove_hook , -introducing the capability of per-protocol filtering. -This was done primarily in order to support filtering of IPv6. -.Pp -In 1.5K, the -.Nm -framework was changed to work with an arbitrary number of filtering points, -as well as be less IP-centric. -.Pp -Fine-grained locking was added in +interface was imported into .Fx 5.2 . -.Nm -lock export was added in -.Fx 10.0 . -.Sh BUGS -When a -.Vt pfil_head -is being modified, no traffic is diverted -(to avoid deadlock). -This means that traffic may be dropped unconditionally for a short period -of time. -.Fn pfil_run_hooks -will return -.Er ENOBUFS -to indicate this. +In +.Fx 13.0 +the interface was significantly rewritten. diff --git a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c index 309027b500cc..292a119e2c43 100644 --- a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c +++ b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c @@ -25,6 +25,7 @@ static const char rcsid[] = "@(#)$Id$"; # include "opt_random_ip_id.h" #endif #include +#include #include #include #include @@ -126,32 +127,33 @@ static void ipf_ifevent(arg, ifp) -static int -ipf_check_wrapper(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) +static pfil_return_t +ipf_check_wrapper(struct mbuf **mp, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { struct ip *ip = mtod(*mp, struct ip *); - int rv; + pfil_return_t rv; CURVNET_SET(ifp->if_vnet); - rv = ipf_check(&V_ipfmain, ip, ip->ip_hl << 2, ifp, (dir == PFIL_OUT), - mp); + rv = ipf_check(&V_ipfmain, ip, ip->ip_hl << 2, ifp, (flags & PFIL_OUT), + mp); CURVNET_RESTORE(); - return rv; + return (rv == 0 ?
PFIL_PASS : PFIL_DROPPED); } -# ifdef USE_INET6 -# include - -static int -ipf_check_wrapper6(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) +#ifdef USE_INET6 +static pfil_return_t +ipf_check_wrapper6(struct mbuf **mp, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { - int error; + pfil_return_t rv; CURVNET_SET(ifp->if_vnet); - error = ipf_check(&V_ipfmain, mtod(*mp, struct ip *), - sizeof(struct ip6_hdr), ifp, (dir == PFIL_OUT), mp); + rv = ipf_check(&V_ipfmain, mtod(*mp, struct ip *), + sizeof(struct ip6_hdr), ifp, (flags & PFIL_OUT), mp); CURVNET_RESTORE(); - return (error); + + return (rv == 0 ? PFIL_PASS : PFIL_DROPPED); } # endif #if defined(IPFILTER_LKM) @@ -1318,53 +1320,62 @@ ipf_inject(fin, m) return error; } -int ipf_pfil_unhook(void) { - struct pfil_head *ph_inet; -#ifdef USE_INET6 - struct pfil_head *ph_inet6; -#endif +VNET_DEFINE_STATIC(pfil_hook_t, ipf_inet_hook); +VNET_DEFINE_STATIC(pfil_hook_t, ipf_inet6_hook); +#define V_ipf_inet_hook VNET(ipf_inet_hook) +#define V_ipf_inet6_hook VNET(ipf_inet6_hook) - ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); - if (ph_inet != NULL) - pfil_remove_hook((void *)ipf_check_wrapper, NULL, - PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet); -# ifdef USE_INET6 - ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); - if (ph_inet6 != NULL) - pfil_remove_hook((void *)ipf_check_wrapper6, NULL, - PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6); -# endif +int ipf_pfil_unhook(void) { + + pfil_remove_hook(V_ipf_inet_hook); + +#ifdef USE_INET6 + pfil_remove_hook(V_ipf_inet6_hook); +#endif return (0); } int ipf_pfil_hook(void) { - struct pfil_head *ph_inet; + struct pfil_hook_args pha; + struct pfil_link_args pla; + int error, error6; + + pha.pa_version = PFIL_VERSION; + pha.pa_flags = PFIL_IN | PFIL_OUT; + pha.pa_modname = "ipfilter"; + pha.pa_rulname = "default"; + pha.pa_func = ipf_check_wrapper; + pha.pa_ruleset = NULL; + pha.pa_type = PFIL_TYPE_IP4; + V_ipf_inet_hook = pfil_add_hook(&pha); + #ifdef USE_INET6 - struct pfil_head *ph_inet6; + pha.pa_func = ipf_check_wrapper6; + pha.pa_type = PFIL_TYPE_IP6; + V_ipf_inet6_hook = pfil_add_hook(&pha); #endif - ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); -# ifdef USE_INET6 - ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); -# endif - if (ph_inet == NULL -# ifdef USE_INET6 - && ph_inet6 == NULL -# endif - ) { - return ENODEV; - } + pla.pa_version = PFIL_VERSION; + pla.pa_flags = PFIL_IN | PFIL_OUT | + PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet_pfil_head; + pla.pa_hook = V_ipf_inet_hook; + error = pfil_link(&pla); - if (ph_inet != NULL) - pfil_add_hook((void *)ipf_check_wrapper, NULL, - PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet); -# ifdef USE_INET6 - if (ph_inet6 != NULL) - pfil_add_hook((void *)ipf_check_wrapper6, NULL, - PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6); -# endif - return (0); + error6 = 0; +#ifdef USE_INET6 + pla.pa_head = V_inet6_pfil_head; + pla.pa_hook = V_ipf_inet6_hook; + error6 = pfil_link(&pla); +#endif + + if (error || error6) + error = ENODEV; + else + error = 0; + + return (error); } void diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 578e0756498b..96dc046840d4 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -1970,9 +1970,9 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp) return; } - if (PFIL_HOOKED(&V_inet_pfil_hook) + if (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 - || PFIL_HOOKED(&V_inet6_pfil_hook) + || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif ) { if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) @@ -2230,9 +2230,9 
@@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, ETHER_BPF_MTAP(ifp, m); /* run the packet filter */ - if (PFIL_HOOKED(&V_inet_pfil_hook) + if (PFIL_HOOKED_IN(V_inet_pfil_head) #ifdef INET6 - || PFIL_HOOKED(&V_inet6_pfil_hook) + || PFIL_HOOKED_IN(V_inet6_pfil_head) #endif ) { BRIDGE_UNLOCK(sc); @@ -2270,9 +2270,9 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, BRIDGE_UNLOCK(sc); - if (PFIL_HOOKED(&V_inet_pfil_hook) + if (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 - || PFIL_HOOKED(&V_inet6_pfil_hook) + || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif ) { if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0) @@ -2409,7 +2409,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) #ifdef INET6 # define OR_PFIL_HOOKED_INET6 \ - || PFIL_HOOKED(&V_inet6_pfil_hook) + || PFIL_HOOKED_IN(V_inet6_pfil_head) #else # define OR_PFIL_HOOKED_INET6 #endif @@ -2427,7 +2427,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) if_inc_counter(iface, IFCOUNTER_IBYTES, m->m_pkthdr.len); \ /* Filter on the physical interface. */ \ if (V_pfil_local_phys && \ - (PFIL_HOOKED(&V_inet_pfil_hook) \ + (PFIL_HOOKED_IN(V_inet_pfil_head) \ OR_PFIL_HOOKED_INET6)) { \ if (bridge_pfil(&m, NULL, ifp, \ PFIL_IN) != 0 || m == NULL) { \ @@ -2517,9 +2517,9 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, } /* Filter on the bridge interface before broadcasting */ - if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook) + if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 - || PFIL_HOOKED(&V_inet6_pfil_hook) + || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif )) { if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0) @@ -2564,9 +2564,9 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, * pointer so we do not redundantly filter on the bridge for * each interface we broadcast on. */ - if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook) + if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head) #ifdef INET6 - || PFIL_HOOKED(&V_inet6_pfil_hook) + || PFIL_HOOKED_OUT(V_inet6_pfil_head) #endif )) { if (used == 0) { @@ -3101,6 +3101,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) struct ip *ip; struct llc llc1; u_int16_t ether_type; + pfil_return_t rv; snap = 0; error = -1; /* Default error if not error == 0 */ @@ -3172,14 +3173,14 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) } /* Run the packet through pfil before stripping link headers */ - if (PFIL_HOOKED(&V_link_pfil_hook) && V_pfil_ipfw != 0 && - dir == PFIL_OUT && ifp != NULL) { - - error = pfil_run_hooks(&V_link_pfil_hook, mp, ifp, dir, 0, - NULL); - - if (*mp == NULL || error != 0) /* packet consumed by filter */ - return (error); + if (PFIL_HOOKED_OUT(V_link_pfil_head) && V_pfil_ipfw != 0 && + dir == PFIL_OUT && ifp != NULL) { + switch (pfil_run_hooks(V_link_pfil_head, mp, ifp, dir, NULL)) { + case PFIL_DROPPED: + return (EPERM); + case PFIL_CONSUMED: + return (0); + } } /* Strip off the Ethernet header and keep a copy. 
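 * (bridge_pfil() re-attaches the saved copy once the IP-level hooks have
 * run, so the filtered payload can be re-encapsulated and forwarded.)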
*/ @@ -3217,6 +3218,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) /* * Run the packet through pfil */ + rv = PFIL_PASS; switch (ether_type) { case ETHERTYPE_IP: /* @@ -3226,25 +3228,19 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) * Keep the order: * in_if -> bridge_if -> out_if */ - if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL) - error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp, - dir, 0, NULL); - - if (*mp == NULL || error != 0) /* filter may consume */ + if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv = + pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) != + PFIL_PASS) break; - if (V_pfil_member && ifp != NULL) - error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, - dir, 0, NULL); - - if (*mp == NULL || error != 0) /* filter may consume */ + if (V_pfil_member && ifp != NULL && (rv = + pfil_run_hooks(V_inet_pfil_head, mp, ifp, dir, NULL)) != + PFIL_PASS) break; - if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL) - error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp, - dir, 0, NULL); - - if (*mp == NULL || error != 0) /* filter may consume */ + if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv = + pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) != + PFIL_PASS) break; /* check if we need to fragment the packet */ @@ -3280,34 +3276,32 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) break; #ifdef INET6 case ETHERTYPE_IPV6: - if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL) - error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp, - dir, 0, NULL); - - if (*mp == NULL || error != 0) /* filter may consume */ + if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv = + pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) != + PFIL_PASS) break; - if (V_pfil_member && ifp != NULL) - error = pfil_run_hooks(&V_inet6_pfil_hook, mp, ifp, - dir, 0, NULL); - - if (*mp == NULL || error != 0) /* filter may consume */ + if (V_pfil_member && ifp != NULL && (rv = + pfil_run_hooks(V_inet6_pfil_head, mp, ifp, dir, NULL)) != + PFIL_PASS) break; - if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL) - error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp, - dir, 0, NULL); + if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv = + pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) != + PFIL_PASS) + break; break; #endif - default: - error = 0; - break; } - if (*mp == NULL) - return (error); - if (error != 0) - goto bad; + switch (rv) { + case PFIL_CONSUMED: + return (0); + case PFIL_DROPPED: + return (EPERM); + default: + break; + } error = -1; diff --git a/sys/net/if_enc.c b/sys/net/if_enc.c index 01e416535dbd..7bb196b672c1 100644 --- a/sys/net/if_enc.c +++ b/sys/net/if_enc.c @@ -285,24 +285,24 @@ enc_hhook(int32_t hhook_type, int32_t hhook_id, void *udata, void *ctx_data, switch (hhook_id) { #ifdef INET case AF_INET: - ph = &V_inet_pfil_hook; + ph = V_inet_pfil_head; break; #endif #ifdef INET6 case AF_INET6: - ph = &V_inet6_pfil_hook; + ph = V_inet6_pfil_head; break; #endif default: ph = NULL; } - if (ph == NULL || !PFIL_HOOKED(ph)) + if (ph == NULL || (pdir == PFIL_OUT && !PFIL_HOOKED_OUT(ph)) || + (pdir == PFIL_IN && !PFIL_HOOKED_IN(ph))) return (0); /* Make a packet looks like it was received on enc(4) */ rcvif = (*ctx->mp)->m_pkthdr.rcvif; (*ctx->mp)->m_pkthdr.rcvif = ifp; - if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, 0, ctx->inp) != 0 || - *ctx->mp == NULL) { + if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, ctx->inp) != PFIL_PASS) { *ctx->mp = 
NULL; /* consumed by filter */ return (EACCES); } diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 628e32b17642..ff7705b2e21d 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -90,7 +90,7 @@ CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2); CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); #endif -VNET_DEFINE(struct pfil_head, link_pfil_hook); /* Packet filter hooks */ +VNET_DEFINE(pfil_head_t, link_pfil_head); /* Packet filter hooks */ /* netgraph node hooks for ng_ether(4) */ void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp); @@ -457,7 +457,6 @@ ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp) int ether_output_frame(struct ifnet *ifp, struct mbuf *m) { - int error; uint8_t pcp; pcp = ifp->if_pcp; @@ -465,15 +464,14 @@ ether_output_frame(struct ifnet *ifp, struct mbuf *m) !ether_set_pcp(&m, ifp, pcp)) return (0); - if (PFIL_HOOKED(&V_link_pfil_hook)) { - error = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, - PFIL_OUT, 0, NULL); - if (error != 0) + if (PFIL_HOOKED_OUT(V_link_pfil_head)) + switch (pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_OUT, + NULL)) { + case PFIL_DROPPED: return (EACCES); - - if (m == NULL) + case PFIL_CONSUMED: return (0); - } + } #ifdef EXPERIMENTAL #if defined(INET6) && defined(INET) @@ -737,14 +735,14 @@ SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); static void vnet_ether_init(__unused void *arg) { - int i; + struct pfil_head_args args; + + args.pa_version = PFIL_VERSION; + args.pa_flags = PFIL_IN | PFIL_OUT; + args.pa_type = PFIL_TYPE_ETHERNET; + args.pa_headname = PFIL_ETHER_NAME; + V_link_pfil_head = pfil_head_register(&args); - /* Initialize packet filter hooks. */ - V_link_pfil_hook.ph_type = PFIL_TYPE_AF; - V_link_pfil_hook.ph_af = AF_LINK; - if ((i = pfil_head_register(&V_link_pfil_hook)) != 0) - printf("%s: WARNING: unable to register pfil link hook, " - "error %d\n", __func__, i); #ifdef VIMAGE netisr_register_vnet(ðer_nh); #endif @@ -756,11 +754,8 @@ VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, static void vnet_ether_pfil_destroy(__unused void *arg) { - int i; - if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0) - printf("%s: WARNING: unable to unregister pfil link hook, " - "error %d\n", __func__, i); + pfil_head_unregister(V_link_pfil_head); } VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY, vnet_ether_pfil_destroy, NULL); @@ -818,10 +813,8 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__)); /* Do not grab PROMISC frames in case we are re-entered. 
*/ - if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) { - i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, 0, - NULL); - + if (PFIL_HOOKED_IN(V_link_pfil_head) && !(m->m_flags & M_PROMISC)) { + i = pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_IN, NULL); if (i != 0 || m == NULL) return; } diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 42241e74fb81..22cac21b2136 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -95,8 +95,9 @@ CK_STAILQ_HEAD(ifmultihead, ifmultiaddr); CK_STAILQ_HEAD(ifgrouphead, ifg_group); #ifdef _KERNEL -VNET_DECLARE(struct pfil_head, link_pfil_hook); /* packet filter hooks */ -#define V_link_pfil_hook VNET(link_pfil_hook) +VNET_DECLARE(struct pfil_head *, link_pfil_head); +#define V_link_pfil_head VNET(link_pfil_head) +#define PFIL_ETHER_NAME "ethernet" #define HHOOK_IPSEC_INET 0 #define HHOOK_IPSEC_INET6 1 diff --git a/sys/net/pfil.c b/sys/net/pfil.c index 96069123a935..acfb25467fee 100644 --- a/sys/net/pfil.c +++ b/sys/net/pfil.c @@ -4,6 +4,7 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * + * Copyright (c) 2019 Gleb Smirnoff * Copyright (c) 1996 Matthew R. Green * All rights reserved. * @@ -32,15 +33,15 @@ */ #include +#include #include +#include #include #include #include -#include #include #include #include -#include #include #include #include @@ -50,180 +51,167 @@ #include #include -static struct mtx pfil_global_lock; +static MALLOC_DEFINE(M_PFIL, "pfil", "pfil(9) packet filter hooks"); -MTX_SYSINIT(pfil_heads_lock, &pfil_global_lock, "pfil_head_list lock", - MTX_DEF); +static int pfil_ioctl(struct cdev *, u_long, caddr_t, int, struct thread *); +static struct cdevsw pfil_cdevsw = { + .d_ioctl = pfil_ioctl, + .d_name = PFILDEV, + .d_version = D_VERSION, +}; +static struct cdev *pfil_dev; -static struct packet_filter_hook *pfil_chain_get(int, struct pfil_head *); -static int pfil_chain_add(pfil_chain_t *, struct packet_filter_hook *, int); -static int pfil_chain_remove(pfil_chain_t *, void *, void *); -static int pfil_add_hook_priv(void *, void *, int, struct pfil_head *, bool); +static struct mtx pfil_lock; +MTX_SYSINIT(pfil_mtxinit, &pfil_lock, "pfil(9) lock", MTX_DEF); +#define PFIL_LOCK() mtx_lock(&pfil_lock) +#define PFIL_UNLOCK() mtx_unlock(&pfil_lock) +#define PFIL_LOCK_ASSERT() mtx_assert(&pfil_lock, MA_OWNED) + +#define PFIL_EPOCH net_epoch_preempt +#define PFIL_EPOCH_ENTER(et) epoch_enter_preempt(net_epoch_preempt, &(et)) +#define PFIL_EPOCH_EXIT(et) epoch_exit_preempt(net_epoch_preempt, &(et)) + +struct pfil_hook { + pfil_func_t hook_func; + void *hook_ruleset; + int hook_flags; + int hook_links; + enum pfil_types hook_type; + const char *hook_modname; + const char *hook_rulname; + LIST_ENTRY(pfil_hook) hook_list; +}; + +struct pfil_link { + CK_STAILQ_ENTRY(pfil_link) link_chain; + pfil_func_t link_func; + void *link_ruleset; + int link_flags; + struct pfil_hook *link_hook; + struct epoch_context link_epoch_ctx; +}; + +typedef CK_STAILQ_HEAD(pfil_chain, pfil_link) pfil_chain_t; +struct pfil_head { + int head_nhooksin; + int head_nhooksout; + pfil_chain_t head_in; + pfil_chain_t head_out; + int head_flags; + enum pfil_types head_type; + LIST_ENTRY(pfil_head) head_list; + const char *head_name; +}; LIST_HEAD(pfilheadhead, pfil_head); -VNET_DEFINE(struct pfilheadhead, pfil_head_list); +VNET_DEFINE_STATIC(struct pfilheadhead, pfil_head_list) = + LIST_HEAD_INITIALIZER(pfil_head_list); #define V_pfil_head_list VNET(pfil_head_list) -VNET_DEFINE(struct rmlock, pfil_lock); -#define V_pfil_lock VNET(pfil_lock) -#define 
PFIL_LOCK_INIT_REAL(l, t) \ - rm_init_flags(l, "PFil " t " rmlock", RM_RECURSE) -#define PFIL_LOCK_DESTROY_REAL(l) \ - rm_destroy(l) -#define PFIL_LOCK_INIT(p) do { \ - if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) { \ - PFIL_LOCK_INIT_REAL(&(p)->ph_lock, "private"); \ - (p)->ph_plock = &(p)->ph_lock; \ - } else \ - (p)->ph_plock = &V_pfil_lock; \ -} while (0) -#define PFIL_LOCK_DESTROY(p) do { \ - if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) \ - PFIL_LOCK_DESTROY_REAL((p)->ph_plock); \ -} while (0) +LIST_HEAD(pfilhookhead, pfil_hook); +VNET_DEFINE_STATIC(struct pfilhookhead, pfil_hook_list) = + LIST_HEAD_INITIALIZER(pfil_hook_list); +#define V_pfil_hook_list VNET(pfil_hook_list) -#define PFIL_TRY_RLOCK(p, t) rm_try_rlock((p)->ph_plock, (t)) -#define PFIL_RLOCK(p, t) rm_rlock((p)->ph_plock, (t)) -#define PFIL_WLOCK(p) rm_wlock((p)->ph_plock) -#define PFIL_RUNLOCK(p, t) rm_runlock((p)->ph_plock, (t)) -#define PFIL_WUNLOCK(p) rm_wunlock((p)->ph_plock) -#define PFIL_WOWNED(p) rm_wowned((p)->ph_plock) +static struct pfil_link *pfil_link_remove(pfil_chain_t *, pfil_hook_t ); +static void pfil_link_free(epoch_context_t); -#define PFIL_HEADLIST_LOCK() mtx_lock(&pfil_global_lock) -#define PFIL_HEADLIST_UNLOCK() mtx_unlock(&pfil_global_lock) +static __noinline int +pfil_fake_mbuf(pfil_func_t func, void *mem, struct ifnet *ifp, int flags, + void *ruleset, struct inpcb *inp) +{ + struct mbuf m, *mp; + pfil_return_t rv; + + (void)m_init(&m, M_NOWAIT, MT_DATA, M_NOFREE | M_PKTHDR); + m_extadd(&m, mem, PFIL_LENGTH(flags), NULL, NULL, NULL, 0, EXT_RXRING); + m.m_len = m.m_pkthdr.len = PFIL_LENGTH(flags); + mp = &m; + flags &= ~(PFIL_MEMPTR | PFIL_LENMASK); + + rv = func(&mp, ifp, flags, ruleset, inp); + if (rv == PFIL_PASS && mp != &m) { + /* + * Firewalls that need pfil_fake_mbuf() most likely don't + * know to return PFIL_REALLOCED. + */ + rv = PFIL_REALLOCED; + *(struct mbuf **)mem = mp; + } + + return (rv); +} /* * pfil_run_hooks() runs the specified packet filter hook chain. 
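 *
 * Calling-convention sketch (illustrative; it mirrors converted callers
 * such as ether_output_frame() elsewhere in this patch):
 *
 *	switch (pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_OUT, NULL)) {
 *	case PFIL_DROPPED:
 *		return (EACCES);	-- the hook already freed the packet
 *	case PFIL_CONSUMED:
 *		return (0);		-- the hook took the packet over
 *	default:
 *		break;			-- PFIL_PASS: m is still ours
 *	}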
*/ int -pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp) +pfil_run_hooks(struct pfil_head *head, pfil_packet_t p, struct ifnet *ifp, + int flags, struct inpcb *inp) { - struct rm_priotracker rmpt; - struct packet_filter_hook *pfh; - struct mbuf *m = *mp; - int rv = 0; + struct epoch_tracker et; + pfil_chain_t *pch; + struct pfil_link *link; + pfil_return_t rv, rvi; - PFIL_RLOCK(ph, &rmpt); - KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0")); - for (pfh = pfil_chain_get(dir, ph); pfh != NULL; - pfh = TAILQ_NEXT(pfh, pfil_chain)) { - if (pfh->pfil_func_flags != NULL) { - rv = (*pfh->pfil_func_flags)(pfh->pfil_arg, &m, ifp, - dir, flags, inp); - if (rv != 0 || m == NULL) - break; - } - if (pfh->pfil_func != NULL) { - rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir, - inp); - if (rv != 0 || m == NULL) - break; + if (PFIL_DIR(flags) == PFIL_IN) + pch = &head->head_in; + else if (__predict_true(PFIL_DIR(flags) == PFIL_OUT)) + pch = &head->head_out; + else + panic("%s: bogus flags %d", __func__, flags); + + rv = PFIL_PASS; + PFIL_EPOCH_ENTER(et); + CK_STAILQ_FOREACH(link, pch, link_chain) { + if ((flags & PFIL_MEMPTR) && !(link->link_flags & PFIL_MEMPTR)) + rvi = pfil_fake_mbuf(link->link_func, p.mem, ifp, + flags, link->link_ruleset, inp); + else + rvi = (*link->link_func)(p, ifp, flags, + link->link_ruleset, inp); + if (rvi == PFIL_DROPPED || rvi == PFIL_CONSUMED) { + rv = rvi; + break; + } else if (rvi == PFIL_REALLOCED) { + flags &= ~(PFIL_MEMPTR | PFIL_LENMASK); + rv = rvi; + } } - PFIL_RUNLOCK(ph, &rmpt); - *mp = m; - return (rv); -} - -static struct packet_filter_hook * -pfil_chain_get(int dir, struct pfil_head *ph) -{ - - if (dir == PFIL_IN) - return (TAILQ_FIRST(&ph->ph_in)); - else if (dir == PFIL_OUT) - return (TAILQ_FIRST(&ph->ph_out)); - else - return (NULL); -} - -/* - * pfil_try_rlock() acquires rm reader lock for specified head - * if this is immediately possible. - */ -int -pfil_try_rlock(struct pfil_head *ph, struct rm_priotracker *tracker) -{ - - return (PFIL_TRY_RLOCK(ph, tracker)); -} - -/* - * pfil_rlock() acquires rm reader lock for specified head. - */ -void -pfil_rlock(struct pfil_head *ph, struct rm_priotracker *tracker) -{ - - PFIL_RLOCK(ph, tracker); -} - -/* - * pfil_runlock() releases reader lock for specified head. - */ -void -pfil_runlock(struct pfil_head *ph, struct rm_priotracker *tracker) -{ - - PFIL_RUNLOCK(ph, tracker); -} - -/* - * pfil_wlock() acquires writer lock for specified head. - */ -void -pfil_wlock(struct pfil_head *ph) -{ - - PFIL_WLOCK(ph); -} - -/* - * pfil_wunlock() releases writer lock for specified head. - */ -void -pfil_wunlock(struct pfil_head *ph) -{ - - PFIL_WUNLOCK(ph); -} - -/* - * pfil_wowned() returns a non-zero value if the current thread owns - * an exclusive lock. - */ -int -pfil_wowned(struct pfil_head *ph) -{ - - return (PFIL_WOWNED(ph)); + PFIL_EPOCH_EXIT(et); + return (rv); } /* * pfil_head_register() registers a pfil_head with the packet filter hook * mechanism.
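 *
 * Registration sketch (illustrative; condensed from vnet_ether_init()
 * in this patch):
 *
 *	struct pfil_head_args args;
 *
 *	args.pa_version = PFIL_VERSION;
 *	args.pa_flags = PFIL_IN | PFIL_OUT;
 *	args.pa_type = PFIL_TYPE_ETHERNET;
 *	args.pa_headname = PFIL_ETHER_NAME;
 *	V_link_pfil_head = pfil_head_register(&args);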
*/ -int -pfil_head_register(struct pfil_head *ph) +pfil_head_t +pfil_head_register(struct pfil_head_args *pa) { - struct pfil_head *lph; + struct pfil_head *head, *list; - PFIL_HEADLIST_LOCK(); - LIST_FOREACH(lph, &V_pfil_head_list, ph_list) { - if (ph->ph_type == lph->ph_type && - ph->ph_un.phu_val == lph->ph_un.phu_val) { - PFIL_HEADLIST_UNLOCK(); - return (EEXIST); + MPASS(pa->pa_version == PFIL_VERSION); + + head = malloc(sizeof(struct pfil_head), M_PFIL, M_WAITOK); + + head->head_nhooksin = head->head_nhooksout = 0; + head->head_flags = pa->pa_flags; + head->head_type = pa->pa_type; + head->head_name = pa->pa_headname; + CK_STAILQ_INIT(&head->head_in); + CK_STAILQ_INIT(&head->head_out); + + PFIL_LOCK(); + LIST_FOREACH(list, &V_pfil_head_list, head_list) + if (strcmp(pa->pa_headname, list->head_name) == 0) { + printf("pfil: duplicate head \"%s\"\n", + pa->pa_headname); } - } - PFIL_LOCK_INIT(ph); - ph->ph_nhooks = 0; - TAILQ_INIT(&ph->ph_in); - TAILQ_INIT(&ph->ph_out); - LIST_INSERT_HEAD(&V_pfil_head_list, ph, ph_list); - PFIL_HEADLIST_UNLOCK(); - return (0); + LIST_INSERT_HEAD(&V_pfil_head_list, head, head_list); + PFIL_UNLOCK(); + + return (head); } /* @@ -231,245 +219,441 @@ pfil_head_register(struct pfil_head *ph) * mechanism. The producer of the hook promises that all outstanding * invocations of the hook have completed before it unregisters the hook. */ -int -pfil_head_unregister(struct pfil_head *ph) +void +pfil_head_unregister(pfil_head_t ph) { - struct packet_filter_hook *pfh, *pfnext; - - PFIL_HEADLIST_LOCK(); - LIST_REMOVE(ph, ph_list); - PFIL_HEADLIST_UNLOCK(); - TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_chain, pfnext) - free(pfh, M_IFADDR); - TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_chain, pfnext) - free(pfh, M_IFADDR); - PFIL_LOCK_DESTROY(ph); - return (0); + struct pfil_link *link, *next; + + PFIL_LOCK(); + LIST_REMOVE(ph, head_list); + + CK_STAILQ_FOREACH_SAFE(link, &ph->head_in, link_chain, next) { + link->link_hook->hook_links--; + free(link, M_PFIL); + } + CK_STAILQ_FOREACH_SAFE(link, &ph->head_out, link_chain, next) { + link->link_hook->hook_links--; + free(link, M_PFIL); + } + PFIL_UNLOCK(); } -/* - * pfil_head_get() returns the pfil_head for a given key/dlt. - */ -struct pfil_head * -pfil_head_get(int type, u_long val) +pfil_hook_t +pfil_add_hook(struct pfil_hook_args *pa) { - struct pfil_head *ph; + struct pfil_hook *hook, *list; - PFIL_HEADLIST_LOCK(); - LIST_FOREACH(ph, &V_pfil_head_list, ph_list) - if (ph->ph_type == type && ph->ph_un.phu_val == val) - break; - PFIL_HEADLIST_UNLOCK(); - return (ph); -} + MPASS(pa->pa_version == PFIL_VERSION); -/* - * pfil_add_hook_flags() adds a function to the packet filter hook. the - * flags are: - * PFIL_IN call me on incoming packets - * PFIL_OUT call me on outgoing packets - * PFIL_ALL call me on all of the above - * PFIL_WAITOK OK to call malloc with M_WAITOK. - */ -int -pfil_add_hook_flags(pfil_func_flags_t func, void *arg, int flags, - struct pfil_head *ph) -{ - return (pfil_add_hook_priv(func, arg, flags, ph, true)); -} + hook = malloc(sizeof(struct pfil_hook), M_PFIL, M_WAITOK | M_ZERO); + hook->hook_func = pa->pa_func; + hook->hook_ruleset = pa->pa_ruleset; + hook->hook_flags = pa->pa_flags; + hook->hook_type = pa->pa_type; + hook->hook_modname = pa->pa_modname; + hook->hook_rulname = pa->pa_rulname; -/* - * pfil_add_hook() adds a function to the packet filter hook. 
the - * flags are: - * PFIL_IN call me on incoming packets - * PFIL_OUT call me on outgoing packets - * PFIL_ALL call me on all of the above - * PFIL_WAITOK OK to call malloc with M_WAITOK. - */ -int -pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) -{ - return (pfil_add_hook_priv(func, arg, flags, ph, false)); + PFIL_LOCK(); + LIST_FOREACH(list, &V_pfil_hook_list, hook_list) + if (strcmp(pa->pa_modname, list->hook_modname) == 0 && + strcmp(pa->pa_rulname, list->hook_rulname) == 0) { + printf("pfil: duplicate hook \"%s:%s\"\n", + pa->pa_modname, pa->pa_rulname); + } + LIST_INSERT_HEAD(&V_pfil_hook_list, hook, hook_list); + PFIL_UNLOCK(); + + return (hook); } static int -pfil_add_hook_priv(void *func, void *arg, int flags, - struct pfil_head *ph, bool hasflags) +pfil_unlink(struct pfil_link_args *pa, pfil_head_t head, pfil_hook_t hook) { - struct packet_filter_hook *pfh1 = NULL; - struct packet_filter_hook *pfh2 = NULL; - int err; + struct pfil_link *in, *out; - if (flags & PFIL_IN) { - pfh1 = (struct packet_filter_hook *)malloc(sizeof(*pfh1), - M_IFADDR, (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT); - if (pfh1 == NULL) { - err = ENOMEM; - goto error; + PFIL_LOCK_ASSERT(); + + if (pa->pa_flags & PFIL_IN) { + in = pfil_link_remove(&head->head_in, hook); + if (in != NULL) { + head->head_nhooksin--; + hook->hook_links--; } - } - if (flags & PFIL_OUT) { - pfh2 = (struct packet_filter_hook *)malloc(sizeof(*pfh1), - M_IFADDR, (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT); - if (pfh2 == NULL) { - err = ENOMEM; - goto error; + } else + in = NULL; + if (pa->pa_flags & PFIL_OUT) { + out = pfil_link_remove(&head->head_out, hook); + if (out != NULL) { + head->head_nhooksout--; + hook->hook_links--; } - } - PFIL_WLOCK(ph); - if (flags & PFIL_IN) { - pfh1->pfil_func_flags = hasflags ? func : NULL; - pfh1->pfil_func = hasflags ? NULL : func; - pfh1->pfil_arg = arg; - err = pfil_chain_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT); - if (err) - goto locked_error; - ph->ph_nhooks++; - } - if (flags & PFIL_OUT) { - pfh2->pfil_func_flags = hasflags ? func : NULL; - pfh2->pfil_func = hasflags ? NULL : func; - pfh2->pfil_arg = arg; - err = pfil_chain_add(&ph->ph_out, pfh2, flags & ~PFIL_IN); - if (err) { - if (flags & PFIL_IN) - pfil_chain_remove(&ph->ph_in, func, arg); - goto locked_error; - } - ph->ph_nhooks++; - } - PFIL_WUNLOCK(ph); - return (0); -locked_error: - PFIL_WUNLOCK(ph); -error: - if (pfh1 != NULL) - free(pfh1, M_IFADDR); - if (pfh2 != NULL) - free(pfh2, M_IFADDR); - return (err); -} + } else + out = NULL; + PFIL_UNLOCK(); -/* - * pfil_remove_hook_flags removes a specific function from the packet filter hook - * chain. - */ -int -pfil_remove_hook_flags(pfil_func_flags_t func, void *arg, int flags, - struct pfil_head *ph) -{ - return (pfil_remove_hook((pfil_func_t)func, arg, flags, ph)); -} + if (in != NULL) + epoch_call(PFIL_EPOCH, &in->link_epoch_ctx, pfil_link_free); + if (out != NULL) + epoch_call(PFIL_EPOCH, &out->link_epoch_ctx, pfil_link_free); -/* - * pfil_remove_hook removes a specific function from the packet filter hook - * chain. 
- */ -int -pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) -{ - int err = 0; - - PFIL_WLOCK(ph); - if (flags & PFIL_IN) { - err = pfil_chain_remove(&ph->ph_in, func, arg); - if (err == 0) - ph->ph_nhooks--; - } - if ((err == 0) && (flags & PFIL_OUT)) { - err = pfil_chain_remove(&ph->ph_out, func, arg); - if (err == 0) - ph->ph_nhooks--; - } - PFIL_WUNLOCK(ph); - return (err); -} - -/* - * Internal: Add a new pfil hook into a hook chain. - */ -static int -pfil_chain_add(pfil_chain_t *chain, struct packet_filter_hook *pfh1, int flags) -{ - struct packet_filter_hook *pfh; - - /* - * First make sure the hook is not already there. - */ - TAILQ_FOREACH(pfh, chain, pfil_chain) - if (((pfh->pfil_func != NULL && pfh->pfil_func == pfh1->pfil_func) || - (pfh->pfil_func_flags != NULL && - pfh->pfil_func_flags == pfh1->pfil_func_flags)) && - pfh->pfil_arg == pfh1->pfil_arg) - return (EEXIST); - - /* - * Insert the input list in reverse order of the output list so that - * the same path is followed in or out of the kernel. - */ - if (flags & PFIL_IN) - TAILQ_INSERT_HEAD(chain, pfh1, pfil_chain); + if (in == NULL && out == NULL) + return (ENOENT); else - TAILQ_INSERT_TAIL(chain, pfh1, pfil_chain); + return (0); +} + +int +pfil_link(struct pfil_link_args *pa) +{ + struct pfil_link *in, *out, *link; + struct pfil_head *head; + struct pfil_hook *hook; + int error; + + MPASS(pa->pa_version == PFIL_VERSION); + + if ((pa->pa_flags & (PFIL_IN | PFIL_UNLINK)) == PFIL_IN) + in = malloc(sizeof(*in), M_PFIL, M_WAITOK | M_ZERO); + else + in = NULL; + if ((pa->pa_flags & (PFIL_OUT | PFIL_UNLINK)) == PFIL_OUT) + out = malloc(sizeof(*out), M_PFIL, M_WAITOK | M_ZERO); + else + out = NULL; + + PFIL_LOCK(); + if (pa->pa_flags & PFIL_HEADPTR) + head = pa->pa_head; + else + LIST_FOREACH(head, &V_pfil_head_list, head_list) + if (strcmp(pa->pa_headname, head->head_name) == 0) + break; + if (pa->pa_flags & PFIL_HOOKPTR) + hook = pa->pa_hook; + else + LIST_FOREACH(hook, &V_pfil_hook_list, hook_list) + if (strcmp(pa->pa_modname, hook->hook_modname) == 0 && + strcmp(pa->pa_rulname, hook->hook_rulname) == 0) + break; + if (head == NULL || hook == NULL) { + error = ENOENT; + goto fail; + } + + if (pa->pa_flags & PFIL_UNLINK) + return (pfil_unlink(pa, head, hook)); + + if (head->head_type != hook->hook_type || + ((hook->hook_flags & pa->pa_flags) & ~head->head_flags)) { + error = EINVAL; + goto fail; + } + + if (pa->pa_flags & PFIL_IN) + CK_STAILQ_FOREACH(link, &head->head_in, link_chain) + if (link->link_hook == hook) { + error = EEXIST; + goto fail; + } + if (pa->pa_flags & PFIL_OUT) + CK_STAILQ_FOREACH(link, &head->head_out, link_chain) + if (link->link_hook == hook) { + error = EEXIST; + goto fail; + } + + if (pa->pa_flags & PFIL_IN) { + in->link_hook = hook; + in->link_func = hook->hook_func; + in->link_flags = hook->hook_flags; + in->link_ruleset = hook->hook_ruleset; + if (pa->pa_flags & PFIL_APPEND) + CK_STAILQ_INSERT_TAIL(&head->head_in, in, link_chain); + else + CK_STAILQ_INSERT_HEAD(&head->head_in, in, link_chain); + hook->hook_links++; + head->head_nhooksin++; + } + if (pa->pa_flags & PFIL_OUT) { + out->link_hook = hook; + out->link_func = hook->hook_func; + out->link_flags = hook->hook_flags; + out->link_ruleset = hook->hook_ruleset; + if (pa->pa_flags & PFIL_APPEND) + CK_STAILQ_INSERT_HEAD(&head->head_out, out, link_chain); + else + CK_STAILQ_INSERT_TAIL(&head->head_out, out, link_chain); + hook->hook_links++; + head->head_nhooksout++; + } + PFIL_UNLOCK(); + return (0); + +fail: + 
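+	/*
+	 * Common error path.  Both candidate links are allocated up
+	 * front, so release whichever went unused; free(9) accepts a
+	 * NULL pointer, which keeps this cleanup unconditional.
+	 */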
PFIL_UNLOCK(); + free(in, M_PFIL); + free(out, M_PFIL); + return (error); +} + +static void +pfil_link_free(epoch_context_t ctx) +{ + struct pfil_link *link; + + link = __containerof(ctx, struct pfil_link, link_epoch_ctx); + free(link, M_PFIL); +} + +/* + * pfil_remove_hook removes a filter from all filtering points. + */ +void +pfil_remove_hook(pfil_hook_t hook) +{ + struct pfil_head *head; + struct pfil_link *in, *out; + + PFIL_LOCK(); + LIST_FOREACH(head, &V_pfil_head_list, head_list) { +retry: + in = pfil_link_remove(&head->head_in, hook); + if (in != NULL) { + head->head_nhooksin--; + hook->hook_links--; + epoch_call(PFIL_EPOCH, &in->link_epoch_ctx, + pfil_link_free); + } + out = pfil_link_remove(&head->head_out, hook); + if (out != NULL) { + head->head_nhooksout--; + hook->hook_links--; + epoch_call(PFIL_EPOCH, &out->link_epoch_ctx, + pfil_link_free); + } + if (in != NULL || out != NULL) + /* What if some stupid admin put same filter twice? */ + goto retry; + } + LIST_REMOVE(hook, hook_list); + PFIL_UNLOCK(); + MPASS(hook->hook_links == 0); + free(hook, M_PFIL); } /* * Internal: Remove a pfil hook from a hook chain. */ -static int -pfil_chain_remove(pfil_chain_t *chain, void *func, void *arg) +static struct pfil_link * +pfil_link_remove(pfil_chain_t *chain, pfil_hook_t hook) { - struct packet_filter_hook *pfh; + struct pfil_link *link; - TAILQ_FOREACH(pfh, chain, pfil_chain) - if ((pfh->pfil_func == func || pfh->pfil_func_flags == func) && - pfh->pfil_arg == arg) { - TAILQ_REMOVE(chain, pfh, pfil_chain); - free(pfh, M_IFADDR); - return (0); + PFIL_LOCK_ASSERT(); + + CK_STAILQ_FOREACH(link, chain, link_chain) + if (link->link_hook == hook) { + CK_STAILQ_REMOVE(chain, link, pfil_link, link_chain); + return (link); } - return (ENOENT); + + return (NULL); } -/* - * Stuff that must be initialized for every instance (including the first of - * course). - */ static void -vnet_pfil_init(const void *unused __unused) +pfil_init(const void *unused __unused) { + struct make_dev_args args; + int error; - LIST_INIT(&V_pfil_head_list); - PFIL_LOCK_INIT_REAL(&V_pfil_lock, "shared"); + make_dev_args_init(&args); + args.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME; + args.mda_devsw = &pfil_cdevsw; + args.mda_uid = UID_ROOT; + args.mda_gid = GID_WHEEL; + args.mda_mode = 0600; + error = make_dev_s(&args, &pfil_dev, PFILDEV); + KASSERT(error == 0, ("%s: failed to create dev: %d", __func__, error)); } - /* - * Called for the removal of each instance. - */ -static void -vnet_pfil_uninit(const void *unused __unused) -{ - - KASSERT(LIST_EMPTY(&V_pfil_head_list), - ("%s: pfil_head_list %p not empty", __func__, &V_pfil_head_list)); - PFIL_LOCK_DESTROY_REAL(&V_pfil_lock); -} - -/* - * Starting up. - * - * VNET_SYSINIT is called for each existing vnet and each new vnet. * Make sure the pfil bits are first before any possible subsystem which * might piggyback on the SI_SUB_PROTO_PFIL. */ -VNET_SYSINIT(vnet_pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, - vnet_pfil_init, NULL); - +SYSINIT(pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, pfil_init, NULL); + /* - * Closing up shop. These are done in REVERSE ORDER. Not called on reboot. - * - * VNET_SYSUNINIT is called for each exiting vnet as it exits. + * User control interface. 
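+ *
+ * The device node is /dev/pfil and the interface is consumed by
+ * pfilctl(8).  A minimal user-space caller might look like this
+ * (sketch; error handling omitted, and the "pf"/"default-in" names
+ * are only an example):
+ *
+ *	struct pfilioc_link req = { .pio_flags = PFIL_IN };
+ *
+ *	strlcpy(req.pio_name, "inet", sizeof(req.pio_name));
+ *	strlcpy(req.pio_module, "pf", sizeof(req.pio_module));
+ *	strlcpy(req.pio_ruleset, "default-in", sizeof(req.pio_ruleset));
+ *	ioctl(open("/dev/pfil", O_RDWR), PFILIOC_LINK, &req);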
*/ -VNET_SYSUNINIT(vnet_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, - vnet_pfil_uninit, NULL); +static int pfilioc_listheads(struct pfilioc_list *); +static int pfilioc_listhooks(struct pfilioc_list *); +static int pfilioc_link(struct pfilioc_link *); + +static int +pfil_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, + struct thread *td) +{ + int error; + + error = 0; + switch (cmd) { + case PFILIOC_LISTHEADS: + error = pfilioc_listheads((struct pfilioc_list *)addr); + break; + case PFILIOC_LISTHOOKS: + error = pfilioc_listhooks((struct pfilioc_list *)addr); + break; + case PFILIOC_LINK: + error = pfilioc_link((struct pfilioc_link *)addr); + break; + default: + return (EINVAL); + } + + return (error); +} + +static int +pfilioc_listheads(struct pfilioc_list *req) +{ + struct pfil_head *head; + struct pfil_link *link; + struct pfilioc_head *iohead; + struct pfilioc_hook *iohook; + u_int nheads, nhooks, hd, hk; + int error; + + PFIL_LOCK(); +restart: + nheads = nhooks = 0; + LIST_FOREACH(head, &V_pfil_head_list, head_list) { + nheads++; + nhooks += head->head_nhooksin + head->head_nhooksout; + } + PFIL_UNLOCK(); + + if (req->pio_nheads < nheads || req->pio_nhooks < nhooks) { + req->pio_nheads = nheads; + req->pio_nhooks = nhooks; + return (0); + } + + iohead = malloc(sizeof(*iohead) * nheads, M_TEMP, M_WAITOK); + iohook = malloc(sizeof(*iohook) * nhooks, M_TEMP, M_WAITOK); + + hd = hk = 0; + PFIL_LOCK(); + LIST_FOREACH(head, &V_pfil_head_list, head_list) { + if (hd + 1 > nheads || + hk + head->head_nhooksin + head->head_nhooksout > nhooks) { + /* Configuration changed during malloc(). */ + free(iohead, M_TEMP); + free(iohook, M_TEMP); + goto restart; + } + strlcpy(iohead[hd].pio_name, head->head_name, + sizeof(iohead[0].pio_name)); + iohead[hd].pio_nhooksin = head->head_nhooksin; + iohead[hd].pio_nhooksout = head->head_nhooksout; + iohead[hd].pio_type = head->head_type; + CK_STAILQ_FOREACH(link, &head->head_in, link_chain) { + strlcpy(iohook[hk].pio_module, + link->link_hook->hook_modname, + sizeof(iohook[0].pio_module)); + strlcpy(iohook[hk].pio_ruleset, + link->link_hook->hook_rulname, + sizeof(iohook[0].pio_ruleset)); + hk++; + } + CK_STAILQ_FOREACH(link, &head->head_out, link_chain) { + strlcpy(iohook[hk].pio_module, + link->link_hook->hook_modname, + sizeof(iohook[0].pio_module)); + strlcpy(iohook[hk].pio_ruleset, + link->link_hook->hook_rulname, + sizeof(iohook[0].pio_ruleset)); + hk++; + } + hd++; + } + PFIL_UNLOCK(); + + error = copyout(iohead, req->pio_heads, + sizeof(*iohead) * min(hd, req->pio_nheads)); + if (error == 0) + error = copyout(iohook, req->pio_hooks, + sizeof(*iohook) * min(req->pio_nhooks, hk)); + + req->pio_nheads = hd; + req->pio_nhooks = hk; + + free(iohead, M_TEMP); + free(iohook, M_TEMP); + + return (error); +} + +static int +pfilioc_listhooks(struct pfilioc_list *req) +{ + struct pfil_hook *hook; + struct pfilioc_hook *iohook; + u_int nhooks, hk; + int error; + + PFIL_LOCK(); +restart: + nhooks = 0; + LIST_FOREACH(hook, &V_pfil_hook_list, hook_list) + nhooks++; + PFIL_UNLOCK(); + + if (req->pio_nhooks < nhooks) { + req->pio_nhooks = nhooks; + return (0); + } + + iohook = malloc(sizeof(*iohook) * nhooks, M_TEMP, M_WAITOK); + + hk = 0; + PFIL_LOCK(); + LIST_FOREACH(hook, &V_pfil_hook_list, hook_list) { + if (hk + 1 > nhooks) { + /* Configuration changed during malloc(). 
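The buffers were sized for the counts read under the previous lock hold, so a configuration that grew in the meantime cannot fit them; drop the allocations and retry.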
*/ + free(iohook, M_TEMP); + goto restart; + } + strlcpy(iohook[hk].pio_module, hook->hook_modname, + sizeof(iohook[0].pio_module)); + strlcpy(iohook[hk].pio_ruleset, hook->hook_rulname, + sizeof(iohook[0].pio_ruleset)); + iohook[hk].pio_type = hook->hook_type; + iohook[hk].pio_flags = hook->hook_flags; + hk++; + } + PFIL_UNLOCK(); + + error = copyout(iohook, req->pio_hooks, + sizeof(*iohook) * min(req->pio_nhooks, hk)); + req->pio_nhooks = hk; + free(iohook, M_TEMP); + + return (error); +} + +static int +pfilioc_link(struct pfilioc_link *req) +{ + struct pfil_link_args args; + + if (req->pio_flags & ~(PFIL_IN | PFIL_OUT | PFIL_UNLINK | PFIL_APPEND)) + return (EINVAL); + + args.pa_version = PFIL_VERSION; + args.pa_flags = req->pio_flags; + args.pa_headname = req->pio_name; + args.pa_modname = req->pio_module; + args.pa_rulname = req->pio_ruleset; + + return (pfil_link(&args)); +} diff --git a/sys/net/pfil.h b/sys/net/pfil.h index bfe108a1f1fe..13d78e6a277f 100644 --- a/sys/net/pfil.h +++ b/sys/net/pfil.h @@ -4,6 +4,7 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * + * Copyright (c) 2019 Gleb Smirnoff * Copyright (c) 1996 Matthew R. Green * All rights reserved. * @@ -34,94 +35,158 @@ #ifndef _NET_PFIL_H_ #define _NET_PFIL_H_ -#include -#include -#include -#include -#include -#include +#include +enum pfil_types { + PFIL_TYPE_IP4, + PFIL_TYPE_IP6, + PFIL_TYPE_ETHERNET, +}; + +#define MAXPFILNAME 64 + +struct pfilioc_head { + char pio_name[MAXPFILNAME]; + int pio_nhooksin; + int pio_nhooksout; + enum pfil_types pio_type; +}; + +struct pfilioc_hook { + char pio_module[MAXPFILNAME]; + char pio_ruleset[MAXPFILNAME]; + int pio_flags; + enum pfil_types pio_type; +}; + +struct pfilioc_list { + u_int pio_nheads; + u_int pio_nhooks; + struct pfilioc_head *pio_heads; + struct pfilioc_hook *pio_hooks; +}; + +struct pfilioc_link { + char pio_name[MAXPFILNAME]; + char pio_module[MAXPFILNAME]; + char pio_ruleset[MAXPFILNAME]; + int pio_flags; +}; + +#define PFILDEV "pfil" +#define PFILIOC_LISTHEADS _IOWR('P', 1, struct pfilioc_list) +#define PFILIOC_LISTHOOKS _IOWR('P', 2, struct pfilioc_list) +#define PFILIOC_LINK _IOW('P', 3, struct pfilioc_link) + +#define PFIL_IN 0x00010000 +#define PFIL_OUT 0x00020000 +#define PFIL_FWD 0x00040000 +#define PFIL_DIR(f) ((f) & (PFIL_IN|PFIL_OUT)) +#define PFIL_MEMPTR 0x00080000 +#define PFIL_HEADPTR 0x00100000 +#define PFIL_HOOKPTR 0x00200000 +#define PFIL_APPEND 0x00400000 +#define PFIL_UNLINK 0x00800000 +#define PFIL_LENMASK 0x0000ffff +#define PFIL_LENGTH(f) ((f) & PFIL_LENMASK) + +#ifdef _KERNEL struct mbuf; struct ifnet; struct inpcb; -typedef int (*pfil_func_t)(void *, struct mbuf **, struct ifnet *, int, - struct inpcb *); -typedef int (*pfil_func_flags_t)(void *, struct mbuf **, struct ifnet *, - int, int, struct inpcb *); +typedef union { + struct mbuf **m; + void *mem; +} pfil_packet_t __attribute__((__transparent_union__)); + +typedef enum { + PFIL_PASS = 0, + PFIL_DROPPED, + PFIL_CONSUMED, + PFIL_REALLOCED, +} pfil_return_t; + +typedef pfil_return_t (*pfil_func_t)(pfil_packet_t, struct ifnet *, int, + void *, struct inpcb *); +/* + * A pfil head is created by a packet intercept point. + * + * A pfil hook is created by a packet filter. + * + * Hooks are chained on heads. Historically some hooking happens + * automatically, e.g. ipfw(4), pf(4) and ipfilter(4) would register + * theirselves on IPv4 and IPv6 input/output. 
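+ *
+ * With this KPI the linkage is explicit.  A filter typically creates
+ * its hook and links it to a head along these lines (illustrative
+ * sketch only; "myfilter" and myfilter_check are hypothetical names):
+ *
+ *	struct pfil_hook_args pha = {
+ *		.pa_version = PFIL_VERSION,
+ *		.pa_flags = PFIL_IN | PFIL_OUT,
+ *		.pa_type = PFIL_TYPE_IP4,
+ *		.pa_func = myfilter_check,
+ *		.pa_modname = "myfilter",
+ *		.pa_rulname = "default",
+ *	};
+ *	struct pfil_link_args pla = {
+ *		.pa_version = PFIL_VERSION,
+ *		.pa_flags = PFIL_IN | PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR,
+ *		.pa_head = V_inet_pfil_head,
+ *		.pa_hook = pfil_add_hook(&pha),
+ *	};
+ *
+ *	(void)pfil_link(&pla);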
+ */ + +typedef struct pfil_hook * pfil_hook_t; +typedef struct pfil_head * pfil_head_t; /* - * The packet filter hooks are designed for anything to call them to - * possibly intercept the packet. Multiple filter hooks are chained - * together and after each other in the specified order. + * Give us a chance to modify pfil_xxx_args structures in future. */ -struct packet_filter_hook { - TAILQ_ENTRY(packet_filter_hook) pfil_chain; - pfil_func_t pfil_func; - pfil_func_flags_t pfil_func_flags; - void *pfil_arg; -}; +#define PFIL_VERSION 1 -#define PFIL_IN 0x00000001 -#define PFIL_OUT 0x00000002 -#define PFIL_WAITOK 0x00000004 -#define PFIL_FWD 0x00000008 -#define PFIL_ALL (PFIL_IN|PFIL_OUT) - -typedef TAILQ_HEAD(pfil_chain, packet_filter_hook) pfil_chain_t; - -#define PFIL_TYPE_AF 1 /* key is AF_* type */ -#define PFIL_TYPE_IFNET 2 /* key is ifnet pointer */ - -#define PFIL_FLAG_PRIVATE_LOCK 0x01 /* Personal lock instead of global */ - -/* - * A pfil head is created by each protocol or packet intercept point. - * For packet is then run through the hook chain for inspection. - */ -struct pfil_head { - pfil_chain_t ph_in; - pfil_chain_t ph_out; - int ph_type; - int ph_nhooks; -#if defined( __linux__ ) || defined( _WIN32 ) - rwlock_t ph_mtx; -#else - struct rmlock *ph_plock; /* Pointer to the used lock */ - struct rmlock ph_lock; /* Private lock storage */ - int flags; -#endif - union { - u_long phu_val; - void *phu_ptr; - } ph_un; -#define ph_af ph_un.phu_val -#define ph_ifnet ph_un.phu_ptr - LIST_ENTRY(pfil_head) ph_list; +/* Argument structure used by packet filters to register themselves. */ +struct pfil_hook_args { + int pa_version; + int pa_flags; + enum pfil_types pa_type; + pfil_func_t pa_func; + void *pa_ruleset; + const char *pa_modname; + const char *pa_rulname; }; /* Public functions for pfil hook management by packet filters. */ -struct pfil_head *pfil_head_get(int, u_long); -int pfil_add_hook_flags(pfil_func_flags_t, void *, int, struct pfil_head *); -int pfil_add_hook(pfil_func_t, void *, int, struct pfil_head *); -int pfil_remove_hook_flags(pfil_func_flags_t, void *, int, struct pfil_head *); -int pfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *); -#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0) +pfil_hook_t pfil_add_hook(struct pfil_hook_args *); +void pfil_remove_hook(pfil_hook_t); -/* Public functions to run the packet inspection by protocols. */ -int pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *, int, - int, struct inpcb *inp); +/* Argument structure used by ioctl() and packet filters to set filters. */ +struct pfil_link_args { + int pa_version; + int pa_flags; + union { + const char *pa_headname; + pfil_head_t pa_head; + }; + union { + struct { + const char *pa_modname; + const char *pa_rulname; + }; + pfil_hook_t pa_hook; + }; +}; -/* Public functions for pfil head management by protocols. */ -int pfil_head_register(struct pfil_head *); -int pfil_head_unregister(struct pfil_head *); +/* Public function to configure filter chains. Used by ioctl() and filters. */ +int pfil_link(struct pfil_link_args *); -/* Public pfil locking functions for self managed locks by packet filters. 
*/ -int pfil_try_rlock(struct pfil_head *, struct rm_priotracker *); -void pfil_rlock(struct pfil_head *, struct rm_priotracker *); -void pfil_runlock(struct pfil_head *, struct rm_priotracker *); -void pfil_wlock(struct pfil_head *); -void pfil_wunlock(struct pfil_head *); -int pfil_wowned(struct pfil_head *ph); +/* Argument structure used by inspection points to register themselves. */ +struct pfil_head_args { + int pa_version; + int pa_flags; + enum pfil_types pa_type; + const char *pa_headname; +}; +/* Public functions for pfil head management by inspection points. */ +pfil_head_t pfil_head_register(struct pfil_head_args *); +void pfil_head_unregister(pfil_head_t); + +/* Public functions to run the packet inspection by inspection points. */ +int pfil_run_hooks(struct pfil_head *, pfil_packet_t, struct ifnet *, int, + struct inpcb *inp); +/* + * Minimally exposed structure to avoid function call in case of absence + * of any filters by protocols and macros to do the check. + */ +struct _pfil_head { + int head_nhooksin; + int head_nhooksout; +}; +#define PFIL_HOOKED_IN(p) (((struct _pfil_head *)(p))->head_nhooksin > 0) +#define PFIL_HOOKED_OUT(p) (((struct _pfil_head *)(p))->head_nhooksout > 0) + +#endif /* _KERNEL */ #endif /* _NET_PFIL_H_ */ diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index 643a75e2294b..77a08b1a8af1 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -90,11 +90,11 @@ __FBSDID("$FreeBSD$"); #include #include -#include #include #include #include #include +#include #include #include @@ -228,12 +228,11 @@ ip_tryforward(struct mbuf *m) /* * Run through list of ipfilter hooks for input packets */ - if (!PFIL_HOOKED(&V_inet_pfil_hook)) + if (!PFIL_HOOKED_IN(V_inet_pfil_head)) goto passin; - if (pfil_run_hooks( - &V_inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, 0, NULL) || - m == NULL) + if (pfil_run_hooks(V_inet_pfil_head, &m, m->m_pkthdr.rcvif, PFIL_IN, + NULL) != PFIL_PASS) goto drop; M_ASSERTVALID(m); @@ -321,13 +320,12 @@ ip_tryforward(struct mbuf *m) /* * Step 5: outgoing firewall packet processing */ - if (!PFIL_HOOKED(&V_inet_pfil_hook)) + if (!PFIL_HOOKED_OUT(V_inet_pfil_head)) goto passout; - if (pfil_run_hooks(&V_inet_pfil_hook, &m, nh.nh_ifp, PFIL_OUT, PFIL_FWD, - NULL) || m == NULL) { + if (pfil_run_hooks(V_inet_pfil_head, &m, nh.nh_ifp, + PFIL_OUT | PFIL_FWD, NULL) != PFIL_PASS) goto drop; - } M_ASSERTVALID(m); M_ASSERTPKTHDR(m); diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index dd00d13a4d71..a1ec5935a826 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -57,11 +57,11 @@ __FBSDID("$FreeBSD$"); #include #include -#include #include #include #include #include +#include #include #include #include @@ -134,7 +134,7 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_checkinterface), 0, "Verify packet arrives on correct interface"); -VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */ +VNET_DEFINE(pfil_head_t, inet_pfil_head); /* Packet filter hooks */ static struct netisr_handler ip_nh = { .nh_name = "ip", @@ -301,6 +301,7 @@ SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops, void ip_init(void) { + struct pfil_head_args args; struct protosw *pr; int i; @@ -311,11 +312,11 @@ ip_init(void) ipreass_init(); /* Initialize packet filter hooks. 
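The head is created unconditionally; filters attach to it later through pfil_link(9).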
*/ - V_inet_pfil_hook.ph_type = PFIL_TYPE_AF; - V_inet_pfil_hook.ph_af = AF_INET; - if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) - printf("%s: WARNING: unable to register pfil hook, " - "error %d\n", __func__, i); + args.pa_version = PFIL_VERSION; + args.pa_flags = PFIL_IN | PFIL_OUT; + args.pa_type = PFIL_TYPE_IP4; + args.pa_headname = PFIL_INET_NAME; + V_inet_pfil_head = pfil_head_register(&args); if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, &V_ipsec_hhh_in[HHOOK_IPSEC_INET], @@ -377,10 +378,7 @@ ip_destroy(void *unused __unused) #endif netisr_unregister_vnet(&ip_nh); - if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0) - printf("%s: WARNING: unable to unregister pfil hook, " - "error %d\n", __func__, error); - + pfil_head_unregister(V_inet_pfil_head); error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); if (error != 0) { printf("%s: WARNING: unable to deregister input helper hook " @@ -599,11 +597,12 @@ ip_input(struct mbuf *m) */ /* Jump over all PFIL processing if hooks are not active. */ - if (!PFIL_HOOKED(&V_inet_pfil_hook)) + if (!PFIL_HOOKED_IN(V_inet_pfil_head)) goto passin; odst = ip->ip_dst; - if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, 0, NULL) != 0) + if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) != + PFIL_PASS) return; if (m == NULL) /* consumed by filter */ return; diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 7595c3f90535..0c7a26503d07 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -121,11 +121,16 @@ ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp, /* Run through list of hooks for output packets. */ odst.s_addr = ip->ip_dst.s_addr; - *error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, PFIL_OUT, 0, inp); - m = *mp; - if ((*error) != 0 || m == NULL) + switch (pfil_run_hooks(V_inet_pfil_head, mp, ifp, PFIL_OUT, inp)) { + case PFIL_DROPPED: + *error = EPERM; + /* FALLTHROUGH */ + case PFIL_CONSUMED: return 1; /* Finished */ - + case PFIL_PASS: + *error = 0; + } + m = *mp; ip = mtod(m, struct ip *); /* See if destination IP address was changed by packet filter. */ @@ -568,7 +573,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, #endif /* IPSEC */ /* Jump over all PFIL processing if hooks are not active. 
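With the new KPI this is a bare counter test on the head, so the common unfiltered case costs a single comparison.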
*/ - if (PFIL_HOOKED(&V_inet_pfil_hook)) { + if (PFIL_HOOKED_OUT(V_inet_pfil_head)) { switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) { case 1: /* Finished */ goto done; diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 86615a15ad26..d55a18bba91d 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -241,8 +241,9 @@ extern int (*ip_rsvp_vif)(struct socket *, struct sockopt *); extern void (*ip_rsvp_force_done)(struct socket *); extern int (*rsvp_input_p)(struct mbuf **, int *, int); -VNET_DECLARE(struct pfil_head, inet_pfil_hook); /* packet filter hooks */ -#define V_inet_pfil_hook VNET(inet_pfil_hook) +VNET_DECLARE(struct pfil_head *, inet_pfil_head); +#define V_inet_pfil_head VNET(inet_pfil_head) +#define PFIL_INET_NAME "inet" void in_delayed_cksum(struct mbuf *m); diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c index 4d063c360386..68d8f8e16fa1 100644 --- a/sys/netinet/siftr.c +++ b/sys/netinet/siftr.c @@ -94,10 +94,12 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #ifdef SIFTR_IPV6 #include +#include #include #endif /* SIFTR_IPV6 */ @@ -831,9 +833,9 @@ siftr_siftdata(struct pkt_node *pn, struct inpcb *inp, struct tcpcb *tp, * It's very important to use the M_NOWAIT flag with all function calls * that support it so that they won't sleep, otherwise you get a panic. */ -static int -siftr_chkpkt(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, - struct inpcb *inp) +static pfil_return_t +siftr_chkpkt(struct mbuf **m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { struct pkt_node *pn; struct ip *ip; @@ -841,9 +843,10 @@ siftr_chkpkt(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, struct tcpcb *tp; struct siftr_stats *ss; unsigned int ip_hl; - int inp_locally_locked; + int inp_locally_locked, dir; inp_locally_locked = 0; + dir = PFIL_DIR(flags); ss = DPCPU_PTR(ss); /* @@ -1007,15 +1010,13 @@ siftr_chkpkt(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, INP_RUNLOCK(inp); ret: - /* Returning 0 ensures pfil will not discard the pkt */ - return (0); + return (PFIL_PASS); } #ifdef SIFTR_IPV6 static int -siftr_chkpkt6(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, - struct inpcb *inp) +siftr_chkpkt6(struct mbuf **m, struct ifnet *ifp, int flags, struct inpcb *inp) { struct pkt_node *pn; struct ip6_hdr *ip6; @@ -1023,9 +1024,10 @@ siftr_chkpkt6(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, struct tcpcb *tp; struct siftr_stats *ss; unsigned int ip6_hl; - int inp_locally_locked; + int inp_locally_locked, dir; inp_locally_locked = 0; + dir = PFIL_DIR(flags); ss = DPCPU_PTR(ss); /* @@ -1138,37 +1140,53 @@ siftr_chkpkt6(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, } #endif /* #ifdef SIFTR_IPV6 */ - +VNET_DEFINE_STATIC(pfil_hook_t, siftr_inet_hook); +#define V_siftr_inet_hook VNET(siftr_inet_hook) +#ifdef INET6 +VNET_DEFINE_STATIC(pfil_hook_t, siftr_inet6_hook); +#define V_siftr_inet6_hook VNET(siftr_inet6_hook) +#endif static int siftr_pfil(int action) { - struct pfil_head *pfh_inet; -#ifdef SIFTR_IPV6 - struct pfil_head *pfh_inet6; -#endif + struct pfil_hook_args pha; + struct pfil_link_args pla; + + pha.pa_version = PFIL_VERSION; + pha.pa_flags = PFIL_IN | PFIL_OUT; + pha.pa_modname = "siftr"; + pha.pa_ruleset = NULL; + pha.pa_rulname = "default"; + + pla.pa_version = PFIL_VERSION; + pla.pa_flags = PFIL_IN | PFIL_OUT | + PFIL_HEADPTR | PFIL_HOOKPTR; + VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - 
pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); -#ifdef SIFTR_IPV6 - pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); -#endif if (action == HOOK) { - pfil_add_hook(siftr_chkpkt, NULL, - PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh_inet); + pha.pa_func = siftr_chkpkt; + pha.pa_type = PFIL_TYPE_IP4; + V_siftr_inet_hook = pfil_add_hook(&pha); + pla.pa_hook = V_siftr_inet_hook; + pla.pa_head = V_inet_pfil_head; + (void)pfil_link(&pla); #ifdef SIFTR_IPV6 - pfil_add_hook(siftr_chkpkt6, NULL, - PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh_inet6); + pha.pa_func = siftr_chkpkt6; + pha.pa_type = PFIL_TYPE_IP6; + V_siftr_inet6_hook = pfil_add_hook(&pha); + pla.pa_hook = V_siftr_inet6_hook; + pla.pa_head = V_inet6_pfil_head; + (void)pfil_link(&pla); #endif } else if (action == UNHOOK) { - pfil_remove_hook(siftr_chkpkt, NULL, - PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh_inet); + pfil_remove_hook(V_siftr_inet_hook); #ifdef SIFTR_IPV6 - pfil_remove_hook(siftr_chkpkt6, NULL, - PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh_inet6); + pfil_remove_hook(V_siftr_inet6_hook); #endif } CURVNET_RESTORE(); diff --git a/sys/netinet6/ip6_fastfwd.c b/sys/netinet6/ip6_fastfwd.c index 8f8b176607c5..11eb0e7548d5 100644 --- a/sys/netinet6/ip6_fastfwd.c +++ b/sys/netinet6/ip6_fastfwd.c @@ -156,10 +156,10 @@ ip6_tryforward(struct mbuf *m) /* * Incoming packet firewall processing. */ - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + if (!PFIL_HOOKED_IN(V_inet6_pfil_head)) goto passin; - if (pfil_run_hooks(&V_inet6_pfil_hook, &m, rcvif, PFIL_IN, 0, - NULL) != 0 || m == NULL) + if (pfil_run_hooks(V_inet6_pfil_head, &m, rcvif, PFIL_IN, NULL) != + PFIL_PASS) goto dropin; /* * If packet filter sets the M_FASTFWD_OURS flag, this means @@ -195,7 +195,7 @@ ip6_tryforward(struct mbuf *m) in6_ifstat_inc(rcvif, ifs6_in_noroute); goto dropin; } - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) { + if (!PFIL_HOOKED_OUT(V_inet6_pfil_head)) { if (m->m_pkthdr.len > nh.nh_mtu) { in6_ifstat_inc(nh.nh_ifp, ifs6_in_toobig); icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu); @@ -208,8 +208,8 @@ ip6_tryforward(struct mbuf *m) /* * Outgoing packet firewall processing. */ - if (pfil_run_hooks(&V_inet6_pfil_hook, &m, nh.nh_ifp, PFIL_OUT, - PFIL_FWD, NULL) != 0 || m == NULL) + if (pfil_run_hooks(V_inet6_pfil_head, &m, nh.nh_ifp, PFIL_OUT | + PFIL_FWD, NULL) != PFIL_PASS) goto dropout; /* diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c index ed743f65c867..0676a58225a3 100644 --- a/sys/netinet6/ip6_forward.c +++ b/sys/netinet6/ip6_forward.c @@ -320,15 +320,14 @@ ip6_forward(struct mbuf *m, int srcrt) in6_clearscope(&ip6->ip6_dst); /* Jump over all PFIL processing if hooks are not active. */ - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + if (!PFIL_HOOKED_OUT(V_inet6_pfil_head)) goto pass; odst = ip6->ip6_dst; /* Run through list of hooks for forwarded packets. */ - error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, - PFIL_FWD, NULL); - if (error != 0 || m == NULL) - goto freecopy; /* consumed by filter */ + if (pfil_run_hooks(V_inet6_pfil_head, &m, rt->rt_ifp, PFIL_OUT | + PFIL_FWD, NULL) != PFIL_PASS) + goto freecopy; ip6 = mtod(m, struct ip6_hdr *); /* See if destination IP address was changed by packet filter. 
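A filter (a NAT rule, for instance) may have rewritten ip6_dst, in which case the forwarding decision made earlier is stale.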
*/ diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 712b9923d8e7..531cfff43f0e 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -191,7 +191,7 @@ SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRDQMAXLEN, intr_direct_queue_maxlen, #endif -VNET_DEFINE(struct pfil_head, inet6_pfil_hook); +VNET_DEFINE(pfil_head_t, inet6_pfil_head); VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat); VNET_PCPUSTAT_SYSINIT(ip6stat); @@ -214,6 +214,7 @@ static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); void ip6_init(void) { + struct pfil_head_args args; struct protosw *pr; int i; @@ -227,11 +228,11 @@ ip6_init(void) &V_in6_ifaddrhmask); /* Initialize packet filter hooks. */ - V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF; - V_inet6_pfil_hook.ph_af = AF_INET6; - if ((i = pfil_head_register(&V_inet6_pfil_hook)) != 0) - printf("%s: WARNING: unable to register pfil hook, " - "error %d\n", __func__, i); + args.pa_version = PFIL_VERSION; + args.pa_flags = PFIL_IN | PFIL_OUT; + args.pa_type = PFIL_TYPE_IP6; + args.pa_headname = PFIL_INET6_NAME; + V_inet6_pfil_head = pfil_head_register(&args); if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET6, &V_ipsec_hhh_in[HHOOK_IPSEC_INET6], @@ -359,9 +360,7 @@ ip6_destroy(void *unused __unused) #endif netisr_unregister_vnet(&ip6_nh); - if ((error = pfil_head_unregister(&V_inet6_pfil_hook)) != 0) - printf("%s: WARNING: unable to unregister pfil hook, " - "error %d\n", __func__, error); + pfil_head_unregister(V_inet6_pfil_head); error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET6]); if (error != 0) { printf("%s: WARNING: unable to deregister input helper hook " @@ -758,14 +757,12 @@ ip6_input(struct mbuf *m) */ /* Jump over all PFIL processing if hooks are not active. */ - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + if (!PFIL_HOOKED_IN(V_inet6_pfil_head)) goto passin; odst = ip6->ip6_dst; - if (pfil_run_hooks(&V_inet6_pfil_hook, &m, - m->m_pkthdr.rcvif, PFIL_IN, 0, NULL)) - return; - if (m == NULL) /* consumed by filter */ + if (pfil_run_hooks(V_inet6_pfil_head, &m, m->m_pkthdr.rcvif, PFIL_IN, + NULL) != PFIL_PASS) return; ip6 = mtod(m, struct ip6_hdr *); srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst); diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 741521abb8a1..e36beb355b38 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -792,16 +792,21 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, } /* Jump over all PFIL processing if hooks are not active. */ - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + if (!PFIL_HOOKED_OUT(V_inet6_pfil_head)) goto passout; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ - error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, 0, inp); - if (error != 0 || m == NULL) + switch (pfil_run_hooks(V_inet6_pfil_head, &m, ifp, PFIL_OUT, inp)) { + case PFIL_PASS: + ip6 = mtod(m, struct ip6_hdr *); + break; + case PFIL_DROPPED: + error = EPERM; + /* FALLTHROUGH */ + case PFIL_CONSUMED: goto done; - /* adjust pointer */ - ip6 = mtod(m, struct ip6_hdr *); + } needfiblookup = 0; /* See if destination IP address was changed by packet filter. 
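If it was, needfiblookup is raised so the route is re-resolved for the new destination.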
*/ diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index f235572dd03e..bf15f833b326 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -346,8 +346,10 @@ VNET_DECLARE(int, ip6_use_defzone); /* Whether to use the default scope * zone when unspecified */ #define V_ip6_use_defzone VNET(ip6_use_defzone) -VNET_DECLARE (struct pfil_head, inet6_pfil_hook); /* packet filter hooks */ -#define V_inet6_pfil_hook VNET(inet6_pfil_hook) +VNET_DECLARE(struct pfil_head *, inet6_pfil_head); +#define V_inet6_pfil_head VNET(inet6_pfil_head) +#define PFIL_INET6_NAME "inet6" + #ifdef IPSTEALTH VNET_DECLARE(int, ip6stealth); #define V_ip6stealth VNET(ip6stealth) diff --git a/sys/netpfil/ipfw/ip_fw_eaction.c b/sys/netpfil/ipfw/ip_fw_eaction.c index 05cc174cb283..1cb2f812936c 100644 --- a/sys/netpfil/ipfw/ip_fw_eaction.c +++ b/sys/netpfil/ipfw/ip_fw_eaction.c @@ -38,9 +38,9 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include /* ip_fw.h requires IFNAMSIZ */ +#include #include #include /* struct ipfw_rule_ref */ #include diff --git a/sys/netpfil/ipfw/ip_fw_pfil.c b/sys/netpfil/ipfw/ip_fw_pfil.c index feb4a20f9b69..5a3cd052bb2d 100644 --- a/sys/netpfil/ipfw/ip_fw_pfil.c +++ b/sys/netpfil/ipfw/ip_fw_pfil.c @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -85,10 +86,6 @@ int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); /* Forward declarations. */ static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int); -int ipfw_check_packet(void *, struct mbuf **, struct ifnet *, int, - struct inpcb *); -int ipfw_check_frame(void *, struct mbuf **, struct ifnet *, int, - struct inpcb *); #ifdef SYSCTL_NODE @@ -120,16 +117,17 @@ SYSEND * dummynet, divert, netgraph or other modules. * The packet may be consumed. */ -int -ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, - struct inpcb *inp) +static pfil_return_t +ipfw_check_packet(struct mbuf **m0, struct ifnet *ifp, int dir, + void *ruleset __unused, struct inpcb *inp) { struct ip_fw_args args; struct m_tag *tag; - int ipfw, ret; + pfil_return_t ret; + int ipfw; /* convert dir to IPFW values */ - dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT; + dir = (dir & PFIL_IN) ? DIR_IN : DIR_OUT; args.flags = 0; again: /* @@ -155,17 +153,15 @@ ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL", __func__)); - /* breaking out of the switch means drop */ + ret = PFIL_PASS; switch (ipfw) { case IP_FW_PASS: /* next_hop may be set by ipfw_chk */ if ((args.flags & (IPFW_ARGS_NH4 | IPFW_ARGS_NH4PTR | - IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) == 0) { - ret = 0; + IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) == 0) break; - } #if (!defined(INET6) && !defined(INET)) - ret = EACCES; + ret = PFIL_DROPPED; #else { void *psa; @@ -210,8 +206,8 @@ ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, tag = m_tag_get(PACKET_TAG_IPFORWARD, len, M_NOWAIT); if (tag == NULL) { - ret = EACCES; - break; /* i.e. drop */ + ret = PFIL_DROPPED; + break; } } if ((args.flags & IPFW_ARGS_NH6) == 0) @@ -238,7 +234,7 @@ ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, * comparisons. */ if (sa6_embedscope(sa6, V_ip6_use_defzone) != 0) { - ret = EACCES; + ret = PFIL_DROPPED; break; } if (in6_localip(&sa6->sin6_addr)) @@ -250,20 +246,23 @@ ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, break; case IP_FW_DENY: - ret = EACCES; - break; /* i.e. 
drop */ + ret = PFIL_DROPPED; + break; case IP_FW_DUMMYNET: - ret = EACCES; - if (ip_dn_io_ptr == NULL) - break; /* i.e. drop */ + if (ip_dn_io_ptr == NULL) { + ret = PFIL_DROPPED; + break; + } MPASS(args.flags & IPFW_ARGS_REF); if (mtod(*m0, struct ip *)->ip_v == 4) - ret = ip_dn_io_ptr(m0, dir, &args); + (void )ip_dn_io_ptr(m0, dir, &args); else if (mtod(*m0, struct ip *)->ip_v == 6) - ret = ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args); - else - break; /* drop it */ + (void )ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args); + else { + ret = PFIL_DROPPED; + break; + } /* * XXX should read the return value. * dummynet normally eats the packet and sets *m0=NULL @@ -273,41 +272,42 @@ ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, */ if (*m0 != NULL) goto again; + ret = PFIL_CONSUMED; break; case IP_FW_TEE: case IP_FW_DIVERT: if (ip_divert_ptr == NULL) { - ret = EACCES; - break; /* i.e. drop */ + ret = PFIL_DROPPED; + break; } MPASS(args.flags & IPFW_ARGS_REF); - ret = ipfw_divert(m0, dir, &args.rule, + (void )ipfw_divert(m0, dir, &args.rule, (ipfw == IP_FW_TEE) ? 1 : 0); /* continue processing for the original packet (tee). */ if (*m0) goto again; + ret = PFIL_CONSUMED; break; case IP_FW_NGTEE: case IP_FW_NETGRAPH: if (ng_ipfw_input_p == NULL) { - ret = EACCES; - break; /* i.e. drop */ + ret = PFIL_DROPPED; + break; } MPASS(args.flags & IPFW_ARGS_REF); - ret = ng_ipfw_input_p(m0, dir, &args, + (void )ng_ipfw_input_p(m0, dir, &args, (ipfw == IP_FW_NGTEE) ? 1 : 0); if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */ goto again; /* continue with packet */ + ret = PFIL_CONSUMED; break; case IP_FW_NAT: /* honor one-pass in case of successful nat */ - if (V_fw_one_pass) { - ret = 0; + if (V_fw_one_pass) break; - } goto again; case IP_FW_REASS: @@ -317,7 +317,7 @@ ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, KASSERT(0, ("%s: unknown retval", __func__)); } - if (ret != 0) { + if (ret != PFIL_PASS) { if (*m0) FREE_PKT(*m0); *m0 = NULL; @@ -329,16 +329,17 @@ ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, /* * ipfw processing for ethernet packets (in and out). */ -int -ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, - struct inpcb *inp) +static pfil_return_t +ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int dir, + void *ruleset __unused, struct inpcb *inp) { struct ip_fw_args args; struct ether_header save_eh; struct ether_header *eh; struct m_tag *mtag; struct mbuf *m; - int i, ret; + pfil_return_t ret; + int i; args.flags = IPFW_ARGS_ETHER; again: @@ -367,7 +368,7 @@ ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */ args.m = m; /* the packet we are looking at */ - args.oif = dir == PFIL_OUT ? ifp: NULL; /* destination, if any */ + args.oif = dir & PFIL_OUT ? ifp: NULL; /* destination, if any */ args.eh = &save_eh; /* MAC header for bridged/MAC packets */ args.inp = inp; /* used by ipfw uid/gid/jail rules */ i = ipfw_chk(&args); @@ -388,46 +389,46 @@ ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, } *m0 = m; - ret = 0; + ret = PFIL_PASS; /* Check result of ipfw_chk() */ switch (i) { case IP_FW_PASS: break; case IP_FW_DENY: - ret = EACCES; - break; /* i.e. drop */ + ret = PFIL_DROPPED; + break; case IP_FW_DUMMYNET: - ret = EACCES; - - if (ip_dn_io_ptr == NULL) - break; /* i.e. drop */ - + if (ip_dn_io_ptr == NULL) { + ret = PFIL_DROPPED; + break; + } *m0 = NULL; - dir = (dir == PFIL_IN) ? 
DIR_IN : DIR_OUT; + dir = (dir & PFIL_IN) ? DIR_IN : DIR_OUT; MPASS(args.flags & IPFW_ARGS_REF); ip_dn_io_ptr(&m, dir | PROTO_LAYER2, &args); - return 0; + return (PFIL_CONSUMED); case IP_FW_NGTEE: case IP_FW_NETGRAPH: if (ng_ipfw_input_p == NULL) { - ret = EACCES; - break; /* i.e. drop */ + ret = PFIL_DROPPED; + break; } MPASS(args.flags & IPFW_ARGS_REF); - ret = ng_ipfw_input_p(m0, (dir == PFIL_IN) ? DIR_IN : DIR_OUT, + (void )ng_ipfw_input_p(m0, (dir & PFIL_IN) ? DIR_IN : DIR_OUT, &args, (i == IP_FW_NGTEE) ? 1 : 0); if (i == IP_FW_NGTEE) /* ignore errors for NGTEE */ goto again; /* continue with packet */ + ret = PFIL_CONSUMED; break; default: KASSERT(0, ("%s: unknown retval", __func__)); } - if (ret != 0) { + if (ret != PFIL_PASS) { if (*m0) FREE_PKT(*m0); *m0 = NULL; @@ -531,20 +532,62 @@ ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule, /* * attach or detach hooks for a given protocol family */ +VNET_DEFINE_STATIC(pfil_hook_t, ipfw_inet_hook); +VNET_DEFINE_STATIC(pfil_hook_t, ipfw_inet6_hook); +VNET_DEFINE_STATIC(pfil_hook_t, ipfw_link_hook); +#define V_ipfw_inet_hook VNET(ipfw_inet_hook) +#define V_ipfw_inet6_hook VNET(ipfw_inet6_hook) +#define V_ipfw_link_hook VNET(ipfw_link_hook) + static int ipfw_hook(int onoff, int pf) { - struct pfil_head *pfh; - pfil_func_t hook_func; + struct pfil_hook_args pha; + struct pfil_link_args pla; + pfil_hook_t *h; - pfh = pfil_head_get(PFIL_TYPE_AF, pf); - if (pfh == NULL) - return ENOENT; + pha.pa_version = PFIL_VERSION; + pha.pa_flags = PFIL_IN | PFIL_OUT; + pha.pa_modname = "ipfw"; + pha.pa_ruleset = NULL; - hook_func = (pf == AF_LINK) ? ipfw_check_frame : ipfw_check_packet; + pla.pa_version = PFIL_VERSION; + pla.pa_flags = PFIL_IN | PFIL_OUT | + PFIL_HEADPTR | PFIL_HOOKPTR; - (void) (onoff ? 
pfil_add_hook : pfil_remove_hook) - (hook_func, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh); + switch (pf) { + case AF_INET: + pha.pa_func = ipfw_check_packet; + pha.pa_type = PFIL_TYPE_IP4; + pha.pa_rulname = "default"; + h = &V_ipfw_inet_hook; + pla.pa_head = V_inet_pfil_head; + break; +#ifdef INET6 + case AF_INET6: + pha.pa_func = ipfw_check_packet; + pha.pa_type = PFIL_TYPE_IP6; + pha.pa_rulname = "default6"; + h = &V_ipfw_inet6_hook; + pla.pa_head = V_inet6_pfil_head; + break; +#endif + case AF_LINK: + pha.pa_func = ipfw_check_frame; + pha.pa_type = PFIL_TYPE_ETHERNET; + pha.pa_rulname = "default-link"; + h = &V_ipfw_link_hook; + pla.pa_head = V_link_pfil_head; + break; + } + + if (onoff) { + *h = pfil_add_hook(&pha); + pla.pa_hook = *h; + (void)pfil_link(&pla); + } else + if (*h != NULL) + pfil_remove_hook(*h); return 0; } diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index eba8a7f64a3c..91b2c1d33839 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -169,16 +169,16 @@ static void pf_tbladdr_copyout(struct pf_addr_wrap *); * Wrapper functions for pfil(9) hooks */ #ifdef INET -static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp); -static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp); +static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp, + int flags, void *ruleset __unused, struct inpcb *inp); +static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp, + int flags, void *ruleset __unused, struct inpcb *inp); #endif #ifdef INET6 -static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp); -static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, int flags, struct inpcb *inp); +static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp, + int flags, void *ruleset __unused, struct inpcb *inp); +static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp, + int flags, void *ruleset __unused, struct inpcb *inp); #endif static int hook_pf(void); @@ -4003,9 +4003,9 @@ shutdown_pf(void) } #ifdef INET -static int -pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, - struct inpcb *inp) +static pfil_return_t +pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { int chk; @@ -4015,14 +4015,12 @@ pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, *m = NULL; } - if (chk != PF_PASS) - return (EACCES); - return (0); + return (chk == PF_PASS ? PFIL_PASS : PFIL_DROPPED); } -static int -pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, - struct inpcb *inp) +static pfil_return_t +pf_check_out(struct mbuf **m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { int chk; @@ -4032,16 +4030,14 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, *m = NULL; } - if (chk != PF_PASS) - return (EACCES); - return (0); + return (chk == PF_PASS ? 
PFIL_PASS : PFIL_DROPPED); } #endif #ifdef INET6 -static int -pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, - struct inpcb *inp) +static pfil_return_t +pf_check6_in(struct mbuf **m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { int chk; @@ -4057,14 +4053,13 @@ pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, m_freem(*m); *m = NULL; } - if (chk != PF_PASS) - return (EACCES); - return (0); + + return (chk == PF_PASS ? PFIL_PASS : PFIL_DROPPED); } -static int -pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, - struct inpcb *inp) +static pfil_return_t +pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags, + void *ruleset __unused, struct inpcb *inp) { int chk; @@ -4075,45 +4070,76 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, m_freem(*m); *m = NULL; } - if (chk != PF_PASS) - return (EACCES); - return (0); + + return (chk == PF_PASS ? PFIL_PASS : PFIL_DROPPED); } #endif /* INET6 */ +#ifdef INET +VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_in_hook); +VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_out_hook); +#define V_pf_ip4_in_hook VNET(pf_ip4_in_hook) +#define V_pf_ip4_out_hook VNET(pf_ip4_out_hook) +#endif +#ifdef INET6 +VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_in_hook); +VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_out_hook); +#define V_pf_ip6_in_hook VNET(pf_ip6_in_hook) +#define V_pf_ip6_out_hook VNET(pf_ip6_out_hook) +#endif + static int hook_pf(void) { -#ifdef INET - struct pfil_head *pfh_inet; -#endif -#ifdef INET6 - struct pfil_head *pfh_inet6; -#endif + struct pfil_hook_args pha; + struct pfil_link_args pla; if (V_pf_pfil_hooked) return (0); + pha.pa_version = PFIL_VERSION; + pha.pa_modname = "pf"; + pha.pa_ruleset = NULL; + + pla.pa_version = PFIL_VERSION; + #ifdef INET - pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); - if (pfh_inet == NULL) - return (ESRCH); /* XXX */ - pfil_add_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); - pfil_add_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); + pha.pa_type = PFIL_TYPE_IP4; + pha.pa_func = pf_check_in; + pha.pa_flags = PFIL_IN; + pha.pa_rulname = "default-in"; + V_pf_ip4_in_hook = pfil_add_hook(&pha); + pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet_pfil_head; + pla.pa_hook = V_pf_ip4_in_hook; + (void)pfil_link(&pla); + pha.pa_func = pf_check_out; + pha.pa_flags = PFIL_OUT; + pha.pa_rulname = "default-out"; + V_pf_ip4_out_hook = pfil_add_hook(&pha); + pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet_pfil_head; + pla.pa_hook = V_pf_ip4_out_hook; + (void)pfil_link(&pla); #endif #ifdef INET6 - pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); - if (pfh_inet6 == NULL) { -#ifdef INET - pfil_remove_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, - pfh_inet); - pfil_remove_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, - pfh_inet); -#endif - return (ESRCH); /* XXX */ - } - pfil_add_hook_flags(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); - pfil_add_hook_flags(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); + pha.pa_type = PFIL_TYPE_IP6; + pha.pa_func = pf_check6_in; + pha.pa_flags = PFIL_IN; + pha.pa_rulname = "default-in6"; + V_pf_ip6_in_hook = pfil_add_hook(&pha); + pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet6_pfil_head; + pla.pa_hook = V_pf_ip6_in_hook; + (void)pfil_link(&pla); + pha.pa_func = pf_check6_out; + pha.pa_rulname = 
"default-out6"; + pha.pa_flags = PFIL_OUT; + V_pf_ip6_out_hook = pfil_add_hook(&pha); + pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; + pla.pa_head = V_inet6_pfil_head; + pla.pa_hook = V_pf_ip6_out_hook; + (void)pfil_link(&pla); #endif V_pf_pfil_hooked = 1; @@ -4123,33 +4149,17 @@ hook_pf(void) static int dehook_pf(void) { -#ifdef INET - struct pfil_head *pfh_inet; -#endif -#ifdef INET6 - struct pfil_head *pfh_inet6; -#endif if (V_pf_pfil_hooked == 0) return (0); #ifdef INET - pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); - if (pfh_inet == NULL) - return (ESRCH); /* XXX */ - pfil_remove_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, - pfh_inet); - pfil_remove_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, - pfh_inet); + pfil_remove_hook(V_pf_ip4_in_hook); + pfil_remove_hook(V_pf_ip4_out_hook); #endif #ifdef INET6 - pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); - if (pfh_inet6 == NULL) - return (ESRCH); /* XXX */ - pfil_remove_hook_flags(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, - pfh_inet6); - pfil_remove_hook_flags(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, - pfh_inet6); + pfil_remove_hook(V_pf_ip6_in_hook); + pfil_remove_hook(V_pf_ip6_out_hook); #endif V_pf_pfil_hooked = 0; From 12aec82c09464c8523cba1bfb1f612957d1c062d Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Thu, 31 Jan 2019 23:01:12 +0000 Subject: [PATCH 49/90] Remove iBCS2: also remove xenix syscall function support. Missed in r342243. --- sys/kern/makesyscalls.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sys/kern/makesyscalls.sh b/sys/kern/makesyscalls.sh index c794a5e87fbc..7d18fcabddf7 100644 --- a/sys/kern/makesyscalls.sh +++ b/sys/kern/makesyscalls.sh @@ -526,8 +526,7 @@ sed -e ' if (!flag("NOPROTO") && !flag("NODEF")) { if (funcname == "nosys" || funcname == "lkmnosys" || funcname == "sysarch" || funcname ~ /^freebsd/ || - funcname ~ /^linux/ || funcname ~ /^xenix/ || - funcname ~ /^cloudabi/) { + funcname ~ /^linux/ || funcname ~ /^cloudabi/) { printf("%s\t%s(struct thread *, struct %s *)", rettype, funcname, argalias) > sysdcl } else { @@ -546,8 +545,7 @@ sed -e ' } else { if (funcname == "nosys" || funcname == "sysarch" || funcname == "lkmnosys" || funcname ~ /^freebsd/ || - funcname ~ /^linux/ || funcname ~ /^xenix/ || - funcname ~ /^cloudabi/) { + funcname ~ /^linux/ || funcname ~ /^cloudabi/) { printf("%s, %s, NULL, 0, 0, %s, %s },", funcname, auditev, flags, thr_flag) > sysent column = column + length(funcname) + length(auditev) + length(flags) + 3 } else { From ab3cf2b4763d5efd1d18375c948fb86a159a3aa5 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Thu, 31 Jan 2019 23:21:18 +0000 Subject: [PATCH 50/90] Shar files may be seen as binary by grep. Suggest using -a to egrep to properly see executed commands. This is a minor improvement to the manpage. A better improvement would be removal or gigantic warnings. 
Sponsored by: Dell EMC MFC after: 1 week --- usr.bin/shar/shar.1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/usr.bin/shar/shar.1 b/usr.bin/shar/shar.1 index 432ae6fb86bc..c3153821250c 100644 --- a/usr.bin/shar/shar.1 +++ b/usr.bin/shar/shar.1 @@ -28,7 +28,7 @@ .\" @(#)shar.1 8.1 (Berkeley) 6/6/93 .\" $FreeBSD$ .\" -.Dd June 6, 1993 +.Dd January 31, 2019 .Dt SHAR 1 .Os .Sh NAME @@ -103,5 +103,5 @@ Archives produced using this implementation of .Nm may be easily examined with the command: .Bd -literal -offset indent -egrep -v '^[X#]' shar.file +egrep -av '^[X#]' shar.file .Ed From 7722142ba8f368c484cc5220ef55e1a2675ca685 Mon Sep 17 00:00:00 2001 From: Marcelo Araujo Date: Thu, 31 Jan 2019 23:32:19 +0000 Subject: [PATCH 51/90] Mostly a cosmetic change to replace strlen with strnlen. Obtained from: Project ACRN MFC after: 2 weeks --- usr.sbin/bhyve/bhyverun.c | 4 ++-- usr.sbin/bhyve/smbiostbl.c | 4 ++-- usr.sbin/bhyve/usb_mouse.c | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index 7be2b0287b8d..dc6e47a22f8d 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -233,8 +233,8 @@ usage(int code) " -W: force virtio to use single-vector MSI\n" " -x: local apic is in x2APIC mode\n" " -Y: disable MPtable generation\n", - progname, (int)strlen(progname), "", (int)strlen(progname), "", - (int)strlen(progname), ""); + progname, (int)strnlen(progname, PATH_MAX), "", (int)strnlen(progname, PATH_MAX), "", + (int)strnlen(progname, PATH_MAX), ""); exit(code); } diff --git a/usr.sbin/bhyve/smbiostbl.c b/usr.sbin/bhyve/smbiostbl.c index f9ee3adace24..27f960423755 100644 --- a/usr.sbin/bhyve/smbiostbl.c +++ b/usr.sbin/bhyve/smbiostbl.c @@ -558,7 +558,7 @@ smbios_generic_initializer(struct smbios_structure *template_entry, int len; string = template_strings[i]; - len = strlen(string) + 1; + len = strnlen(string, SMBIOS_MAX_LENGTH) + 1; memcpy(curaddr, string, len); curaddr += len; } @@ -611,7 +611,7 @@ smbios_type1_initializer(struct smbios_structure *template_entry, return (-1); MD5Init(&mdctx); - MD5Update(&mdctx, vmname, strlen(vmname)); + MD5Update(&mdctx, vmname, strnlen(vmname, PATH_MAX)); MD5Update(&mdctx, hostname, sizeof(hostname)); MD5Final(digest, &mdctx); diff --git a/usr.sbin/bhyve/usb_mouse.c b/usr.sbin/bhyve/usb_mouse.c index e61301207192..a81c9f8314f0 100644 --- a/usr.sbin/bhyve/usb_mouse.c +++ b/usr.sbin/bhyve/usb_mouse.c @@ -70,6 +70,7 @@ enum { UMSTR_MAX }; +#define UMOUSE_DESC_MAX_LEN 32 static const char *umouse_desc_strings[] = { "\x04\x09", "BHYVE", @@ -441,7 +442,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) goto done; } - slen = 2 + strlen(str) * 2; + slen = 2 + strnlen(str, UMOUSE_DESC_MAX_LEN) * 2; udata[0] = slen; udata[1] = UDESC_STRING; From 2790ca97d9f8b6c4e2b42b007fa7746ad187116a Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Fri, 1 Feb 2019 00:33:17 +0000 Subject: [PATCH 52/90] Fix build without INET6. 
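Presumably the breakage was an unused-variable error: the INET6-only per-VNET hook pointer was defined unconditionally while every reference to it sits under #ifdef INET6. The guard pattern, sketched here with a hypothetical name:

	#ifdef INET6
	VNET_DEFINE_STATIC(pfil_hook_t, example_inet6_hook); /* hypothetical */
	#define	V_example_inet6_hook	VNET(example_inet6_hook)
	#endif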
--- sys/netpfil/ipfw/ip_fw_pfil.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sys/netpfil/ipfw/ip_fw_pfil.c b/sys/netpfil/ipfw/ip_fw_pfil.c index 5a3cd052bb2d..25726bd4636e 100644 --- a/sys/netpfil/ipfw/ip_fw_pfil.c +++ b/sys/netpfil/ipfw/ip_fw_pfil.c @@ -533,10 +533,12 @@ ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule, * attach or detach hooks for a given protocol family */ VNET_DEFINE_STATIC(pfil_hook_t, ipfw_inet_hook); -VNET_DEFINE_STATIC(pfil_hook_t, ipfw_inet6_hook); -VNET_DEFINE_STATIC(pfil_hook_t, ipfw_link_hook); #define V_ipfw_inet_hook VNET(ipfw_inet_hook) +#ifdef INET6 +VNET_DEFINE_STATIC(pfil_hook_t, ipfw_inet6_hook); #define V_ipfw_inet6_hook VNET(ipfw_inet6_hook) +#endif +VNET_DEFINE_STATIC(pfil_hook_t, ipfw_link_hook); #define V_ipfw_link_hook VNET(ipfw_link_hook) static int From 2e15db7bcd57af2130a939db29804e5751f25420 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Fri, 1 Feb 2019 00:34:18 +0000 Subject: [PATCH 53/90] Hopefully fix compilation by other compilers. --- sbin/pfilctl/pfilctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sbin/pfilctl/pfilctl.c b/sbin/pfilctl/pfilctl.c index 363feabca116..e360c73cb279 100644 --- a/sbin/pfilctl/pfilctl.c +++ b/sbin/pfilctl/pfilctl.c @@ -94,9 +94,8 @@ main(int argc __unused, char *argv[] __unused) static void help(void) { - extern char *__progname; - fprintf(stderr, "usage: %s (heads|hooks|link|unlink)\n", __progname); + fprintf(stderr, "usage: %s (heads|hooks|link|unlink)\n", getprogname()); exit(0); } From 4edc7f418a977965207b33fc8d5f20346cc22e8b Mon Sep 17 00:00:00 2001 From: Marcelo Araujo Date: Fri, 1 Feb 2019 03:09:11 +0000 Subject: [PATCH 54/90] Revert r343634: Mostly a cosmetic change to replace strlen with strnlen. 
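For context, strnlen() differs from strlen() only when the buffer may lack a NUL terminator within the given bound; on properly terminated strings, as here, the two are interchangeable, which is presumably why the change was judged churn. A minimal illustration (hypothetical example, not from the tree):

	#include <assert.h>
	#include <string.h>

	static void
	strnlen_example(void)
	{
		/* Identical results on a NUL-terminated string. */
		assert(strlen("BHYVE") == strnlen("BHYVE", 32));
		/* strnlen() stops at the bound when no NUL is seen. */
		assert(strnlen("BHYVE", 3) == 3);
	}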
Requested by: kib and imp --- usr.sbin/bhyve/bhyverun.c | 4 ++-- usr.sbin/bhyve/smbiostbl.c | 4 ++-- usr.sbin/bhyve/usb_mouse.c | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index dc6e47a22f8d..7be2b0287b8d 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -233,8 +233,8 @@ usage(int code) " -W: force virtio to use single-vector MSI\n" " -x: local apic is in x2APIC mode\n" " -Y: disable MPtable generation\n", - progname, (int)strnlen(progname, PATH_MAX), "", (int)strnlen(progname, PATH_MAX), "", - (int)strnlen(progname, PATH_MAX), ""); + progname, (int)strlen(progname), "", (int)strlen(progname), "", + (int)strlen(progname), ""); exit(code); } diff --git a/usr.sbin/bhyve/smbiostbl.c b/usr.sbin/bhyve/smbiostbl.c index 27f960423755..f9ee3adace24 100644 --- a/usr.sbin/bhyve/smbiostbl.c +++ b/usr.sbin/bhyve/smbiostbl.c @@ -558,7 +558,7 @@ smbios_generic_initializer(struct smbios_structure *template_entry, int len; string = template_strings[i]; - len = strnlen(string, SMBIOS_MAX_LENGTH) + 1; + len = strlen(string) + 1; memcpy(curaddr, string, len); curaddr += len; } @@ -611,7 +611,7 @@ smbios_type1_initializer(struct smbios_structure *template_entry, return (-1); MD5Init(&mdctx); - MD5Update(&mdctx, vmname, strnlen(vmname, PATH_MAX)); + MD5Update(&mdctx, vmname, strlen(vmname)); MD5Update(&mdctx, hostname, sizeof(hostname)); MD5Final(digest, &mdctx); diff --git a/usr.sbin/bhyve/usb_mouse.c b/usr.sbin/bhyve/usb_mouse.c index a81c9f8314f0..e61301207192 100644 --- a/usr.sbin/bhyve/usb_mouse.c +++ b/usr.sbin/bhyve/usb_mouse.c @@ -70,7 +70,6 @@ enum { UMSTR_MAX }; -#define UMOUSE_DESC_MAX_LEN 32 static const char *umouse_desc_strings[] = { "\x04\x09", "BHYVE", @@ -442,7 +441,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) goto done; } - slen = 2 + strnlen(str, UMOUSE_DESC_MAX_LEN) * 2; + slen = 2 + strlen(str) * 2; udata[0] = slen; udata[1] = UDESC_STRING; From 647b6041448a95a3dae26fed40aa01ab3e9b7218 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Fri, 1 Feb 2019 07:48:37 +0000 Subject: [PATCH 55/90] Unbreak call to ipf_check(): it expects the out parameter to be 0 or 1. Pointy hat to: glebius Reported by: cy --- sys/contrib/ipfilter/netinet/ip_fil_freebsd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c index 292a119e2c43..dce75517dd63 100644 --- a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c +++ b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c @@ -135,8 +135,8 @@ ipf_check_wrapper(struct mbuf **mp, struct ifnet *ifp, int flags, pfil_return_t rv; CURVNET_SET(ifp->if_vnet); - rv = ipf_check(&V_ipfmain, ip, ip->ip_hl << 2, ifp, (flags & PFIL_OUT), - mp); + rv = ipf_check(&V_ipfmain, ip, ip->ip_hl << 2, ifp, + !!(flags & PFIL_OUT), mp); CURVNET_RESTORE(); return (rv == 0 ? PFIL_PASS : PFIL_DROPPED); } @@ -150,7 +150,7 @@ ipf_check_wrapper6(struct mbuf **mp, struct ifnet *ifp, int flags, CURVNET_SET(ifp->if_vnet); rv = ipf_check(&V_ipfmain, mtod(*mp, struct ip *), - sizeof(struct ip6_hdr), ifp, (flags & PFIL_OUT), mp); + sizeof(struct ip6_hdr), ifp, !!(flags & PFIL_OUT), mp); CURVNET_RESTORE(); return (rv == 0 ? 
PFIL_PASS : PFIL_DROPPED); From 547392731f87aed6405565dfc6ba9c4a9fee4d37 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Fri, 1 Feb 2019 08:10:26 +0000 Subject: [PATCH 56/90] Repair siftr(4): PFIL_IN and PFIL_OUT are defines of some value, relying on them having particular values can break things. --- sys/netinet/siftr.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c index 68d8f8e16fa1..c251fb44c8bc 100644 --- a/sys/netinet/siftr.c +++ b/sys/netinet/siftr.c @@ -172,8 +172,11 @@ static MALLOC_DEFINE(M_SIFTR_HASHNODE, "siftr_hashnode", struct pkt_node { /* Timestamp of pkt as noted in the pfil hook. */ struct timeval tval; - /* Direction pkt is travelling; either PFIL_IN or PFIL_OUT. */ - uint8_t direction; + /* Direction pkt is travelling. */ + enum { + DIR_IN = 0, + DIR_OUT = 1, + } direction; /* IP version pkt_node relates to; either INP_IPV4 or INP_IPV6. */ uint8_t ipver; /* Hash of the pkt which triggered the log message. */ @@ -286,11 +289,7 @@ static struct alq *siftr_alq = NULL; static struct mtx siftr_pkt_queue_mtx; static struct mtx siftr_pkt_mgr_mtx; static struct thread *siftr_pkt_manager_thr = NULL; -/* - * pfil.h defines PFIL_IN as 1 and PFIL_OUT as 2, - * which we use as an index into this array. - */ -static char direction[3] = {'\0', 'i','o'}; +static char direction[2] = {'i','o'}; /* Required function prototypes. */ static int siftr_sysctl_enabled_handler(SYSCTL_HANDLER_ARGS); @@ -409,7 +408,7 @@ siftr_process_pkt(struct pkt_node * pkt_node) LIST_INSERT_HEAD(counter_list, hash_node, nodes); } else { /* Malloc failed. */ - if (pkt_node->direction == PFIL_IN) + if (pkt_node->direction == DIR_IN) ss->nskip_in_malloc++; else ss->nskip_out_malloc++; @@ -812,7 +811,7 @@ siftr_siftdata(struct pkt_node *pn, struct inpcb *inp, struct tcpcb *tp, INP_RUNLOCK(inp); pn->ipver = ipver; - pn->direction = (dir == PFIL_IN ? DIR_IN : DIR_OUT); /* * Significantly more accurate than using getmicrotime(), but slower! From 116ef4d6e7c636ebe96f415332f01ab7ec4d0c7f Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Fri, 1 Feb 2019 12:33:00 +0000 Subject: [PATCH 57/90] When handling SYN-ACK segments in the SYN-RCVD state, set tp->snd_wnd consistently. This inconsistency was observed when working on the bug reported in PR 235256, although it does not fix the reported issue. The fix for the PR will be a separate commit. PR: 235256 Reviewed by: rrs@, Richard Scheffenegger MFC after: 3 days Sponsored by: Netflix, Inc.
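[Editorial note on the two pfil(9) fixes above, patches 55 and 56: both bugs came from treating the PFIL_IN/PFIL_OUT flag macros as if they had known numeric values. The portable idioms are to collapse a flag test to exactly 0 or 1 with !!, and to map flags onto a local enum before using them as array indices. A small sketch of both idioms; the flag values here are made up for illustration:]

#include <stdio.h>

#define PFIL_IN		0x00010000	/* illustrative values only */
#define PFIL_OUT	0x00020000

enum dir { DIR_IN = 0, DIR_OUT = 1 };

static const char dirchar[2] = { 'i', 'o' };

int
main(void)
{
	int flags = PFIL_OUT;
	enum dir d;

	/* !! folds any nonzero bit-test result to exactly 1. */
	printf("out=%d\n", !!(flags & PFIL_OUT));

	/* Index arrays through a local enum, never through the macros. */
	d = (flags & PFIL_OUT) ? DIR_OUT : DIR_IN;
	printf("dir=%c\n", dirchar[d]);
	return (0);
}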
Differential Revision: https://reviews.freebsd.org/D19033 --- sys/netinet/tcp_input.c | 2 +- sys/netinet/tcp_stacks/rack.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 39351d897f25..b2c4f66da9f0 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -2385,8 +2385,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; - tp->snd_wnd = tiwin; } + tp->snd_wnd = tiwin; /* * Make transitions: * SYN-RECEIVED -> ESTABLISHED diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 158d2cb3abf8..608b6e7e15ad 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -5433,6 +5433,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } + tp->snd_wnd = tiwin; /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later */ if ((thflags & TH_ACK) == 0) { if (IS_FASTOPEN(tp->t_flags)) { - tp->snd_wnd = tiwin; cc_conn_init(tp); } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, @@ -5452,7 +5452,6 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == (TF_RCVD_SCALE | TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; - tp->snd_wnd = tiwin; } /* * Make transitions: SYN-RECEIVED -> ESTABLISHED SYN-RECEIVED* -> From 90bdbe955c57bbd54492ffb1adb1edc8f316439d Mon Sep 17 00:00:00 2001 From: Bruce Evans Date: Fri, 1 Feb 2019 16:07:49 +0000 Subject: [PATCH 58/90] Fix function keys for syscons in cons25 mode (vidcontrol -T cons25). kbd(4) (but only documented in atkbd(4)) maintains a table of strings for 96 function keys. Using teken broke this 9+ years ago for the most usable first 12 function keys and for 10 cursor keys, by supplying its own non-programmable strings so that the keyboard driver's strings are not used. Fix this by supplying NULL in the teken layer for syscons in cons25 mode so that the strings are found in the kbd(4) layer. vt needs more changes to use kbd(4)'s tables. Teken's cons25 table is still needed to supply nonempty strings for vt in cons25 mode. Keep using teken's xterm tables for both syscons and vt in xterm mode. Function keys should at least default to xterm values in xterm mode, and kbd(4) doesn't support this. teken_set_cons25() sets a sticky flag to ask for the fix, and space is reserved for another new flag. vt should set this flag when it uses kbd(4)'s tables. PR: 226553 (for vt) --- sys/teken/teken.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sys/teken/teken.c b/sys/teken/teken.c index bc3af434e2d9..f4e4c8a57a2d 100644 --- a/sys/teken/teken.c +++ b/sys/teken/teken.c @@ -58,6 +58,7 @@ #define TS_CONS25 0x0040 /* cons25 emulation. */ #define TS_INSTRING 0x0080 /* Inside string. */ #define TS_CURSORKEYS 0x0100 /* Cursor keys mode. */ +#define TS_CONS25KEYS 0x0400 /* Fuller cons25 emul (fix function keys). */ /* Character that blanks a cell.
*/ #define BLANK ' ' @@ -411,7 +412,7 @@ void teken_set_cons25(teken_t *t) { - t->t_stateflags |= TS_CONS25; + t->t_stateflags |= TS_CONS25 | TS_CONS25KEYS; } /* @@ -722,6 +723,9 @@ teken_get_sequence(const teken_t *t, unsigned int k) { /* Cons25 mode. */ + if ((t->t_stateflags & (TS_CONS25 | TS_CONS25KEYS)) == + (TS_CONS25 | TS_CONS25KEYS)) + return (NULL); /* Don't override good kbd(4) strings. */ if (t->t_stateflags & TS_CONS25 && k < sizeof special_strings_cons25 / sizeof(char *)) return (special_strings_cons25[k]); From 6b2779a02230e2270531e1697a48f076da7cebed Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Fri, 1 Feb 2019 20:28:15 +0000 Subject: [PATCH 59/90] readelf: use table-based DT_FLAGS and DT_FLAGS_1 decoding Fewer lines of code and more maintainable. Reviewed by: brooks, kib MFC after: 1 week Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D19053 --- contrib/elftoolchain/readelf/readelf.c | 196 ++++++++----------------- 1 file changed, 60 insertions(+), 136 deletions(-) diff --git a/contrib/elftoolchain/readelf/readelf.c b/contrib/elftoolchain/readelf/readelf.c index 0b7106eb22c9..8507847c6548 100644 --- a/contrib/elftoolchain/readelf/readelf.c +++ b/contrib/elftoolchain/readelf/readelf.c @@ -220,6 +220,11 @@ struct mips_option { const char *desc; }; +struct flag_desc { + uint64_t flag; + const char *desc; +}; + static void add_dumpop(struct readelf *re, size_t si, const char *sn, int op, int t); static const char *aeabi_adv_simd_arch(uint64_t simd); @@ -293,8 +298,7 @@ static void dump_dwarf_ranges_foreach(struct readelf *re, Dwarf_Die die, static void dump_dwarf_str(struct readelf *re); static void dump_eflags(struct readelf *re, uint64_t e_flags); static void dump_elf(struct readelf *re); -static void dump_dt_flags_val(uint64_t d_val); -static void dump_dt_flags_1_val(uint64_t d_val); +static void dump_flags(struct flag_desc *fd, uint64_t flags); static void dump_dyn_val(struct readelf *re, GElf_Dyn *dyn, uint32_t stab); static void dump_dynamic(struct readelf *re); static void dump_liblist(struct readelf *re); @@ -2722,6 +2726,58 @@ dump_arch_dyn_val(struct readelf *re, GElf_Dyn *dyn) } } +static void +dump_flags(struct flag_desc *desc, uint64_t val) +{ + struct flag_desc *fd; + + for (fd = desc; fd->flag != 0; fd++) { + if (val & fd->flag) { + val &= ~fd->flag; + printf(" %s", fd->desc); + } + } + if (val != 0) + printf(" unknown (0x%jx)", (uintmax_t)val); +} + +static struct flag_desc dt_flags[] = { + { DF_ORIGIN, "ORIGIN" }, + { DF_SYMBOLIC, "SYMBOLIC" }, + { DF_TEXTREL, "TEXTREL" }, + { DF_BIND_NOW, "BIND_NOW" }, + { DF_STATIC_TLS, "STATIC_TLS" }, + { 0, NULL } +}; + +static struct flag_desc dt_flags_1[] = { + { DF_1_BIND_NOW, "NOW" }, + { DF_1_GLOBAL, "GLOBAL" }, + { 0x4, "GROUP" }, + { DF_1_NODELETE, "NODELETE" }, + { DF_1_LOADFLTR, "LOADFLTR" }, + { 0x20, "INITFIRST" }, + { DF_1_NOOPEN, "NOOPEN" }, + { DF_1_ORIGIN, "ORIGIN" }, + { 0x100, "DIRECT" }, + { DF_1_INTERPOSE, "INTERPOSE" }, + { DF_1_NODEFLIB, "NODEFLIB" }, + { 0x1000, "NODUMP" }, + { 0x2000, "CONFALT" }, + { 0x4000, "ENDFILTEE" }, + { 0x8000, "DISPRELDNE" }, + { 0x10000, "DISPRELPND" }, + { 0x20000, "NODIRECT" }, + { 0x40000, "IGNMULDEF" }, + { 0x80000, "NOKSYMS" }, + { 0x100000, "NOHDR" }, + { 0x200000, "EDITED" }, + { 0x400000, "NORELOC" }, + { 0x800000, "SYMINTPOSE" }, + { 0x1000000, "GLOBAUDIT" }, + { 0, NULL } +}; + static void dump_dyn_val(struct readelf *re, GElf_Dyn *dyn, uint32_t stab) { @@ -2807,148 +2863,16 @@ dump_dyn_val(struct readelf *re, GElf_Dyn *dyn, 
uint32_t stab) printf(" %s\n", timestamp(dyn->d_un.d_val)); break; case DT_FLAGS: - dump_dt_flags_val(dyn->d_un.d_val); + dump_flags(dt_flags, dyn->d_un.d_val); break; case DT_FLAGS_1: - dump_dt_flags_1_val(dyn->d_un.d_val); + dump_flags(dt_flags_1, dyn->d_un.d_val); break; default: printf("\n"); } } -static void -dump_dt_flags_val(uint64_t d_val) -{ - if (d_val & 0x1) { - d_val ^= 0x1; - printf(" ORIGIN"); - } - if (d_val & 0x2) { - d_val ^= 0x2; - printf(" SYMBOLIC"); - } - if (d_val & 0x4) { - d_val ^= 0x4; - printf(" TEXTREL"); - } - if (d_val & 0x8) { - d_val ^= 0x8; - printf(" BIND_NOW"); - } - if (d_val & 0x10) { - d_val ^= 0x10; - printf(" STATIC_TLS"); - } - if (d_val) - printf(" %jx", (uintmax_t)d_val); - printf("\n"); -} - -static void -dump_dt_flags_1_val(uint64_t d_val) -{ - if (d_val & 0x1) { - d_val ^= 0x1; - printf(" NOW"); - } - if (d_val & 0x2) { - d_val ^= 0x2; - printf(" GLOBAL"); - } - if (d_val & 0x4) { - d_val ^= 0x4; - printf(" GROUP"); - } - if (d_val & 0x8) { - d_val ^= 0x8; - printf(" NODELETE"); - } - if (d_val & 0x10) { - d_val ^= 0x10; - printf(" LOADFLTR"); - } - if (d_val & 0x20) { - d_val ^= 0x20; - printf(" INITFIRST"); - } - if (d_val & 0x40) { - d_val ^= 0x40; - printf(" NOOPEN"); - } - if (d_val & 0x80) { - d_val ^= 0x80; - printf(" ORIGIN"); - } - if (d_val & 0x100) { - d_val ^= 0x100; - printf(" DIRECT"); - } - if (d_val & 0x400) { - d_val ^= 0x400; - printf(" INTERPOSE"); - } - if (d_val & 0x800) { - d_val ^= 0x800; - printf(" NODEFLIB"); - } - if (d_val & 0x1000) { - d_val ^= 0x1000; - printf(" NODUMP"); - } - if (d_val & 0x2000) { - d_val ^= 0x2000; - printf(" CONFALT"); - } - if (d_val & 0x4000) { - d_val ^= 0x4000; - printf(" ENDFILTEE"); - } - if (d_val & 0x8000) { - d_val ^= 0x8000; - printf(" DISPRELDNE"); - } - if (d_val & 0x10000) { - d_val ^= 0x10000; - printf(" DISPRELPND"); - } - if (d_val & 0x20000) { - d_val ^= 0x20000; - printf(" NODIRECT"); - } - if (d_val & 0x40000) { - d_val ^= 0x40000; - printf(" IGNMULDEF"); - } - if (d_val & 0x80000) { - d_val ^= 0x80000; - printf(" NOKSYMS"); - } - if (d_val & 0x100000) { - d_val ^= 0x100000; - printf(" NOHDR"); - } - if (d_val & 0x200000) { - d_val ^= 0x200000; - printf(" EDITED"); - } - if (d_val & 0x400000) { - d_val ^= 0x400000; - printf(" NORELOC"); - } - if (d_val & 0x800000) { - d_val ^= 0x800000; - printf(" SYMINTPOSE"); - } - if (d_val & 0x1000000) { - d_val ^= 0x1000000; - printf(" GLOBAUDIT"); - } - if (d_val) - printf(" %jx", (uintmax_t)d_val); - printf("\n"); -} - static void dump_rel(struct readelf *re, struct section *s, Elf_Data *d) { From cb7c3f124a9b17d0f4200008d0661c239ec3520f Mon Sep 17 00:00:00 2001 From: Navdeep Parhar Date: Fri, 1 Feb 2019 20:42:49 +0000 Subject: [PATCH 60/90] cxgbe(4): Improved error reporting and diagnostics. "slow" interrupt handler: - Expand the list of INT_CAUSE registers known to the driver. - Add decode information for many more bits but decouple it from the rest of intr_info so that it is entirely optional. - Call t4_fatal_err exactly once, and from the top level PL intr handler. t4_fatal_err: - Use t4_shutdown_adapter from the common code to stop the adapter. - Stop servicing slow interrupts after the first fatal one. Driver/firmware interaction: - CH_DUMP_MBOX: note whether the mailbox being dumped is a command or a reply or something else. - Log the raw value of pcie_fw for some errors. - Use correct log levels (debug vs. error).
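[Editorial note: the readelf change above and the cxgbe rework in this patch apply the same technique: describe each bit once in a { mask, description } table terminated by a zero mask, and let one generic loop do the decoding. A self-contained sketch of the pattern, mirroring dump_flags() above with a few sample DT_FLAGS entries:]

#include <stdint.h>
#include <stdio.h>

struct flag_desc {
	uint64_t flag;
	const char *desc;
};

static const struct flag_desc dt_flags_sample[] = {
	{ 0x1, "ORIGIN" },
	{ 0x2, "SYMBOLIC" },
	{ 0x4, "TEXTREL" },
	{ 0, NULL }		/* zero mask terminates the table */
};

static void
dump_flags(const struct flag_desc *fd, uint64_t val)
{
	for (; fd->flag != 0; fd++) {
		if (val & fd->flag) {
			val &= ~fd->flag;
			printf(" %s", fd->desc);
		}
	}
	if (val != 0)
		printf(" unknown (0x%jx)", (uintmax_t)val);
	printf("\n");
}

int
main(void)
{
	/* Prints " ORIGIN SYMBOLIC unknown (0x10)". */
	dump_flags(dt_flags_sample, 0x13);
	return (0);
}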
Sponsored by: Chelsio Communications --- sys/dev/cxgbe/adapter.h | 53 +- sys/dev/cxgbe/common/common.h | 10 +- sys/dev/cxgbe/common/t4_hw.c | 1888 +++++++++++++++++++++------------ sys/dev/cxgbe/t4_main.c | 32 +- sys/dev/cxgbe/t4_sge.c | 6 +- 5 files changed, 1279 insertions(+), 710 deletions(-) diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 6f9fc82ab9be..e7a890af69d7 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -155,7 +155,7 @@ enum { CHK_MBOX_ACCESS = (1 << 2), MASTER_PF = (1 << 3), ADAP_SYSCTL_CTX = (1 << 4), - /* TOM_INIT_DONE= (1 << 5), No longer used */ + ADAP_ERR = (1 << 5), BUF_PACKING_OK = (1 << 6), IS_VF = (1 << 7), @@ -175,6 +175,7 @@ enum { DF_LOAD_FW_ANYTIME = (1 << 1), /* Allow LOAD_FW after init */ DF_DISABLE_TCB_CACHE = (1 << 2), /* Disable TCB cache (T6+) */ DF_DISABLE_CFG_RETRY = (1 << 3), /* Disable fallback config */ + DF_VERBOSE_SLOWINTR = (1 << 4), /* Chatty slow intr handler */ }; #define IS_DOOMED(vi) ((vi)->flags & DOOMED) @@ -932,24 +933,6 @@ struct adapter { #define TXQ_LOCK_ASSERT_OWNED(txq) EQ_LOCK_ASSERT_OWNED(&(txq)->eq) #define TXQ_LOCK_ASSERT_NOTOWNED(txq) EQ_LOCK_ASSERT_NOTOWNED(&(txq)->eq) -#define CH_DUMP_MBOX(sc, mbox, data_reg) \ - do { \ - if (sc->debug_flags & DF_DUMP_MBOX) { \ - log(LOG_NOTICE, \ - "%s mbox %u: %016llx %016llx %016llx %016llx " \ - "%016llx %016llx %016llx %016llx\n", \ - device_get_nameunit(sc->dev), mbox, \ - (unsigned long long)t4_read_reg64(sc, data_reg), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 8), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 16), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 24), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 32), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 40), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 48), \ - (unsigned long long)t4_read_reg64(sc, data_reg + 56)); \ - } \ - } while (0) - #define for_each_txq(vi, iter, q) \ for (q = &vi->pi->adapter->sge.txq[vi->first_txq], iter = 0; \ iter < vi->ntxq; ++iter, ++q) @@ -1105,6 +1088,38 @@ t4_use_ldst(struct adapter *sc) #endif } +static inline void +CH_DUMP_MBOX(struct adapter *sc, int mbox, const int reg, + const char *msg, const __be64 *const p, const bool err) +{ + + if (!(sc->debug_flags & DF_DUMP_MBOX) && !err) + return; + if (p != NULL) { + log(err ? LOG_ERR : LOG_DEBUG, + "%s: mbox %u %s %016llx %016llx %016llx %016llx " + "%016llx %016llx %016llx %016llx\n", + device_get_nameunit(sc->dev), mbox, msg, + (long long)be64_to_cpu(p[0]), (long long)be64_to_cpu(p[1]), + (long long)be64_to_cpu(p[2]), (long long)be64_to_cpu(p[3]), + (long long)be64_to_cpu(p[4]), (long long)be64_to_cpu(p[5]), + (long long)be64_to_cpu(p[6]), (long long)be64_to_cpu(p[7])); + } else { + log(err ? 
LOG_ERR : LOG_DEBUG, + "%s: mbox %u %s %016llx %016llx %016llx %016llx " + "%016llx %016llx %016llx %016llx\n", + device_get_nameunit(sc->dev), mbox, msg, + (long long)t4_read_reg64(sc, reg), + (long long)t4_read_reg64(sc, reg + 8), + (long long)t4_read_reg64(sc, reg + 16), + (long long)t4_read_reg64(sc, reg + 24), + (long long)t4_read_reg64(sc, reg + 32), + (long long)t4_read_reg64(sc, reg + 40), + (long long)t4_read_reg64(sc, reg + 48), + (long long)t4_read_reg64(sc, reg + 56)); + } +} + /* t4_main.c */ extern int t4_ntxq; extern int t4_nrxq; diff --git a/sys/dev/cxgbe/common/common.h b/sys/dev/cxgbe/common/common.h index e072a6759a69..62694993237c 100644 --- a/sys/dev/cxgbe/common/common.h +++ b/sys/dev/cxgbe/common/common.h @@ -34,10 +34,6 @@ #include "t4_hw.h" -#define GLBL_INTR_MASK (F_CIM | F_MPS | F_PL | F_PCIE | F_MC0 | F_EDC0 | \ - F_EDC1 | F_LE | F_TP | F_MA | F_PM_TX | F_PM_RX | F_ULP_RX | \ - F_CPL_SWITCH | F_SGE | F_ULP_TX) - enum { MAX_NPORTS = 4, /* max # of ports */ SERNUM_LEN = 24, /* Serial # length */ @@ -581,7 +577,7 @@ struct fw_filter_wr; void t4_intr_enable(struct adapter *adapter); void t4_intr_disable(struct adapter *adapter); void t4_intr_clear(struct adapter *adapter); -int t4_slow_intr_handler(struct adapter *adapter); +int t4_slow_intr_handler(struct adapter *adapter, bool verbose); int t4_hash_mac_addr(const u8 *addr); int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port, @@ -621,9 +617,7 @@ int t4_init_sge_params(struct adapter *adapter); int t4_init_tp_params(struct adapter *adap, bool sleep_ok); int t4_filter_field_shift(const struct adapter *adap, int filter_sel); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf, int port_id); -void t4_fatal_err(struct adapter *adapter); -void t4_db_full(struct adapter *adapter); -void t4_db_dropped(struct adapter *adapter); +void t4_fatal_err(struct adapter *adapter, bool fw_error); int t4_set_trace_filter(struct adapter *adapter, const struct trace_params *tp, int filter_index, int enable); void t4_get_trace_filter(struct adapter *adapter, struct trace_params *tp, diff --git a/sys/dev/cxgbe/common/t4_hw.c b/sys/dev/cxgbe/common/t4_hw.c index 7116b38b1639..15b04194cc20 100644 --- a/sys/dev/cxgbe/common/t4_hw.c +++ b/sys/dev/cxgbe/common/t4_hw.c @@ -212,8 +212,8 @@ static void t4_report_fw_error(struct adapter *adap) pcie_fw = t4_read_reg(adap, A_PCIE_FW); if (pcie_fw & F_PCIE_FW_ERR) { - CH_ERR(adap, "Firmware reports adapter error: %s\n", - reason[G_PCIE_FW_EVAL(pcie_fw)]); + CH_ERR(adap, "firmware reports adapter error: %s (0x%08x)\n", + reason[G_PCIE_FW_EVAL(pcie_fw)], pcie_fw); adap->flags &= ~FW_OK; } } @@ -340,7 +340,6 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, u32 v; u64 res; int i, ms, delay_idx, ret, next_tx_check; - const __be64 *p = cmd; u32 data_reg = PF_REG(mbox, A_CIM_PF_MAILBOX_DATA); u32 ctl_reg = PF_REG(mbox, A_CIM_PF_MAILBOX_CTRL); u32 ctl; @@ -351,7 +350,7 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, if (adap->flags & CHK_MBOX_ACCESS) ASSERT_SYNCHRONIZED_OP(adap); - if ((size & 15) || size > MBOX_LEN) + if (size <= 0 || (size & 15) || size > MBOX_LEN) return -EINVAL; if (adap->flags & IS_VF) { @@ -381,8 +380,7 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, } /* - * If we were unable to gain access, dequeue ourselves from the - * mailbox atomic access list and report the error to our caller. + * If we were unable to gain access, report the error to our caller. 
*/ if (v != X_MBOWNER_PL) { t4_report_fw_error(adap); @@ -398,23 +396,17 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, * presaged the firmware crashing ... */ if (ctl & F_MBMSGVALID) { - CH_ERR(adap, "found VALID command in mbox %u: %016llx %016llx " - "%016llx %016llx %016llx %016llx %016llx %016llx\n", - mbox, (unsigned long long)t4_read_reg64(adap, data_reg), - (unsigned long long)t4_read_reg64(adap, data_reg + 8), - (unsigned long long)t4_read_reg64(adap, data_reg + 16), - (unsigned long long)t4_read_reg64(adap, data_reg + 24), - (unsigned long long)t4_read_reg64(adap, data_reg + 32), - (unsigned long long)t4_read_reg64(adap, data_reg + 40), - (unsigned long long)t4_read_reg64(adap, data_reg + 48), - (unsigned long long)t4_read_reg64(adap, data_reg + 56)); + CH_DUMP_MBOX(adap, mbox, data_reg, "VLD", NULL, true); } /* * Copy in the new mailbox command and send it on its way ... */ - for (i = 0; i < size; i += 8, p++) - t4_write_reg64(adap, data_reg + i, be64_to_cpu(*p)); + memset(cmd_rpl, 0, sizeof(cmd_rpl)); + memcpy(cmd_rpl, cmd, size); + CH_DUMP_MBOX(adap, mbox, 0, "cmd", cmd_rpl, false); + for (i = 0; i < ARRAY_SIZE(cmd_rpl); i++) + t4_write_reg64(adap, data_reg + i * 8, be64_to_cpu(cmd_rpl[i])); if (adap->flags & IS_VF) { /* @@ -432,8 +424,6 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, t4_read_reg(adap, data_reg); } - CH_DUMP_MBOX(adap, mbox, data_reg); - t4_write_reg(adap, ctl_reg, F_MBMSGVALID | V_MBOWNER(X_MBOWNER_FW)); read_tx_state(adap, &tx_state[0]); /* also flushes the write_reg */ next_tx_check = 1000; @@ -480,10 +470,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, * Retrieve the command reply and release the mailbox. */ get_mbox_rpl(adap, cmd_rpl, MBOX_LEN/8, data_reg); + CH_DUMP_MBOX(adap, mbox, 0, "rpl", cmd_rpl, false); t4_write_reg(adap, ctl_reg, V_MBOWNER(X_MBOWNER_NONE)); - CH_DUMP_MBOX(adap, mbox, data_reg); - res = be64_to_cpu(cmd_rpl[0]); if (G_FW_CMD_OP(res >> 32) == FW_DEBUG_CMD) { fw_asrt(adap, (struct fw_debug_cmd *)cmd_rpl); @@ -500,26 +489,13 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, * errors ... */ ret = (pcie_fw & F_PCIE_FW_ERR) ? 
-ENXIO : -ETIMEDOUT; - CH_ERR(adap, "command %#x in mailbox %d timed out\n", - *(const u8 *)cmd, mbox); - - /* If DUMP_MBOX is set the mbox has already been dumped */ - if ((adap->debug_flags & DF_DUMP_MBOX) == 0) { - p = cmd; - CH_ERR(adap, "mbox: %016llx %016llx %016llx %016llx " - "%016llx %016llx %016llx %016llx\n", - (unsigned long long)be64_to_cpu(p[0]), - (unsigned long long)be64_to_cpu(p[1]), - (unsigned long long)be64_to_cpu(p[2]), - (unsigned long long)be64_to_cpu(p[3]), - (unsigned long long)be64_to_cpu(p[4]), - (unsigned long long)be64_to_cpu(p[5]), - (unsigned long long)be64_to_cpu(p[6]), - (unsigned long long)be64_to_cpu(p[7])); - } + CH_ERR(adap, "command %#x in mbox %d timed out (0x%08x).\n", + *(const u8 *)cmd, mbox, pcie_fw); + CH_DUMP_MBOX(adap, mbox, 0, "cmdsent", cmd_rpl, true); + CH_DUMP_MBOX(adap, mbox, data_reg, "current", NULL, true); t4_report_fw_error(adap); - t4_fatal_err(adap); + t4_fatal_err(adap, true); return ret; } @@ -3965,785 +3941,1330 @@ int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port) return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } -typedef void (*int_handler_t)(struct adapter *adap); - -struct intr_info { - unsigned int mask; /* bits to check in interrupt status */ - const char *msg; /* message to print or NULL */ - short stat_idx; /* stat counter to increment or -1 */ - unsigned short fatal; /* whether the condition reported is fatal */ - int_handler_t int_handler; /* platform-specific int handler */ +struct intr_details { + u32 mask; + const char *msg; }; -/** - * t4_handle_intr_status - table driven interrupt handler - * @adapter: the adapter that generated the interrupt - * @reg: the interrupt status register to process - * @acts: table of interrupt actions - * - * A table driven interrupt handler that applies a set of masks to an - * interrupt status word and performs the corresponding actions if the - * interrupts described by the mask have occurred. The actions include - * optionally emitting a warning or alert message. The table is terminated - * by an entry specifying mask 0. Returns the number of fatal interrupt - * conditions. 
- */ -static int t4_handle_intr_status(struct adapter *adapter, unsigned int reg, - const struct intr_info *acts) -{ - int fatal = 0; - unsigned int mask = 0; - unsigned int status = t4_read_reg(adapter, reg); +struct intr_action { + u32 mask; + int arg; + bool (*action)(struct adapter *, int, bool); +}; - for ( ; acts->mask; ++acts) { - if (!(status & acts->mask)) +struct intr_info { + const char *name; /* name of the INT_CAUSE register */ + int cause_reg; /* INT_CAUSE register */ + int enable_reg; /* INT_ENABLE register */ + u32 fatal; /* bits that are fatal */ + const struct intr_details *details; + const struct intr_action *actions; +}; + +static inline char +intr_alert_char(u32 cause, u32 enable, u32 fatal) +{ + + if (cause & fatal) + return ('!'); + if (cause & enable) + return ('*'); + return ('-'); +} + +static void +t4_show_intr_info(struct adapter *adap, const struct intr_info *ii, u32 cause) +{ + u32 enable, leftover; + const struct intr_details *details; + char alert; + + enable = t4_read_reg(adap, ii->enable_reg); + alert = intr_alert_char(cause, enable, ii->fatal); + CH_ALERT(adap, "%c %s 0x%x = 0x%08x, E 0x%08x, F 0x%08x\n", + alert, ii->name, ii->cause_reg, cause, enable, ii->fatal); + + leftover = cause; + for (details = ii->details; details && details->mask != 0; details++) { + u32 msgbits = details->mask & cause; + if (msgbits == 0) continue; - if (acts->fatal) { - fatal++; - CH_ALERT(adapter, "%s (0x%x)\n", acts->msg, - status & acts->mask); - } else if (acts->msg) - CH_WARN_RATELIMIT(adapter, "%s (0x%x)\n", acts->msg, - status & acts->mask); - if (acts->int_handler) - acts->int_handler(adapter); - mask |= acts->mask; + alert = intr_alert_char(msgbits, enable, ii->fatal); + CH_ALERT(adap, " %c [0x%08x] %s\n", alert, msgbits, + details->msg); + leftover &= ~msgbits; } - status &= mask; - if (status) /* clear processed interrupts */ - t4_write_reg(adapter, reg, status); - return fatal; + if (leftover != 0 && leftover != cause) + CH_ALERT(adap, " ? [0x%08x]\n", leftover); +} + +/* + * Returns true for fatal error. + */ +static bool +t4_handle_intr(struct adapter *adap, const struct intr_info *ii, + u32 additional_cause, bool verbose) +{ + u32 cause; + bool fatal; + const struct intr_action *action; + + /* read and display cause. */ + cause = t4_read_reg(adap, ii->cause_reg); + if (verbose || cause != 0) + t4_show_intr_info(adap, ii, cause); + fatal = (cause & ii->fatal) != 0; + cause |= additional_cause; + if (cause == 0) + return (false); + + for (action = ii->actions; action && action->mask != 0; action++) { + if (!(action->mask & cause)) + continue; + fatal |= (action->action)(adap, action->arg, verbose); + } + + /* clear */ + t4_write_reg(adap, ii->cause_reg, cause); + (void)t4_read_reg(adap, ii->cause_reg); + + return (fatal); } /* * Interrupt handler for the PCIE module. */
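[Editorial note: in the new scheme each block's handler simply fills in one or more intr_info descriptors (cause register, enable register, fatal mask, optional details and actions) and folds the results of t4_handle_intr() into a single fatal flag; intr_alert_char() prefixes each logged cause bit with '!' (fatal), '*' (enabled) or '-' (neither). A schematic handler in that style follows, modeled on tp_intr_handler() below; the FOO register names and the mask bit are placeholders, not real hardware definitions:]

static bool
foo_intr_handler(struct adapter *adap, int arg, bool verbose)
{
	static const struct intr_details foo_intr_details[] = {
		{ 0x00000001, "FOO parity error" },	/* placeholder bit */
		{ 0 }
	};
	static const struct intr_info foo_intr_info = {
		.name = "FOO_INT_CAUSE",
		.cause_reg = A_FOO_INT_CAUSE,		/* placeholder register */
		.enable_reg = A_FOO_INT_ENABLE,		/* placeholder register */
		.fatal = 0x00000001,
		.details = foo_intr_details,
		.actions = NULL,
	};

	return (t4_handle_intr(adap, &foo_intr_info, 0, verbose));
}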
*/ -static void pcie_intr_handler(struct adapter *adapter) +static bool pcie_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info sysbus_intr_info[] = { - { F_RNPP, "RXNP array parity error", -1, 1 }, - { F_RPCP, "RXPC array parity error", -1, 1 }, - { F_RCIP, "RXCIF array parity error", -1, 1 }, - { F_RCCP, "Rx completions control array parity error", -1, 1 }, - { F_RFTP, "RXFT array parity error", -1, 1 }, + static const struct intr_details sysbus_intr_details[] = { + { F_RNPP, "RXNP array parity error" }, + { F_RPCP, "RXPC array parity error" }, + { F_RCIP, "RXCIF array parity error" }, + { F_RCCP, "Rx completions control array parity error" }, + { F_RFTP, "RXFT array parity error" }, { 0 } }; - static const struct intr_info pcie_port_intr_info[] = { - { F_TPCP, "TXPC array parity error", -1, 1 }, - { F_TNPP, "TXNP array parity error", -1, 1 }, - { F_TFTP, "TXFT array parity error", -1, 1 }, - { F_TCAP, "TXCA array parity error", -1, 1 }, - { F_TCIP, "TXCIF array parity error", -1, 1 }, - { F_RCAP, "RXCA array parity error", -1, 1 }, - { F_OTDD, "outbound request TLP discarded", -1, 1 }, - { F_RDPE, "Rx data parity error", -1, 1 }, - { F_TDUE, "Tx uncorrectable data error", -1, 1 }, + static const struct intr_info sysbus_intr_info = { + .name = "PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS", + .cause_reg = A_PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS, + .enable_reg = A_PCIE_CORE_UTL_SYSTEM_BUS_AGENT_INTERRUPT_ENABLE, + .fatal = F_RFTP | F_RCCP | F_RCIP | F_RPCP | F_RNPP, + .details = sysbus_intr_details, + .actions = NULL, + }; + static const struct intr_details pcie_port_intr_details[] = { + { F_TPCP, "TXPC array parity error" }, + { F_TNPP, "TXNP array parity error" }, + { F_TFTP, "TXFT array parity error" }, + { F_TCAP, "TXCA array parity error" }, + { F_TCIP, "TXCIF array parity error" }, + { F_RCAP, "RXCA array parity error" }, + { F_OTDD, "outbound request TLP discarded" }, + { F_RDPE, "Rx data parity error" }, + { F_TDUE, "Tx uncorrectable data error" }, { 0 } }; - static const struct intr_info pcie_intr_info[] = { - { F_MSIADDRLPERR, "MSI AddrL parity error", -1, 1 }, - { F_MSIADDRHPERR, "MSI AddrH parity error", -1, 1 }, - { F_MSIDATAPERR, "MSI data parity error", -1, 1 }, - { F_MSIXADDRLPERR, "MSI-X AddrL parity error", -1, 1 }, - { F_MSIXADDRHPERR, "MSI-X AddrH parity error", -1, 1 }, - { F_MSIXDATAPERR, "MSI-X data parity error", -1, 1 }, - { F_MSIXDIPERR, "MSI-X DI parity error", -1, 1 }, - { F_PIOCPLPERR, "PCI PIO completion FIFO parity error", -1, 1 }, - { F_PIOREQPERR, "PCI PIO request FIFO parity error", -1, 1 }, - { F_TARTAGPERR, "PCI PCI target tag FIFO parity error", -1, 1 }, - { F_CCNTPERR, "PCI CMD channel count parity error", -1, 1 }, - { F_CREQPERR, "PCI CMD channel request parity error", -1, 1 }, - { F_CRSPPERR, "PCI CMD channel response parity error", -1, 1 }, - { F_DCNTPERR, "PCI DMA channel count parity error", -1, 1 }, - { F_DREQPERR, "PCI DMA channel request parity error", -1, 1 }, - { F_DRSPPERR, "PCI DMA channel response parity error", -1, 1 }, - { F_HCNTPERR, "PCI HMA channel count parity error", -1, 1 }, - { F_HREQPERR, "PCI HMA channel request parity error", -1, 1 }, - { F_HRSPPERR, "PCI HMA channel response parity error", -1, 1 }, - { F_CFGSNPPERR, "PCI config snoop FIFO parity error", -1, 1 }, - { F_FIDPERR, "PCI FID parity error", -1, 1 }, - { F_INTXCLRPERR, "PCI INTx clear parity error", -1, 1 }, - { F_MATAGPERR, "PCI MA tag parity error", -1, 1 }, - { F_PIOTAGPERR, "PCI PIO tag parity error", -1, 1 }, - { F_RXCPLPERR, 
"PCI Rx completion parity error", -1, 1 }, - { F_RXWRPERR, "PCI Rx write parity error", -1, 1 }, - { F_RPLPERR, "PCI replay buffer parity error", -1, 1 }, - { F_PCIESINT, "PCI core secondary fault", -1, 1 }, - { F_PCIEPINT, "PCI core primary fault", -1, 1 }, - { F_UNXSPLCPLERR, "PCI unexpected split completion error", -1, - 0 }, + static const struct intr_info pcie_port_intr_info = { + .name = "PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS", + .cause_reg = A_PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, + .enable_reg = A_PCIE_CORE_UTL_PCI_EXPRESS_PORT_INTERRUPT_ENABLE, + .fatal = F_TPCP | F_TNPP | F_TFTP | F_TCAP | F_TCIP | F_RCAP | + F_OTDD | F_RDPE | F_TDUE, + .details = pcie_port_intr_details, + .actions = NULL, + }; + static const struct intr_details pcie_intr_details[] = { + { F_MSIADDRLPERR, "MSI AddrL parity error" }, + { F_MSIADDRHPERR, "MSI AddrH parity error" }, + { F_MSIDATAPERR, "MSI data parity error" }, + { F_MSIXADDRLPERR, "MSI-X AddrL parity error" }, + { F_MSIXADDRHPERR, "MSI-X AddrH parity error" }, + { F_MSIXDATAPERR, "MSI-X data parity error" }, + { F_MSIXDIPERR, "MSI-X DI parity error" }, + { F_PIOCPLPERR, "PCIe PIO completion FIFO parity error" }, + { F_PIOREQPERR, "PCIe PIO request FIFO parity error" }, + { F_TARTAGPERR, "PCIe target tag FIFO parity error" }, + { F_CCNTPERR, "PCIe CMD channel count parity error" }, + { F_CREQPERR, "PCIe CMD channel request parity error" }, + { F_CRSPPERR, "PCIe CMD channel response parity error" }, + { F_DCNTPERR, "PCIe DMA channel count parity error" }, + { F_DREQPERR, "PCIe DMA channel request parity error" }, + { F_DRSPPERR, "PCIe DMA channel response parity error" }, + { F_HCNTPERR, "PCIe HMA channel count parity error" }, + { F_HREQPERR, "PCIe HMA channel request parity error" }, + { F_HRSPPERR, "PCIe HMA channel response parity error" }, + { F_CFGSNPPERR, "PCIe config snoop FIFO parity error" }, + { F_FIDPERR, "PCIe FID parity error" }, + { F_INTXCLRPERR, "PCIe INTx clear parity error" }, + { F_MATAGPERR, "PCIe MA tag parity error" }, + { F_PIOTAGPERR, "PCIe PIO tag parity error" }, + { F_RXCPLPERR, "PCIe Rx completion parity error" }, + { F_RXWRPERR, "PCIe Rx write parity error" }, + { F_RPLPERR, "PCIe replay buffer parity error" }, + { F_PCIESINT, "PCIe core secondary fault" }, + { F_PCIEPINT, "PCIe core primary fault" }, + { F_UNXSPLCPLERR, "PCIe unexpected split completion error" }, { 0 } }; + static const struct intr_details t5_pcie_intr_details[] = { + { F_IPGRPPERR, "Parity errors observed by IP" }, + { F_NONFATALERR, "PCIe non-fatal error" }, + { F_READRSPERR, "Outbound read error" }, + { F_TRGT1GRPPERR, "PCIe TRGT1 group FIFOs parity error" }, + { F_IPSOTPERR, "PCIe IP SOT buffer SRAM parity error" }, + { F_IPRETRYPERR, "PCIe IP replay buffer parity error" }, + { F_IPRXDATAGRPPERR, "PCIe IP Rx data group SRAMs parity error" }, + { F_IPRXHDRGRPPERR, "PCIe IP Rx header group SRAMs parity error" }, + { F_PIOTAGQPERR, "PIO tag queue FIFO parity error" }, + { F_MAGRPPERR, "MA group FIFO parity error" }, + { F_VFIDPERR, "VFID SRAM parity error" }, + { F_FIDPERR, "FID SRAM parity error" }, + { F_CFGSNPPERR, "config snoop FIFO parity error" }, + { F_HRSPPERR, "HMA channel response data SRAM parity error" }, + { F_HREQRDPERR, "HMA channel read request SRAM parity error" }, + { F_HREQWRPERR, "HMA channel write request SRAM parity error" }, + { F_DRSPPERR, "DMA channel response data SRAM parity error" }, + { F_DREQRDPERR, "DMA channel write request SRAM parity error" }, + { F_CRSPPERR, "CMD channel response data SRAM parity error" }, + { 
F_CREQRDPERR, "CMD channel read request SRAM parity error" }, + { F_MSTTAGQPERR, "PCIe master tag queue SRAM parity error" }, + { F_TGTTAGQPERR, "PCIe target tag queue FIFO parity error" }, + { F_PIOREQGRPPERR, "PIO request group FIFOs parity error" }, + { F_PIOCPLGRPPERR, "PIO completion group FIFOs parity error" }, + { F_MSIXDIPERR, "MSI-X DI SRAM parity error" }, + { F_MSIXDATAPERR, "MSI-X data SRAM parity error" }, + { F_MSIXADDRHPERR, "MSI-X AddrH SRAM parity error" }, + { F_MSIXADDRLPERR, "MSI-X AddrL SRAM parity error" }, + { F_MSIXSTIPERR, "MSI-X STI SRAM parity error" }, + { F_MSTTIMEOUTPERR, "Master timeout FIFO parity error" }, + { F_MSTGRPPERR, "Master response read queue SRAM parity error" }, + { 0 } + }; + struct intr_info pcie_intr_info = { + .name = "PCIE_INT_CAUSE", + .cause_reg = A_PCIE_INT_CAUSE, + .enable_reg = A_PCIE_INT_ENABLE, + .fatal = 0, + .details = NULL, + .actions = NULL, + }; + bool fatal = false; - static const struct intr_info t5_pcie_intr_info[] = { - { F_MSTGRPPERR, "Master Response Read Queue parity error", - -1, 1 }, - { F_MSTTIMEOUTPERR, "Master Timeout FIFO parity error", -1, 1 }, - { F_MSIXSTIPERR, "MSI-X STI SRAM parity error", -1, 1 }, - { F_MSIXADDRLPERR, "MSI-X AddrL parity error", -1, 1 }, - { F_MSIXADDRHPERR, "MSI-X AddrH parity error", -1, 1 }, - { F_MSIXDATAPERR, "MSI-X data parity error", -1, 1 }, - { F_MSIXDIPERR, "MSI-X DI parity error", -1, 1 }, - { F_PIOCPLGRPPERR, "PCI PIO completion Group FIFO parity error", - -1, 1 }, - { F_PIOREQGRPPERR, "PCI PIO request Group FIFO parity error", - -1, 1 }, - { F_TARTAGPERR, "PCI PCI target tag FIFO parity error", -1, 1 }, - { F_MSTTAGQPERR, "PCI master tag queue parity error", -1, 1 }, - { F_CREQPERR, "PCI CMD channel request parity error", -1, 1 }, - { F_CRSPPERR, "PCI CMD channel response parity error", -1, 1 }, - { F_DREQWRPERR, "PCI DMA channel write request parity error", - -1, 1 }, - { F_DREQPERR, "PCI DMA channel request parity error", -1, 1 }, - { F_DRSPPERR, "PCI DMA channel response parity error", -1, 1 }, - { F_HREQWRPERR, "PCI HMA channel count parity error", -1, 1 }, - { F_HREQPERR, "PCI HMA channel request parity error", -1, 1 }, - { F_HRSPPERR, "PCI HMA channel response parity error", -1, 1 }, - { F_CFGSNPPERR, "PCI config snoop FIFO parity error", -1, 1 }, - { F_FIDPERR, "PCI FID parity error", -1, 1 }, - { F_VFIDPERR, "PCI INTx clear parity error", -1, 1 }, - { F_MAGRPPERR, "PCI MA group FIFO parity error", -1, 1 }, - { F_PIOTAGPERR, "PCI PIO tag parity error", -1, 1 }, - { F_IPRXHDRGRPPERR, "PCI IP Rx header group parity error", - -1, 1 }, - { F_IPRXDATAGRPPERR, "PCI IP Rx data group parity error", - -1, 1 }, - { F_RPLPERR, "PCI IP replay buffer parity error", -1, 1 }, - { F_IPSOTPERR, "PCI IP SOT buffer parity error", -1, 1 }, - { F_TRGT1GRPPERR, "PCI TRGT1 group FIFOs parity error", -1, 1 }, - { F_READRSPERR, "Outbound read error", -1, - 0 }, - { 0 } - }; + if (is_t4(adap)) { + fatal |= t4_handle_intr(adap, &sysbus_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &pcie_port_intr_info, 0, verbose); - int fat; + pcie_intr_info.fatal = 0x3fffffc0; + pcie_intr_info.details = pcie_intr_details; + } else { + pcie_intr_info.fatal = is_t5(adap) ? 
0xbfffff40 : 0x9fffff40; + pcie_intr_info.details = t5_pcie_intr_details; + } + fatal |= t4_handle_intr(adap, &pcie_intr_info, 0, verbose); - if (is_t4(adapter)) - fat = t4_handle_intr_status(adapter, - A_PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS, - sysbus_intr_info) + - t4_handle_intr_status(adapter, - A_PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, - pcie_port_intr_info) + - t4_handle_intr_status(adapter, A_PCIE_INT_CAUSE, - pcie_intr_info); - else - fat = t4_handle_intr_status(adapter, A_PCIE_INT_CAUSE, - t5_pcie_intr_info); - if (fat) - t4_fatal_err(adapter); + return (fatal); } /* * TP interrupt handler. */ -static void tp_intr_handler(struct adapter *adapter) +static bool tp_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info tp_intr_info[] = { - { 0x3fffffff, "TP parity error", -1, 1 }, - { F_FLMTXFLSTEMPTY, "TP out of Tx pages", -1, 1 }, + static const struct intr_details tp_intr_details[] = { + { 0x3fffffff, "TP parity error" }, + { F_FLMTXFLSTEMPTY, "TP out of Tx pages" }, { 0 } }; + static const struct intr_info tp_intr_info = { + .name = "TP_INT_CAUSE", + .cause_reg = A_TP_INT_CAUSE, + .enable_reg = A_TP_INT_ENABLE, + .fatal = 0x7fffffff, + .details = tp_intr_details, + .actions = NULL, + }; - if (t4_handle_intr_status(adapter, A_TP_INT_CAUSE, tp_intr_info)) - t4_fatal_err(adapter); + return (t4_handle_intr(adap, &tp_intr_info, 0, verbose)); } /* * SGE interrupt handler. */ -static void sge_intr_handler(struct adapter *adapter) +static bool sge_intr_handler(struct adapter *adap, int arg, bool verbose) { - u64 v; - u32 err; - - static const struct intr_info sge_intr_info[] = { + static const struct intr_info sge_int1_info = { + .name = "SGE_INT_CAUSE1", + .cause_reg = A_SGE_INT_CAUSE1, + .enable_reg = A_SGE_INT_ENABLE1, + .fatal = 0xffffffff, + .details = NULL, + .actions = NULL, + }; + static const struct intr_info sge_int2_info = { + .name = "SGE_INT_CAUSE2", + .cause_reg = A_SGE_INT_CAUSE2, + .enable_reg = A_SGE_INT_ENABLE2, + .fatal = 0xffffffff, + .details = NULL, + .actions = NULL, + }; + static const struct intr_details sge_int3_details[] = { + { F_ERR_FLM_DBP, + "DBP pointer delivery for invalid context or QID" }, + { F_ERR_FLM_IDMA1 | F_ERR_FLM_IDMA0, + "Invalid QID or header request by IDMA" }, + { F_ERR_FLM_HINT, "FLM hint is for invalid context or QID" }, + { F_ERR_PCIE_ERROR3, "SGE PCIe error for DBP thread 3" }, + { F_ERR_PCIE_ERROR2, "SGE PCIe error for DBP thread 2" }, + { F_ERR_PCIE_ERROR1, "SGE PCIe error for DBP thread 1" }, + { F_ERR_PCIE_ERROR0, "SGE PCIe error for DBP thread 0" }, + { F_ERR_TIMER_ABOVE_MAX_QID, + "SGE GTS with timer 0-5 for IQID > 1023" }, { F_ERR_CPL_EXCEED_IQE_SIZE, - "SGE received CPL exceeding IQE size", -1, 1 }, - { F_ERR_INVALID_CIDX_INC, - "SGE GTS CIDX increment too large", -1, 0 }, - { F_ERR_CPL_OPCODE_0, "SGE received 0-length CPL", -1, 0 }, - { F_DBFIFO_LP_INT, NULL, -1, 0, t4_db_full }, + "SGE received CPL exceeding IQE size" }, + { F_ERR_INVALID_CIDX_INC, "SGE GTS CIDX increment too large" }, + { F_ERR_ITP_TIME_PAUSED, "SGE ITP error" }, + { F_ERR_CPL_OPCODE_0, "SGE received 0-length CPL" }, + { F_ERR_DROPPED_DB, "SGE DB dropped" }, { F_ERR_DATA_CPL_ON_HIGH_QID1 | F_ERR_DATA_CPL_ON_HIGH_QID0, - "SGE IQID > 1023 received CPL for FL", -1, 0 }, - { F_ERR_BAD_DB_PIDX3, "SGE DBP 3 pidx increment too large", -1, - 0 }, - { F_ERR_BAD_DB_PIDX2, "SGE DBP 2 pidx increment too large", -1, - 0 }, - { F_ERR_BAD_DB_PIDX1, "SGE DBP 1 pidx increment too large", -1, - 0 }, - { F_ERR_BAD_DB_PIDX0, "SGE DBP 0 pidx 
increment too large", -1, - 0 }, + "SGE IQID > 1023 received CPL for FL" }, + { F_ERR_BAD_DB_PIDX3 | F_ERR_BAD_DB_PIDX2 | F_ERR_BAD_DB_PIDX1 | + F_ERR_BAD_DB_PIDX0, "SGE DBP pidx increment too large" }, + { F_ERR_ING_PCIE_CHAN, "SGE Ingress PCIe channel mismatch" }, { F_ERR_ING_CTXT_PRIO, - "SGE too many priority ingress contexts", -1, 0 }, - { F_INGRESS_SIZE_ERR, "SGE illegal ingress QID", -1, 0 }, - { F_EGRESS_SIZE_ERR, "SGE illegal egress QID", -1, 0 }, - { F_ERR_PCIE_ERROR0 | F_ERR_PCIE_ERROR1 | - F_ERR_PCIE_ERROR2 | F_ERR_PCIE_ERROR3, - "SGE PCIe error for a DBP thread", -1, 0 }, - { 0 } - }; - - static const struct intr_info t4t5_sge_intr_info[] = { - { F_ERR_DROPPED_DB, NULL, -1, 0, t4_db_dropped }, - { F_DBFIFO_HP_INT, NULL, -1, 0, t4_db_full }, + "Ingress context manager priority user error" }, { F_ERR_EGR_CTXT_PRIO, - "SGE too many priority egress contexts", -1, 0 }, + "Egress context manager priority user error" }, + { F_DBFIFO_HP_INT, "High priority DB FIFO threshold reached" }, + { F_DBFIFO_LP_INT, "Low priority DB FIFO threshold reached" }, + { F_REG_ADDRESS_ERR, "Undefined SGE register accessed" }, + { F_INGRESS_SIZE_ERR, "SGE illegal ingress QID" }, + { F_EGRESS_SIZE_ERR, "SGE illegal egress QID" }, + { 0x0000000f, "SGE context access for invalid queue" }, { 0 } }; - - /* - * For now, treat below interrupts as fatal so that we disable SGE and - * get better debug */ - static const struct intr_info t6_sge_intr_info[] = { + static const struct intr_details t6_sge_int3_details[] = { + { F_ERR_FLM_DBP, + "DBP pointer delivery for invalid context or QID" }, + { F_ERR_FLM_IDMA1 | F_ERR_FLM_IDMA0, + "Invalid QID or header request by IDMA" }, + { F_ERR_FLM_HINT, "FLM hint is for invalid context or QID" }, + { F_ERR_PCIE_ERROR3, "SGE PCIe error for DBP thread 3" }, + { F_ERR_PCIE_ERROR2, "SGE PCIe error for DBP thread 2" }, + { F_ERR_PCIE_ERROR1, "SGE PCIe error for DBP thread 1" }, + { F_ERR_PCIE_ERROR0, "SGE PCIe error for DBP thread 0" }, + { F_ERR_TIMER_ABOVE_MAX_QID, + "SGE GTS with timer 0-5 for IQID > 1023" }, + { F_ERR_CPL_EXCEED_IQE_SIZE, + "SGE received CPL exceeding IQE size" }, + { F_ERR_INVALID_CIDX_INC, "SGE GTS CIDX increment too large" }, + { F_ERR_ITP_TIME_PAUSED, "SGE ITP error" }, + { F_ERR_CPL_OPCODE_0, "SGE received 0-length CPL" }, + { F_ERR_DROPPED_DB, "SGE DB dropped" }, + { F_ERR_DATA_CPL_ON_HIGH_QID1 | F_ERR_DATA_CPL_ON_HIGH_QID0, + "SGE IQID > 1023 received CPL for FL" }, + { F_ERR_BAD_DB_PIDX3 | F_ERR_BAD_DB_PIDX2 | F_ERR_BAD_DB_PIDX1 | + F_ERR_BAD_DB_PIDX0, "SGE DBP pidx increment too large" }, + { F_ERR_ING_PCIE_CHAN, "SGE Ingress PCIe channel mismatch" }, + { F_ERR_ING_CTXT_PRIO, + "Ingress context manager priority user error" }, + { F_ERR_EGR_CTXT_PRIO, + "Egress context manager priority user error" }, + { F_DBP_TBUF_FULL, "SGE DBP tbuf full" }, { F_FATAL_WRE_LEN, - "SGE Actual WRE packet is less than advertized length", - -1, 1 }, + "SGE WRE packet less than advertized length" }, + { F_REG_ADDRESS_ERR, "Undefined SGE register accessed" }, + { F_INGRESS_SIZE_ERR, "SGE illegal ingress QID" }, + { F_EGRESS_SIZE_ERR, "SGE illegal egress QID" }, + { 0x0000000f, "SGE context access for invalid queue" }, { 0 } }; + struct intr_info sge_int3_info = { + .name = "SGE_INT_CAUSE3", + .cause_reg = A_SGE_INT_CAUSE3, + .enable_reg = A_SGE_INT_ENABLE3, + .fatal = F_ERR_CPL_EXCEED_IQE_SIZE, + .details = NULL, + .actions = NULL, + }; + static const struct intr_info sge_int4_info = { + .name = "SGE_INT_CAUSE4", + .cause_reg = A_SGE_INT_CAUSE4, + .enable_reg = 
A_SGE_INT_ENABLE4, + .fatal = 0, + .details = NULL, + .actions = NULL, + }; + static const struct intr_info sge_int5_info = { + .name = "SGE_INT_CAUSE5", + .cause_reg = A_SGE_INT_CAUSE5, + .enable_reg = A_SGE_INT_ENABLE5, + .fatal = 0xffffffff, + .details = NULL, + .actions = NULL, + }; + static const struct intr_info sge_int6_info = { + .name = "SGE_INT_CAUSE6", + .cause_reg = A_SGE_INT_CAUSE6, + .enable_reg = A_SGE_INT_ENABLE6, + .fatal = 0, + .details = NULL, + .actions = NULL, + }; - v = (u64)t4_read_reg(adapter, A_SGE_INT_CAUSE1) | - ((u64)t4_read_reg(adapter, A_SGE_INT_CAUSE2) << 32); - if (v) { - CH_ALERT(adapter, "SGE parity error (%#llx)\n", - (unsigned long long)v); - t4_write_reg(adapter, A_SGE_INT_CAUSE1, v); - t4_write_reg(adapter, A_SGE_INT_CAUSE2, v >> 32); + bool fatal; + u32 v; + + if (chip_id(adap) <= CHELSIO_T5) { + sge_int3_info.details = sge_int3_details; + } else { + sge_int3_info.details = t6_sge_int3_details; } - v |= t4_handle_intr_status(adapter, A_SGE_INT_CAUSE3, sge_intr_info); - if (chip_id(adapter) <= CHELSIO_T5) - v |= t4_handle_intr_status(adapter, A_SGE_INT_CAUSE3, - t4t5_sge_intr_info); - else - v |= t4_handle_intr_status(adapter, A_SGE_INT_CAUSE3, - t6_sge_intr_info); + fatal = false; + fatal |= t4_handle_intr(adap, &sge_int1_info, 0, verbose); + fatal |= t4_handle_intr(adap, &sge_int2_info, 0, verbose); + fatal |= t4_handle_intr(adap, &sge_int3_info, 0, verbose); + fatal |= t4_handle_intr(adap, &sge_int4_info, 0, verbose); + if (chip_id(adap) >= CHELSIO_T5) + fatal |= t4_handle_intr(adap, &sge_int5_info, 0, verbose); + if (chip_id(adap) >= CHELSIO_T6) + fatal |= t4_handle_intr(adap, &sge_int6_info, 0, verbose); - err = t4_read_reg(adapter, A_SGE_ERROR_STATS); - if (err & F_ERROR_QID_VALID) { - CH_ERR(adapter, "SGE error for queue %u\n", G_ERROR_QID(err)); - if (err & F_UNCAPTURED_ERROR) - CH_ERR(adapter, "SGE UNCAPTURED_ERROR set (clearing)\n"); - t4_write_reg(adapter, A_SGE_ERROR_STATS, F_ERROR_QID_VALID | - F_UNCAPTURED_ERROR); + v = t4_read_reg(adap, A_SGE_ERROR_STATS); + if (v & F_ERROR_QID_VALID) { + CH_ERR(adap, "SGE error for QID %u\n", G_ERROR_QID(v)); + if (v & F_UNCAPTURED_ERROR) + CH_ERR(adap, "SGE UNCAPTURED_ERROR set (clearing)\n"); + t4_write_reg(adap, A_SGE_ERROR_STATS, + F_ERROR_QID_VALID | F_UNCAPTURED_ERROR); } - if (v != 0) - t4_fatal_err(adapter); + return (fatal); } -#define CIM_OBQ_INTR (F_OBQULP0PARERR | F_OBQULP1PARERR | F_OBQULP2PARERR |\ - F_OBQULP3PARERR | F_OBQSGEPARERR | F_OBQNCSIPARERR) -#define CIM_IBQ_INTR (F_IBQTP0PARERR | F_IBQTP1PARERR | F_IBQULPPARERR |\ - F_IBQSGEHIPARERR | F_IBQSGELOPARERR | F_IBQNCSIPARERR) - /* * CIM interrupt handler. 
*/ -static void cim_intr_handler(struct adapter *adapter) +static bool cim_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info cim_intr_info[] = { - { F_PREFDROPINT, "CIM control register prefetch drop", -1, 1 }, - { CIM_OBQ_INTR, "CIM OBQ parity error", -1, 1 }, - { CIM_IBQ_INTR, "CIM IBQ parity error", -1, 1 }, - { F_MBUPPARERR, "CIM mailbox uP parity error", -1, 1 }, - { F_MBHOSTPARERR, "CIM mailbox host parity error", -1, 1 }, - { F_TIEQINPARERRINT, "CIM TIEQ outgoing parity error", -1, 1 }, - { F_TIEQOUTPARERRINT, "CIM TIEQ incoming parity error", -1, 1 }, - { F_TIMER0INT, "CIM TIMER0 interrupt", -1, 1 }, - { 0 } + static const struct intr_details cim_host_intr_details[] = { + /* T6+ */ + { F_PCIE2CIMINTFPARERR, "CIM IBQ PCIe interface parity error" }, + + /* T5+ */ + { F_MA_CIM_INTFPERR, "MA2CIM interface parity error" }, + { F_PLCIM_MSTRSPDATAPARERR, + "PL2CIM master response data parity error" }, + { F_NCSI2CIMINTFPARERR, "CIM IBQ NC-SI interface parity error" }, + { F_SGE2CIMINTFPARERR, "CIM IBQ SGE interface parity error" }, + { F_ULP2CIMINTFPARERR, "CIM IBQ ULP_TX interface parity error" }, + { F_TP2CIMINTFPARERR, "CIM IBQ TP interface parity error" }, + { F_OBQSGERX1PARERR, "CIM OBQ SGE1_RX parity error" }, + { F_OBQSGERX0PARERR, "CIM OBQ SGE0_RX parity error" }, + + /* T4+ */ + { F_TIEQOUTPARERRINT, "CIM TIEQ outgoing FIFO parity error" }, + { F_TIEQINPARERRINT, "CIM TIEQ incoming FIFO parity error" }, + { F_MBHOSTPARERR, "CIM mailbox host read parity error" }, + { F_MBUPPARERR, "CIM mailbox uP parity error" }, + { F_IBQTP0PARERR, "CIM IBQ TP0 parity error" }, + { F_IBQTP1PARERR, "CIM IBQ TP1 parity error" }, + { F_IBQULPPARERR, "CIM IBQ ULP parity error" }, + { F_IBQSGELOPARERR, "CIM IBQ SGE_LO parity error" }, + { F_IBQSGEHIPARERR | F_IBQPCIEPARERR, /* same bit */ + "CIM IBQ PCIe/SGE_HI parity error" }, + { F_IBQNCSIPARERR, "CIM IBQ NC-SI parity error" }, + { F_OBQULP0PARERR, "CIM OBQ ULP0 parity error" }, + { F_OBQULP1PARERR, "CIM OBQ ULP1 parity error" }, + { F_OBQULP2PARERR, "CIM OBQ ULP2 parity error" }, + { F_OBQULP3PARERR, "CIM OBQ ULP3 parity error" }, + { F_OBQSGEPARERR, "CIM OBQ SGE parity error" }, + { F_OBQNCSIPARERR, "CIM OBQ NC-SI parity error" }, + { F_TIMER1INT, "CIM TIMER0 interrupt" }, + { F_TIMER0INT, "CIM TIMER0 interrupt" }, + { F_PREFDROPINT, "CIM control register prefetch drop" }, + { 0} }; - static const struct intr_info cim_upintr_info[] = { - { F_RSVDSPACEINT, "CIM reserved space access", -1, 1 }, - { F_ILLTRANSINT, "CIM illegal transaction", -1, 1 }, - { F_ILLWRINT, "CIM illegal write", -1, 1 }, - { F_ILLRDINT, "CIM illegal read", -1, 1 }, - { F_ILLRDBEINT, "CIM illegal read BE", -1, 1 }, - { F_ILLWRBEINT, "CIM illegal write BE", -1, 1 }, - { F_SGLRDBOOTINT, "CIM single read from boot space", -1, 1 }, - { F_SGLWRBOOTINT, "CIM single write to boot space", -1, 1 }, - { F_BLKWRBOOTINT, "CIM block write to boot space", -1, 1 }, - { F_SGLRDFLASHINT, "CIM single read from flash space", -1, 1 }, - { F_SGLWRFLASHINT, "CIM single write to flash space", -1, 1 }, - { F_BLKWRFLASHINT, "CIM block write to flash space", -1, 1 }, - { F_SGLRDEEPROMINT, "CIM single EEPROM read", -1, 1 }, - { F_SGLWREEPROMINT, "CIM single EEPROM write", -1, 1 }, - { F_BLKRDEEPROMINT, "CIM block EEPROM read", -1, 1 }, - { F_BLKWREEPROMINT, "CIM block EEPROM write", -1, 1 }, - { F_SGLRDCTLINT , "CIM single read from CTL space", -1, 1 }, - { F_SGLWRCTLINT , "CIM single write to CTL space", -1, 1 }, - { F_BLKRDCTLINT , "CIM block read from CTL 
space", -1, 1 }, - { F_BLKWRCTLINT , "CIM block write to CTL space", -1, 1 }, - { F_SGLRDPLINT , "CIM single read from PL space", -1, 1 }, - { F_SGLWRPLINT , "CIM single write to PL space", -1, 1 }, - { F_BLKRDPLINT , "CIM block read from PL space", -1, 1 }, - { F_BLKWRPLINT , "CIM block write to PL space", -1, 1 }, - { F_REQOVRLOOKUPINT , "CIM request FIFO overwrite", -1, 1 }, - { F_RSPOVRLOOKUPINT , "CIM response FIFO overwrite", -1, 1 }, - { F_TIMEOUTINT , "CIM PIF timeout", -1, 1 }, - { F_TIMEOUTMAINT , "CIM PIF MA timeout", -1, 1 }, - { 0 } + struct intr_info cim_host_intr_info = { + .name = "CIM_HOST_INT_CAUSE", + .cause_reg = A_CIM_HOST_INT_CAUSE, + .enable_reg = A_CIM_HOST_INT_ENABLE, + .fatal = 0, + .details = cim_host_intr_details, + .actions = NULL, + }; + static const struct intr_details cim_host_upacc_intr_details[] = { + { F_EEPROMWRINT, "CIM EEPROM came out of busy state" }, + { F_TIMEOUTMAINT, "CIM PIF MA timeout" }, + { F_TIMEOUTINT, "CIM PIF timeout" }, + { F_RSPOVRLOOKUPINT, "CIM response FIFO overwrite" }, + { F_REQOVRLOOKUPINT, "CIM request FIFO overwrite" }, + { F_BLKWRPLINT, "CIM block write to PL space" }, + { F_BLKRDPLINT, "CIM block read from PL space" }, + { F_SGLWRPLINT, + "CIM single write to PL space with illegal BEs" }, + { F_SGLRDPLINT, + "CIM single read from PL space with illegal BEs" }, + { F_BLKWRCTLINT, "CIM block write to CTL space" }, + { F_BLKRDCTLINT, "CIM block read from CTL space" }, + { F_SGLWRCTLINT, + "CIM single write to CTL space with illegal BEs" }, + { F_SGLRDCTLINT, + "CIM single read from CTL space with illegal BEs" }, + { F_BLKWREEPROMINT, "CIM block write to EEPROM space" }, + { F_BLKRDEEPROMINT, "CIM block read from EEPROM space" }, + { F_SGLWREEPROMINT, + "CIM single write to EEPROM space with illegal BEs" }, + { F_SGLRDEEPROMINT, + "CIM single read from EEPROM space with illegal BEs" }, + { F_BLKWRFLASHINT, "CIM block write to flash space" }, + { F_BLKRDFLASHINT, "CIM block read from flash space" }, + { F_SGLWRFLASHINT, "CIM single write to flash space" }, + { F_SGLRDFLASHINT, + "CIM single read from flash space with illegal BEs" }, + { F_BLKWRBOOTINT, "CIM block write to boot space" }, + { F_BLKRDBOOTINT, "CIM block read from boot space" }, + { F_SGLWRBOOTINT, "CIM single write to boot space" }, + { F_SGLRDBOOTINT, + "CIM single read from boot space with illegal BEs" }, + { F_ILLWRBEINT, "CIM illegal write BEs" }, + { F_ILLRDBEINT, "CIM illegal read BEs" }, + { F_ILLRDINT, "CIM illegal read" }, + { F_ILLWRINT, "CIM illegal write" }, + { F_ILLTRANSINT, "CIM illegal transaction" }, + { F_RSVDSPACEINT, "CIM reserved space access" }, + {0} + }; + static const struct intr_info cim_host_upacc_intr_info = { + .name = "CIM_HOST_UPACC_INT_CAUSE", + .cause_reg = A_CIM_HOST_UPACC_INT_CAUSE, + .enable_reg = A_CIM_HOST_UPACC_INT_ENABLE, + .fatal = 0x3fffeeff, + .details = cim_host_upacc_intr_details, + .actions = NULL, + }; + static const struct intr_info cim_pf_host_intr_info = { + .name = "CIM_PF_HOST_INT_CAUSE", + .cause_reg = MYPF_REG(A_CIM_PF_HOST_INT_CAUSE), + .enable_reg = MYPF_REG(A_CIM_PF_HOST_INT_ENABLE), + .fatal = 0, + .details = NULL, + .actions = NULL, }; u32 val, fw_err; - int fat; + bool fatal; - fw_err = t4_read_reg(adapter, A_PCIE_FW); + fw_err = t4_read_reg(adap, A_PCIE_FW); if (fw_err & F_PCIE_FW_ERR) - t4_report_fw_error(adapter); + t4_report_fw_error(adap); - /* When the Firmware detects an internal error which normally wouldn't + /* + * When the Firmware detects an internal error which normally wouldn't * raise a Host 
Interrupt, it forces a CIM Timer0 interrupt in order * to make sure the Host sees the Firmware Crash. So if we have a * Timer0 interrupt and don't see a Firmware Crash, ignore the Timer0 * interrupt. */ - val = t4_read_reg(adapter, A_CIM_HOST_INT_CAUSE); - if (val & F_TIMER0INT) - if (!(fw_err & F_PCIE_FW_ERR) || - (G_PCIE_FW_EVAL(fw_err) != PCIE_FW_EVAL_CRASH)) - t4_write_reg(adapter, A_CIM_HOST_INT_CAUSE, - F_TIMER0INT); + val = t4_read_reg(adap, A_CIM_HOST_INT_CAUSE); + if (val & F_TIMER0INT && (!(fw_err & F_PCIE_FW_ERR) || + G_PCIE_FW_EVAL(fw_err) != PCIE_FW_EVAL_CRASH)) { + t4_write_reg(adap, A_CIM_HOST_INT_CAUSE, F_TIMER0INT); + } - fat = t4_handle_intr_status(adapter, A_CIM_HOST_INT_CAUSE, - cim_intr_info) + - t4_handle_intr_status(adapter, A_CIM_HOST_UPACC_INT_CAUSE, - cim_upintr_info); - if (fat) - t4_fatal_err(adapter); + fatal = false; + if (is_t4(adap)) + cim_host_intr_info.fatal = 0x001fffe2; + else if (is_t5(adap)) + cim_host_intr_info.fatal = 0x007dffe2; + else + cim_host_intr_info.fatal = 0x007dffe6; + fatal |= t4_handle_intr(adap, &cim_host_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &cim_host_upacc_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &cim_pf_host_intr_info, 0, verbose); + + return (fatal); } /* * ULP RX interrupt handler. */ -static void ulprx_intr_handler(struct adapter *adapter) +static bool ulprx_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info ulprx_intr_info[] = { - { F_CAUSE_CTX_1, "ULPRX channel 1 context error", -1, 1 }, - { F_CAUSE_CTX_0, "ULPRX channel 0 context error", -1, 1 }, - { 0x7fffff, "ULPRX parity error", -1, 1 }, + static const struct intr_details ulprx_intr_details[] = { + /* T5+ */ + { F_SE_CNT_MISMATCH_1, "ULPRX SE count mismatch in channel 1" }, + { F_SE_CNT_MISMATCH_0, "ULPRX SE count mismatch in channel 0" }, + + /* T4+ */ + { F_CAUSE_CTX_1, "ULPRX channel 1 context error" }, + { F_CAUSE_CTX_0, "ULPRX channel 0 context error" }, + { 0x007fffff, "ULPRX parity error" }, { 0 } }; + static const struct intr_info ulprx_intr_info = { + .name = "ULP_RX_INT_CAUSE", + .cause_reg = A_ULP_RX_INT_CAUSE, + .enable_reg = A_ULP_RX_INT_ENABLE, + .fatal = 0x07ffffff, + .details = ulprx_intr_details, + .actions = NULL, + }; + static const struct intr_info ulprx_intr2_info = { + .name = "ULP_RX_INT_CAUSE_2", + .cause_reg = A_ULP_RX_INT_CAUSE_2, + .enable_reg = A_ULP_RX_INT_ENABLE_2, + .fatal = 0, + .details = NULL, + .actions = NULL, + }; + bool fatal = false; - if (t4_handle_intr_status(adapter, A_ULP_RX_INT_CAUSE, ulprx_intr_info)) - t4_fatal_err(adapter); + fatal |= t4_handle_intr(adap, &ulprx_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &ulprx_intr2_info, 0, verbose); + + return (fatal); } /* * ULP TX interrupt handler. 
*/ -static void ulptx_intr_handler(struct adapter *adapter) +static bool ulptx_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info ulptx_intr_info[] = { - { F_PBL_BOUND_ERR_CH3, "ULPTX channel 3 PBL out of bounds", -1, - 0 }, - { F_PBL_BOUND_ERR_CH2, "ULPTX channel 2 PBL out of bounds", -1, - 0 }, - { F_PBL_BOUND_ERR_CH1, "ULPTX channel 1 PBL out of bounds", -1, - 0 }, - { F_PBL_BOUND_ERR_CH0, "ULPTX channel 0 PBL out of bounds", -1, - 0 }, - { 0xfffffff, "ULPTX parity error", -1, 1 }, + static const struct intr_details ulptx_intr_details[] = { + { F_PBL_BOUND_ERR_CH3, "ULPTX channel 3 PBL out of bounds" }, + { F_PBL_BOUND_ERR_CH2, "ULPTX channel 2 PBL out of bounds" }, + { F_PBL_BOUND_ERR_CH1, "ULPTX channel 1 PBL out of bounds" }, + { F_PBL_BOUND_ERR_CH0, "ULPTX channel 0 PBL out of bounds" }, + { 0x0fffffff, "ULPTX parity error" }, { 0 } }; + static const struct intr_info ulptx_intr_info = { + .name = "ULP_TX_INT_CAUSE", + .cause_reg = A_ULP_TX_INT_CAUSE, + .enable_reg = A_ULP_TX_INT_ENABLE, + .fatal = 0x0fffffff, + .details = ulptx_intr_details, + .actions = NULL, + }; + static const struct intr_info ulptx_intr2_info = { + .name = "ULP_TX_INT_CAUSE_2", + .cause_reg = A_ULP_TX_INT_CAUSE_2, + .enable_reg = A_ULP_TX_INT_ENABLE_2, + .fatal = 0, + .details = NULL, + .actions = NULL, + }; + bool fatal = false; - if (t4_handle_intr_status(adapter, A_ULP_TX_INT_CAUSE, ulptx_intr_info)) - t4_fatal_err(adapter); + fatal |= t4_handle_intr(adap, &ulptx_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &ulptx_intr2_info, 0, verbose); + + return (fatal); +} + +static bool pmtx_dump_dbg_stats(struct adapter *adap, int arg, bool verbose) +{ + int i; + u32 data[17]; + + t4_read_indirect(adap, A_PM_TX_DBG_CTRL, A_PM_TX_DBG_DATA, &data[0], + ARRAY_SIZE(data), A_PM_TX_DBG_STAT0); + for (i = 0; i < ARRAY_SIZE(data); i++) { + CH_ALERT(adap, " - PM_TX_DBG_STAT%u (0x%x) = 0x%08x\n", i, + A_PM_TX_DBG_STAT0 + i, data[i]); + } + + return (false); } /* * PM TX interrupt handler. 
*/ -static void pmtx_intr_handler(struct adapter *adapter) +static bool pmtx_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info pmtx_intr_info[] = { - { F_PCMD_LEN_OVFL0, "PMTX channel 0 pcmd too large", -1, 1 }, - { F_PCMD_LEN_OVFL1, "PMTX channel 1 pcmd too large", -1, 1 }, - { F_PCMD_LEN_OVFL2, "PMTX channel 2 pcmd too large", -1, 1 }, - { F_ZERO_C_CMD_ERROR, "PMTX 0-length pcmd", -1, 1 }, - { 0xffffff0, "PMTX framing error", -1, 1 }, - { F_OESPI_PAR_ERROR, "PMTX oespi parity error", -1, 1 }, - { F_DB_OPTIONS_PAR_ERROR, "PMTX db_options parity error", -1, - 1 }, - { F_ICSPI_PAR_ERROR, "PMTX icspi parity error", -1, 1 }, - { F_C_PCMD_PAR_ERROR, "PMTX c_pcmd parity error", -1, 1}, + static const struct intr_action pmtx_intr_actions[] = { + { 0xffffffff, 0, pmtx_dump_dbg_stats }, + { 0 }, + }; + static const struct intr_details pmtx_intr_details[] = { + { F_PCMD_LEN_OVFL0, "PMTX channel 0 pcmd too large" }, + { F_PCMD_LEN_OVFL1, "PMTX channel 1 pcmd too large" }, + { F_PCMD_LEN_OVFL2, "PMTX channel 2 pcmd too large" }, + { F_ZERO_C_CMD_ERROR, "PMTX 0-length pcmd" }, + { 0x0f000000, "PMTX icspi FIFO2X Rx framing error" }, + { 0x00f00000, "PMTX icspi FIFO Rx framing error" }, + { 0x000f0000, "PMTX icspi FIFO Tx framing error" }, + { 0x0000f000, "PMTX oespi FIFO Rx framing error" }, + { 0x00000f00, "PMTX oespi FIFO Tx framing error" }, + { 0x000000f0, "PMTX oespi FIFO2X Tx framing error" }, + { F_OESPI_PAR_ERROR, "PMTX oespi parity error" }, + { F_DB_OPTIONS_PAR_ERROR, "PMTX db_options parity error" }, + { F_ICSPI_PAR_ERROR, "PMTX icspi parity error" }, + { F_C_PCMD_PAR_ERROR, "PMTX c_pcmd parity error" }, { 0 } }; + static const struct intr_info pmtx_intr_info = { + .name = "PM_TX_INT_CAUSE", + .cause_reg = A_PM_TX_INT_CAUSE, + .enable_reg = A_PM_TX_INT_ENABLE, + .fatal = 0xffffffff, + .details = pmtx_intr_details, + .actions = pmtx_intr_actions, + }; - if (t4_handle_intr_status(adapter, A_PM_TX_INT_CAUSE, pmtx_intr_info)) - t4_fatal_err(adapter); + return (t4_handle_intr(adap, &pmtx_intr_info, 0, verbose)); } /* * PM RX interrupt handler. 
*/ -static void pmrx_intr_handler(struct adapter *adapter) +static bool pmrx_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info pmrx_intr_info[] = { - { F_ZERO_E_CMD_ERROR, "PMRX 0-length pcmd", -1, 1 }, - { 0x3ffff0, "PMRX framing error", -1, 1 }, - { F_OCSPI_PAR_ERROR, "PMRX ocspi parity error", -1, 1 }, - { F_DB_OPTIONS_PAR_ERROR, "PMRX db_options parity error", -1, - 1 }, - { F_IESPI_PAR_ERROR, "PMRX iespi parity error", -1, 1 }, - { F_E_PCMD_PAR_ERROR, "PMRX e_pcmd parity error", -1, 1}, + static const struct intr_details pmrx_intr_details[] = { + /* T6+ */ + { 0x18000000, "PMRX ospi overflow" }, + { F_MA_INTF_SDC_ERR, "PMRX MA interface SDC parity error" }, + { F_BUNDLE_LEN_PARERR, "PMRX bundle len FIFO parity error" }, + { F_BUNDLE_LEN_OVFL, "PMRX bundle len FIFO overflow" }, + { F_SDC_ERR, "PMRX SDC error" }, + + /* T4+ */ + { F_ZERO_E_CMD_ERROR, "PMRX 0-length pcmd" }, + { 0x003c0000, "PMRX iespi FIFO2X Rx framing error" }, + { 0x0003c000, "PMRX iespi Rx framing error" }, + { 0x00003c00, "PMRX iespi Tx framing error" }, + { 0x00000300, "PMRX ocspi Rx framing error" }, + { 0x000000c0, "PMRX ocspi Tx framing error" }, + { 0x00000030, "PMRX ocspi FIFO2X Tx framing error" }, + { F_OCSPI_PAR_ERROR, "PMRX ocspi parity error" }, + { F_DB_OPTIONS_PAR_ERROR, "PMRX db_options parity error" }, + { F_IESPI_PAR_ERROR, "PMRX iespi parity error" }, + { F_E_PCMD_PAR_ERROR, "PMRX e_pcmd parity error"}, { 0 } }; + static const struct intr_info pmrx_intr_info = { + .name = "PM_RX_INT_CAUSE", + .cause_reg = A_PM_RX_INT_CAUSE, + .enable_reg = A_PM_RX_INT_ENABLE, + .fatal = 0x1fffffff, + .details = pmrx_intr_details, + .actions = NULL, + }; - if (t4_handle_intr_status(adapter, A_PM_RX_INT_CAUSE, pmrx_intr_info)) - t4_fatal_err(adapter); + return (t4_handle_intr(adap, &pmrx_intr_info, 0, verbose)); } /* * CPL switch interrupt handler. 
*/ -static void cplsw_intr_handler(struct adapter *adapter) +static bool cplsw_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info cplsw_intr_info[] = { - { F_CIM_OP_MAP_PERR, "CPLSW CIM op_map parity error", -1, 1 }, - { F_CIM_OVFL_ERROR, "CPLSW CIM overflow", -1, 1 }, - { F_TP_FRAMING_ERROR, "CPLSW TP framing error", -1, 1 }, - { F_SGE_FRAMING_ERROR, "CPLSW SGE framing error", -1, 1 }, - { F_CIM_FRAMING_ERROR, "CPLSW CIM framing error", -1, 1 }, - { F_ZERO_SWITCH_ERROR, "CPLSW no-switch error", -1, 1 }, + static const struct intr_details cplsw_intr_details[] = { + /* T5+ */ + { F_PERR_CPL_128TO128_1, "CPLSW 128TO128 FIFO1 parity error" }, + { F_PERR_CPL_128TO128_0, "CPLSW 128TO128 FIFO0 parity error" }, + + /* T4+ */ + { F_CIM_OP_MAP_PERR, "CPLSW CIM op_map parity error" }, + { F_CIM_OVFL_ERROR, "CPLSW CIM overflow" }, + { F_TP_FRAMING_ERROR, "CPLSW TP framing error" }, + { F_SGE_FRAMING_ERROR, "CPLSW SGE framing error" }, + { F_CIM_FRAMING_ERROR, "CPLSW CIM framing error" }, + { F_ZERO_SWITCH_ERROR, "CPLSW no-switch error" }, { 0 } }; + struct intr_info cplsw_intr_info = { + .name = "CPL_INTR_CAUSE", + .cause_reg = A_CPL_INTR_CAUSE, + .enable_reg = A_CPL_INTR_ENABLE, + .fatal = 0, + .details = cplsw_intr_details, + .actions = NULL, + }; - if (t4_handle_intr_status(adapter, A_CPL_INTR_CAUSE, cplsw_intr_info)) - t4_fatal_err(adapter); + if (is_t4(adap)) + cplsw_intr_info.fatal = 0x2f; + else if (is_t5(adap)) + cplsw_intr_info.fatal = 0xef; + else + cplsw_intr_info.fatal = 0xff; + + return (t4_handle_intr(adap, &cplsw_intr_info, 0, verbose)); } +#define T4_LE_FATAL_MASK (F_PARITYERR | F_UNKNOWNCMD | F_REQQPARERR) +#define T6_LE_PERRCRC_MASK (F_PIPELINEERR | F_CLIPTCAMACCFAIL | \ + F_SRVSRAMACCFAIL | F_CLCAMCRCPARERR | F_CLCAMINTPERR | F_SSRAMINTPERR | \ + F_SRVSRAMPERR | F_VFSRAMPERR | F_TCAMINTPERR | F_TCAMCRCERR | \ + F_HASHTBLMEMACCERR | F_MAIFWRINTPERR | F_HASHTBLMEMCRCERR) +#define T6_LE_FATAL_MASK (T6_LE_PERRCRC_MASK | F_T6_UNKNOWNCMD | \ + F_TCAMACCFAIL | F_HASHTBLACCFAIL | F_CMDTIDERR | F_CMDPRSRINTERR | \ + F_TOTCNTERR | F_CLCAMFIFOERR | F_CLIPSUBERR) + /* * LE interrupt handler. 
*/ -static void le_intr_handler(struct adapter *adap) +static bool le_intr_handler(struct adapter *adap, int arg, bool verbose) { - unsigned int chip_ver = chip_id(adap); - static const struct intr_info le_intr_info[] = { - { F_LIPMISS, "LE LIP miss", -1, 0 }, - { F_LIP0, "LE 0 LIP error", -1, 0 }, - { F_PARITYERR, "LE parity error", -1, 1 }, - { F_UNKNOWNCMD, "LE unknown command", -1, 1 }, - { F_REQQPARERR, "LE request queue parity error", -1, 1 }, + static const struct intr_details le_intr_details[] = { + { F_REQQPARERR, "LE request queue parity error" }, + { F_UNKNOWNCMD, "LE unknown command" }, + { F_ACTRGNFULL, "LE active region full" }, + { F_PARITYERR, "LE parity error" }, + { F_LIPMISS, "LE LIP miss" }, + { F_LIP0, "LE 0 LIP error" }, { 0 } }; - - static const struct intr_info t6_le_intr_info[] = { - { F_T6_LIPMISS, "LE LIP miss", -1, 0 }, - { F_T6_LIP0, "LE 0 LIP error", -1, 0 }, - { F_TCAMINTPERR, "LE parity error", -1, 1 }, - { F_T6_UNKNOWNCMD, "LE unknown command", -1, 1 }, - { F_SSRAMINTPERR, "LE request queue parity error", -1, 1 }, + static const struct intr_details t6_le_intr_details[] = { + { F_CLIPSUBERR, "LE CLIP CAM reverse substitution error" }, + { F_CLCAMFIFOERR, "LE CLIP CAM internal FIFO error" }, + { F_CTCAMINVLDENT, "Invalid IPv6 CLIP TCAM entry" }, + { F_TCAMINVLDENT, "Invalid IPv6 TCAM entry" }, + { F_TOTCNTERR, "LE total active < TCAM count" }, + { F_CMDPRSRINTERR, "LE internal error in parser" }, + { F_CMDTIDERR, "Incorrect tid in LE command" }, + { F_T6_ACTRGNFULL, "LE active region full" }, + { F_T6_ACTCNTIPV6TZERO, "LE IPv6 active open TCAM counter -ve" }, + { F_T6_ACTCNTIPV4TZERO, "LE IPv4 active open TCAM counter -ve" }, + { F_T6_ACTCNTIPV6ZERO, "LE IPv6 active open counter -ve" }, + { F_T6_ACTCNTIPV4ZERO, "LE IPv4 active open counter -ve" }, + { F_HASHTBLACCFAIL, "Hash table read error (proto conflict)" }, + { F_TCAMACCFAIL, "LE TCAM access failure" }, + { F_T6_UNKNOWNCMD, "LE unknown command" }, + { F_T6_LIP0, "LE found 0 LIP during CLIP substitution" }, + { F_T6_LIPMISS, "LE CLIP lookup miss" }, + { T6_LE_PERRCRC_MASK, "LE parity/CRC error" }, { 0 } }; + struct intr_info le_intr_info = { + .name = "LE_DB_INT_CAUSE", + .cause_reg = A_LE_DB_INT_CAUSE, + .enable_reg = A_LE_DB_INT_ENABLE, + .fatal = 0, + .details = NULL, + .actions = NULL, + }; - if (t4_handle_intr_status(adap, A_LE_DB_INT_CAUSE, - (chip_ver <= CHELSIO_T5) ? - le_intr_info : t6_le_intr_info)) - t4_fatal_err(adap); + if (chip_id(adap) <= CHELSIO_T5) { + le_intr_info.details = le_intr_details; + le_intr_info.fatal = T4_LE_FATAL_MASK; + if (is_t5(adap)) + le_intr_info.fatal |= F_VFPARERR; + } else { + le_intr_info.details = t6_le_intr_details; + le_intr_info.fatal = T6_LE_FATAL_MASK; + } + + return (t4_handle_intr(adap, &le_intr_info, 0, verbose)); } /* * MPS interrupt handler. 
*/ -static void mps_intr_handler(struct adapter *adapter) +static bool mps_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info mps_rx_intr_info[] = { - { 0xffffff, "MPS Rx parity error", -1, 1 }, + static const struct intr_details mps_rx_perr_intr_details[] = { + { 0xffffffff, "MPS Rx parity error" }, { 0 } }; - static const struct intr_info mps_tx_intr_info[] = { - { V_TPFIFO(M_TPFIFO), "MPS Tx TP FIFO parity error", -1, 1 }, - { F_NCSIFIFO, "MPS Tx NC-SI FIFO parity error", -1, 1 }, - { V_TXDATAFIFO(M_TXDATAFIFO), "MPS Tx data FIFO parity error", - -1, 1 }, - { V_TXDESCFIFO(M_TXDESCFIFO), "MPS Tx desc FIFO parity error", - -1, 1 }, - { F_BUBBLE, "MPS Tx underflow", -1, 1 }, - { F_SECNTERR, "MPS Tx SOP/EOP error", -1, 1 }, - { F_FRMERR, "MPS Tx framing error", -1, 1 }, + static const struct intr_info mps_rx_perr_intr_info = { + .name = "MPS_RX_PERR_INT_CAUSE", + .cause_reg = A_MPS_RX_PERR_INT_CAUSE, + .enable_reg = A_MPS_RX_PERR_INT_ENABLE, + .fatal = 0xffffffff, + .details = mps_rx_perr_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_tx_intr_details[] = { + { F_PORTERR, "MPS Tx destination port is disabled" }, + { F_FRMERR, "MPS Tx framing error" }, + { F_SECNTERR, "MPS Tx SOP/EOP error" }, + { F_BUBBLE, "MPS Tx underflow" }, + { V_TXDESCFIFO(M_TXDESCFIFO), "MPS Tx desc FIFO parity error" }, + { V_TXDATAFIFO(M_TXDATAFIFO), "MPS Tx data FIFO parity error" }, + { F_NCSIFIFO, "MPS Tx NC-SI FIFO parity error" }, + { V_TPFIFO(M_TPFIFO), "MPS Tx TP FIFO parity error" }, { 0 } }; - static const struct intr_info mps_trc_intr_info[] = { - { V_FILTMEM(M_FILTMEM), "MPS TRC filter parity error", -1, 1 }, - { V_PKTFIFO(M_PKTFIFO), "MPS TRC packet FIFO parity error", -1, - 1 }, - { F_MISCPERR, "MPS TRC misc parity error", -1, 1 }, + struct intr_info mps_tx_intr_info = { + .name = "MPS_TX_INT_CAUSE", + .cause_reg = A_MPS_TX_INT_CAUSE, + .enable_reg = A_MPS_TX_INT_ENABLE, + .fatal = 0x1ffff, + .details = mps_tx_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_trc_intr_details[] = { + { F_MISCPERR, "MPS TRC misc parity error" }, + { V_PKTFIFO(M_PKTFIFO), "MPS TRC packet FIFO parity error" }, + { V_FILTMEM(M_FILTMEM), "MPS TRC filter parity error" }, { 0 } }; - static const struct intr_info mps_stat_sram_intr_info[] = { - { 0x1fffff, "MPS statistics SRAM parity error", -1, 1 }, + static const struct intr_info mps_trc_intr_info = { + .name = "MPS_TRC_INT_CAUSE", + .cause_reg = A_MPS_TRC_INT_CAUSE, + .enable_reg = A_MPS_TRC_INT_ENABLE, + .fatal = F_MISCPERR | V_PKTFIFO(M_PKTFIFO) | V_FILTMEM(M_FILTMEM), + .details = mps_trc_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_stat_sram_intr_details[] = { + { 0xffffffff, "MPS statistics SRAM parity error" }, { 0 } }; - static const struct intr_info mps_stat_tx_intr_info[] = { - { 0xfffff, "MPS statistics Tx FIFO parity error", -1, 1 }, + static const struct intr_info mps_stat_sram_intr_info = { + .name = "MPS_STAT_PERR_INT_CAUSE_SRAM", + .cause_reg = A_MPS_STAT_PERR_INT_CAUSE_SRAM, + .enable_reg = A_MPS_STAT_PERR_INT_ENABLE_SRAM, + .fatal = 0x1fffffff, + .details = mps_stat_sram_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_stat_tx_intr_details[] = { + { 0xffffff, "MPS statistics Tx FIFO parity error" }, { 0 } }; - static const struct intr_info mps_stat_rx_intr_info[] = { - { 0xffffff, "MPS statistics Rx FIFO parity error", -1, 1 }, + static const struct intr_info mps_stat_tx_intr_info = { + .name = 
"MPS_STAT_PERR_INT_CAUSE_TX_FIFO", + .cause_reg = A_MPS_STAT_PERR_INT_CAUSE_TX_FIFO, + .enable_reg = A_MPS_STAT_PERR_INT_ENABLE_TX_FIFO, + .fatal = 0xffffff, + .details = mps_stat_tx_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_stat_rx_intr_details[] = { + { 0xffffff, "MPS statistics Rx FIFO parity error" }, { 0 } }; - static const struct intr_info mps_cls_intr_info[] = { - { F_MATCHSRAM, "MPS match SRAM parity error", -1, 1 }, - { F_MATCHTCAM, "MPS match TCAM parity error", -1, 1 }, - { F_HASHSRAM, "MPS hash SRAM parity error", -1, 1 }, + static const struct intr_info mps_stat_rx_intr_info = { + .name = "MPS_STAT_PERR_INT_CAUSE_RX_FIFO", + .cause_reg = A_MPS_STAT_PERR_INT_CAUSE_RX_FIFO, + .enable_reg = A_MPS_STAT_PERR_INT_ENABLE_RX_FIFO, + .fatal = 0xffffff, + .details = mps_stat_rx_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_cls_intr_details[] = { + { F_HASHSRAM, "MPS hash SRAM parity error" }, + { F_MATCHTCAM, "MPS match TCAM parity error" }, + { F_MATCHSRAM, "MPS match SRAM parity error" }, { 0 } }; + static const struct intr_info mps_cls_intr_info = { + .name = "MPS_CLS_INT_CAUSE", + .cause_reg = A_MPS_CLS_INT_CAUSE, + .enable_reg = A_MPS_CLS_INT_ENABLE, + .fatal = F_MATCHSRAM | F_MATCHTCAM | F_HASHSRAM, + .details = mps_cls_intr_details, + .actions = NULL, + }; + static const struct intr_details mps_stat_sram1_intr_details[] = { + { 0xff, "MPS statistics SRAM1 parity error" }, + { 0 } + }; + static const struct intr_info mps_stat_sram1_intr_info = { + .name = "MPS_STAT_PERR_INT_CAUSE_SRAM1", + .cause_reg = A_MPS_STAT_PERR_INT_CAUSE_SRAM1, + .enable_reg = A_MPS_STAT_PERR_INT_ENABLE_SRAM1, + .fatal = 0xff, + .details = mps_stat_sram1_intr_details, + .actions = NULL, + }; - int fat; + bool fatal; - fat = t4_handle_intr_status(adapter, A_MPS_RX_PERR_INT_CAUSE, - mps_rx_intr_info) + - t4_handle_intr_status(adapter, A_MPS_TX_INT_CAUSE, - mps_tx_intr_info) + - t4_handle_intr_status(adapter, A_MPS_TRC_INT_CAUSE, - mps_trc_intr_info) + - t4_handle_intr_status(adapter, A_MPS_STAT_PERR_INT_CAUSE_SRAM, - mps_stat_sram_intr_info) + - t4_handle_intr_status(adapter, A_MPS_STAT_PERR_INT_CAUSE_TX_FIFO, - mps_stat_tx_intr_info) + - t4_handle_intr_status(adapter, A_MPS_STAT_PERR_INT_CAUSE_RX_FIFO, - mps_stat_rx_intr_info) + - t4_handle_intr_status(adapter, A_MPS_CLS_INT_CAUSE, - mps_cls_intr_info); + if (chip_id(adap) == CHELSIO_T6) + mps_tx_intr_info.fatal &= ~F_BUBBLE; + + fatal = false; + fatal |= t4_handle_intr(adap, &mps_rx_perr_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &mps_tx_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &mps_trc_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &mps_stat_sram_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &mps_stat_tx_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &mps_stat_rx_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &mps_cls_intr_info, 0, verbose); + if (chip_id(adap) > CHELSIO_T4) { + fatal |= t4_handle_intr(adap, &mps_stat_sram1_intr_info, 0, + verbose); + } + + t4_write_reg(adap, A_MPS_INT_CAUSE, is_t4(adap) ? 0 : 0xffffffff); + t4_read_reg(adap, A_MPS_INT_CAUSE); /* flush */ + + return (fatal); - t4_write_reg(adapter, A_MPS_INT_CAUSE, 0); - t4_read_reg(adapter, A_MPS_INT_CAUSE); /* flush */ - if (fat) - t4_fatal_err(adapter); } -#define MEM_INT_MASK (F_PERR_INT_CAUSE | F_ECC_CE_INT_CAUSE | \ - F_ECC_UE_INT_CAUSE) - /* * EDC/MC interrupt handler. 
*/ -static void mem_intr_handler(struct adapter *adapter, int idx) +static bool mem_intr_handler(struct adapter *adap, int idx, bool verbose) { - static const char name[4][7] = { "EDC0", "EDC1", "MC/MC0", "MC1" }; + static const char name[4][5] = { "EDC0", "EDC1", "MC0", "MC1" }; + unsigned int count_reg, v; + static const struct intr_details mem_intr_details[] = { + { F_ECC_UE_INT_CAUSE, "Uncorrectable ECC data error(s)" }, + { F_ECC_CE_INT_CAUSE, "Correctable ECC data error(s)" }, + { F_PERR_INT_CAUSE, "FIFO parity error" }, + { 0 } + }; + struct intr_info ii = { + .fatal = F_PERR_INT_CAUSE | F_ECC_UE_INT_CAUSE, + .details = mem_intr_details, + .actions = NULL, + }; + bool fatal; - unsigned int addr, cnt_addr, v; - - if (idx <= MEM_EDC1) { - addr = EDC_REG(A_EDC_INT_CAUSE, idx); - cnt_addr = EDC_REG(A_EDC_ECC_STATUS, idx); - } else if (idx == MEM_MC) { - if (is_t4(adapter)) { - addr = A_MC_INT_CAUSE; - cnt_addr = A_MC_ECC_STATUS; + switch (idx) { + case MEM_EDC0: + ii.name = "EDC0_INT_CAUSE"; + ii.cause_reg = EDC_REG(A_EDC_INT_CAUSE, 0); + ii.enable_reg = EDC_REG(A_EDC_INT_ENABLE, 0); + count_reg = EDC_REG(A_EDC_ECC_STATUS, 0); + break; + case MEM_EDC1: + ii.name = "EDC1_INT_CAUSE"; + ii.cause_reg = EDC_REG(A_EDC_INT_CAUSE, 1); + ii.enable_reg = EDC_REG(A_EDC_INT_ENABLE, 1); + count_reg = EDC_REG(A_EDC_ECC_STATUS, 1); + break; + case MEM_MC0: + ii.name = "MC0_INT_CAUSE"; + if (is_t4(adap)) { + ii.cause_reg = A_MC_INT_CAUSE; + ii.enable_reg = A_MC_INT_ENABLE; + count_reg = A_MC_ECC_STATUS; } else { - addr = A_MC_P_INT_CAUSE; - cnt_addr = A_MC_P_ECC_STATUS; + ii.cause_reg = A_MC_P_INT_CAUSE; + ii.enable_reg = A_MC_P_INT_ENABLE; + count_reg = A_MC_P_ECC_STATUS; } - } else { - addr = MC_REG(A_MC_P_INT_CAUSE, 1); - cnt_addr = MC_REG(A_MC_P_ECC_STATUS, 1); + break; + case MEM_MC1: + ii.name = "MC1_INT_CAUSE"; + ii.cause_reg = MC_REG(A_MC_P_INT_CAUSE, 1); + ii.enable_reg = MC_REG(A_MC_P_INT_ENABLE, 1); + count_reg = MC_REG(A_MC_P_ECC_STATUS, 1); + break; } - v = t4_read_reg(adapter, addr) & MEM_INT_MASK; - if (v & F_PERR_INT_CAUSE) - CH_ALERT(adapter, "%s FIFO parity error\n", - name[idx]); - if (v & F_ECC_CE_INT_CAUSE) { - u32 cnt = G_ECC_CECNT(t4_read_reg(adapter, cnt_addr)); + fatal = t4_handle_intr(adap, &ii, 0, verbose); - if (idx <= MEM_EDC1) - t4_edc_err_read(adapter, idx); - - t4_write_reg(adapter, cnt_addr, V_ECC_CECNT(M_ECC_CECNT)); - CH_WARN_RATELIMIT(adapter, - "%u %s correctable ECC data error%s\n", - cnt, name[idx], cnt > 1 ? "s" : ""); + v = t4_read_reg(adap, count_reg); + if (v != 0) { + if (G_ECC_UECNT(v) != 0) { + CH_ALERT(adap, + "%s: %u uncorrectable ECC data error(s)\n", + name[idx], G_ECC_UECNT(v)); + } + if (G_ECC_CECNT(v) != 0) { + if (idx <= MEM_EDC1) + t4_edc_err_read(adap, idx); + CH_WARN_RATELIMIT(adap, + "%s: %u correctable ECC data error(s)\n", + name[idx], G_ECC_CECNT(v)); + } + t4_write_reg(adap, count_reg, 0xffffffff); } - if (v & F_ECC_UE_INT_CAUSE) - CH_ALERT(adapter, - "%s uncorrectable ECC data error\n", name[idx]); - t4_write_reg(adapter, addr, v); - if (v & (F_PERR_INT_CAUSE | F_ECC_UE_INT_CAUSE)) - t4_fatal_err(adapter); + return (fatal); } +static bool ma_wrap_status(struct adapter *adap, int arg, bool verbose) +{ + u32 v; + + v = t4_read_reg(adap, A_MA_INT_WRAP_STATUS); + CH_ALERT(adap, + "MA address wrap-around error by client %u to address %#x\n", + G_MEM_WRAP_CLIENT_NUM(v), G_MEM_WRAP_ADDRESS(v) << 4); + t4_write_reg(adap, A_MA_INT_WRAP_STATUS, v); + + return (false); +} + + /* * MA interrupt handler. 
*/ -static void ma_intr_handler(struct adapter *adapter) +static bool ma_intr_handler(struct adapter *adap, int arg, bool verbose) { - u32 v, status = t4_read_reg(adapter, A_MA_INT_CAUSE); + static const struct intr_action ma_intr_actions[] = { + { F_MEM_WRAP_INT_CAUSE, 0, ma_wrap_status }, + { 0 }, + }; + static const struct intr_info ma_intr_info = { + .name = "MA_INT_CAUSE", + .cause_reg = A_MA_INT_CAUSE, + .enable_reg = A_MA_INT_ENABLE, + .fatal = F_MEM_WRAP_INT_CAUSE | F_MEM_PERR_INT_CAUSE | + F_MEM_TO_INT_CAUSE, + .details = NULL, + .actions = ma_intr_actions, + }; + static const struct intr_info ma_perr_status1 = { + .name = "MA_PARITY_ERROR_STATUS1", + .cause_reg = A_MA_PARITY_ERROR_STATUS1, + .enable_reg = A_MA_PARITY_ERROR_ENABLE1, + .fatal = 0xffffffff, + .details = NULL, + .actions = NULL, + }; + static const struct intr_info ma_perr_status2 = { + .name = "MA_PARITY_ERROR_STATUS2", + .cause_reg = A_MA_PARITY_ERROR_STATUS2, + .enable_reg = A_MA_PARITY_ERROR_ENABLE2, + .fatal = 0xffffffff, + .details = NULL, + .actions = NULL, + }; + bool fatal; - if (status & F_MEM_PERR_INT_CAUSE) { - CH_ALERT(adapter, - "MA parity error, parity status %#x\n", - t4_read_reg(adapter, A_MA_PARITY_ERROR_STATUS1)); - if (is_t5(adapter)) - CH_ALERT(adapter, - "MA parity error, parity status %#x\n", - t4_read_reg(adapter, - A_MA_PARITY_ERROR_STATUS2)); - } - if (status & F_MEM_WRAP_INT_CAUSE) { - v = t4_read_reg(adapter, A_MA_INT_WRAP_STATUS); - CH_ALERT(adapter, "MA address wrap-around error by " - "client %u to address %#x\n", - G_MEM_WRAP_CLIENT_NUM(v), - G_MEM_WRAP_ADDRESS(v) << 4); - } - t4_write_reg(adapter, A_MA_INT_CAUSE, status); - t4_fatal_err(adapter); + fatal = false; + fatal |= t4_handle_intr(adap, &ma_intr_info, 0, verbose); + fatal |= t4_handle_intr(adap, &ma_perr_status1, 0, verbose); + if (chip_id(adap) > CHELSIO_T4) + fatal |= t4_handle_intr(adap, &ma_perr_status2, 0, verbose); + + return (fatal); } /* * SMB interrupt handler. */ -static void smb_intr_handler(struct adapter *adap) +static bool smb_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info smb_intr_info[] = { - { F_MSTTXFIFOPARINT, "SMB master Tx FIFO parity error", -1, 1 }, - { F_MSTRXFIFOPARINT, "SMB master Rx FIFO parity error", -1, 1 }, - { F_SLVFIFOPARINT, "SMB slave FIFO parity error", -1, 1 }, + static const struct intr_details smb_intr_details[] = { + { F_MSTTXFIFOPARINT, "SMB master Tx FIFO parity error" }, + { F_MSTRXFIFOPARINT, "SMB master Rx FIFO parity error" }, + { F_SLVFIFOPARINT, "SMB slave FIFO parity error" }, { 0 } }; + static const struct intr_info smb_intr_info = { + .name = "SMB_INT_CAUSE", + .cause_reg = A_SMB_INT_CAUSE, + .enable_reg = A_SMB_INT_ENABLE, + .fatal = F_SLVFIFOPARINT | F_MSTRXFIFOPARINT | F_MSTTXFIFOPARINT, + .details = smb_intr_details, + .actions = NULL, + }; - if (t4_handle_intr_status(adap, A_SMB_INT_CAUSE, smb_intr_info)) - t4_fatal_err(adap); + return (t4_handle_intr(adap, &smb_intr_info, 0, verbose)); } /* * NC-SI interrupt handler. 
*/ -static void ncsi_intr_handler(struct adapter *adap) +static bool ncsi_intr_handler(struct adapter *adap, int arg, bool verbose) { - static const struct intr_info ncsi_intr_info[] = { - { F_CIM_DM_PRTY_ERR, "NC-SI CIM parity error", -1, 1 }, - { F_MPS_DM_PRTY_ERR, "NC-SI MPS parity error", -1, 1 }, - { F_TXFIFO_PRTY_ERR, "NC-SI Tx FIFO parity error", -1, 1 }, - { F_RXFIFO_PRTY_ERR, "NC-SI Rx FIFO parity error", -1, 1 }, + static const struct intr_details ncsi_intr_details[] = { + { F_CIM_DM_PRTY_ERR, "NC-SI CIM parity error" }, + { F_MPS_DM_PRTY_ERR, "NC-SI MPS parity error" }, + { F_TXFIFO_PRTY_ERR, "NC-SI Tx FIFO parity error" }, + { F_RXFIFO_PRTY_ERR, "NC-SI Rx FIFO parity error" }, { 0 } }; + static const struct intr_info ncsi_intr_info = { + .name = "NCSI_INT_CAUSE", + .cause_reg = A_NCSI_INT_CAUSE, + .enable_reg = A_NCSI_INT_ENABLE, + .fatal = F_RXFIFO_PRTY_ERR | F_TXFIFO_PRTY_ERR | + F_MPS_DM_PRTY_ERR | F_CIM_DM_PRTY_ERR, + .details = ncsi_intr_details, + .actions = NULL, + }; - if (t4_handle_intr_status(adap, A_NCSI_INT_CAUSE, ncsi_intr_info)) - t4_fatal_err(adap); + return (t4_handle_intr(adap, &ncsi_intr_info, 0, verbose)); } /* - * XGMAC interrupt handler. + * MAC interrupt handler. */ -static void xgmac_intr_handler(struct adapter *adap, int port) +static bool mac_intr_handler(struct adapter *adap, int port, bool verbose) { - u32 v, int_cause_reg; + static const struct intr_details mac_intr_details[] = { + { F_TXFIFO_PRTY_ERR, "MAC Tx FIFO parity error" }, + { F_RXFIFO_PRTY_ERR, "MAC Rx FIFO parity error" }, + { 0 } + }; + char name[32]; + struct intr_info ii; + bool fatal = false; + + if (is_t4(adap)) { + snprintf(name, sizeof(name), "XGMAC_PORT%u_INT_CAUSE", port); + ii.name = &name[0]; + ii.cause_reg = PORT_REG(port, A_XGMAC_PORT_INT_CAUSE); + ii.enable_reg = PORT_REG(port, A_XGMAC_PORT_INT_EN); + ii.fatal = F_TXFIFO_PRTY_ERR | F_RXFIFO_PRTY_ERR, + ii.details = mac_intr_details, + ii.actions = NULL; + } else { + snprintf(name, sizeof(name), "MAC_PORT%u_INT_CAUSE", port); + ii.name = &name[0]; + ii.cause_reg = T5_PORT_REG(port, A_MAC_PORT_INT_CAUSE); + ii.enable_reg = T5_PORT_REG(port, A_MAC_PORT_INT_EN); + ii.fatal = F_TXFIFO_PRTY_ERR | F_RXFIFO_PRTY_ERR, + ii.details = mac_intr_details, + ii.actions = NULL; + } + fatal |= t4_handle_intr(adap, &ii, 0, verbose); + + if (chip_id(adap) >= CHELSIO_T5) { + snprintf(name, sizeof(name), "MAC_PORT%u_PERR_INT_CAUSE", port); + ii.name = &name[0]; + ii.cause_reg = T5_PORT_REG(port, A_MAC_PORT_PERR_INT_CAUSE); + ii.enable_reg = T5_PORT_REG(port, A_MAC_PORT_PERR_INT_EN); + ii.fatal = 0; + ii.details = NULL; + ii.actions = NULL; + fatal |= t4_handle_intr(adap, &ii, 0, verbose); + } + + if (chip_id(adap) >= CHELSIO_T6) { + snprintf(name, sizeof(name), "MAC_PORT%u_PERR_INT_CAUSE_100G", port); + ii.name = &name[0]; + ii.cause_reg = T5_PORT_REG(port, A_MAC_PORT_PERR_INT_CAUSE_100G); + ii.enable_reg = T5_PORT_REG(port, A_MAC_PORT_PERR_INT_EN_100G); + ii.fatal = 0; + ii.details = NULL; + ii.actions = NULL; + fatal |= t4_handle_intr(adap, &ii, 0, verbose); + } + + return (fatal); +} + +static bool plpl_intr_handler(struct adapter *adap, int arg, bool verbose) +{ + static const struct intr_details plpl_intr_details[] = { + { F_FATALPERR, "Fatal parity error" }, + { F_PERRVFID, "VFID_MAP parity error" }, + { 0 } + }; + struct intr_info plpl_intr_info = { + .name = "PL_PL_INT_CAUSE", + .cause_reg = A_PL_PL_INT_CAUSE, + .enable_reg = A_PL_PL_INT_ENABLE, + .fatal = F_FATALPERR, + .details = plpl_intr_details, + .actions = NULL, + }; if 
(is_t4(adap)) - int_cause_reg = PORT_REG(port, A_XGMAC_PORT_INT_CAUSE); - else - int_cause_reg = T5_PORT_REG(port, A_MAC_PORT_INT_CAUSE); + plpl_intr_info.fatal |= F_PERRVFID; - v = t4_read_reg(adap, int_cause_reg); - - v &= (F_TXFIFO_PRTY_ERR | F_RXFIFO_PRTY_ERR); - if (!v) - return; - - if (v & F_TXFIFO_PRTY_ERR) - CH_ALERT(adap, "XGMAC %d Tx FIFO parity error\n", - port); - if (v & F_RXFIFO_PRTY_ERR) - CH_ALERT(adap, "XGMAC %d Rx FIFO parity error\n", - port); - t4_write_reg(adap, int_cause_reg, v); - t4_fatal_err(adap); + return (t4_handle_intr(adap, &plpl_intr_info, 0, verbose)); } -/* - * PL interrupt handler. - */ -static void pl_intr_handler(struct adapter *adap) -{ - static const struct intr_info pl_intr_info[] = { - { F_FATALPERR, "Fatal parity error", -1, 1 }, - { F_PERRVFID, "PL VFID_MAP parity error", -1, 1 }, - { 0 } - }; - - static const struct intr_info t5_pl_intr_info[] = { - { F_FATALPERR, "Fatal parity error", -1, 1 }, - { 0 } - }; - - if (t4_handle_intr_status(adap, A_PL_PL_INT_CAUSE, - is_t4(adap) ? - pl_intr_info : t5_pl_intr_info)) - t4_fatal_err(adap); -} - -#define PF_INTR_MASK (F_PFSW | F_PFCIM) - /** * t4_slow_intr_handler - control path interrupt handler - * @adapter: the adapter + * @adap: the adapter + * @verbose: increased verbosity, for debug * * T4 interrupt handler for non-data global interrupt events, e.g., errors. * The designation 'slow' is because it involves register reads, while * data interrupts typically don't involve any MMIOs. */ -int t4_slow_intr_handler(struct adapter *adapter) +int t4_slow_intr_handler(struct adapter *adap, bool verbose) { - u32 cause = t4_read_reg(adapter, A_PL_INT_CAUSE); + static const struct intr_details pl_intr_details[] = { + { F_MC1, "MC1" }, + { F_UART, "UART" }, + { F_ULP_TX, "ULP TX" }, + { F_SGE, "SGE" }, + { F_HMA, "HMA" }, + { F_CPL_SWITCH, "CPL Switch" }, + { F_ULP_RX, "ULP RX" }, + { F_PM_RX, "PM RX" }, + { F_PM_TX, "PM TX" }, + { F_MA, "MA" }, + { F_TP, "TP" }, + { F_LE, "LE" }, + { F_EDC1, "EDC1" }, + { F_EDC0, "EDC0" }, + { F_MC, "MC0" }, + { F_PCIE, "PCIE" }, + { F_PMU, "PMU" }, + { F_MAC3, "MAC3" }, + { F_MAC2, "MAC2" }, + { F_MAC1, "MAC1" }, + { F_MAC0, "MAC0" }, + { F_SMB, "SMB" }, + { F_SF, "SF" }, + { F_PL, "PL" }, + { F_NCSI, "NC-SI" }, + { F_MPS, "MPS" }, + { F_MI, "MI" }, + { F_DBG, "DBG" }, + { F_I2CM, "I2CM" }, + { F_CIM, "CIM" }, + { 0 } + }; + static const struct intr_info pl_perr_cause = { + .name = "PL_PERR_CAUSE", + .cause_reg = A_PL_PERR_CAUSE, + .enable_reg = A_PL_PERR_ENABLE, + .fatal = 0xffffffff, + .details = pl_intr_details, + .actions = NULL, + }; + static const struct intr_action pl_intr_action[] = { + { F_MC1, MEM_MC1, mem_intr_handler }, + { F_ULP_TX, -1, ulptx_intr_handler }, + { F_SGE, -1, sge_intr_handler }, + { F_CPL_SWITCH, -1, cplsw_intr_handler }, + { F_ULP_RX, -1, ulprx_intr_handler }, + { F_PM_RX, -1, pmrx_intr_handler}, + { F_PM_TX, -1, pmtx_intr_handler}, + { F_MA, -1, ma_intr_handler }, + { F_TP, -1, tp_intr_handler }, + { F_LE, -1, le_intr_handler }, + { F_EDC1, MEM_EDC1, mem_intr_handler }, + { F_EDC0, MEM_EDC0, mem_intr_handler }, + { F_MC0, MEM_MC0, mem_intr_handler }, + { F_PCIE, -1, pcie_intr_handler }, + { F_MAC3, 3, mac_intr_handler}, + { F_MAC2, 2, mac_intr_handler}, + { F_MAC1, 1, mac_intr_handler}, + { F_MAC0, 0, mac_intr_handler}, + { F_SMB, -1, smb_intr_handler}, + { F_PL, -1, plpl_intr_handler }, + { F_NCSI, -1, ncsi_intr_handler}, + { F_MPS, -1, mps_intr_handler }, + { F_CIM, -1, cim_intr_handler }, + { 0 } + }; + static const struct intr_info pl_intr_info 
= { + .name = "PL_INT_CAUSE", + .cause_reg = A_PL_INT_CAUSE, + .enable_reg = A_PL_INT_ENABLE, + .fatal = 0, + .details = pl_intr_details, + .actions = pl_intr_action, + }; + bool fatal; + u32 perr; - if (!(cause & GLBL_INTR_MASK)) - return 0; - if (cause & F_CIM) - cim_intr_handler(adapter); - if (cause & F_MPS) - mps_intr_handler(adapter); - if (cause & F_NCSI) - ncsi_intr_handler(adapter); - if (cause & F_PL) - pl_intr_handler(adapter); - if (cause & F_SMB) - smb_intr_handler(adapter); - if (cause & F_MAC0) - xgmac_intr_handler(adapter, 0); - if (cause & F_MAC1) - xgmac_intr_handler(adapter, 1); - if (cause & F_MAC2) - xgmac_intr_handler(adapter, 2); - if (cause & F_MAC3) - xgmac_intr_handler(adapter, 3); - if (cause & F_PCIE) - pcie_intr_handler(adapter); - if (cause & F_MC0) - mem_intr_handler(adapter, MEM_MC); - if (is_t5(adapter) && (cause & F_MC1)) - mem_intr_handler(adapter, MEM_MC1); - if (cause & F_EDC0) - mem_intr_handler(adapter, MEM_EDC0); - if (cause & F_EDC1) - mem_intr_handler(adapter, MEM_EDC1); - if (cause & F_LE) - le_intr_handler(adapter); - if (cause & F_TP) - tp_intr_handler(adapter); - if (cause & F_MA) - ma_intr_handler(adapter); - if (cause & F_PM_TX) - pmtx_intr_handler(adapter); - if (cause & F_PM_RX) - pmrx_intr_handler(adapter); - if (cause & F_ULP_RX) - ulprx_intr_handler(adapter); - if (cause & F_CPL_SWITCH) - cplsw_intr_handler(adapter); - if (cause & F_SGE) - sge_intr_handler(adapter); - if (cause & F_ULP_TX) - ulptx_intr_handler(adapter); + perr = t4_read_reg(adap, pl_perr_cause.cause_reg); + if (verbose || perr != 0) { + t4_show_intr_info(adap, &pl_perr_cause, perr); + if (perr != 0) + t4_write_reg(adap, pl_perr_cause.cause_reg, perr); + if (verbose) + perr |= t4_read_reg(adap, pl_intr_info.enable_reg); + } + fatal = t4_handle_intr(adap, &pl_intr_info, perr, verbose); + if (fatal) + t4_fatal_err(adap, false); - /* Clear the interrupts just processed for which we are the master. */ - t4_write_reg(adapter, A_PL_INT_CAUSE, cause & GLBL_INTR_MASK); - (void)t4_read_reg(adapter, A_PL_INT_CAUSE); /* flush */ - return 1; + return (0); } +#define PF_INTR_MASK (F_PFSW | F_PFCIM) + /** * t4_intr_enable - enable interrupts * @adapter: the adapter whose interrupts should be enabled @@ -4757,92 +5278,131 @@ int t4_slow_intr_handler(struct adapter *adapter) * non PF-specific interrupts from the various HW modules. Only one PCI * function at a time should be doing this. */ -void t4_intr_enable(struct adapter *adapter) +void t4_intr_enable(struct adapter *adap) { u32 val = 0; - u32 whoami = t4_read_reg(adapter, A_PL_WHOAMI); - u32 pf = (chip_id(adapter) <= CHELSIO_T5 - ? 
G_SOURCEPF(whoami) - : G_T6_SOURCEPF(whoami)); - if (chip_id(adapter) <= CHELSIO_T5) + if (chip_id(adap) <= CHELSIO_T5) val = F_ERR_DROPPED_DB | F_ERR_EGR_CTXT_PRIO | F_DBFIFO_HP_INT; else val = F_ERR_PCIE_ERROR0 | F_ERR_PCIE_ERROR1 | F_FATAL_WRE_LEN; - t4_write_reg(adapter, A_SGE_INT_ENABLE3, F_ERR_CPL_EXCEED_IQE_SIZE | - F_ERR_INVALID_CIDX_INC | F_ERR_CPL_OPCODE_0 | - F_ERR_DATA_CPL_ON_HIGH_QID1 | F_INGRESS_SIZE_ERR | - F_ERR_DATA_CPL_ON_HIGH_QID0 | F_ERR_BAD_DB_PIDX3 | - F_ERR_BAD_DB_PIDX2 | F_ERR_BAD_DB_PIDX1 | - F_ERR_BAD_DB_PIDX0 | F_ERR_ING_CTXT_PRIO | - F_DBFIFO_LP_INT | F_EGRESS_SIZE_ERR | val); - t4_write_reg(adapter, MYPF_REG(A_PL_PF_INT_ENABLE), PF_INTR_MASK); - t4_set_reg_field(adapter, A_PL_INT_MAP0, 0, 1 << pf); + val |= F_ERR_CPL_EXCEED_IQE_SIZE | F_ERR_INVALID_CIDX_INC | + F_ERR_CPL_OPCODE_0 | F_ERR_DATA_CPL_ON_HIGH_QID1 | + F_INGRESS_SIZE_ERR | F_ERR_DATA_CPL_ON_HIGH_QID0 | + F_ERR_BAD_DB_PIDX3 | F_ERR_BAD_DB_PIDX2 | F_ERR_BAD_DB_PIDX1 | + F_ERR_BAD_DB_PIDX0 | F_ERR_ING_CTXT_PRIO | F_DBFIFO_LP_INT | + F_EGRESS_SIZE_ERR; + t4_set_reg_field(adap, A_SGE_INT_ENABLE3, val, val); + t4_write_reg(adap, MYPF_REG(A_PL_PF_INT_ENABLE), PF_INTR_MASK); + t4_set_reg_field(adap, A_PL_INT_MAP0, 0, 1 << adap->pf); } /** * t4_intr_disable - disable interrupts - * @adapter: the adapter whose interrupts should be disabled + * @adap: the adapter whose interrupts should be disabled * * Disable interrupts. We only disable the top-level interrupt * concentrators. The caller must be a PCI function managing global * interrupts. */ -void t4_intr_disable(struct adapter *adapter) +void t4_intr_disable(struct adapter *adap) { - u32 whoami = t4_read_reg(adapter, A_PL_WHOAMI); - u32 pf = (chip_id(adapter) <= CHELSIO_T5 - ? G_SOURCEPF(whoami) - : G_T6_SOURCEPF(whoami)); - t4_write_reg(adapter, MYPF_REG(A_PL_PF_INT_ENABLE), 0); - t4_set_reg_field(adapter, A_PL_INT_MAP0, 1 << pf, 0); + t4_write_reg(adap, MYPF_REG(A_PL_PF_INT_ENABLE), 0); + t4_set_reg_field(adap, A_PL_INT_MAP0, 1 << adap->pf, 0); } /** * t4_intr_clear - clear all interrupts - * @adapter: the adapter whose interrupts should be cleared + * @adap: the adapter whose interrupts should be cleared * * Clears all interrupts. The caller must be a PCI function managing * global interrupts. 
*/ -void t4_intr_clear(struct adapter *adapter) +void t4_intr_clear(struct adapter *adap) { - static const unsigned int cause_reg[] = { - A_SGE_INT_CAUSE1, A_SGE_INT_CAUSE2, A_SGE_INT_CAUSE3, - A_PCIE_NONFAT_ERR, A_PCIE_INT_CAUSE, - A_MA_INT_WRAP_STATUS, A_MA_PARITY_ERROR_STATUS1, A_MA_INT_CAUSE, - A_EDC_INT_CAUSE, EDC_REG(A_EDC_INT_CAUSE, 1), - A_CIM_HOST_INT_CAUSE, A_CIM_HOST_UPACC_INT_CAUSE, + static const u32 cause_reg[] = { + A_CIM_HOST_INT_CAUSE, + A_CIM_HOST_UPACC_INT_CAUSE, MYPF_REG(A_CIM_PF_HOST_INT_CAUSE), - A_TP_INT_CAUSE, - A_ULP_RX_INT_CAUSE, A_ULP_TX_INT_CAUSE, - A_PM_RX_INT_CAUSE, A_PM_TX_INT_CAUSE, - A_MPS_RX_PERR_INT_CAUSE, A_CPL_INTR_CAUSE, - MYPF_REG(A_PL_PF_INT_CAUSE), - A_PL_PL_INT_CAUSE, + EDC_REG(A_EDC_INT_CAUSE, 0), EDC_REG(A_EDC_INT_CAUSE, 1), A_LE_DB_INT_CAUSE, + A_MA_INT_WRAP_STATUS, + A_MA_PARITY_ERROR_STATUS1, + A_MA_INT_CAUSE, + A_MPS_CLS_INT_CAUSE, + A_MPS_RX_PERR_INT_CAUSE, + A_MPS_STAT_PERR_INT_CAUSE_RX_FIFO, + A_MPS_STAT_PERR_INT_CAUSE_SRAM, + A_MPS_TRC_INT_CAUSE, + A_MPS_TX_INT_CAUSE, + A_MPS_STAT_PERR_INT_CAUSE_TX_FIFO, + A_NCSI_INT_CAUSE, + A_PCIE_INT_CAUSE, + A_PCIE_NONFAT_ERR, + A_PL_PL_INT_CAUSE, + A_PM_RX_INT_CAUSE, + A_PM_TX_INT_CAUSE, + A_SGE_INT_CAUSE1, + A_SGE_INT_CAUSE2, + A_SGE_INT_CAUSE3, + A_SGE_INT_CAUSE4, + A_SMB_INT_CAUSE, + A_TP_INT_CAUSE, + A_ULP_RX_INT_CAUSE, + A_ULP_RX_INT_CAUSE_2, + A_ULP_TX_INT_CAUSE, + A_ULP_TX_INT_CAUSE_2, + + MYPF_REG(A_PL_PF_INT_CAUSE), }; + int i; + const int nchan = adap->chip_params->nchan; - unsigned int i; + for (i = 0; i < ARRAY_SIZE(cause_reg); i++) + t4_write_reg(adap, cause_reg[i], 0xffffffff); - for (i = 0; i < ARRAY_SIZE(cause_reg); ++i) - t4_write_reg(adapter, cause_reg[i], 0xffffffff); + if (is_t4(adap)) { + t4_write_reg(adap, A_PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS, + 0xffffffff); + t4_write_reg(adap, A_PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, + 0xffffffff); + t4_write_reg(adap, A_MC_INT_CAUSE, 0xffffffff); + for (i = 0; i < nchan; i++) { + t4_write_reg(adap, PORT_REG(i, A_XGMAC_PORT_INT_CAUSE), + 0xffffffff); + } + } + if (chip_id(adap) >= CHELSIO_T5) { + t4_write_reg(adap, A_MA_PARITY_ERROR_STATUS2, 0xffffffff); + t4_write_reg(adap, A_MPS_STAT_PERR_INT_CAUSE_SRAM1, 0xffffffff); + t4_write_reg(adap, A_SGE_INT_CAUSE5, 0xffffffff); + t4_write_reg(adap, A_MC_P_INT_CAUSE, 0xffffffff); + if (is_t5(adap)) { + t4_write_reg(adap, MC_REG(A_MC_P_INT_CAUSE, 1), + 0xffffffff); + } + for (i = 0; i < nchan; i++) { + t4_write_reg(adap, T5_PORT_REG(i, + A_MAC_PORT_PERR_INT_CAUSE), 0xffffffff); + if (chip_id(adap) > CHELSIO_T5) { + t4_write_reg(adap, T5_PORT_REG(i, + A_MAC_PORT_PERR_INT_CAUSE_100G), + 0xffffffff); + } + t4_write_reg(adap, T5_PORT_REG(i, A_MAC_PORT_INT_CAUSE), + 0xffffffff); + } + } + if (chip_id(adap) >= CHELSIO_T6) { + t4_write_reg(adap, A_SGE_INT_CAUSE6, 0xffffffff); + } - t4_write_reg(adapter, is_t4(adapter) ? A_MC_INT_CAUSE : - A_MC_P_INT_CAUSE, 0xffffffff); - - if (is_t4(adapter)) { - t4_write_reg(adapter, A_PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS, - 0xffffffff); - t4_write_reg(adapter, A_PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, - 0xffffffff); - } else - t4_write_reg(adapter, A_MA_PARITY_ERROR_STATUS2, 0xffffffff); - - t4_write_reg(adapter, A_PL_INT_CAUSE, GLBL_INTR_MASK); - (void) t4_read_reg(adapter, A_PL_INT_CAUSE); /* flush */ + t4_write_reg(adap, A_MPS_INT_CAUSE, is_t4(adap) ? 
0 : 0xffffffff); + t4_write_reg(adap, A_PL_PERR_CAUSE, 0xffffffff); + t4_write_reg(adap, A_PL_INT_CAUSE, 0xffffffff); + (void) t4_read_reg(adap, A_PL_INT_CAUSE); /* flush */ } /** diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index fe2fb1951282..64f995bd842f 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -1077,6 +1077,7 @@ t4_attach(device_t dev) rc = partition_resources(sc); if (rc != 0) goto done; /* error message displayed already */ + t4_intr_clear(sc); } rc = get_params__post_init(sc); @@ -2563,14 +2564,23 @@ vcxgbe_detach(device_t dev) } void -t4_fatal_err(struct adapter *sc) +t4_fatal_err(struct adapter *sc, bool fw_error) { - t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0); - t4_intr_disable(sc); - log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n", + + t4_shutdown_adapter(sc); + log(LOG_ALERT, "%s: encountered fatal error, adapter stopped.\n", device_get_nameunit(sc->dev)); if (t4_panic_on_fatal_err) panic("panic requested on fatal error"); + + if (fw_error) { + ASSERT_SYNCHRONIZED_OP(sc); + sc->flags |= ADAP_ERR; + } else { + ADAPTER_LOCK(sc); + sc->flags |= ADAP_ERR; + ADAPTER_UNLOCK(sc); + } } void @@ -10069,20 +10079,6 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, return (rc); } -void -t4_db_full(struct adapter *sc) -{ - - CXGBE_UNIMPLEMENTED(__func__); -} - -void -t4_db_dropped(struct adapter *sc) -{ - - CXGBE_UNIMPLEMENTED(__func__); -} - #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *vi, int enable) diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index 7950d8cc89ab..b24e4d125ee0 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -1394,8 +1394,12 @@ void t4_intr_err(void *arg) { struct adapter *sc = arg; + const bool verbose = (sc->debug_flags & DF_VERBOSE_SLOWINTR) != 0; - t4_slow_intr_handler(sc); + if (sc->flags & ADAP_ERR) + return; + + t4_slow_intr_handler(sc, verbose); } /* From c3f5a3665163f217202167ea521faac19a8c657f Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Fri, 1 Feb 2019 20:46:47 +0000 Subject: [PATCH 61/90] x86: correctly limit max memory resource address. CPU and buses can manage up to the limit reported by cpu_maxphyaddr, so set mem_rman to the value returned by cpu_getmaxphyaddr(). For the PAE mode, this was missed both when rman_res_t was increased to uintmax_t and in the PAE merge commit. When importing smaps or dump_avail chunks into the memory rman, do not blindly ignore resources which end above the limit; chomp them instead if their start is below the limit. The same change was already done to i386 add_physmap_entry().
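For illustration (not part of the diff; the standalone helper below is hypothetical, but the clamping rule mirrors the nexus.c hunks in this patch):

	#include <stdint.h>

	typedef uint64_t rman_res_t;	/* stand-in for the kernel typedef */

	/*
	 * ram_attach() rule: a region starting above the limit is skipped
	 * (length 0 here); a region ending above the limit is chomped so
	 * that it stops at the limit instead of being dropped entirely.
	 */
	static rman_res_t
	clamped_length(rman_res_t base, rman_res_t length, rman_res_t rm_end)
	{
		if (base > rm_end)
			return (0);
		if (base + length > rm_end)
			return (rm_end - base);
		return (length);
	}

With rm_end at 0xfffffffff (a 36-bit physical address limit), a region at base 0xf00000000 with length 0x200000000 is registered with length 0xffffffff rather than ignored.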
Based on the submission by: bde MFC after: 2 months --- sys/x86/x86/nexus.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/sys/x86/x86/nexus.c b/sys/x86/x86/nexus.c index 40f8b01b1213..91877499ca6c 100644 --- a/sys/x86/x86/nexus.c +++ b/sys/x86/x86/nexus.c @@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -269,11 +270,7 @@ nexus_init_resources(void) panic("nexus_init_resources port_rman"); mem_rman.rm_start = 0; -#ifndef PAE - mem_rman.rm_end = BUS_SPACE_MAXADDR; -#else - mem_rman.rm_end = ((1ULL << cpu_maxphyaddr) - 1); -#endif + mem_rman.rm_end = cpu_getmaxphyaddr(); mem_rman.rm_type = RMAN_ARRAY; mem_rman.rm_descr = "I/O memory addresses"; if (rman_init(&mem_rman) @@ -787,6 +784,7 @@ ram_attach(device_t dev) { struct bios_smap *smapbase, *smap, *smapend; struct resource *res; + rman_res_t length; vm_paddr_t *p; caddr_t kmdp; uint32_t smapsize; @@ -807,16 +805,12 @@ ram_attach(device_t dev) if (smap->type != SMAP_TYPE_MEMORY || smap->length == 0) continue; -#ifdef __i386__ - /* - * Resources use long's to track resources, so - * we can't include memory regions above 4GB. - */ - if (smap->base > ~0ul) + if (smap->base > mem_rman.rm_end) continue; -#endif + length = smap->base + smap->length > mem_rman.rm_end ? + mem_rman.rm_end - smap->base : smap->length; error = bus_set_resource(dev, SYS_RES_MEMORY, rid, - smap->base, smap->length); + smap->base, length); if (error) panic( "ram_attach: resource %d failed set with %d", @@ -841,16 +835,12 @@ ram_attach(device_t dev) * segment is 0. */ for (rid = 0, p = dump_avail; p[1] != 0; rid++, p += 2) { -#ifdef PAE - /* - * Resources use long's to track resources, so we can't - * include memory regions above 4GB. - */ - if (p[0] > ~0ul) + if (p[0] > mem_rman.rm_end) break; -#endif + length = (p[1] > mem_rman.rm_end ? mem_rman.rm_end : p[1]) - + p[0]; error = bus_set_resource(dev, SYS_RES_MEMORY, rid, p[0], - p[1] - p[0]); + length); if (error) panic("ram_attach: resource %d failed set with %d", rid, error); From a6786c179904499c0ea0acb025bcab98e5f5d2e5 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Fri, 1 Feb 2019 21:09:36 +0000 Subject: [PATCH 62/90] Disable boot-time memory test on i386 by default. With the current 24G memory limit for GENERIC, the boot time test causes quite a visible delay, amplified by the default debug.late_console = 0. The comment text is copied from the explanation of the same setting for amd64. Suggested by: bde Discussed with: emaste Sponsored by: The FreeBSD Foundation MFC after: 2 months --- sys/i386/i386/machdep.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index d7324f977e4b..c46dc81ea6a5 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -2005,13 +2005,15 @@ getmemsize(int first) Maxmem = atop(physmap[physmap_idx + 1]); /* - * By default enable the memory test on real hardware, and disable - * it if we appear to be running in a VM. This avoids touching all - * pages unnecessarily, which doesn't matter on real hardware but is - * bad for shared VM hosts. Use a general name so that - * one could eventually do more with the code than just disable it. + * The boot memory test is disabled by default, as it takes a + * significant amount of time on large-memory systems, and is + * unfriendly to virtual machines as it unnecessarily touches all + * pages.
+ * + * A general name is used as the code may be extended to support + * additional tests beyond the current "page present" test. */ - memtest = (vm_guest > VM_GUEST_NO) ? 0 : 1; + memtest = 0; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && From 3f0e38d70cd2f01a69d870dac89ece4950e16f36 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Fri, 1 Feb 2019 22:24:14 +0000 Subject: [PATCH 63/90] readelf: decode FreeBSD note types Decode NT_FREEBSD_ABI_TAG, NT_FREEBSD_ARCH_TAG, and NT_FREEBSD_FEATURE_CTL. Reviewed by: brooks, kib (earlier) MFC after: 2 weeks Relnotes: Yes Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D19054 --- contrib/elftoolchain/readelf/readelf.c | 55 +++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/contrib/elftoolchain/readelf/readelf.c b/contrib/elftoolchain/readelf/readelf.c index 8507847c6548..a53f65f6af77 100644 --- a/contrib/elftoolchain/readelf/readelf.c +++ b/contrib/elftoolchain/readelf/readelf.c @@ -313,6 +313,8 @@ static void dump_mips_specific_info(struct readelf *re); static void dump_notes(struct readelf *re); static void dump_notes_content(struct readelf *re, const char *buf, size_t sz, off_t off); +static void dump_notes_data(const char *name, uint32_t type, const char *buf, + size_t sz); static void dump_svr4_hash(struct section *s); static void dump_svr4_hash64(struct readelf *re, struct section *s); static void dump_gnu_hash(struct readelf *re, struct section *s); @@ -3486,12 +3488,58 @@ dump_notes(struct readelf *re) } } +static struct flag_desc note_feature_ctl_flags[] = { + { NT_FREEBSD_FCTL_ASLR_DISABLE, "ASLR_DISABLE" }, + { 0, NULL } +}; + +static void +dump_notes_data(const char *name, uint32_t type, const char *buf, size_t sz) +{ + size_t i; + const uint32_t *ubuf; + + /* Note data is at least 4-byte aligned. */ + if (((uintptr_t)buf & 3) != 0) { + warnx("bad note data alignment"); + goto unknown; + } + ubuf = (const uint32_t *)(const void *)buf; + + if (strcmp(name, "FreeBSD") == 0) { + switch (type) { + case NT_FREEBSD_ABI_TAG: + if (sz != 4) + goto unknown; + printf(" ABI tag: %u\n", ubuf[0]); + return; + /* NT_FREEBSD_NOINIT_TAG carries no data, treat as unknown. 
*/ + case NT_FREEBSD_ARCH_TAG: + if (sz != 4) + goto unknown; + printf(" Arch tag: %x\n", ubuf[0]); + return; + case NT_FREEBSD_FEATURE_CTL: + if (sz != 4) + goto unknown; + printf(" Features:"); + dump_flags(note_feature_ctl_flags, ubuf[0]); + printf("\n"); + return; + } + } +unknown: + printf(" description data:"); + for (i = 0; i < sz; i++) + printf(" %02x", (unsigned char)buf[i]); + printf("\n"); +} + static void dump_notes_content(struct readelf *re, const char *buf, size_t sz, off_t off) { Elf_Note *note; const char *end, *name; - uint32_t i; printf("\nNotes at offset %#010jx with length %#010jx:\n", (uintmax_t) off, (uintmax_t) sz); @@ -3523,10 +3571,7 @@ dump_notes_content(struct readelf *re, const char *buf, size_t sz, off_t off) printf(" %-13s %#010jx", name, (uintmax_t) note->n_descsz); printf(" %s\n", note_type(name, re->ehdr.e_type, note->n_type)); - printf(" description data:"); - for (i = 0; i < note->n_descsz; i++) - printf(" %02x", (unsigned char)buf[i]); - printf("\n"); + dump_notes_data(name, note->n_type, buf, note->n_descsz); buf += roundup2(note->n_descsz, 4); } } From f4d8b4f81c238a815dd56d277886cb45eacbe3a4 Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Fri, 1 Feb 2019 23:04:45 +0000 Subject: [PATCH 64/90] qlnxr(4), qlnxe(4): Unbreak gcc build Remove redundant definitions and conditionalize Clang-specific CFLAGS. Sponsored by: Dell EMC Isilon --- sys/dev/qlnx/qlnxe/qlnx_rdma.h | 2 -- sys/dev/qlnx/qlnxr/qlnxr_verbs.c | 23 ----------------------- sys/modules/qlnx/qlnxr/Makefile | 6 ++++-- 3 files changed, 4 insertions(+), 27 deletions(-) diff --git a/sys/dev/qlnx/qlnxe/qlnx_rdma.h b/sys/dev/qlnx/qlnxe/qlnx_rdma.h index 9b3526a9e8d7..03c43c8b8201 100644 --- a/sys/dev/qlnx/qlnxe/qlnx_rdma.h +++ b/sys/dev/qlnx/qlnxe/qlnx_rdma.h @@ -51,8 +51,6 @@ typedef struct qlnx_rdma_if qlnx_rdma_if_t; extern int qlnx_rdma_register_if(qlnx_rdma_if_t *rdma_if); extern int qlnx_rdma_deregister_if(qlnx_rdma_if_t *rdma_if); -extern int qlnx_rdma_ll2_set_mac_filter(void *rdma_ctx, uint8_t *old_mac_address, - uint8_t *new_mac_address); #define QLNX_NUM_CNQ 1 diff --git a/sys/dev/qlnx/qlnxr/qlnxr_verbs.c b/sys/dev/qlnx/qlnxr/qlnxr_verbs.c index 7f551647101b..54ca86977fd0 100644 --- a/sys/dev/qlnx/qlnxr/qlnxr_verbs.c +++ b/sys/dev/qlnx/qlnxr/qlnxr_verbs.c @@ -74,16 +74,6 @@ __FBSDID("$FreeBSD$"); ((unsigned char *)&addr)[2], \ ((unsigned char *)&addr)[3] -struct ib_srq *qlnxr_create_srq(struct ib_pd *, - struct ib_srq_init_attr *, - struct ib_udata *); - -int qlnxr_destroy_srq(struct ib_srq *); - -int qlnxr_modify_srq(struct ib_srq *, - struct ib_srq_attr *, - enum ib_srq_attr_mask, - struct ib_udata *); static int qlnxr_check_srq_params(struct ib_pd *ibpd, struct qlnxr_dev *dev, @@ -100,19 +90,6 @@ qlnxr_alloc_srq_kernel_params(struct qlnxr_srq *srq, struct qlnxr_dev *dev, struct ib_srq_init_attr *init_attr); -extern enum _ecore_status_t -ecore_rdma_modify_srq(void *rdma_cxt, - struct ecore_rdma_modify_srq_in_params *in_params); - -extern enum _ecore_status_t -ecore_rdma_destroy_srq(void *rdma_cxt, - struct ecore_rdma_destroy_srq_in_params *in_params); - -extern enum _ecore_status_t -ecore_rdma_create_srq(void *rdma_cxt, - struct ecore_rdma_create_srq_in_params *in_params, - struct ecore_rdma_create_srq_out_params *out_params); - static int qlnxr_copy_srq_uresp(struct qlnxr_dev *dev, diff --git a/sys/modules/qlnx/qlnxr/Makefile b/sys/modules/qlnx/qlnxr/Makefile index c8e1103599cb..0f7486cbe155 100644 --- a/sys/modules/qlnx/qlnxr/Makefile +++ b/sys/modules/qlnx/qlnxr/Makefile @@ -62,12 
+62,14 @@ CFLAGS+= -DINET6 -DINET CWARNEXTRA += -Wno-cast-qual CWARNEXTRA += -Wno-unused-function +.if ${COMPILER_TYPE} == "clang" CWARNEXTRA += -Wno-gnu-variable-sized-type-not-at-end +.endif CWARNEXTRA += -Wno-missing-prototypes -CWARNEXTRA += -Wno-constant-conversion +CWARNEXTRA += ${NO_WCONSTANT_CONVERSION} CWARNEXTRA += -Wno-format -CWARNEXTRA += -Wno-shift-sign-overflow +CWARNEXTRA += ${NO_WSHIFT_COUNT_OVERFLOW} CWARNEXTRA += -Wno-empty-body CFLAGS += -DQLNX_DEBUG From 50b06886a715d7be052e1742f7d11ce82928f134 Mon Sep 17 00:00:00 2001 From: Eric van Gyzen Date: Fri, 1 Feb 2019 23:15:54 +0000 Subject: [PATCH 65/90] libm: squelch -Woverflow from gcc6 Sponsored by: Dell EMC Isilon --- lib/msun/Makefile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/msun/Makefile b/lib/msun/Makefile index 0cba3fc8b53b..6c9af4c018c8 100644 --- a/lib/msun/Makefile +++ b/lib/msun/Makefile @@ -108,6 +108,15 @@ COMMON_SRCS+= catrigl.c \ s_nextafterl.c s_nexttoward.c s_remquol.c s_rintl.c s_roundl.c \ s_scalbnl.c s_sinl.c s_sincosl.c \ s_tanhl.c s_tanl.c s_truncl.c w_cabsl.c +# Work around this warning from gcc 6: +# lib/msun/ld80/e_powl.c:275:1: error: floating constant exceeds range of +# 'long double' [-Werror=overflow] +# if( y >= LDBL_MAX ) +# See also: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=130067 +.include +.if ${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} >= 60000 +CFLAGS.e_powl.c+= -Wno-error=overflow +.endif .endif # C99 complex functions From ac818ca6441d08bd857740110662fe49e872a2d8 Mon Sep 17 00:00:00 2001 From: Eric van Gyzen Date: Fri, 1 Feb 2019 23:16:59 +0000 Subject: [PATCH 66/90] rtld: pacify -Wmaybe-uninitialized from gcc6 Sponsored by: Dell EMC Isilon --- libexec/rtld-elf/i386/reloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libexec/rtld-elf/i386/reloc.c b/libexec/rtld-elf/i386/reloc.c index 84bbaf5f2f89..ef6e805aeeb4 100644 --- a/libexec/rtld-elf/i386/reloc.c +++ b/libexec/rtld-elf/i386/reloc.c @@ -146,6 +146,10 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int flags, } else cache = NULL; + /* Appease some compilers. */ + symval = 0; + def = NULL; + rellim = (const Elf_Rel *)((const char *)obj->rel + obj->relsize); for (rel = obj->rel; rel < rellim; rel++) { switch (ELF_R_TYPE(rel->r_info)) { From 59568a0e52561dffc6c782c10186028bcaa5466f Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Sat, 2 Feb 2019 04:11:59 +0000 Subject: [PATCH 67/90] Fix integer math overflow in UMA hash_alloc(). 512GB of ZFS ABD ARC means an abd_chunk zone of 128M 4KB items. To manage them, UMA tries to allocate a 2GB hash table, whose size does not fit into an int variable; the resulting allocation failure makes the ARC shrink back below 512GB instead of using more RAM. With this change I easily reached a >700GB ARC size on a 768GB RAM machine. MFC after: 1 week Sponsored by: iXsystems, Inc.
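A minimal userland sketch of the overflow, for illustration only (not code from the commit; it assumes a 64-bit platform where int is 32 bits, with 256M buckets of 8-byte pointers making up the 2GB table mentioned above):

	#include <stdio.h>
	#include <stddef.h>

	int
	main(void)
	{
		size_t nbuckets = 256UL * 1024 * 1024;

		/*
		 * Narrowing the size_t product into an int is implementation-
		 * defined once the value exceeds INT_MAX; it typically goes
		 * negative, so the subsequent allocation request fails.
		 */
		int alloc = nbuckets * sizeof(void *);
		size_t alloc_fixed = nbuckets * sizeof(void *);	/* 2147483648 */

		printf("int: %d, size_t: %zu\n", alloc, alloc_fixed);
		return (0);
	}

Widening the local to size_t, as the one-line diff below does, keeps the computed table size intact.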
--- sys/vm/uma_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 809bb7c2393f..c2571f1c4153 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -623,7 +623,7 @@ static int hash_alloc(struct uma_hash *hash) { int oldsize; - int alloc; + size_t alloc; oldsize = hash->uh_hashsize; From d49fc192c14136825eb5c5fd1ebb957c0b909cc0 Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Sat, 2 Feb 2019 04:15:16 +0000 Subject: [PATCH 68/90] powerpc/powernv: Add a driver for the POWER9 XIVE interrupt controller The XIVE (External Interrupt Virtualization Engine) is a new interrupt controller present in IBM's POWER9 processor. It's a very powerful, very complex device using queues and shared memory to improve interrupt dispatch performance in a virtualized environment. This yields a ~10% performance improvement over the XICS emulation mode, measured in both buildworld and 'dd' from nvme to /dev/null. Currently, this only supports native access. MFC after: 1 month --- sys/conf/files.powerpc | 1 + sys/powerpc/powernv/opal.h | 26 + sys/powerpc/powernv/platform_powernv.c | 6 +- sys/powerpc/powernv/xive.c | 764 +++++++++++++++++++++++++ sys/powerpc/pseries/xics.c | 15 +- 5 files changed, 804 insertions(+), 8 deletions(-) create mode 100644 sys/powerpc/powernv/xive.c diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc index 60c6f2304bdd..47287515b5b2 100644 --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -201,6 +201,7 @@ powerpc/powernv/opalcall.S optional powernv powerpc/powernv/platform_powernv.c optional powernv powerpc/powernv/powernv_centaur.c optional powernv powerpc/powernv/powernv_xscom.c optional powernv +powerpc/powernv/xive.c optional powernv powerpc/powerpc/altivec.c optional powerpc | powerpc64 powerpc/powerpc/autoconf.c standard powerpc/powerpc/bus_machdep.c standard diff --git a/sys/powerpc/powernv/opal.h b/sys/powerpc/powernv/opal.h index fafeec390e68..043b61a291a3 100644 --- a/sys/powerpc/powernv/opal.h +++ b/sys/powerpc/powernv/opal.h @@ -82,6 +82,20 @@ int opal_call(uint64_t token, ...); #define OPAL_INT_SET_MFRR 125 #define OPAL_PCI_TCE_KILL 126 #define OPAL_XIVE_RESET 128 +#define OPAL_XIVE_GET_IRQ_INFO 129 +#define OPAL_XIVE_GET_IRQ_CONFIG 130 +#define OPAL_XIVE_SET_IRQ_CONFIG 131 +#define OPAL_XIVE_GET_QUEUE_INFO 132 +#define OPAL_XIVE_SET_QUEUE_INFO 133 +#define OPAL_XIVE_DONATE_PAGE 134 +#define OPAL_XIVE_ALLOCATE_VP_BLOCK 135 +#define OPAL_XIVE_FREE_VP_BLOCK 136 +#define OPAL_XIVE_GET_VP_INFO 137 +#define OPAL_XIVE_SET_VP_INFO 138 +#define OPAL_XIVE_ALLOCATE_IRQ 139 +#define OPAL_XIVE_FREE_IRQ 140 +#define OPAL_XIVE_SYNC 141 +#define OPAL_XIVE_DUMP 142 #define OPAL_SENSOR_GROUP_CLEAR 156 #define OPAL_SENSOR_READ_U64 162 #define OPAL_SENSOR_GROUP_ENABLE 163 @@ -114,6 +128,18 @@ int opal_call(uint64_t token, ...); #define OPAL_BUSY_EVENT -12 #define OPAL_ASYNC_COMPLETION -15 #define OPAL_EMPTY -16 +#define OPAL_XIVE_PROVISIONING -31 +#define OPAL_XIVE_FREE_ACTIVE -32 + +#define OPAL_XIVE_XICS_MODE_EMU 0 +#define OPAL_XIVE_XICS_MODE_EXP 1 + +#define OPAL_XIVE_VP_ENABLED 0x00000001 +#define OPAL_XIVE_VP_SINGLE_ESCALATION 0x00000002 + +#define OPAL_XIVE_EQ_ENABLED 0x00000001 +#define OPAL_XIVE_EQ_ALWAYS_NOTIFY 0x00000002 +#define OPAL_XIVE_EQ_ESCALATE 0x00000004 struct opal_msg { uint32_t msg_type; diff --git a/sys/powerpc/powernv/platform_powernv.c b/sys/powerpc/powernv/platform_powernv.c index afcb3d5b90b8..1291eb02e4e5 100644 --- a/sys/powerpc/powernv/platform_powernv.c +++
b/sys/powerpc/powernv/platform_powernv.c @@ -59,7 +59,8 @@ __FBSDID("$FreeBSD$"); extern void *ap_pcpu; #endif -extern void xicp_smp_cpu_startup(void); +void (*powernv_smp_ap_extra_init)(void); + static int powernv_probe(platform_t); static int powernv_attach(platform_t); void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz, @@ -473,7 +474,8 @@ static void powernv_smp_ap_init(platform_t platform) { - xicp_smp_cpu_startup(); + if (powernv_smp_ap_extra_init != NULL) + powernv_smp_ap_extra_init(); } static void diff --git a/sys/powerpc/powernv/xive.c b/sys/powerpc/powernv/xive.c new file mode 100644 index 000000000000..0c1406199ac5 --- /dev/null +++ b/sys/powerpc/powernv/xive.c @@ -0,0 +1,764 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright 2019 Justin Hibbits + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_platform.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +#ifdef POWERNV +#include +#endif + +#include "pic_if.h" + +#define XIVE_PRIORITY 7 /* Random non-zero number */ +#define MAX_XIVE_IRQS (1<<24) /* 24-bit XIRR field */ + +/* Registers */ +#define XIVE_TM_QW1_OS 0x010 /* Guest OS registers */ +#define XIVE_TM_QW2_HV_POOL 0x020 /* Hypervisor pool registers */ +#define XIVE_TM_QW3_HV 0x030 /* Hypervisor registers */ + +#define XIVE_TM_NSR 0x00 +#define XIVE_TM_CPPR 0x01 +#define XIVE_TM_IPB 0x02 +#define XIVE_TM_LSMFB 0x03 +#define XIVE_TM_ACK_CNT 0x04 +#define XIVE_TM_INC 0x05 +#define XIVE_TM_AGE 0x06 +#define XIVE_TM_PIPR 0x07 + +#define TM_WORD0 0x0 +#define TM_WORD2 0x8 +#define TM_QW2W2_VP 0x80000000 + +#define XIVE_TM_SPC_ACK 0x800 +#define TM_QW3NSR_HE_SHIFT 14 +#define TM_QW3_NSR_HE_NONE 0 +#define TM_QW3_NSR_HE_POOL 1 +#define TM_QW3_NSR_HE_PHYS 2 +#define TM_QW3_NSR_HE_LSI 3 +#define XIVE_TM_SPC_PULL_POOL_CTX 0x828 + +#define XIVE_IRQ_LOAD_EOI 0x000 +#define XIVE_IRQ_STORE_EOI 0x400 +#define XIVE_IRQ_PQ_00 0xc00 +#define XIVE_IRQ_PQ_01 0xd00 + +#define XIVE_IRQ_VAL_P 0x02 +#define XIVE_IRQ_VAL_Q 0x01 + +struct xive_softc; +struct xive_irq; + +extern void (*powernv_smp_ap_extra_init)(void); + +/* Private support */ +static void xive_setup_cpu(void); +static void xive_smp_cpu_startup(void); +static void xive_init_irq(struct xive_irq *irqd, u_int irq); +static struct xive_irq *xive_configure_irq(u_int irq); +static int xive_provision_page(struct xive_softc *sc); + + +/* Interfaces */ +static int xive_probe(device_t); +static int xive_attach(device_t); +static int xics_probe(device_t); +static int xics_attach(device_t); + +static void xive_bind(device_t, u_int, cpuset_t, void **); +static void xive_dispatch(device_t, struct trapframe *); +static void xive_enable(device_t, u_int, u_int, void **); +static void xive_eoi(device_t, u_int, void *); +static void xive_ipi(device_t, u_int); +static void xive_mask(device_t, u_int, void *); +static void xive_unmask(device_t, u_int, void *); +static void xive_translate_code(device_t dev, u_int irq, int code, + enum intr_trigger *trig, enum intr_polarity *pol); + +static device_method_t xive_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, xive_probe), + DEVMETHOD(device_attach, xive_attach), + + /* PIC interface */ + DEVMETHOD(pic_bind, xive_bind), + DEVMETHOD(pic_dispatch, xive_dispatch), + DEVMETHOD(pic_enable, xive_enable), + DEVMETHOD(pic_eoi, xive_eoi), + DEVMETHOD(pic_ipi, xive_ipi), + DEVMETHOD(pic_mask, xive_mask), + DEVMETHOD(pic_unmask, xive_unmask), + DEVMETHOD(pic_translate_code, xive_translate_code), + + DEVMETHOD_END +}; + +static device_method_t xics_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, xics_probe), + DEVMETHOD(device_attach, xics_attach), + + DEVMETHOD_END +}; + +struct xive_softc { + struct mtx sc_mtx; + struct resource *sc_mem; + vm_size_t sc_prov_page_size; + uint32_t sc_offset; +}; + +struct xive_queue { + uint32_t *q_page; + uint32_t *q_eoi_page; + uint32_t q_toggle; + uint32_t q_size; + uint32_t q_index; + uint32_t q_mask; +}; + +struct xive_irq { + uint32_t girq; + uint32_t lirq; + uint64_t vp; + uint64_t flags; +#define OPAL_XIVE_IRQ_EOI_VIA_FW 0x00000020 +#define OPAL_XIVE_IRQ_MASK_VIA_FW 0x00000010 +#define OPAL_XIVE_IRQ_SHIFT_BUG 0x00000008 +#define OPAL_XIVE_IRQ_LSI 0x00000004 +#define 
OPAL_XIVE_IRQ_STORE_EOI 0x00000002 +#define OPAL_XIVE_IRQ_TRIGGER_PAGE 0x00000001 + uint8_t prio; + vm_offset_t eoi_page; + vm_offset_t trig_page; + vm_size_t esb_size; + int chip; +}; + +struct xive_cpu { + uint64_t vp; + uint64_t flags; + struct xive_irq ipi_data; + struct xive_queue queue; /* We only use a single queue for now. */ + uint64_t cam; + uint32_t chip; +}; + +static driver_t xive_driver = { + "xive", + xive_methods, + sizeof(struct xive_softc) +}; + +static driver_t xics_driver = { + "xivevc", + xics_methods, + 0 +}; + +static devclass_t xive_devclass; +static devclass_t xics_devclass; + +EARLY_DRIVER_MODULE(xive, ofwbus, xive_driver, xive_devclass, 0, 0, + BUS_PASS_INTERRUPT-1); +EARLY_DRIVER_MODULE(xivevc, ofwbus, xics_driver, xics_devclass, 0, 0, + BUS_PASS_INTERRUPT); + +MALLOC_DEFINE(M_XIVE, "xive", "XIVE Memory"); + +DPCPU_DEFINE_STATIC(struct xive_cpu, xive_cpu_data); + +static int xive_ipi_vector = -1; + +/* + * XIVE Exploitation mode driver. + * + * The XIVE, present in the POWER9 CPU, can run in two modes: XICS emulation + * mode, and "Exploitation mode". XICS emulation mode is compatible with the + * POWER8 and earlier XICS interrupt controller, using OPAL calls to emulate + * hypervisor calls and memory accesses. Exploitation mode gives us raw access + * to the XIVE MMIO, improving performance significantly. + * + * The XIVE controller is a very bizarre interrupt controller. It uses queues + * in memory to pass interrupts around, and maps itself into 512GB of physical + * device address space, giving each interrupt in the system one or more pages + * of address space. An IRQ is tied to a virtual processor, which could be a + * physical CPU thread, or a guest CPU thread (LPAR running on a physical + * thread). Thus, the controller can route interrupts directly to guest OSes + * bypassing processing by the hypervisor, thereby improving performance of the + * guest OS. + * + * An IRQ, in addition to being tied to a virtual processor, has one or two + * page mappings: an EOI page, and an optional trigger page. The trigger page + * could be the same as the EOI page. Level-sensitive interrupts (LSIs) don't + * have a trigger page, as they're external interrupts controlled by physical + * lines. MSIs and IPIs have trigger pages. An IPI is really just another IRQ + * in the XIVE, which is triggered by software. + * + * An interesting behavior of the XIVE controller is that oftentimes the + * contents of an address location don't actually matter, but the direction of + * the action is the signifier (read vs write), and the address is significant. + * Hence, masking and unmasking an interrupt is done by reading different + * addresses in the EOI page, and triggering an interrupt consists of writing to + * the trigger page. + * + * Additionally, the MMIO region mapped is CPU-sensitive, just like the + * per-processor register space (private access) in OpenPIC. In order for a CPU + * to receive interrupts it must itself configure its CPPR (Current Processor + * Priority Register), it cannot be set by any other processor. This + * necessitates the xive_smp_cpu_startup() function. + * + * Queues are pages of memory, sized powers-of-two, that are shared with the + * XIVE. The XIVE writes into the queue with an alternating polarity bit, which + * flips when the queue wraps. + */ + +/* + * Offset-based read/write interfaces. 
+ */ +static uint16_t +xive_read_2(struct xive_softc *sc, bus_size_t offset) +{ + + return (bus_read_2(sc->sc_mem, sc->sc_offset + offset)); +} + +static void +xive_write_1(struct xive_softc *sc, bus_size_t offset, uint8_t val) +{ + + bus_write_1(sc->sc_mem, sc->sc_offset + offset, val); +} + +/* EOI and Trigger page access interfaces. */ +static uint64_t +xive_read_mmap8(vm_offset_t addr) +{ + return (*(volatile uint64_t *)addr); +} + +static void +xive_write_mmap8(vm_offset_t addr, uint64_t val) +{ + *(uint64_t *)(addr) = val; +} + + +/* Device interfaces. */ +static int +xive_probe(device_t dev) +{ + + if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-pe")) + return (ENXIO); + + device_set_desc(dev, "External Interrupt Virtualization Engine"); + + /* Make sure we always win against the xicp driver. */ + return (BUS_PROBE_DEFAULT); +} + +static int +xics_probe(device_t dev) +{ + + if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-vc")) + return (ENXIO); + + device_set_desc(dev, "External Interrupt Virtualization Engine Root"); + return (BUS_PROBE_DEFAULT); +} + +static int +xive_attach(device_t dev) +{ + struct xive_softc *sc = device_get_softc(dev); + struct xive_cpu *xive_cpud; + phandle_t phandle = ofw_bus_get_node(dev); + int64_t vp_block; + int error; + int rid; + int i, order; + uint64_t vp_id; + int64_t ipi_irq; + + opal_call(OPAL_XIVE_RESET, OPAL_XIVE_XICS_MODE_EXP); + + error = OF_getencprop(phandle, "ibm,xive-provision-page-size", + (pcell_t *)&sc->sc_prov_page_size, sizeof(sc->sc_prov_page_size)); + + rid = 1; /* Get the Hypervisor-level register set. */ + sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + sc->sc_offset = XIVE_TM_QW3_HV; + + mtx_init(&sc->sc_mtx, "XIVE", NULL, MTX_DEF); + + order = fls(mp_maxid + (mp_maxid - 1)) - 1; + + do { + vp_block = opal_call(OPAL_XIVE_ALLOCATE_VP_BLOCK, order); + if (vp_block == OPAL_BUSY) + DELAY(10); + else if (vp_block == OPAL_XIVE_PROVISIONING) + xive_provision_page(sc); + else + break; + } while (1); + + if (vp_block < 0) { + device_printf(dev, + "Unable to allocate VP block. Opal error %d\n", + (int)vp_block); + bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->sc_mem); + return (ENXIO); + } + + /* + * Set up the VPs. Try to do as much as we can in attach, to lessen + * what's needed at AP spawn time. + */ + CPU_FOREACH(i) { + vp_id = pcpu_find(i)->pc_hwref; + + xive_cpud = DPCPU_ID_PTR(i, xive_cpu_data); + xive_cpud->vp = vp_id + vp_block; + opal_call(OPAL_XIVE_GET_VP_INFO, xive_cpud->vp, NULL, + vtophys(&xive_cpud->cam), NULL, vtophys(&xive_cpud->chip)); + + /* Allocate the queue page and populate the queue state data. */ + xive_cpud->queue.q_page = contigmalloc(PAGE_SIZE, M_XIVE, + M_ZERO | M_WAITOK, 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); + xive_cpud->queue.q_size = 1 << PAGE_SHIFT; + xive_cpud->queue.q_mask = + ((xive_cpud->queue.q_size / sizeof(int)) - 1); + xive_cpud->queue.q_toggle = 0; + xive_cpud->queue.q_index = 0; + do { + error = opal_call(OPAL_XIVE_SET_VP_INFO, xive_cpud->vp, + OPAL_XIVE_VP_ENABLED, 0); + } while (error == OPAL_BUSY); + error = opal_call(OPAL_XIVE_SET_QUEUE_INFO, vp_id, + XIVE_PRIORITY, vtophys(xive_cpud->queue.q_page), PAGE_SHIFT, + OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED); + + do { + ipi_irq = opal_call(OPAL_XIVE_ALLOCATE_IRQ, + xive_cpud->chip); + } while (ipi_irq == OPAL_BUSY); + + if (ipi_irq < 0) + device_printf(root_pic, + "Failed allocating IPI. 
OPAL error %d\n", + (int)ipi_irq); + else { + xive_init_irq(&xive_cpud->ipi_data, ipi_irq); + xive_cpud->ipi_data.vp = vp_id; + xive_cpud->ipi_data.lirq = MAX_XIVE_IRQS; + opal_call(OPAL_XIVE_SET_IRQ_CONFIG, ipi_irq, + xive_cpud->ipi_data.vp, XIVE_PRIORITY, + MAX_XIVE_IRQS); + } + } + + powerpc_register_pic(dev, OF_xref_from_node(phandle), MAX_XIVE_IRQS, + 1 /* Number of IPIs */, FALSE); + root_pic = dev; + + xive_setup_cpu(); + powernv_smp_ap_extra_init = xive_smp_cpu_startup; + + return (0); +} + +static int +xics_attach(device_t dev) +{ + phandle_t phandle = ofw_bus_get_node(dev); + + /* The XIVE (root PIC) will handle all our interrupts */ + powerpc_register_pic(root_pic, OF_xref_from_node(phandle), + MAX_XIVE_IRQS, 1 /* Number of IPIs */, FALSE); + + return (0); +} + +/* + * PIC I/F methods. + */ + +static void +xive_bind(device_t dev, u_int irq, cpuset_t cpumask, void **priv) +{ + struct xive_irq *irqd; + int cpu; + int ncpus, i, error; + + if (*priv == NULL) + *priv = xive_configure_irq(irq); + + irqd = *priv; + + /* + * This doesn't appear to actually support affinity groups, so pick a + * random CPU. + */ + ncpus = 0; + CPU_FOREACH(cpu) + if (CPU_ISSET(cpu, &cpumask)) ncpus++; + + i = mftb() % ncpus; + ncpus = 0; + CPU_FOREACH(cpu) { + if (!CPU_ISSET(cpu, &cpumask)) + continue; + if (ncpus == i) + break; + ncpus++; + } + + opal_call(OPAL_XIVE_SYNC); + + irqd->vp = pcpu_find(cpu)->pc_hwref; + error = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, irqd->vp, + XIVE_PRIORITY, irqd->lirq); + + if (error < 0) + panic("Cannot bind interrupt %d to CPU %d", irq, cpu); + + xive_eoi(dev, irq, irqd); +} + +/* Read the next entry in the queue page and update the index. */ +static int +xive_read_eq(struct xive_queue *q) +{ + uint32_t i = be32toh(q->q_page[q->q_index]); + + /* Check validity, using current queue polarity. 
*/ + if ((i >> 31) == q->q_toggle) + return (0); + + q->q_index = (q->q_index + 1) & q->q_mask; + + if (q->q_index == 0) + q->q_toggle ^= 1; + + return (i & 0x7fffffff); +} + +static void +xive_dispatch(device_t dev, struct trapframe *tf) +{ + struct xive_softc *sc; + struct xive_cpu *xive_cpud; + uint32_t vector; + uint16_t ack; + uint8_t cppr, he; + + sc = device_get_softc(dev); + + for (;;) { + ack = xive_read_2(sc, XIVE_TM_SPC_ACK); + cppr = (ack & 0xff); + + he = ack >> TM_QW3NSR_HE_SHIFT; + + if (he == TM_QW3_NSR_HE_NONE) + break; + switch (he) { + case TM_QW3_NSR_HE_NONE: + goto end; + case TM_QW3_NSR_HE_POOL: + case TM_QW3_NSR_HE_LSI: + device_printf(dev, + "Unexpected interrupt he type: %d\n", he); + goto end; + case TM_QW3_NSR_HE_PHYS: + break; + } + + xive_cpud = DPCPU_PTR(xive_cpu_data); + xive_write_1(sc, XIVE_TM_CPPR, cppr); + + for (;;) { + vector = xive_read_eq(&xive_cpud->queue); + + if (vector == 0) + break; + + if (vector == MAX_XIVE_IRQS) + vector = xive_ipi_vector; + + powerpc_dispatch_intr(vector, tf); + } + } +end: + xive_write_1(sc, XIVE_TM_CPPR, 0xff); +} + +static void +xive_enable(device_t dev, u_int irq, u_int vector, void **priv) +{ + struct xive_irq *irqd; + cell_t status, cpu; + + if (irq == MAX_XIVE_IRQS) { + if (xive_ipi_vector == -1) + xive_ipi_vector = vector; + return; + } + if (*priv == NULL) + *priv = xive_configure_irq(irq); + + irqd = *priv; + + /* Bind to this CPU to start */ + cpu = PCPU_GET(hwref); + irqd->lirq = vector; + + for (;;) { + status = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, cpu, + XIVE_PRIORITY, vector); + if (status != OPAL_BUSY) + break; + DELAY(10); + } + + if (status != 0) + panic("OPAL_SET_XIVE IRQ %d -> cpu %d failed: %d", irq, + cpu, status); + + xive_unmask(dev, irq, *priv); +} + +static void +xive_eoi(device_t dev, u_int irq, void *priv) +{ + struct xive_irq *rirq; + struct xive_cpu *cpud; + uint8_t eoi_val; + + if (irq == MAX_XIVE_IRQS) { + cpud = DPCPU_PTR(xive_cpu_data); + rirq = &cpud->ipi_data; + } else + rirq = priv; + + if (rirq->flags & OPAL_XIVE_IRQ_EOI_VIA_FW) + opal_call(OPAL_INT_EOI, irq); + else if (rirq->flags & OPAL_XIVE_IRQ_STORE_EOI) + xive_write_mmap8(rirq->eoi_page + XIVE_IRQ_STORE_EOI, 0); + else if (rirq->flags & OPAL_XIVE_IRQ_LSI) + xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_LOAD_EOI); + else { + eoi_val = xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00); + if ((eoi_val & XIVE_IRQ_VAL_Q) && rirq->trig_page != 0) + xive_write_mmap8(rirq->trig_page, 0); + } +} + +static void +xive_ipi(device_t dev, u_int cpu) +{ + struct xive_cpu *xive_cpud; + + xive_cpud = DPCPU_ID_PTR(cpu, xive_cpu_data); + + if (xive_cpud->ipi_data.trig_page == 0) + return; + xive_write_mmap8(xive_cpud->ipi_data.trig_page, 0); +} + +static void +xive_mask(device_t dev, u_int irq, void *priv) +{ + struct xive_irq *rirq; + + /* Never mask IPIs */ + if (irq == MAX_XIVE_IRQS) + return; + + rirq = priv; + + if (!(rirq->flags & OPAL_XIVE_IRQ_LSI)) + return; + xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_01); +} + +static void +xive_unmask(device_t dev, u_int irq, void *priv) +{ + struct xive_irq *rirq; + + rirq = priv; + + xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00); +} + +static void +xive_translate_code(device_t dev, u_int irq, int code, + enum intr_trigger *trig, enum intr_polarity *pol) +{ + switch (code) { + case 0: + /* L to H edge */ + *trig = INTR_TRIGGER_EDGE; + *pol = INTR_POLARITY_HIGH; + break; + case 1: + /* Active L level */ + *trig = INTR_TRIGGER_LEVEL; + *pol = INTR_POLARITY_LOW; + break; + default: + *trig = 
INTR_TRIGGER_CONFORM; + *pol = INTR_POLARITY_CONFORM; + } +} + +/* Private functions. */ +/* + * Setup the current CPU. Called by the BSP at driver attachment, and by each + * AP at wakeup (via xive_smp_cpu_startup()). + */ +static void +xive_setup_cpu(void) +{ + struct xive_softc *sc; + struct xive_cpu *cpup; + uint32_t val; + + cpup = DPCPU_PTR(xive_cpu_data); + + sc = device_get_softc(root_pic); + + val = bus_read_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2); + if (val & TM_QW2W2_VP) + bus_read_8(sc->sc_mem, XIVE_TM_SPC_PULL_POOL_CTX); + + bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD0, 0xff); + bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2, + TM_QW2W2_VP | cpup->cam); + + xive_unmask(root_pic, cpup->ipi_data.girq, &cpup->ipi_data); + xive_write_1(sc, XIVE_TM_CPPR, 0xff); +} + +/* Populate an IRQ structure, mapping the EOI and trigger pages. */ +static void +xive_init_irq(struct xive_irq *irqd, u_int irq) +{ + uint64_t eoi_phys, trig_phys; + uint32_t esb_shift; + + opal_call(OPAL_XIVE_GET_IRQ_INFO, irq, + vtophys(&irqd->flags), vtophys(&eoi_phys), + vtophys(&trig_phys), vtophys(&esb_shift), + vtophys(&irqd->chip)); + + irqd->girq = irq; + irqd->esb_size = 1 << esb_shift; + irqd->eoi_page = (vm_offset_t)pmap_mapdev(eoi_phys, irqd->esb_size); + + if (eoi_phys == trig_phys) + irqd->trig_page = irqd->eoi_page; + else if (trig_phys != 0) + irqd->trig_page = (vm_offset_t)pmap_mapdev(trig_phys, + irqd->esb_size); + else + irqd->trig_page = 0; + + opal_call(OPAL_XIVE_GET_IRQ_CONFIG, irq, vtophys(&irqd->vp), + vtophys(&irqd->prio), vtophys(&irqd->lirq)); +} + +/* Allocate an IRQ struct before populating it. */ +static struct xive_irq * +xive_configure_irq(u_int irq) +{ + struct xive_irq *irqd; + + irqd = malloc(sizeof(struct xive_irq), M_XIVE, M_WAITOK); + + xive_init_irq(irqd, irq); + + return (irqd); +} + +/* + * Part of the OPAL API. OPAL_XIVE_ALLOCATE_VP_BLOCK might require more pages, + * provisioned through this call. + */ +static int +xive_provision_page(struct xive_softc *sc) +{ + void *prov_page; + int error; + + do { + prov_page = contigmalloc(sc->sc_prov_page_size, M_XIVE, 0, + 0, BUS_SPACE_MAXADDR, + sc->sc_prov_page_size, sc->sc_prov_page_size); + + error = opal_call(OPAL_XIVE_DONATE_PAGE, -1, + vtophys(prov_page)); + } while (error == OPAL_XIVE_PROVISIONING); + + return (0); +} + +/* The XIVE_TM_CPPR register must be set by each thread */ +static void +xive_smp_cpu_startup(void) +{ + + xive_setup_cpu(); +} diff --git a/sys/powerpc/pseries/xics.c b/sys/powerpc/pseries/xics.c index 4dbfcfbd30cb..fc9a82dd2b4d 100644 --- a/sys/powerpc/pseries/xics.c +++ b/sys/powerpc/pseries/xics.c @@ -61,9 +61,6 @@ __FBSDID("$FreeBSD$"); #define XICP_IPI 2 #define MAX_XICP_IRQS (1<<24) /* 24-bit XIRR field */ -#define XIVE_XICS_MODE_EMU 0 -#define XIVE_XICS_MODE_EXP 1 - static int xicp_probe(device_t); static int xicp_attach(device_t); static int xics_probe(device_t); @@ -78,7 +75,8 @@ static void xicp_mask(device_t, u_int, void *priv); static void xicp_unmask(device_t, u_int, void *priv); #ifdef POWERNV -void xicp_smp_cpu_startup(void); +extern void (*powernv_smp_ap_extra_init)(void); +static void xicp_smp_cpu_startup(void); #endif static device_method_t xicp_methods[] = { @@ -238,7 +236,7 @@ xicp_attach(device_t dev) * compatibility mode. 
*/ sc->xics_emu = true; - opal_call(OPAL_XIVE_RESET, XIVE_XICS_MODE_EMU); + opal_call(OPAL_XIVE_RESET, OPAL_XIVE_XICS_MODE_EMU); #endif } else { sc->cpu_range[0] = 0; @@ -280,6 +278,11 @@ xicp_attach(device_t dev) 1 /* Number of IPIs */, FALSE); root_pic = dev; +#ifdef POWERNV + if (sc->xics_emu) + powernv_smp_ap_extra_init = xicp_smp_cpu_startup; +#endif + return (0); } @@ -556,7 +559,7 @@ xicp_unmask(device_t dev, u_int irq, void *priv) #ifdef POWERNV /* This is only used on POWER9 systems with the XIVE's XICS emulation. */ -void +static void xicp_smp_cpu_startup(void) { struct xicp_softc *sc; From d38ca3297cc62ba3c98ce8a4ce96931a31aa0b40 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Sat, 2 Feb 2019 05:49:05 +0000 Subject: [PATCH 69/90] Return PFIL_CONSUMED if packet was consumed. While here gather all the identical endings of pf_check_*() into single function. PR: 235411 --- sys/netpfil/pf/pf_ioctl.c | 44 +++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index 91b2c1d33839..fd946d158ba1 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -4002,6 +4002,26 @@ shutdown_pf(void) return (error); } +static pfil_return_t +pf_check_return(int chk, struct mbuf **m) +{ + + switch (chk) { + case PF_PASS: + if (*m == NULL) + return (PFIL_CONSUMED); + else + return (PFIL_PASS); + break; + default: + if (*m != NULL) { + m_freem(*m); + *m = NULL; + } + return (PFIL_DROPPED); + } +} + #ifdef INET static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags, @@ -4010,12 +4030,8 @@ pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags, int chk; chk = pf_test(PF_IN, flags, ifp, m, inp); - if (chk && *m) { - m_freem(*m); - *m = NULL; - } - return (chk == PF_PASS ? PFIL_PASS : PFIL_DROPPED); + return (pf_check_return(chk, m)); } static pfil_return_t @@ -4025,12 +4041,8 @@ pf_check_out(struct mbuf **m, struct ifnet *ifp, int flags, int chk; chk = pf_test(PF_OUT, flags, ifp, m, inp); - if (chk && *m) { - m_freem(*m); - *m = NULL; - } - return (chk == PF_PASS ? PFIL_PASS : PFIL_DROPPED); + return (pf_check_return(chk, m)); } #endif @@ -4049,12 +4061,8 @@ pf_check6_in(struct mbuf **m, struct ifnet *ifp, int flags, CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_IN, flags, (*m)->m_flags & M_LOOP ? V_loif : ifp, m, inp); CURVNET_RESTORE(); - if (chk && *m) { - m_freem(*m); - *m = NULL; - } - return (chk == PF_PASS ? PFIL_PASS : PFIL_DROPPED); + return (pf_check_return(chk, m)); } static pfil_return_t @@ -4066,12 +4074,8 @@ pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags, CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_OUT, flags, ifp, m, inp); CURVNET_RESTORE(); - if (chk && *m) { - m_freem(*m); - *m = NULL; - } - return (chk == PF_PASS ? PFIL_PASS : PFIL_DROPPED); + return (pf_check_return(chk, m)); } #endif /* INET6 */ From bce0fd800aff6018e2568ad3755c4e8e4d9fb94a Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 2 Feb 2019 16:01:16 +0000 Subject: [PATCH 70/90] run(4): fix allocated memory type and -Wincompatible-pointer-types compiler warning. 
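To make the consolidated pf_check_return() above concrete: pfil(9) distinguishes a packet that continues (PFIL_PASS), one that pf took ownership of -- *m comes back NULL on a PF_PASS verdict (PFIL_CONSUMED) -- and one that must be freed (PFIL_DROPPED). A minimal userland sketch of that mapping, with mock stand-ins for the kernel types and constants (illustration only, not the kernel code):

    #include <stdio.h>
    #include <stdlib.h>

    /* Mock stand-ins; the real definitions live in pf and pfil(9). */
    #define PF_PASS 0
    #define PF_DROP 1
    typedef enum { PFIL_PASS, PFIL_CONSUMED, PFIL_DROPPED } pfil_return_t;
    struct mbuf { int m_len; };

    static pfil_return_t
    check_return_sketch(int chk, struct mbuf **m)
    {
            switch (chk) {
            case PF_PASS:
                    /* pf consumed the packet if it cleared *m. */
                    return (*m == NULL ? PFIL_CONSUMED : PFIL_PASS);
            default:
                    /* Any other verdict: free the mbuf if pf did not. */
                    if (*m != NULL) {
                            free(*m);
                            *m = NULL;
                    }
                    return (PFIL_DROPPED);
            }
    }

    int
    main(void)
    {
            struct mbuf *m = malloc(sizeof(*m));

            printf("pass: %d\n", check_return_sketch(PF_PASS, &m));
            printf("drop: %d\n", check_return_sketch(PF_DROP, &m));
            return (0);
    }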
PR: 177366 MFC after: 3 days --- sys/dev/usb/wlan/if_run.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sys/dev/usb/wlan/if_run.c b/sys/dev/usb/wlan/if_run.c index d92fdcf3368e..8fbd76bd315a 100644 --- a/sys/dev/usb/wlan/if_run.c +++ b/sys/dev/usb/wlan/if_run.c @@ -2029,7 +2029,14 @@ run_read_eeprom(struct run_softc *sc) static struct ieee80211_node * run_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { - return malloc(sizeof (struct run_node), M_DEVBUF, M_NOWAIT | M_ZERO); + struct run_node *rn; + + rn = malloc(sizeof (struct run_node), M_80211_NODE, M_NOWAIT | M_ZERO); + + if (rn == NULL) + return (NULL); + + return (&rn->ni); } static int From 943607571a356ffcbc45e63542f9eb6534b375de Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 2 Feb 2019 16:06:06 +0000 Subject: [PATCH 71/90] run(4): revert previous commit; there was no compiler warning (at least, from clang(1)). --- sys/dev/usb/wlan/if_run.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/sys/dev/usb/wlan/if_run.c b/sys/dev/usb/wlan/if_run.c index 8fbd76bd315a..d92fdcf3368e 100644 --- a/sys/dev/usb/wlan/if_run.c +++ b/sys/dev/usb/wlan/if_run.c @@ -2029,14 +2029,7 @@ run_read_eeprom(struct run_softc *sc) static struct ieee80211_node * run_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { - struct run_node *rn; - - rn = malloc(sizeof (struct run_node), M_80211_NODE, M_NOWAIT | M_ZERO); - - if (rn == NULL) - return (NULL); - - return (&rn->ni); + return malloc(sizeof (struct run_node), M_DEVBUF, M_NOWAIT | M_ZERO); } static int From 6ecec3817e16dd5ab98ec7c4757efed8e1bfd671 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 2 Feb 2019 16:07:56 +0000 Subject: [PATCH 72/90] run(4): fix allocated memory type for ieee80211_node(4). PR: 177366 MFC after: 3 days --- sys/dev/usb/wlan/if_run.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/dev/usb/wlan/if_run.c b/sys/dev/usb/wlan/if_run.c index d92fdcf3368e..658a3e65f869 100644 --- a/sys/dev/usb/wlan/if_run.c +++ b/sys/dev/usb/wlan/if_run.c @@ -2029,7 +2029,8 @@ run_read_eeprom(struct run_softc *sc) static struct ieee80211_node * run_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { - return malloc(sizeof (struct run_node), M_DEVBUF, M_NOWAIT | M_ZERO); + return malloc(sizeof (struct run_node), M_80211_NODE, + M_NOWAIT | M_ZERO); } static int From 4215ce482091f70654b984579f05ace2d101ba5d Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 2 Feb 2019 16:15:46 +0000 Subject: [PATCH 73/90] sys/dev/wtap: Check return value from malloc(..., M_NOWAIT) and drop unneeded cast.
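Patches 70 through 73 orbit the same two rules for net80211 node allocation: malloc(9) called with M_NOWAIT may return NULL, so an iv_node_alloc callback must propagate the failure rather than dereference it, and the allocation must carry the M_80211_NODE type, because net80211 frees every node under that type. A hedged sketch of the full pattern ("mydrv" is a hypothetical driver; assumes the usual sys/param.h, sys/malloc.h and net80211/ieee80211_var.h headers):

    /* Driver node: the generic node must be the first member so
     * net80211 can cast between the two. */
    struct mydrv_node {
            struct ieee80211_node   mn_ni;
            uint32_t                mn_private;     /* driver per-node state */
    };

    static struct ieee80211_node *
    mydrv_node_alloc(struct ieee80211vap *vap,
        const uint8_t mac[IEEE80211_ADDR_LEN])
    {
            struct mydrv_node *mn;

            /* M_NOWAIT can fail; net80211 copes with NULL. */
            mn = malloc(sizeof(*mn), M_80211_NODE, M_NOWAIT | M_ZERO);
            if (mn == NULL)
                    return (NULL);

            /* Allocating with M_DEVBUF instead would skew malloc(9)
             * accounting, since the free side always uses M_80211_NODE. */
            return (&mn->mn_ni);
    }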
MFC after: 3 days --- sys/dev/wtap/if_wtap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sys/dev/wtap/if_wtap.c b/sys/dev/wtap/if_wtap.c index 7316263143c8..fe0a610c6058 100644 --- a/sys/dev/wtap/if_wtap.c +++ b/sys/dev/wtap/if_wtap.c @@ -373,7 +373,7 @@ wtap_vap_delete(struct ieee80211vap *vap) destroy_dev(avp->av_dev); callout_stop(&avp->av_swba); ieee80211_vap_detach(vap); - free((struct wtap_vap*) vap, M_80211_VAP); + free(avp, M_80211_VAP); } static void @@ -602,6 +602,8 @@ wtap_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) ni = malloc(sizeof(struct ieee80211_node), M_80211_NODE, M_NOWAIT|M_ZERO); + if (ni == NULL) + return (NULL); ni->ni_txrate = 130; return ni; From 4ab4d681f344a1ddedae9e3554315dbc8c6eee09 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 2 Feb 2019 16:21:23 +0000 Subject: [PATCH 74/90] Do not acquire IEEE80211_LOCK twice in cac_timeout(); reuse locked function instead. It is externally visible since r257065. MFC after: 5 days --- sys/net80211/ieee80211_dfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sys/net80211/ieee80211_dfs.c b/sys/net80211/ieee80211_dfs.c index 0266769d9b0f..abe67576d711 100644 --- a/sys/net80211/ieee80211_dfs.c +++ b/sys/net80211/ieee80211_dfs.c @@ -156,8 +156,7 @@ cac_timeout(void *arg) /* XXX clobbers any existing desired channel */ /* NB: dfs->newchan may be NULL, that's ok */ vap->iv_des_chan = dfs->newchan; - /* XXX recursive lock need ieee80211_new_state_locked */ - ieee80211_new_state(vap, IEEE80211_S_SCAN, 0); + ieee80211_new_state_locked(vap, IEEE80211_S_SCAN, 0); } else { if_printf(vap->iv_ifp, "CAC timer on channel %u (%u MHz) expired; " From 378478f9fc9ce44fd7c19bf0298f430078c26369 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 2 Feb 2019 16:23:45 +0000 Subject: [PATCH 75/90] Drop unused M_80211_COM malloc(9) type. It is not used since r287197. MFC after: 3 days --- sys/net80211/ieee80211_freebsd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sys/net80211/ieee80211_freebsd.c b/sys/net80211/ieee80211_freebsd.c index 9e82e9e1ff83..251b6b8d3c96 100644 --- a/sys/net80211/ieee80211_freebsd.c +++ b/sys/net80211/ieee80211_freebsd.c @@ -68,8 +68,6 @@ SYSCTL_INT(_net_wlan, OID_AUTO, debug, CTLFLAG_RW, &ieee80211_debug, 0, "debugging printfs"); #endif -static MALLOC_DEFINE(M_80211_COM, "80211com", "802.11 com state"); - static const char wlanname[] = "wlan"; static struct if_clone *wlan_cloner; From 769d56eccf62ada5443e8225abb7000875e77821 Mon Sep 17 00:00:00 2001 From: Patrick Kelsey Date: Sat, 2 Feb 2019 21:14:53 +0000 Subject: [PATCH 76/90] Fix interrupt index configuration when using MSI interrupts. When in MSI mode, the device was only being configured with one interrupt index, but it needs two - one for the actual interrupt and one to park the tx queue at. Also clarified comments relating to interrupt index assignment. Reported by: Yuri Pankov MFC after: 1 day --- sys/dev/vmware/vmxnet3/if_vmx.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/sys/dev/vmware/vmxnet3/if_vmx.c b/sys/dev/vmware/vmxnet3/if_vmx.c index 0ab8b040bc7d..456d935abdef 100644 --- a/sys/dev/vmware/vmxnet3/if_vmx.c +++ b/sys/dev/vmware/vmxnet3/if_vmx.c @@ -676,14 +676,16 @@ vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc) scctx = sc->vmx_scctx; /* - * There is either one interrupt, or there is one interrupt per - * receive queue. If there is one interrupt, then all interrupt - * indexes are zero.
If there is one interrupt per receive queue, - * the transmit queue interrupt indexes are assigned the receive - * queue interrupt indexesin round-robin fashion. - * - * The event interrupt is always the last interrupt index. + * There is always one interrupt per receive queue, assigned + * starting with the first interrupt. When there is only one + * interrupt available, the event interrupt shares the receive queue + * interrupt, otherwise it uses the interrupt following the last + * receive queue interrupt. Transmit queues are not assigned + * interrupts, so they are given indexes beyond the indexes that + * correspond to the real interrupts. */ + + /* The event interrupt is always the last vector. */ sc->vmx_event_intr_idx = scctx->isc_vectors - 1; intr_idx = 0; @@ -1073,14 +1075,14 @@ vmxnet3_init_shared_data(struct vmxnet3_softc *sc) ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO; /* * Total number of interrupt indexes we are using in the shared - * config data, even though we don't actually allocate MSI-X + * config data, even though we don't actually allocate interrupt * resources for the tx queues. Some versions of the device will * fail to initialize successfully if interrupt indexes are used in * the shared config that exceed the number of interrupts configured * here. */ ds->nintr = (scctx->isc_vectors == 1) ? - 1 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1); + 2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1); ds->evintr = sc->vmx_event_intr_idx; ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL; From 5faab7782202ab1e6815c5da956d8997f59aa105 Mon Sep 17 00:00:00 2001 From: Vincenzo Maffione Date: Sat, 2 Feb 2019 22:39:29 +0000 Subject: [PATCH 77/90] netmap: upgrade sync-kloop support Add SYNC_KLOOP_MODE option, and add support for direct mode, where application executes the TXSYNC and RXSYNC in the context of the ioeventfd wake up callback. MFC after: 5 days --- sys/dev/netmap/netmap.c | 9 +- sys/dev/netmap/netmap_kern.h | 9 + sys/dev/netmap/netmap_kloop.c | 407 ++++++++++++++++++++++++++-------- sys/net/netmap.h | 25 ++- 4 files changed, 346 insertions(+), 104 deletions(-) diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index 3f1f130b25fa..61739bec59ee 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -2971,6 +2971,9 @@ nmreq_opt_size_by_type(uint32_t nro_reqtype, uint64_t nro_size) case NETMAP_REQ_OPT_CSB: rv = sizeof(struct nmreq_opt_csb); break; + case NETMAP_REQ_OPT_SYNC_KLOOP_MODE: + rv = sizeof(struct nmreq_opt_sync_kloop_mode); + break; } /* subtract the common header */ return rv - sizeof(struct nmreq_option); @@ -3287,10 +3290,8 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) * there are pending packets to send. The latter can be disabled * passing NETMAP_NO_TX_POLL in the NIOCREG call. */ - si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] : - &na->rx_rings[priv->np_qfirst[NR_RX]]->si; - si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] : - &na->tx_rings[priv->np_qfirst[NR_TX]]->si; + si[NR_RX] = priv->np_si[NR_RX]; + si[NR_TX] = priv->np_si[NR_TX]; #ifdef __FreeBSD__ /* diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index 4578269e43a2..3f4b00d814f4 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -1169,6 +1169,15 @@ nm_kr_txempty(struct netmap_kring *kring) * rxsync_prologue */ #define nm_kr_rxempty(_k) nm_kr_txempty(_k) +/* True if the application needs to wait for more space on the ring + * (more received packets or more free tx slots). 
+ * Only valid after *xsync_prologue. */ +static inline int +nm_kr_wouldblock(struct netmap_kring *kring) +{ + return kring->rcur == kring->nr_hwtail; +} + /* * protect against multiple threads using the same ring. * also check that the ring has not been stopped or locked diff --git a/sys/dev/netmap/netmap_kloop.c b/sys/dev/netmap/netmap_kloop.c index 98536cd03f57..2bd3685a2dff 100644 --- a/sys/dev/netmap/netmap_kloop.c +++ b/sys/dev/netmap/netmap_kloop.c @@ -141,6 +141,9 @@ sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring) kring->rcur, kring->rtail, kring->nr_hwtail); } +/* Arguments for netmap_sync_kloop_tx_ring() and + * netmap_sync_kloop_rx_ring(). + */ struct sync_kloop_ring_args { struct netmap_kring *kring; struct nm_csb_atok *csb_atok; @@ -148,6 +151,10 @@ struct sync_kloop_ring_args { #ifdef SYNC_KLOOP_POLL struct eventfd_ctx *irq_ctx; #endif /* SYNC_KLOOP_POLL */ + /* Are we busy waiting rather than using a schedule() loop ? */ + bool busy_wait; + /* Are we processing in the context of VM exit ? */ + bool direct; }; static void @@ -161,10 +168,16 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) uint32_t num_slots; int batch; + if (unlikely(nm_kr_tryget(kring, 1, NULL))) { + return; + } + num_slots = kring->nkr_num_slots; /* Disable application --> kernel notifications. */ - csb_ktoa_kick_enable(csb_ktoa, 0); + if (!a->direct) { + csb_ktoa_kick_enable(csb_ktoa, 0); + } /* Copy the application kring pointers from the CSB */ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); @@ -197,7 +210,9 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) { /* Reinit ring and enable notifications. */ netmap_ring_reinit(kring); - csb_ktoa_kick_enable(csb_ktoa, 1); + if (!a->busy_wait) { + csb_ktoa_kick_enable(csb_ktoa, 1); + } break; } @@ -206,8 +221,10 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) } if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) { - /* Reenable notifications. */ - csb_ktoa_kick_enable(csb_ktoa, 1); + if (!a->busy_wait) { + /* Reenable notifications. */ + csb_ktoa_kick_enable(csb_ktoa, 1); + } nm_prerr("txsync() failed"); break; } @@ -232,7 +249,8 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) /* Interrupt the application if needed. */ #ifdef SYNC_KLOOP_POLL if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) { - /* Disable application kick to avoid sending unnecessary kicks */ + /* We could disable kernel --> application kicks here, + * to avoid spurious interrupts. */ eventfd_signal(a->irq_ctx, 1); more_txspace = false; } @@ -241,6 +259,9 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) /* Read CSB to see if there is more work to do. */ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); if (shadow_ring.head == kring->rhead) { + if (a->busy_wait) { + break; + } /* * No more packets to transmit. 
We enable notifications and * go to sleep, waiting for a kick from the application when new @@ -268,6 +289,8 @@ netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) } } + nm_kr_put(kring); + #ifdef SYNC_KLOOP_POLL if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) { eventfd_signal(a->irq_ctx, 1); @@ -297,13 +320,19 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) bool some_recvd = false; uint32_t num_slots; + if (unlikely(nm_kr_tryget(kring, 1, NULL))) { + return; + } + num_slots = kring->nkr_num_slots; /* Get RX csb_atok and csb_ktoa pointers from the CSB. */ num_slots = kring->nkr_num_slots; /* Disable notifications. */ - csb_ktoa_kick_enable(csb_ktoa, 0); + if (!a->direct) { + csb_ktoa_kick_enable(csb_ktoa, 0); + } /* Copy the application kring pointers from the CSB */ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); @@ -315,7 +344,9 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) { /* Reinit ring and enable notifications. */ netmap_ring_reinit(kring); - csb_ktoa_kick_enable(csb_ktoa, 1); + if (!a->busy_wait) { + csb_ktoa_kick_enable(csb_ktoa, 1); + } break; } @@ -324,8 +355,10 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) } if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) { - /* Reenable notifications. */ - csb_ktoa_kick_enable(csb_ktoa, 1); + if (!a->busy_wait) { + /* Reenable notifications. */ + csb_ktoa_kick_enable(csb_ktoa, 1); + } nm_prerr("rxsync() failed"); break; } @@ -351,7 +384,8 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) #ifdef SYNC_KLOOP_POLL /* Interrupt the application if needed. */ if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) { - /* Disable application kick to avoid sending unnecessary kicks */ + /* We could disable kernel --> application kicks here, + * to avoid spurious interrupts. */ eventfd_signal(a->irq_ctx, 1); some_recvd = false; } @@ -360,6 +394,9 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) /* Read CSB to see if there is more work to do. */ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); if (sync_kloop_norxslots(kring, shadow_ring.head)) { + if (a->busy_wait) { + break; + } /* * No more slots available for reception. We enable notification and * go to sleep, waiting for a kick from the application when new receive @@ -401,6 +438,7 @@ netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) } #ifdef SYNC_KLOOP_POLL +struct sync_kloop_poll_ctx; struct sync_kloop_poll_entry { /* Support for receiving notifications from * a netmap ring or from the application. */ @@ -411,12 +449,24 @@ struct sync_kloop_poll_entry { /* Support for sending notifications to the application. */ struct eventfd_ctx *irq_ctx; struct file *irq_filp; + + /* Arguments for the ring processing function. Useful + * in case of custom wake-up function. */ + struct sync_kloop_ring_args *args; + struct sync_kloop_poll_ctx *parent; + }; struct sync_kloop_poll_ctx { poll_table wait_table; unsigned int next_entry; + int (*next_wake_fun)(wait_queue_t *, unsigned, int, void *); unsigned int num_entries; + unsigned int num_tx_rings; + unsigned int num_rings; + /* First num_tx_rings entries are for the TX kicks. + * Then the RX kicks entries follow. The last two + * entries are for TX irq, and RX irq. 
*/ struct sync_kloop_poll_entry entries[0]; }; @@ -433,9 +483,77 @@ sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh, entry->wqh = wqh; entry->filp = file; /* Use the default wake up function. */ - init_waitqueue_entry(&entry->wait, current); + if (poll_ctx->next_wake_fun == NULL) { + init_waitqueue_entry(&entry->wait, current); + } else { + init_waitqueue_func_entry(&entry->wait, + poll_ctx->next_wake_fun); + } add_wait_queue(wqh, &entry->wait); - poll_ctx->next_entry++; +} + +static int +sync_kloop_tx_kick_wake_fun(wait_queue_t *wait, unsigned mode, + int wake_flags, void *key) +{ + struct sync_kloop_poll_entry *entry = + container_of(wait, struct sync_kloop_poll_entry, wait); + + netmap_sync_kloop_tx_ring(entry->args); + + return 0; +} + +static int +sync_kloop_tx_irq_wake_fun(wait_queue_t *wait, unsigned mode, + int wake_flags, void *key) +{ + struct sync_kloop_poll_entry *entry = + container_of(wait, struct sync_kloop_poll_entry, wait); + struct sync_kloop_poll_ctx *poll_ctx = entry->parent; + int i; + + for (i = 0; i < poll_ctx->num_tx_rings; i++) { + struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx; + + if (irq_ctx) { + eventfd_signal(irq_ctx, 1); + } + } + + return 0; +} + +static int +sync_kloop_rx_kick_wake_fun(wait_queue_t *wait, unsigned mode, + int wake_flags, void *key) +{ + struct sync_kloop_poll_entry *entry = + container_of(wait, struct sync_kloop_poll_entry, wait); + + netmap_sync_kloop_rx_ring(entry->args); + + return 0; +} + +static int +sync_kloop_rx_irq_wake_fun(wait_queue_t *wait, unsigned mode, + int wake_flags, void *key) +{ + struct sync_kloop_poll_entry *entry = + container_of(wait, struct sync_kloop_poll_entry, wait); + struct sync_kloop_poll_ctx *poll_ctx = entry->parent; + int i; + + for (i = poll_ctx->num_tx_rings; i < poll_ctx->num_rings; i++) { + struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx; + + if (irq_ctx) { + eventfd_signal(irq_ctx, 1); + } + } + + return 0; } #endif /* SYNC_KLOOP_POLL */ @@ -455,6 +573,10 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) struct nm_csb_ktoa* csb_ktoa_base; struct netmap_adapter *na; struct nmreq_option *opt; + bool na_could_sleep = false; + bool busy_wait = true; + bool direct_tx = false; + bool direct_rx = false; int err = 0; int i; @@ -505,7 +627,43 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) goto out; } + /* Prepare the arguments for netmap_sync_kloop_tx_ring() + * and netmap_sync_kloop_rx_ring(). */ + for (i = 0; i < num_tx_rings; i++) { + struct sync_kloop_ring_args *a = args + i; + + a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]]; + a->csb_atok = csb_atok_base + i; + a->csb_ktoa = csb_ktoa_base + i; + a->busy_wait = busy_wait; + a->direct = direct_tx; + } + for (i = 0; i < num_rx_rings; i++) { + struct sync_kloop_ring_args *a = args + num_tx_rings + i; + + a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]]; + a->csb_atok = csb_atok_base + num_tx_rings + i; + a->csb_ktoa = csb_ktoa_base + num_tx_rings + i; + a->busy_wait = busy_wait; + a->direct = direct_rx; + } + /* Validate notification options. 
*/ + opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options, + NETMAP_REQ_OPT_SYNC_KLOOP_MODE); + if (opt != NULL) { + struct nmreq_opt_sync_kloop_mode *mode_opt = + (struct nmreq_opt_sync_kloop_mode *)opt; + + direct_tx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_TX); + direct_rx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_RX); + if (mode_opt->mode & ~(NM_OPT_SYNC_KLOOP_DIRECT_TX | + NM_OPT_SYNC_KLOOP_DIRECT_RX)) { + opt->nro_status = err = EINVAL; + goto out; + } + opt->nro_status = 0; + } opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options, NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS); if (opt != NULL) { @@ -524,54 +682,132 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) #ifdef SYNC_KLOOP_POLL eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt; opt->nro_status = 0; + + /* Check if some ioeventfd entry is not defined, and force sleep + * synchronization in that case. */ + busy_wait = false; + for (i = 0; i < num_rings; i++) { + if (eventfds_opt->eventfds[i].ioeventfd < 0) { + busy_wait = true; + break; + } + } + + if (busy_wait && (direct_tx || direct_rx)) { + /* For direct processing we need all the + * ioeventfds to be valid. */ + opt->nro_status = err = EINVAL; + goto out; + } + /* We need 2 poll entries for TX and RX notifications coming * from the netmap adapter, plus one entries per ring for the * notifications coming from the application. */ poll_ctx = nm_os_malloc(sizeof(*poll_ctx) + - (2 + num_rings) * sizeof(poll_ctx->entries[0])); + (num_rings + 2) * sizeof(poll_ctx->entries[0])); init_poll_funcptr(&poll_ctx->wait_table, sync_kloop_poll_table_queue_proc); poll_ctx->num_entries = 2 + num_rings; + poll_ctx->num_tx_rings = num_tx_rings; + poll_ctx->num_rings = num_rings; poll_ctx->next_entry = 0; + poll_ctx->next_wake_fun = NULL; + + if (direct_tx && (na->na_flags & NAF_BDG_MAYSLEEP)) { + /* In direct mode, VALE txsync is called from + * wake-up context, where it is not possible + * to sleep. + */ + na->na_flags &= ~NAF_BDG_MAYSLEEP; + na_could_sleep = true; + } + + for (i = 0; i < num_rings + 2; i++) { + poll_ctx->entries[i].args = args + i; + poll_ctx->entries[i].parent = poll_ctx; + } + /* Poll for notifications coming from the applications through - * eventfds . */ - for (i = 0; i < num_rings; i++) { - struct eventfd_ctx *irq; - struct file *filp; + * eventfds. */ + for (i = 0; i < num_rings; i++, poll_ctx->next_entry++) { + struct eventfd_ctx *irq = NULL; + struct file *filp = NULL; unsigned long mask; + bool tx_ring = (i < num_tx_rings); - filp = eventfd_fget(eventfds_opt->eventfds[i].ioeventfd); - if (IS_ERR(filp)) { - err = PTR_ERR(filp); - goto out; - } - mask = filp->f_op->poll(filp, &poll_ctx->wait_table); - if (mask & POLLERR) { - err = EINVAL; - goto out; - } - - filp = eventfd_fget(eventfds_opt->eventfds[i].irqfd); - if (IS_ERR(filp)) { - err = PTR_ERR(filp); - goto out; + if (eventfds_opt->eventfds[i].irqfd >= 0) { + filp = eventfd_fget( + eventfds_opt->eventfds[i].irqfd); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + irq = eventfd_ctx_fileget(filp); + if (IS_ERR(irq)) { + err = PTR_ERR(irq); + goto out; + } } poll_ctx->entries[i].irq_filp = filp; - irq = eventfd_ctx_fileget(filp); - if (IS_ERR(irq)) { - err = PTR_ERR(irq); - goto out; - } poll_ctx->entries[i].irq_ctx = irq; + poll_ctx->entries[i].args->busy_wait = busy_wait; + /* Don't let netmap_sync_kloop_*x_ring() use + * IRQs in direct mode. 
*/ + poll_ctx->entries[i].args->irq_ctx = + ((tx_ring && direct_tx) || + (!tx_ring && direct_rx)) ? NULL : + poll_ctx->entries[i].irq_ctx; + poll_ctx->entries[i].args->direct = + (tx_ring ? direct_tx : direct_rx); + + if (!busy_wait) { + filp = eventfd_fget( + eventfds_opt->eventfds[i].ioeventfd); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + if (tx_ring && direct_tx) { + /* Override the wake up function + * so that it can directly call + * netmap_sync_kloop_tx_ring(). + */ + poll_ctx->next_wake_fun = + sync_kloop_tx_kick_wake_fun; + } else if (!tx_ring && direct_rx) { + /* Same for direct RX. */ + poll_ctx->next_wake_fun = + sync_kloop_rx_kick_wake_fun; + } else { + poll_ctx->next_wake_fun = NULL; + } + mask = filp->f_op->poll(filp, + &poll_ctx->wait_table); + if (mask & POLLERR) { + err = EINVAL; + goto out; + } + } } + /* Poll for notifications coming from the netmap rings bound to * this file descriptor. */ - { + if (!busy_wait) { NMG_LOCK(); + /* In direct mode, override the wake up function so + * that it can forward the netmap_tx_irq() to the + * guest. */ + poll_ctx->next_wake_fun = direct_tx ? + sync_kloop_tx_irq_wake_fun : NULL; poll_wait(priv->np_filp, priv->np_si[NR_TX], &poll_ctx->wait_table); + poll_ctx->next_entry++; + + poll_ctx->next_wake_fun = direct_rx ? + sync_kloop_rx_irq_wake_fun : NULL; poll_wait(priv->np_filp, priv->np_si[NR_RX], &poll_ctx->wait_table); + poll_ctx->next_entry++; NMG_UNLOCK(); } #else /* SYNC_KLOOP_POLL */ @@ -580,30 +816,9 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) #endif /* SYNC_KLOOP_POLL */ } - /* Prepare the arguments for netmap_sync_kloop_tx_ring() - * and netmap_sync_kloop_rx_ring(). */ - for (i = 0; i < num_tx_rings; i++) { - struct sync_kloop_ring_args *a = args + i; - - a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]]; - a->csb_atok = csb_atok_base + i; - a->csb_ktoa = csb_ktoa_base + i; -#ifdef SYNC_KLOOP_POLL - if (poll_ctx) - a->irq_ctx = poll_ctx->entries[i].irq_ctx; -#endif /* SYNC_KLOOP_POLL */ - } - for (i = 0; i < num_rx_rings; i++) { - struct sync_kloop_ring_args *a = args + num_tx_rings + i; - - a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]]; - a->csb_atok = csb_atok_base + num_tx_rings + i; - a->csb_ktoa = csb_ktoa_base + num_tx_rings + i; -#ifdef SYNC_KLOOP_POLL - if (poll_ctx) - a->irq_ctx = poll_ctx->entries[num_tx_rings + i].irq_ctx; -#endif /* SYNC_KLOOP_POLL */ - } + nm_prinf("kloop busy_wait %u, direct_tx %u, direct_rx %u, " + "na_could_sleep %u", busy_wait, direct_tx, direct_rx, + na_could_sleep); /* Main loop. */ for (;;) { @@ -612,7 +827,7 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) } #ifdef SYNC_KLOOP_POLL - if (poll_ctx) { + if (!busy_wait) { /* It is important to set the task state as * interruptible before processing any TX/RX ring, * so that if a notification on ring Y comes after @@ -627,46 +842,37 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) #endif /* SYNC_KLOOP_POLL */ /* Process all the TX rings bound to this file descriptor. */ - for (i = 0; i < num_tx_rings; i++) { + for (i = 0; !direct_tx && i < num_tx_rings; i++) { struct sync_kloop_ring_args *a = args + i; - - if (unlikely(nm_kr_tryget(a->kring, 1, NULL))) { - continue; - } netmap_sync_kloop_tx_ring(a); - nm_kr_put(a->kring); } /* Process all the RX rings bound to this file descriptor. 
*/ - for (i = 0; i < num_rx_rings; i++) { + for (i = 0; !direct_rx && i < num_rx_rings; i++) { struct sync_kloop_ring_args *a = args + num_tx_rings + i; - - if (unlikely(nm_kr_tryget(a->kring, 1, NULL))) { - continue; - } netmap_sync_kloop_rx_ring(a); - nm_kr_put(a->kring); } -#ifdef SYNC_KLOOP_POLL - if (poll_ctx) { - /* If a poll context is present, yield to the scheduler - * waiting for a notification to come either from - * netmap or the application. */ - schedule_timeout(msecs_to_jiffies(3000)); - } else -#endif /* SYNC_KLOOP_POLL */ - { + if (busy_wait) { /* Default synchronization method: sleep for a while. */ usleep_range(sleep_us, sleep_us); } +#ifdef SYNC_KLOOP_POLL + else { + /* Yield to the scheduler waiting for a notification + * to come either from netmap or the application. */ + schedule_timeout(msecs_to_jiffies(3000)); + } +#endif /* SYNC_KLOOP_POLL */ } out: #ifdef SYNC_KLOOP_POLL if (poll_ctx) { /* Stop polling from netmap and the eventfds, and deallocate * the poll context. */ - __set_current_state(TASK_RUNNING); + if (!busy_wait) { + __set_current_state(TASK_RUNNING); + } for (i = 0; i < poll_ctx->next_entry; i++) { struct sync_kloop_poll_entry *entry = poll_ctx->entries + i; @@ -696,6 +902,9 @@ netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) /* Reset the kloop state. */ NMG_LOCK(); priv->np_kloop_state = 0; + if (na_could_sleep) { + na->na_flags |= NAF_BDG_MAYSLEEP; + } NMG_UNLOCK(); return err; @@ -770,14 +979,14 @@ netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, atok->appl_need_kick = 0; /* - * First part: tell the host (updating the CSB) to process the new - * packets. + * First part: tell the host to process the new packets, + * updating the CSB. */ kring->nr_hwcur = ktoa->hwcur; nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead); /* Ask for a kick from a guest to the host if needed. */ - if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring)) + if (((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring)) && NM_ACCESS_ONCE(ktoa->kern_need_kick)) || (flags & NAF_FORCE_RECLAIM)) { atok->sync_flags = flags; @@ -787,7 +996,7 @@ netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, /* * Second part: reclaim buffers for completed transmissions. */ - if (nm_kr_txempty(kring) || (flags & NAF_FORCE_RECLAIM)) { + if (nm_kr_wouldblock(kring) || (flags & NAF_FORCE_RECLAIM)) { nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur); } @@ -797,7 +1006,7 @@ netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, * go to sleep and we need to be notified by the host when more free * space is available. */ - if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) { + if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) { /* Reenable notifications. */ atok->appl_need_kick = 1; /* Double check, with store-load memory barrier. */ @@ -805,7 +1014,7 @@ netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur); /* If there is new free space, disable notifications */ - if (unlikely(!nm_kr_txempty(kring))) { + if (unlikely(!nm_kr_wouldblock(kring))) { atok->appl_need_kick = 0; } } @@ -851,11 +1060,6 @@ netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, */ if (kring->rhead != kring->nr_hwcur) { nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead); - /* Ask for a kick from the guest to the host if needed. 
*/ - if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) { - atok->sync_flags = flags; - notify = true; - } } /* @@ -863,7 +1067,7 @@ netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, * we need to be notified by the host when more RX slots have been * completed. */ - if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) { + if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) { /* Reenable notifications. */ atok->appl_need_kick = 1; /* Double check, with store-load memory barrier. */ @@ -871,11 +1075,18 @@ netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur); /* If there are new slots, disable notifications. */ - if (!nm_kr_rxempty(kring)) { + if (!nm_kr_wouldblock(kring)) { atok->appl_need_kick = 0; } } + /* Ask for a kick from the guest to the host if needed. */ + if ((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring)) + && NM_ACCESS_ONCE(ktoa->kern_need_kick)) { + atok->sync_flags = flags; + notify = true; + } + nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)", kring->name, atok->head, atok->cur, ktoa->hwtail, kring->rhead, kring->rcur, kring->nr_hwtail); diff --git a/sys/net/netmap.h b/sys/net/netmap.h index 098d369b07d6..bb38c748f840 100644 --- a/sys/net/netmap.h +++ b/sys/net/netmap.h @@ -540,7 +540,8 @@ enum { enum { /* On NETMAP_REQ_REGISTER, ask netmap to use memory allocated - * from user-space allocated memory pools (e.g. hugepages). */ + * from user-space allocated memory pools (e.g. hugepages). + */ NETMAP_REQ_OPT_EXTMEM = 1, /* ON NETMAP_REQ_SYNC_KLOOP_START, ask netmap to use eventfd-based @@ -551,8 +552,15 @@ enum { /* On NETMAP_REQ_REGISTER, ask netmap to work in CSB mode, where * head, cur and tail pointers are not exchanged through the * struct netmap_ring header, but rather using an user-provided - * memory area (see struct nm_csb_atok and struct nm_csb_ktoa). */ + * memory area (see struct nm_csb_atok and struct nm_csb_ktoa). + */ NETMAP_REQ_OPT_CSB, + + /* An extension to NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS, which specifies + * if the TX and/or RX rings are synced in the context of the VM exit. + * This requires the 'ioeventfd' fields to be valid (cannot be < 0). + */ + NETMAP_REQ_OPT_SYNC_KLOOP_MODE, }; /* @@ -877,6 +885,12 @@ struct nmreq_opt_sync_kloop_eventfds { * their order must agree with the CSB arrays passed in the * NETMAP_REQ_OPT_CSB option. Each entry contains a file descriptor * backed by an eventfd. + * + * If any of the 'ioeventfd' entries is < 0, the event loop uses + * the sleeping synchronization strategy (according to sleep_us), + * and keeps kern_need_kick always disabled. + * Each 'irqfd' can be < 0, and in that case the corresponding queue + * is never notified. */ struct { /* Notifier for the application --> kernel loop direction. 
*/ @@ -886,6 +900,13 @@ struct nmreq_opt_sync_kloop_eventfds { } eventfds[0]; }; +struct nmreq_opt_sync_kloop_mode { + struct nmreq_option nro_opt; /* common header */ +#define NM_OPT_SYNC_KLOOP_DIRECT_TX (1 << 0) +#define NM_OPT_SYNC_KLOOP_DIRECT_RX (1 << 1) + uint32_t mode; +}; + struct nmreq_opt_extmem { struct nmreq_option nro_opt; /* common header */ uint64_t nro_usrptr; /* (in) ptr to usr memory */ From 22cde055c2c89961c9ad26dea91ac4f788e1881b Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sun, 3 Feb 2019 00:18:29 +0000 Subject: [PATCH 78/90] ifconfig(8): interpret VHT rates correctly for 'list roam / txparam' options They are represented via MCS rate index, not as a 'speed in MBps' * 2. MFC after: 5 days --- sbin/ifconfig/ifieee80211.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sbin/ifconfig/ifieee80211.c b/sbin/ifconfig/ifieee80211.c index ce1950e65b89..c6ca6c05b3c3 100644 --- a/sbin/ifconfig/ifieee80211.c +++ b/sbin/ifconfig/ifieee80211.c @@ -4301,7 +4301,10 @@ list_roam(int s) rp = &roamparams.params[mode]; if (rp->rssi == 0 && rp->rate == 0) continue; - if (mode == IEEE80211_MODE_11NA || mode == IEEE80211_MODE_11NG) { + if (mode == IEEE80211_MODE_11NA || + mode == IEEE80211_MODE_11NG || + mode == IEEE80211_MODE_VHT_2GHZ || + mode == IEEE80211_MODE_VHT_5GHZ) { if (rp->rssi & 1) LINE_CHECK("roam:%-7.7s rssi %2u.5dBm MCS %2u ", modename[mode], rp->rssi/2, @@ -4332,7 +4335,10 @@ list_txparams(int s) tp = &txparams.params[mode]; if (tp->mgmtrate == 0 && tp->mcastrate == 0) continue; - if (mode == IEEE80211_MODE_11NA || mode == IEEE80211_MODE_11NG) { + if (mode == IEEE80211_MODE_11NA || + mode == IEEE80211_MODE_11NG || + mode == IEEE80211_MODE_VHT_2GHZ || + mode == IEEE80211_MODE_VHT_5GHZ) { if (tp->ucastrate == IEEE80211_FIXED_RATE_NONE) LINE_CHECK("%-7.7s ucast NONE mgmt %2u MCS " "mcast %2u MCS maxretry %u", From 511e2766f189af55be9ea4a175d1a5898bed1a62 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sun, 3 Feb 2019 01:32:02 +0000 Subject: [PATCH 79/90] net80211(4): do not setup roaming parameters for unsupported modes. ifconfig(8) prints per-mode parameters if they are non-zero; since we have 13 possible modes with 3...5 typically supported this change should greatly reduce amount of information for 'ifconfig list roam' command. While here ensure that sta_roam_check() will not use roaming parameters for unsupported modes (it should not). This change effectively reverts r188776. 
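The isclr() test above is the standard bitset idiom from sys/param.h: ic_modecaps is a bit vector indexed by PHY mode, and the defaults are now copied only for modes whose capability bit is set, leaving unsupported modes zeroed so ifconfig(8) skips them. A standalone illustration with the two macros inlined (the mode count matches the 13 mentioned above; the parameter values are made up):

    #include <stdio.h>
    #include <string.h>

    #define NBBY            8
    #define setbit(a, i)    (((unsigned char *)(a))[(i)/NBBY] |= 1 << ((i) % NBBY))
    #define isclr(a, i)     ((((unsigned char *)(a))[(i)/NBBY] & (1 << ((i) % NBBY))) == 0)

    #define MODE_MAX        13

    int
    main(void)
    {
            unsigned char modecaps[(MODE_MAX + NBBY - 1) / NBBY];
            int defroam[MODE_MAX], roamparms[MODE_MAX], m;

            memset(modecaps, 0, sizeof(modecaps));
            memset(roamparms, 0, sizeof(roamparms));
            for (m = 0; m < MODE_MAX; m++)
                    defroam[m] = 100 + m;   /* stand-in default parameters */

            setbit(modecaps, 1);            /* pretend two modes are supported */
            setbit(modecaps, 4);

            /* Seed parameters only for supported modes. */
            for (m = 0; m < MODE_MAX; m++) {
                    if (isclr(modecaps, m))
                            continue;
                    roamparms[m] = defroam[m];
            }

            for (m = 0; m < MODE_MAX; m++)
                    printf("mode %2d: %d\n", m, roamparms[m]);
            return (0);
    }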
MFC after: 2 weeks --- sys/net80211/ieee80211_scan.c | 10 +++++++++- sys/net80211/ieee80211_scan_sta.c | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/sys/net80211/ieee80211_scan.c b/sys/net80211/ieee80211_scan.c index 97186ccff755..ee05d89efb5e 100644 --- a/sys/net80211/ieee80211_scan.c +++ b/sys/net80211/ieee80211_scan.c @@ -130,13 +130,21 @@ void ieee80211_scan_vattach(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; + int m; vap->iv_bgscanidle = (IEEE80211_BGSCAN_IDLE_DEFAULT*1000)/hz; vap->iv_bgscanintvl = IEEE80211_BGSCAN_INTVAL_DEFAULT*hz; vap->iv_scanvalid = IEEE80211_SCAN_VALID_DEFAULT*hz; vap->iv_roaming = IEEE80211_ROAMING_AUTO; - memcpy(vap->iv_roamparms, defroam, sizeof(defroam)); + + memset(vap->iv_roamparms, 0, sizeof(vap->iv_roamparms)); + for (m = IEEE80211_MODE_AUTO + 1; m < IEEE80211_MODE_MAX; m++) { + if (isclr(ic->ic_modecaps, m)) + continue; + + memcpy(&vap->iv_roamparms[m], &defroam[m], sizeof(defroam[m])); + } ic->ic_scan_methods->sc_vattach(vap); } diff --git a/sys/net80211/ieee80211_scan_sta.c b/sys/net80211/ieee80211_scan_sta.c index 1cada2927591..901a5a5e8b4a 100644 --- a/sys/net80211/ieee80211_scan_sta.c +++ b/sys/net80211/ieee80211_scan_sta.c @@ -1354,6 +1354,9 @@ sta_roam_check(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) mode = ieee80211_chan2mode(ic->ic_bsschan); roamRate = vap->iv_roamparms[mode].rate; roamRssi = vap->iv_roamparms[mode].rssi; + KASSERT(roamRate != 0 && roamRssi != 0, ("iv_roamparms are not " "initialized for %s mode!", ieee80211_phymode_name[mode])); + ucastRate = vap->iv_txparms[mode].ucastrate; /* NB: the most up to date rssi is in the node, not the scan cache */ curRssi = ic->ic_node_getrssi(ni); From 2ce6d2b58cbd80fd2dcf7826664c41ae13ee862f Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sun, 3 Feb 2019 02:32:13 +0000 Subject: [PATCH 80/90] net80211(4): fix rate check when 'roaming' ifconfig(8) option is set to 'auto'. Do not try to clear the 'basic rate' bit from roamRate; it cannot be set here, and this operation actually clears the 'MCS rate' bit instead, breaking the comparison for 11n / 11ac modes. Tested with RTL8188CUS, HOSTAP mode + RTL8821AU, STA mode. MFC after: 3 days --- sys/net80211/ieee80211_scan_sta.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/net80211/ieee80211_scan_sta.c b/sys/net80211/ieee80211_scan_sta.c index 901a5a5e8b4a..ecfdcd03c7c4 100644 --- a/sys/net80211/ieee80211_scan_sta.c +++ b/sys/net80211/ieee80211_scan_sta.c @@ -1362,7 +1362,6 @@ sta_roam_check(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) curRssi = ic->ic_node_getrssi(ni); if (ucastRate == IEEE80211_FIXED_RATE_NONE) { curRate = ni->ni_txrate; - roamRate &= IEEE80211_RATE_VAL; IEEE80211_DPRINTF(vap, IEEE80211_MSG_ROAM, "%s: currssi %d currate %u roamrssi %d roamrate %u\n", __func__, curRssi, curRate, roamRssi, roamRate); From 35a5128d5042cf9eb6a1b17d368db2b6511ae965 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sun, 3 Feb 2019 03:02:59 +0000 Subject: [PATCH 81/90] ifconfig(8): display management / multicast wlan(4) rates properly. For 11n / 11ac we are still using non-11n rates for management and multicast traffic by default; check the 'MCS rate' bit to determine how to print them correctly.
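To restate the rate-byte layout these fixes depend on: the high bit (IEEE80211_RATE_MCS, 0x80) marks an 11n/11ac MCS index, while legacy rates are stored in 0.5 Mb/s units, so masking an MCS rate with IEEE80211_RATE_VAL (0x7f) silently drops the MCS flag. A small self-contained sketch of the decode, with the two constants restated here for illustration (in the tree they come from the net80211 headers):

#include <stdio.h>
#include <stdint.h>

#define IEEE80211_RATE_VAL	0x7f	/* restated for illustration */
#define IEEE80211_RATE_MCS	0x80

static void
print_rate(uint8_t rate)
{
	if (rate & IEEE80211_RATE_MCS)	/* 11n/11ac: MCS index */
		printf("MCS %u\n", rate & IEEE80211_RATE_VAL);
	else				/* legacy: 0.5 Mb/s units */
		printf("%u Mb/s\n", rate / 2);
}

int
main(void)
{
	print_rate(108);			/* legacy: 54 Mb/s */
	print_rate(IEEE80211_RATE_MCS | 7);	/* MCS 7 */
	return (0);
}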
PR: 161035 MFC after: 1 week --- sbin/ifconfig/ifieee80211.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/sbin/ifconfig/ifieee80211.c b/sbin/ifconfig/ifieee80211.c index c6ca6c05b3c3..e8da16b92743 100644 --- a/sbin/ifconfig/ifieee80211.c +++ b/sbin/ifconfig/ifieee80211.c @@ -4324,6 +4324,13 @@ list_roam(int s) } } +/* XXX TODO: rate-to-string method... */ +static const char* +get_mcs_mbs_rate_str(uint8_t rate) +{ + return (rate & IEEE80211_RATE_MCS) ? "MCS " : "Mb/s"; +} + static void list_txparams(int s) { @@ -4340,19 +4347,23 @@ list_txparams(int s) mode == IEEE80211_MODE_VHT_2GHZ || mode == IEEE80211_MODE_VHT_5GHZ) { if (tp->ucastrate == IEEE80211_FIXED_RATE_NONE) - LINE_CHECK("%-7.7s ucast NONE mgmt %2u MCS " - "mcast %2u MCS maxretry %u", + LINE_CHECK("%-7.7s ucast NONE mgmt %2u %s " + "mcast %2u %s maxretry %u", modename[mode], tp->mgmtrate &~ IEEE80211_RATE_MCS, + get_mcs_mbs_rate_str(tp->mgmtrate), tp->mcastrate &~ IEEE80211_RATE_MCS, + get_mcs_mbs_rate_str(tp->mcastrate), tp->maxretry); else - LINE_CHECK("%-7.7s ucast %2u MCS mgmt %2u MCS " - "mcast %2u MCS maxretry %u", + LINE_CHECK("%-7.7s ucast %2u MCS mgmt %2u %s " - "mcast %2u %s maxretry %u", modename[mode], tp->ucastrate &~ IEEE80211_RATE_MCS, tp->mgmtrate &~ IEEE80211_RATE_MCS, + get_mcs_mbs_rate_str(tp->mgmtrate), tp->mcastrate &~ IEEE80211_RATE_MCS, + get_mcs_mbs_rate_str(tp->mcastrate), tp->maxretry); } else { if (tp->ucastrate == IEEE80211_FIXED_RATE_NONE) From 1c4cb65153a3ac4c8b2af4638a84192ff8c8aca0 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sun, 3 Feb 2019 04:31:50 +0000 Subject: [PATCH 82/90] net80211(4): do not set up Tx parameters for unsupported modes. That should shorten the 'ifconfig list txparam' output, since unsupported modes will not be shown. Checked with RTL8188EE, STA mode. MFC after: 2 weeks --- sys/net80211/ieee80211_proto.c | 3 +++ sys/net80211/ieee80211_tdma.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/sys/net80211/ieee80211_proto.c b/sys/net80211/ieee80211_proto.c index c4a6c4ccac71..24f78e88c070 100644 --- a/sys/net80211/ieee80211_proto.c +++ b/sys/net80211/ieee80211_proto.c @@ -347,6 +347,9 @@ ieee80211_proto_vattach(struct ieee80211vap *vap) * driver and/or user applications. */ for (i = IEEE80211_MODE_11A; i < IEEE80211_MODE_MAX; i++) { + if (isclr(ic->ic_modecaps, i)) + continue; + const struct ieee80211_rateset *rs = &ic->ic_sup_rates[i]; vap->iv_txparms[i].ucastrate = IEEE80211_FIXED_RATE_NONE; diff --git a/sys/net80211/ieee80211_tdma.c b/sys/net80211/ieee80211_tdma.c index 6ea433ce54ca..361273a53664 100644 --- a/sys/net80211/ieee80211_tdma.c +++ b/sys/net80211/ieee80211_tdma.c @@ -127,6 +127,9 @@ static int tdma_process_params(struct ieee80211_node *ni, static void settxparms(struct ieee80211vap *vap, enum ieee80211_phymode mode, int rate) { + if (isclr(vap->iv_ic->ic_modecaps, mode)) + return; + vap->iv_txparms[mode].ucastrate = rate; vap->iv_txparms[mode].mcastrate = rate; } From b90dad3524185ae3e0d696da3509c31e4d9b5a93 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sun, 3 Feb 2019 04:41:00 +0000 Subject: [PATCH 83/90] ifconfig(8): actually, non-11n rates should be divided by 2...
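As a worked example of this fix: the legacy OFDM rate byte 108 now prints as "54 Mb/s" (108 / 2), while an 11n rate byte such as 0x87 keeps its MCS flag and prints as "MCS 7"; previously the 11n/11ac branch printed the raw value 108 for legacy management/multicast rates.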
MFC after: 1 week MFC with: 343698 --- sbin/ifconfig/ifieee80211.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sbin/ifconfig/ifieee80211.c b/sbin/ifconfig/ifieee80211.c index e8da16b92743..c0649e33e783 100644 --- a/sbin/ifconfig/ifieee80211.c +++ b/sbin/ifconfig/ifieee80211.c @@ -4331,6 +4331,14 @@ get_mcs_mbs_rate_str(uint8_t rate) return (rate & IEEE80211_RATE_MCS) ? "MCS " : "Mb/s"; } +static uint8_t +get_rate_value(uint8_t rate) +{ + if (rate & IEEE80211_RATE_MCS) + return (rate &~ IEEE80211_RATE_MCS); + return (rate / 2); +} + static void list_txparams(int s) { @@ -4350,9 +4358,9 @@ list_txparams(int s) LINE_CHECK("%-7.7s ucast NONE mgmt %2u %s " "mcast %2u %s maxretry %u", modename[mode], - tp->mgmtrate &~ IEEE80211_RATE_MCS, + get_rate_value(tp->mgmtrate), get_mcs_mbs_rate_str(tp->mgmtrate), - tp->mcastrate &~ IEEE80211_RATE_MCS, + get_rate_value(tp->mcastrate), get_mcs_mbs_rate_str(tp->mcastrate), tp->maxretry); else @@ -4360,9 +4368,9 @@ "mcast %2u %s maxretry %u", modename[mode], tp->ucastrate &~ IEEE80211_RATE_MCS, - tp->mgmtrate &~ IEEE80211_RATE_MCS, + get_rate_value(tp->mgmtrate), get_mcs_mbs_rate_str(tp->mgmtrate), - tp->mcastrate &~ IEEE80211_RATE_MCS, + get_rate_value(tp->mcastrate), get_mcs_mbs_rate_str(tp->mcastrate), tp->maxretry); } else { From 0fcd8cab4e32a4899dc1c4a440ad29ba308e656d Mon Sep 17 00:00:00 2001 From: Cy Schubert Date: Sun, 3 Feb 2019 05:25:49 +0000 Subject: [PATCH 84/90] ipfilter #ifdef cleanup. Remove #ifdefs for ancient and irrelevant operating systems from ipfilter. When ipfilter was written, the UNIX and UNIX-like systems in use were diverse and plentiful. IRIX and Tru64 (OSF/1) no longer exist. OpenBSD removed ipfilter shortly after the first time the ipfilter license terms changed in the early 2000s. ipfilter on AIX, HP/UX, and Linux never really caught on. Removing code for operating systems that ipfilter will never run on again simplifies the code, making it easier to fix bugs, complete partially implemented features, and extend ipfilter. Unsupported previous-version FreeBSD code and some older NetBSD code have also been removed. What remains is support for FreeBSD, NetBSD, and illumos. FreeBSD and NetBSD have collaborated by exchanging patches, while illumos has expressed willingness to have their ipfilter updated to 5.1.2, provided their zone-specific updates to their ipfilter are merged (these are of interest to FreeBSD, to allow control of ipfilters in jails from the global zone).
Reviewed by: glebius@ MFC after: 1 month Differential Revision: https://reviews.freebsd.org/D19006 --- contrib/ipfilter/arc4random.c | 12 +- contrib/ipfilter/ip_dstlist.c | 14 +- contrib/ipfilter/ip_fil.c | 83 ++-------- contrib/ipfilter/ip_fil_compat.c | 47 +----- contrib/ipfilter/ipf.h | 20 +-- contrib/ipfilter/ipsend/.OLD/ip_compat.h | 118 -------------- contrib/ipfilter/ipsend/44arp.c | 2 - contrib/ipfilter/ipsend/arp.c | 2 - contrib/ipfilter/ipsend/dlcommon.c | 4 - contrib/ipfilter/ipsend/ip.c | 2 - contrib/ipfilter/ipsend/ipresend.c | 10 -- contrib/ipfilter/ipsend/ipsend.c | 16 -- contrib/ipfilter/ipsend/ipsend.h | 7 - contrib/ipfilter/ipsend/ipsopt.c | 2 - contrib/ipfilter/ipsend/iptest.c | 13 -- contrib/ipfilter/ipsend/iptests.c | 22 --- contrib/ipfilter/ipsend/resend.c | 2 - contrib/ipfilter/ipsend/sdlpi.c | 7 - contrib/ipfilter/ipsend/sock.c | 4 - contrib/ipfilter/lib/getifname.c | 3 - contrib/ipfilter/lib/getproto.c | 8 - contrib/ipfilter/lib/inet_addr.c | 2 - contrib/ipfilter/lib/kmem.c | 79 ---------- contrib/ipfilter/lib/printproto.c | 13 -- contrib/ipfilter/md5.c | 7 +- contrib/ipfilter/tools/ipf.c | 9 -- contrib/ipfilter/tools/ipfs.c | 9 -- contrib/ipfilter/tools/ipfstat.c | 45 +----- contrib/ipfilter/tools/ipftest.c | 158 ------------------- contrib/ipfilter/tools/ipmon.c | 43 +---- contrib/ipfilter/tools/ipnat.c | 16 +- contrib/ipfilter/tools/ipnat_y.y | 9 -- contrib/ipfilter/tools/ippool.c | 6 - contrib/ipfilter/tools/ippool_y.y | 2 - sys/contrib/ipfilter/netinet/fil.c | 64 +------- sys/contrib/ipfilter/netinet/ip_auth.c | 52 ++---- sys/contrib/ipfilter/netinet/ip_compat.h | 35 +--- sys/contrib/ipfilter/netinet/ip_dstlist.c | 14 +- sys/contrib/ipfilter/netinet/ip_fil.h | 82 +--------- sys/contrib/ipfilter/netinet/ip_frag.c | 15 +- sys/contrib/ipfilter/netinet/ip_ftp_pxy.c | 8 - sys/contrib/ipfilter/netinet/ip_htable.c | 8 +- sys/contrib/ipfilter/netinet/ip_irc_pxy.c | 6 +- sys/contrib/ipfilter/netinet/ip_log.c | 76 ++------- sys/contrib/ipfilter/netinet/ip_lookup.c | 10 +- sys/contrib/ipfilter/netinet/ip_nat.c | 61 ++----- sys/contrib/ipfilter/netinet/ip_nat.h | 5 +- sys/contrib/ipfilter/netinet/ip_nat6.c | 20 +-- sys/contrib/ipfilter/netinet/ip_pool.c | 10 +- sys/contrib/ipfilter/netinet/ip_proxy.c | 27 +--- sys/contrib/ipfilter/netinet/ip_raudio_pxy.c | 8 - sys/contrib/ipfilter/netinet/ip_scan.c | 10 +- sys/contrib/ipfilter/netinet/ip_state.c | 15 +- sys/contrib/ipfilter/netinet/ip_sync.c | 54 ++----- 54 files changed, 126 insertions(+), 1250 deletions(-) diff --git a/contrib/ipfilter/arc4random.c b/contrib/ipfilter/arc4random.c index 499428702a81..bdb6b2d9ba9a 100644 --- a/contrib/ipfilter/arc4random.c +++ b/contrib/ipfilter/arc4random.c @@ -7,7 +7,7 @@ * * Dan Moschuk */ -#if !defined(SOLARIS2) && !defined(__osf__) +#if !defined(SOLARIS2) # include #endif @@ -16,26 +16,16 @@ #ifdef __FreeBSD__ # include #endif -#if !defined(__osf__) # include -#endif #ifdef __FreeBSD__ # include #endif #include -#ifndef __osf__ # include -#endif #include -#if defined(SOLARIS2) && (SOLARIS2 < 9) -# include -#endif #include #include -#ifdef __osf__ -# include -#endif #include #include #include "netinet/ip_compat.h" diff --git a/contrib/ipfilter/ip_dstlist.c b/contrib/ipfilter/ip_dstlist.c index ce2e72e8130f..99c7a22668df 100644 --- a/contrib/ipfilter/ip_dstlist.c +++ b/contrib/ipfilter/ip_dstlist.c @@ -9,9 +9,6 @@ # define KERNEL 1 # define _KERNEL 1 #endif -#if defined(__osf__) -# define _PROTO_NET_H_ -#endif #include #include #include @@ -21,9 +18,6 @@ # include # include # define 
_KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL #else @@ -33,14 +27,12 @@ struct file; # endif #endif #include -#if !defined(linux) # include -#endif #include -#if defined(_KERNEL) && (!defined(__SVR4) && !defined(__svr4__)) +#if defined(_KERNEL) && !defined(__SVR4) # include #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include # include # ifdef _KERNEL @@ -49,7 +41,7 @@ struct file; # include # include #endif -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include #endif diff --git a/contrib/ipfilter/ip_fil.c b/contrib/ipfilter/ip_fil.c index 32cba4cdd372..794d7e205bb3 100644 --- a/contrib/ipfilter/ip_fil.c +++ b/contrib/ipfilter/ip_fil.c @@ -25,24 +25,10 @@ struct rtentry; static void ipf_setifpaddr __P((struct ifnet *, char *)); void init_ifp __P((void)); -#if defined(__sgi) && (IRIX < 60500) -static int no_output __P((struct ifnet *, struct mbuf *, - struct sockaddr *)); -static int write_output __P((struct ifnet *, struct mbuf *, - struct sockaddr *)); -#else -# if TRU64 >= 1885 -static int no_output __P((struct ifnet *, struct mbuf *, - struct sockaddr *, struct rtentry *, char *)); -static int write_output __P((struct ifnet *, struct mbuf *, - struct sockaddr *, struct rtentry *, char *)); -# else static int no_output __P((struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *)); static int write_output __P((struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *)); -# endif -#endif struct ifaddr { struct sockaddr_storage ifa_addr; @@ -123,17 +109,8 @@ ipf_forgetifp(softc, ifp) static int -#if defined(__sgi) && (IRIX < 60500) -no_output(ifp, m, s) -#else -# if TRU64 >= 1885 -no_output (ifp, m, s, rt, cp) - char *cp; -# else no_output(ifp, m, s, rt) -# endif struct rtentry *rt; -#endif struct ifnet *ifp; struct mbuf *m; struct sockaddr *s; @@ -143,17 +120,8 @@ no_output(ifp, m, s, rt) static int -#if defined(__sgi) && (IRIX < 60500) -write_output(ifp, m, s) -#else -# if TRU64 >= 1885 -write_output (ifp, m, s, rt, cp) - char *cp; -# else write_output(ifp, m, s, rt) -# endif struct rtentry *rt; -#endif struct ifnet *ifp; struct mbuf *m; struct sockaddr *s; @@ -167,8 +135,7 @@ write_output(ifp, m, s, rt) ip = MTOD(mb, ip_t *); #if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || defined(linux) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) + defined(__FreeBSD__) sprintf(fname, "/tmp/%s", ifp->if_xname); #else sprintf(fname, "/tmp/%s%d", ifp->if_name, ifp->if_unit); @@ -189,42 +156,26 @@ ipf_setifpaddr(ifp, addr) struct ifnet *ifp; char *addr; { -#ifdef __sgi - struct in_ifaddr *ifa; -#else struct ifaddr *ifa; -#endif -#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) +#if defined(__NetBSD__) || defined(__FreeBSD__) if (ifp->if_addrlist.tqh_first != NULL) #else -# ifdef __sgi - if (ifp->in_ifaddr != NULL) -# else if (ifp->if_addrlist != NULL) -# endif #endif return; ifa = (struct ifaddr *)malloc(sizeof(*ifa)); -#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) +#if defined(__NetBSD__) || defined(__FreeBSD__) ifp->if_addrlist.tqh_first = ifa; #else -# ifdef __sgi - ifp->in_ifaddr = ifa; -# else ifp->if_addrlist = ifa; -# endif #endif if (ifa != NULL) { struct sockaddr_in *sin; -#ifdef __sgi - sin = (struct sockaddr_in *)&ifa->ia_addr; -#else sin = (struct sockaddr_in *)&ifa->ifa_addr; -#endif #ifdef USE_INET6 if (index(addr, ':') != 
NULL) { struct sockaddr_in6 *sin6; @@ -263,8 +214,7 @@ get_unit(name, family) struct ifnet *ifp, **ifpp, **old_ifneta; char *addr; #if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || defined(linux) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) + defined(__FreeBSD__) if (!*name) return NULL; @@ -333,12 +283,11 @@ get_unit(name, family) } ifp = ifneta[nifs - 1]; -#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) +#if defined(__NetBSD__) || defined(__FreeBSD__) TAILQ_INIT(&ifp->if_addrlist); #endif #if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || defined(linux) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) + defined(__FreeBSD__) (void) strncpy(ifp->if_xname, name, sizeof(ifp->if_xname)); #else s = name + strlen(name) - 1; @@ -375,8 +324,7 @@ get_ifname(ifp) { static char ifname[LIFNAMSIZ]; -#if defined(__OpenBSD__) || defined(__NetBSD__) || defined(linux) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) +#if defined(__NetBSD__) || defined(__FreeBSD__) sprintf(ifname, "%s", ifp->if_xname); #else if (ifp->if_unit != -1) @@ -397,8 +345,7 @@ init_ifp() int fd; #if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || defined(linux) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) + defined(__FreeBSD__) for (ifpp = ifneta; ifpp && (ifp = *ifpp); ifpp++) { ifp->if_output = (void *)write_output; sprintf(fname, "/tmp/%s", ifp->if_xname); @@ -717,20 +664,12 @@ ipf_ifpaddr(softc, v, atype, ifptr, inp, inpmask) i6addr_t *inp, *inpmask; { struct ifnet *ifp = ifptr; -#ifdef __sgi - struct in_ifaddr *ifa; -#else struct ifaddr *ifa; -#endif -#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) +#if defined(__NetBSD__) || defined(__FreeBSD__) ifa = ifp->if_addrlist.tqh_first; #else -# ifdef __sgi - ifa = (struct in_ifaddr *)ifp->in_ifaddr; -# else ifa = ifp->if_addrlist; -# endif #endif if (ifa != NULL) { if (v == 4) { @@ -738,11 +677,7 @@ ipf_ifpaddr(softc, v, atype, ifptr, inp, inpmask) mask.sin_addr.s_addr = 0xffffffff; -#ifdef __sgi - sin = (struct sockaddr_in *)&ifa->ia_addr; -#else sin = (struct sockaddr_in *)&ifa->ifa_addr; -#endif return ipf_ifpfillv4addr(atype, sin, &mask, &inp->in4, &inpmask->in4); diff --git a/contrib/ipfilter/ip_fil_compat.c b/contrib/ipfilter/ip_fil_compat.c index d0b356f76904..271c2e065738 100644 --- a/contrib/ipfilter/ip_fil_compat.c +++ b/contrib/ipfilter/ip_fil_compat.c @@ -9,15 +9,12 @@ # define KERNEL 1 # define _KERNEL 1 #endif -#if defined(__osf__) -# define _PROTO_NET_H_ -#endif #include #include #include #include #include -#if __FreeBSD_version >= 220000 && defined(_KERNEL) +#if defined(__FreeBSD_version) && defined(_KERNEL) # include # include #else @@ -26,17 +23,10 @@ #if !defined(_KERNEL) # include # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL #endif #include -#if (defined(__osf__) || defined(AIX) || defined(__hpux) || defined(__sgi)) && defined(_KERNEL) -# include "radix_ipf_local.h" -# define _RADIX_H_ -#endif #include #if defined(__FreeBSD__) # include @@ -44,7 +34,7 @@ struct file; #endif #if defined(_KERNEL) # include -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include # endif #endif @@ -348,9 +338,6 @@ typedef struct fr_info_4_1_32 { void *fin_qpi; char fin_ifname[LIFNAMSIZ]; #endif -#ifdef __sgi - 
void *fin_hbuf; -#endif } fr_info_4_1_32_t; typedef struct fr_info_4_1_24 { @@ -389,9 +376,6 @@ typedef struct fr_info_4_1_24 { void *fin_qpi; char fin_ifname[LIFNAMSIZ]; #endif -#ifdef __sgi - void *fin_hbuf; -#endif } fr_info_4_1_24_t; typedef struct fr_info_4_1_23 { @@ -429,9 +413,6 @@ typedef struct fr_info_4_1_23 { void *fin_qpi; char fin_ifname[LIFNAMSIZ]; #endif -#ifdef __sgi - void *fin_hbuf; -#endif } fr_info_4_1_23_t; typedef struct fr_info_4_1_11 { @@ -468,9 +449,6 @@ typedef struct fr_info_4_1_11 { void *fin_qpi; char fin_ifname[LIFNAMSIZ]; #endif -#ifdef __sgi - void *fin_hbuf; -#endif } fr_info_4_1_11_t; /* ------------------------------------------------------------------------ */ @@ -2678,9 +2656,6 @@ fr_info_4_1_32_to_current(old, current) fin->fin_qfm = old->fin_qfm; fin->fin_qpi = old->fin_qpi; #endif -#ifdef __sgi - fin->fin_hbuf = old->fin_hbuf; -#endif } @@ -2719,9 +2694,6 @@ fr_info_4_1_24_to_current(old, current) fin->fin_qfm = old->fin_qfm; fin->fin_qpi = old->fin_qpi; #endif -#ifdef __sgi - fin->fin_hbuf = old->fin_hbuf; -#endif } @@ -2759,9 +2731,6 @@ fr_info_4_1_23_to_current(old, current) fin->fin_qfm = old->fin_qfm; fin->fin_qpi = old->fin_qpi; #endif -#ifdef __sgi - fin->fin_hbuf = fin->fin_hbuf; -#endif } @@ -2799,9 +2768,6 @@ fr_info_4_1_11_to_current(old, current) fin->fin_qfm = old->fin_qfm; fin->fin_qpi = old->fin_qpi; #endif -#ifdef __sgi - fin->fin_hbuf = fin->fin_hbuf; -#endif } @@ -4078,9 +4044,6 @@ fr_info_current_to_4_1_24(current, old) old->fin_qpi = fin->fin_qpi; old->fin_ifname[0] = '\0'; #endif -#ifdef __sgi - old->fin_hbuf = fin->fin_hbuf; -#endif } @@ -4121,9 +4084,6 @@ fr_info_current_to_4_1_23(current, old) old->fin_qpi = fin->fin_qpi; old->fin_ifname[0] = '\0'; #endif -#ifdef __sgi - old->fin_hbuf = fin->fin_hbuf; -#endif } @@ -4164,9 +4124,6 @@ fr_info_current_to_4_1_11(current, old) old->fin_qpi = fin->fin_qpi; old->fin_ifname[0] = '\0'; #endif -#ifdef __sgi - old->fin_hbuf = fin->fin_hbuf; -#endif } diff --git a/contrib/ipfilter/ipf.h b/contrib/ipfilter/ipf.h index 695325a2788f..f5617334a0ba 100644 --- a/contrib/ipfilter/ipf.h +++ b/contrib/ipfilter/ipf.h @@ -12,11 +12,6 @@ #ifndef __IPF_H__ #define __IPF_H__ -#if defined(__osf__) -# define radix_mask ipf_radix_mask -# define radix_node ipf_radix_node -# define radix_node_head ipf_radix_node_head -#endif #include #include @@ -31,9 +26,6 @@ # define _KERNEL # define KERNEL #endif -#ifdef __OpenBSD__ -struct file; -#endif #include #ifdef ADD_KERNEL # undef _KERNEL @@ -188,9 +180,8 @@ typedef struct proxyrule { } proxyrule_t; -#if defined(__NetBSD__) || defined(__OpenBSD__) || \ - (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) || \ - SOLARIS || defined(__sgi) || defined(__osf__) || defined(linux) +#if defined(__NetBSD__) || defined(__FreeBSD_version) || \ + SOLARIS # include typedef int (* ioctlfunc_t) __P((int, ioctlcmd_t, ...)); #else @@ -200,13 +191,6 @@ typedef int (* addfunc_t) __P((int, ioctlfunc_t, void *)); typedef int (* copyfunc_t) __P((void *, void *, size_t)); -/* - * SunOS4 - */ -#if defined(sun) && !defined(__SVR4) && !defined(__svr4__) -extern int ioctl __P((int, int, void *)); -#endif - extern char thishost[]; extern char flagset[]; extern u_char flags[]; diff --git a/contrib/ipfilter/ipsend/.OLD/ip_compat.h b/contrib/ipfilter/ipsend/.OLD/ip_compat.h index b5b8f0741c25..b77cde636a98 100644 --- a/contrib/ipfilter/ipsend/.OLD/ip_compat.h +++ b/contrib/ipfilter/ipsend/.OLD/ip_compat.h @@ -112,130 +112,12 @@ # define IPOPT_SECUR_TOPSECRET ((u_short)0x6bc5) #endif 
-#ifdef linux -# if LINUX < 0200 -# define icmp icmphdr -# define icmp_type type -# define icmp_code code -# endif - -/* - * From /usr/include/netinet/ip_var.h - * !%@#!$@# linux... - */ -struct ipovly { - caddr_t ih_next, ih_prev; /* for protocol sequence q's */ - u_char ih_x1; /* (unused) */ - u_char ih_pr; /* protocol */ - short ih_len; /* protocol length */ - struct in_addr ih_src; /* source internet address */ - struct in_addr ih_dst; /* destination internet address */ -}; - -typedef struct { - __u16 th_sport; - __u16 th_dport; - __u32 th_seq; - __u32 th_ack; -# if defined(__i386__) || defined(__MIPSEL__) || defined(__alpha__) ||\ - defined(vax) - __u8 th_res:4; - __u8 th_off:4; -#else - __u8 th_off:4; - __u8 th_res:4; -#endif - __u8 th_flags; - __u16 th_win; - __u16 th_sum; - __u16 th_urp; -} tcphdr_t; - -typedef struct { - __u16 uh_sport; - __u16 uh_dport; - __s16 uh_ulen; - __u16 uh_sum; -} udphdr_t; - -typedef struct { -# if defined(__i386__) || defined(__MIPSEL__) || defined(__alpha__) ||\ - defined(vax) - __u8 ip_hl:4; - __u8 ip_v:4; -# else - __u8 ip_hl:4; - __u8 ip_v:4; -# endif - __u8 ip_tos; - __u16 ip_len; - __u16 ip_id; - __u16 ip_off; - __u8 ip_ttl; - __u8 ip_p; - __u16 ip_sum; - struct in_addr ip_src; - struct in_addr ip_dst; -} ip_t; - -typedef struct { - __u8 ether_dhost[6]; - __u8 ether_shost[6]; - __u16 ether_type; -} ether_header_t; - -typedef struct icmp { - u_char icmp_type; /* type of message, see below */ - u_char icmp_code; /* type sub code */ - u_short icmp_cksum; /* ones complement cksum of struct */ - union { - u_char ih_pptr; /* ICMP_PARAMPROB */ - struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ - struct ih_idseq { - n_short icd_id; - n_short icd_seq; - } ih_idseq; - int ih_void; - } icmp_hun; -#define icmp_pptr icmp_hun.ih_pptr -#define icmp_gwaddr icmp_hun.ih_gwaddr -#define icmp_id icmp_hun.ih_idseq.icd_id -#define icmp_seq icmp_hun.ih_idseq.icd_seq -#define icmp_void icmp_hun.ih_void - union { - struct id_ts { - n_time its_otime; - n_time its_rtime; - n_time its_ttime; - } id_ts; - struct id_ip { - ip_t idi_ip; - /* options and then 64 bits of data */ - } id_ip; - u_long id_mask; - char id_data[1]; - } icmp_dun; -#define icmp_otime icmp_dun.id_ts.its_otime -#define icmp_rtime icmp_dun.id_ts.its_rtime -#define icmp_ttime icmp_dun.id_ts.its_ttime -#define icmp_ip icmp_dun.id_ip.idi_ip -#define icmp_mask icmp_dun.id_mask -#define icmp_data icmp_dun.id_data -} icmphdr_t; - -# define bcopy(a,b,c) memmove(b,a,c) -# define bcmp(a,b,c) memcmp(a,b,c) - -# define ifnet device - -#else typedef struct udphdr udphdr_t; typedef struct tcphdr tcphdr_t; typedef struct ip ip_t; typedef struct ether_header ether_header_t; -#endif #if defined(__SVR4) || defined(__svr4__) # define bcopy(a,b,c) memmove(b,a,c) diff --git a/contrib/ipfilter/ipsend/44arp.c b/contrib/ipfilter/ipsend/44arp.c index 9215959395ab..80521ad15084 100644 --- a/contrib/ipfilter/ipsend/44arp.c +++ b/contrib/ipfilter/ipsend/44arp.c @@ -10,9 +10,7 @@ #include #include #include -#ifndef __osf__ # include -#endif #include #include #include diff --git a/contrib/ipfilter/ipsend/arp.c b/contrib/ipfilter/ipsend/arp.c index 58a1523e5db5..05f255ea47d2 100644 --- a/contrib/ipfilter/ipsend/arp.c +++ b/contrib/ipfilter/ipsend/arp.c @@ -88,7 +88,6 @@ int arp(ip, ether) sin = (struct sockaddr_in *)&ar.arp_pa; sin->sin_family = AF_INET; bcopy(ip, (char *)&sin->sin_addr.s_addr, 4); -#ifndef hpux if ((hp = gethostbyaddr(ip, 4, AF_INET))) # if SOLARIS && (SOLARIS2 >= 10) if (!(ether_hostton(hp->h_name, (struct ether_addr 
*)ether))) @@ -96,7 +95,6 @@ int arp(ip, ether) if (!(ether_hostton(hp->h_name, ether))) # endif goto savearp; -#endif if (sfd == -1) if ((sfd = socket(AF_INET, SOCK_DGRAM, 0)) == -1) diff --git a/contrib/ipfilter/ipsend/dlcommon.c b/contrib/ipfilter/ipsend/dlcommon.c index 55bc9423ab15..8a8cbf6a6a94 100644 --- a/contrib/ipfilter/ipsend/dlcommon.c +++ b/contrib/ipfilter/ipsend/dlcommon.c @@ -20,11 +20,7 @@ typedef unsigned long ulong; #include #include #include -#ifdef __osf__ -# include -#else # include -#endif #include #include #include diff --git a/contrib/ipfilter/ipsend/ip.c b/contrib/ipfilter/ipsend/ip.c index 4f2eaed3a9b9..c1bb73f0b169 100644 --- a/contrib/ipfilter/ipsend/ip.c +++ b/contrib/ipfilter/ipsend/ip.c @@ -17,11 +17,9 @@ static const char rcsid[] = "@(#)$Id$"; #include #include #include -#ifndef linux # include # include # include -#endif #include #include #include diff --git a/contrib/ipfilter/ipsend/ipresend.c b/contrib/ipfilter/ipsend/ipresend.c index 7520a0e5bf55..ea0b4211c101 100644 --- a/contrib/ipfilter/ipsend/ipresend.c +++ b/contrib/ipfilter/ipsend/ipresend.c @@ -18,9 +18,7 @@ static const char rcsid[] = "@(#)$Id$"; #include #include #include -#ifndef linux #include -#endif #include #include #include @@ -37,9 +35,6 @@ extern struct ipread pcap, iphex, iptext; int opts = 0; #ifndef DEFAULT_DEVICE -# ifdef linux -char default_device[] = "eth0"; -# else # ifdef sun char default_device[] = "le0"; # else @@ -49,15 +44,10 @@ char default_device[] = "ln0"; # ifdef __bsdi__ char default_device[] = "ef0"; # else -# ifdef __sgi -char default_device[] = "ec0"; -# else char default_device[] = "lan0"; -# endif # endif # endif # endif -# endif #else char default_device[] = DEFAULT_DEVICE; #endif diff --git a/contrib/ipfilter/ipsend/ipsend.c b/contrib/ipfilter/ipsend/ipsend.c index 3df5c071e2e3..95a1bb1e5c78 100644 --- a/contrib/ipfilter/ipsend/ipsend.c +++ b/contrib/ipfilter/ipsend/ipsend.c @@ -21,14 +21,10 @@ static const char rcsid[] = "@(#)$Id$"; #include #include #include -#ifndef linux # include -#endif #include "ipsend.h" #include "ipf.h" -#ifndef linux # include -#endif extern char *optarg; @@ -37,27 +33,15 @@ extern void iplang __P((FILE *)); char options[68]; int opts; -#ifdef linux -char default_device[] = "eth0"; -#else # ifdef ultrix char default_device[] = "ln0"; # else # ifdef __bsdi__ char default_device[] = "ef0"; # else -# ifdef __sgi -char default_device[] = "ec0"; -# else -# ifdef __hpux -char default_device[] = "lan0"; -# else char default_device[] = "le0"; -# endif /* __hpux */ -# endif /* __sgi */ # endif /* __bsdi__ */ # endif /* ultrix */ -#endif /* linux */ static void usage __P((char *)); diff --git a/contrib/ipfilter/ipsend/ipsend.h b/contrib/ipfilter/ipsend/ipsend.h index 75a0496e7f83..f409e89c656e 100644 --- a/contrib/ipfilter/ipsend/ipsend.h +++ b/contrib/ipfilter/ipsend/ipsend.h @@ -26,9 +26,6 @@ #include #include "ipf.h" -#ifdef linux -#include -#endif /* XXX: The following is needed by tcpip.h */ #include #include "netinet/tcpip.h" @@ -49,11 +46,7 @@ extern u_32_t buildopts __P((char *, char *, int)); extern int addipopt __P((char *, struct ipopt_names *, int, char *)); extern int initdevice __P((char *, int)); extern int sendip __P((int, char *, int)); -#ifdef linux -extern struct sock *find_tcp __P((int, struct tcpiphdr *)); -#else extern struct tcpcb *find_tcp __P((int, struct tcpiphdr *)); -#endif extern int ip_resend __P((char *, int, struct ipread *, struct in_addr, char *)); extern void ip_test1 __P((char *, int, ip_t *, struct in_addr, 
int)); diff --git a/contrib/ipfilter/ipsend/ipsopt.c b/contrib/ipfilter/ipsend/ipsopt.c index a2cc4d04aad1..7f9ab5e32d79 100644 --- a/contrib/ipfilter/ipsend/ipsopt.c +++ b/contrib/ipfilter/ipsend/ipsopt.c @@ -20,9 +20,7 @@ static const char rcsid[] = "@(#)$Id$"; #include #include #include -#ifndef linux #include -#endif #include #include #include "ipsend.h" diff --git a/contrib/ipfilter/ipsend/iptest.c b/contrib/ipfilter/ipsend/iptest.c index c6cfb1c75a4a..bc93106c8b89 100644 --- a/contrib/ipfilter/ipsend/iptest.c +++ b/contrib/ipfilter/ipsend/iptest.c @@ -18,12 +18,7 @@ static const char rcsid[] = "@(#)$Id$"; #include #include #include -#ifndef linux #include -#endif -#ifdef linux -#include -#endif #include #include #include @@ -36,9 +31,6 @@ extern char *optarg; extern int optind; char options[68]; -#ifdef linux -char default_device[] = "eth0"; -#else # ifdef sun char default_device[] = "le0"; # else @@ -48,15 +40,10 @@ char default_device[] = "ln0"; # ifdef __bsdi__ char default_device[] = "ef0"; # else -# ifdef __sgi -char default_device[] = "ec0"; -# else char default_device[] = "lan0"; -# endif # endif # endif # endif -#endif static void usage __P((char *)); int main __P((int, char **)); diff --git a/contrib/ipfilter/ipsend/iptests.c b/contrib/ipfilter/ipsend/iptests.c index 0ca02db0b04d..af8772cc2097 100644 --- a/contrib/ipfilter/ipsend/iptests.c +++ b/contrib/ipfilter/ipsend/iptests.c @@ -21,7 +21,6 @@ static const char rcsid[] = "@(#)$Id$"; typedef int boolean_t; #endif #include -#if !defined(__osf__) # ifdef __NetBSD__ # include # include @@ -37,7 +36,6 @@ typedef int boolean_t; # endif # undef _KERNEL # undef KERNEL -#endif #if !defined(solaris) && !defined(linux) && !defined(__sgi) # include # include @@ -66,24 +64,13 @@ typedef int boolean_t; #endif #include #include -#ifdef __hpux -# define _NET_ROUTE_INCLUDED -#endif #include -#if defined(linux) && (LINUX >= 0200) -# include -#endif -#if !defined(linux) # if defined(__FreeBSD__) # include "radix_ipf.h" # endif # if !defined(solaris) # include # endif -#else -# define __KERNEL__ /* because there's a macro not wrapped by this */ -# include /* in this file :-/ */ -#endif #include #include #include @@ -94,20 +81,13 @@ typedef int boolean_t; #include #include #include -#ifdef __hpux -# undef _NET_ROUTE_INCLUDED -#endif -#if !defined(linux) # include # if !defined(__hpux) && !defined(solaris) # include # endif -#endif #include "ipsend.h" -#if !defined(linux) && !defined(__hpux) # include # include -#endif #if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 106000000) # define USE_NANOSLEEP #endif @@ -951,9 +931,7 @@ void ip_test5(dev, mtu, ip, gwip, ptest) int nfd, i; t = (tcphdr_t *)((char *)ip + (IP_HL(ip) << 2)); -#if !defined(linux) && !defined(__osf__) t->th_x2 = 0; -#endif TCP_OFF_A(t, 0); t->th_sport = htons(1); t->th_dport = htons(1); diff --git a/contrib/ipfilter/ipsend/resend.c b/contrib/ipfilter/ipsend/resend.c index 8fd289ed562e..3401673ff3a1 100644 --- a/contrib/ipfilter/ipsend/resend.c +++ b/contrib/ipfilter/ipsend/resend.c @@ -19,10 +19,8 @@ static const char rcsid[] = "@(#)$Id$"; #include #include #include -#ifndef linux # include # include -#endif #include #include #include diff --git a/contrib/ipfilter/ipsend/sdlpi.c b/contrib/ipfilter/ipsend/sdlpi.c index 1aee2e4108c6..cd540337b2fa 100644 --- a/contrib/ipfilter/ipsend/sdlpi.c +++ b/contrib/ipfilter/ipsend/sdlpi.c @@ -25,14 +25,7 @@ # include # include #endif -#ifdef __osf__ -# include -#else # include -#endif -#ifdef __hpux -# include -#endif #include 
#include diff --git a/contrib/ipfilter/ipsend/sock.c b/contrib/ipfilter/ipsend/sock.c index d9361dcd44e0..d7eae8a13196 100644 --- a/contrib/ipfilter/ipsend/sock.c +++ b/contrib/ipfilter/ipsend/sock.c @@ -29,7 +29,6 @@ typedef int boolean_t; #else # include #endif -#if !defined(__osf__) # ifdef __NetBSD__ # include # endif @@ -50,7 +49,6 @@ typedef int boolean_t; # undef _KERNEL # undef KERNEL # endif -#endif #include #include #include @@ -74,9 +72,7 @@ typedef int boolean_t; #include #include #include -#ifndef __osf__ # include -#endif #include #define _WANT_INPCB #include diff --git a/contrib/ipfilter/lib/getifname.c b/contrib/ipfilter/lib/getifname.c index 88cad329f1e3..dfba83b83c99 100644 --- a/contrib/ipfilter/lib/getifname.c +++ b/contrib/ipfilter/lib/getifname.c @@ -25,9 +25,6 @@ char *getifname(ptr) # include # include # endif -# ifdef __hpux -# include "compat.h" -# endif # include "../pfil/qif.h" char *ifname; qif_t qif; diff --git a/contrib/ipfilter/lib/getproto.c b/contrib/ipfilter/lib/getproto.c index 6c52cd3b7677..f57fe06358fb 100644 --- a/contrib/ipfilter/lib/getproto.c +++ b/contrib/ipfilter/lib/getproto.c @@ -23,14 +23,6 @@ int getproto(name) if (*s == '\0') return atoi(name); -#ifdef _AIX51 - /* - * For some bogus reason, "ip" is 252 in /etc/protocols on AIX 5 - * The IANA has doubled up on the definition of 0 - it is now also - * used for IPv6 hop-opts, so we can no longer rely on /etc/protocols - * providing the correct name->number mapping - */ -#endif if (!strcasecmp(name, "ip")) return 0; diff --git a/contrib/ipfilter/lib/inet_addr.c b/contrib/ipfilter/lib/inet_addr.c index c7ae44375a5e..8667c2b33038 100644 --- a/contrib/ipfilter/lib/inet_addr.c +++ b/contrib/ipfilter/lib/inet_addr.c @@ -72,7 +72,6 @@ static const char rcsid[] = "@(#)$Id: inet_addr.c,v 1.8.2.3 2004/12/09 19:41:20 # define __P(x) () # endif #endif -#ifndef linux int inet_aton __P((const char *, struct in_addr *)); /* @@ -189,7 +188,6 @@ inet_aton(cp, addr) addr->s_addr = htonl(val); return (1); } -#endif /* these are compatibility routines, not needed on recent BSD releases */ diff --git a/contrib/ipfilter/lib/kmem.c b/contrib/ipfilter/lib/kmem.c index d895bafd0540..de97512cf5d3 100644 --- a/contrib/ipfilter/lib/kmem.c +++ b/contrib/ipfilter/lib/kmem.c @@ -18,9 +18,7 @@ #include #include #include -#if !defined(__sgi) && !defined(__hpux) && !defined(__osf__) && !defined(linux) && !defined(_AIX51) #include -#endif #include #include #include @@ -29,9 +27,6 @@ #include #include #include -#if defined(linux) || defined(__osf__) || defined(__sgi) || defined(__hpux) -# include -#endif #include "kmem.h" @@ -46,82 +41,8 @@ static const char rcsid[] = "@(#)$Id$"; -#if !defined(__sgi) && !defined(__hpux) && !defined(__osf__) && \ - !defined(linux) && !defined(_AIX51) -/* - * For all platforms where there is a libkvm and a kvm_t, we use that... - */ static kvm_t *kvm_f = NULL; -#else -/* - *...and for the others (HP-UX, IRIX, Tru64), we have to provide our own. 
- */ - -typedef int * kvm_t; - -static kvm_t kvm_f = NULL; -static char *kvm_errstr = NULL; - -kvm_t kvm_open __P((char *, char *, char *, int, char *)); -int kvm_read __P((kvm_t, u_long, char *, size_t)); - -kvm_t kvm_open(kernel, core, swap, mode, errstr) - char *kernel, *core, *swap; - int mode; - char *errstr; -{ - kvm_t k; - int fd; - - kvm_errstr = errstr; - - if (core == NULL) - core = "/dev/kmem"; - - fd = open(core, mode); - if (fd == -1) - return NULL; - k = malloc(sizeof(*k)); - if (k == NULL) - return NULL; - *k = fd; - return k; -} - -int kvm_read(kvm, pos, buffer, size) - kvm_t kvm; - u_long pos; - char *buffer; - size_t size; -{ - int r = 0, left; - char *bufp; - - if (lseek(*kvm, pos, 0) == -1) { - if (kvm_errstr != NULL) { - fprintf(stderr, "%s", kvm_errstr); - perror("lseek"); - } - return -1; - } - - for (bufp = buffer, left = size; left > 0; bufp += r, left -= r) { - r = read(*kvm, bufp, left); -#ifdef __osf__ - /* - * Tru64 returns "0" for successful operation, not the number - * of bytes read. - */ - if (r == 0) - r = left; -#endif - if (r <= 0) - return -1; - } - return r; -} -#endif /* !defined(__sgi) && !defined(__hpux) && !defined(__osf__) */ int openkmem(kern, core) char *kern, *core; diff --git a/contrib/ipfilter/lib/printproto.c b/contrib/ipfilter/lib/printproto.c index d411bfa00421..879da12d7857 100644 --- a/contrib/ipfilter/lib/printproto.c +++ b/contrib/ipfilter/lib/printproto.c @@ -27,14 +27,6 @@ printproto(pr, p, np) PRINTF("udp"); else if (np->in_flags & IPN_ICMPQUERY) PRINTF("icmp"); -#ifdef _AIX51 - /* - * To make up for "ip = 252" and "hopopt = 0" in /etc/protocols - * The IANA has doubled up on the definition of 0 - it is now - * also used for IPv6 hop-opts, so we can no longer rely on - * /etc/protocols providing the correct name->number mapping. - */ -#endif else if (np->in_pr[0] == 0) PRINTF("ip"); else if (pr != NULL) @@ -42,11 +34,6 @@ printproto(pr, p, np) else PRINTF("%d", np->in_pr[0]); } else { -#ifdef _AIX51 - if (p == 0) - PRINTF("ip"); - else -#endif if (pr != NULL) PRINTF("%s", pr->p_name); else diff --git a/contrib/ipfilter/md5.c b/contrib/ipfilter/md5.c index 35756cdde7cc..6ac639935902 100644 --- a/contrib/ipfilter/md5.c +++ b/contrib/ipfilter/md5.c @@ -35,16 +35,11 @@ *********************************************************************** */ -#if defined(linux) && defined(_KERNEL) -extern void *memcpy(void *, const void *, unsigned long); -# define bcopy(a,b,c) memcpy(b,a,c) -#else -# if defined(_KERNEL) && !defined(__sgi) +# if defined(_KERNEL) # include # else # include # endif -#endif #include "md5.h" diff --git a/contrib/ipfilter/tools/ipf.c b/contrib/ipfilter/tools/ipf.c index 166063173b20..0551108488e0 100644 --- a/contrib/ipfilter/tools/ipf.c +++ b/contrib/ipfilter/tools/ipf.c @@ -5,15 +5,6 @@ * * See the IPFILTER.LICENCE file for details on licencing. */ -#ifdef __FreeBSD__ -# ifndef __FreeBSD_cc_version -# include -# else -# if __FreeBSD_cc_version < 430000 -# include -# endif -# endif -#endif #include "ipf.h" #include #include diff --git a/contrib/ipfilter/tools/ipfs.c b/contrib/ipfilter/tools/ipfs.c index 43abd748f59f..7a2fe0252498 100644 --- a/contrib/ipfilter/tools/ipfs.c +++ b/contrib/ipfilter/tools/ipfs.c @@ -5,15 +5,6 @@ * * See the IPFILTER.LICENCE file for details on licencing. 
*/ -#ifdef __FreeBSD__ -# ifndef __FreeBSD_cc_version -# include -# else -# if __FreeBSD_cc_version < 430000 -# include -# endif -# endif -#endif #include #include #include diff --git a/contrib/ipfilter/tools/ipfstat.c b/contrib/ipfilter/tools/ipfstat.c index 3f0060189f23..e18eecaabe28 100644 --- a/contrib/ipfilter/tools/ipfstat.c +++ b/contrib/ipfilter/tools/ipfstat.c @@ -5,58 +5,26 @@ * * See the IPFILTER.LICENCE file for details on licencing. */ -#ifdef __FreeBSD__ -# ifndef __FreeBSD_cc_version -# include -# else -# if __FreeBSD_cc_version < 430000 -# include -# endif -# endif -#endif #include #include #include -#ifdef linux -# include -#else # include -#endif #include -#if defined(sun) && (defined(__svr4__) || defined(__SVR4)) +#if defined(sun) && defined(__SVR4) # include #endif #include "ipf.h" #include "netinet/ipl.h" -#if defined(STATETOP) -# if defined(_BSDI_VERSION) -# undef STATETOP -# endif -# if defined(__FreeBSD__) && \ - (!defined(__FreeBSD_version) || (__FreeBSD_version < 430000)) -# undef STATETOP -# endif -# if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 105000000) -# undef STATETOP -# endif -# if defined(sun) -# if defined(__svr4__) || defined(__SVR4) +#if defined(STATETOP) +# if defined(sun) && defined(__SVR4) # include -# else -# undef STATETOP /* NOT supported on SunOS4 */ -# endif # endif -#endif -#if defined(STATETOP) && !defined(linux) # include # include -#endif -#ifdef STATETOP # include # include # include -# if SOLARIS || defined(__NetBSD__) || defined(_BSDI_VERSION) || \ - defined(__sgi) +# if SOLARIS || defined(__NetBSD__) # ifdef ERR # undef ERR # endif @@ -66,7 +34,7 @@ # endif /* SOLARIS */ #endif /* STATETOP */ #include "kmem.h" -#if defined(__NetBSD__) || (__OpenBSD__) +#if defined(__NetBSD__) # include #endif @@ -75,9 +43,6 @@ static const char sccsid[] = "@(#)fils.c 1.21 4/20/96 (C) 1993-2000 Darren Reed" static const char rcsid[] = "@(#)$Id$"; #endif -#ifdef __hpux -# define nlist nlist64 -#endif extern char *optarg; extern int optind; diff --git a/contrib/ipfilter/tools/ipftest.c b/contrib/ipfilter/tools/ipftest.c index 378523d3bdf3..f9d45f71cbb2 100644 --- a/contrib/ipfilter/tools/ipftest.c +++ b/contrib/ipfilter/tools/ipftest.c @@ -43,9 +43,6 @@ void dumprules __P((frentry_t *)); void drain_log __P((char *)); void fixv4sums __P((mb_t *, ip_t *)); -#if defined(__NetBSD__) || defined(__OpenBSD__) || SOLARIS || \ - (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) || \ - defined(__osf__) || defined(linux) int ipftestioctl __P((int, ioctlcmd_t, ...)); int ipnattestioctl __P((int, ioctlcmd_t, ...)); int ipstatetestioctl __P((int, ioctlcmd_t, ...)); @@ -53,15 +50,6 @@ int ipauthtestioctl __P((int, ioctlcmd_t, ...)); int ipscantestioctl __P((int, ioctlcmd_t, ...)); int ipsynctestioctl __P((int, ioctlcmd_t, ...)); int ipooltestioctl __P((int, ioctlcmd_t, ...)); -#else -int ipftestioctl __P((dev_t, ioctlcmd_t, void *)); -int ipnattestioctl __P((dev_t, ioctlcmd_t, void *)); -int ipstatetestioctl __P((dev_t, ioctlcmd_t, void *)); -int ipauthtestioctl __P((dev_t, ioctlcmd_t, void *)); -int ipsynctestioctl __P((dev_t, ioctlcmd_t, void *)); -int ipscantestioctl __P((dev_t, ioctlcmd_t, void *)); -int ipooltestioctl __P((dev_t, ioctlcmd_t, void *)); -#endif static ioctlfunc_t iocfunctions[IPL_LOGSIZE] = { ipftestioctl, ipnattestioctl, @@ -292,15 +280,7 @@ main(argc,argv) ipf_state_flush(softc, 1, 0); if (dir && (ifp != NULL) && IP_V(ip) && (m != NULL)) -#if defined(__sgi) && (IRIX < 60500) - (*ifp->if_output)(ifp, (void *)m, NULL); -#else -# if 
TRU64 >= 1885 - (*ifp->if_output)(ifp, (void *)m, NULL, 0, 0); -# else (*ifp->if_output)(ifp, (void *)m, NULL, 0); -# endif -#endif while ((m != NULL) && (m != &mb)) { n = m->mb_next; @@ -351,9 +331,6 @@ main(argc,argv) } -#if defined(__NetBSD__) || defined(__OpenBSD__) || SOLARIS || \ - (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) || \ - defined(__osf__) || defined(linux) int ipftestioctl(int dev, ioctlcmd_t cmd, ...) { caddr_t data; @@ -513,141 +490,6 @@ int ipooltestioctl(int dev, ioctlcmd_t cmd, ...) } return 0; } -#else -int ipftestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGIPF, cmd, data, FWRITE|FREAD); - if ((opts & OPT_DEBUG) || (i != 0)) - fprintf(stderr, "ipfioctl(IPF,%#x,%p) = %d (%d)\n", - cmd, data, i, softc->ipf_interror); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} - - -int ipnattestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGNAT, cmd, data, FWRITE|FREAD); - if ((opts & OPT_DEBUG) || (i != 0)) - fprintf(stderr, "ipfioctl(NAT,%#x,%p) = %d\n", cmd, data, i); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} - - -int ipstatetestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGSTATE, cmd, data, FWRITE|FREAD); - if ((opts & OPT_DEBUG) || (i != 0)) - fprintf(stderr, "ipfioctl(STATE,%#x,%p) = %d\n", cmd, data, i); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} - - -int ipauthtestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGAUTH, cmd, data, FWRITE|FREAD); - if ((opts & OPT_DEBUG) || (i != 0)) - fprintf(stderr, "ipfioctl(AUTH,%#x,%p) = %d\n", cmd, data, i); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} - - -int ipsynctestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGSYNC, cmd, data, FWRITE|FREAD); - if ((opts & OPT_DEBUG) || (i != 0)) - fprintf(stderr, "ipfioctl(SYNC,%#x,%p) = %d\n", cmd, data, i); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} - - -int ipscantestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGSCAN, cmd, data, FWRITE|FREAD); - if ((opts & OPT_DEBUG) || (i != 0)) - fprintf(stderr, "ipfioctl(SCAN,%#x,%p) = %d\n", cmd, data, i); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} - - -int ipooltestioctl(dev, cmd, data) - dev_t dev; - ioctlcmd_t cmd; - void *data; -{ - int i; - - dev = dev; /* gcc -Wextra */ - i = ipfioctl(softc, IPL_LOGLOOKUP, cmd, data, FWRITE|FREAD); - if (opts & OPT_DEBUG) - fprintf(stderr, "ipfioctl(POOL,%#x,%p) = %d (%d)\n", - cmd, data, i, softc->ipf_interror); - if (i != 0) { - errno = i; - return -1; - } - return 0; -} -#endif int kmemcpy(addr, offset, size) diff --git a/contrib/ipfilter/tools/ipmon.c b/contrib/ipfilter/tools/ipmon.c index 1c52e7fd87ac..4e4d9cc28f9e 100644 --- a/contrib/ipfilter/tools/ipmon.c +++ b/contrib/ipfilter/tools/ipmon.c @@ -20,12 +20,7 @@ static const char rcsid[] = "@(#)$Id$"; #endif -#if defined(sun) && !defined(SOLARIS2) -#define STRERROR(x) sys_errlist[x] -extern char *sys_errlist[]; -#else #define STRERROR(x) strerror(x) -#endif extern int optind; extern char 
*optarg; @@ -116,11 +111,7 @@ char *reasons[] = { #ifdef MENTAT static char *pidfile = "/etc/opt/ipf/ipmon.pid"; #else -# if BSD >= 199306 static char *pidfile = "/var/run/ipmon.pid"; -# else -static char *pidfile = "/etc/ipmon.pid"; -# endif #endif static char line[2048]; @@ -138,11 +129,7 @@ static char *icmpname __P((u_int, u_int)); static char *icmpname6 __P((u_int, u_int)); static icmp_type_t *find_icmptype __P((int, icmp_type_t *, size_t)); static icmp_subtype_t *find_icmpsubtype __P((int, icmp_subtype_t *, size_t)); -#ifdef __hpux -static struct tm *get_tm __P((u_32_t)); -#else static struct tm *get_tm __P((time_t)); -#endif char *portlocalname __P((int, char *, u_int)); int main __P((int, char *[])); @@ -400,11 +387,6 @@ static void init_tabs() if (protocols[0]) free(protocols[0]); protocols[0] = strdup("ip"); -#if defined(_AIX51) - if (protocols[252]) - free(protocols[252]); - protocols[252] = NULL; -#endif } if (udp_ports != NULL) { @@ -643,11 +625,7 @@ void dumphex(log, dopts, buf, len) static struct tm *get_tm(sec) -#ifdef __hpux - u_32_t sec; -#else time_t sec; -#endif { struct tm *tm; time_t t; @@ -1123,10 +1101,6 @@ static void print_ipflog(conf, buf, blen) sprintf(t, "%dx ", ipl->ipl_count); t += strlen(t); } -#if (defined(MENTAT) || \ - (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603))) || defined(linux) { char ifname[sizeof(ipf->fl_ifname) + 1]; @@ -1134,28 +1108,13 @@ static void print_ipflog(conf, buf, blen) ifname[sizeof(ipf->fl_ifname)] = '\0'; sprintf(t, "%s", ifname); t += strlen(t); -# if defined(MENTAT) || defined(linux) -# if defined(linux) - /* - * On Linux, the loopback interface is just "lo", not "lo0". - */ - if (strcmp(ifname, "lo") != 0) -# endif +# if defined(MENTAT) if (ISALPHA(*(t - 1))) { sprintf(t, "%d", ipf->fl_unit); t += strlen(t); } # endif } -#else - for (len = 0; len < 3; len++) - if (ipf->fl_ifname[len] == '\0') - break; - if (ipf->fl_ifname[len]) - len++; - sprintf(t, "%*.*s%u", len, len, ipf->fl_ifname, ipf->fl_unit); - t += strlen(t); -#endif if ((ipf->fl_group[0] == (char)~0) && (ipf->fl_group[1] == '\0')) strcat(t, " @-1:"); else if (ipf->fl_group[0] == '\0') diff --git a/contrib/ipfilter/tools/ipnat.c b/contrib/ipfilter/tools/ipnat.c index c3a715698036..54a71653f3e2 100644 --- a/contrib/ipfilter/tools/ipnat.c +++ b/contrib/ipfilter/tools/ipnat.c @@ -12,7 +12,7 @@ #include #include #include -#if !defined(__SVR4) && !defined(__svr4__) +#if !defined(__SVR4) #include #else #include @@ -28,7 +28,7 @@ #undef _KERNEL #include #include -#if defined(sun) && (defined(__svr4__) || defined(__SVR4)) +#if defined(sun) && defined(__SVR4) # include # include #endif @@ -42,25 +42,13 @@ #include #include #include -#if defined(linux) -# include -#else # include -#endif #include "ipf.h" #include "netinet/ipl.h" #include "kmem.h" -#ifdef __hpux -# define nlist nlist64 -#endif -#if defined(sun) && !SOLARIS2 -# define STRERROR(x) sys_errlist[x] -extern char *sys_errlist[]; -#else # define STRERROR(x) strerror(x) -#endif #if !defined(lint) static const char sccsid[] ="@(#)ipnat.c 1.9 6/5/96 (C) 1993 Darren Reed"; diff --git a/contrib/ipfilter/tools/ipnat_y.y b/contrib/ipfilter/tools/ipnat_y.y index 39e6a92bdf67..e24641306634 100644 --- a/contrib/ipfilter/tools/ipnat_y.y +++ b/contrib/ipfilter/tools/ipnat_y.y @@ -6,15 +6,6 @@ * See the IPFILTER.LICENCE file for details on licencing. 
*/ %{ -#ifdef __FreeBSD__ -# ifndef __FreeBSD_cc_version -# include -# else -# if __FreeBSD_cc_version < 430000 -# include -# endif -# endif -#endif #include #include #include diff --git a/contrib/ipfilter/tools/ippool.c b/contrib/ipfilter/tools/ippool.c index ea2ef910cb68..01d8fe236e5a 100644 --- a/contrib/ipfilter/tools/ippool.c +++ b/contrib/ipfilter/tools/ippool.c @@ -9,9 +9,7 @@ #include #include #include -#if defined(BSD) && (BSD >= 199306) # include -#endif #include #include @@ -26,11 +24,7 @@ #include #include #include -#ifdef linux -# include -#else # include -#endif #include "ipf.h" #include "netinet/ipl.h" diff --git a/contrib/ipfilter/tools/ippool_y.y b/contrib/ipfilter/tools/ippool_y.y index 2c7574f9432d..2a9d8ee3b079 100644 --- a/contrib/ipfilter/tools/ippool_y.y +++ b/contrib/ipfilter/tools/ippool_y.y @@ -10,9 +10,7 @@ #include #include #include -#if defined(BSD) && (BSD >= 199306) # include -#endif #include #include diff --git a/sys/contrib/ipfilter/netinet/fil.c b/sys/contrib/ipfilter/netinet/fil.c index 7413061bb1c5..ec786bd9fb44 100644 --- a/sys/contrib/ipfilter/netinet/fil.c +++ b/sys/contrib/ipfilter/netinet/fil.c @@ -20,26 +20,18 @@ #include #include #include -#if defined(_KERNEL) && defined(__FreeBSD_version) && \ - (__FreeBSD_version >= 220000) -# if (__FreeBSD_version >= 400000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # if !defined(IPFILTER_LKM) # include "opt_inet6.h" # endif -# if (__FreeBSD_version == 400019) -# define CSUM_DELAY_DATA -# endif -# endif # include #else # include #endif -#if (defined(__SVR4) || defined(__svr4__)) && defined(sun) +#if defined(__SVR4) || defined(sun) /* SOLARIS */ # include #endif -#if !defined(_AIX51) # include -#endif #if defined(_KERNEL) # include # include @@ -50,29 +42,18 @@ # include # include # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL #endif -#if !defined(__SVR4) && !defined(__svr4__) && !defined(__hpux) && \ - !defined(linux) +#if !defined(__SVR4) # include #else -# if !defined(linux) # include -# endif # if (SOLARIS2 < 5) && defined(sun) # include # endif #endif -#ifdef __hpux -# define _NET_ROUTE_INCLUDED -#endif -#if !defined(linux) # include -#endif #include #include #ifdef sun @@ -81,25 +62,13 @@ struct file; #include #include #include -#if defined(__sgi) && defined(IFF_DRVRLOCK) /* IRIX 6 */ -# include -# include -#endif #include -#if (!defined(__sgi) && !defined(AIX)) || defined(_KERNEL) # include # include -#endif -#ifdef __hpux -# undef _NET_ROUTE_INCLUDED -#endif -#ifdef __osf__ -# undef _RADIX_H_ -#endif #include "netinet/ip_compat.h" #ifdef USE_INET6 # include -# if !SOLARIS && defined(_KERNEL) && !defined(__osf__) && !defined(__hpux) +# if !SOLARIS && defined(_KERNEL) # include # endif #endif @@ -122,7 +91,7 @@ struct file; #if defined(IPFILTER_BPF) && defined(_KERNEL) # include #endif -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include #endif #include "netinet/ipl.h" @@ -131,10 +100,6 @@ struct file; # include extern struct callout ipf_slowtimer_ch; #endif -#if defined(__OpenBSD__) -# include -extern struct timeout ipf_slowtimer_ch; -#endif /* END OF INCLUDES */ #if !defined(lint) @@ -214,10 +179,7 @@ static int ipf_updateipid __P((fr_info_t *)); static int ipf_settimeout __P((struct ipf_main_softc_s *, struct ipftuneable *, ipftuneval_t *)); -#if !defined(_KERNEL) || (!defined(__NetBSD__) && !defined(__OpenBSD__) && \ - !defined(__FreeBSD__)) || \ - FREEBSD_LT_REV(501000) || NETBSD_LT_REV(105000000) 
|| \ - OPENBSD_LT_REV(200006) +#if !defined(_KERNEL) || SOLARIS static int ppsratecheck(struct timeval *, int *, int); #endif @@ -3267,12 +3229,6 @@ ipf_check(ctx, ip, hlen, ifp, out } } else { LBUMP(ipf_stats[out].fr_pass); -#if defined(_KERNEL) && defined(__sgi) - if ((fin->fin_hbuf != NULL) && - (mtod(fin->fin_m, struct ip *) != fin->fin_ip)) { - COPYBACK(fin->fin_m, 0, fin->fin_plen, fin->fin_hbuf); - } -#endif } SPL_X(s); @@ -5483,10 +5439,7 @@ ipf_resolvefunc(softc, data) } -#if !defined(_KERNEL) || (!defined(__NetBSD__) && !defined(__OpenBSD__) && \ - !defined(__FreeBSD__)) || \ - FREEBSD_LT_REV(501000) || NETBSD_LT_REV(105000000) || \ - OPENBSD_LT_REV(200006) +#if !defined(_KERNEL) || SOLARIS /* * From: NetBSD * ppsratecheck(): packets (or events) per second limitation. @@ -10111,9 +10064,6 @@ ipf_slowtimer(softc) ipf_rule_expire(softc); ipf_sync_expire(softc); softc->ipf_ticks++; -# if defined(__OpenBSD__) - timeout_add(&ipf_slowtimer_ch, hz/2); -# endif } diff --git a/sys/contrib/ipfilter/netinet/ip_auth.c b/sys/contrib/ipfilter/netinet/ip_auth.c index 8624c3ba064c..f08d0b2fd1d9 100644 --- a/sys/contrib/ipfilter/netinet/ip_auth.c +++ b/sys/contrib/ipfilter/netinet/ip_auth.c @@ -24,29 +24,24 @@ # endif # include # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL #endif -#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include # include #else # include #endif -#if !defined(linux) # include -#endif #include #if defined(_KERNEL) # include -# if !defined(__SVR4) && !defined(__svr4__) && !defined(linux) +# if !defined(__SVR4) # include # endif #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include # include # ifdef _KERNEL @@ -55,11 +50,10 @@ struct file; # include # include #endif -#if (defined(_BSDI_VERSION) && (_BSDI_VERSION >= 199802)) || \ - (defined(__FreeBSD_version) &&(__FreeBSD_version >= 400000)) +#if defined(__FreeBSD_version) # include #endif -#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(bsdi) +#if defined(__NetBSD__) # include #endif #if defined(_KERNEL) && defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000) @@ -76,10 +70,8 @@ struct file; #include #include #include -#if !defined(linux) # include -#endif -#if !defined(_KERNEL) && !defined(__osf__) && !defined(__sgi) +#if !defined(_KERNEL) # define KERNEL # define _KERNEL # define NOT_KERNEL @@ -89,34 +81,26 @@ struct file; # undef KERNEL #endif #include -#if defined(IRIX) && (IRIX < 60516) /* IRIX < 6 */ -extern struct ifqueue ipintrq; /* ip packet input queue */ -#else -# if !defined(__hpux) && !defined(linux) -# if __FreeBSD_version >= 300000 +# if defined(__FreeBSD_version) # include -# if __FreeBSD_version >= 500042 # define IF_QFULL _IF_QFULL # define IF_DROP _IF_DROP -# endif /* __FreeBSD_version >= 500042 */ # endif # include # include -# endif -#endif #include #include #include "netinet/ip_compat.h" #include #include "netinet/ip_fil.h" #include "netinet/ip_auth.h" -#if !defined(MENTAT) && !defined(linux) +#if !defined(MENTAT) # include # ifdef __FreeBSD__ # include # endif #endif -#if (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include # if defined(_KERNEL) && !defined(IPFILTER_LKM) # include @@ -232,9 +216,6 @@ ipf_auth_soft_init(softc, arg) bzero((char *)softa->ipf_auth_pkts, softa->ipf_auth_size * sizeof(*softa->ipf_auth_pkts)); -#if defined(linux) && defined(_KERNEL) - init_waitqueue_head(&softa->ipf_auth_next_linux); -#endif return 0; } @@ -1106,22 
+1087,7 @@ ipf_auth_wait(softc, softa, data) error = EINTR; } # else /* SOLARIS */ -# ifdef __hpux - { - lock_t *l; - - l = get_sleep_lock(&softa->ipf_auth_next); - error = sleep(&softa->ipf_auth_next, PZERO+1); - spinunlock(l); - } -# else -# ifdef __osf__ - error = mpsleep(&softa->ipf_auth_next, PSUSP|PCATCH, "ipf_auth_next", - 0, &softa->ipf_auth_mx, MS_LOCK_SIMPLE); -# else error = SLEEP(&softa->ipf_auth_next, "ipf_auth_next"); -# endif /* __osf__ */ -# endif /* __hpux */ # endif /* SOLARIS */ #endif MUTEX_EXIT(&softa->ipf_auth_mx); diff --git a/sys/contrib/ipfilter/netinet/ip_compat.h b/sys/contrib/ipfilter/netinet/ip_compat.h index 061e4113cc73..03cd97e3c8ee 100644 --- a/sys/contrib/ipfilter/netinet/ip_compat.h +++ b/sys/contrib/ipfilter/netinet/ip_compat.h @@ -33,7 +33,7 @@ #endif #ifndef SOLARIS -# if defined(sun) && (defined(__svr4__) || defined(__SVR4)) +# if defined(sun) && defined(__SVR4) # define SOLARIS 1 # else # define SOLARIS 0 @@ -41,7 +41,7 @@ #endif -#if defined(__SVR4) || defined(__svr4__) || defined(__sgi) +#if defined(__SVR4) # define index strchr # if !defined(_KERNEL) # define bzero(a,b) memset(a,0,b) @@ -62,11 +62,6 @@ # endif #endif -#if defined(__sgi) || defined(bsdi) || defined(__hpux) || defined(hpux) -struct ether_addr { - u_char ether_addr_octet[6]; -}; -#endif # ifdef __STDC__ # define IPL_EXTERN(ep) ipl##ep @@ -100,15 +95,6 @@ struct ether_addr { (__FreeBSD_version > (x))) #define FREEBSD_LT_REV(x) (defined(__FreeBSD_version) && \ (__FreeBSD_version < (x))) -#define BSDOS_GE_REV(x) (defined(_BSDI_VERSION) && \ - (_BSDI_VERSION >= (x))) -#define BSDOS_GT_REV(x) (defined(_BSDI_VERSION) && \ - (_BSDI_VERSION > (x))) -#define BSDOS_LT_REV(x) (defined(_BSDI_VERSION) && \ - (_BSDI_VERSION < (x))) -#define OPENBSD_GE_REV(x) (defined(OpenBSD) && (OpenBSD >= (x))) -#define OPENBSD_GT_REV(x) (defined(OpenBSD) && (OpenBSD > (x))) -#define OPENBSD_LT_REV(x) (defined(OpenBSD) && (OpenBSD < (x))) #define BSD_GE_YEAR(x) (defined(BSD) && (BSD >= (x))) #define BSD_GT_YEAR(x) (defined(BSD) && (BSD > (x))) #define BSD_LT_YEAR(x) (defined(BSD) && (BSD < (x))) @@ -321,8 +307,7 @@ typedef union { #define ipf_isw ipf_lkun_s.ipf_sw #define ipf_magic ipf_lkun_s.ipf_magic -#if !defined(__GNUC__) || \ - (defined(__FreeBSD_version) && (__FreeBSD_version >= 503000)) +#if !defined(__GNUC__) || defined(__FreeBSD_version) # ifndef INLINE # define INLINE # endif @@ -473,11 +458,10 @@ extern mb_t *allocmbt(size_t); #ifdef USE_INET6 -# if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \ - defined(__osf__) || defined(linux) +# if defined(__NetBSD__) || defined(__FreeBSD__) # include # include -# if defined(_KERNEL) && !defined(__osf__) +# if defined(_KERNEL) # include # endif typedef struct ip6_hdr ip6_t; @@ -497,21 +481,16 @@ typedef struct ip6_hdr ip6_t; # define COPYBACK m_copyback # endif # if (defined(__NetBSD_Version__) && (__NetBSD_Version__ < 105180000)) || \ - defined(__FreeBSD__) || (defined(OpenBSD) && (OpenBSD < 200206)) || \ - defined(_BSDI_VERSION) + defined(__FreeBSD__) # include # endif -# if !defined(__FreeBSD__) || FREEBSD_GE_REV(300000) -# if NETBSD_GE_REV(105180000) || OPENBSD_GE_REV(200111) +# if NETBSD_GE_REV(105180000) # include # else # include extern vm_map_t kmem_map; # endif # include -# else /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */ -# include -# endif /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */ # ifdef IPFILTER_M_IPFILTER # include diff --git a/sys/contrib/ipfilter/netinet/ip_dstlist.c 
b/sys/contrib/ipfilter/netinet/ip_dstlist.c index d97dad2c8f35..4f2e3bb05a18 100644 --- a/sys/contrib/ipfilter/netinet/ip_dstlist.c +++ b/sys/contrib/ipfilter/netinet/ip_dstlist.c @@ -9,9 +9,6 @@ # define KERNEL 1 # define _KERNEL 1 #endif -#if defined(__osf__) -# define _PROTO_NET_H_ -#endif #include #include #include @@ -21,9 +18,6 @@ # include # include # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL #else @@ -33,14 +27,12 @@ struct file; # endif #endif #include -#if !defined(linux) # include -#endif #include -#if defined(_KERNEL) && (!defined(__SVR4) && !defined(__svr4__)) +#if defined(_KERNEL) && !defined(__SVR4) # include #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include # include # ifdef _KERNEL @@ -49,7 +41,7 @@ struct file; # include # include #endif -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include #endif diff --git a/sys/contrib/ipfilter/netinet/ip_fil.h b/sys/contrib/ipfilter/netinet/ip_fil.h index 55415a9591b7..daaaa6a0776f 100644 --- a/sys/contrib/ipfilter/netinet/ip_fil.h +++ b/sys/contrib/ipfilter/netinet/ip_fil.h @@ -11,9 +11,7 @@ #ifndef __IP_FIL_H__ #define __IP_FIL_H__ -#if !defined(linux) || !defined(_KERNEL) # include -#endif #include "netinet/ip_compat.h" #include "netinet/ipf_rb.h" @@ -21,15 +19,11 @@ # include #endif #if defined(BSD) && defined(_KERNEL) -# if NETBSD_LT_REV(399000000) || defined(__osf__) || FREEBSD_LT_REV(500043) -# include -# else # include -# endif #endif #ifndef SOLARIS -# if defined(sun) && (defined(__svr4__) || defined(__SVR4)) +# if defined(sun) && defined(__SVR4) # define SOLARIS 1 # else # define SOLARIS 0 @@ -44,7 +38,7 @@ # endif #endif -#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51) +#if defined(__STDC__) || defined(__GNUC__) # define SIOCADAFR _IOW('r', 60, struct ipfobj) # define SIOCRMAFR _IOW('r', 61, struct ipfobj) # define SIOCSETFF _IOW('r', 62, u_int) @@ -458,9 +452,6 @@ typedef struct fr_info { mb_t *fin_qfm; /* pointer to mblk where pkt starts */ void *fin_qpi; char fin_ifname[LIFNAMSIZ]; -#endif -#ifdef __sgi - void *fin_hbuf; #endif void *fin_fraghdr; /* pointer to start of ipv6 frag hdr */ } fr_info_t; @@ -1425,10 +1416,6 @@ typedef struct ipftune { /* ** HPUX Port */ -#ifdef __hpux -/* HP-UX locking sequence deadlock detection module lock MAJOR ID */ -# define IPF_SMAJ 0 /* temp assignment XXX, not critical */ -#endif #if !defined(CDEV_MAJOR) && defined (__FreeBSD_version) && \ (__FreeBSD_version >= 220000) @@ -1624,22 +1611,14 @@ typedef struct ipf_main_softc_s { frentry_t *ipf_rule_explist[2]; ipftoken_t *ipf_token_head; ipftoken_t **ipf_token_tail; -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) && \ - defined(_KERNEL) +#if defined(__FreeBSD_version) && defined(_KERNEL) struct callout ipf_slow_ch; #endif -#if defined(linux) && defined(_KERNEL) - struct timer_list ipf_timer; -#endif #if NETBSD_GE_REV(104040000) struct callout ipf_slow_ch; #endif #if SOLARIS -# if SOLARIS2 >= 7 timeout_id_t ipf_slow_ch; -# else - int ipf_slow_ch; -# endif #endif #if defined(_KERNEL) # if SOLARIS @@ -1662,12 +1641,7 @@ typedef struct ipf_main_softc_s { hook_t *ipf_hk_loop_v6_out; # endif # else -# if defined(linux) && defined(_KERNEL) - struct poll_table_struct ipf_selwait[IPL_LOGSIZE]; - wait_queue_head_t iplh_linux[IPL_LOGSIZE]; -# else struct selinfo ipf_selwait[IPL_LOGSIZE]; -# endif # endif #endif void *ipf_slow; @@ -1697,67 +1671,27 @@ extern void ipfilterattach __P((int)); extern 
int ipl_enable __P((void)); extern int ipl_disable __P((void)); # ifdef MENTAT +/* XXX MENTAT is always defined for Solaris */ extern int ipf_check __P((void *, struct ip *, int, void *, int, void *, mblk_t **)); # if SOLARIS extern void ipf_prependmbt(fr_info_t *, mblk_t *); -# if SOLARIS2 >= 7 extern int ipfioctl __P((dev_t, int, intptr_t, int, cred_t *, int *)); -# else -extern int ipfioctl __P((dev_t, int, int *, int, cred_t *, int *)); -# endif -# endif -# ifdef __hpux -extern int ipfioctl __P((dev_t, int, caddr_t, int)); -extern int ipf_select __P((dev_t, int)); # endif extern int ipf_qout __P((queue_t *, mblk_t *)); # else /* MENTAT */ +/* XXX MENTAT is never defined for FreeBSD & NetBSD */ extern int ipf_check __P((void *, struct ip *, int, void *, int, mb_t **)); extern int (*fr_checkp) __P((ip_t *, int, void *, int, mb_t **)); extern size_t mbufchainlen __P((mb_t *)); -# ifdef __sgi -# include -extern int ipfioctl __P((dev_t, int, caddr_t, int, cred_t *, int *)); -extern int ipfilter_sgi_attach __P((void)); -extern void ipfilter_sgi_detach __P((void)); -extern void ipfilter_sgi_intfsync __P((void)); -# else # ifdef IPFILTER_LKM extern int ipf_identify __P((char *)); # endif -# if BSDOS_GE_REV(199510) || FREEBSD_GE_REV(220000) || \ - (defined(NetBSD) && (NetBSD >= 199511)) || defined(__OpenBSD__) -# if defined(__NetBSD__) || BSDOS_GE_REV(199701) || \ - defined(__OpenBSD__) || FREEBSD_GE_REV(300000) -# if (__FreeBSD_version >= 500024) -# if (__FreeBSD_version >= 502116) +# if defined(__FreeBSD_version) extern int ipfioctl __P((struct cdev*, u_long, caddr_t, int, struct thread *)); -# else -extern int ipfioctl __P((dev_t, u_long, caddr_t, int, struct thread *)); -# endif /* __FreeBSD_version >= 502116 */ -# else -# if NETBSD_GE_REV(499001000) +# elif defined(__NetBSD__) extern int ipfioctl __P((dev_t, u_long, void *, int, struct lwp *)); -# else -# if NETBSD_GE_REV(399001400) -extern int ipfioctl __P((dev_t, u_long, caddr_t, int, struct lwp *)); -# else -extern int ipfioctl __P((dev_t, u_long, caddr_t, int, struct proc *)); -# endif -# endif -# endif /* __FreeBSD_version >= 500024 */ -# else -extern int ipfioctl __P((dev_t, int, caddr_t, int, struct proc *)); -# endif -# else -# ifdef linux -extern int ipfioctl __P((struct inode *, struct file *, u_int, u_long)); -# else -extern int ipfioctl __P((dev_t, int, caddr_t, int)); -# endif -# endif /* (_BSDI_VERSION >= 199510) */ -# endif /* __ sgi */ +# endif # endif /* MENTAT */ # if defined(__FreeBSD_version) diff --git a/sys/contrib/ipfilter/netinet/ip_frag.c b/sys/contrib/ipfilter/netinet/ip_frag.c index 14b75e2d6a90..80cd5eccaa79 100644 --- a/sys/contrib/ipfilter/netinet/ip_frag.c +++ b/sys/contrib/ipfilter/netinet/ip_frag.c @@ -16,30 +16,21 @@ #include #include #include -#ifdef __hpux -# include -#endif #if !defined(_KERNEL) # include # include # include # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL #endif -#if defined(_KERNEL) && \ - defined(__FreeBSD_version) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include # include #else # include #endif -#if !defined(linux) # include -#endif #include #if defined(_KERNEL) # include @@ -48,7 +39,7 @@ struct file; # endif #endif #if !defined(__SVR4) && !defined(__svr4__) -# if defined(_KERNEL) && !defined(__sgi) && !defined(AIX) +# if defined(_KERNEL) # include # endif #else @@ -66,9 +57,7 @@ struct file; #include #include #include -#if !defined(linux) # include -#endif #include #include #include diff --git 
a/sys/contrib/ipfilter/netinet/ip_ftp_pxy.c b/sys/contrib/ipfilter/netinet/ip_ftp_pxy.c index 0fc008d6d897..c2cd6fc31e40 100644 --- a/sys/contrib/ipfilter/netinet/ip_ftp_pxy.c +++ b/sys/contrib/ipfilter/netinet/ip_ftp_pxy.c @@ -1308,11 +1308,7 @@ ipf_p_ftp_process(softf, fin, nat, ftp, rv) t = &ftp->ftp_side[1 - rv]; thseq = ntohl(tcp->th_seq); thack = ntohl(tcp->th_ack); -#ifdef __sgi - mlen = fin->fin_plen - off; -#else mlen = MSGDSIZE(m) - off; -#endif DT3(process_debug, tcphdr_t *, tcp, int, off, int, mlen); if (softf->ipf_p_ftp_debug & DEBUG_INFO) @@ -1609,11 +1605,7 @@ ipf_p_ftp_process(softf, fin, nat, ftp, rv) if (tcp->th_flags & TH_FIN) f->ftps_seq[1]++; if (softf->ipf_p_ftp_debug & DEBUG_PARSE_INFO) { -#ifdef __sgi - mlen = fin->fin_plen; -#else mlen = MSGDSIZE(m); -#endif mlen -= off; printf("ftps_seq[1] = %x inc %d len %d\n", f->ftps_seq[1], inc, mlen); diff --git a/sys/contrib/ipfilter/netinet/ip_htable.c b/sys/contrib/ipfilter/netinet/ip_htable.c index 62707f40edd2..0786355cd87a 100644 --- a/sys/contrib/ipfilter/netinet/ip_htable.c +++ b/sys/contrib/ipfilter/netinet/ip_htable.c @@ -20,22 +20,18 @@ # include # include # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL #endif #include -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include #endif #if defined(__FreeBSD__) # include # include #endif -#if !defined(__svr4__) && !defined(__SVR4) && !defined(__hpux) && \ - !defined(linux) +#if !defined(__SVR4) # include #endif #if defined(_KERNEL) diff --git a/sys/contrib/ipfilter/netinet/ip_irc_pxy.c b/sys/contrib/ipfilter/netinet/ip_irc_pxy.c index b9954b4c067a..1b788720f3f7 100644 --- a/sys/contrib/ipfilter/netinet/ip_irc_pxy.c +++ b/sys/contrib/ipfilter/netinet/ip_irc_pxy.c @@ -278,11 +278,7 @@ ipf_p_irc_send(fin, nat) bzero(ctcpbuf, sizeof(ctcpbuf)); off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff; -#ifdef __sgi - dlen = fin->fin_plen - off; -#else dlen = MSGDSIZE(m) - off; -#endif if (dlen <= 0) return 0; COPYDATA(m, off, MIN(sizeof(ctcpbuf), dlen), ctcpbuf); @@ -361,7 +357,7 @@ ipf_p_irc_send(fin, nat) fin->fin_flx |= FI_DOCKSUM; if (inc != 0) { -#if defined(MENTAT) || defined(__sgi) +#if defined(MENTAT) register u_32_t sum1, sum2; sum1 = fin->fin_plen; diff --git a/sys/contrib/ipfilter/netinet/ip_log.c b/sys/contrib/ipfilter/netinet/ip_log.c index 2d600840a971..76d2e3def1fa 100644 --- a/sys/contrib/ipfilter/netinet/ip_log.c +++ b/sys/contrib/ipfilter/netinet/ip_log.c @@ -19,7 +19,7 @@ # include #endif #ifndef SOLARIS -# if defined(sun) && (defined(__svr4__) || defined(__SVR4)) +# if defined(sun) && defined(__SVR4) # define SOLARIS 1 # else # define SOLARIS 0 @@ -35,15 +35,11 @@ # include # define _KERNEL # define KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL # undef KERNEL #endif -#if (defined(__FreeBSD_version) && (__FreeBSD_version >= 220000)) && \ - defined(_KERNEL) +#if defined(__FreeBSD_version) && defined(_KERNEL) # include # include #else @@ -56,21 +52,15 @@ struct file; # include # endif #endif /* _KERNEL */ -#if !SOLARIS && !defined(__hpux) && !defined(linux) -# if (defined(NetBSD) && (NetBSD > 199609)) || \ - (defined(OpenBSD) && (OpenBSD > 199603)) || \ - (defined(__FreeBSD_version) && (__FreeBSD_version >= 300000)) +# if defined(NetBSD) || defined(__FreeBSD_version) # include -# else -# include -# endif # include # include -# if __FreeBSD_version >= 500000 +# endif +# if defined(__FreeBSD_version) # include # endif -#else -# 
if !defined(__hpux) && defined(_KERNEL) +#if SOLARIS && defined(_KERNEL) # include # include # include @@ -80,24 +70,18 @@ struct file; # include # include # include -# endif /* !__hpux */ -#endif /* !SOLARIS && !__hpux */ -#if !defined(linux) +#endif /* SOLARIS && _KERNEL */ # include -#endif #include #include #ifdef sun # include #endif -#if __FreeBSD_version >= 300000 +#if defined(__FreeBSD_version) # include #endif #include -#ifdef __sgi -# include -#endif # include #include #include @@ -107,9 +91,7 @@ struct file; #ifdef USE_INET6 # include #endif -#if !defined(linux) # include -#endif #ifndef _KERNEL # include #endif @@ -120,7 +102,7 @@ struct file; #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_auth.h" -#if (__FreeBSD_version >= 300000) || defined(__NetBSD__) +#if defined(__FreeBSD_version) || defined(__NetBSD__) # include #endif /* END OF INCLUDES */ @@ -138,12 +120,6 @@ typedef struct ipf_log_softc_s { ipfmutex_t ipl_mutex[IPL_LOGSIZE]; # if SOLARIS && defined(_KERNEL) kcondvar_t ipl_wait[IPL_LOGSIZE]; -# endif -# if defined(linux) && defined(_KERNEL) - wait_queue_head_t iplh_linux[IPL_LOGSIZE]; -# endif -# if defined(__hpux) && defined(_KERNEL) - iplog_select_t ipl_ss[IPL_LOGSIZE]; # endif iplog_t **iplh[IPL_LOGSIZE]; iplog_t *iplt[IPL_LOGSIZE]; @@ -386,11 +362,11 @@ ipf_log_pkt(fin, flags) ipflog_t ipfl; u_char p; mb_t *m; -# if (SOLARIS || defined(__hpux)) && defined(_KERNEL) && !defined(FW_HOOKS) +# if SOLARIS && defined(_KERNEL) && !defined(FW_HOOKS) qif_t *ifp; # else struct ifnet *ifp; -# endif /* SOLARIS || __hpux */ +# endif /* SOLARIS */ m = fin->fin_m; if (m == NULL) @@ -460,14 +436,14 @@ ipf_log_pkt(fin, flags) * Get the interface number and name to which this packet is * currently associated. */ -# if (SOLARIS || defined(__hpux)) && defined(_KERNEL) +# if SOLARIS && defined(_KERNEL) # if !defined(FW_HOOKS) ipfl.fl_unit = (u_int)ifp->qf_ppa; # endif COPYIFNAME(fin->fin_v, ifp, ipfl.fl_ifname); # else # if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \ - OPENBSD_GE_REV(199603) || defined(linux) || FREEBSD_GE_REV(501113) + defined(__FreeBSD_version) COPYIFNAME(fin->fin_v, ifp, ipfl.fl_ifname); # else ipfl.fl_unit = (u_int)ifp->if_unit; @@ -738,32 +714,9 @@ ipf_log_read(softc, unit, uio) return EINTR; } # else -# if defined(__hpux) && defined(_KERNEL) - lock_t *l; - -# ifdef IPL_SELECT - if (uio->uio_fpflags & (FNBLOCK|FNDELAY)) { - /* this is no blocking system call */ - softl->ipl_readers[unit]--; - MUTEX_EXIT(&softl->ipl_mutex[unit]); - return 0; - } -# endif - - MUTEX_EXIT(&softl->ipl_mutex[unit]); - l = get_sleep_lock(&softl->iplh[unit]); - error = sleep(&softl->iplh[unit], PZERO+1); - spinunlock(l); -# else -# if defined(__osf__) && defined(_KERNEL) - error = mpsleep(&softl->iplh[unit], PSUSP|PCATCH, "ipfread", 0, - &softl->ipl_mutex, MS_LOCK_SIMPLE); -# else MUTEX_EXIT(&softl->ipl_mutex[unit]); SPL_X(s); error = SLEEP(unit + softl->iplh, "ipl sleep"); -# endif /* __osf__ */ -# endif /* __hpux */ SPL_NET(s); MUTEX_ENTER(&softl->ipl_mutex[unit]); if (error) { @@ -781,8 +734,7 @@ ipf_log_read(softc, unit, uio) return EIO; } -# if (defined(BSD) && (BSD >= 199101)) || defined(__FreeBSD__) || \ - defined(__osf__) +# if (defined(BSD) && (BSD >= 199101)) || defined(__FreeBSD__) uio->uio_rw = UIO_READ; # endif diff --git a/sys/contrib/ipfilter/netinet/ip_lookup.c b/sys/contrib/ipfilter/netinet/ip_lookup.c index 45999e0447ff..046939146075 100644 --- a/sys/contrib/ipfilter/netinet/ip_lookup.c +++ 
b/sys/contrib/ipfilter/netinet/ip_lookup.c @@ -10,15 +10,12 @@ # define KERNEL 1 # define _KERNEL 1 #endif -#if defined(__osf__) -# define _PROTO_NET_H_ -#endif #include #include #include #include #include -#if __FreeBSD_version >= 220000 && defined(_KERNEL) +#if defined(__FreeBSD_version) && defined(_KERNEL) # include # include #else @@ -29,9 +26,6 @@ # include # include # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL #endif @@ -43,7 +37,7 @@ struct file; #endif #if defined(_KERNEL) # include -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include # endif #else diff --git a/sys/contrib/ipfilter/netinet/ip_nat.c b/sys/contrib/ipfilter/netinet/ip_nat.c index eb41753b027c..9139ff495f44 100644 --- a/sys/contrib/ipfilter/netinet/ip_nat.c +++ b/sys/contrib/ipfilter/netinet/ip_nat.c @@ -31,27 +31,22 @@ struct file; # include # undef KERNEL #endif -#if defined(_KERNEL) && \ - defined(__FreeBSD_version) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include # include #else # include #endif -#if !defined(AIX) # include -#endif -#if !defined(linux) # include -#endif #include #if defined(_KERNEL) # include -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include # endif #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include # include # ifdef KERNEL @@ -60,11 +55,11 @@ struct file; # include # include #endif -#if __FreeBSD_version >= 300000 +#if defined(__FreeBSD_version) # include #endif #include -#if __FreeBSD_version >= 300000 +#if defined(__FreeBSD_version) # include #endif #ifdef sun @@ -80,9 +75,7 @@ struct file; extern struct ifnet vpnif; #endif -#if !defined(linux) # include -#endif #include #include #include @@ -97,7 +90,7 @@ extern struct ifnet vpnif; #include "netinet/ip_lookup.h" #include "netinet/ip_dstlist.h" #include "netinet/ip_sync.h" -#if FREEBSD_GE_REV(300000) +#if defined(__FreeBSD_version) # include #endif #ifdef HAS_SYS_MD5_H @@ -1024,7 +1017,7 @@ ipf_nat_ioctl(softc, data, cmd, mode, uid, ctx) KAUTH_REQ_NETWORK_FIREWALL_FW, NULL, NULL, NULL)) # else -# if defined(__FreeBSD_version) && (__FreeBSD_version >= 500034) +# if defined(__FreeBSD_version) if (securelevel_ge(curthread->td_ucred, 3) && (mode & FWRITE)) # else if ((securelevel >= 3) && (mode & FWRITE)) @@ -1036,11 +1029,7 @@ ipf_nat_ioctl(softc, data, cmd, mode, uid, ctx) } #endif -#if defined(__osf__) && defined(_KERNEL) - getlock = 0; -#else getlock = (mode & NAT_LOCKHELD) ? 
0 : 1; -#endif n = NULL; nt = NULL; @@ -3302,7 +3291,7 @@ ipf_nat_finalise(fin, nat) u_32_t sum1, sum2, sumd; frentry_t *fr; u_32_t flags; -#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC) +#if SOLARIS && defined(_KERNEL) && defined(ICK_M_CTL_MAGIC) qpktinfo_t *qpi = fin->fin_qpi; #endif @@ -5234,8 +5223,8 @@ ipf_nat_out(fin, nat, natadd, nflags) uh = (udphdr_t *)(ip + 1); uh->uh_ulen += fin->fin_plen; uh->uh_ulen = htons(uh->uh_ulen); -#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ - defined(linux) || defined(BRIDGE_IPF) || defined(__FreeBSD__) +#if !defined(_KERNEL) || defined(MENTAT) || \ + defined(BRIDGE_IPF) || defined(__FreeBSD__) ipf_fix_outcksum(0, &ip->ip_sum, sumd, 0); #endif @@ -5655,8 +5644,7 @@ ipf_nat_in(fin, nat, natadd, nflags) } fin->fin_ip->ip_dst = nat->nat_osrcip; fin->fin_daddr = nat->nat_osrcaddr; -#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ - defined(__osf__) || defined(linux) +#if !defined(_KERNEL) || defined(MENTAT) ipf_fix_incksum(0, &fin->fin_ip->ip_sum, ipsumd, 0); #endif break; @@ -5688,8 +5676,7 @@ ipf_nat_in(fin, nat, natadd, nflags) sum2 += ntohs(ip->ip_off) & IP_DF; CALC_SUMD(sum1, sum2, sumd); -#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ - defined(__osf__) || defined(linux) +#if !defined(_KERNEL) || defined(MENTAT) ipf_fix_outcksum(0, &ip->ip_sum, sumd, 0); #endif PREP_MB_T(fin, m); @@ -6208,27 +6195,6 @@ ipf_nat_log(softc, softn, nat, action) } -#if defined(__OpenBSD__) -/* ------------------------------------------------------------------------ */ -/* Function: ipf_nat_ifdetach */ -/* Returns: Nil */ -/* Parameters: ifp(I) - pointer to network interface */ -/* */ -/* Compatibility interface for OpenBSD to trigger the correct updating of */ -/* interface references within IPFilter. 
*/ -/* ------------------------------------------------------------------------ */ -void -ipf_nat_ifdetach(ifp) - void *ifp; -{ - ipf_main_softc_t *softc; - - softc = ipf_get_softc(0); - - ipf_sync(ifp); - return; -} -#endif /* ------------------------------------------------------------------------ */ @@ -7459,8 +7425,7 @@ ipf_nat_decap(fin, nat) CALC_SUMD(sum1, sum2, sumd); fin->fin_ip->ip_dst = nat->nat_osrcip; fin->fin_daddr = nat->nat_osrcaddr; -#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ - defined(__osf__) || defined(linux) +#if !defined(_KERNEL) || defined(MENTAT) ipf_fix_outcksum(0, &fin->fin_ip->ip_sum, sumd, 0); #endif } diff --git a/sys/contrib/ipfilter/netinet/ip_nat.h b/sys/contrib/ipfilter/netinet/ip_nat.h index 6e245f81e727..e65b1681a5f5 100644 --- a/sys/contrib/ipfilter/netinet/ip_nat.h +++ b/sys/contrib/ipfilter/netinet/ip_nat.h @@ -14,7 +14,7 @@ #define __IP_NAT_H__ #ifndef SOLARIS -# if defined(sun) && (defined(__svr4__) || defined(__SVR4)) +# if defined(sun) && defined(__SVR4) # define SOLARIS 1 # else # define SOLARIS 0 @@ -694,9 +694,6 @@ extern int ipf_nat_hostmap_rehash __P((ipf_main_softc_t *, ipftuneable_t *, ipftuneval_t *)); extern nat_t *ipf_nat_icmperrorlookup __P((fr_info_t *, int)); extern nat_t *ipf_nat_icmperror __P((fr_info_t *, u_int *, int)); -#if defined(__OpenBSD__) -extern void ipf_nat_ifdetach __P((void *)); -#endif extern int ipf_nat_init __P((void)); extern nat_t *ipf_nat_inlookup __P((fr_info_t *, u_int, u_int, struct in_addr, struct in_addr)); diff --git a/sys/contrib/ipfilter/netinet/ip_nat6.c b/sys/contrib/ipfilter/netinet/ip_nat6.c index 5985d6f6566c..19f57868db43 100644 --- a/sys/contrib/ipfilter/netinet/ip_nat6.c +++ b/sys/contrib/ipfilter/netinet/ip_nat6.c @@ -29,26 +29,22 @@ struct file; # include # undef _KERNEL #endif -#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include # include #else # include #endif -#if !defined(AIX) # include -#endif -#if !defined(linux) # include -#endif #include #if defined(_KERNEL) # include -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include # endif #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include # include # ifdef _KERNEL @@ -57,11 +53,11 @@ struct file; # include # include #endif -#if __FreeBSD_version >= 300000 +#if defined(__FreeBSD_version) # include #endif #include -#if __FreeBSD_version >= 300000 +#if defined(__FreeBSD_version) # include #endif #ifdef sun @@ -78,9 +74,7 @@ struct file; extern struct ifnet vpnif; #endif -#if !defined(linux) # include -#endif #include #include #include @@ -94,7 +88,7 @@ extern struct ifnet vpnif; #include "netinet/ip_lookup.h" #include "netinet/ip_dstlist.h" #include "netinet/ip_sync.h" -#if (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include #endif #ifdef HAS_SYS_MD5_H @@ -970,7 +964,7 @@ ipf_nat6_add(fin, np, natsave, flags, direction) u_int nflags; natinfo_t ni; int move; -#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC) +#if SOLARIS && defined(_KERNEL) && defined(ICK_M_CTL_MAGIC) qpktinfo_t *qpi = fin->fin_qpi; #endif diff --git a/sys/contrib/ipfilter/netinet/ip_pool.c b/sys/contrib/ipfilter/netinet/ip_pool.c index 2a43cdb00bfa..fd511fcd2d89 100644 --- a/sys/contrib/ipfilter/netinet/ip_pool.c +++ b/sys/contrib/ipfilter/netinet/ip_pool.c @@ -9,9 +9,6 @@ # define KERNEL 1 # define _KERNEL 1 #endif -#if defined(__osf__) -# define _PROTO_NET_H_ -#endif #include #include #include @@ 
-21,9 +18,6 @@ # include # include # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL #else @@ -36,7 +30,7 @@ struct file; #if defined(_KERNEL) && !defined(SOLARIS2) # include #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include # ifdef _KERNEL # include @@ -44,7 +38,7 @@ struct file; # include # include #endif -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include #endif diff --git a/sys/contrib/ipfilter/netinet/ip_proxy.c b/sys/contrib/ipfilter/netinet/ip_proxy.c index 359c29bdfd3e..29ecdd4201a4 100644 --- a/sys/contrib/ipfilter/netinet/ip_proxy.c +++ b/sys/contrib/ipfilter/netinet/ip_proxy.c @@ -16,43 +16,34 @@ #include #include #include -#if !defined(AIX) # include -#endif #if !defined(_KERNEL) && !defined(__KERNEL__) # include # include # include # include # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL #endif -#if !defined(linux) # include -#endif #include #if defined(_KERNEL) -# if !defined(__NetBSD__) && !defined(sun) && !defined(__osf__) && \ - !defined(__OpenBSD__) && !defined(__hpux) && !defined(__sgi) && \ - !defined(AIX) +#ifdef __FreeBSD_version # include # endif # include -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include # endif #endif -#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include # include #else # include #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include # ifdef _KERNEL # include @@ -64,7 +55,7 @@ struct file; # include #endif #include -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) && defined(_KERNEL) +#if defined(__FreeBSD_version) && defined(_KERNEL) #include #else #define CURVNET_SET(arg) @@ -79,9 +70,7 @@ struct file; #include #include #include -#ifndef linux # include -#endif #include #include #include @@ -91,7 +80,7 @@ struct file; #include "netinet/ip_nat.h" #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" -#if (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include #endif @@ -925,7 +914,7 @@ ipf_proxy_check(fin, nat) ip_t *ip; short rv; int err; -#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) +#if !defined(_KERNEL) || defined(MENTAT) u_32_t s1, s2, sd; #endif @@ -1017,7 +1006,7 @@ ipf_proxy_check(fin, nat) * packet. 
*/ adjlen = APR_INC(err); -#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) +#if !defined(_KERNEL) || defined(MENTAT) s1 = LONG_SUM(fin->fin_plen - adjlen); s2 = LONG_SUM(fin->fin_plen); CALC_SUMD(s1, s2, sd); diff --git a/sys/contrib/ipfilter/netinet/ip_raudio_pxy.c b/sys/contrib/ipfilter/netinet/ip_raudio_pxy.c index 031363793cea..62202ffc4488 100644 --- a/sys/contrib/ipfilter/netinet/ip_raudio_pxy.c +++ b/sys/contrib/ipfilter/netinet/ip_raudio_pxy.c @@ -105,11 +105,7 @@ ipf_p_raudio_out(arg, fin, aps, nat) off = (char *)tcp - (char *)fin->fin_ip; off += (TCP_OFF(tcp) << 2) + fin->fin_ipoff; -#ifdef __sgi - dlen = fin->fin_plen - off; -#else dlen = MSGDSIZE(m) - off; -#endif if (dlen <= 0) return 0; @@ -222,11 +218,7 @@ ipf_p_raudio_in(arg, fin, aps, nat) off = (char *)tcp - (char *)fin->fin_ip; off += (TCP_OFF(tcp) << 2) + fin->fin_ipoff; -#ifdef __sgi - dlen = fin->fin_plen - off; -#else dlen = MSGDSIZE(m) - off; -#endif if (dlen <= 0) return 0; diff --git a/sys/contrib/ipfilter/netinet/ip_scan.c b/sys/contrib/ipfilter/netinet/ip_scan.c index 5b7c77e4b102..34bc844eb354 100644 --- a/sys/contrib/ipfilter/netinet/ip_scan.c +++ b/sys/contrib/ipfilter/netinet/ip_scan.c @@ -10,9 +10,6 @@ # define _KERNEL 1 #endif #include -#if defined(__hpux) && (HPUXREV >= 1111) && !defined(_KERNEL) -# include -#endif #include #include #include @@ -20,21 +17,16 @@ # include # include # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL #else # include -# if !defined(__svr4__) && !defined(__SVR4) +# if !defined(__SVR4) # include # endif #endif #include -#if !defined(__hpux) && !defined(__osf__) && !defined(linux) && !defined(AIX) # include -#endif #ifdef __FreeBSD__ # include # include diff --git a/sys/contrib/ipfilter/netinet/ip_state.c b/sys/contrib/ipfilter/netinet/ip_state.c index 1bf190715340..f364c295e4c0 100644 --- a/sys/contrib/ipfilter/netinet/ip_state.c +++ b/sys/contrib/ipfilter/netinet/ip_state.c @@ -20,7 +20,7 @@ #include #include #if defined(_KERNEL) && defined(__FreeBSD_version) && \ - (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) + !defined(KLD_MODULE) #include "opt_inet6.h" #endif #if !defined(_KERNEL) && !defined(__KERNEL__) @@ -28,30 +28,25 @@ # include # include # define _KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL #endif -#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include # include #else # include #endif #include -#if !defined(linux) # include -#endif #include #if defined(_KERNEL) # include -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include # endif #endif -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include # include # ifdef _KERNEL @@ -69,9 +64,7 @@ struct file; #include #include #include -#if !defined(__hpux) && !defined(linux) # include -#endif #include #include #if !defined(_KERNEL) diff --git a/sys/contrib/ipfilter/netinet/ip_sync.c b/sys/contrib/ipfilter/netinet/ip_sync.c index 59094097864b..04018ed7b621 100644 --- a/sys/contrib/ipfilter/netinet/ip_sync.c +++ b/sys/contrib/ipfilter/netinet/ip_sync.c @@ -21,15 +21,12 @@ # include # define _KERNEL # define KERNEL -# ifdef __OpenBSD__ -struct file; -# endif # include # undef _KERNEL # undef KERNEL #else # include -# if !defined(__SVR4) && !defined(__svr4__) +# if !defined(__SVR4) # include # endif # include @@ -40,18 +37,16 @@ struct file; #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000) # include #endif -#if defined(_KERNEL) && 
(__FreeBSD_version >= 220000) +#if defined(_KERNEL) && defined(__FreeBSD_version) # include # include #else # include #endif #include -#if !defined(linux) # include -#endif #include -#if defined(__SVR4) || defined(__svr4__) +#if defined(__SVR4) # include # include # ifdef _KERNEL @@ -69,12 +64,8 @@ struct file; #include #include #include -#if !defined(linux) # include -#endif -#if !defined(__hpux) && !defined(linux) # include -#endif #include #include #include "netinet/ip_compat.h" @@ -88,7 +79,7 @@ struct file; #ifdef USE_INET6 #include #endif -#if (__FreeBSD_version >= 300000) +#if defined(__FreeBSD_version) # include # if defined(_KERNEL) && !defined(IPFILTER_LKM) # include @@ -111,9 +102,6 @@ typedef struct ipf_sync_softc_s { ipfrwlock_t ipf_syncnat; #if SOLARIS && defined(_KERNEL) kcondvar_t ipslwait; -#endif -#if defined(linux) && defined(_KERNEL) - wait_queue_head_t sl_tail_linux; #endif synclist_t **syncstatetab; synclist_t **syncnattab; @@ -308,7 +296,7 @@ ipf_sync_soft_destroy(softc, arg) } -# if !defined(sparc) && !defined(__hppa) +# if !defined(sparc) /* ------------------------------------------------------------------------ */ /* Function: ipf_sync_tcporder */ /* Returns: Nil */ @@ -418,11 +406,11 @@ ipf_sync_storder(way, ips) ips->is_smsk[1] = ntohl(ips->is_smsk[1]); } } -# else /* !defined(sparc) && !defined(__hppa) */ +# else /* !defined(sparc) */ # define ipf_sync_tcporder(x,y) # define ipf_sync_natorder(x,y) # define ipf_sync_storder(x,y) -# endif /* !defined(sparc) && !defined(__hppa) */ +# endif /* !defined(sparc) */ /* ------------------------------------------------------------------------ */ @@ -449,7 +437,7 @@ ipf_sync_write(softc, uio) int err = 0; -# if BSD_GE_YEAR(199306) || defined(__FreeBSD__) || defined(__osf__) +# if BSD_GE_YEAR(199306) || defined(__FreeBSD__) uio->uio_rw = UIO_WRITE; # endif @@ -597,7 +585,7 @@ ipf_sync_read(softc, uio) return EINVAL; } -# if BSD_GE_YEAR(199306) || defined(__FreeBSD__) || defined(__osf__) +# if BSD_GE_YEAR(199306) || defined(__FreeBSD__) uio->uio_rw = UIO_READ; # endif @@ -612,28 +600,6 @@ ipf_sync_read(softc, uio) return EINTR; } # else -# ifdef __hpux - { - lock_t *l; - - l = get_sleep_lock(&softs->sl_tail); - err = sleep(&softs->sl_tail, PZERO+1); - if (err) { - MUTEX_EXIT(&softs->ipsl_mutex); - IPFERROR(110010); - return EINTR; - } - spinunlock(l); - } -# else /* __hpux */ -# ifdef __osf__ - err = mpsleep(&softs->sl_tail, PSUSP|PCATCH, "ipl sleep", 0, - &softs->ipsl_mutex, MS_LOCK_SIMPLE); - if (err) { - IPFERROR(110011); - return EINTR; - } -# else MUTEX_EXIT(&softs->ipsl_mutex); err = SLEEP(&softs->sl_tail, "ipl sleep"); if (err) { @@ -641,8 +607,6 @@ ipf_sync_read(softc, uio) return EINTR; } MUTEX_ENTER(&softs->ipsl_mutex); -# endif /* __osf__ */ -# endif /* __hpux */ # endif /* SOLARIS */ # endif /* _KERNEL */ } From e559413d6f7c264deb3ce6845d39a3325ed3bfa0 Mon Sep 17 00:00:00 2001 From: Cy Schubert Date: Sun, 3 Feb 2019 05:26:01 +0000 Subject: [PATCH 85/90] Remove a redundant ip_compat.h, originally merged from upstream. MFC after: 1 month --- contrib/ipfilter/ipsend/.OLD/ip_compat.h | 126 ----------------------- 1 file changed, 126 deletions(-) delete mode 100644 contrib/ipfilter/ipsend/.OLD/ip_compat.h diff --git a/contrib/ipfilter/ipsend/.OLD/ip_compat.h b/contrib/ipfilter/ipsend/.OLD/ip_compat.h deleted file mode 100644 index b77cde636a98..000000000000 --- a/contrib/ipfilter/ipsend/.OLD/ip_compat.h +++ /dev/null @@ -1,126 +0,0 @@ -/* $FreeBSD$ */ - -/* - * (C)opyright 1995 by Darren Reed. 
- * - * This code may be freely distributed as long as it retains this notice - * and is not changed in any way. The author accepts no responsibility - * for the use of this software. I hate legaleese, don't you ? - * - * @(#)ip_compat.h 1.2 12/7/95 - */ - -/* - * These #ifdef's are here mainly for linux, but who knows, they may - * not be in other places or maybe one day linux will grow up and some - * of these will turn up there too. - */ -#ifndef ICMP_UNREACH -# define ICMP_UNREACH ICMP_DEST_UNREACH -#endif -#ifndef ICMP_SOURCEQUENCH -# define ICMP_SOURCEQUENCH ICMP_SOURCE_QUENCH -#endif -#ifndef ICMP_TIMXCEED -# define ICMP_TIMXCEED ICMP_TIME_EXCEEDED -#endif -#ifndef ICMP_PARAMPROB -# define ICMP_PARAMPROB ICMP_PARAMETERPROB -#endif -#ifndef IPVERSION -# define IPVERSION 4 -#endif -#ifndef IPOPT_MINOFF -# define IPOPT_MINOFF 4 -#endif -#ifndef IPOPT_COPIED -# define IPOPT_COPIED(x) ((x)&0x80) -#endif -#ifndef IPOPT_EOL -# define IPOPT_EOL 0 -#endif -#ifndef IPOPT_NOP -# define IPOPT_NOP 1 -#endif -#ifndef IP_MF -# define IP_MF ((u_short)0x2000) -#endif -#ifndef ETHERTYPE_IP -# define ETHERTYPE_IP ((u_short)0x0800) -#endif -#ifndef TH_FIN -# define TH_FIN 0x01 -#endif -#ifndef TH_SYN -# define TH_SYN 0x02 -#endif -#ifndef TH_RST -# define TH_RST 0x04 -#endif -#ifndef TH_PUSH -# define TH_PUSH 0x08 -#endif -#ifndef TH_ACK -# define TH_ACK 0x10 -#endif -#ifndef TH_URG -# define TH_URG 0x20 -#endif -#ifndef IPOPT_EOL -# define IPOPT_EOL 0 -#endif -#ifndef IPOPT_NOP -# define IPOPT_NOP 1 -#endif -#ifndef IPOPT_RR -# define IPOPT_RR 7 -#endif -#ifndef IPOPT_TS -# define IPOPT_TS 68 -#endif -#ifndef IPOPT_SECURITY -# define IPOPT_SECURITY 130 -#endif -#ifndef IPOPT_LSRR -# define IPOPT_LSRR 131 -#endif -#ifndef IPOPT_SATID -# define IPOPT_SATID 136 -#endif -#ifndef IPOPT_SSRR -# define IPOPT_SSRR 137 -#endif -#ifndef IPOPT_SECUR_UNCLASS -# define IPOPT_SECUR_UNCLASS ((u_short)0x0000) -#endif -#ifndef IPOPT_SECUR_CONFID -# define IPOPT_SECUR_CONFID ((u_short)0xf135) -#endif -#ifndef IPOPT_SECUR_EFTO -# define IPOPT_SECUR_EFTO ((u_short)0x789a) -#endif -#ifndef IPOPT_SECUR_MMMM -# define IPOPT_SECUR_MMMM ((u_short)0xbc4d) -#endif -#ifndef IPOPT_SECUR_RESTR -# define IPOPT_SECUR_RESTR ((u_short)0xaf13) -#endif -#ifndef IPOPT_SECUR_SECRET -# define IPOPT_SECUR_SECRET ((u_short)0xd788) -#endif -#ifndef IPOPT_SECUR_TOPSECRET -# define IPOPT_SECUR_TOPSECRET ((u_short)0x6bc5) -#endif - - -typedef struct udphdr udphdr_t; -typedef struct tcphdr tcphdr_t; -typedef struct ip ip_t; -typedef struct ether_header ether_header_t; - - -#if defined(__SVR4) || defined(__svr4__) -# define bcopy(a,b,c) memmove(b,a,c) -# define bcmp(a,b,c) memcmp(a,b,c) -# define bzero(a,b) memset(a,0,b) -#endif From e82e8246fc1ab8b3892f69fbdd4ec546f617e2ff Mon Sep 17 00:00:00 2001 From: Cy Schubert Date: Sun, 3 Feb 2019 05:26:04 +0000 Subject: [PATCH 86/90] Remove a reference to HP-UX in a comment. MFC after: 1 month --- sys/contrib/ipfilter/netinet/fil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/contrib/ipfilter/netinet/fil.c b/sys/contrib/ipfilter/netinet/fil.c index ec786bd9fb44..0036c3a55917 100644 --- a/sys/contrib/ipfilter/netinet/fil.c +++ b/sys/contrib/ipfilter/netinet/fil.c @@ -2784,7 +2784,7 @@ ipf_firewall(fin, passp) /* out(I) - 0 == packet going in, 1 == packet going out */ /* mp(IO) - pointer to caller's buffer pointer that holds this */ /* IP packet. */ -/* Solaris & HP-UX ONLY : */ +/* Solaris: */ /* qpi(I) - pointer to STREAMS queue information for this */ /* interface & direction. 
*/
 /*                                                                   */

From e9a5006bff7649603eca4271b166b3453a906171 Mon Sep 17 00:00:00 2001
From: Cy Schubert
Date: Sun, 3 Feb 2019 05:26:07 +0000
Subject: [PATCH 87/90] Kernel module shim sources have no business being in
 the userland build directory, especially those for other operating systems.
 The kernel module shims for other operating systems are hereby removed.

The kernel module shim for FreeBSD, mlfk_ipl.c, is already in
sys/contrib/ipfilter/netinet. The one here is never used and should not be
in the userland build directory either.

mlfk_rule.c isn't used either; however, we will keep it in case someone
wishes to use this shim to load rules via a kernel module, which is handy
for embedded use. In that case it should be copied to
sys/contrib/ipfilter/netinet and a Makefile created to employ it. (Probably
a useful documentation project when time permits.)

MFC after: 1 month
---
 contrib/ipfilter/ml_ipl.c    | 164 ----------
 contrib/ipfilter/mlf_ipl.c   | 596 -----------------------------------
 contrib/ipfilter/mlf_rule.c  | 168 ----------
 contrib/ipfilter/mlfk_ipl.c  | 529 -------------------------------
 contrib/ipfilter/mlh_rule.c  | 114 -------
 contrib/ipfilter/mln_ipl.c   | 355 ---------------------
 contrib/ipfilter/mln_rule.c  |  83 -----
 contrib/ipfilter/mlo_ipl.c   | 364 ---------------------
 contrib/ipfilter/mlo_rule.c  |  80 -----
 contrib/ipfilter/mls_ipl.c   | 351 ---------------------
 contrib/ipfilter/mls_rule.c  | 116 -------
 contrib/ipfilter/mlso_rule.c | 130 --------
 12 files changed, 3050 deletions(-)
 delete mode 100644 contrib/ipfilter/ml_ipl.c
 delete mode 100644 contrib/ipfilter/mlf_ipl.c
 delete mode 100644 contrib/ipfilter/mlf_rule.c
 delete mode 100644 contrib/ipfilter/mlfk_ipl.c
 delete mode 100644 contrib/ipfilter/mlh_rule.c
 delete mode 100644 contrib/ipfilter/mln_ipl.c
 delete mode 100644 contrib/ipfilter/mln_rule.c
 delete mode 100644 contrib/ipfilter/mlo_ipl.c
 delete mode 100644 contrib/ipfilter/mlo_rule.c
 delete mode 100644 contrib/ipfilter/mls_ipl.c
 delete mode 100644 contrib/ipfilter/mls_rule.c
 delete mode 100644 contrib/ipfilter/mlso_rule.c

diff --git a/contrib/ipfilter/ml_ipl.c b/contrib/ipfilter/ml_ipl.c
deleted file mode 100644
index aaf61a419c06..000000000000
--- a/contrib/ipfilter/ml_ipl.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/* $FreeBSD$ */
-
-/*
- * Copyright (C) 2012 by Darren Reed.
- *
- * See the IPFILTER.LICENCE file for details on licencing.
- */
-/*
- * 29/12/94 Added code from Marc Huber to allow it to allocate
- * its own major char number! Way cool patch!
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(sun4c) || defined(sun4m) -#include -#endif - -#ifndef IPL_NAME -#define IPL_NAME "/dev/ipf" -#endif - -extern int ipfattach(), ipfopen(), ipfclose(), ipfioctl(), ipfread(); -extern int nulldev(), ipfidentify(), errno; - -struct cdevsw ipfdevsw = -{ - ipfopen, ipfclose, ipfread, nulldev, - ipfioctl, nulldev, nulldev, nulldev, - 0, nulldev, -}; - - -struct dev_ops ipf_ops = -{ - 1, - ipfidentify, - ipfattach, - ipfopen, - ipfclose, - ipfread, - NULL, /* write */ - NULL, /* strategy */ - NULL, /* dump */ - 0, /* psize */ - ipfioctl, - NULL, /* reset */ - NULL /* mmap */ -}; - -int ipf_major = 0; - -#ifdef sun4m -struct vdldrv vd = -{ - VDMAGIC_PSEUDO, - "ipf", - &ipf_ops, - NULL, - &ipfdevsw, - 0, - 0, - NULL, - NULL, - NULL, - 0, - 1, -}; -#else /* sun4m */ -struct vdldrv vd = -{ - VDMAGIC_PSEUDO, /* magic */ - "ipf", /* name */ -#ifdef sun4c - &ipf_ops, /* dev_ops */ -#else - NULL, /* struct mb_ctlr *mb_ctlr */ - NULL, /* struct mb_driver *mb_driver */ - NULL, /* struct mb_device *mb_device */ - 0, /* num ctlrs */ - 1, /* numdevs */ -#endif /* sun4c */ - NULL, /* bdevsw */ - &ipfdevsw, /* cdevsw */ - 0, /* block major */ - 0, /* char major */ -}; -#endif /* sun4m */ - -extern int vd_unuseddev(); -extern struct cdevsw cdevsw[]; -extern int nchrdev; - -xxxinit(fc, vdp, vdi, vds) - u_int fc; - struct vddrv *vdp; - caddr_t vdi; - struct vdstat *vds; -{ - struct vdlinkage *v; - int i; - - switch (fc) - { - case VDLOAD: - while (ipf_major < nchrdev && - cdevsw[ipf_major].d_open != vd_unuseddev) - ipf_major++; - if (ipf_major == nchrdev) - return ENODEV; - vd.Drv_charmajor = ipf_major; - vdp->vdd_vdtab = (struct vdlinkage *)&vd; - return ipf_attach(vdi); - case VDUNLOAD: - return unload(vdp, vdi); - - case VDSTAT: - return 0; - - default: - return EIO; - } -} - -static unload(vdp, vdi) - struct vddrv *vdp; - struct vdioctl_unload *vdi; -{ - int i; - - (void) vn_remove(IPL_NAME, UIO_SYSSPACE, FILE); - return ipfdetach(); -} - - -static int ipf_attach(vdi) -struct vdioctl_load *vdi; -{ - struct vnode *vp; - struct vattr vattr; - int error = 0, fmode = S_IFCHR|0600; - - (void) vn_remove(IPL_NAME, UIO_SYSSPACE, FILE); - vattr_null(&vattr); - vattr.va_type = MFTOVT(fmode); - vattr.va_mode = (fmode & 07777); - vattr.va_rdev = ipf_major<<8; - - error = vn_create(IPL_NAME, UIO_SYSSPACE, &vattr, EXCL, 0, &vp); - if (error == 0) - VN_RELE(vp); - return ipfattach(0); -} diff --git a/contrib/ipfilter/mlf_ipl.c b/contrib/ipfilter/mlf_ipl.c deleted file mode 100644 index 93995af956f0..000000000000 --- a/contrib/ipfilter/mlf_ipl.c +++ /dev/null @@ -1,596 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - */ -/* - * 29/12/94 Added code from Marc Huber to allow it to allocate - * its own major char number! Way cool patch! 
- */ - - -#include - -#ifdef IPFILTER_LKM -# ifndef __FreeBSD_cc_version -# include -# else -# if __FreeBSD_cc_version < 430000 -# include -# endif -# endif -# define ACTUALLY_LKM_NOT_KERNEL -#else -# ifndef __FreeBSD_cc_version -# include -# else -# if __FreeBSD_cc_version < 430000 -# include -# endif -# endif -#endif -#include -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 220000) -# ifndef ACTUALLY_LKM_NOT_KERNEL -# include "opt_devfs.h" -# endif -# include -# include -# ifdef DEVFS -# include -# endif /*DEVFS*/ -#endif -#include -#include -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) -# include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if BSD >= 199506 -# include -#endif -#if (__FreeBSD_version >= 300000) -# include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "netinet/ipl.h" -#include "netinet/ip_compat.h" -#include "netinet/ip_fil.h" -#include "netinet/ip_state.h" -#include "netinet/ip_nat.h" -#include "netinet/ip_auth.h" -#include "netinet/ip_frag.h" - - -#if !defined(VOP_LEASE) && defined(LEASE_CHECK) -#define VOP_LEASE LEASE_CHECK -#endif - -int xxxinit __P((struct lkm_table *, int, int)); - -#ifdef SYSCTL_OID -int sysctl_ipf_int SYSCTL_HANDLER_ARGS; -# define SYSCTL_IPF(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \ - ptr, val, sysctl_ipf_int, "I", descr); -# define CTLFLAG_OFF 0x00800000 /* IPFilter must be disabled */ -# define CTLFLAG_RWO (CTLFLAG_RW|CTLFLAG_OFF) -SYSCTL_NODE(_net_inet, OID_AUTO, ipf, CTLFLAG_RW, 0, "IPF"); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_flags, CTLFLAG_RW, &ipf_flags, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_pass, CTLFLAG_RW, &ipf_pass, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_active, CTLFLAG_RD, &ipf_active, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_chksrc, CTLFLAG_RW, &ipf_chksrc, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_minttl, CTLFLAG_RW, &ipf_minttl, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpidletimeout, CTLFLAG_RWO, - &ipf_tcpidletimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcphalfclosed, CTLFLAG_RWO, - &ipf_tcphalfclosed, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosewait, CTLFLAG_RWO, - &ipf_tcpclosewait, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcplastack, CTLFLAG_RWO, - &ipf_tcplastack, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcptimeout, CTLFLAG_RWO, - &ipf_tcptimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosed, CTLFLAG_RWO, - &ipf_tcpclosed, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_udptimeout, CTLFLAG_RWO, - &ipf_udptimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_icmptimeout, CTLFLAG_RWO, - &ipf_icmptimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_defnatage, CTLFLAG_RWO, - &ipf_defnatage, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_ipfrttl, CTLFLAG_RW, - &ipf_ipfrttl, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_running, CTLFLAG_RD, - &ipf_running, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_statesize, CTLFLAG_RWO, - &ipf_statesize, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_statemax, CTLFLAG_RWO, - &ipf_statemax, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_authsize, CTLFLAG_RWO, - &ipf_authsize, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_authused, CTLFLAG_RD, - &ipf_authused, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_defaultauthage, CTLFLAG_RW, - &ipf_defaultauthage, 0, ""); 
-SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ippr_ftp_pasvonly, CTLFLAG_RW, - &ippr_ftp_pasvonly, 0, ""); -#endif - -#ifdef DEVFS -static void *ipf_devfs[IPL_LOGSIZE]; -#endif - -#if !defined(__FreeBSD_version) || (__FreeBSD_version < 220000) -int ipf_major = 0; - -static struct cdevsw ipfdevsw = -{ - ipfopen, /* open */ - ipfclose, /* close */ - ipfread, /* read */ - (void *)nullop, /* write */ - ipfioctl, /* ioctl */ - (void *)nullop, /* stop */ - (void *)nullop, /* reset */ - (void *)NULL, /* tty */ - (void *)nullop, /* select */ - (void *)nullop, /* mmap */ - NULL /* strategy */ -}; - -MOD_DEV(IPL_VERSION, LM_DT_CHAR, -1, &ipfdevsw); - -extern struct cdevsw cdevsw[]; -extern int vd_unuseddev __P((void)); -extern int nchrdev; -#else - -static struct cdevsw ipf_cdevsw = { - ipfopen, ipfclose, ipfread, nowrite, /* 79 */ - ipfioctl, nostop, noreset, nodevtotty, -#if (__FreeBSD_version >= 300000) - seltrue, nommap, nostrategy, "ipf", -#else - noselect, nommap, nostrategy, "ipf", -#endif - NULL, -1 -}; -#endif - -static void ipf_drvinit __P((void *)); - -#ifdef ACTUALLY_LKM_NOT_KERNEL -static int if_ipf_unload __P((struct lkm_table *, int)); -static int if_ipf_load __P((struct lkm_table *, int)); -static int if_ipf_remove __P((void)); -static int ipf_major = CDEV_MAJOR; - -static int ipfaction __P((struct lkm_table *, int)); -static char *ipf_devfiles[] = { IPL_NAME, IPL_NAT, IPL_STATE, IPL_AUTH, - IPL_SCAN, IPL_SYNC, IPL_POOL, NULL }; - -extern int lkmenodev __P((void)); - -static int ipfaction(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ -#if !defined(__FreeBSD_version) || (__FreeBSD_version < 220000) - int i = ipf_major; - struct lkm_dev *args = lkmtp->private.lkm_dev; -#endif - int err = 0; - - switch (cmd) - { - case LKM_E_LOAD : - if (lkmexists(lkmtp)) - return EEXIST; - -#if !defined(__FreeBSD_version) || (__FreeBSD_version < 220000) - for (i = 0; i < nchrdev; i++) - if (cdevsw[i].d_open == lkmenodev || - cdevsw[i].d_open == ipfopen) - break; - if (i == nchrdev) { - printf("IP Filter: No free cdevsw slots\n"); - return ENODEV; - } - - ipf_major = i; - args->lkm_offset = i; /* slot in cdevsw[] */ -#endif - printf("IP Filter: loaded into slot %d\n", ipf_major); - err = if_ipf_load(lkmtp, cmd); - if (!err) - ipf_drvinit((void *)NULL); - return err; - break; - case LKM_E_UNLOAD : - err = if_ipf_unload(lkmtp, cmd); - if (!err) { - printf("IP Filter: unloaded from slot %d\n", - ipf_major); -#ifdef DEVFS - if (ipf_devfs[IPL_LOGIPF]) - devfs_remove_dev(ipf_devfs[IPL_LOGIPF]); - if (ipf_devfs[IPL_LOGNAT]) - devfs_remove_dev(ipf_devfs[IPL_LOGNAT]); - if (ipf_devfs[IPL_LOGSTATE]) - devfs_remove_dev(ipf_devfs[IPL_LOGSTATE]); - if (ipf_devfs[IPL_LOGAUTH]) - devfs_remove_dev(ipf_devfs[IPL_LOGAUTH]); - if (ipf_devfs[IPL_LOGSCAN]) - devfs_remove_dev(ipf_devfs[IPL_LOGSCAN]); - if (ipf_devfs[IPL_LOGSYNC]) - devfs_remove_dev(ipf_devfs[IPL_LOGSYNC]); - if (ipf_devfs[IPL_LOGLOOKUP]) - devfs_remove_dev(ipf_devfs[IPL_LOGLOOKUP]); -#endif - } - return err; - case LKM_E_STAT : - break; - default: - err = EIO; - break; - } - return 0; -} - - -static int if_ipf_remove __P((void)) -{ - char *name; - struct nameidata nd; - int error, i; - - for (i = 0; (name = ipf_devfiles[i]); i++) { - NDINIT(&nd, DELETE, LOCKPARENT, UIO_SYSSPACE, name, curproc); - if ((error = namei(&nd))) - return (error); - VOP_LEASE(nd.ni_vp, curproc, curproc->p_ucred, LEASE_WRITE); -#if (__FreeBSD_version >= 300000) - VOP_LOCK(nd.ni_vp, LK_RETRY | LK_EXCLUSIVE, curproc); - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); - 
(void) VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); - - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp != NULLVP) - vput(nd.ni_vp); -#else - VOP_LOCK(nd.ni_vp); - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); - (void) VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); -#endif - } - - return 0; -} - - -static int if_ipf_unload(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - int error = 0; - - error = ipfdetach(); - if (!error) - error = if_ipf_remove(); - return error; -} - - -static int if_ipf_load(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - struct nameidata nd; - struct vattr vattr; - int error = 0, fmode = S_IFCHR|0600, i; - char *name; - - error = ipfattach(); - if (error) - return error; - (void) if_ipf_remove(); - - for (i = 0; (name = ipf_devfiles[i]); i++) { - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE, name, curproc); - if ((error = namei(&nd))) - return error; - if (nd.ni_vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(nd.ni_vp); - return (EEXIST); - } - VATTR_NULL(&vattr); - vattr.va_type = VCHR; - vattr.va_mode = (fmode & 07777); - vattr.va_rdev = (ipf_major << 8) | i; - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); - error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); -#if (__FreeBSD_version >= 300000) - vput(nd.ni_dvp); -#endif - if (error) - return error; - } - return 0; -} - -#endif /* actually LKM */ - -#if defined(__FreeBSD_version) && (__FreeBSD_version < 220000) -/* - * strlen isn't present in 2.1.* kernels. - */ -size_t strlen(string) - char *string; -{ - register char *s; - - for (s = string; *s; s++) - ; - return (size_t)(s - string); -} - - -int xxxinit(lkmtp, cmd, ver) - struct lkm_table *lkmtp; - int cmd, ver; -{ - DISPATCH(lkmtp, cmd, ver, ipfaction, ipfaction, ipfaction); -} -#else /* __FREEBSD_version >= 220000 */ -# ifdef IPFILTER_LKM -# include - -# if (__FreeBSD_version >= 300000) -MOD_DEV(if_ipf, LM_DT_CHAR, CDEV_MAJOR, &ipf_cdevsw); -# else -MOD_DECL(if_ipf); - - -static struct lkm_dev _module = { - LM_DEV, - LKM_VERSION, - IPL_VERSION, - CDEV_MAJOR, - LM_DT_CHAR, - { (void *)&ipf_cdevsw } -}; -# endif - - -int if_ipf __P((struct lkm_table *, int, int)); - - -int if_ipf(lkmtp, cmd, ver) - struct lkm_table *lkmtp; - int cmd, ver; -{ -# if (__FreeBSD_version >= 300000) - MOD_DISPATCH(if_ipf, lkmtp, cmd, ver, ipfaction, ipfaction, ipfaction); -# else - DISPATCH(lkmtp, cmd, ver, ipfaction, ipfaction, ipfaction); -# endif -} -# endif /* IPFILTER_LKM */ -static ipf_devsw_installed = 0; - -static void ipf_drvinit __P((void *unused)) -{ - dev_t dev; -# ifdef DEVFS - void **tp = ipf_devfs; -# endif - - if (!ipf_devsw_installed ) { - dev = makedev(CDEV_MAJOR, 0); - cdevsw_add(&dev, &ipf_cdevsw, NULL); - ipf_devsw_installed = 1; - -# ifdef DEVFS - tp[IPL_LOGIPF] = devfs_add_devswf(&ipf_cdevsw, IPL_LOGIPF, - DV_CHR, 0, 0, 0600, "ipf"); - tp[IPL_LOGNAT] = devfs_add_devswf(&ipf_cdevsw, IPL_LOGNAT, - DV_CHR, 0, 0, 0600, "ipnat"); - tp[IPL_LOGSTATE] = devfs_add_devswf(&ipf_cdevsw, IPL_LOGSTATE, - DV_CHR, 0, 0, 0600, - "ipstate"); - tp[IPL_LOGAUTH] = devfs_add_devswf(&ipf_cdevsw, IPL_LOGAUTH, - DV_CHR, 0, 0, 0600, - "ipauth"); -# endif - } -} - - -#ifdef SYSCTL_IPF -int -sysctl_ipf_int SYSCTL_HANDLER_ARGS -{ - int error = 0; - - if (arg1) - error = SYSCTL_OUT(req, arg1, sizeof(int)); - else - error = SYSCTL_OUT(req, &arg2, sizeof(int)); - - if (error || !req->newptr) - return (error); - - if 
(!arg1) - error = EPERM; - else { - if ((oidp->oid_kind & CTLFLAG_OFF) && (ipf_running > 0)) - error = EBUSY; - else - error = SYSCTL_IN(req, arg1, sizeof(int)); - } - return (error); -} -#endif - - -# if defined(IPFILTER_LKM) || \ - defined(__FreeBSD_version) && (__FreeBSD_version >= 220000) -SYSINIT(ipfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,ipf_drvinit,NULL) -# endif /* IPFILTER_LKM */ -#endif /* _FreeBSD_version */ - - -/* - * routines below for saving IP headers to buffer - */ -int ipfopen(dev, flags -#if ((BSD >= 199506) || (__FreeBSD_version >= 220000)) -, devtype, p) - int devtype; -# if (__FreeBSD_version >= 500024) - struct thread *p; -# else - struct proc *p; -# endif /* __FreeBSD_version >= 500024 */ -#else -) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - int flags; -{ - u_int unit = GET_MINOR(dev); - - if (IPL_LOGMAX < unit) - unit = ENXIO; - else - unit = 0; - return unit; -} - - -int ipfclose(dev, flags -#if ((BSD >= 199506) || (__FreeBSD_version >= 220000)) -, devtype, p) - int devtype; -# if (__FreeBSD_version >= 500024) - struct thread *p; -# else - struct proc *p; -# endif /* __FreeBSD_version >= 500024 */ -#else -) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - int flags; -{ - u_int unit = GET_MINOR(dev); - - if (IPL_LOGMAX < unit) - unit = ENXIO; - else - unit = 0; - return unit; -} - -/* - * ipfread/ipflog - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. - */ -#if (BSD >= 199306) -int ipfread(dev, uio, ioflag) - int ioflag; -#else -int ipfread(dev, uio) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - register struct uio *uio; -{ - u_int unit = GET_MINOR(dev); - - if (unit < 0) - return ENXIO; - - if (ipf_running < 1) - return EIO; - - if (unit == IPL_LOGSYNC) - return ipfsync_read(uio); - -#ifdef IPFILTER_LOG - return ipflog_read(unit, uio); -#else - return ENXIO; -#endif -} - - -/* - * ipfwrite - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. - */ -#if (BSD >= 199306) -int ipfwrite(dev, uio, ioflag) - int ioflag; -#else -int ipfwrite(dev, uio) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - register struct uio *uio; -{ - - if (ipf_running < 1) - return EIO; - - if (GET_MINOR(dev) == IPL_LOGSYNC) - return ipfsync_write(uio); - return ENXIO; -} diff --git a/contrib/ipfilter/mlf_rule.c b/contrib/ipfilter/mlf_rule.c deleted file mode 100644 index babd2c64a93b..000000000000 --- a/contrib/ipfilter/mlf_rule.c +++ /dev/null @@ -1,168 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - */ -/* - * 29/12/94 Added code from Marc Huber to allow it to allocate - * its own major char number! Way cool patch! 
- */ - - -#include - -#if defined(__FreeBSD__) && (__FreeBSD__ > 1) -# ifdef IPFILTER_LKM -# include -# define ACTUALLY_LKM_NOT_KERNEL -# else -# include -# endif -#endif -#include -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 220000) -# include -# include -# ifdef DEVFS -# include -# endif /*DEVFS*/ -#endif -#include -#include -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) -# include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if BSD >= 199506 -# include -#endif -#if (__FreeBSD_version >= 300000) -# include -#endif -#if (__FreeBSD_version >= 199511) -#include -#include -#include -#include -#include -#include -#include -#include -#endif -#if (__FreeBSD__ > 1) -# include -#endif -#include -#include "netinet/ip_compat.h" -#include "netinet/ip_fil.h" -#include "netinet/ip_rules.h" - - -int xxxinit __P((struct lkm_table *, int, int)); - -#if !defined(__FreeBSD_version) || (__FreeBSD_version < 220000) -MOD_DEV(IPL_VERSION, LM_DT_CHAR, -1, &ipldevsw); -#endif - -static int ipfrule_ioctl __P((struct lkm_table *, int)); - -#if defined(__FreeBSD_version) && (__FreeBSD_version < 220000) - -int xxxinit(lkmtp, cmd, ver) - struct lkm_table *lkmtp; - int cmd, ver; -{ - DISPATCH(lkmtp, cmd, ver, ipfrule_ioctl, ipfrule_ioctl, ipfrule_ioctl); -} -#else /* __FREEBSD_version >= 220000 */ -# ifdef IPFILTER_LKM -# include - -# if (__FreeBSD_version >= 300000) -MOD_MISC(ipfrule); -# else -MOD_DECL(ipfrule); - - -static struct lkm_misc _module = { - LM_MISC, - LKM_VERSION, - "IP Filter rules", - 0, -}; -# endif - - -int ipfrule __P((struct lkm_table *, int, int)); - - -int ipfrule(lkmtp, cmd, ver) - struct lkm_table *lkmtp; - int cmd, ver; -{ -# if (__FreeBSD_version >= 300000) - MOD_DISPATCH(ipfrule, lkmtp, cmd, ver, ipfrule_ioctl, ipfrule_ioctl, - ipfrule_ioctl); -# else - DISPATCH(lkmtp, cmd, ver, ipfrule_ioctl, ipfrule_ioctl, ipfrule_ioctl); -# endif -} -# endif /* IPFILTER_LKM */ - - -int ipfrule_load(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - return ipfrule_add(); -} - - -int ipfrule_unload(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - return ipfrule_remove(); -} - - -static int ipfrule_ioctl(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - int err = 0; - - switch (cmd) - { - case LKM_E_LOAD : - if (lkmexists(lkmtp)) - return EEXIST; - - err = ipfrule_load(lkmtp, cmd); - if (!err) - ipf_refcnt++; - break; - case LKM_E_UNLOAD : - err = ipfrule_unload(lkmtp, cmd); - if (!err) - ipf_refcnt--; - break; - case LKM_E_STAT : - break; - default: - err = EIO; - break; - } - return err; -} -#endif /* _FreeBSD_version */ diff --git a/contrib/ipfilter/mlfk_ipl.c b/contrib/ipfilter/mlfk_ipl.c deleted file mode 100644 index ba1f44f0c105..000000000000 --- a/contrib/ipfilter/mlfk_ipl.c +++ /dev/null @@ -1,529 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. 
- */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#if __FreeBSD_version >= 500000 -# include -#endif -#include -#include -#include - - -#include "netinet/ipl.h" -#include "netinet/ip_compat.h" -#include "netinet/ip_fil.h" -#include "netinet/ip_state.h" -#include "netinet/ip_nat.h" -#include "netinet/ip_auth.h" -#include "netinet/ip_frag.h" -#include "netinet/ip_sync.h" - -extern ipf_main_softc_t ipfmain; - -#if __FreeBSD_version >= 502116 -static struct cdev *ipf_devs[IPL_LOGSIZE]; -#else -static dev_t ipf_devs[IPL_LOGSIZE]; -#endif - -#if 0 -static int sysctl_ipf_int ( SYSCTL_HANDLER_ARGS ); -#endif -static int ipf_modload(void); -static int ipf_modunload(void); - -#if (__FreeBSD_version >= 500024) -# if (__FreeBSD_version >= 502116) -static int ipfopen __P((struct cdev*, int, int, struct thread *)); -static int ipfclose __P((struct cdev*, int, int, struct thread *)); -# else -static int ipfopen __P((dev_t, int, int, struct thread *)); -static int ipfclose __P((dev_t, int, int, struct thread *)); -# endif /* __FreeBSD_version >= 502116 */ -#else -static int ipfopen __P((dev_t, int, int, struct proc *)); -static int ipfclose __P((dev_t, int, int, struct proc *)); -#endif -#if (__FreeBSD_version >= 502116) -static int ipfread __P((struct cdev*, struct uio *, int)); -static int ipfwrite __P((struct cdev*, struct uio *, int)); -#else -static int ipfread __P((dev_t, struct uio *, int)); -static int ipfwrite __P((dev_t, struct uio *, int)); -#endif /* __FreeBSD_version >= 502116 */ - - - -SYSCTL_DECL(_net_inet); -#define SYSCTL_IPF(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \ - ptr, val, sysctl_ipf_int, "I", descr); -#define CTLFLAG_OFF 0x00800000 /* IPFilter must be disabled */ -#define CTLFLAG_RWO (CTLFLAG_RW|CTLFLAG_OFF) -SYSCTL_NODE(_net_inet, OID_AUTO, ipf, CTLFLAG_RW, 0, "IPF"); -#if 0 -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_flags, CTLFLAG_RW, &ipf_flags, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_pass, CTLFLAG_RW, &ipf_pass, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_active, CTLFLAG_RD, &ipf_active, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpidletimeout, CTLFLAG_RWO, - &ipf_tcpidletimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcphalfclosed, CTLFLAG_RWO, - &ipf_tcphalfclosed, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosewait, CTLFLAG_RWO, - &ipf_tcpclosewait, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcplastack, CTLFLAG_RWO, - &ipf_tcplastack, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcptimeout, CTLFLAG_RWO, - &ipf_tcptimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosed, CTLFLAG_RWO, - &ipf_tcpclosed, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_udptimeout, CTLFLAG_RWO, - &ipf_udptimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_udpacktimeout, CTLFLAG_RWO, - &ipf_udpacktimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_icmptimeout, CTLFLAG_RWO, - &ipf_icmptimeout, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_defnatage, CTLFLAG_RWO, - &ipf_nat_defage, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_ipfrttl, CTLFLAG_RW, - &ipf_ipfrttl, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_running, CTLFLAG_RD, - &ipf_running, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_statesize, CTLFLAG_RWO, - &ipf_state_size, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_statemax, CTLFLAG_RWO, - &ipf_state_max, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_nattable_sz, CTLFLAG_RWO, - &ipf_nat_table_sz, 0, ""); 
-SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_natrules_sz, CTLFLAG_RWO, - &ipf_nat_maprules_sz, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_rdrrules_sz, CTLFLAG_RWO, - &ipf_nat_rdrrules_sz, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_hostmap_sz, CTLFLAG_RWO, - &ipf_nat_hostmap_sz, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_authsize, CTLFLAG_RWO, - &ipf_auth_size, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_authused, CTLFLAG_RD, - &ipf_auth_used, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_defaultauthage, CTLFLAG_RW, - &ipf_auth_defaultage, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_chksrc, CTLFLAG_RW, &ipf_chksrc, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_minttl, CTLFLAG_RW, &ipf_minttl, 0, ""); -#endif - -#define CDEV_MAJOR 79 -#include -#if __FreeBSD_version >= 500043 -# include -static int ipfpoll(struct cdev *dev, int events, struct thread *td); - -static struct cdevsw ipf_cdevsw = { -#if __FreeBSD_version >= 502103 - .d_version = D_VERSION, - .d_flags = 0, /* D_NEEDGIANT - Should be SMP safe */ -#endif - .d_open = ipfopen, - .d_close = ipfclose, - .d_read = ipfread, - .d_write = ipfwrite, - .d_ioctl = ipfioctl, - .d_poll = ipfpoll, - .d_name = "ipf", -#if __FreeBSD_version < 600000 - .d_maj = CDEV_MAJOR, -#endif -}; -#else -static int ipfpoll(dev_t dev, int events, struct proc *td); - -static struct cdevsw ipf_cdevsw = { - /* open */ ipfopen, - /* close */ ipfclose, - /* read */ ipfread, - /* write */ ipfwrite, - /* ioctl */ ipfioctl, - /* poll */ ipfpoll, - /* mmap */ nommap, - /* strategy */ nostrategy, - /* name */ "ipf", - /* maj */ CDEV_MAJOR, - /* dump */ nodump, - /* psize */ nopsize, - /* flags */ 0, -# if (__FreeBSD_version < 500043) - /* bmaj */ -1, -# endif -# if (__FreeBSD_version >= 430000) - /* kqfilter */ NULL -# endif -}; -#endif - -static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, IPAUTH_NAME, - IPSYNC_NAME, IPSCAN_NAME, IPLOOKUP_NAME, NULL }; - - -static int -ipfilter_modevent(module_t mod, int type, void *unused) -{ - int error = 0; - - switch (type) - { - case MOD_LOAD : - error = ipf_modload(); - break; - - case MOD_UNLOAD : - error = ipf_modunload(); - break; - default: - error = EINVAL; - break; - } - return error; -} - - -static int -ipf_modload() -{ - char *defpass, *c, *str; - int i, j, error; - - if (ipf_load_all() != 0) - return EIO; - - if (ipf_create_all(&ipfmain) == NULL) - return EIO; - - error = ipfattach(&ipfmain); - if (error) - return error; - - for (i = 0; i < IPL_LOGSIZE; i++) - ipf_devs[i] = NULL; - - for (i = 0; (str = ipf_devfiles[i]); i++) { - c = NULL; - for(j = strlen(str); j > 0; j--) - if (str[j] == '/') { - c = str + j + 1; - break; - } - if (!c) - c = str; - ipf_devs[i] = make_dev(&ipf_cdevsw, i, 0, 0, 0600, c); - } - - error = ipf_pfil_hook(); - if (error != 0) - return error; - ipf_event_reg(); - - if (FR_ISPASS(ipfmain.ipf_pass)) - defpass = "pass"; - else if (FR_ISBLOCK(ipfmain.ipf_pass)) - defpass = "block"; - else - defpass = "no-match -> block"; - - printf("%s initialized. 
Default = %s all, Logging = %s%s\n", - ipfilter_version, defpass, -#ifdef IPFILTER_LOG - "enabled", -#else - "disabled", -#endif -#ifdef IPFILTER_COMPILED - " (COMPILED)" -#else - "" -#endif - ); - return 0; -} - - -static int -ipf_modunload() -{ - int error, i; - - if (ipfmain.ipf_refcnt) - return EBUSY; - - error = ipf_pfil_unhook(); - if (error != 0) - return error; - - if (ipfmain.ipf_running >= 0) { - error = ipfdetach(&ipfmain); - if (error != 0) - return error; - - ipf_destroy_all(&ipfmain); - ipf_unload_all(); - } else - error = 0; - - ipfmain.ipf_running = -2; - - for (i = 0; ipf_devfiles[i]; i++) { - if (ipf_devs[i] != NULL) - destroy_dev(ipf_devs[i]); - } - - printf("%s unloaded\n", ipfilter_version); - - return error; -} - - -static moduledata_t ipfiltermod = { - "ipfilter", - ipfilter_modevent, - 0 -}; - - -DECLARE_MODULE(ipfilter, ipfiltermod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); -#ifdef MODULE_VERSION -MODULE_VERSION(ipfilter, 1); -#endif - - -#if 0 -#ifdef SYSCTL_IPF -int -sysctl_ipf_int ( SYSCTL_HANDLER_ARGS ) -{ - int error = 0; - - if (arg1) - error = SYSCTL_OUT(req, arg1, sizeof(int)); - else - error = SYSCTL_OUT(req, &arg2, sizeof(int)); - - if (error || !req->newptr) - return (error); - - if (!arg1) - error = EPERM; - else { - if ((oidp->oid_kind & CTLFLAG_OFF) && (ipfmain.ipf_running > 0)) - error = EBUSY; - else - error = SYSCTL_IN(req, arg1, sizeof(int)); - } - return (error); -} -#endif -#endif - - -static int -#if __FreeBSD_version >= 500043 -ipfpoll(struct cdev *dev, int events, struct thread *td) -#else -ipfpoll(dev_t dev, int events, struct proc *td) -#endif -{ - u_int unit = GET_MINOR(dev); - int revents; - - if (unit < 0 || unit > IPL_LOGMAX) - return 0; - - revents = 0; - - switch (unit) - { - case IPL_LOGIPF : - case IPL_LOGNAT : - case IPL_LOGSTATE : -#ifdef IPFILTER_LOG - if ((events & (POLLIN | POLLRDNORM)) && ipf_log_canread(&ipfmain, unit)) - revents |= events & (POLLIN | POLLRDNORM); -#endif - break; - case IPL_LOGAUTH : - if ((events & (POLLIN | POLLRDNORM)) && ipf_auth_waiting(&ipfmain)) - revents |= events & (POLLIN | POLLRDNORM); - break; - case IPL_LOGSYNC : - if ((events & (POLLIN | POLLRDNORM)) && ipf_sync_canread(&ipfmain)) - revents |= events & (POLLIN | POLLRDNORM); - if ((events & (POLLOUT | POLLWRNORM)) && ipf_sync_canwrite(&ipfmain)) - revents |= events & (POLLOUT | POLLWRNORM); - break; - case IPL_LOGSCAN : - case IPL_LOGLOOKUP : - default : - break; - } - - if ((revents == 0) && ((events & (POLLIN|POLLRDNORM)) != 0)) - selrecord(td, &ipfmain.ipf_selwait[unit]); - - return revents; -} - - -/* - * routines below for saving IP headers to buffer - */ -static int ipfopen(dev, flags -#if ((BSD >= 199506) || (__FreeBSD_version >= 220000)) -, devtype, p) - int devtype; -# if (__FreeBSD_version >= 500024) - struct thread *p; -# else - struct proc *p; -# endif /* __FreeBSD_version >= 500024 */ -#else -) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - int flags; -{ - u_int unit = GET_MINOR(dev); - int error; - - if (IPL_LOGMAX < unit) - error = ENXIO; - else { - switch (unit) - { - case IPL_LOGIPF : - case IPL_LOGNAT : - case IPL_LOGSTATE : - case IPL_LOGAUTH : - case IPL_LOGLOOKUP : - case IPL_LOGSYNC : -#ifdef IPFILTER_SCAN - case IPL_LOGSCAN : -#endif - error = 0; - break; - default : - error = ENXIO; - break; - } - } - return error; -} - - -static int ipfclose(dev, flags -#if ((BSD >= 199506) || (__FreeBSD_version >= 220000)) -, devtype, p) - int devtype; -# if (__FreeBSD_version >= 500024) - struct 
thread *p; -# else - struct proc *p; -# endif /* __FreeBSD_version >= 500024 */ -#else -) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - int flags; -{ - u_int unit = GET_MINOR(dev); - - if (IPL_LOGMAX < unit) - unit = ENXIO; - else - unit = 0; - return unit; -} - -/* - * ipfread/ipflog - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. - */ -#if (BSD >= 199306) -static int ipfread(dev, uio, ioflag) - int ioflag; -#else -static int ipfread(dev, uio) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - struct uio *uio; -{ - u_int unit = GET_MINOR(dev); - - if (unit < 0) - return ENXIO; - - if (ipfmain.ipf_running < 1) - return EIO; - - if (unit == IPL_LOGSYNC) - return ipf_sync_read(&ipfmain, uio); - -#ifdef IPFILTER_LOG - return ipf_log_read(&ipfmain, unit, uio); -#else - return ENXIO; -#endif -} - - -/* - * ipfwrite - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. - */ -#if (BSD >= 199306) -static int ipfwrite(dev, uio, ioflag) - int ioflag; -#else -static int ipfwrite(dev, uio) -#endif -#if (__FreeBSD_version >= 502116) - struct cdev *dev; -#else - dev_t dev; -#endif - struct uio *uio; -{ - - if (ipfmain.ipf_running < 1) - return EIO; - - if (GET_MINOR(dev) == IPL_LOGSYNC) - return ipf_sync_write(&ipfmain, uio); - return ENXIO; -} diff --git a/contrib/ipfilter/mlh_rule.c b/contrib/ipfilter/mlh_rule.c deleted file mode 100644 index cc2a74c86264..000000000000 --- a/contrib/ipfilter/mlh_rule.c +++ /dev/null @@ -1,114 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. 
- * - */ -/* #pragma ident "@(#)solaris.c 1.12 6/5/96 (C) 1995 Darren Reed"*/ - -/*typedef unsigned int spustate_t;*/ -struct uio; - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ip_compat.h" -#include "ip_fil.h" -#include "ip_rules.h" - - -/* - * Driver Header - */ -static drv_info_t ipf_drv_info = { - "IP Filter Rules", /* type */ - "pseudo", /* class */ - DRV_PSEUDO|DRV_SAVE_CONF|DRV_MP_SAFE, /* flags */ - -1, /* b_major */ - -1, /* c_major */ - NULL, /* cdio */ - NULL, /* gio_private */ - NULL, /* cdio_private */ -}; - - -extern struct mod_operations gio_mod_ops; -static drv_info_t ipf_drv_info; -extern struct mod_conf_data ipf_conf_data; - -static struct mod_type_data ipf_drv_link = { - IPL_VERSION, (void *)NULL -}; - -static struct modlink ipf_mod_link[] = { - { &gio_mod_ops, (void *)&ipf_drv_link }, - { NULL, (void *)NULL } -}; - -struct modwrapper ipf_wrapper = { - MODREV, - ipf_load, - ipf_unload, - (void (*)())NULL, - (void *)&ipf_conf_data, - ipf_mod_link -}; - - -static int ipf_load(void *arg) -{ - int i; - - i = ipfrule_add(); - if (!i) - ipf_refcnt--; -#ifdef IPFDEBUG - printf("IP Filter Rules: ipfrule_add() = %d\n", i); -#endif - if (!i) - cmn_err(CE_CONT, "IP Filter Rules: Loaded\n"); - return i; -} - - -static int ipf_unload(void *arg) -{ - int i; - - i = ipfrule_remove(); - if (!i) - ipf_refcnt--; -#ifdef IPFDEBUG - printf("IP Filter Rules: ipfrule_remove() = %d\n", i); -#endif - if (!i) - cmn_err(CE_CONT, "IP Filter Rules: Unloaded\n"); - return i; -} diff --git a/contrib/ipfilter/mln_ipl.c b/contrib/ipfilter/mln_ipl.c deleted file mode 100644 index 28b54071634d..000000000000 --- a/contrib/ipfilter/mln_ipl.c +++ /dev/null @@ -1,355 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - * - */ -/* - * 29/12/94 Added code from Marc Huber to allow it to allocate - * its own major char number! Way cool patch! - */ - - -#include - -/* - * Post NetBSD 1.2 has the PFIL interface for packet filters. This turns - * on those hooks. We don't need any special mods with this! 
- */ -#if (defined(NetBSD) && (NetBSD > 199609) && (NetBSD <= 1991011)) || \ - (defined(NetBSD1_2) && NetBSD1_2 > 1) -# define NETBSD_PF -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "ipl.h" -#include "ip_compat.h" -#include "ip_fil.h" -#include "ip_auth.h" -#include "ip_state.h" -#include "ip_nat.h" -#include "ip_sync.h" - -#if !defined(__NetBSD_Version__) || __NetBSD_Version__ < 103050000 -#define vn_lock(v,f) VOP_LOCK(v) -#endif - -#if !defined(VOP_LEASE) && defined(LEASE_CHECK) -#define VOP_LEASE LEASE_CHECK -#endif - - -extern int lkmenodev __P((void)); - -#if NetBSD >= 199706 -int ipflkm_lkmentry __P((struct lkm_table *, int, int)); -#else -int xxxinit __P((struct lkm_table *, int, int)); -#endif -static int ipf_unload __P((void)); -static int ipf_load __P((void)); -static int ipf_remove __P((void)); -static int ipfaction __P((struct lkm_table *, int)); -static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, - IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME, - IPLOOKUP_NAME, NULL }; - -int ipf_major = 0; -extern ipf_main_softc_t ipfmain; -extern const struct cdevsw ipl_cdevsw; - -#if defined(__NetBSD__) && (__NetBSD_Version__ >= 106080000) -MOD_DEV(IPL_VERSION, "ipf", NULL, -1, &ipl_cdevsw, -1); -#else -MOD_DEV(IPL_VERSION, LM_DT_CHAR, -1, &ipldevsw); -#endif - -extern int vd_unuseddev __P((void)); -extern struct cdevsw cdevsw[]; -extern int nchrdev; - - -int -#if NetBSD >= 199706 -ipflkm_lkmentry(lkmtp, cmd, ver) -#else -xxxinit(lkmtp, cmd, ver) -#endif - struct lkm_table *lkmtp; - int cmd, ver; -{ - DISPATCH(lkmtp, cmd, ver, ipfaction, ipfaction, ipfaction); -} - - -static int -ipfaction(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ -#if !defined(__NetBSD__) || (__NetBSD_Version__ < 106080000) - int i; -#endif - struct lkm_dev *args = lkmtp->private.lkm_dev; - int err = 0; - - switch (cmd) - { - case LKM_E_LOAD : - if (lkmexists(lkmtp)) - return EEXIST; - -#if defined(__NetBSD__) && (__NetBSD_Version__ >= 106080000) -# if (__NetBSD_Version__ < 200000000) - err = devsw_attach(args->lkm_devname, - args->lkm_bdev, &args->lkm_bdevmaj, - args->lkm_cdev, &args->lkm_cdevmaj); - if (err != 0) - return (err); -# endif - ipf_major = args->lkm_cdevmaj; -#else - for (i = 0; i < nchrdev; i++) - if (cdevsw[i].d_open == (dev_type_open((*)))lkmenodev || - cdevsw[i].d_open == ipfopen) - break; - if (i == nchrdev) { - printf("IP Filter: No free cdevsw slots\n"); - return ENODEV; - } - - ipf_major = i; - args->lkm_offset = i; /* slot in cdevsw[] */ -#endif - printf("IP Filter: loaded into slot %d\n", ipf_major); - return ipf_load(); - case LKM_E_UNLOAD : -#if defined(__NetBSD__) && (__NetBSD_Version__ >= 106080000) - devsw_detach(args->lkm_bdev, args->lkm_cdev); - args->lkm_bdevmaj = -1; - args->lkm_cdevmaj = -1; -#endif - err = ipf_unload(); - if (!err) - printf("IP Filter: unloaded from slot %d\n", - ipf_major); - break; - case LKM_E_STAT : - break; - default: - err = EIO; - break; - } - return err; -} - - -static int -ipf_remove() -{ - char *name; - struct nameidata nd; - int error, i; - - for (i = 0; (name = ipf_devfiles[i]); i++) { -#if (__NetBSD_Version__ > 106009999) -# if (__NetBSD_Version__ > 399001400) -# if (__NetBSD_Version__ > 499001400) - NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, UIO_SYSSPACE, - name); -# else - NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, UIO_SYSSPACE, - 
name, curlwp); -# endif -# else - NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, UIO_SYSSPACE, - name, curproc); -# endif -#else - NDINIT(&nd, DELETE, LOCKPARENT, UIO_SYSSPACE, name, curproc); -#endif - if ((error = namei(&nd))) - return (error); -#if (__NetBSD_Version__ > 399001400) -# if (__NetBSD_Version__ > 399002000) -# if (__NetBSD_Version__ < 499001400) - VOP_LEASE(nd.ni_dvp, curlwp, curlwp->l_cred, LEASE_WRITE); -# endif -# else - VOP_LEASE(nd.ni_dvp, curlwp, curlwp->l_proc->p_ucred, LEASE_WRITE); -# endif -#else - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); -#endif -#if !defined(__NetBSD_Version__) || (__NetBSD_Version__ < 106000000) - vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY); -#endif -#if (__NetBSD_Version__ >= 399002000) -# if (__NetBSD_Version__ < 499001400) - VOP_LEASE(nd.ni_vp, curlwp, curlwp->l_cred, LEASE_WRITE); -# endif -#else -# if (__NetBSD_Version__ > 399001400) - VOP_LEASE(nd.ni_vp, curlwp, curlwp->l_proc->p_ucred, LEASE_WRITE); -# else - VOP_LEASE(nd.ni_vp, curproc, curproc->p_ucred, LEASE_WRITE); -# endif -#endif - (void) VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); - } - return 0; -} - - -static int -ipf_unload() -{ - int error = 0; - - /* - * Unloading - remove the filter rule check from the IP - * input/output stream. - */ - if (ipfmain.ipf_refcnt) - error = EBUSY; - else if (ipfmain.ipf_running >= 0) { - error = ipfdetach(&ipfmain); - if (error == 0) { - ipf_destroy_all(&ipfmain); - ipf_unload_all(); - } - } - - if (error == 0) { - ipfmain.ipf_running = -2; - error = ipf_remove(); - printf("%s unloaded\n", ipfilter_version); - } - return error; -} - - -static int -ipf_load() -{ - struct nameidata nd; - struct vattr vattr; - int error = 0, fmode = S_IFCHR|0600, i; - char *name; - - /* - * XXX Remove existing device nodes prior to creating new ones - * XXX using the assigned LKM device slot's major number. In a - * XXX perfect world we could use the ones specified by cdevsw[]. 
- */ - (void)ipf_remove(); - - bzero((char *)&ipfmain, sizeof(ipfmain)); - error = ipf_load_all(); - if (error != 0) - return error; - if (ipf_create_all(&ipfmain) == NULL) { - ipf_unload_all(); - return EIO; - } - - error = ipfattach(&ipfmain); - if (error != 0) { - (void) ipf_unload(); - return error; - } - - for (i = 0; (error == 0) && (name = ipf_devfiles[i]); i++) { -#if (__NetBSD_Version__ > 399001400) -# if (__NetBSD_Version__ > 499001400) - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE, name); -# else - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE, name, curlwp); -# endif -#else - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE, name, curproc); -#endif - if ((error = namei(&nd))) - break; - if (nd.ni_vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(nd.ni_vp); - error = EEXIST; - break; - } - VATTR_NULL(&vattr); - vattr.va_type = VCHR; - vattr.va_mode = (fmode & 07777); - vattr.va_rdev = (ipf_major << 8) | i; -#if (__NetBSD_Version__ > 399001400) -# if (__NetBSD_Version__ >= 399002000) -# if (__NetBSD_Version__ < 499001400) - VOP_LEASE(nd.ni_dvp, curlwp, curlwp->l_cred, LEASE_WRITE); -# endif -# else - VOP_LEASE(nd.ni_dvp, curlwp, curlwp->l_proc->p_ucred, LEASE_WRITE); -# endif -#else - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); -#endif - error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); - if (error == 0) - vput(nd.ni_vp); - } - - if (error == 0) { - char *defpass; - - if (FR_ISPASS(ipfmain.ipf_pass)) - defpass = "pass"; - else if (FR_ISBLOCK(ipfmain.ipf_pass)) - defpass = "block"; - else - defpass = "no-match -> block"; - - printf("%s initialized. Default = %s all, Logging = %s%s\n", - ipfilter_version, defpass, -#ifdef IPFILTER_LOG - "enabled", -#else - "disabled", -#endif -#ifdef IPFILTER_COMPILED - " (COMPILED)" -#else - "" -#endif - ); - ipfmain.ipf_running = 1; - } - return error; -} diff --git a/contrib/ipfilter/mln_rule.c b/contrib/ipfilter/mln_rule.c deleted file mode 100644 index 2df3376816b4..000000000000 --- a/contrib/ipfilter/mln_rule.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "ip_compat.h" -#include "ip_fil.h" -#include "ip_rules.h" - - -static int ipfruleaction __P((struct lkm_table *, int)); - -#ifdef IPFILTER_LKM -# if NetBSD >= 199706 -int ipfrule_lkmentry __P((struct lkm_table *, int, int)); -# else -int xxxinit __P((struct lkm_table *, int, int)); -# endif - - -MOD_MISC("IPFilter Rules"); - -# if NetBSD >= 199706 -int ipfrule_lkmentry(lkmtp, cmd, ver) -# else -int xxxinit(lkmtp, cmd, ver) -# endif - struct lkm_table *lkmtp; - int cmd, ver; -{ - DISPATCH(lkmtp, cmd, ver, ipfruleaction, ipfruleaction, ipfruleaction); -} - -static int ipfruleaction(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - int err = 0; - - switch (cmd) - { - case LKM_E_LOAD : - if (lkmexists(lkmtp)) - return EEXIST; - - err = ipfrule_add(); - if (!err) - ipf_refcnt++; - break; - case LKM_E_UNLOAD : - err = ipfrule_remove(); - if (!err) - ipf_refcnt--; - break; - case LKM_E_STAT : - break; - default: - err = EIO; - break; - } - return err; -} -#endif /* IPFILTER_LKM */ diff --git a/contrib/ipfilter/mlo_ipl.c b/contrib/ipfilter/mlo_ipl.c deleted file mode 100644 index 35556fa33f54..000000000000 --- a/contrib/ipfilter/mlo_ipl.c +++ /dev/null @@ -1,364 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "ipl.h" -#include "ip_compat.h" -#include "ip_fil.h" - -#define vn_lock(v,f) VOP_LOCK(v) - -#if !defined(VOP_LEASE) && defined(LEASE_CHECK) -#define VOP_LEASE LEASE_CHECK -#endif - - -extern int lkmenodev __P((void)); - -#if OpenBSD >= 200311 -int if_ipf_lkmentry __P((struct lkm_table *, int, int)); -#else -int if_ipf __P((struct lkm_table *, int, int)); -#endif -static int ipf_unload __P((void)); -static int ipf_load __P((void)); -static int ipf_remove __P((void)); -static int ipfaction __P((struct lkm_table *, int)); -static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, - IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME, - IPLOOKUP_NAME, NULL }; - - -struct cdevsw ipfdevsw = -{ - ipfopen, /* open */ - ipfclose, /* close */ - ipfread, /* read */ - (void *)nullop, /* write */ - ipfioctl, /* ioctl */ - (void *)nullop, /* stop */ - (void *)NULL, /* tty */ - (void *)nullop, /* select */ - (void *)nullop, /* mmap */ - NULL /* strategy */ -}; - -int ipf_major = 0; - -MOD_DEV(IPL_VERSION, LM_DT_CHAR, -1, &ipfdevsw); - -extern int vd_unuseddev __P((void)); -extern struct cdevsw cdevsw[]; -extern int nchrdev; - - -#if OpenBSD >= 200311 -int if_ipf_lkmentry (lkmtp, cmd, ver) -#else -int if_ipf(lkmtp, cmd, ver) -#endif - struct lkm_table *lkmtp; - int cmd, ver; -{ - DISPATCH(lkmtp, cmd, ver, ipfaction, ipfaction, ipfaction); -} - -int lkmexists __P((struct lkm_table *)); /* defined in /sys/kern/kern_lkm.c */ - -static int ipfaction(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - int i; - struct lkm_dev *args = lkmtp->private.lkm_dev; - int err = 0; - - switch (cmd) - { - case LKM_E_LOAD : - if (lkmexists(lkmtp)) - return EEXIST; - - for (i = 0; i < nchrdev; i++) - if (cdevsw[i].d_open == (dev_type_open((*)))lkmenodev || - cdevsw[i].d_open == ipfopen) - break; 
- if (i == nchrdev) { - printf("IP Filter: No free cdevsw slots\n"); - return ENODEV; - } - - ipf_major = i; - args->lkm_offset = i; /* slot in cdevsw[] */ - printf("IP Filter: loaded into slot %d\n", ipf_major); - return ipf_load(); - case LKM_E_UNLOAD : - err = ipf_unload(); - if (!err) - printf("IP Filter: unloaded from slot %d\n", - ipf_major); - break; - case LKM_E_STAT : - break; - default: - err = EIO; - break; - } - return err; -} - - -static int ipf_remove() -{ - struct nameidata nd; - int error, i; - char *name; - - for (i = 0; (name = ipf_devfiles[i]); i++) { -#if OpenBSD >= 200311 - NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_SYSSPACE, - name, curproc); -#else - NDINIT(&nd, DELETE, LOCKPARENT, UIO_SYSSPACE, name, curproc); -#endif - if ((error = namei(&nd))) - return (error); - VOP_LEASE(nd.ni_vp, curproc, curproc->p_ucred, LEASE_WRITE); -#if OpenBSD < 200311 - VOP_LOCK(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY, curproc); - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); -#else - (void)uvm_vnp_uncache(nd.ni_vp); - - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); - VOP_LEASE(nd.ni_vp, curproc, curproc->p_ucred, LEASE_WRITE); -#endif - (void) VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); - } - return 0; -} - - -static int ipf_unload() -{ - int error = 0; - - /* - * Unloading - remove the filter rule check from the IP - * input/output stream. - */ - if (ipf_refcnt) - error = EBUSY; - else if (ipf_running >= 0) - error = ipfdetach(); - - if (error == 0) { - ipf_running = -2; - error = ipf_remove(); - printf("%s unloaded\n", ipfilter_version); - } - return error; -} - - -static int ipf_load() -{ - struct nameidata nd; - struct vattr vattr; - int error = 0, fmode = S_IFCHR|0600, i; - char *name; - - /* - * XXX Remove existing device nodes prior to creating new ones - * XXX using the assigned LKM device slot's major number. In a - * XXX perfect world we could use the ones specified by cdevsw[]. - */ - (void)ipf_remove(); - - error = ipfattach(); - - for (i = 0; (error == 0) && (name = ipf_devfiles[i]); i++) { - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE, name, curproc); - if ((error = namei(&nd))) - break; - if (nd.ni_vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(nd.ni_vp); - error = EEXIST; - break; - } - VATTR_NULL(&vattr); - vattr.va_type = VCHR; - vattr.va_mode = (fmode & 07777); - vattr.va_rdev = (ipf_major << 8) | i; - VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); - error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); - } - - if (error == 0) { - char *defpass; - - if (FR_ISPASS(ipf_pass)) - defpass = "pass"; - else if (FR_ISBLOCK(ipf_pass)) - defpass = "block"; - else - defpass = "no-match -> block"; - - printf("%s initialized. 
Default = %s all, Logging = %s%s\n", - ipfilter_version, defpass, -#ifdef IPFILTER_LOG - "enabled", -#else - "disabled", -#endif -#ifdef IPFILTER_COMPILED - " (COMPILED)" -#else - "" -#endif - ); - ipf_running = 1; - } - return error; -} - - -/* - * routines below for saving IP headers to buffer - */ -int -ipfopen(dev, flags, devtype, p) - dev_t dev; - int flags; - int devtype; - struct proc *p; -{ - u_int min = GET_MINOR(dev); - int error; - - if (IPL_LOGMAX < min) { - error = ENXIO; - } else { - switch (unit) - { - case IPL_LOGIPF : - case IPL_LOGNAT : - case IPL_LOGSTATE : - case IPL_LOGAUTH : - case IPL_LOGLOOKUP : - case IPL_LOGSYNC : -#ifdef IPFILTER_SCAN - case IPL_LOGSCAN : -#endif - error = 0; - break; - default : - error = ENXIO; - break; - } - } - return error; -} - - -int -ipfclose(dev, flags, devtype, p) - dev_t dev; - int flags; - int devtype; - struct proc *p; -{ - u_int min = GET_MINOR(dev); - - if (IPL_LOGMAX < min) - min = ENXIO; - else - min = 0; - return min; -} - - -/* - * ipfread/ipflog - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. - */ -int -ipfread(dev, uio, ioflag) - dev_t dev; - register struct uio *uio; - int ioflag; -{ - - if (ipf_running < 1) - return EIO; - - if (GET_MINOR(dev) == IPL_LOGSYNC) - return ipfsync_read(uio); - -#ifdef IPFILTER_LOG - return ipflog_read(GET_MINOR(dev), uio); -#else - return ENXIO; -#endif -} - - -/* - * ipfwrite - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. - */ -int -#if (BSD >= 199306) -ipfwrite(dev, uio, ioflag) - int ioflag; -#else -ipfwrite(dev, uio) -#endif - dev_t dev; - register struct uio *uio; -{ - - if (ipf_running < 1) - return EIO; - - if (GET_MINOR(dev) == IPL_LOGSYNC) - return ipfsync_write(uio); - return ENXIO; -} diff --git a/contrib/ipfilter/mlo_rule.c b/contrib/ipfilter/mlo_rule.c deleted file mode 100644 index dbd4305970ee..000000000000 --- a/contrib/ipfilter/mlo_rule.c +++ /dev/null @@ -1,80 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "ip_compat.h" -#include "ip_fil.h" -#include "ip_rules.h" - - -#ifdef IPFILTER_LKM - -static int ipfruleaction __P((struct lkm_table *, int)); - -int ipfrule __P((struct lkm_table *, int, int)); - - -MOD_MISC("IPFilter Rules"); - -int ipfrule(lkmtp, cmd, ver) - struct lkm_table *lkmtp; - int cmd, ver; -{ - DISPATCH(lkmtp, cmd, ver, ipfruleaction, ipfruleaction, ipfruleaction); -} - -int lkmexists __P((struct lkm_table *)); /* defined in /sys/kern/kern_lkm.c */ - -static int ipfruleaction(lkmtp, cmd) - struct lkm_table *lkmtp; - int cmd; -{ - int err = 0; - - switch (cmd) - { - case LKM_E_LOAD : - if (lkmexists(lkmtp)) - return EEXIST; - - err = ipfrule_add(); - if (!err) - ipf_refcnt++; - break; - case LKM_E_UNLOAD : - err = ipfrule_remove(); - if (!err) - ipf_refcnt--; - break; - case LKM_E_STAT : - break; - default: - err = EIO; - break; - } - return err; -} -#endif /* IPFILTER_LKM */ diff --git a/contrib/ipfilter/mls_ipl.c b/contrib/ipfilter/mls_ipl.c deleted file mode 100644 index 4388b617e631..000000000000 --- a/contrib/ipfilter/mls_ipl.c +++ /dev/null @@ -1,351 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - */ -/* - * 29/12/94 Added code from Marc Huber to allow it to allocate - * its own major char number! Way cool patch! - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(sun4c) || defined(sun4m) -# include -#endif -#include -#include -#include -#include -#include -#include -#include -#include "ipl.h" -#include "ip_compat.h" -#include "ip_fil.h" - - -#if !defined(lint) -static const char sccsid[] = "@(#)mls_ipl.c 2.6 10/15/95 (C) 1993-2000 Darren Reed"; -static const char rcsid[] = "@(#)$Id$"; -#endif - -extern int ipfdetach __P((void)); -#ifndef IPFILTER_LOG -#define ipfread nulldev -#endif -extern int nulldev __P((void)); -extern int errno; - -extern int nodev __P((void)); - -static int unload __P((void)); -static int ipf_attach __P((void)); -int xxxinit __P((u_int, struct vddrv *, caddr_t, struct vdstat *)); -static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, - IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME, - IPLOOKUP_NAME, NULL }; -static int ipfopen __P((dev_t, int)); -static int ipfclose __P((dev_t, int)); -static int ipfread __P((dev_t, struct uio *)); -static int ipfwrite __P((dev_t, struct uio *)); - - -struct cdevsw ipfdevsw = -{ - ipfopen, ipfclose, ipfread, nulldev, - ipfioctl, nulldev, nulldev, nulldev, - 0, nulldev, -}; - - -struct dev_ops ipf_ops = -{ - 1, - ipfidentify, - ipfattach, - ipfopen, - ipfclose, - ipfread, - ipfwrite, - NULL, /* strategy */ - NULL, /* dump */ - 0, /* psize */ - ipfioctl, - NULL, /* reset */ - NULL /* mmap */ -}; - -int ipf_major = 0; - -#ifdef sun4m -struct vdldrv vd = -{ - VDMAGIC_PSEUDO, - IPL_VERSION, - &ipf_ops, - NULL, - &ipfdevsw, - 0, - 0, - NULL, - NULL, - NULL, - 0, - 1, -}; -#else /* sun4m */ -struct vdldrv vd = -{ - VDMAGIC_PSEUDO, /* magic */ - IPL_VERSION, -#ifdef sun4c - &ipf_ops, /* dev_ops */ -#else - NULL, /* struct mb_ctlr *mb_ctlr */ - NULL, /* struct mb_driver *mb_driver */ - NULL, /* struct mb_device *mb_device */ - 0, /* num ctlrs */ - 1, /* numdevs */ -#endif /* sun4c */ - NULL, /* bdevsw */ - 
&ipfdevsw, /* cdevsw */ - 0, /* block major */ - 0, /* char major */ -}; -#endif /* sun4m */ - -extern int vd_unuseddev __P((void)); -extern struct cdevsw cdevsw[]; -extern int nchrdev; - -xxxinit(fc, vdp, data, vds) - u_int fc; - struct vddrv *vdp; - caddr_t data; - struct vdstat *vds; -{ - struct vdioctl_load *vdi = (struct vdioctl_load *)data; - - switch (fc) - { - case VDLOAD: - { - struct vdconf *vdc; - if (vdi && vdi->vdi_userconf) - for (vdc = vdi->vdi_userconf; vdc->vdc_type; vdc++) - if (vdc->vdc_type == VDCCHARMAJOR) { - ipf_major = vdc->vdc_data; - break; - } - - if (!ipf_major) { - while (ipf_major < nchrdev && - cdevsw[ipf_major].d_open != vd_unuseddev) - ipf_major++; - if (ipf_major == nchrdev) - return ENODEV; - } - vdp->vdd_vdtab = (struct vdlinkage *)&vd; - vd.Drv_charmajor = ipf_major; - return ipf_attach(); - } - case VDUNLOAD: - return unload(); - case VDSTAT: - return 0; - default: - return EIO; - } -} - - -static int -unload() -{ - int err = 0, i; - char *name; - - if (ipf_refcnt != 0) - err = EBUSY; - else if (ipf_running >= 0) - err = ipfdetach(); - if (err) - return err; - - ipf_running = -2; - for (i = 0; (name = ipf_devfiles[i]); i++) - (void) vn_remove(name, UIO_SYSSPACE, FILE); - printf("%s unloaded\n", ipfilter_version); - return 0; -} - - -static int -ipf_attach() -{ - struct vnode *vp; - struct vattr vattr; - int error = 0, fmode = S_IFCHR|0600, i; - char *name; - - error = ipfattach(); - if (error) - return error; - - for (i = 0; (name = ipf_devfiles[i]); i++) { - (void) vn_remove(name, UIO_SYSSPACE, FILE); - vattr_null(&vattr); - vattr.va_type = MFTOVT(fmode); - vattr.va_mode = (fmode & 07777); - vattr.va_rdev = (ipf_major << 8) | i; - - error = vn_create(name, UIO_SYSSPACE, &vattr, EXCL, 0, &vp); - if (error) { - printf("IP Filter: vn_create(%s) = %d\n", name, error); - break; - } else { - VN_RELE(vp); - } - } - - if (error == 0) { - char *defpass; - - if (FR_ISPASS(ipf_pass)) - defpass = "pass"; - else if (FR_ISBLOCK(ipf_pass)) - defpass = "block"; - else - defpass = "no-match -> block"; - - printf("%s initialized. Default = %s all, Logging = %s%s\n", - ipfilter_version, defpass, -#ifdef IPFILTER_LOG - "enabled", -#else - "disabled", -#endif -#ifdef IPFILTER_COMPILED - " (COMPILED)" -#else - "" -#endif - ); - ipf_running = 1; - } - return error; -} - - -/* - * routines below for saving IP headers to buffer - */ -static int -ipfopen(dev, flags) - dev_t dev; - int flags; -{ - u_int unit = GET_MINOR(dev); - int error; - - if (IPL_LOGMAX < unit) { - error = ENXIO; - } else { - switch (unit) - { - case IPL_LOGIPF : - case IPL_LOGNAT : - case IPL_LOGSTATE : - case IPL_LOGAUTH : - case IPL_LOGLOOKUP : - case IPL_LOGSYNC : -#ifdef IPFILTER_SCAN - case IPL_LOGSCAN : -#endif - error = 0; - break; - default : - error = ENXIO; - break; - } - } - return error; -} - - -static int -ipfclose(dev, flags) - dev_t dev; - int flags; -{ - u_int unit = GET_MINOR(dev); - - if (IPL_LOGMAX < unit) - unit = ENXIO; - else - unit = 0; - return unit; -} - - -/* - * ipfread/ipflog - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. 
- */ -static int -ipfread(dev, uio) - dev_t dev; - register struct uio *uio; -{ - - if (ipf_running < 1) { - ipfmain.ipf_interror = 130006; - return EIO; - } - -#ifdef IPFILTER_LOG - return ipflog_read(GET_MINOR(dev), uio); -#else - ipfmain.ipf_interror = 130007; - return ENXIO; -#endif -} - - -/* - * ipfwrite - */ -static int -ipfwrite(dev, uio) - dev_t dev; - register struct uio *uio; -{ - - if (ipf_running < 1) { - ipfmain.ipf_interror = 130008; - return EIO; - } - - if (getminor(dev) == IPL_LOGSYNC) - return ipfsync_write(uio); - ipfmain.ipf_interror = 130009; - return ENXIO; -} diff --git a/contrib/ipfilter/mls_rule.c b/contrib/ipfilter/mls_rule.c deleted file mode 100644 index e37df0c89314..000000000000 --- a/contrib/ipfilter/mls_rule.c +++ /dev/null @@ -1,116 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. - */ -/* - * 29/12/94 Added code from Marc Huber to allow it to allocate - * its own major char number! Way cool patch! - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(sun4c) || defined(sun4m) -# include -#endif -#include -#include -#include -#include -#include -#include -#include -#include "ip_compat.h" -#include "ip_fil.h" -#include "ip_rules.h" - - -extern int errno; - - -int xxxinit __P((u_int, struct vddrv *, caddr_t, struct vdstat *)); - -int ipl_major = 0; - -#ifdef sun4m -struct vdldrv vd = -{ - VDMAGIC_USER, - "IP Filter rules", - NULL, - NULL, - NULL, - 0, - 0, - NULL, - NULL, - NULL, - 0, - 1, -}; -#else /* sun4m */ -struct vdldrv vd = -{ - VDMAGIC_USER, /* magic */ - "IP Filter rules", -#ifdef sun4c - NULL, /* dev_ops */ -#else - NULL, /* struct mb_ctlr *mb_ctlr */ - NULL, /* struct mb_driver *mb_driver */ - NULL, /* struct mb_device *mb_device */ - 0, /* num ctlrs */ - 1, /* numdevs */ -#endif /* sun4c */ - NULL, /* bdevsw */ - NULL, /* cdevsw */ - 0, /* block major */ - 0, /* char major */ -}; -#endif /* sun4m */ - - -xxxinit(fc, vdp, data, vds) - u_int fc; - struct vddrv *vdp; - caddr_t data; - struct vdstat *vds; -{ - struct vdioctl_load *vdi = (struct vdioctl_load *)data; - int err; - - switch (fc) - { - case VDLOAD: - err = ipfrule_add(); - if (!err) - ipf_refcnt++; - break; - case VDUNLOAD: - err = ipfrule_remove(); - if (!err) - ipf_refcnt--; - break; - case VDSTAT: - err = 0; - break; - default: - err = EIO; - break; - } -} diff --git a/contrib/ipfilter/mlso_rule.c b/contrib/ipfilter/mlso_rule.c deleted file mode 100644 index a9395f2d2f71..000000000000 --- a/contrib/ipfilter/mlso_rule.c +++ /dev/null @@ -1,130 +0,0 @@ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2012 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. 
- */
-#pragma ident	"@(#)$Id$"
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#if SOLARIS2 >= 6
-# include 
-#endif
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include "ip_compat.h"
-#include "ip_fil.h"
-#include "ip_rules.h"
-
-char _depends_on[] = "drv/ipf";
-
-
-extern ipf_main_softc_t ipfmain;
-extern struct mod_ops mod_miscops;
-static struct modlmisc ipfrulemod = {
-	&mod_miscops,
-	"IP Filter rules"
-};
-
-static struct modlinkage modlink1 = {
-	MODREV_1,
-	&ipfrulemod,
-	NULL
-};
-
-
-int _init()
-{
-	int ipfruleinst;
-
-	ipfruleinst = mod_install(&modlink1);
-#ifdef IPFRULEDEBUG
-	cmn_err(CE_NOTE, "IP Filter Rules: _init() = %d", ipfruleinst);
-#endif
-
-	if (ipfruleinst == 0) {
-		if (ipfmain.ipf_running >= 0) {
-			ipfruleinst = ipfrule_add();
-			if (!ipfruleinst)
-				ipfmain.ipf_refcnt++;
-			else {
-				cmn_err(CE_NOTE,
-					"IP Filter Rules: ipfrule_add failed");
-				ipfruleinst = -1;
-			}
-		} else
-			ipfruleinst = -1;
-	}
-	if (ipfruleinst == 0)
-		cmn_err(CE_CONT, "IP Filter Rules: loaded\n");
-	return ipfruleinst;
-}
-
-
-int _fini(void)
-{
-	int ipfruleinst;
-
-	ipfruleinst = mod_remove(&modlink1);
-#ifdef IPFRULEDEBUG
-	cmn_err(CE_NOTE, "IP Filter Rules: _fini() = %d", ipfruleinst);
-#endif
-	if (ipfruleinst == 0) {
-		ipfruleinst = ipfrule_remove();
-		if (!ipfruleinst)
-			ipfmain.ipf_refcnt--;
-		else
-			ipfruleinst = -1;
-	}
-	if (ipfruleinst == 0)
-		cmn_err(CE_CONT, "IP Filter Rules: unloaded\n");
-	return ipfruleinst;
-}
-
-
-int _info(modinfop)
-	struct modinfo *modinfop;
-{
-	int ipfruleinst;
-
-	ipfruleinst = mod_info(&modlink1, modinfop);
-#ifdef IPFRULEDEBUG
-	cmn_err(CE_NOTE, "IP Filter Rules: _info(%x) = %x",
-		modinfop, ipfruleinst);
-#endif
-	return ipfruleinst;
-}
From 4ca6f22e91487074b91133f90fe3b9aed2e4f606 Mon Sep 17 00:00:00 2001
From: Cy Schubert
Date: Sun, 3 Feb 2019 05:26:10 +0000
Subject: [PATCH 88/90] new_kmem_alloc(9) is a Solaris/illumos malloc(9).

FreeBSD and NetBSD never get here; however, a test for SOLARIS,
redundant as it is, serves to document that this is the illumos
definition. This should help those who come after me to follow the code
more easily.

MFC after:	1 month
---
 sys/contrib/ipfilter/netinet/ip_compat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/contrib/ipfilter/netinet/ip_compat.h b/sys/contrib/ipfilter/netinet/ip_compat.h
index 03cd97e3c8ee..777495b39aed 100644
--- a/sys/contrib/ipfilter/netinet/ip_compat.h
+++ b/sys/contrib/ipfilter/netinet/ip_compat.h
@@ -590,7 +590,7 @@ MALLOC_DECLARE(M_IPFILTER);
 #  define	COPYOUT(a,b,c)	(bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0)
 # endif
 
-# ifndef	KMALLOC
+# if SOLARIS && !defined(KMALLOC)
 #  define	KMALLOC(a,b)	(a) = (b)new_kmem_alloc(sizeof(*(a)), \
 					     KMEM_NOSLEEP)
 #  define	KMALLOCS(a,b,c)	(a) = (b)new_kmem_alloc((c), KMEM_NOSLEEP)
From 2335240c66f0c6742d5d488415d782ed791b76f9 Mon Sep 17 00:00:00 2001
From: Doug Rabson
Date: Sun, 3 Feb 2019 08:15:26 +0000
Subject: [PATCH 89/90] Reduce log spam from rpc.statd

With this change, a failed attempt to contact a host is logged only on
the first try; subsequent retries fail silently.
--- usr.sbin/rpc.statd/file.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/usr.sbin/rpc.statd/file.c b/usr.sbin/rpc.statd/file.c index c2207c73aebf..ed5d00cc808c 100644 --- a/usr.sbin/rpc.statd/file.c +++ b/usr.sbin/rpc.statd/file.c @@ -248,9 +248,12 @@ void init_file(const char *filename) /* Purpose: Perform SM_NOTIFY procedure at specified host Returns: TRUE if success, FALSE if failed. + Notes: Only report failure if verbose is non-zero. Caller will + only set verbose to non-zero for the first attempt to + contact the host. */ -static int notify_one_host(char *hostname) +static int notify_one_host(char *hostname, int verbose) { struct timeval timeout = { 20, 0 }; /* 20 secs timeout */ CLIENT *cli; @@ -277,7 +280,8 @@ static int notify_one_host(char *hostname) (xdrproc_t)xdr_void, &dummy, timeout) != RPC_SUCCESS) { - syslog(LOG_ERR, "Failed to contact rpc.statd at host %s", hostname); + if (verbose) + syslog(LOG_ERR, "Failed to contact rpc.statd at host %s", hostname); clnt_destroy(cli); return (FALSE); } @@ -346,7 +350,7 @@ void notify_hosts(void) { if (hp->notifyReqd) { - if (notify_one_host(hp->hostname)) + if (notify_one_host(hp->hostname, attempts == 0)) { hp->notifyReqd = FALSE; sync_file(); From 3ca1c423aa157ebf21382543b03d84a64af38e47 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Sun, 3 Feb 2019 08:28:02 +0000 Subject: [PATCH 90/90] Teach pfil_ioctl() about VIMAGE. Submitted by: gallatin --- sys/net/pfil.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sys/net/pfil.c b/sys/net/pfil.c index acfb25467fee..b46992148bd0 100644 --- a/sys/net/pfil.c +++ b/sys/net/pfil.c @@ -46,6 +46,8 @@ #include #include #include +#include +#include #include #include @@ -495,6 +497,7 @@ pfil_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, { int error; + CURVNET_SET(TD_TO_VNET(td)); error = 0; switch (cmd) { case PFILIOC_LISTHEADS: @@ -507,9 +510,10 @@ pfil_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, error = pfilioc_link((struct pfilioc_link *)addr); break; default: - return (EINVAL); + error = EINVAL; + break; } - + CURVNET_RESTORE(); return (error); }
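
The CURVNET_SET(TD_TO_VNET(td)) / CURVNET_RESTORE() pair in the last hunk is
the standard VIMAGE idiom: enter the network stack (vnet) of the calling
thread before touching any virtualized state, and switch back on the way out.
The sketch below restates that pattern in a self-contained, hypothetical
character-device ioctl handler; the handler name and the empty command switch
are illustrative only, while CURVNET_SET(), CURVNET_RESTORE() and
TD_TO_VNET() are the real macros from net/vnet.h that the patch relies on.

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <net/vnet.h>

static int
example_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	int error;

	/* Enter the caller's vnet before touching any per-vnet state. */
	CURVNET_SET(TD_TO_VNET(td));
	error = 0;
	switch (cmd) {
	/* ... commands that operate on per-vnet state would go here ... */
	default:
		error = EINVAL;
		break;
	}
	/*
	 * Funnel every path through a single exit point, as the pfil
	 * change above does, so the SET/RESTORE pair stays balanced.
	 */
	CURVNET_RESTORE();
	return (error);
}

On kernels built without "options VIMAGE" both macros compile away to
nothing, so the same code serves virtualized and non-virtualized
configurations alike.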