freebsd-dev/sys/netinet6/sctp6_usrreq.c

1298 lines
33 KiB
C
Raw Normal View History

/*-
* Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* a) Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* b) Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
*
* c) Neither the name of Cisco Systems, Inc. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/* $KAME: sctp6_usrreq.c,v 1.38 2005/08/24 08:08:56 suz Exp $ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <netinet/sctp_os.h>
#include <sys/proc.h>
#include <netinet/sctp_pcb.h>
#include <netinet/sctp_header.h>
#include <netinet/sctp_var.h>
#if defined(INET6)
#include <netinet6/sctp6_var.h>
#endif
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_output.h>
#include <netinet/sctp_uio.h>
#include <netinet/sctp_asconf.h>
#include <netinet/sctputil.h>
#include <netinet/sctp_indata.h>
#include <netinet/sctp_timer.h>
#include <netinet/sctp_auth.h>
#include <netinet/sctp_input.h>
#include <netinet/sctp_output.h>
#include <netinet/sctp_bsd_addr.h>
extern struct protosw inetsw[];
int
sctp6_input(i_pak, offp, proto)
struct mbuf **i_pak;
int *offp;
int proto;
{
struct mbuf *m;
struct ip6_hdr *ip6;
struct sctphdr *sh;
struct sctp_inpcb *in6p = NULL;
struct sctp_nets *net;
int refcount_up = 0;
uint32_t check, calc_check;
uint32_t vrf_id = 0;
struct inpcb *in6p_ip;
struct sctp_chunkhdr *ch;
int length, mlen, offset, iphlen;
uint8_t ecn_bits;
struct sctp_tcb *stcb = NULL;
- fixed autclose to not allow setting on 1-2-1 model. - bounded cookie-life to 1 second minimum in socket option set. - Delayed_ack_time becomes delayed_ack per new socket api document. - Improve port number selection, we now use low/high bounds and no chance of a endless loop. Only one call to random per bind as well. - fixes so set_peer_primary pre-screens addresses to be valid to this host. - maxseg did not allow setting on an assoc basis. We needed to thus track and use an association value instead of a inp value. - Fixed ep get of HB status to report back properly. - use settings flag to tell if assoc level hb is on off not the timer.. since the timer may still run if unconf address are present. - check for crazy ENABLE/DISABLE conditions. - set and get of pmtud (fixed path mtu) not always taking into account ovh. - Getting PMTU info on stcb only needs to return PMTUD_ENABLED if any net is doing PMTU discovery. - Panic or warning fixed to not do so when a valid ip frag is taking place. - sndrcvinfo appearing in both inp and stcb was full size, instead of the non-pad version. This saves about 92 bytes from each struct by carefully converting to use the smaller version. - one-2-one model get(maxseg) would always get ep value, never the tcb's value. - The delayed ack time could be under a tick, this fixes so it bounds it to at least 1 tick for platforms whos tick is more than a ms. - Fragment interleave level set to wrong default value. - Fragment interleave could not set level 0. - Defered stream reset was broken due to a guard check and ntohl issue. - Found two lock order reversals and fixed. - Tighten up address checking, if the user gives an address the sa_len had better be set properly. - Get asoc by assoc-id would return a locked tcb when it was asked not to if the tcb was in the restart hash. - sysctl to dig down and get more association details Reviewed by: gnn
2007-05-28 11:17:24 +00:00
int pkt_len = 0;
int off = *offp;
/* get the VRF and table id's */
if (SCTP_GET_PKT_VRFID(*i_pak, vrf_id)) {
SCTP_RELEASE_PKT(*i_pak);
return (-1);
}
m = SCTP_HEADER_TO_CHAIN(*i_pak);
- fixed autclose to not allow setting on 1-2-1 model. - bounded cookie-life to 1 second minimum in socket option set. - Delayed_ack_time becomes delayed_ack per new socket api document. - Improve port number selection, we now use low/high bounds and no chance of a endless loop. Only one call to random per bind as well. - fixes so set_peer_primary pre-screens addresses to be valid to this host. - maxseg did not allow setting on an assoc basis. We needed to thus track and use an association value instead of a inp value. - Fixed ep get of HB status to report back properly. - use settings flag to tell if assoc level hb is on off not the timer.. since the timer may still run if unconf address are present. - check for crazy ENABLE/DISABLE conditions. - set and get of pmtud (fixed path mtu) not always taking into account ovh. - Getting PMTU info on stcb only needs to return PMTUD_ENABLED if any net is doing PMTU discovery. - Panic or warning fixed to not do so when a valid ip frag is taking place. - sndrcvinfo appearing in both inp and stcb was full size, instead of the non-pad version. This saves about 92 bytes from each struct by carefully converting to use the smaller version. - one-2-one model get(maxseg) would always get ep value, never the tcb's value. - The delayed ack time could be under a tick, this fixes so it bounds it to at least 1 tick for platforms whos tick is more than a ms. - Fragment interleave level set to wrong default value. - Fragment interleave could not set level 0. - Defered stream reset was broken due to a guard check and ntohl issue. - Found two lock order reversals and fixed. - Tighten up address checking, if the user gives an address the sa_len had better be set properly. - Get asoc by assoc-id would return a locked tcb when it was asked not to if the tcb was in the restart hash. - sysctl to dig down and get more association details Reviewed by: gnn
2007-05-28 11:17:24 +00:00
pkt_len = SCTP_HEADER_LEN((*i_pak));
#ifdef SCTP_PACKET_LOGGING
sctp_packet_log(m, pkt_len);
#endif
ip6 = mtod(m, struct ip6_hdr *);
/* Ensure that (sctphdr + sctp_chunkhdr) in a row. */
- fixed autclose to not allow setting on 1-2-1 model. - bounded cookie-life to 1 second minimum in socket option set. - Delayed_ack_time becomes delayed_ack per new socket api document. - Improve port number selection, we now use low/high bounds and no chance of a endless loop. Only one call to random per bind as well. - fixes so set_peer_primary pre-screens addresses to be valid to this host. - maxseg did not allow setting on an assoc basis. We needed to thus track and use an association value instead of a inp value. - Fixed ep get of HB status to report back properly. - use settings flag to tell if assoc level hb is on off not the timer.. since the timer may still run if unconf address are present. - check for crazy ENABLE/DISABLE conditions. - set and get of pmtud (fixed path mtu) not always taking into account ovh. - Getting PMTU info on stcb only needs to return PMTUD_ENABLED if any net is doing PMTU discovery. - Panic or warning fixed to not do so when a valid ip frag is taking place. - sndrcvinfo appearing in both inp and stcb was full size, instead of the non-pad version. This saves about 92 bytes from each struct by carefully converting to use the smaller version. - one-2-one model get(maxseg) would always get ep value, never the tcb's value. - The delayed ack time could be under a tick, this fixes so it bounds it to at least 1 tick for platforms whos tick is more than a ms. - Fragment interleave level set to wrong default value. - Fragment interleave could not set level 0. - Defered stream reset was broken due to a guard check and ntohl issue. - Found two lock order reversals and fixed. - Tighten up address checking, if the user gives an address the sa_len had better be set properly. - Get asoc by assoc-id would return a locked tcb when it was asked not to if the tcb was in the restart hash. - sysctl to dig down and get more association details Reviewed by: gnn
2007-05-28 11:17:24 +00:00
IP6_EXTHDR_GET(sh, struct sctphdr *, m, off,
(int)(sizeof(*sh) + sizeof(*ch)));
if (sh == NULL) {
SCTP_STAT_INCR(sctps_hdrops);
return IPPROTO_DONE;
}
ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(struct sctphdr));
iphlen = off;
offset = iphlen + sizeof(*sh) + sizeof(*ch);
#if defined(NFAITH) && NFAITH > 0
if (faithprefix_p != NULL && (*faithprefix_p) (&ip6->ip6_dst)) {
/* XXX send icmp6 host/port unreach? */
goto bad;
}
#endif /* NFAITH defined and > 0 */
SCTP_STAT_INCR(sctps_recvpackets);
SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
SCTPDBG(SCTP_DEBUG_INPUT1, "V6 input gets a packet iphlen:%d pktlen:%d\n",
- fixed autclose to not allow setting on 1-2-1 model. - bounded cookie-life to 1 second minimum in socket option set. - Delayed_ack_time becomes delayed_ack per new socket api document. - Improve port number selection, we now use low/high bounds and no chance of a endless loop. Only one call to random per bind as well. - fixes so set_peer_primary pre-screens addresses to be valid to this host. - maxseg did not allow setting on an assoc basis. We needed to thus track and use an association value instead of a inp value. - Fixed ep get of HB status to report back properly. - use settings flag to tell if assoc level hb is on off not the timer.. since the timer may still run if unconf address are present. - check for crazy ENABLE/DISABLE conditions. - set and get of pmtud (fixed path mtu) not always taking into account ovh. - Getting PMTU info on stcb only needs to return PMTUD_ENABLED if any net is doing PMTU discovery. - Panic or warning fixed to not do so when a valid ip frag is taking place. - sndrcvinfo appearing in both inp and stcb was full size, instead of the non-pad version. This saves about 92 bytes from each struct by carefully converting to use the smaller version. - one-2-one model get(maxseg) would always get ep value, never the tcb's value. - The delayed ack time could be under a tick, this fixes so it bounds it to at least 1 tick for platforms whos tick is more than a ms. - Fragment interleave level set to wrong default value. - Fragment interleave could not set level 0. - Defered stream reset was broken due to a guard check and ntohl issue. - Found two lock order reversals and fixed. - Tighten up address checking, if the user gives an address the sa_len had better be set properly. - Get asoc by assoc-id would return a locked tcb when it was asked not to if the tcb was in the restart hash. - sysctl to dig down and get more association details Reviewed by: gnn
2007-05-28 11:17:24 +00:00
iphlen, pkt_len);
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
/* No multi-cast support in SCTP */
goto bad;
}
/* destination port of 0 is illegal, based on RFC2960. */
if (sh->dest_port == 0)
goto bad;
if ((sctp_no_csum_on_loopback == 0) ||
(!SCTP_IS_IT_LOOPBACK(m))) {
/*
* we do NOT validate things from the loopback if the sysctl
* is set to 1.
*/
check = sh->checksum; /* save incoming checksum */
if ((check == 0) && (sctp_no_csum_on_loopback)) {
/*
* special hook for where we got a local address
* somehow routed across a non IFT_LOOP type
* interface
*/
if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &ip6->ip6_dst))
goto sctp_skip_csum;
}
sh->checksum = 0; /* prepare for calc */
calc_check = sctp_calculate_sum(m, &mlen, iphlen);
if (calc_check != check) {
SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x m:%p mlen:%d iphlen:%d\n",
calc_check, check, m, mlen, iphlen);
stcb = sctp_findassociation_addr(m, iphlen, offset - sizeof(*ch),
sh, ch, &in6p, &net, vrf_id);
/* in6p's ref-count increased && stcb locked */
if ((in6p) && (stcb)) {
sctp_send_packet_dropped(stcb, net, m, iphlen, 1);
sctp_chunk_output((struct sctp_inpcb *)in6p, stcb, 2);
} else if ((in6p != NULL) && (stcb == NULL)) {
refcount_up = 1;
}
SCTP_STAT_INCR(sctps_badsum);
SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
goto bad;
}
sh->checksum = calc_check;
}
sctp_skip_csum:
net = NULL;
/*
* Locate pcb and tcb for datagram sctp_findassociation_addr() wants
* IP/SCTP/first chunk header...
*/
stcb = sctp_findassociation_addr(m, iphlen, offset - sizeof(*ch),
sh, ch, &in6p, &net, vrf_id);
/* in6p's ref-count increased */
if (in6p == NULL) {
struct sctp_init_chunk *init_chk, chunk_buf;
SCTP_STAT_INCR(sctps_noport);
if (ch->chunk_type == SCTP_INITIATION) {
/*
* we do a trick here to get the INIT tag, dig in
* and get the tag from the INIT and put it in the
* common header.
*/
init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m,
iphlen + sizeof(*sh), sizeof(*init_chk),
(uint8_t *) & chunk_buf);
if (init_chk)
sh->v_tag = init_chk->init.initiate_tag;
else
sh->v_tag = 0;
}
if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
sctp_send_shutdown_complete2(m, iphlen, sh, vrf_id);
goto bad;
}
if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
goto bad;
}
if (ch->chunk_type != SCTP_ABORT_ASSOCIATION)
sctp_send_abort(m, iphlen, sh, 0, NULL, vrf_id);
goto bad;
} else if (stcb == NULL) {
refcount_up = 1;
}
in6p_ip = (struct inpcb *)in6p;
#ifdef IPSEC
/*
* Check AH/ESP integrity.
*/
if (in6p_ip && (ipsec6_in_reject(m, in6p_ip))) {
/* XXX */
ipsec6stat.in_polvio++;
goto bad;
}
#endif /* IPSEC */
/*
* CONTROL chunk processing
*/
offset -= sizeof(*ch);
ecn_bits = ((ntohl(ip6->ip6_flow) >> 20) & 0x000000ff);
/* Length now holds the total packet length payload + iphlen */
length = ntohs(ip6->ip6_plen) + iphlen;
/* sa_ignore NO_NULL_CHK */
sctp_common_input_processing(&m, iphlen, offset, length, sh, ch,
in6p, stcb, net, ecn_bits, vrf_id);
/* inp's ref-count reduced && stcb unlocked */
/* XXX this stuff below gets moved to appropriate parts later... */
if (m)
sctp_m_freem(m);
if ((in6p) && refcount_up) {
/* reduce ref-count */
SCTP_INP_WLOCK(in6p);
SCTP_INP_DECR_REF(in6p);
SCTP_INP_WUNLOCK(in6p);
}
return IPPROTO_DONE;
bad:
if (stcb) {
SCTP_TCB_UNLOCK(stcb);
}
if ((in6p) && refcount_up) {
/* reduce ref-count */
SCTP_INP_WLOCK(in6p);
SCTP_INP_DECR_REF(in6p);
SCTP_INP_WUNLOCK(in6p);
}
if (m)
sctp_m_freem(m);
return IPPROTO_DONE;
}
static void
sctp6_notify_mbuf(struct sctp_inpcb *inp,
struct icmp6_hdr *icmp6,
struct sctphdr *sh,
struct sctp_tcb *stcb,
struct sctp_nets *net)
{
uint32_t nxtsz;
if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
(icmp6 == NULL) || (sh == NULL)) {
goto out;
}
/* First do we even look at it? */
if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag))
goto out;
if (icmp6->icmp6_type != ICMP6_PACKET_TOO_BIG) {
/* not PACKET TO BIG */
goto out;
}
/*
* ok we need to look closely. We could even get smarter and look at
* anyone that we sent to in case we get a different ICMP that tells
* us there is no way to reach a host, but for this impl, all we
* care about is MTU discovery.
*/
nxtsz = ntohl(icmp6->icmp6_mtu);
/* Stop any PMTU timer */
sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL, SCTP_FROM_SCTP6_USRREQ + SCTP_LOC_1);
/* Adjust destination size limit */
if (net->mtu > nxtsz) {
net->mtu = nxtsz;
}
/* now what about the ep? */
if (stcb->asoc.smallest_mtu > nxtsz) {
struct sctp_tmit_chunk *chk;
/* Adjust that too */
stcb->asoc.smallest_mtu = nxtsz;
/* now off to subtract IP_DF flag if needed */
TAILQ_FOREACH(chk, &stcb->asoc.send_queue, sctp_next) {
if ((uint32_t) (chk->send_size + IP_HDR_SIZE) > nxtsz) {
chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
}
}
TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
if ((uint32_t) (chk->send_size + IP_HDR_SIZE) > nxtsz) {
/*
* For this guy we also mark for immediate
* resend since we sent to big of chunk
*/
chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
if (chk->sent != SCTP_DATAGRAM_RESEND)
stcb->asoc.sent_queue_retran_cnt++;
chk->sent = SCTP_DATAGRAM_RESEND;
chk->rec.data.doing_fast_retransmit = 0;
chk->sent = SCTP_DATAGRAM_RESEND;
/* Clear any time so NO RTT is being done */
chk->sent_rcv_time.tv_sec = 0;
chk->sent_rcv_time.tv_usec = 0;
stcb->asoc.total_flight -= chk->send_size;
net->flight_size -= chk->send_size;
}
}
}
sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL);
out:
if (stcb) {
SCTP_TCB_UNLOCK(stcb);
}
}
void
sctp6_ctlinput(cmd, pktdst, d)
int cmd;
struct sockaddr *pktdst;
void *d;
{
struct sctphdr sh;
struct ip6ctlparam *ip6cp = NULL;
uint32_t vrf_id;
int cm;
vrf_id = SCTP_DEFAULT_VRFID;
if (pktdst->sa_family != AF_INET6 ||
pktdst->sa_len != sizeof(struct sockaddr_in6))
return;
if ((unsigned)cmd >= PRC_NCMDS)
return;
if (PRC_IS_REDIRECT(cmd)) {
d = NULL;
} else if (inet6ctlerrmap[cmd] == 0) {
return;
}
/* if the parameter is from icmp6, decode it. */
if (d != NULL) {
ip6cp = (struct ip6ctlparam *)d;
} else {
ip6cp = (struct ip6ctlparam *)NULL;
}
if (ip6cp) {
/*
* XXX: We assume that when IPV6 is non NULL, M and OFF are
* valid.
*/
/* check if we can safely examine src and dst ports */
struct sctp_inpcb *inp = NULL;
struct sctp_tcb *stcb = NULL;
struct sctp_nets *net = NULL;
struct sockaddr_in6 final;
if (ip6cp->ip6c_m == NULL)
return;
bzero(&sh, sizeof(sh));
bzero(&final, sizeof(final));
inp = NULL;
net = NULL;
m_copydata(ip6cp->ip6c_m, ip6cp->ip6c_off, sizeof(sh),
(caddr_t)&sh);
ip6cp->ip6c_src->sin6_port = sh.src_port;
final.sin6_len = sizeof(final);
final.sin6_family = AF_INET6;
final.sin6_addr = ((struct sockaddr_in6 *)pktdst)->sin6_addr;
final.sin6_port = sh.dest_port;
stcb = sctp_findassociation_addr_sa((struct sockaddr *)ip6cp->ip6c_src,
(struct sockaddr *)&final,
&inp, &net, 1, vrf_id);
/* inp's ref-count increased && stcb locked */
if (stcb != NULL && inp && (inp->sctp_socket != NULL)) {
if (cmd == PRC_MSGSIZE) {
sctp6_notify_mbuf(inp,
ip6cp->ip6c_icmp6,
&sh,
stcb,
net);
/* inp's ref-count reduced && stcb unlocked */
} else {
if (cmd == PRC_HOSTDEAD) {
cm = EHOSTUNREACH;
} else {
cm = inet6ctlerrmap[cmd];
}
sctp_notify(inp, cm, &sh,
(struct sockaddr *)&final,
stcb, net);
/* inp's ref-count reduced && stcb unlocked */
}
} else {
if (PRC_IS_REDIRECT(cmd) && inp) {
in6_rtchange((struct in6pcb *)inp,
inet6ctlerrmap[cmd]);
}
if (inp) {
/* reduce inp's ref-count */
SCTP_INP_WLOCK(inp);
SCTP_INP_DECR_REF(inp);
SCTP_INP_WUNLOCK(inp);
}
if (stcb)
SCTP_TCB_UNLOCK(stcb);
}
}
}
/*
* this routine can probably be collasped into the one in sctp_userreq.c
* since they do the same thing and now we lookup with a sockaddr
*/
static int
sctp6_getcred(SYSCTL_HANDLER_ARGS)
{
struct xucred xuc;
struct sockaddr_in6 addrs[2];
struct sctp_inpcb *inp;
struct sctp_nets *net;
struct sctp_tcb *stcb;
int error;
uint32_t vrf_id;
vrf_id = SCTP_DEFAULT_VRFID;
error = priv_check(req->td, PRIV_NETINET_GETCRED);
if (error)
return (error);
if (req->newlen != sizeof(addrs))
return (EINVAL);
if (req->oldlen != sizeof(struct ucred))
return (EINVAL);
error = SYSCTL_IN(req, addrs, sizeof(addrs));
if (error)
return (error);
stcb = sctp_findassociation_addr_sa(sin6tosa(&addrs[0]),
sin6tosa(&addrs[1]),
&inp, &net, 1, vrf_id);
if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) {
if ((inp != NULL) && (stcb == NULL)) {
/* reduce ref-count */
SCTP_INP_WLOCK(inp);
SCTP_INP_DECR_REF(inp);
goto cred_can_cont;
}
error = ENOENT;
goto out;
}
SCTP_TCB_UNLOCK(stcb);
/*
* We use the write lock here, only since in the error leg we need
* it. If we used RLOCK, then we would have to
* wlock/decr/unlock/rlock. Which in theory could create a hole.
* Better to use higher wlock.
*/
SCTP_INP_WLOCK(inp);
cred_can_cont:
error = cr_canseesocket(req->td->td_ucred, inp->sctp_socket);
if (error) {
SCTP_INP_WUNLOCK(inp);
goto out;
}
cru2x(inp->sctp_socket->so_cred, &xuc);
SCTP_INP_WUNLOCK(inp);
error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
out:
return (error);
}
SYSCTL_PROC(_net_inet6_sctp6, OID_AUTO, getcred, CTLTYPE_OPAQUE | CTLFLAG_RW,
0, 0,
sctp6_getcred, "S,ucred", "Get the ucred of a SCTP6 connection");
/* This is the same as the sctp_abort() could be made common */
static void
sctp6_abort(struct socket *so)
{
struct sctp_inpcb *inp;
uint32_t flags;
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == 0)
return;
sctp_must_try_again:
flags = inp->sctp_flags;
#ifdef SCTP_LOG_CLOSING
sctp_log_closing(inp, NULL, 17);
#endif
if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
(atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
#ifdef SCTP_LOG_CLOSING
sctp_log_closing(inp, NULL, 16);
#endif
sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
SOCK_LOCK(so);
SCTP_SB_CLEAR(so->so_snd);
/*
* same for the rcv ones, they are only here for the
* accounting/select.
*/
SCTP_SB_CLEAR(so->so_rcv);
/* Now null out the reference, we are completely detached. */
so->so_pcb = NULL;
SOCK_UNLOCK(so);
} else {
flags = inp->sctp_flags;
if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
goto sctp_must_try_again;
}
}
return;
}
static int
sctp6_attach(struct socket *so, int proto, struct thread *p)
{
struct in6pcb *inp6;
int error;
struct sctp_inpcb *inp;
uint32_t vrf_id = SCTP_DEFAULT_VRFID;
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp != NULL)
return EINVAL;
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
error = SCTP_SORESERVE(so, sctp_sendspace, sctp_recvspace);
if (error)
return error;
}
error = sctp_inpcb_alloc(so, vrf_id);
if (error)
return error;
inp = (struct sctp_inpcb *)so->so_pcb;
SCTP_INP_WLOCK(inp);
inp->sctp_flags |= SCTP_PCB_FLAGS_BOUND_V6; /* I'm v6! */
inp6 = (struct in6pcb *)inp;
inp6->inp_vflag |= INP_IPV6;
inp6->in6p_hops = -1; /* use kernel default */
inp6->in6p_cksum = -1; /* just to be sure */
#ifdef INET
/*
* XXX: ugly!! IPv4 TTL initialization is necessary for an IPv6
* socket as well, because the socket may be bound to an IPv6
* wildcard address, which may match an IPv4-mapped IPv6 address.
*/
inp6->inp_ip_ttl = ip_defttl;
#endif
/*
* Hmm what about the IPSEC stuff that is missing here but in
* sctp_attach()?
*/
SCTP_INP_WUNLOCK(inp);
return 0;
}
static int
sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
{
struct sctp_inpcb *inp;
struct in6pcb *inp6;
int error;
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == 0)
return EINVAL;
- fixed autclose to not allow setting on 1-2-1 model. - bounded cookie-life to 1 second minimum in socket option set. - Delayed_ack_time becomes delayed_ack per new socket api document. - Improve port number selection, we now use low/high bounds and no chance of a endless loop. Only one call to random per bind as well. - fixes so set_peer_primary pre-screens addresses to be valid to this host. - maxseg did not allow setting on an assoc basis. We needed to thus track and use an association value instead of a inp value. - Fixed ep get of HB status to report back properly. - use settings flag to tell if assoc level hb is on off not the timer.. since the timer may still run if unconf address are present. - check for crazy ENABLE/DISABLE conditions. - set and get of pmtud (fixed path mtu) not always taking into account ovh. - Getting PMTU info on stcb only needs to return PMTUD_ENABLED if any net is doing PMTU discovery. - Panic or warning fixed to not do so when a valid ip frag is taking place. - sndrcvinfo appearing in both inp and stcb was full size, instead of the non-pad version. This saves about 92 bytes from each struct by carefully converting to use the smaller version. - one-2-one model get(maxseg) would always get ep value, never the tcb's value. - The delayed ack time could be under a tick, this fixes so it bounds it to at least 1 tick for platforms whos tick is more than a ms. - Fragment interleave level set to wrong default value. - Fragment interleave could not set level 0. - Defered stream reset was broken due to a guard check and ntohl issue. - Found two lock order reversals and fixed. - Tighten up address checking, if the user gives an address the sa_len had better be set properly. - Get asoc by assoc-id would return a locked tcb when it was asked not to if the tcb was in the restart hash. - sysctl to dig down and get more association details Reviewed by: gnn
2007-05-28 11:17:24 +00:00
if (addr) {
if ((addr->sa_family == AF_INET6) &&
(addr->sa_len != sizeof(struct sockaddr_in6))) {
return EINVAL;
}
if ((addr->sa_family == AF_INET) &&
(addr->sa_len != sizeof(struct sockaddr_in))) {
return EINVAL;
}
}
inp6 = (struct in6pcb *)inp;
inp6->inp_vflag &= ~INP_IPV4;
inp6->inp_vflag |= INP_IPV6;
- Macroizes the V6ONLY flag check. - Added a short time wait (not used yet) constant - Corrected the type of the crc32c table (it was unsigned long and really is a uint32_t - Got rid of the user of MHeaders until they are truely needed by lower layers. - Fixed an initialization problem in the readq structure (ordering was off). - Found yet another collision bug when the random number generator returns two numbers on one side (during a collision) that are the same. Also added some tracking of cookies that will go away when we know that we have the last collision bug gone. - Fixed an init bug for book_size_scale, that was causing Early FR code to run when it should not. - Fixed a flight size tracking bug that was associated with Early FR but due to above bug also effected all FR's - Fixed it so Max Burst also will apply to Fast Retransmit. - Fixed a bug in the temporary logging code that allowed a static log array overflow - hashinit_flags is now used. - Two last mcopym's were converted to the macro sctp_m_copym that has always been used by all other places - macro sctp_m_copym was converted to upper case. - We now validate sinfo_flags on input (we did not before). - Fixed a bug that prevented a user from sending data and immediately shuting down with one send operation. - Moved to use hashdestroy instead of free() in our macros. - Fixed an init problem in our timed_wait vtag where we did not fully initialize our time-wait blocks. - Timer stops were re-positioned. - A pcb cleanup method was added, however this probably will not be used in BSD.. unless we make module loadable protocols - I think this fixes the mysterious timer bug.. it was a ordering of locks problem in the way we did timers. It now conforms to the timeout(9) manual (except for the _drain part, we had to do this a different way due to locks). - Fixed error return code so we get either CONNREUSED or CONNRESET depending on where one is in progression - Purged an unused clone macro. - Fixed a read erro code issue where we were NOT getting the proper error when the connection was reset. - Purged an unused clone macro. - Fixed a read erro code issue where we were NOT getting the proper error when the connection was reset. Approved by: gnn
2007-01-15 15:12:10 +00:00
if ((addr != NULL) && (SCTP_IPV6_V6ONLY(inp6) == 0)) {
if (addr->sa_family == AF_INET) {
/* binding v4 addr to v6 socket, so reset flags */
inp6->inp_vflag |= INP_IPV4;
inp6->inp_vflag &= ~INP_IPV6;
} else {
struct sockaddr_in6 *sin6_p;
sin6_p = (struct sockaddr_in6 *)addr;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) {
inp6->inp_vflag |= INP_IPV4;
} else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) {
struct sockaddr_in sin;
in6_sin6_2_sin(&sin, sin6_p);
inp6->inp_vflag |= INP_IPV4;
inp6->inp_vflag &= ~INP_IPV6;
error = sctp_inpcb_bind(so, (struct sockaddr *)&sin, NULL, p);
return error;
}
}
} else if (addr != NULL) {
/* IPV6_V6ONLY socket */
if (addr->sa_family == AF_INET) {
/* can't bind v4 addr to v6 only socket! */
return EINVAL;
} else {
struct sockaddr_in6 *sin6_p;
sin6_p = (struct sockaddr_in6 *)addr;
if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr))
/* can't bind v4-mapped addrs either! */
/* NOTE: we don't support SIIT */
return EINVAL;
}
}
error = sctp_inpcb_bind(so, addr, NULL, p);
return error;
}
static void
sctp6_close(struct socket *so)
{
struct sctp_inpcb *inp;
uint32_t flags;
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == 0)
return;
/*
* Inform all the lower layer assoc that we are done.
*/
sctp_must_try_again:
flags = inp->sctp_flags;
#ifdef SCTP_LOG_CLOSING
sctp_log_closing(inp, NULL, 17);
#endif
if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
(atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
if (((so->so_options & SO_LINGER) && (so->so_linger == 0)) ||
(so->so_rcv.sb_cc > 0)) {
#ifdef SCTP_LOG_CLOSING
sctp_log_closing(inp, NULL, 13);
#endif
sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT
,SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
} else {
#ifdef SCTP_LOG_CLOSING
sctp_log_closing(inp, NULL, 14);
#endif
sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE,
SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
}
/*
* The socket is now detached, no matter what the state of
* the SCTP association.
*/
SOCK_LOCK(so);
SCTP_SB_CLEAR(so->so_snd);
/*
* same for the rcv ones, they are only here for the
* accounting/select.
*/
SCTP_SB_CLEAR(so->so_rcv);
/* Now null out the reference, we are completely detached. */
so->so_pcb = NULL;
SOCK_UNLOCK(so);
} else {
flags = inp->sctp_flags;
if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
goto sctp_must_try_again;
}
}
return;
}
/* This could be made common with sctp_detach() since they are identical */
static
int
sctp6_disconnect(struct socket *so)
{
struct sctp_inpcb *inp;
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == NULL) {
return (ENOTCONN);
}
SCTP_INP_RLOCK(inp);
if (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
- Copyright updates (aka 2007) - ZONE get now also take a type cast so it does the cast like mtod does. - New macro SCTP_LIST_EMPTY, which in bsd is just LIST_EMPTY - Removal of const in some of the static hmac functions (not needed) - Store length changes to allow for new fields in auth - Auth code updated to current draft (this should be the RFC version we think). - use uint8_t instead of u_char in LOOPBACK address comparison - Some u_int32_t converted to uint32_t (in crc code) - A bug was found in the mib counts for ordered/unordered count, this was fixed (was referencing a freed mbuf). - SCTP_ASOCLOG_OF_TSNS added (code will probably disappear after my testing completes. It allows us to keep a small log on each assoc of the last 40 TSN's in/out and stream assignment. It is NOT in options and so is only good for private builds. - Some CMT changes in prep for Jana fixing his problem with reneging when CMT is enabled (Concurrent Multipath Transfer = CMT). - Some missing mib stats added. - Correction to number of open assoc's count in mib - Correction to os_bsd.h to get right sha2 macros - Add of special AUTH_04 flags so you can compile the code with the old format (in case the peer does not yet support the latest auth code). - Nonce sum was incorrectly being set in when ecn_nonce was NOT on. - LOR in listen with implicit bind found and fixed. - Moved away from using mbuf's for socket options to using just data pointers. The mbufs were used to harmonize NetBSD code since both Net and Open used this method. We have decided to move away from that and more conform to FreeBSD style (which makes more sense). - Very very nasty bug found in some of my "debug" code. The cookie_how collision case tracking had an endless loop in it if you got a second retransmission of a cookie collision case. This would lock up a CPU .. ugly.. - auth function goes to using size_t instead of int which conforms to socketapi better - Found the nasty bug that happens after 9 days of testing.. you get the data chunk, deliver it and due to the reference to a ch-> that every now and then has been deleted (depending on the postion in the mbuf) you have an invalid ch->ch.flags.. and thus you don't advance the stream sequence number.. so you block the stream permanently. The fix is to make local variables of these guys and set them up before you have any chance of trimming the mbuf. - style fix in sctp_util.h, not sure how this got bad maybe in the last patch? (aka it may not be in the real source). - Found interesting bug when using the extended snd/rcv info where we would get an error on receiving with this. Thats because it was NOT padded to the same size as the snd_rcv info. We increase (add the pad) so the two structs are the same size in sctp_uio.h - In sctp_usrreq.c one of the most common things we did for socket options was to cast the pointer and validate the size. This as been macro-ized to help make the code more readable. - in sctputil.c two things, the socketapi class found a missing flag type (the next msg is a notification) and a missing scope recovery was also fixed. Reviewed by: gnn
2007-02-12 23:24:31 +00:00
if (SCTP_LIST_EMPTY(&inp->sctp_asoc_list)) {
/* No connection */
SCTP_INP_RUNLOCK(inp);
return (ENOTCONN);
} else {
int some_on_streamwheel = 0;
struct sctp_association *asoc;
struct sctp_tcb *stcb;
stcb = LIST_FIRST(&inp->sctp_asoc_list);
if (stcb == NULL) {
SCTP_INP_RUNLOCK(inp);
return (EINVAL);
}
SCTP_TCB_LOCK(stcb);
asoc = &stcb->asoc;
if (((so->so_options & SO_LINGER) &&
(so->so_linger == 0)) ||
(so->so_rcv.sb_cc > 0)) {
if (SCTP_GET_STATE(asoc) !=
SCTP_STATE_COOKIE_WAIT) {
/* Left with Data unread */
struct mbuf *op_err;
op_err = sctp_generate_invmanparam(SCTP_CAUSE_USER_INITIATED_ABT);
sctp_send_abort_tcb(stcb, op_err);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
}
SCTP_INP_RUNLOCK(inp);
if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
(SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
sctp_free_assoc(inp, stcb, SCTP_DONOT_SETSCOPE,
SCTP_FROM_SCTP6_USRREQ + SCTP_LOC_2);
/* No unlock tcb assoc is gone */
return (0);
}
if (!TAILQ_EMPTY(&asoc->out_wheel)) {
/* Check to see if some data queued */
struct sctp_stream_out *outs;
TAILQ_FOREACH(outs, &asoc->out_wheel,
next_spoke) {
if (!TAILQ_EMPTY(&outs->outqueue)) {
some_on_streamwheel = 1;
break;
}
}
}
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
(some_on_streamwheel == 0)) {
/* nothing queued to send, so I'm done... */
if ((SCTP_GET_STATE(asoc) !=
SCTP_STATE_SHUTDOWN_SENT) &&
(SCTP_GET_STATE(asoc) !=
SCTP_STATE_SHUTDOWN_ACK_SENT)) {
/* only send SHUTDOWN the first time */
sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
sctp_chunk_output(stcb->sctp_ep, stcb, 1);
- Copyright updates (aka 2007) - ZONE get now also take a type cast so it does the cast like mtod does. - New macro SCTP_LIST_EMPTY, which in bsd is just LIST_EMPTY - Removal of const in some of the static hmac functions (not needed) - Store length changes to allow for new fields in auth - Auth code updated to current draft (this should be the RFC version we think). - use uint8_t instead of u_char in LOOPBACK address comparison - Some u_int32_t converted to uint32_t (in crc code) - A bug was found in the mib counts for ordered/unordered count, this was fixed (was referencing a freed mbuf). - SCTP_ASOCLOG_OF_TSNS added (code will probably disappear after my testing completes. It allows us to keep a small log on each assoc of the last 40 TSN's in/out and stream assignment. It is NOT in options and so is only good for private builds. - Some CMT changes in prep for Jana fixing his problem with reneging when CMT is enabled (Concurrent Multipath Transfer = CMT). - Some missing mib stats added. - Correction to number of open assoc's count in mib - Correction to os_bsd.h to get right sha2 macros - Add of special AUTH_04 flags so you can compile the code with the old format (in case the peer does not yet support the latest auth code). - Nonce sum was incorrectly being set in when ecn_nonce was NOT on. - LOR in listen with implicit bind found and fixed. - Moved away from using mbuf's for socket options to using just data pointers. The mbufs were used to harmonize NetBSD code since both Net and Open used this method. We have decided to move away from that and more conform to FreeBSD style (which makes more sense). - Very very nasty bug found in some of my "debug" code. The cookie_how collision case tracking had an endless loop in it if you got a second retransmission of a cookie collision case. This would lock up a CPU .. ugly.. - auth function goes to using size_t instead of int which conforms to socketapi better - Found the nasty bug that happens after 9 days of testing.. you get the data chunk, deliver it and due to the reference to a ch-> that every now and then has been deleted (depending on the postion in the mbuf) you have an invalid ch->ch.flags.. and thus you don't advance the stream sequence number.. so you block the stream permanently. The fix is to make local variables of these guys and set them up before you have any chance of trimming the mbuf. - style fix in sctp_util.h, not sure how this got bad maybe in the last patch? (aka it may not be in the real source). - Found interesting bug when using the extended snd/rcv info where we would get an error on receiving with this. Thats because it was NOT padded to the same size as the snd_rcv info. We increase (add the pad) so the two structs are the same size in sctp_uio.h - In sctp_usrreq.c one of the most common things we did for socket options was to cast the pointer and validate the size. This as been macro-ized to help make the code more readable. - in sctputil.c two things, the socketapi class found a missing flag type (the next msg is a notification) and a missing scope recovery was also fixed. Reviewed by: gnn
2007-02-12 23:24:31 +00:00
if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
(SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
asoc->state = SCTP_STATE_SHUTDOWN_SENT;
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
stcb->sctp_ep, stcb,
asoc->primary_destination);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb,
asoc->primary_destination);
}
} else {
/*
* we still got (or just got) data to send,
* so set SHUTDOWN_PENDING
*/
/*
* XXX sockets draft says that MSG_EOF
* should be sent with no data. currently,
* we will allow user data to be sent first
* and move to SHUTDOWN-PENDING
*/
asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
}
SCTP_TCB_UNLOCK(stcb);
SCTP_INP_RUNLOCK(inp);
return (0);
}
} else {
/* UDP model does not support this */
SCTP_INP_RUNLOCK(inp);
return EOPNOTSUPP;
}
}
int
sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
struct mbuf *control, struct thread *p);
static int
sctp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
struct mbuf *control, struct thread *p)
{
struct sctp_inpcb *inp;
struct inpcb *in_inp;
struct in6pcb *inp6;
#ifdef INET
struct sockaddr_in6 *sin6;
#endif /* INET */
/* No SPL needed since sctp_output does this */
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == NULL) {
if (control) {
SCTP_RELEASE_PKT(control);
control = NULL;
}
SCTP_RELEASE_PKT(m);
return EINVAL;
}
in_inp = (struct inpcb *)inp;
inp6 = (struct in6pcb *)inp;
/*
* For the TCP model we may get a NULL addr, if we are a connected
* socket thats ok.
*/
if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) &&
(addr == NULL)) {
goto connected_type;
}
if (addr == NULL) {
SCTP_RELEASE_PKT(m);
if (control) {
SCTP_RELEASE_PKT(control);
control = NULL;
}
return (EDESTADDRREQ);
}
#ifdef INET
sin6 = (struct sockaddr_in6 *)addr;
- Macroizes the V6ONLY flag check. - Added a short time wait (not used yet) constant - Corrected the type of the crc32c table (it was unsigned long and really is a uint32_t - Got rid of the user of MHeaders until they are truely needed by lower layers. - Fixed an initialization problem in the readq structure (ordering was off). - Found yet another collision bug when the random number generator returns two numbers on one side (during a collision) that are the same. Also added some tracking of cookies that will go away when we know that we have the last collision bug gone. - Fixed an init bug for book_size_scale, that was causing Early FR code to run when it should not. - Fixed a flight size tracking bug that was associated with Early FR but due to above bug also effected all FR's - Fixed it so Max Burst also will apply to Fast Retransmit. - Fixed a bug in the temporary logging code that allowed a static log array overflow - hashinit_flags is now used. - Two last mcopym's were converted to the macro sctp_m_copym that has always been used by all other places - macro sctp_m_copym was converted to upper case. - We now validate sinfo_flags on input (we did not before). - Fixed a bug that prevented a user from sending data and immediately shuting down with one send operation. - Moved to use hashdestroy instead of free() in our macros. - Fixed an init problem in our timed_wait vtag where we did not fully initialize our time-wait blocks. - Timer stops were re-positioned. - A pcb cleanup method was added, however this probably will not be used in BSD.. unless we make module loadable protocols - I think this fixes the mysterious timer bug.. it was a ordering of locks problem in the way we did timers. It now conforms to the timeout(9) manual (except for the _drain part, we had to do this a different way due to locks). - Fixed error return code so we get either CONNREUSED or CONNRESET depending on where one is in progression - Purged an unused clone macro. - Fixed a read erro code issue where we were NOT getting the proper error when the connection was reset. - Purged an unused clone macro. - Fixed a read erro code issue where we were NOT getting the proper error when the connection was reset. Approved by: gnn
2007-01-15 15:12:10 +00:00
if (SCTP_IPV6_V6ONLY(inp6)) {
/*
* if IPV6_V6ONLY flag, we discard datagrams destined to a
* v4 addr or v4-mapped addr
*/
if (addr->sa_family == AF_INET) {
return EINVAL;
}
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
return EINVAL;
}
}
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
if (!ip6_v6only) {
struct sockaddr_in sin;
/* convert v4-mapped into v4 addr and send */
in6_sin6_2_sin(&sin, sin6);
return sctp_sendm(so, flags, m, (struct sockaddr *)&sin,
control, p);
} else {
/* mapped addresses aren't enabled */
return EINVAL;
}
}
#endif /* INET */
connected_type:
/* now what about control */
if (control) {
if (inp->control) {
SCTP_PRINTF("huh? control set?\n");
SCTP_RELEASE_PKT(inp->control);
inp->control = NULL;
}
inp->control = control;
}
/* Place the data */
if (inp->pkt) {
SCTP_BUF_NEXT(inp->pkt_last) = m;
inp->pkt_last = m;
} else {
inp->pkt_last = inp->pkt = m;
}
if (
/* FreeBSD and MacOSX uses a flag passed */
((flags & PRUS_MORETOCOME) == 0)
) {
/*
* note with the current version this code will only be used
* by OpenBSD, NetBSD and FreeBSD have methods for
* re-defining sosend() to use sctp_sosend(). One can
* optionaly switch back to this code (by changing back the
* defininitions but this is not advisable.
*/
int ret;
ret = sctp_output(inp, inp->pkt, addr, inp->control, p, flags);
inp->pkt = NULL;
inp->control = NULL;
return (ret);
} else {
return (0);
}
}
static int
sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
{
uint32_t vrf_id;
int error = 0;
struct sctp_inpcb *inp;
struct in6pcb *inp6;
struct sctp_tcb *stcb;
#ifdef INET
struct sockaddr_in6 *sin6;
struct sockaddr_storage ss;
#endif /* INET */
inp6 = (struct in6pcb *)so->so_pcb;
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == 0) {
return (ECONNRESET); /* I made the same as TCP since we are
* not setup? */
}
- fixed autclose to not allow setting on 1-2-1 model. - bounded cookie-life to 1 second minimum in socket option set. - Delayed_ack_time becomes delayed_ack per new socket api document. - Improve port number selection, we now use low/high bounds and no chance of a endless loop. Only one call to random per bind as well. - fixes so set_peer_primary pre-screens addresses to be valid to this host. - maxseg did not allow setting on an assoc basis. We needed to thus track and use an association value instead of a inp value. - Fixed ep get of HB status to report back properly. - use settings flag to tell if assoc level hb is on off not the timer.. since the timer may still run if unconf address are present. - check for crazy ENABLE/DISABLE conditions. - set and get of pmtud (fixed path mtu) not always taking into account ovh. - Getting PMTU info on stcb only needs to return PMTUD_ENABLED if any net is doing PMTU discovery. - Panic or warning fixed to not do so when a valid ip frag is taking place. - sndrcvinfo appearing in both inp and stcb was full size, instead of the non-pad version. This saves about 92 bytes from each struct by carefully converting to use the smaller version. - one-2-one model get(maxseg) would always get ep value, never the tcb's value. - The delayed ack time could be under a tick, this fixes so it bounds it to at least 1 tick for platforms whos tick is more than a ms. - Fragment interleave level set to wrong default value. - Fragment interleave could not set level 0. - Defered stream reset was broken due to a guard check and ntohl issue. - Found two lock order reversals and fixed. - Tighten up address checking, if the user gives an address the sa_len had better be set properly. - Get asoc by assoc-id would return a locked tcb when it was asked not to if the tcb was in the restart hash. - sysctl to dig down and get more association details Reviewed by: gnn
2007-05-28 11:17:24 +00:00
if (addr == NULL) {
return (EINVAL);
}
if ((addr->sa_family == AF_INET6) && (addr->sa_len != sizeof(struct sockaddr_in6))) {
return (EINVAL);
}
if ((addr->sa_family == AF_INET) && (addr->sa_len != sizeof(struct sockaddr_in))) {
return (EINVAL);
}
vrf_id = inp->def_vrf_id;
SCTP_ASOC_CREATE_LOCK(inp);
SCTP_INP_RLOCK(inp);
if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) ==
SCTP_PCB_FLAGS_UNBOUND) {
/* Bind a ephemeral port */
SCTP_INP_RUNLOCK(inp);
error = sctp6_bind(so, NULL, p);
if (error) {
SCTP_ASOC_CREATE_UNLOCK(inp);
return (error);
}
SCTP_INP_RLOCK(inp);
}
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
(inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
/* We are already connected AND the TCP model */
SCTP_INP_RUNLOCK(inp);
SCTP_ASOC_CREATE_UNLOCK(inp);
return (EADDRINUSE);
}
#ifdef INET
sin6 = (struct sockaddr_in6 *)addr;
- Macroizes the V6ONLY flag check. - Added a short time wait (not used yet) constant - Corrected the type of the crc32c table (it was unsigned long and really is a uint32_t - Got rid of the user of MHeaders until they are truely needed by lower layers. - Fixed an initialization problem in the readq structure (ordering was off). - Found yet another collision bug when the random number generator returns two numbers on one side (during a collision) that are the same. Also added some tracking of cookies that will go away when we know that we have the last collision bug gone. - Fixed an init bug for book_size_scale, that was causing Early FR code to run when it should not. - Fixed a flight size tracking bug that was associated with Early FR but due to above bug also effected all FR's - Fixed it so Max Burst also will apply to Fast Retransmit. - Fixed a bug in the temporary logging code that allowed a static log array overflow - hashinit_flags is now used. - Two last mcopym's were converted to the macro sctp_m_copym that has always been used by all other places - macro sctp_m_copym was converted to upper case. - We now validate sinfo_flags on input (we did not before). - Fixed a bug that prevented a user from sending data and immediately shuting down with one send operation. - Moved to use hashdestroy instead of free() in our macros. - Fixed an init problem in our timed_wait vtag where we did not fully initialize our time-wait blocks. - Timer stops were re-positioned. - A pcb cleanup method was added, however this probably will not be used in BSD.. unless we make module loadable protocols - I think this fixes the mysterious timer bug.. it was a ordering of locks problem in the way we did timers. It now conforms to the timeout(9) manual (except for the _drain part, we had to do this a different way due to locks). - Fixed error return code so we get either CONNREUSED or CONNRESET depending on where one is in progression - Purged an unused clone macro. - Fixed a read erro code issue where we were NOT getting the proper error when the connection was reset. - Purged an unused clone macro. - Fixed a read erro code issue where we were NOT getting the proper error when the connection was reset. Approved by: gnn
2007-01-15 15:12:10 +00:00
if (SCTP_IPV6_V6ONLY(inp6)) {
/*
* if IPV6_V6ONLY flag, ignore connections destined to a v4
* addr or v4-mapped addr
*/
if (addr->sa_family == AF_INET) {
SCTP_INP_RUNLOCK(inp);
SCTP_ASOC_CREATE_UNLOCK(inp);
return EINVAL;
}
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
SCTP_INP_RUNLOCK(inp);
SCTP_ASOC_CREATE_UNLOCK(inp);
return EINVAL;
}
}
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
if (!ip6_v6only) {
/* convert v4-mapped into v4 addr */
in6_sin6_2_sin((struct sockaddr_in *)&ss, sin6);
addr = (struct sockaddr *)&ss;
} else {
/* mapped addresses aren't enabled */
SCTP_INP_RUNLOCK(inp);
SCTP_ASOC_CREATE_UNLOCK(inp);
return EINVAL;
}
} else
#endif /* INET */
addr = addr; /* for true v6 address case */
/* Now do we connect? */
if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
stcb = LIST_FIRST(&inp->sctp_asoc_list);
if (stcb) {
SCTP_TCB_UNLOCK(stcb);
}
SCTP_INP_RUNLOCK(inp);
} else {
SCTP_INP_RUNLOCK(inp);
SCTP_INP_WLOCK(inp);
SCTP_INP_INCR_REF(inp);
SCTP_INP_WUNLOCK(inp);
stcb = sctp_findassociation_ep_addr(&inp, addr, NULL, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_WLOCK(inp);
SCTP_INP_DECR_REF(inp);
SCTP_INP_WUNLOCK(inp);
}
}
if (stcb != NULL) {
/* Already have or am bring up an association */
SCTP_ASOC_CREATE_UNLOCK(inp);
SCTP_TCB_UNLOCK(stcb);
return (EALREADY);
}
/* We are GOOD to go */
stcb = sctp_aloc_assoc(inp, addr, 1, &error, 0, vrf_id);
SCTP_ASOC_CREATE_UNLOCK(inp);
if (stcb == NULL) {
/* Gak! no memory */
return (error);
}
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
/* Set the connected flag so we can queue data */
soisconnecting(so);
}
stcb->asoc.state = SCTP_STATE_COOKIE_WAIT;
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
/* initialize authentication parameters for the assoc */
sctp_initialize_auth_params(inp, stcb);
sctp_send_initiate(inp, stcb);
SCTP_TCB_UNLOCK(stcb);
return error;
}
static int
sctp6_getaddr(struct socket *so, struct sockaddr **addr)
{
struct sockaddr_in6 *sin6;
struct sctp_inpcb *inp;
uint32_t vrf_id;
struct sctp_ifa *sctp_ifa;
int error;
/*
* Do the malloc first in case it blocks.
*/
SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(*sin6);
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == NULL) {
SCTP_FREE_SONAME(sin6);
return ECONNRESET;
}
SCTP_INP_RLOCK(inp);
sin6->sin6_port = inp->sctp_lport;
if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
/* For the bound all case you get back 0 */
if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
struct sctp_tcb *stcb;
struct sockaddr_in6 *sin_a6;
struct sctp_nets *net;
int fnd;
stcb = LIST_FIRST(&inp->sctp_asoc_list);
if (stcb == NULL) {
goto notConn6;
}
fnd = 0;
sin_a6 = NULL;
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
sin_a6 = (struct sockaddr_in6 *)&net->ro._l_addr;
if (sin_a6 == NULL)
/* this will make coverity happy */
continue;
if (sin_a6->sin6_family == AF_INET6) {
fnd = 1;
break;
}
}
if ((!fnd) || (sin_a6 == NULL)) {
/* punt */
goto notConn6;
}
vrf_id = inp->def_vrf_id;
sctp_ifa = sctp_source_address_selection(inp, stcb, (sctp_route_t *) & net->ro, net, 0, vrf_id);
if (sctp_ifa) {
sin6->sin6_addr = sctp_ifa->address.sin6.sin6_addr;
}
} else {
/* For the bound all case you get back 0 */
notConn6:
memset(&sin6->sin6_addr, 0, sizeof(sin6->sin6_addr));
}
} else {
/* Take the first IPv6 address in the list */
struct sctp_laddr *laddr;
int fnd = 0;
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (laddr->ifa->address.sa.sa_family == AF_INET6) {
struct sockaddr_in6 *sin_a;
sin_a = (struct sockaddr_in6 *)&laddr->ifa->address.sin6;
sin6->sin6_addr = sin_a->sin6_addr;
fnd = 1;
break;
}
}
if (!fnd) {
SCTP_FREE_SONAME(sin6);
SCTP_INP_RUNLOCK(inp);
return ENOENT;
}
}
SCTP_INP_RUNLOCK(inp);
/* Scoping things for v6 */
if ((error = sa6_recoverscope(sin6)) != 0) {
SCTP_FREE_SONAME(sin6);
return (error);
}
(*addr) = (struct sockaddr *)sin6;
return (0);
}
static int
sctp6_peeraddr(struct socket *so, struct sockaddr **addr)
{
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)*addr;
int fnd;
struct sockaddr_in6 *sin_a6;
struct sctp_inpcb *inp;
struct sctp_tcb *stcb;
struct sctp_nets *net;
int error;
/*
* Do the malloc first in case it blocks.
*/
inp = (struct sctp_inpcb *)so->so_pcb;
if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0) {
/* UDP type and listeners will drop out here */
return (ENOTCONN);
}
SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(*sin6);
/* We must recapture incase we blocked */
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == NULL) {
SCTP_FREE_SONAME(sin6);
return ECONNRESET;
}
SCTP_INP_RLOCK(inp);
stcb = LIST_FIRST(&inp->sctp_asoc_list);
if (stcb) {
SCTP_TCB_LOCK(stcb);
}
SCTP_INP_RUNLOCK(inp);
if (stcb == NULL) {
SCTP_FREE_SONAME(sin6);
return ECONNRESET;
}
fnd = 0;
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
sin_a6 = (struct sockaddr_in6 *)&net->ro._l_addr;
if (sin_a6->sin6_family == AF_INET6) {
fnd = 1;
sin6->sin6_port = stcb->rport;
sin6->sin6_addr = sin_a6->sin6_addr;
break;
}
}
SCTP_TCB_UNLOCK(stcb);
if (!fnd) {
/* No IPv4 address */
SCTP_FREE_SONAME(sin6);
return ENOENT;
}
if ((error = sa6_recoverscope(sin6)) != 0)
return (error);
*addr = (struct sockaddr *)sin6;
return (0);
}
static int
sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
{
struct sockaddr *addr;
struct in6pcb *inp6 = sotoin6pcb(so);
int error;
if (inp6 == NULL)
return EINVAL;
/* allow v6 addresses precedence */
error = sctp6_getaddr(so, nam);
if (error) {
/* try v4 next if v6 failed */
error = sctp_ingetaddr(so, nam);
if (error) {
return (error);
}
addr = *nam;
/* if I'm V6ONLY, convert it to v4-mapped */
- Macroizes the V6ONLY flag check. - Added a short time wait (not used yet) constant - Corrected the type of the crc32c table (it was unsigned long and really is a uint32_t - Got rid of the user of MHeaders until they are truely needed by lower layers. - Fixed an initialization problem in the readq structure (ordering was off). - Found yet another collision bug when the random number generator returns two numbers on one side (during a collision) that are the same. Also added some tracking of cookies that will go away when we know that we have the last collision bug gone. - Fixed an init bug for book_size_scale, that was causing Early FR code to run when it should not. - Fixed a flight size tracking bug that was associated with Early FR but due to above bug also effected all FR's - Fixed it so Max Burst also will apply to Fast Retransmit. - Fixed a bug in the temporary logging code that allowed a static log array overflow - hashinit_flags is now used. - Two last mcopym's were converted to the macro sctp_m_copym that has always been used by all other places - macro sctp_m_copym was converted to upper case. - We now validate sinfo_flags on input (we did not before). - Fixed a bug that prevented a user from sending data and immediately shuting down with one send operation. - Moved to use hashdestroy instead of free() in our macros. - Fixed an init problem in our timed_wait vtag where we did not fully initialize our time-wait blocks. - Timer stops were re-positioned. - A pcb cleanup method was added, however this probably will not be used in BSD.. unless we make module loadable protocols - I think this fixes the mysterious timer bug.. it was a ordering of locks problem in the way we did timers. It now conforms to the timeout(9) manual (except for the _drain part, we had to do this a different way due to locks). - Fixed error return code so we get either CONNREUSED or CONNRESET depending on where one is in progression - Purged an unused clone macro. - Fixed a read erro code issue where we were NOT getting the proper error when the connection was reset. - Purged an unused clone macro. - Fixed a read erro code issue where we were NOT getting the proper error when the connection was reset. Approved by: gnn
2007-01-15 15:12:10 +00:00
if (SCTP_IPV6_V6ONLY(inp6)) {
struct sockaddr_in6 sin6;
in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6);
memcpy(addr, &sin6, sizeof(struct sockaddr_in6));
}
}
return (error);
}
static int
sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
struct sockaddr *addr = *nam;
struct in6pcb *inp6 = sotoin6pcb(so);
int error;
if (inp6 == NULL)
return EINVAL;
/* allow v6 addresses precedence */
error = sctp6_peeraddr(so, nam);
if (error) {
/* try v4 next if v6 failed */
error = sctp_peeraddr(so, nam);
if (error) {
return (error);
}
/* if I'm V6ONLY, convert it to v4-mapped */
- Macroizes the V6ONLY flag check. - Added a short time wait (not used yet) constant - Corrected the type of the crc32c table (it was unsigned long and really is a uint32_t - Got rid of the user of MHeaders until they are truely needed by lower layers. - Fixed an initialization problem in the readq structure (ordering was off). - Found yet another collision bug when the random number generator returns two numbers on one side (during a collision) that are the same. Also added some tracking of cookies that will go away when we know that we have the last collision bug gone. - Fixed an init bug for book_size_scale, that was causing Early FR code to run when it should not. - Fixed a flight size tracking bug that was associated with Early FR but due to above bug also effected all FR's - Fixed it so Max Burst also will apply to Fast Retransmit. - Fixed a bug in the temporary logging code that allowed a static log array overflow - hashinit_flags is now used. - Two last mcopym's were converted to the macro sctp_m_copym that has always been used by all other places - macro sctp_m_copym was converted to upper case. - We now validate sinfo_flags on input (we did not before). - Fixed a bug that prevented a user from sending data and immediately shuting down with one send operation. - Moved to use hashdestroy instead of free() in our macros. - Fixed an init problem in our timed_wait vtag where we did not fully initialize our time-wait blocks. - Timer stops were re-positioned. - A pcb cleanup method was added, however this probably will not be used in BSD.. unless we make module loadable protocols - I think this fixes the mysterious timer bug.. it was a ordering of locks problem in the way we did timers. It now conforms to the timeout(9) manual (except for the _drain part, we had to do this a different way due to locks). - Fixed error return code so we get either CONNREUSED or CONNRESET depending on where one is in progression - Purged an unused clone macro. - Fixed a read erro code issue where we were NOT getting the proper error when the connection was reset. - Purged an unused clone macro. - Fixed a read erro code issue where we were NOT getting the proper error when the connection was reset. Approved by: gnn
2007-01-15 15:12:10 +00:00
if (SCTP_IPV6_V6ONLY(inp6)) {
struct sockaddr_in6 sin6;
in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6);
memcpy(addr, &sin6, sizeof(struct sockaddr_in6));
}
}
return error;
}
struct pr_usrreqs sctp6_usrreqs = {
.pru_abort = sctp6_abort,
.pru_accept = sctp_accept,
.pru_attach = sctp6_attach,
.pru_bind = sctp6_bind,
.pru_connect = sctp6_connect,
.pru_control = in6_control,
.pru_close = sctp6_close,
.pru_detach = sctp6_close,
.pru_sopoll = sopoll_generic,
.pru_disconnect = sctp6_disconnect,
.pru_listen = sctp_listen,
.pru_peeraddr = sctp6_getpeeraddr,
.pru_send = sctp6_send,
.pru_shutdown = sctp_shutdown,
.pru_sockaddr = sctp6_in6getaddr,
.pru_sosend = sctp_sosend,
.pru_soreceive = sctp_soreceive
};