From e31c8aa8e526df26ff15a5aab776940a6ebaf2aa Mon Sep 17 00:00:00 2001 From: rwatson Date: Tue, 8 Jul 2008 17:22:59 +0000 Subject: [PATCH] Provide some initial chicken-scratching annotations of locking for struct inpcb. Prodded by: bz MFC after: 3 days --- sys/netinet/in_pcb.h | 86 ++++++++++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 27 deletions(-) diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 6e5c92eb52c6..1e6a430b8f72 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -115,17 +115,49 @@ struct in_conninfo { struct icmp6_filter; +/*- + * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 + * and IPv6 sockets. In the case of TCP, further per-connection state is + * hung off of inp_ppcb most of the time. Almost all fields of struct inpcb + * are static after creation or protected by a per-inpcb rwlock, inp_lock. A + * few fields also require the global pcbinfo lock for the inpcb to be held, + * when modified, such as the global connection lists and hashes, as well as + * binding information (which affects which hash a connection is on). This + * model means that connections can be looked up without holding the + * per-connection lock, which is important for performance when attempting to + * find the connection for a packet given its IP and port tuple. Writing to + * these fields requires that write locks be held on both the inpcb and global locks. + * + * Key: + * (c) - Constant after initialization + * (i) - Protected by the inpcb lock + * (p) - Protected by the pcbinfo lock for the inpcb + * (s) - Protected by another subsystem's locks + * (x) - Undefined locking + * + * A few other notes: + * + * When a read lock is held, stability of the field is guaranteed; to write + * to a field, a write lock must generally be held. 
+ * + * netinet/netinet6-layer code should not assume that the inp_socket pointer + * is safe to dereference without inp_lock being held, even for protocols + * other than TCP (where the inpcb persists during TIMEWAIT even after the + * socket has been freed), or there may be close(2)-related races. + * + * The inp_vflag field is overloaded, and would otherwise ideally be (c). + */ struct inpcb { - LIST_ENTRY(inpcb) inp_hash; /* hash list */ - LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */ - void *inp_ppcb; /* pointer to per-protocol pcb */ - struct inpcbinfo *inp_pcbinfo; /* PCB list info */ - struct socket *inp_socket; /* back pointer to socket */ + LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */ + LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */ + void *inp_ppcb; /* (i) pointer to per-protocol pcb */ + struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */ + struct socket *inp_socket; /* (i) back pointer to socket */ - u_int32_t inp_flow; - int inp_flags; /* generic IP/datagram flags */ + u_int32_t inp_flow; /* (i) IPv6 flow information */ + int inp_flags; /* (i) generic IP/datagram flags */ - u_char inp_vflag; /* IP version flag (v4/v6) */ + u_char inp_vflag; /* (i) IP version flag (v4/v6) */ #define INP_IPV4 0x1 #define INP_IPV6 0x2 #define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */ @@ -133,24 +165,24 @@ struct inpcb { #define INP_ONESBCAST 0x10 /* send all-ones broadcast */ #define INP_DROPPED 0x20 /* protocol drop flag */ #define INP_SOCKREF 0x40 /* strong socket reference */ - u_char inp_ip_ttl; /* time to live proto */ - u_char inp_ip_p; /* protocol proto */ - u_char inp_ip_minttl; /* minimum TTL or drop */ - uint32_t inp_ispare1; /* connection id / queue id */ - void *inp_pspare[2]; /* rtentry / general use */ + u_char inp_ip_ttl; /* (i) time to live proto */ + u_char inp_ip_p; /* (c) protocol proto */ + u_char inp_ip_minttl; /* (i) minimum TTL or drop */ + uint32_t inp_ispare1; /* (x) connection id / queue 
id */ + void *inp_pspare[2]; /* (x) rtentry / general use */ /* Local and foreign ports, local and foreign addr. */ struct in_conninfo inp_inc; - /* list for this PCB's local port */ - struct label *inp_label; /* MAC label */ - struct inpcbpolicy *inp_sp; /* for IPSEC */ + /* (i/p) list for PCB's local port */ + struct label *inp_label; /* (i) MAC label */ + struct inpcbpolicy *inp_sp; /* (s) for IPSEC */ /* Protocol-dependent part; options. */ struct { - u_char inp4_ip_tos; /* type of service proto */ - struct mbuf *inp4_options; /* IP options */ - struct ip_moptions *inp4_moptions; /* IP multicast options */ + u_char inp4_ip_tos; /* (i) type of service proto */ + struct mbuf *inp4_options; /* (i) IP options */ + struct ip_moptions *inp4_moptions; /* (i) IP multicast options */ } inp_depend4; #define inp_fport inp_inc.inc_fport #define inp_lport inp_inc.inc_lport @@ -160,22 +192,22 @@ struct inpcb { #define inp_options inp_depend4.inp4_options #define inp_moptions inp_depend4.inp4_moptions struct { - /* IP options */ + /* (i) IP options */ struct mbuf *inp6_options; - /* IP6 options for outgoing packets */ + /* (i) IP6 options for outgoing packets */ struct ip6_pktopts *inp6_outputopts; - /* IP multicast options */ + /* (i) IP multicast options */ struct ip6_moptions *inp6_moptions; - /* ICMPv6 code type filter */ + /* (i) ICMPv6 code type filter */ struct icmp6_filter *inp6_icmp6filt; - /* IPV6_CHECKSUM setsockopt */ + /* (i) IPV6_CHECKSUM setsockopt */ int inp6_cksum; short inp6_hops; } inp_depend6; - LIST_ENTRY(inpcb) inp_portlist; - struct inpcbport *inp_phd; /* head of this list */ + LIST_ENTRY(inpcb) inp_portlist; /* (i/p) */ + struct inpcbport *inp_phd; /* (i/p) head of this list */ #define inp_zero_size offsetof(struct inpcb, inp_gencnt) - inp_gen_t inp_gencnt; /* generation count of this instance */ + inp_gen_t inp_gencnt; /* (c) generation count of this instance */ struct rwlock inp_lock; #define in6p_faddr inp_inc.inc6_faddr