patch-2.3.15 linux/net/ipv4/ip_output.c

diff -u --recursive --new-file v2.3.14/linux/net/ipv4/ip_output.c linux/net/ipv4/ip_output.c
@@ -5,7 +5,7 @@
  *
  *		The Internet Protocol (IP) output module.
  *
- * Version:	$Id: ip_output.c,v 1.67 1999/03/25 00:43:00 davem Exp $
+ * Version:	$Id: ip_output.c,v 1.69 1999/08/20 11:05:48 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -72,8 +72,7 @@
 #include <net/raw.h>
 #include <net/checksum.h>
 #include <linux/igmp.h>
-#include <linux/ip_fw.h>
-#include <linux/firewall.h>
+#include <linux/netfilter_ipv4.h>
 #include <linux/mroute.h>
 #include <linux/netlink.h>
 
@@ -83,7 +82,6 @@
 
 int sysctl_ip_dynaddr = 0;
 
-
 int ip_id_count = 0;
 
 /* Generate a checksum for an outgoing IP datagram. */
@@ -93,6 +91,61 @@
 	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 }
 
+/* dev_loopback_xmit for use with netfilter. */
+static int ip_dev_loopback_xmit(struct sk_buff *newskb)
+{
+	newskb->mac.raw = newskb->data;
+	skb_pull(newskb, newskb->nh.raw - newskb->data);
+	newskb->pkt_type = PACKET_LOOPBACK;
+	newskb->ip_summed = CHECKSUM_UNNECESSARY;
+	BUG_TRAP(newskb->dst);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	nf_debug_ip_loopback_xmit(newskb);
+#endif
+	netif_rx(newskb);
+	return 0;
+}
+
+#ifdef CONFIG_NETFILTER
+/* To preserve the cute illusion that a locally-generated packet can
+   be mangled before routing, we actually reroute if a hook altered
+   the packet. -RR */
+static int route_me_harder(struct sk_buff *skb)
+{
+	struct iphdr *iph = skb->nh.iph;
+	struct rtable *rt;
+
+	if (ip_route_output(&rt, iph->daddr, iph->saddr,
+			    RT_TOS(iph->tos) | RTO_CONN,
+			    skb->sk ? skb->sk->bound_dev_if : 0)) {
+		printk("route_me_harder: No more route.\n");
+		return -EINVAL;
+	}
+
+	/* Drop old route. */
+	dst_release(skb->dst);
+
+	skb->dst = &rt->u.dst;
+	return 0;
+}
+#endif
+
+/* Do route recalc if netfilter changes skb. */
+static inline int
+output_maybe_reroute(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETFILTER
+	if (skb->nfcache & NFC_ALTERED) {
+		if (route_me_harder(skb) != 0) {
+			kfree_skb(skb);
+			return -EINVAL;
+		}
+	}
+#endif
+	return skb->dst->output(skb);
+}
+
 /* 
  *		Add an ip header to a skbuff and send it out.
  */
@@ -101,7 +154,6 @@
 {
 	struct rtable *rt = (struct rtable *)skb->dst;
 	struct iphdr *iph;
-	struct net_device *dev;
 	
 	/* Build the IP header. */
 	if (opt)
@@ -111,11 +163,11 @@
 
 	iph->version  = 4;
 	iph->ihl      = 5;
-	iph->tos      = sk->ip_tos;
+	iph->tos      = sk->protinfo.af_inet.tos;
 	iph->frag_off = 0;
 	if (ip_dont_fragment(sk, &rt->u.dst))
 		iph->frag_off |= htons(IP_DF);
-	iph->ttl      = sk->ip_ttl;
+	iph->ttl      = sk->protinfo.af_inet.ttl;
 	iph->daddr    = rt->rt_dst;
 	iph->saddr    = rt->rt_src;
 	iph->protocol = sk->protocol;
@@ -127,32 +179,45 @@
 		iph->ihl += opt->optlen>>2;
 		ip_options_build(skb, opt, daddr, rt, 0);
 	}
+	ip_send_check(iph);
 
-	dev = rt->u.dst.dev;
+	/* Send it out. */
+	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, NULL,
+		output_maybe_reroute);
+}
 
-#ifdef CONFIG_FIREWALL
-	/* Now we have no better mechanism to notify about error. */
-	switch (call_out_firewall(PF_INET, dev, iph, NULL, &skb)) {
-	case FW_REJECT:
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
-		/* Fall thru... */
-	case FW_BLOCK:
-	case FW_QUEUE:
-		kfree_skb(skb);
-		return;
-	}
-#endif
+static inline int ip_finish_output2(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct hh_cache *hh = dst->hh;
 
-	ip_send_check(iph);
+#ifdef CONFIG_NETFILTER_DEBUG
+	nf_debug_ip_finish_output2(skb);
+#endif /*CONFIG_NETFILTER_DEBUG*/
 
-	/* Send it out. */
-	skb->dst->output(skb);
-	return;
+	if (hh) {
+		read_lock_bh(&hh->hh_lock);
+  		memcpy(skb->data - 16, hh->hh_data, 16);
+		read_unlock_bh(&hh->hh_lock);
+	        skb_push(skb, hh->hh_len);
+		return hh->hh_output(skb);
+	} else if (dst->neighbour)
+		return dst->neighbour->output(skb);
+
+	printk(KERN_DEBUG "khm\n");
+	kfree_skb(skb);
+	return -EINVAL;
 }
 
-int __ip_finish_output(struct sk_buff *skb)
+__inline__ int ip_finish_output(struct sk_buff *skb)
 {
-	return ip_finish_output(skb);
+	struct net_device *dev = skb->dst->dev;
+
+	skb->dev = dev;
+	skb->protocol = __constant_htons(ETH_P_IP);
+
+	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
+		       ip_finish_output2);
 }
 
 int ip_mc_output(struct sk_buff *skb)
@@ -164,7 +229,6 @@
 	/*
 	 *	If the indicated interface is up and running, send the packet.
 	 */
-	 
 	ip_statistics.IpOutRequests++;
 #ifdef CONFIG_IP_ROUTE_NAT
 	if (rt->rt_flags & RTCF_NAT)
@@ -178,7 +242,7 @@
 	 *	Multicasts are looped back for other local users
 	 */
 
-	if (rt->rt_flags&RTCF_MULTICAST && (!sk || sk->ip_mc_loop)) {
+	if (rt->rt_flags&RTCF_MULTICAST && (!sk || sk->protinfo.af_inet.mc_loop)) {
 #ifdef CONFIG_IP_MROUTE
 		/* Small optimization: do not loopback not local frames,
 		   which returned after forwarding; they will be  dropped
@@ -190,7 +254,13 @@
 		 */
 		if ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
 #endif
-		dev_loopback_xmit(skb);
+		{
+			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+			if (newskb)
+				NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, newskb, NULL,
+					newskb->dev, 
+					ip_dev_loopback_xmit);
+		}
 
 		/* Multicasts with ttl 0 must not go beyond the host */
 
@@ -200,8 +270,12 @@
 		}
 	}
 
-	if (rt->rt_flags&RTCF_BROADCAST)
-		dev_loopback_xmit(skb);
+	if (rt->rt_flags&RTCF_BROADCAST) {
+		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+		if (newskb)
+			NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
+				newskb->dev, ip_dev_loopback_xmit);
+	}
 
 	return ip_finish_output(skb);
 }
@@ -231,82 +305,33 @@
  * most likely make other reliable transport layers above IP easier
  * to implement under Linux.
  */
-void ip_queue_xmit(struct sk_buff *skb)
+static inline int ip_queue_xmit2(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
-	struct ip_options *opt = sk->opt;
-	struct rtable *rt;
+	struct rtable *rt = (struct rtable *)skb->dst;
 	struct net_device *dev;
-	struct iphdr *iph;
-	unsigned int tot_len;
-
-	/* Make sure we can route this packet. */
-	rt = (struct rtable *) sk->dst_cache;
-	if(rt == NULL || rt->u.dst.obsolete) {
-		u32 daddr;
-
-		sk->dst_cache = NULL;
-		ip_rt_put(rt);
-
-		/* Use correct destination address if we have options. */
-		daddr = sk->daddr;
-		if(opt && opt->srr)
-			daddr = opt->faddr;
-
-		/* If this fails, retransmit mechanism of transport layer will
-		 * keep trying until route appears or the connection times itself
-		 * out.
-		 */
-		if(ip_route_output(&rt, daddr, sk->saddr,
-				   RT_TOS(sk->ip_tos) | RTO_CONN | sk->localroute,
-				   sk->bound_dev_if))
-			goto drop;
-		sk->dst_cache = &rt->u.dst;
-	}
-	if(opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
-		goto no_route;
-
-	/* We have a route, so grab a reference. */
-	skb->dst = dst_clone(sk->dst_cache);
+	struct iphdr *iph = skb->nh.iph;
 
-	/* OK, we know where to send it, allocate and build IP header. */
-	iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
-	iph->version  = 4;
-	iph->ihl      = 5;
-	iph->tos      = sk->ip_tos;
-	iph->frag_off = 0;
-	iph->ttl      = sk->ip_ttl;
-	iph->daddr    = rt->rt_dst;
-	iph->saddr    = rt->rt_src;
-	iph->protocol = sk->protocol;
-	skb->nh.iph   = iph;
-	/* Transport layer set skb->h.foo itself. */
-
-	if(opt && opt->optlen) {
-		iph->ihl += opt->optlen >> 2;
-		ip_options_build(skb, opt, sk->daddr, rt, 0);
+#ifdef CONFIG_NETFILTER
+	/* BLUE-PEN-FOR-ALEXEY.  I don't understand; you mean I can't
+           hold the route as I pass the packet to userspace? -- RR
+
+	   You may hold it, if you really hold it. F.e. if netfilter
+	   does not destroy handed skb with skb->dst attached, it
+	   will be held. When it was stored in info->arg, then
+	   it was not held apparently. Now (without second arg) it is evident,
+	   that it is clean.				   --ANK
+	 */
+	if (rt==NULL || (skb->nfcache & NFC_ALTERED)) {
+		if (route_me_harder(skb) != 0) {
+			kfree_skb(skb);
+			return -EHOSTUNREACH;
+		}
 	}
-
-	tot_len = skb->len;
-	iph->tot_len = htons(tot_len);
-	iph->id = htons(ip_id_count++);
+#endif
 
 	dev = rt->u.dst.dev;
 
-#ifdef CONFIG_FIREWALL
-	/* Now we have no better mechanism to notify about error. */
-	switch (call_out_firewall(PF_INET, dev, iph, NULL, &skb)) {
-	case FW_REJECT:
-		start_bh_atomic();
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
-		end_bh_atomic();
-		/* Fall thru... */
-	case FW_BLOCK:
-	case FW_QUEUE:
- 		goto drop;
-	}
-#endif
-
 	/* This can happen when the transport layer has segments queued
 	 * with a cached route, and by the time we get here things are
 	 * re-routed to a device with a different MTU than the original
@@ -318,17 +343,14 @@
 		skb2 = skb_realloc_headroom(skb, (dev->hard_header_len + 15) & ~15);
 		kfree_skb(skb);
 		if (skb2 == NULL)
-			return;
+			return -ENOMEM;
 		if (sk)
 			skb_set_owner_w(skb, sk);
 		skb = skb2;
 		iph = skb->nh.iph;
 	}
 
-	/* Do we need to fragment.  Again this is inefficient.  We
-	 * need to somehow lock the original buffer and use bits of it.
-	 */
-	if (tot_len > rt->u.dst.pmtu)
+	if (skb->len > rt->u.dst.pmtu)
 		goto fragment;
 
 	if (ip_dont_fragment(sk, &rt->u.dst))
@@ -338,37 +360,85 @@
 	ip_send_check(iph);
 
 	skb->priority = sk->priority;
-	skb->dst->output(skb);
-	return;
+	return skb->dst->output(skb);
 
 fragment:
-	if (ip_dont_fragment(sk, &rt->u.dst) &&
-	    tot_len > (iph->ihl<<2) + sizeof(struct tcphdr)+16) {
+	if (ip_dont_fragment(sk, &rt->u.dst)) {
 		/* Reject packet ONLY if TCP might fragment
-		   it itself, if were careful enough.
-		   Test is not precise (f.e. it does not take sacks
-		   into account). Actually, tcp should make it. --ANK (980801)
+		 * it itself, if were careful enough.
 		 */
 		iph->frag_off |= __constant_htons(IP_DF);
 		NETDEBUG(printk(KERN_DEBUG "sending pkt_too_big to self\n"));
 
-		/* icmp_send is not reenterable, so that bh_atomic... --ANK */
-		start_bh_atomic();
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 			  htonl(rt->u.dst.pmtu));
-		end_bh_atomic();
-		goto drop;
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+	return ip_fragment(skb, skb->dst->output);
+}
+
+int ip_queue_xmit(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+	struct ip_options *opt = sk->protinfo.af_inet.opt;
+	struct rtable *rt;
+	struct iphdr *iph;
+
+	/* Make sure we can route this packet. */
+	rt = (struct rtable *)sk_dst_check(sk, 0);
+	if (rt == NULL) {
+		u32 daddr;
+
+		/* Use correct destination address if we have options. */
+		daddr = sk->daddr;
+		if(opt && opt->srr)
+			daddr = opt->faddr;
+
+		/* If this fails, retransmit mechanism of transport layer will
+		 * keep trying until route appears or the connection times itself
+		 * out.
+		 */
+		if (ip_route_output(&rt, daddr, sk->saddr,
+				    RT_TOS(sk->protinfo.af_inet.tos) | RTO_CONN | sk->localroute,
+				    sk->bound_dev_if))
+			goto no_route;
+		dst_clone(&rt->u.dst);
+		sk_dst_set(sk, &rt->u.dst);
+	}
+	skb->dst = &rt->u.dst;
+
+	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
+		goto no_route;
+
+	/* OK, we know where to send it, allocate and build IP header. */
+	iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+	iph->version  = 4;
+	iph->ihl      = 5;
+	iph->tos      = sk->protinfo.af_inet.tos;
+	iph->frag_off = 0;
+	iph->ttl      = sk->protinfo.af_inet.ttl;
+	iph->daddr    = rt->rt_dst;
+	iph->saddr    = rt->rt_src;
+	iph->protocol = sk->protocol;
+	skb->nh.iph   = iph;
+	/* Transport layer set skb->h.foo itself. */
+
+	if(opt && opt->optlen) {
+		iph->ihl += opt->optlen >> 2;
+		ip_options_build(skb, opt, sk->daddr, rt, 0);
 	}
-	ip_fragment(skb, skb->dst->output);
-	return;
+
+	iph->tot_len = htons(skb->len);
+	iph->id = htons(ip_id_count++);
+
+	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+		       ip_queue_xmit2);
 
 no_route:
-	sk->dst_cache = NULL;
-	ip_rt_put(rt);
 	ip_statistics.IpOutNoRoutes++;
-	/* Fall through... */
-drop:
 	kfree_skb(skb);
+	return -EHOSTUNREACH;
 }
 
 /*
@@ -391,7 +461,7 @@
  *	length to be copied.
  */
 
-int ip_build_xmit_slow(struct sock *sk,
+static int ip_build_xmit_slow(struct sock *sk,
 		  int getfrag (const void *,
 			       char *,
 			       unsigned int,	
@@ -454,8 +524,7 @@
 		fraglen = maxfraglen;
 		offset -= maxfraglen-fragheaderlen;
 	}
-	
-	
+
 	/*
 	 *	The last fragment will not have MF (more fragments) set.
 	 */
@@ -468,16 +537,12 @@
 	 
 	if (offset > 0 && df) { 
 		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
- 		return(-EMSGSIZE);
+ 		return -EMSGSIZE;
 	}
+	if (flags&MSG_PROBE)
+		goto out;
 
 	/*
-	 *	Lock the device lists.
-	 */
-
-	dev_lock_list();
-	
-	/*
 	 *	Get an identifier
 	 */
 	 
@@ -528,15 +593,15 @@
 				ip_options_build(skb, opt,
 						 ipc->addr, rt, offset);
 			}
-			iph->tos = sk->ip_tos;
+			iph->tos = sk->protinfo.af_inet.tos;
 			iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
 			iph->id = id;
 			iph->frag_off = htons(offset>>3);
 			iph->frag_off |= mf|df;
 			if (rt->rt_type == RTN_MULTICAST)
-				iph->ttl = sk->ip_mc_ttl;
+				iph->ttl = sk->protinfo.af_inet.mc_ttl;
 			else
-				iph->ttl = sk->ip_ttl;
+				iph->ttl = sk->protinfo.af_inet.ttl;
 			iph->protocol = sk->protocol;
 			iph->check = 0;
 			iph->saddr = rt->rt_src;
@@ -566,38 +631,28 @@
 
 		nfrags++;
 
-#ifdef CONFIG_FIREWALL
-		switch (call_out_firewall(PF_INET, rt->u.dst.dev, skb->nh.iph, NULL, &skb)) {
-		case FW_QUEUE:
-			kfree_skb(skb);
-			continue;
-		case FW_BLOCK:
-		case FW_REJECT:
-			kfree_skb(skb);
-			err = -EPERM;
-			goto error;
+		err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, 
+			      skb->dst->dev, output_maybe_reroute);
+		if (err) {
+			if (err > 0)
+				err = sk->protinfo.af_inet.recverr ? net_xmit_errno(err) : 0;
+			if (err)
+				goto error;
 		}
-#endif
-
-		err = -ENETDOWN;
-		if (rt->u.dst.output(skb))
-			goto error;
 	} while (offset >= 0);
 
 	if (nfrags>1)
 		ip_statistics.IpFragCreates += nfrags;
-	dev_unlock_list();
+out:
 	return 0;
 
 error:
 	ip_statistics.IpOutDiscards++;
 	if (nfrags>1)
 		ip_statistics.IpFragCreates += nfrags;
-	dev_unlock_list();
 	return err; 
 }
 
-
 /*
  *	Fast path for unfragmented packets.
  */
@@ -622,7 +677,7 @@
 	 *	choice RAW frames within 20 bytes of maximum size(rare) to the long path
 	 */
 
-	if (!sk->ip_hdrincl) {
+	if (!sk->protinfo.af_inet.hdrincl) {
 		length += sizeof(struct iphdr);
 
 		/*
@@ -636,6 +691,8 @@
 			return -EMSGSIZE;
 		}
 	}
+	if (flags&MSG_PROBE)
+		goto out;
 
 	/*
 	 *	Do path mtu discovery if needed.
@@ -662,18 +719,16 @@
 
 	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
 	
-	dev_lock_list();
-	
-	if(!sk->ip_hdrincl) {
+	if(!sk->protinfo.af_inet.hdrincl) {
 		iph->version=4;
 		iph->ihl=5;
-		iph->tos=sk->ip_tos;
+		iph->tos=sk->protinfo.af_inet.tos;
 		iph->tot_len = htons(length);
 		iph->id=htons(ip_id_count++);
 		iph->frag_off = df;
-		iph->ttl=sk->ip_mc_ttl;
+		iph->ttl=sk->protinfo.af_inet.mc_ttl;
 		if (rt->rt_type != RTN_MULTICAST)
-			iph->ttl=sk->ip_ttl;
+			iph->ttl=sk->protinfo.af_inet.ttl;
 		iph->protocol=sk->protocol;
 		iph->saddr=rt->rt_src;
 		iph->daddr=rt->rt_dst;
@@ -684,25 +739,17 @@
 	else
 		err = getfrag(frag, (void *)iph, 0, length);
 
-	dev_unlock_list();
-
 	if (err)
 		goto error_fault;
 
-#ifdef CONFIG_FIREWALL
-	switch (call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb)) {
-	case FW_QUEUE:
-		kfree_skb(skb);
-		return 0;
-	case FW_BLOCK:
-	case FW_REJECT:
-		kfree_skb(skb);
-		err = -EPERM;
+	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+		      output_maybe_reroute);
+	if (err > 0)
+		err = sk->protinfo.af_inet.recverr ? net_xmit_errno(err) : 0;
+	if (err)
 		goto error;
-	}
-#endif
-
-	return rt->u.dst.output(skb);
+out:
+	return 0;
 
 error_fault:
 	err = -EFAULT;
@@ -723,7 +770,7 @@
  *	Yes this is inefficient, feel free to submit a quicker one.
  */
 
-void ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
+int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 {
 	struct iphdr *iph;
 	unsigned char *raw;
@@ -734,6 +781,7 @@
 	int offset;
 	int not_last_frag;
 	struct rtable *rt = (struct rtable*)skb->dst;
+	int err = 0;
 
 	dev = rt->u.dst.dev;
 
@@ -754,19 +802,6 @@
 	ptr = raw + hlen;			/* Where to start from */
 
 	/*
-	 *	The protocol doesn't seem to say what to do in the case that the
-	 *	frame + options doesn't fit the mtu. As it used to fall down dead
-	 *	in this case we were fortunate it didn't happen
-	 *
-	 *	It is impossible, because mtu>=68. --ANK (980801)
-	 */
-
-#ifdef CONFIG_NET_PARANOIA
-	if (mtu<8) 
-		goto fail;
-#endif
-
-	/*
 	 *	Fragment the datagram.
 	 */
 
@@ -793,6 +828,7 @@
 
 		if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
 			NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
+			err = -ENOMEM;
 			goto fail;
 		}
 
@@ -862,15 +898,18 @@
 
 		ip_send_check(iph);
 
-		output(skb2);
+		err = output(skb2);
+		if (err)
+			goto fail;
 	}
 	kfree_skb(skb);
 	ip_statistics.IpFragOKs++;
-	return;
+	return err;
 	
 fail:
 	kfree_skb(skb); 
-	ip_statistics.IpFragFails++; 
+	ip_statistics.IpFragFails++;
+	return err;
 }
 
 /*
@@ -926,24 +965,31 @@
 	struct ipcm_cookie ipc;
 	u32 daddr;
 	struct rtable *rt = (struct rtable*)skb->dst;
-	
+
 	if (ip_options_echo(&replyopts.opt, skb))
 		return;
-	
-	sk->ip_tos = skb->nh.iph->tos;
-	sk->priority = skb->priority;
-	sk->protocol = skb->nh.iph->protocol;
 
 	daddr = ipc.addr = rt->rt_src;
 	ipc.opt = &replyopts.opt;
-	
+
 	if (ipc.opt->srr)
 		daddr = replyopts.opt.faddr;
 	if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
 		return;
 
-	/* And let IP do all the hard work. */
+	/* And let IP do all the hard work.
+
+	   This chunk is not reenterable, hence spinlock.
+	   Note that it uses the fact, that this function is called
+	   with locally disabled BH and that sk cannot be already spinlocked.
+	 */
+	bh_lock_sock(sk);
+	sk->protinfo.af_inet.tos = skb->nh.iph->tos;
+	sk->priority = skb->priority;
+	sk->protocol = skb->nh.iph->protocol;
 	ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT);
+	bh_unlock_sock(sk);
+
 	ip_rt_put(rt);
 }
 
@@ -956,7 +1002,7 @@
 	__constant_htons(ETH_P_IP),
 	NULL,	/* All devices */
 	ip_rcv,
-	NULL,
+	(void*)1,
 	NULL,
 };
 

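For reference, the recurring change in this patch is that every former CONFIG_FIREWALL call_out_firewall() switch is replaced by NF_HOOK(PF_INET, hook, skb, in, out, okfn), where the final argument is a continuation that finishes the send once the registered hooks accept the packet (output_maybe_reroute, ip_queue_xmit2, ip_finish_output2 or ip_dev_loopback_xmit above). The toy program below is only a minimal, self-contained sketch of that verdict-plus-continuation control flow; every toy_* name is a hypothetical stand-in, not a kernel symbol, and nothing here is part of the patch itself.

	/* Toy model of the NF_HOOK() pattern introduced by this patch:
	 * run the hook, then call the continuation ("okfn") on accept.
	 * All toy_* identifiers are illustrative only.
	 */
	#include <stdio.h>

	struct toy_skb { int altered; };

	enum toy_verdict { TOY_ACCEPT, TOY_DROP };

	/* Stand-in for a hook registered at NF_IP_LOCAL_OUT. */
	static enum toy_verdict toy_local_out_hook(struct toy_skb *skb)
	{
		skb->altered = 1;	/* pretend the hook mangled the header */
		return TOY_ACCEPT;
	}

	/* Stand-in for output_maybe_reroute(): re-resolve the route if a
	 * hook changed the packet, then hand it to the output routine. */
	static int toy_okfn(struct toy_skb *skb)
	{
		if (skb->altered)
			printf("rerouting (cf. route_me_harder)\n");
		printf("handing skb to dst->output()\n");
		return 0;
	}

	/* Stand-in for NF_HOOK(): consult the hook, continue on ACCEPT. */
	static int toy_nf_hook(struct toy_skb *skb,
			       enum toy_verdict (*hook)(struct toy_skb *),
			       int (*okfn)(struct toy_skb *))
	{
		if (hook(skb) == TOY_ACCEPT)
			return okfn(skb);
		printf("dropping skb\n");
		return -1;
	}

	int main(void)
	{
		struct toy_skb skb = { 0 };
		return toy_nf_hook(&skb, toy_local_out_hook, toy_okfn);
	}

If the hook leaves the packet alone, the continuation runs unchanged; if the hook altered it (NFC_ALTERED in the real code), the continuation re-routes via route_me_harder() before calling skb->dst->output(), which is exactly what output_maybe_reroute() and ip_queue_xmit2() do in the patch above.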