Drop RT6_LOOKUP_F_DST_NOREF from the IPv6 route lookup paths.

This patch removes the RT6_LOOKUP_F_DST_NOREF flag together with the
ip6_rt_put_flags() and ip6_route_output_flags_noref() helpers.  After it,
the lookup paths touched here (ip6_pol_route(), fib6_rule_lookup(),
__fib6_rule_action(), vrf_link_scope_lookup()) take a reference on the dst
they return, ip6_route_input() attaches the result with skb_dst_set(), and
l3mdev_link_scope_lookup() takes rcu_read_lock() itself instead of
requiring the caller to hold it.

NOTE(review): the line structure of this patch was restored from a
whitespace-collapsed copy.  Hunk line counts were checked against every
"@@" header, but leading indentation (tabs/alignment) was reconstructed
and should be verified against the target tree before applying.

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 6e84328bdd40..97fb0cb1b97a 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1084,14 +1084,12 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
 #if IS_ENABLED(CONFIG_IPV6)
 /* send to link-local or multicast address via interface enslaved to
  * VRF device. Force lookup to VRF table without changing flow struct
- * Note: Caller to this function must hold rcu_read_lock() and no refcnt
- * is taken on the dst by this function.
  */
 static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
 					       struct flowi6 *fl6)
 {
 	struct net *net = dev_net(dev);
-	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_DST_NOREF;
+	int flags = RT6_LOOKUP_F_IFACE;
 	struct dst_entry *dst = NULL;
 	struct rt6_info *rt;
 
@@ -1101,6 +1099,7 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
 	 */
 	if (fl6->flowi6_oif == dev->ifindex) {
 		dst = &net->ipv6.ip6_null_entry->dst;
+		dst_hold(dst);
 		return dst;
 	}
 
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index b69c16cbbf71..87fd7250eb3c 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -36,7 +36,6 @@ struct route_info {
 #define RT6_LOOKUP_F_SRCPREF_PUBLIC	0x00000010
 #define RT6_LOOKUP_F_SRCPREF_COA	0x00000020
 #define RT6_LOOKUP_F_IGNORE_LINKSTATE	0x00000040
-#define RT6_LOOKUP_F_DST_NOREF		0x00000080
 
 /* We do not (yet ?) support IPv6 jumbograms (RFC 2675)
  * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header
@@ -84,10 +83,6 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
 					 struct flowi6 *fl6,
 					 const struct sk_buff *skb, int flags);
 
-struct dst_entry *ip6_route_output_flags_noref(struct net *net,
-					       const struct sock *sk,
-					       struct flowi6 *fl6, int flags);
-
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
 					 struct flowi6 *fl6, int flags);
 
@@ -98,16 +93,6 @@ static inline struct dst_entry *ip6_route_output(struct net *net,
 	return ip6_route_output_flags(net, sk, fl6, 0);
 }
 
-/* Only conditionally release dst if flags indicates
- * !RT6_LOOKUP_F_DST_NOREF or dst is in uncached_list.
- */
-static inline void ip6_rt_put_flags(struct rt6_info *rt, int flags)
-{
-	if (!(flags & RT6_LOOKUP_F_DST_NOREF) ||
-	    !list_empty(&rt->rt6i_uncached))
-		ip6_rt_put(rt);
-}
-
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 				   const struct sk_buff *skb, int flags);
 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index d22b6c140f23..bcfae13409b5 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -113,15 +113,14 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 		rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
 		if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
 			return &rt->dst;
-		ip6_rt_put_flags(rt, flags);
+		ip6_rt_put(rt);
 		rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
 		if (rt->dst.error != -EAGAIN)
 			return &rt->dst;
-		ip6_rt_put_flags(rt, flags);
+		ip6_rt_put(rt);
 	}
 
-	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
-		dst_hold(&net->ipv6.ip6_null_entry->dst);
+	dst_hold(&net->ipv6.ip6_null_entry->dst);
 	return &net->ipv6.ip6_null_entry->dst;
 }
 
@@ -238,14 +237,13 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 		goto out;
 	}
 again:
-	ip6_rt_put_flags(rt, flags);
+	ip6_rt_put(rt);
 	err = -EAGAIN;
 	rt = NULL;
 	goto out;
 
 discard_pkt:
-	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
-		dst_hold(&rt->dst);
+	dst_hold(&rt->dst);
 out:
 	res->rt6 = rt;
 	return err;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 87f47bc55c5e..7fa4e6bddf40 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -316,10 +316,9 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 
 	rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
 	if (rt->dst.error == -EAGAIN) {
-		ip6_rt_put_flags(rt, flags);
+		ip6_rt_put(rt);
 		rt = net->ipv6.ip6_null_entry;
-		if (!(flags | RT6_LOOKUP_F_DST_NOREF))
-			dst_hold(&rt->dst);
+		dst_hold(&rt->dst);
 	}
 
 	return &rt->dst;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 546088e50815..ad4d7bd390fe 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1392,6 +1392,9 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
 
 	pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
 
+	if (pcpu_rt)
+		ip6_hold_safe(NULL, &pcpu_rt);
+
 	return pcpu_rt;
 }
 
@@ -1401,9 +1404,12 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
 	struct rt6_info *pcpu_rt, *prev, **p;
 
 	pcpu_rt = ip6_rt_pcpu_alloc(res);
-	if (!pcpu_rt)
-		return NULL;
+	if (!pcpu_rt) {
+		dst_hold(&net->ipv6.ip6_null_entry->dst);
+		return net->ipv6.ip6_null_entry;
+	}
 
+	dst_hold(&pcpu_rt->dst);
 	p = this_cpu_ptr(res->nh->rt6i_pcpu);
 	prev = cmpxchg(p, NULL, pcpu_rt);
 	BUG_ON(prev);
@@ -2184,12 +2190,9 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			       const struct sk_buff *skb, int flags)
 {
 	struct fib6_result res = {};
-	struct rt6_info *rt = NULL;
+	struct rt6_info *rt;
 	int strict = 0;
 
-	WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
-		     !rcu_read_lock_held());
-
 	strict |= flags & RT6_LOOKUP_F_IFACE;
 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
 	if (net->ipv6.devconf_all->forwarding == 0)
@@ -2198,15 +2201,23 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 	rcu_read_lock();
 
 	fib6_table_lookup(net, table, oif, fl6, &res, strict);
-	if (res.f6i == net->ipv6.fib6_null_entry)
-		goto out;
+	if (res.f6i == net->ipv6.fib6_null_entry) {
+		rt = net->ipv6.ip6_null_entry;
+		rcu_read_unlock();
+		dst_hold(&rt->dst);
+		return rt;
+	}
 
 	fib6_select_path(net, &res, fl6, oif, false, skb, strict);
 
 	/*Search through exception table */
 	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
 	if (rt) {
-		goto out;
+		if (ip6_hold_safe(net, &rt))
+			dst_use_noref(&rt->dst, jiffies);
+
+		rcu_read_unlock();
+		return rt;
 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
 			    !res.nh->fib_nh_gw_family)) {
 		/* Create a RTF_CACHE clone which will not be
@@ -2214,38 +2225,40 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		 * the daddr in the skb during the neighbor look-up is different
 		 * from the fl6->daddr used to look-up route here.
 		 */
-		rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
+		struct rt6_info *uncached_rt;
+
+		uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
 
-		if (rt) {
-			/* 1 refcnt is taken during ip6_rt_cache_alloc().
-			 * As rt6_uncached_list_add() does not consume refcnt,
-			 * this refcnt is always returned to the caller even
-			 * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
+		rcu_read_unlock();
+
+		if (uncached_rt) {
+			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
+			 * No need for another dst_hold()
 			 */
-			rt6_uncached_list_add(rt);
+			rt6_uncached_list_add(uncached_rt);
 			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
-			rcu_read_unlock();
-
-			return rt;
+		} else {
+			uncached_rt = net->ipv6.ip6_null_entry;
+			dst_hold(&uncached_rt->dst);
 		}
+
+		return uncached_rt;
 	} else {
 		/* Get a percpu copy */
+
+		struct rt6_info *pcpu_rt;
+
 		local_bh_disable();
-		rt = rt6_get_pcpu_route(&res);
+		pcpu_rt = rt6_get_pcpu_route(&res);
 
-		if (!rt)
-			rt = rt6_make_pcpu_route(net, &res);
+		if (!pcpu_rt)
+			pcpu_rt = rt6_make_pcpu_route(net, &res);
 
 		local_bh_enable();
-	}
-out:
-	if (!rt)
-		rt = net->ipv6.ip6_null_entry;
-	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
-		ip6_hold_safe(net, &rt);
-	rcu_read_unlock();
+		rcu_read_unlock();
 
-	return rt;
+		return pcpu_rt;
+	}
 }
 EXPORT_SYMBOL_GPL(ip6_pol_route);
 
@@ -2412,12 +2425,11 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
 	return mhash >> 1;
 }
 
-/* Called with rcu held */
 void ip6_route_input(struct sk_buff *skb)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct net *net = dev_net(skb->dev);
-	int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
+	int flags = RT6_LOOKUP_F_HAS_SADDR;
 	struct ip_tunnel_info *tun_info;
 	struct flowi6 fl6 = {
 		.flowi6_iif = skb->dev->ifindex,
@@ -2439,8 +2451,8 @@ void ip6_route_input(struct sk_buff *skb)
 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
 		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
 	skb_dst_drop(skb);
-	skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
-						      &fl6, skb, flags));
+	skb_dst_set(skb,
+		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
 }
 
 static struct rt6_info *ip6_pol_route_output(struct net *net,
@@ -2452,9 +2464,8 @@ static struct rt6_info *ip6_pol_route_output(struct net *net,
 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
 }
 
-struct dst_entry *ip6_route_output_flags_noref(struct net *net,
-					       const struct sock *sk,
-					       struct flowi6 *fl6, int flags)
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+					 struct flowi6 *fl6, int flags)
 {
 	bool any_src;
 
@@ -2462,7 +2473,6 @@ struct dst_entry *ip6_route_output_flags_noref(struct net *net,
 	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
 		struct dst_entry *dst;
 
-		/* This function does not take refcnt on the dst */
 		dst = l3mdev_link_scope_lookup(net, fl6);
 		if (dst)
 			return dst;
@@ -2470,7 +2480,6 @@ struct dst_entry *ip6_route_output_flags_noref(struct net *net,
 
 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
 
-	flags |= RT6_LOOKUP_F_DST_NOREF;
 	any_src = ipv6_addr_any(&fl6->saddr);
 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
 	    (fl6->flowi6_oif && any_src))
@@ -2483,28 +2492,6 @@ struct dst_entry *ip6_route_output_flags_noref(struct net *net,
 
 	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
 }
-EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
-
-struct dst_entry *ip6_route_output_flags(struct net *net,
-					 const struct sock *sk,
-					 struct flowi6 *fl6,
-					 int flags)
-{
-	struct dst_entry *dst;
-	struct rt6_info *rt6;
-
-	rcu_read_lock();
-	dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
-	rt6 = (struct rt6_info *)dst;
-	/* For dst cached in uncached_list, refcnt is already taken. */
-	if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
-		dst = &net->ipv6.ip6_null_entry->dst;
-		dst_hold(dst);
-	}
-	rcu_read_unlock();
-
-	return dst;
-}
 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
 
 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
@@ -6174,7 +6161,6 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
 			 ip6_template_metrics, true);
-	INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	net->ipv6.fib6_has_custom_rules = false;
@@ -6186,7 +6172,6 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
 			 ip6_template_metrics, true);
-	INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached);
 
 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -6196,7 +6181,6 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
 			 ip6_template_metrics, true);
-	INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
 #endif
 
 	net->ipv6.sysctl.flush_delay = 0;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index f35899d45a9a..cfc9fcb97465 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -118,8 +118,6 @@ EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
  *			     local and multicast addresses
  *	@net: network namespace for device index lookup
  *	@fl6: IPv6 flow struct for lookup
- *	This function does not hold refcnt on the returned dst.
- *	Caller must hold rcu_read_lock().
  */
 
 struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
@@ -128,8 +126,9 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
 	struct dst_entry *dst = NULL;
 	struct net_device *dev;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
 	if (fl6->flowi6_oif) {
+		rcu_read_lock();
+
 		dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
 		if (dev && netif_is_l3_slave(dev))
 			dev = netdev_master_upper_dev_get_rcu(dev);
@@ -137,6 +136,8 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
 		if (dev && netif_is_l3_master(dev) &&
 		    dev->l3mdev_ops->l3mdev_link_scope_lookup)
 			dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6);
+
+		rcu_read_unlock();
 	}
 
 	return dst;